解析库bs4及爬取示例

  • A+
所属分类:Python
# Two small Beautiful Soup examples, run one after the other:
#   1. walk every descendant node of the first <ul>;
#   2. select <a> tags by their class attribute with find_all().
# Both examples rebind the module-level names `text` and `soup`.
from bs4 import BeautifulSoup

# --- Example 1: iterate over all descendants of <ul> -----------------------
# The markup is assembled from adjacent string literals so that its runtime
# value stays exactly the single-line, space-separated string of the source.
# NOTE(review): every <img> carries two alt attributes; this looks like an
# artifact of the page the snippet was copied from -- confirm before relying
# on the alt value.
text = (
    ' <ul class="clearfix">'
    ' <li>'
    ' <a href="/tupian/26783.html" target="_blank">'
    ' <img src="/uploads/allimg/210122/210154-16113205145cce.jpg"'
    ' alt="解析库bs4及爬取示例"'
    ' alt="动漫女孩 黑发 露肩 4k壁纸3840x2160" />'
    ' <b>动漫女孩 黑发 露肩 4k壁</b>'
    ' </a>'
    ' </li>'
    ' <li>'
    ' <a href="/tupian/26780.html" target="_blank">'
    ' <img src="/uploads/allimg/210122/195550-1611316550c9d7.jpg"'
    ' alt="解析库bs4及爬取示例"'
    ' alt="古风 美少女 伞 长发 女孩大长腿4k唯美动漫壁纸" />'
    ' <b>古风 美少女 伞 长发 女</b>'
    ' </a>'
    ' </li>'
    ' <li>'
    ' <a href="/tupian/24695.html" target="_blank">'
    ' <img src="/uploads/allimg/190824/212516-1566653116f355.jpg"'
    ' alt="解析库bs4及爬取示例"'
    ' alt="下午 趴在桌子的女孩4k动漫壁纸3840x2160" />'
    ' <b>下午 趴在桌子的女孩4k动</b>'
    ' </a>'
    ' </li>'
    ' </ul> '
)

soup = BeautifulSoup(text, 'lxml')  # the lxml package must be installed

# Method 2: `.descendants` is iterated lazily; print its type, then each node.
print(type(soup.ul.descendants))
for child in soup.ul.descendants:
    print(child)

# --- Example 2: filter <a> tags by class ------------------------------------
text = (
    ' <ul class="clearfix">'
    ' <li>'
    ' <a class="hello" href="/tupian/26783.html" target="_blank">你好</a>'
    ' <a class="white" href="/tupian/26783.html" target="_blank">白菜</a>'
    ' <a class="white" href="/tupian/26783.html" target="_blank">白菜</a>'
    ' <a class="white" href="/tupian/26783.html" target="_blank">白菜</a>'
    ' <a class="white" href="/tupian/26783.html" target="_blank">白菜</a>'
    ' <a class="black" href="/tupian/26783.html" target="_blank">黑彩</a>'
    ' </li>'
    ' </ul> '
)

soup = BeautifulSoup(text, 'lxml')  # the lxml package must be installed

# Keep only the links whose class is "white" (the "白菜" entries).
result_list = soup.find_all('a', attrs={'class': 'white'})
print(type(result_list))
print('-' * 50)
for result in result_list:
    print(result)
  • 我的微信
  • 这是我的微信扫一扫
  • weinxin
  • 我的微信公众号
  • 我的微信公众号扫一扫
  • weinxin