标签:mozilla ext hot http sele find tle index agent
# -*- coding:utf-8 -*-
"""Scrape the Baidu hot-search board and print its [title, number] entries."""
import requests
from bs4 import BeautifulSoup

url = "http://top.baidu.com/buzz?b=1&fr=topindex"
header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
}

# The page body is GBK-encoded. Telling requests so up front replaces the
# fragile per-title ``encode("iso-8859-1").decode("gbk")`` round-trip, which
# only worked while requests happened to fall back to ISO-8859-1.
PAGE_ENCODING = "gbk"

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10


def fetch_page(page_url=url, headers=header, timeout=REQUEST_TIMEOUT):
    """Download the hot-search page and return its HTML as text.

    Args:
        page_url: Address of the page to download.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait before giving up on the server.

    Returns:
        The response body decoded as ``PAGE_ENCODING``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx HTTP status.
    """
    response = requests.get(page_url, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty result.
    response.raise_for_status()
    # Must be set before ``.text`` is read so the body is decoded correctly.
    response.encoding = PAGE_ENCODING
    return response.text


def parse_hot_searches(html):
    """Extract ``[title, number]`` pairs from the hot-search table markup.

    Args:
        html: Already-decoded HTML text of the page.

    Returns:
        A list of two-item lists: the text of the ``list-title`` element and
        the stripped text of the ``last`` element of each table row. Rows
        without a ``list-title`` element are skipped; a row without a ``last``
        element gets an empty string as its number.
    """
    soup = BeautifulSoup(html, "html.parser")
    entries = []
    # The first <tr> is skipped, as in the original script.
    for row in soup.find_all("tr")[1:]:
        title_cell = row.find(class_="list-title")
        if title_cell is None:
            continue
        number_cell = row.find(class_="last")
        number = number_cell.text.strip() if number_cell is not None else ""
        entries.append([title_cell.text, number])
    return entries


def main():
    """Fetch the page, parse it, and print the collected entries."""
    content = parse_hot_searches(fetch_page())
    print(content)


if __name__ == "__main__":
    main()
标签:mozilla ext hot http sele find tle index agent
原文地址:https://www.cnblogs.com/python-kp/p/13254943.html