标签:style 数据采集 tle res return code book ade https
import json

import lxml
import pymysql
import requests
import xlwt
from bs4 import BeautifulSoup

# NOTE: json, lxml and pymysql are imported but not used by the code below.


def getTitle(url, timeout=10):
    """Download a page and return the headline link elements found on it.

    Each matched headline's text is also printed to stdout, one per line.
    The request is sent with the module-level ``headers`` dict.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The elements matched by the headline CSS selector, in document
        order. Empty when nothing on the page matches.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Send the request. Without a timeout a stalled server would block the
    # whole script indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    content = response.content.decode('utf-8')
    soup = BeautifulSoup(content, 'html.parser')
    links = soup.select('div:nth-child(2) > h2:nth-child(1) > a:nth-child(1)')
    # Iterate over what was actually matched: indexing a fixed 18 entries
    # raised IndexError on any page with fewer headlines.
    for link in links:
        print(link.text)
    return links


# Page-number placeholder is filled in per request.
url = "https://news.cnblogs.com/n/recommend?page={}"
# Request headers: present the script as a desktop Chrome browser.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}

f = xlwt.Workbook(encoding='utf-8')
sheet01 = f.add_sheet(u'sheet1', cell_overwrite_ok=True)
sheet01.write(0, 0, '博客最热新闻')  # header cell: first row, first column

temp = 0  # number of title rows written so far
try:
    for i in range(1, 100):  # pages 1 through 99
        newurl = url.format(i)
        title = getTitle(newurl)
        # Row 0 holds the header, so titles start at row temp + 1.
        for row, link in enumerate(title, start=temp + 1):
            sheet01.write(row, 0, link.text)
        temp += len(title)
        print("第" + str(i) + "页打印完!")
    print("全部打印完!!!")
finally:
    # Save even if a page fails so the titles collected so far are not lost.
    f.save('Hotword02.xls')
明天将继续完成其余方面的要求。
标签:style 数据采集 tle res return code book ade https
原文地址:https://www.cnblogs.com/haobox/p/15041851.html