标签:mac ade tps aging order cti task fse cio
"""Scrape the top-activity feed of a Zhihu topic into an Excel workbook.

The script walks the paginated feed API to collect the URL of every page,
downloads those pages concurrently (blocking ``requests`` calls are pushed
to the event loop's default thread-pool executor), keeps the entries whose
target type is ``answer`` and writes them to ``保险<timestamp>.xlsx``.
"""
import asyncio
import json
import multiprocessing  # unused by this script; kept so existing imports are not dropped
import time
from collections import OrderedDict
from functools import partial

import pandas as pd
import requests
from pyquery import PyQuery

# Interactive alternative to the hard-coded values below:
# cookies = input('Enter Cookie: ')
# url = input('Enter url: ')
init_url = 'https://www.zhihu.com/api/v4/topics/19562045/feeds/top_activity?offset=5&limit=10'
# NOTE(review): the Referer topic id differs from the one in init_url, and
# 'Cookie' holds a placeholder value -- confirm both before running.
headers = {
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
    'Cookie': '**',
    'Referer': 'https://www.zhihu.com/topic/19606409/hot',
    'Host': 'www.zhihu.com',
    'X-UDID': 'AGDlzA1itw2PTr6aWsPp6OtejkxQ9iF7xgA=',
}

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would hang the whole run.
REQUEST_TIMEOUT = 10

# Shared accumulators filled by get_all_url() / get_all_data().  They live at
# module level so the functions also work when this file is imported.
data_list = []
url_list = []


def get_all_url(url):
    """Collect the URL of every feed page, starting at *url*.

    Each page is fetched once to read its ``paging`` object; the URL of the
    page is appended to the module-level ``url_list`` and the ``next`` link is
    followed until the API reports ``is_end``.  The page that was fetched is
    recorded (rather than its ``next`` link) so that the first page is
    included and no URL past the end is.

    The walk is iterative, so long feeds cannot hit the recursion limit.

    Args:
        url: URL of the first feed page.

    Returns:
        The module-level ``url_list`` (also mutated in place).

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            response.
        KeyError: if the response has no ``paging`` information.
    """
    while url:
        res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
        paging = json.loads(res.text)['paging']
        url_list.append(url)
        print(len(url_list))  # progress indicator
        if paging['is_end']:
            break
        url = paging['next']
    return url_list


def _extract_answer(target):
    """Build one output row (ordered column -> value) from an answer target."""
    record = OrderedDict()
    record['title'] = target['question']['title'] or ''
    try:
        record['content'] = PyQuery(target['content']).text()
    except Exception:
        # Best-effort fallback kept from the original: when the full content
        # is missing or cannot be parsed, use the excerpt instead.
        record['content'] = PyQuery(target['excerpt']).text()
    record['comment_count'] = target['comment_count']
    record['voteup_count'] = target['voteup_count']
    return record


async def get_all_data(url):
    """Download one feed page and append its answers to ``data_list``.

    ``requests`` is blocking, so the call runs in the loop's default executor
    and is awaited; this lets many pages download concurrently.

    Args:
        url: URL of the feed page to download.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            response.
        KeyError: if the response does not have the expected fields.
    """
    # Inside a coroutine this returns the loop that is running it, so the
    # function no longer depends on a global ``loop`` variable.
    loop = asyncio.get_event_loop()
    res = await loop.run_in_executor(
        None,
        partial(requests.get, url, headers=headers, timeout=REQUEST_TIMEOUT),
    )
    res.raise_for_status()
    res_data = json.loads(res.text)['data']
    print(len(data_list))  # progress indicator
    for item in res_data:
        target = item['target']
        # Only answers are exported; other target types are skipped.
        if target['type'] == 'answer':
            data_list.append(_extract_answer(target))


async def _scrape_all(urls):
    """Download all *urls* concurrently; return one outcome per URL.

    An outcome is ``None`` on success or the exception raised for that page,
    so one bad page does not abort the others.
    """
    return await asyncio.gather(
        *(get_all_data(page_url) for page_url in urls),
        return_exceptions=True,
    )


def main():
    """Run the full scrape and write the collected rows to an .xlsx file."""
    get_all_url(init_url)

    # Create the loop explicitly: relying on an implicit current loop is
    # deprecated and fails on recent Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        outcomes = loop.run_until_complete(_scrape_all(url_list))
    finally:
        loop.close()

    # Surface per-page failures instead of losing them silently.
    for page_url, outcome in zip(url_list, outcomes):
        if isinstance(outcome, BaseException):
            print('failed:', page_url, repr(outcome))

    df1 = pd.DataFrame(data_list)
    df1.to_excel('保险' + time.strftime("%Y%m%d%H%M%S") + '.xlsx', index=False)
    print('done')


if __name__ == '__main__':
    main()
标签:mac ade tps aging order cti task fse cio
原文地址:https://www.cnblogs.com/Erick-L/p/9415677.html