# Tags: python crawler (python爬虫), art, use, html, referer, span, mic, load, col
"""Print the titles of juejin.im feed articles, one page at a time.

The script posts a paginated query to the juejin web API and prints the list
of ``{"title": ...}`` dicts for each page until the API reports that there is
no next page.
"""
import json
from time import sleep

url = "https://web-api.juejin.im/query"
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/79.0.3945.88 Safari/537.36"
    ),
    "Referer": "https://juejin.im/",
    "X-Agent": "Juejin/Web",
    "Content-Type": "application/json",
}

# Upper bound, in seconds, for a single HTTP request so a stalled connection
# cannot hang the crawler indefinitely.
REQUEST_TIMEOUT = 10

# Pause, in seconds, between two consecutive page requests.
PAGE_DELAY = 2


def get_content(after=""):
    """Fetch one page of the article feed.

    Args:
        after: Pagination cursor sent as ``variables.after``. The empty
            string requests the first page; later pages pass the
            ``endCursor`` value of the previous page's ``pageInfo``.

    Returns:
        A ``(edges, pageInfo)`` tuple taken from
        ``data.articleFeed.items`` of the JSON response.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Network failure or timeout.
        KeyError: The response JSON does not have the expected layout.
    """
    # Imported lazily so that the pure helpers in this module stay importable
    # (and testable) on machines where ``requests`` is not installed.
    import requests

    info = {
        "operationName": "",
        "query": "",
        "variables": {"first": 20, "after": after, "order": "POPULAR"},
        # NOTE(review): presumably the id of a server-side persisted query;
        # confirm against the API before changing it.
        "extensions": {"query": {"id": "21207e9ddb1de777adeaca7a2fb38030"}},
    }
    resp = requests.post(
        url,
        headers=headers,
        data=json.dumps(info),
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on an HTTP error instead of tripping over a KeyError or a
    # JSON decode error further down.
    resp.raise_for_status()
    content = json.loads(resp.content.decode("utf-8"))
    items = content["data"]["articleFeed"]["items"]
    return items["edges"], items["pageInfo"]


def getList(edges):
    """Reduce feed edges to a list of ``{"title": ...}`` dicts.

    Args:
        edges: Iterable of edge dicts, each holding a ``"node"`` dict with at
            least a ``"title"`` key.

    Returns:
        A new list with one ``{"title": <node title>}`` dict per edge, in
        input order. Only the title is kept; any other node field is ignored.

    Raises:
        KeyError: An edge lacks ``"node"`` or the node lacks ``"title"``.
    """
    return [{"title": item["node"]["title"]} for item in edges]


def crawl(fetch=get_content, delay=PAGE_DELAY):
    """Yield the title list of every feed page, following the cursor.

    Args:
        fetch: Callable taking a cursor string and returning
            ``(edges, pageInfo)``; defaults to :func:`get_content`.
        delay: Seconds to sleep before requesting each page after the first.

    Yields:
        The result of :func:`getList` for each page, in order.
    """
    after = ""
    while True:
        edges, page_info = fetch(after)
        yield getList(edges)
        if not page_info["hasNextPage"]:
            return
        after = page_info["endCursor"]
        # Throttle only when another request is actually going to be made.
        sleep(delay)


def main():
    """Crawl the whole feed and print each page's title list."""
    for titles in crawl():
        print(titles)


if __name__ == "__main__":
    main()
# Tags: python crawler (python爬虫), art, use, html, referer, span, mic, load, col
# Original article (原文地址): https://www.cnblogs.com/php-linux/p/12491967.html