# Tags: from for coding tar geo git dss lock elf
# -*- coding: utf-8 -*-
import scrapy,json
from urllib import parse
class GoogleTrendsSpider(scrapy.Spider):
    """Download Google Trends "interest over time" CSV data per keyword.

    Request flow:

    1. ``start_requests`` queries the explore endpoint once per target.
    2. ``parse_token`` picks the ``TIMESERIES`` widget out of the explore
       response and requests the CSV using that widget's token.
    3. ``parse_row`` receives the CSV text.

    The dict stored under ``meta['item']`` is forwarded unchanged from the
    first request to the last one.
    """

    name = 'google_trends'
    allowed_domains = ['google.com']

    # Endpoint that returns the widget list (and the tokens they carry).
    GENERAL_URL = 'https://trends.google.com/trends/api/explore?{}'
    # Endpoint that returns the CSV for a keyword.
    INTEREST_OVER_TIME_URL = 'https://trends.google.com/trends/api/widgetdata/multiline/csv?{}'

    # Value of the ``tz`` query parameter; both endpoints are sent the same one.
    TZ_OFFSET = '-480'

    # (coin_id, keyword) pairs to query.
    # NOTE(review): the original code read ``row.id`` from a loop that is not
    # present in this file (presumably database records); override this
    # attribute with the real records. The default keeps the placeholder
    # keyword the original hard-coded.
    targets = [(None, '关键字')]

    # Enable the user-agent and proxy downloader middlewares.
    custom_settings = {
        'DOWNLOADER_MIDDLEWARES': {
            'blockchain.middlewares.RandomUserAgent': 390,
            'blockchain.middlewares.RandomProxy': 544,
        },
        # 'COOKIES_ENABLED': False,
        'DOWNLOAD_DELAY': 1,
    }

    def start_requests(self):
        """Return one explore (token) request per entry in ``targets``.

        The ``comparisonItem`` fields mirror the parameters of the CSV
        download URL seen on the Trends page::

            'keyword': the search term,
            'time': 'now 7-d',
            'geo': ''

        Returns:
            list of ``scrapy.Request`` objects whose callback is
            ``parse_token`` and whose ``meta['item']`` is
            ``{'coin_id': <coin_id>}``.
        """
        return [
            self._build_explore_request(coin_id, keyword)
            for coin_id, keyword in self.targets
        ]

    def _build_explore_request(self, coin_id, keyword):
        """Build the explore request that yields the widget tokens for *keyword*."""
        explore_req = {
            'comparisonItem': [{'keyword': keyword, 'time': 'now 7-d', 'geo': ''}],
            'category': 0,
        }
        query = parse.urlencode({
            'hl': 'en-US',
            'tz': self.TZ_OFFSET,
            # json.dumps escapes non-ASCII characters, so the keyword is
            # URL-safe once urlencode has quoted the resulting string.
            'req': json.dumps(explore_req),
            'property': '',
        })
        return scrapy.Request(
            url=self.GENERAL_URL.format(query),
            callback=self.parse_token,
            meta={'item': {'coin_id': coin_id}},
        )

    def parse_token(self, response):
        """Parse the explore response and request the CSV for each TIMESERIES widget.

        Yields:
            ``scrapy.Request`` for the CSV endpoint, carrying the same
            ``meta['item']`` that arrived with *response*.
        """
        # The first four characters are skipped before JSON parsing
        # (presumably a non-JSON guard prefix added by the server).
        payload = json.loads(response.body.decode('utf-8')[4:])

        found = False
        for widget in payload.get('widgets', []):
            if widget.get('id') != 'TIMESERIES':
                continue
            found = True
            query = parse.urlencode({
                'tz': self.TZ_OFFSET,
                'req': json.dumps(widget['request']),
                'token': widget['token'],
            })
            yield scrapy.Request(
                url=self.INTEREST_OVER_TIME_URL.format(query),
                callback=self.parse_row,
                meta={'item': response.meta['item']},
            )

        if not found:
            # Without a TIMESERIES widget there is no token to exchange;
            # report it instead of failing on undefined names.
            self.logger.warning('No TIMESERIES widget in response from %s', response.url)

    def parse_row(self, response):
        """Decode the CSV response body as UTF-8 and print it."""
        csv_text = response.body.decode('utf-8')
        print(csv_text)
# Adapted from: pytrends
# Tags: from for coding tar geo git dss lock elf
# Original article: https://www.cnblogs.com/qy-brother/p/9003844.html