标签:
本代码主要实现了从上海证券交易所获取上市公司列表信息，并输出到指定的文件中。
# Fetch the list of companies listed on the Shanghai Stock Exchange (SSE)
# from its public query endpoint and write it to "stocklist.txt" in the
# directory of this script, one stock per line.
#
# The code runs on both Python 2 and Python 3.

import codecs
import json
import os
import random
import sys
from contextlib import closing

try:  # Python 3
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen
except ImportError:  # Python 2
    from urllib import urlencode
    from urllib2 import Request, urlopen

# Set to True to dump the decoded JSON response to stdout.
DEBUG = False

# Name of the output file, created inside cur_file_dir().
OUTPUT_FILENAME = 'stocklist.txt'

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Record keys written to each output line, in this order.
STOCK_FIELDS = ('NUM', 'PRODUCTID', 'PRODUCTNAME', 'FULLNAME')

url = 'http://query.sse.com.cn/commonQuery.do'

# NOTE: no "Accept-Encoding" header is sent on purpose. urllib does not
# decompress response bodies, so advertising gzip/deflate/sdch could yield
# a compressed payload that the UTF-8 decoding below cannot read.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    'Host': 'query.sse.com.cn',
    'Referer': 'http://www.sse.com.cn/assortment/stock/list/name/?type=5',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36',
}

values = {
    'jsonCallBack': 'jsonpCallback47701',
    'isPagination': 'true',
    'sqlId': 'COMMON_SSE_ZQPZ_GPLB_MCJS_SSAG_L',
    'pageHelp.pageSize': '2000',
    # Placeholder; replaced by a random integer for every request.
    '_': '1447409570125',
}

# Text type of the running interpreter: unicode on Python 2, str on Python 3.
_TEXT = type(u'')


def cur_file_dir():
    """Return the directory this script is run from.

    ``sys.path[0]`` is normally the script's directory. It can also be a
    file (for example a zip archive), in which case that file's directory
    is returned. If it is neither (for example the empty string), the
    current working directory is returned so callers always get a usable
    directory path.
    """
    path = sys.path[0]
    if os.path.isdir(path):
        return path
    if os.path.isfile(path):
        return os.path.dirname(path)
    return os.getcwd()


def strip_jsonp(text):
    """Return the JSON document contained in a JSONP response.

    ``callback({...})`` becomes ``{...}``. Text that already starts with
    ``{`` or ``[`` is returned as is (after trimming whitespace), so
    parentheses inside JSON strings are never mistaken for the wrapper.

    Raises:
        ValueError: if no ``callback( ... )`` wrapper can be found.
    """
    text = text.strip()
    if text[:1] in ('{', '['):
        return text
    start = text.find('(')
    end = text.rfind(')')
    if start == -1 or end < start:
        raise ValueError('response is not a JSONP payload: %r' % text[:60])
    return text[start + 1:end]


def parse_response(raw):
    """Decode the raw UTF-8 response bytes and return the parsed JSON."""
    return json.loads(strip_jsonp(raw.decode('utf-8')))


def format_stock_line(record):
    """Return one output line (newline-terminated) for a stock record.

    The fields listed in STOCK_FIELDS are joined with single spaces.
    Non-string values are converted to text and ``None`` is written as an
    empty field. A missing key raises ``KeyError``.
    """
    fields = (record[key] for key in STOCK_FIELDS)
    return u' '.join(
        u'' if value is None else _TEXT(value) for value in fields
    ) + u'\n'


def fetch_stock_list():
    """Query the SSE endpoint and return the decoded JSON response."""
    params = dict(values)
    # A fresh random 13-digit integer per request; presumably a
    # cache-busting value in place of a millisecond timestamp.
    params['_'] = random.randint(1000000000000, 1999999999999)
    # Passing a body makes this a POST request; it must be bytes on Python 3.
    data = urlencode(params).encode('ascii')
    request = Request(url, data, headers)
    with closing(urlopen(request, timeout=REQUEST_TIMEOUT)) as response:
        raw = response.read()
    return parse_response(raw)


def write_stock_list(records, path):
    """Write one line per record to ``path`` as UTF-8 text."""
    with codecs.open(path, 'w', 'utf-8') as stock_file:
        stock_file.writelines(format_stock_line(r) for r in records)


def main():
    """Download the stock list and store it next to this script."""
    content = fetch_stock_list()
    if DEBUG:
        print(json.dumps(content, ensure_ascii=False, indent=1))
    write_stock_list(
        content['pageHelp']['data'],
        os.path.join(cur_file_dir(), OUTPUT_FILENAME),
    )


if __name__ == '__main__':
    main()
标签:
原文地址:http://www.cnblogs.com/chujq/p/4966142.html