标签:success mysql连接 symbol div exist inf pen 抓取 win
# -*- coding: utf-8 -*-
import requests
from lxml import etree
import pandas as pd
import tushare as ts
from redis import Redis
import hashlib
import pymysql


def stock():
    """Fetch listed stock codes from tushare and store new ones in MySQL.

    Each ``ts_code`` is fingerprinted with SHA-256 and added to the Redis set
    ``data_id``. A row is inserted into ``stk_code_list`` only when the
    fingerprint was not already in the set. Summary counters are printed
    at the end.

    Returns:
        list[str]: one exchange-prefixed code per listed stock, built as the
        last two characters of ``ts_code`` followed by its first six
        characters. Every code is returned, whether or not its row was
        inserted on this run.
    """
    # NOTE(review): the API token and the DB password are hard-coded in the
    # source; consider loading them from configuration or the environment.
    pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')

    # MySQL connection used for the inserts.
    conn1 = pymysql.connect(user='root', password='123456',
                            database='stock', charset='utf8')
    cursor = conn1.cursor()
    # Redis connection used for de-duplication.
    conn2 = Redis(host='127.0.0.1', port=6379)

    # Counters: inserted rows, failed inserts, rows already known to Redis.
    success = 0
    fail = 0
    exist = 0
    code_list = []

    # Parameterized statement: the driver escapes the values, instead of
    # interpolating them into the SQL text.
    insert_sql = ('insert into stk_code_list(ts_code,symbol,xl_code) '
                  'values(%s,%s,%s)')

    try:
        stock_basic = pro.stock_basic(list_status='L',
                                      fields='ts_code,symbol')
        for _, row in stock_basic.iterrows():
            ts_code = row['ts_code']
            symbol = row['symbol']
            xl_code = ts_code[-2:] + ts_code[:6]
            code_list.append(xl_code)

            # Unique fingerprint of the record, stored in the Redis set.
            source_id = hashlib.sha256(ts_code.encode()).hexdigest()
            # sadd returns 1 when the member is new, 0 when already present.
            ex = conn2.sadd('data_id', source_id)
            if ex != 1:
                exist += 1
                continue

            try:
                cursor.execute(insert_sql, (ts_code, symbol, xl_code))
                conn1.commit()
                success += 1
            except pymysql.MySQLError:
                conn1.rollback()
                # Drop the fingerprint so a later run retries this row
                # instead of counting it as already stored.
                conn2.srem('data_id', source_id)
                fail += 1
    finally:
        # Release the MySQL resources even when fetching or inserting raised.
        cursor.close()
        conn1.close()

    print('程序抓取成功数据:%d条' % success)
    print('程序抓取失败数据:%d条' % fail)
    print('MySQL原来有数据:%d条' % exist)
    return code_list


def companyInfo(code_list):
    """Download the Sina company-info page for each code and print rows.

    Args:
        code_list: iterable of codes whose first two characters are skipped;
            the remainder is used as the stock id in the page URL.
    """
    basic_url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CorpInfo/stockid/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'
    }
    for code in code_list:
        stock_id = str(code[2:])
        print(stock_id)
        url = basic_url + stock_id + '.phtml'
        # A timeout keeps one unresponsive request from hanging the crawl.
        html = requests.get(url=url, headers=headers, timeout=10).text
        tree = etree.HTML(html)
        # NOTE(review): this only matches if the served HTML contains a
        # literal <tbody>; confirm against the raw page source.
        tr_list = tree.xpath('//*[@id="comInfo1"]/tbody/tr')
        print(tr_list)
        for tr in tr_list:
            print(tr)
            # NOTE(review): the path is relative to a <tr> yet starts with
            # ./tr[3], so it presumably never matches; it was probably meant
            # to be evaluated against the table body. Confirm the intent.
            list_date = tr.xpath('./tr[3]/td[4]/a')
            print(list_date)


def main():
    """Collect the stock codes, then scrape company info for each of them."""
    code_list = stock()
    companyInfo(code_list)


if __name__ == '__main__':
    main()
标签:success mysql连接 symbol div exist inf pen 抓取 win
原文地址:https://www.cnblogs.com/Iceredtea/p/12089236.html