码迷,mamicode.com
首页 > 其他好文 > 详细

哈哈哈

时间:2019-12-24 09:25:50      阅读:75      评论:0      收藏:0      [点我收藏+]

标签:success   mysql连接   symbol   div   exist   inf   pen   抓取   win   

# -*- coding: utf-8 -*-
import requests
from lxml import etree
import pandas as pd
import tushare as ts
from redis import Redis
import hashlib
import pymysql

def stock():
    """Load listed-stock codes from tushare, persist unseen ones, and return them.

    For every row returned by ``pro.stock_basic`` a code is built from the
    last two characters of ``ts_code`` followed by its first six characters,
    and appended to the result list.  A SHA-256 digest of ``ts_code`` is
    added to the Redis set ``data_id``; the row is inserted into the MySQL
    table ``stk_code_list`` only when Redis reports the digest as newly
    added.  A summary of inserted / failed / already-known rows is printed.

    Returns:
        list: One rebuilt code per row, in the order tushare returned them.

    Raises:
        Exception: Connection errors from tushare, Redis or MySQL propagate;
            only MySQL errors raised by an individual insert are counted as
            failures and skipped.
    """
    # Initialise the tushare.pro client.
    # NOTE(review): the API token and the DB credentials below are
    # hard-coded; consider loading them from the environment or a config file.
    pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')

    # MySQL connection used for the inserts.
    conn1 = pymysql.connect(user='root', password='123456', database='stock', charset='utf8')
    cursor = conn1.cursor()

    # Counters for the summary printed at the end.
    success = 0
    fail = 0
    exist = 0

    code_list = []

    # Parameterised statement: the driver escapes the values, so quotes in
    # the data can neither break the statement nor inject SQL.
    insert_sql = 'insert into stk_code_list(ts_code,symbol,xl_code) values(%s,%s,%s)'

    try:
        # Redis connection used for de-duplication.
        conn2 = Redis(host='127.0.0.1', port=6379)

        stock_basic = pro.stock_basic(list_status='L', fields='ts_code,symbol')
        for _, row in stock_basic.iterrows():
            ts_code = row['ts_code']
            symbol = row['symbol']
            xl_code = ts_code[-2:] + ts_code[:6]
            code_list.append(xl_code)

            # Use a digest of ts_code as the unique marker stored in Redis.
            source_id = hashlib.sha256(ts_code.encode()).hexdigest()
            # sadd returns 1 when the member was not in the set before.
            is_new = conn2.sadd('data_id', source_id)

            if is_new != 1:
                # Marker already present: the row was handled earlier.
                exist += 1
                continue

            try:
                cursor.execute(insert_sql, (ts_code, symbol, xl_code))
                conn1.commit()
            except pymysql.MySQLError:
                conn1.rollback()
                # Drop the marker again so a later run retries this row
                # instead of reporting it as already stored.
                conn2.srem('data_id', source_id)
                fail += 1
            else:
                success += 1
    finally:
        # Release the MySQL resources even when something above raised.
        cursor.close()
        conn1.close()

    print('程序抓取成功数据:%d条' % success)
    print('程序抓取失败数据:%d条' % fail)
    print('MySQL原来有数据:%d条' % exist)

    # Returned after the whole loop so every code is included.
    return code_list

def companyInfo(code_list):
    """Download the Sina Finance company page for each code and print parsed nodes.

    For every code the first two characters are dropped and the remainder is
    used as the stock id in the page URL.  The rows of the element with id
    ``comInfo1`` and the ``<a>`` nodes in the fourth cell of its third row
    are printed; nothing is returned.

    Args:
        code_list: Iterable of codes; ``None`` or an empty iterable results
            in no requests being made.
    """
    basic_url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CorpInfo/stockid/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'
    }
    for code in code_list or []:
        stock_id = str(code[2:])
        print(stock_id)
        url = basic_url + stock_id + '.phtml'
        # A timeout keeps one unresponsive request from hanging the whole run.
        html = requests.get(url=url, headers=headers, timeout=10).text

        tree = etree.HTML(html)
        tr_list = tree.xpath('//*[@id="comInfo1"]/tbody/tr')
        print(tr_list)
        for tr in tr_list:
            print(tr)
        # The cell path is evaluated from the table, not from each row: a
        # path starting with ./tr relative to a <tr> searches for row
        # children of a row and therefore never matched anything.
        # NOTE(review): assumes the served HTML really contains <tbody>
        # under #comInfo1 - confirm against a live page.
        list_date = tree.xpath('//*[@id="comInfo1"]/tbody/tr[3]/td[4]/a')
        print(list_date)
def main():
    """Collect the stock codes and hand them to the company-page scraper."""
    companyInfo(stock())

# Run the scraper only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()

哈哈哈

标签:success   mysql连接   symbol   div   exist   inf   pen   抓取   win   

原文地址:https://www.cnblogs.com/Iceredtea/p/12089236.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!