码迷,mamicode.com
首页 > 其他好文 > 详细

puuuu

时间:2019-11-22 21:02:53      阅读:200      评论:0      收藏:0      [点我收藏+]

标签:cursor   soup   mysqldb   sse   股票代码   取数   数据库   arch   注册   

hhhhhh

# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import lxml
import json
import re
import time
import tushare as ts
import pandas as pd
import pymysql
from sqlalchemy import create_engine
pymysql.install_as_MySQLdb()

def EMydSpider(conn, current, max_pages=200):
    """Scrape Eastmoney per-stock order-book change records into ``emyd_data``.

    Source page: http://quote.eastmoney.com/changes

    Pages are requested one after another until the service answers with
    ``"data": null`` or ``max_pages`` pages have been fetched.  Each page is
    written as one batch and committed on its own, so a failing batch only
    rolls back the rows of that page.

    Args:
        conn: Open DB-API connection (PyMySQL in this script).  It is left
            open; only the cursor created here is closed.
        current: Run date stored in the ``current`` column of every row.
        max_pages: Upper bound on the number of pages requested.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.5.3738.400"}
    url = "http://push2ex.eastmoney.com/getAllStockChanges?type=8201,8202,8193,4,32,64,8207,8209,8211,8213,8215,8204,8203,8194,8,16,128,8208,8210,8212,8214,8216"
    # Placeholders are filled in by the driver, which quotes and escapes the
    # values; the stock name is free text and cannot be interpolated as-is.
    insert_sql = (
        "insert into emyd_data(current,stk_code,stk_name,chg_time,chg_type,chg_value) "
        "values(%s,%s,%s,%s,%s,%s)"
    )

    cursor = conn.cursor()
    try:
        for page in range(max_pages):
            param = {
                "pageindex": page,
                "pagesize": 64,
                "ut": "7eea3edcaed734bea9cbfc24409ed989",
                "dpt": "wzchanges",
            }
            response = requests.get(url=url, params=param, headers=header, timeout=10)
            response.raise_for_status()
            data = json.loads(response.text).get("data")
            if data is None:
                # A null payload marks the page after the last one.
                print("东方财富:共爬取%d页数据" % page)
                print("东方财富:个股盘口异动数据已抓取完成")
                break

            rows = [
                (current, stock["c"], stock["n"], stock["tm"], stock["t"], stock["i"])
                for stock in data.get("allstock") or []
            ]
            if not rows:
                continue
            try:
                cursor.executemany(insert_sql, rows)
                # Commit/rollback live on the connection, not on the cursor.
                conn.commit()
            except Exception as exc:
                conn.rollback()
                print("新增失败,数据已回滚", exc)
    finally:
        cursor.close()

def TXfjSpider(conn, current, code_list):
    """Scrape the daily Tencent price-distribution table into ``txfj_data``.

    One request is made per stock code.  The rows of one stock are written
    as a single batch and committed on their own, so a failing batch only
    rolls back that stock.

    Args:
        conn: Open DB-API connection (PyMySQL in this script).  It is left
            open; only the cursor created here is closed.
        current: Run date stored in the ``current`` column of every row.
        code_list: Iterable of stock codes appended to the request URL.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.5.3738.400"}
    base_url = "http://stock.gtimg.cn/data/index.php?appn=price&c="
    insert_sql = "insert into txfj_data(current,stk_code,price,volumn) values(%s,%s,%s,%s)"
    # The payload sits between a leading marker ending in a double quote and
    # the closing double quote followed by punctuation.
    # NOTE(review): exact response layout is not visible here - confirm
    # against a live response.
    start_re = re.compile(r'v\wp.{10}\d+\,\d+\,\d+\,\"')
    end_re = re.compile(r'\"\W+')

    cursor = conn.cursor()
    try:
        for code in code_list:
            html = requests.get(url=base_url + code, headers=header, timeout=10).text

            start = start_re.search(html)
            # Look for the closing marker only after the opening one.
            end = end_re.search(html, start.end()) if start else None
            if start is None or end is None:
                print("腾讯分价表:%s 返回格式无法解析,已跳过" % code)
                continue

            rows = []
            # Records are separated by "^", fields inside a record by "~".
            for record in html[start.end():end.start()].split("^"):
                fields = record.split("~")
                if len(fields) < 3:
                    # Empty payload or truncated record.
                    continue
                rows.append((current, code, fields[0], fields[2]))
            if not rows:
                continue
            try:
                cursor.executemany(insert_sql, rows)
                # Commit/rollback live on the connection, not on the cursor.
                conn.commit()
            except Exception as exc:
                conn.rollback()
                print("新增失败,数据已回滚", exc)
    finally:
        cursor.close()

def TXddSpider(conn, current, code_list, max_pages=50):
    """Scrape the daily Tencent large-order data (over 1 million) into ``txdd_data``.

    For every stock code, pages are requested until a page carries no
    parseable payload or ``max_pages`` pages have been fetched.  Each page is
    written as one batch and committed on its own, so a failing batch only
    rolls back that page.

    Args:
        conn: Open DB-API connection (PyMySQL in this script).  It is left
            open; only the cursor created here is closed.
        current: Run date stored in the ``current`` column of every row.
        code_list: Iterable of stock codes sent as the ``c`` query parameter.
        max_pages: Upper bound on the number of pages requested per code.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.5.3738.400"}
    url = "http://stock.finance.qq.com/sstock/list/view/dadan.php?"
    insert_sql = "insert into txdd_data(current,stk_code,ddprice,volumn) values(%s,%s,%s,%s)"
    # The payload sits between a leading "var v_dadan_data_..." marker ending
    # in a single quote and the closing "'];".
    # NOTE(review): exact response layout is not visible here - confirm
    # against a live response.
    start_re = re.compile(r"var v_dadan_data_.{12}'")
    end_re = re.compile(r"'\];")

    cursor = conn.cursor()
    try:
        for code in code_list:
            for page in range(max_pages):
                param = {"t": "js", "c": code, "max": 80, "p": page, "opt": 10, "o": 0}
                # The query parameters must actually be sent; without them
                # every request returns the same parameterless response.
                html = requests.get(url=url, params=param, headers=header, timeout=10).text

                start = start_re.search(html)
                end = end_re.search(html, start.end()) if start else None
                if start is None or end is None:
                    # Presumably past the last page for this code - stop paging.
                    break

                rows = []
                # Records are separated by "^", fields inside a record by "~".
                for record in html[start.end():end.start()].split("^"):
                    fields = record.split("~")
                    if len(fields) < 3:
                        continue
                    rows.append((current, code, fields[0], fields[2]))
                if not rows:
                    break
                try:
                    cursor.executemany(insert_sql, rows)
                    # Commit/rollback live on the connection, not on the cursor.
                    conn.commit()
                except Exception as exc:
                    conn.rollback()
                    print("新增失败,数据已回滚", exc)
    finally:
        cursor.close()

def Tstockbasic(conn):
    """Sync ``stk_basic_data`` with tushare ``stock_basic`` and return the codes.

    Requests code, symbol, name, industry, listing date and exchange for all
    stocks with ``list_status="L"``.  Codes already stored get their
    ``industry`` updated; unknown codes are inserted.  Updates and inserts
    are each written as one batch with their own commit, so a failing batch
    is rolled back without affecting the other.

    Relies on the module-level tushare client ``pro``.

    Args:
        conn: Open DB-API connection (PyMySQL in this script).  It is left
            open because the caller keeps using it afterwards.

    Returns:
        list[str]: One entry per stock, built as ``ts_code[7:] + ts_code[:6]``
        (for a code shaped like ``"000001.SZ"`` this gives ``"SZ000001"``).
    """

    def _clean(value):
        # Missing values (None/NaN) become SQL NULL; numpy scalars become
        # plain Python values the driver knows how to escape.
        if pd.isna(value):
            return None
        return value.item() if hasattr(value, "item") else value

    # Codes already stored.  Membership must be tested against the column
    # values: ``x in DataFrame`` only looks at the column labels.
    stored = pd.read_sql("select ts_code from stk_basic_data", con=conn)
    existing = set(stored["ts_code"])

    stk_basic = pro.stock_basic(
        list_status="L",
        fields="ts_code,symbol,name,industry,list_date,exchange",
    )

    code_list = []
    updates = []
    inserts = []
    wanted = ["ts_code", "name", "industry", "list_date", "exchange"]
    for new_code, stk_name, industry, list_date, exchange in stk_basic[wanted].itertuples(index=False, name=None):
        code_list.append(new_code[7:] + new_code[:6])
        if new_code in existing:
            updates.append((_clean(industry), new_code))
        else:
            inserts.append(
                (new_code, _clean(stk_name), _clean(industry), _clean(list_date), _clean(exchange))
            )

    cursor = conn.cursor()
    try:
        if updates:
            try:
                cursor.executemany(
                    "update stk_basic_data set industry=%s where ts_code=%s", updates
                )
                conn.commit()
            except Exception as exc:
                conn.rollback()
                print("更新失败,数据已回滚", exc)
        if inserts:
            try:
                cursor.executemany(
                    "insert into stk_basic_data(ts_code,stk_name,industry,list_date,exchange) "
                    "values(%s,%s,%s,%s,%s)",
                    inserts,
                )
                conn.commit()
            except Exception as exc:
                conn.rollback()
                print("新增失败,数据已回滚", exc)
    finally:
        cursor.close()
    return code_list

def Tstockcompany(conn):
    """Sync ``stk_company_data`` with tushare ``stock_company``.

    Requests province, city, setup date and employee count for companies on
    both exchanges (``SZSE`` and ``SSE``).  Codes already stored get their
    ``employees`` updated; unknown codes are inserted.  Updates and inserts
    are each written as one batch with their own commit, so a failing batch
    is rolled back without affecting the other.

    Relies on the module-level tushare client ``pro``.

    Args:
        conn: Open DB-API connection (PyMySQL in this script).  It is left
            open; closing it is the caller's job.
    """

    def _clean(value):
        # Missing values (None/NaN) become SQL NULL; numpy scalars become
        # plain Python values the driver knows how to escape.
        if pd.isna(value):
            return None
        return value.item() if hasattr(value, "item") else value

    # Codes already stored.  Membership must be tested against the column
    # values: ``x in DataFrame`` only looks at the column labels.
    stored = pd.read_sql("select ts_code from stk_company_data", con=conn)
    existing = set(stored["ts_code"])

    # The API is queried once per exchange.
    fields = "ts_code,province,city,setup_date,employees"
    stk_company = pd.concat(
        [
            pro.stock_company(exchange="SZSE", fields=fields),
            pro.stock_company(exchange="SSE", fields=fields),
        ],
        axis=0,
    )

    updates = []
    inserts = []
    wanted = ["ts_code", "province", "city", "setup_date", "employees"]
    for new_code, province, city, setup_date, employees in stk_company[wanted].itertuples(index=False, name=None):
        if new_code in existing:
            updates.append((_clean(employees), new_code))
        else:
            inserts.append(
                (new_code, _clean(province), _clean(city), _clean(setup_date), _clean(employees))
            )

    cursor = conn.cursor()
    try:
        if updates:
            try:
                cursor.executemany(
                    "update stk_company_data set employees=%s where ts_code=%s", updates
                )
                conn.commit()
            except Exception as exc:
                conn.rollback()
                print("更新失败,数据已回滚", exc)
        if inserts:
            try:
                # NOTE(review): assumes stk_company_data has a ``city`` column;
                # the previous statement named ``province`` twice and targeted
                # stk_basic_data - confirm against the table definition.
                cursor.executemany(
                    "insert into stk_company_data(ts_code,province,city,setup_date,employees) "
                    "values(%s,%s,%s,%s,%s)",
                    inserts,
                )
                conn.commit()
            except Exception as exc:
                conn.rollback()
                print("新增失败,数据已回滚", exc)
    finally:
        cursor.close()

def Tstockconcept(ts_code=None):
    """Fetch concept-stock classification details from tushare ``concept_detail``.

    Relies on the module-level tushare client ``pro``.

    Args:
        ts_code: Stock code to look up.  Required in practice; it defaults to
            ``None`` only so the historical zero-argument signature still
            exists.

    Returns:
        The result of ``pro.concept_detail`` restricted to the fields
        ``ts_code``, ``concept_name`` and ``in_date``.

    Raises:
        ValueError: If ``ts_code`` is empty.
    """
    if not ts_code:
        raise ValueError("ts_code is required")
    # Query arguments go by keyword: the client's first positional slot is
    # ``fields``, so a positional code would clash with ``fields=``.
    return pro.concept_detail(ts_code=ts_code, fields="ts_code,concept_name,in_date")

if __name__ == "__main__":
    # NOTE(review): the tushare token and the database password are
    # hard-coded below; move them to environment variables or a config file
    # kept out of version control.

    # Set the tushare community token.
    ts.set_token("ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7")
    pro = ts.pro_api()

    # Create the database connection.
    conn = pymysql.connect(
        host="127.0.0.1",
        port=3306,
        user="root",
        passwd="123456",
        database="quantitative_trading_data",
        charset="utf8",
    )
    engine = create_engine("mysql://root:123456@127.0.0.1/quantitative_trading_data?charset=utf8")

    try:
        # Current date as YYYYMMDD, passed to every scraper.
        current = time.strftime("%Y%m%d", time.localtime())

        # Fetch the stock list via Tstockbasic and refresh the stored data.
        code_list = Tstockbasic(conn)

        # Fetch Eastmoney order-book change data via EMydSpider.
        EMydSpider(conn, current)

        # Fetch today's Tencent price-distribution data via TXfjSpider.
        TXfjSpider(conn, current, code_list)

        # Fetch today's Tencent large-order data via TXddSpider.
        TXddSpider(conn, current, code_list)

        # Fetch company information via Tstockcompany and refresh the stored data.
        Tstockcompany(conn)
    finally:
        # Release the connection even when a step fails; skip it if a callee
        # has already closed it.
        if conn.open:
            conn.close()

 

puuuu

标签:cursor   soup   mysqldb   sse   股票代码   取数   数据库   arch   注册   

原文地址:https://www.cnblogs.com/Iceredtea/p/11913980.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!