码迷,mamicode.com
首页 > 编程语言 > 详细

python实例:从excel读取股票代码,爬取股票信息写到代码后面的单元格中

时间:2020-02-01 23:35:38      阅读:166      评论:0      收藏:0      [点我收藏+]

标签:市值   for   tip   补齐   webp   aac   import   append   dup   

关键词:爬虫、python、request、接口、excel处理

思路:

1、首先准备好excel文档,把股票代码事先编辑进去。

技术图片

2、脚本读取文档,依次读出股票代码到指定站点发起请求获取股票信息

3、将获取的股票信息简单处理,依次写入到指定的文档单元格中,完成整个实例过程

用到的python库:xlrd(读取excel)、requests(获取网页数据)、lxml(处理网页数据)、openpyyxl(对excel进行写入编辑)

具体步骤:

一,导入相关库

import xlrd  #引入读取excel库
import requests   #倒入requests库
from lxml import etree  #倒入lxml 库(没有这个库,pip install lxml安装)
import os
import sys
import openpyxl

二,读取excel内的股票代码,写入数组(共后面的函数调用)

#读取excel文档内的股票代码
def code():
    wb = xlrd.open_workbook(path+\\stock.xlsx)# 打开Excel文件
    data = wb.sheet_by_name(Sheet1)#通过excel表格名称(rank)获取工作表
    b=data.col_values(0)#获取第一列数据(数组)
    list=[]
    for c in b[1:]:#for循环,排除第一行数据
        d=int(c)
        s="%06d" % d#股票代码一共有6位,常规打印无法打印出首位带0的代码的0部分,补齐缺失的0
        #print(s)
        list.append(s)
    return(list)
code=code()

三、循环读取股票代码查询股票信息,写入同一类数据的数组内(共后面写入excel)

#code函数获取的代码,循环爬取代码对应的股票数据,将股票数据写入对应的数组(同一类)中
def get(code):
    list_name=[]#股票名称
    list_score=[]#综合评分
    list_Short=[]#短期趋势   
    list_Metaphase=[]#中期趋势
    list_Long=[]#长期趋势
    list_comprehensive=[]#综合评判
    list_day=[]#5日涨幅
    list_mouth=[]#3个月涨幅
    list_year=[]#1年涨幅
    for num in code:
        url=http://stockpage.10jqka.com.cn/+num+/
        headers = {
            Accept-Encoding: gzip, deflate,
            Accept-Language: zh-CN,zh;q=0.9,
            Upgrade-Insecure-Requests: 1,
            User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36,
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,
            Referer: http://doctor.10jqka.com.cn/603160/,
            Connection: keep-alive,
            Cache-Control: max-age=0,
            }

        response = requests.get(url, headers=headers).text
        html = etree.HTML(response)
        b = html.xpath(//h1[@class = "m_logo fl"]/a/strong/text())
        #print(b[0])#股票名称
        c = html.xpath(//span[@class = "analyze-tips mt7"]/text())
        #print(c[0])#综合评分
        d = html.xpath(//div[@class = "analyze-txt fr"]/div/div[2]/text())
        #print("短期趋势:",d[0])#短期趋势
        e = html.xpath(//div[@class = "analyze-txt fr"]/div[2]/div[2]/text())
        #print("中期趋势:",e[0])#中期趋势
        f = html.xpath(//div[@class = "analyze-txt fr"]/div[3]/div[2]/text())
        #print("远期趋势:",f[0])#远期趋势
        g = html.xpath(//div[@class = "txt-phra"]/text())
        h = html.xpath(//div[@class = "txt-phra"]/strong/text())
        i = html.xpath(//div[@class = "txt-phra"]/text()[2])
        #print(g[0],h[0],i[0])#综合评判
        m=g[0]+h[0]+i[0]
        j = html.xpath(//tr[@class = "even hot_cont"]/td[2]/text())
        k = html.xpath(//tr[@class = "even hot_cont"]/td[3]/text())
        l = html.xpath(//tr[@class = "even hot_cont"]/td[4]/text())
        #print("5日涨幅:",j[0])#5日涨幅
        #print("3个月涨幅:",k[0])#3个月涨幅
        #print("1年涨幅:",l[0])#1年涨幅
        list_name.append(b[0])#股票名称数组
        list_score.append(c[0])#综合评分
        list_Short.append(d[0])#短期趋势   
        list_Metaphase.append(e[0])#中期趋势
        list_Long.append(f[0])#长期趋势
        list_comprehensive.append(m)#综合评判
        list_day.append(j[0])#5日涨幅
        list_mouth.append(k[0])#3个月涨幅
        list_year.append(l[0])#1年涨幅

    return(list_name,list_score,list_Short,list_Metaphase,
    list_Long,list_comprehensive,list_day,list_mouth,list_year)
get=get(code)

四、将写入数组的股票数据,依次写入到对应股票代码后的单元格中

#读取get函数生成的股票数据,依次写入到excel文档中
xfile = openpyxl.load_workbook(path+\\stock.xlsx)#加载文件
sheet1 = xfile.worksheets[0] 
#excel中单元格为B2开始,即第2列,第2行
for i in range(len(get[0])):#股票名称
    sheet1.cell(i+2, 2).value=get[0][i]

for i in range(len(baidu[0])):#当前价格
    sheet1.cell(i+2, 3).value=baidu[0][i]

for i in range(len(baidu[1])):#当前市值
    sheet1.cell(i+2, 4).value=baidu[1][i]

for i in range(len(get[1])):#综合评分
    sheet1.cell(i+2, 5).value=get[1][i]

for i in range(len(get[2])):#短期趋势  
    sheet1.cell(i+2, 6).value=get[2][i]

for i in range(len(get[3])):#中期趋势
    sheet1.cell(i+2, 7).value=get[3][i]

for i in range(len(get[4])):#长期趋势
    sheet1.cell(i+2, 8).value=get[4][i]

for i in range(len(get[5])):#综合评判
    sheet1.cell(i+2, 9).value=get[5][i]

for i in range(len(get[6])):#5日涨幅
    sheet1.cell(i+2, 10).value=get[6][i]

for i in range(len(get[7])):#3个月涨幅
    sheet1.cell(i+2, 11).value=get[7][i]

for i in range(len(get[8])):#1年涨幅
    sheet1.cell(i+2, 12).value=get[8][i]
xfile.save(path+\\stock.xlsx)

直接后的文档内容

技术图片

 

全部代码

#本脚本主要实现循环爬取数据后:
# 1、同一类数据统一写入到同一个数组中,
# 2、读取数组数据写入指定的excel列中,实现最终数据爬取
import xlrd  #引入读取excel库
import requests   #倒入requests库
from lxml import etree  #倒入lxml 库(没有这个库,pip install lxml安装)
import os
import sys
import openpyxl

path = os.path.abspath(os.path.dirname(sys.argv[0]))

#读取excel文档内的股票代码
def code():
    wb = xlrd.open_workbook(path+\\stock.xlsx)# 打开Excel文件
    data = wb.sheet_by_name(Sheet1)#通过excel表格名称(rank)获取工作表
    b=data.col_values(0)#获取第一列数据(数组)
    list=[]
    for c in b[1:]:#for循环,排除第一行数据
        d=int(c)
        s="%06d" % d#股票代码一共有6位,常规打印无法打印出首位带0的代码的0部分,补齐缺失的0
        #print(s)
        list.append(s)
    return(list)
code=code()

#code函数获取的代码,循环爬取代码对应的股票数据,将股票数据写入对应的数组(同一类)中
def get(code):
    list_name=[]#股票名称
    list_score=[]#综合评分
    list_Short=[]#短期趋势   
    list_Metaphase=[]#中期趋势
    list_Long=[]#长期趋势
    list_comprehensive=[]#综合评判
    list_day=[]#5日涨幅
    list_mouth=[]#3个月涨幅
    list_year=[]#1年涨幅
    for num in code:
        url=http://stockpage.10jqka.com.cn/+num+/
        headers = {
            Accept-Encoding: gzip, deflate,
            Accept-Language: zh-CN,zh;q=0.9,
            Upgrade-Insecure-Requests: 1,
            User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36,
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,
            Referer: http://doctor.10jqka.com.cn/603160/,
            Connection: keep-alive,
            Cache-Control: max-age=0,
            }

        response = requests.get(url, headers=headers).text
        html = etree.HTML(response)
        b = html.xpath(//h1[@class = "m_logo fl"]/a/strong/text())
        #print(b[0])#股票名称
        c = html.xpath(//span[@class = "analyze-tips mt7"]/text())
        #print(c[0])#综合评分
        d = html.xpath(//div[@class = "analyze-txt fr"]/div/div[2]/text())
        #print("短期趋势:",d[0])#短期趋势
        e = html.xpath(//div[@class = "analyze-txt fr"]/div[2]/div[2]/text())
        #print("中期趋势:",e[0])#中期趋势
        f = html.xpath(//div[@class = "analyze-txt fr"]/div[3]/div[2]/text())
        #print("远期趋势:",f[0])#远期趋势
        g = html.xpath(//div[@class = "txt-phra"]/text())
        h = html.xpath(//div[@class = "txt-phra"]/strong/text())
        i = html.xpath(//div[@class = "txt-phra"]/text()[2])
        #print(g[0],h[0],i[0])#综合评判
        m=g[0]+h[0]+i[0]
        j = html.xpath(//tr[@class = "even hot_cont"]/td[2]/text())
        k = html.xpath(//tr[@class = "even hot_cont"]/td[3]/text())
        l = html.xpath(//tr[@class = "even hot_cont"]/td[4]/text())
        #print("5日涨幅:",j[0])#5日涨幅
        #print("3个月涨幅:",k[0])#3个月涨幅
        #print("1年涨幅:",l[0])#1年涨幅
        list_name.append(b[0])#股票名称数组
        list_score.append(c[0])#综合评分
        list_Short.append(d[0])#短期趋势   
        list_Metaphase.append(e[0])#中期趋势
        list_Long.append(f[0])#长期趋势
        list_comprehensive.append(m)#综合评判
        list_day.append(j[0])#5日涨幅
        list_mouth.append(k[0])#3个月涨幅
        list_year.append(l[0])#1年涨幅

    return(list_name,list_score,list_Short,list_Metaphase,
    list_Long,list_comprehensive,list_day,list_mouth,list_year)
get=get(code)

def baidu(code):
    list_Price=[]
    list_market=[]
    for num in code:
        cookies = {
            BIDUPSID: 90EF3BD78F53BC8C96DF84CD3854CA2D,
            PSTM: 1578233930,
            BD_UPN: 12314753,
            BAIDUID: 885754C8E6BD7B1A771802631815CC6D:FG=1,
            BDORZ: B490B5EBF6F3CD402E515D22BCDA1598,
            BDUSS: mxYdVpwOEx0eGJsT3VUYTJXbkZJYWhKSGpQWnlqaVBwMlExTWNNRkR4cWtabHRlSVFBQUFBJCQAAAAAAAAAAAEAAACRJsY-cGlwacnxu7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKTZM16k2TNeV,
            COOKIE_SESSION: 7_0_5_3_11_3_0_0_4_2_1_0_73199_0_169_0_1580456363_0_1580456194%7C9%23622712_32_1580376248%7C6,
            cflag: 13%3A3,
            BD_HOME: 1,
            BDRCVFR[feWj1Vr5u3D]: I67x6TjHwwYf0,
            delPer: 0,
            BD_CK_SAM: 1,
            PSINO: 3,
            H_PS_PSSID: 1438_21104_26350,
            H_PS_645EC: 29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8,
            BDSVRTM: 121,
            WWW_ST: 1580466352318,
            }

        headers = {
            is_xhr: 1,
            Accept-Encoding: gzip, deflate, br,
            Accept-Language: zh-CN,zh;q=0.9,
            User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36,
            is_pbs: num,
            Accept: */*,
            Referer: https://www.baidu.com/s?wd=+num+&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=0&rsv_dl=tb&oq=+num+&rsv_t=29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8&rsv_pq=b379448d00013935,
            X-Requested-With: XMLHttpRequest,
            Connection: keep-alive,
            is_referer: https://www.baidu.com/s?wd=+num+&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_dl=tb&rsv_n=2&rsv_sug3=1&rsv_sug1=1&rsv_sug7=100&rsv_sug2=0&inputT=359&rsv_sug4=359,
            }

        params = (
            (ie, [utf-8, utf-8]),
            (newi, 1),
            (mod, 1),
            (isbd, 1),
            (isid, b379448d00013935),
            (wd, num),
            (rsv_spt, 1),
            (rsv_iqid, 0xa5a17c8700013159),
            (issp, 1),
            (f, 8),
            (rsv_bp, 1),
            (rsv_idx, 2),
            (rqlang, cn),
            (tn, baiduhome_pg),
            (rsv_enter, 0),
            (rsv_dl, tb),
            (oq, num),
            (rsv_t, 29b8ZVy4WP7OUTz6/jeON9IexqLhOnMXkLTzhD5NfPu4fH/PZmThFknleY0LwzNQZ8j8),
            (rsv_pq, b379448d00013935),
            (bs, num),
            (rsv_sid, 1438_21104_26350),
            (_ss, 1),
            (clist, ‘‘),
            (hsug, ‘‘),
            (f4s, 1),
            (csor, 6),
            (_cr1, 29647),
            )

        response = requests.get(https://www.baidu.com/s, headers=headers, params=params, cookies=cookies).text
        html = etree.HTML(response)
        a = html.xpath(//span[@class = "op-stockdynamic-moretab-cur-num c-gap-right-small"]/text())
        #print(‘当前价格:‘,a[0])#当前价格
        b = html.xpath(//ul[@class = "op-stockdynamic-moretab-info"]/li[8]/span[2]/text())
        #print(‘当前市值:‘,b[0])#当前市值

        list_Price.append(a[0])#当前价格
        list_market.append(b[0])#当前市值

    return(list_Price,list_market)

baidu=baidu(code)

#读取get函数生成的股票数据,依次写入到excel文档中
xfile = openpyxl.load_workbook(path+\\stock.xlsx)#加载文件
sheet1 = xfile.worksheets[0] 
#excel中单元格为B2开始,即第2列,第2行
for i in range(len(get[0])):#股票名称
    sheet1.cell(i+2, 2).value=get[0][i]

for i in range(len(baidu[0])):#当前价格
    sheet1.cell(i+2, 3).value=baidu[0][i]

for i in range(len(baidu[1])):#当前市值
    sheet1.cell(i+2, 4).value=baidu[1][i]

for i in range(len(get[1])):#综合评分
    sheet1.cell(i+2, 5).value=get[1][i]

for i in range(len(get[2])):#短期趋势  
    sheet1.cell(i+2, 6).value=get[2][i]

for i in range(len(get[3])):#中期趋势
    sheet1.cell(i+2, 7).value=get[3][i]

for i in range(len(get[4])):#长期趋势
    sheet1.cell(i+2, 8).value=get[4][i]

for i in range(len(get[5])):#综合评判
    sheet1.cell(i+2, 9).value=get[5][i]

for i in range(len(get[6])):#5日涨幅
    sheet1.cell(i+2, 10).value=get[6][i]

for i in range(len(get[7])):#3个月涨幅
    sheet1.cell(i+2, 11).value=get[7][i]

for i in range(len(get[8])):#1年涨幅
    sheet1.cell(i+2, 12).value=get[8][i]
xfile.save(path+\\stock.xlsx)

print("爬取完成")

 

 

 

 
 

python实例:从excel读取股票代码,爬取股票信息写到代码后面的单元格中

标签:市值   for   tip   补齐   webp   aac   import   append   dup   

原文地址:https://www.cnblogs.com/becks/p/12250310.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!