码迷,mamicode.com
首页 > 编程语言 > 详细

python-股票数据定向爬取

时间:2017-04-04 09:38:38      阅读:460      评论:0      收藏:0      [点我收藏+]

标签:turn   .text   ***   err   fine   att   awb   encoding   rom   

re.findall

soup.find_all

---------Q----

for i in ***:

***可以是什么类型,主要是关心什么类型的不可以

------------traceback用法------------

>>>
>>> import traceback
>>> try:
...     1/0
... except (Exception,e):
...     traceback.print_exc()
...
Traceback (most recent call last):
  File "<stdin>", line 2, in <module>
ZeroDivisionError: division by zero

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<stdin>", line 3, in <module>
NameError: name 'e' is not defined

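(据说这样报错更加直观。注意:上面的 NameError 是因为 Python 3 中 `except (Exception,e):` 会先对未定义的 `e` 求值;正确写法是 `except Exception as e:`,这样才会真正执行 traceback.print_exc()。)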

 http://blog.csdn.net/handsomekang/article/details/9373035

 

 

--------------有毛病,睡了--------------

  1. #CrawBaiduStocksB.py
  2. import requests
  3. from bs4 import BeautifulSoup
  4. import traceback
  5. import re
  6.  
  def getHTMLText(url, code="utf-8"):
      """Download *url* and return the response body as text.

      The body is decoded with the encoding named by *code* rather than
      the one guessed by ``requests``.

      Returns the empty string when the request fails (connection error,
      timeout, or a non-2xx status reported by ``raise_for_status``).
      """
      try:
          # A timeout keeps one unresponsive server from hanging the crawl.
          r = requests.get(url, timeout=30)
          r.raise_for_status()
          r.encoding = code
          return r.text
      except requests.RequestException:
          return ""
  15.  
  def getStockList(lst, stockURL):
      """Append every stock code linked from the page at *stockURL* to *lst*.

      The page is fetched with ``getHTMLText`` (decoded as GB2312) and
      every ``<a>`` tag is inspected.  The first substring of its ``href``
      matching ``s[hz]`` followed by six digits is appended to *lst*.
      Anchors without an ``href`` or without a match are skipped.
      """
      html = getHTMLText(stockURL, "GB2312")
      soup = BeautifulSoup(html, 'html.parser')
      for anchor in soup.find_all('a'):
          href = anchor.attrs.get('href', '')
          match = re.search(r"[s][hz]\d{6}", href)
          if match:
              lst.append(match.group(0))
  26.  
  def getStockInfo(lst, stockURL, fpath):
      """Scrape the detail page of every stock in *lst* and append it to *fpath*.

      For each code the page ``stockURL + code + ".html"`` is fetched.  The
      ``div`` with class ``stock-bets`` is parsed into a dict holding the
      stock name (under the key ``'股票名称'``) plus one entry per
      ``dt``/``dd`` pair, matched by position.  ``str(dict)`` is appended
      to *fpath* as one UTF-8 encoded line.

      A carriage-return progress percentage is printed after every stock,
      including stocks whose page could not be fetched or parsed, so the
      display reaches 100% when the loop ends.
      """
      total = len(lst)
      for count, stock in enumerate(lst, start=1):
          html = getHTMLText(stockURL + stock + ".html")
          try:
              if html:
                  infoDict = {}
                  soup = BeautifulSoup(html, 'html.parser')
                  stockInfo = soup.find('div', attrs={'class': 'stock-bets'})

                  name = stockInfo.find_all(attrs={'class': 'bets-name'})[0]
                  infoDict.update({'股票名称': name.text.split()[0]})

                  keyList = stockInfo.find_all('dt')
                  valueList = stockInfo.find_all('dd')
                  for key_tag, value_tag in zip(keyList, valueList):
                      infoDict[key_tag.text] = value_tag.text

                  with open(fpath, 'a', encoding='utf-8') as f:
                      f.write(str(infoDict) + '\n')
          except Exception:
              # Best effort: a page with an unexpected layout (or a failed
              # write) must not abort the whole crawl; skip this stock.
              pass
          print("\r当前进度: {:.2f}%".format(count * 100 / total), end="")
  57.  
  def main():
      """Collect the stock codes, then write each stock's details to a file."""
      stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
      stock_info_url = 'https://gupiao.baidu.com/stock/'
      output_file = 'D:/BaiduStockInfo.txt'
      slist = []
      getStockList(slist, stock_list_url)
      getStockInfo(slist, stock_info_url, output_file)


  # Only start crawling when run as a script, not when imported.
  if __name__ == "__main__":
      main()
  67. ---------------------------------------------
    1. #CrawBaiduStocksA.py
    2. import requests
    3. from bs4 import BeautifulSoup
    4. import traceback
    5. import re
    6.  
    def getHTMLText(url):
        """Download *url* and return the response body as text.

        The body is decoded with the encoding ``requests`` detects from the
        content (``apparent_encoding``).

        Returns the empty string when the request fails (connection error,
        timeout, or a non-2xx status reported by ``raise_for_status``).
        """
        try:
            # A timeout keeps one unresponsive server from hanging the crawl.
            r = requests.get(url, timeout=30)
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException:
            return ""
    15.  
    def getStockList(lst, stockURL):
        """Append every stock code linked from the page at *stockURL* to *lst*.

        The page is fetched with ``getHTMLText`` and every ``<a>`` tag is
        inspected.  The first substring of its ``href`` matching ``s[hz]``
        followed by six digits is appended to *lst*.  Anchors without an
        ``href`` or without a match are skipped.
        """
        html = getHTMLText(stockURL)
        soup = BeautifulSoup(html, 'html.parser')
        for anchor in soup.find_all('a'):
            href = anchor.attrs.get('href', '')
            match = re.search(r"[s][hz]\d{6}", href)
            if match:
                lst.append(match.group(0))
    26.  
    def getStockInfo(lst, stockURL, fpath):
        """Scrape the detail page of every stock in *lst* and append it to *fpath*.

        For each code the page ``stockURL + code + ".html"`` is fetched.  The
        ``div`` with class ``stock-bets`` is parsed into a dict holding the
        stock name (under the key ``'股票名称'``) plus one entry per
        ``dt``/``dd`` pair, matched by position.  ``str(dict)`` is appended
        to *fpath* as one UTF-8 encoded line.

        Stocks whose page could not be fetched are skipped silently.  Any
        error while parsing or writing prints its traceback and the loop
        moves on to the next stock.
        """
        for stock in lst:
            html = getHTMLText(stockURL + stock + ".html")
            if html == "":
                continue
            try:
                infoDict = {}
                soup = BeautifulSoup(html, 'html.parser')
                stockInfo = soup.find('div', attrs={'class': 'stock-bets'})

                name = stockInfo.find_all(attrs={'class': 'bets-name'})[0]
                infoDict.update({'股票名称': name.text.split()[0]})

                keyList = stockInfo.find_all('dt')
                valueList = stockInfo.find_all('dd')
                for key_tag, value_tag in zip(keyList, valueList):
                    infoDict[key_tag.text] = value_tag.text

                with open(fpath, 'a', encoding='utf-8') as f:
                    f.write(str(infoDict) + '\n')
            except Exception:
                # Best effort: report the problem but keep crawling.
                traceback.print_exc()
    53.  
    def main():
        """Collect the stock codes, then write each stock's details to a file."""
        stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
        stock_info_url = 'https://gupiao.baidu.com/stock/'
        output_file = 'D:/BaiduStockInfo.txt'
        slist = []
        getStockList(slist, stock_list_url)
        getStockInfo(slist, stock_info_url, output_file)


    # Only start crawling when run as a script, not when imported.
    if __name__ == "__main__":
        main()
  68. -------------------学校这破网,让我开始怀疑人生了-------吃屎,强力吃屎,学校吃屎了?ta为什么要吃屎呢?-------

python-股票数据定向爬取

标签:turn   .text   ***   err   fine   att   awb   encoding   rom   

原文地址:http://www.cnblogs.com/wanghui626/p/6664242.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!