码迷,mamicode.com
首页 > 编程语言 > 详细

python多线程爬虫:亚马逊价格

时间:2016-05-03 23:52:12      阅读:256      评论:0      收藏:0      [点我收藏+]

标签:

import re
import requests

import threading
import time
from time import ctime,sleep

from queue import Queue


# Search keywords for the first worker thread (each keyword list below is
# handed to its own Scraper thread). The entries appear to be product model
# numbers -- TODO confirm with the data owner.
keywords_a = [
    'ELPLP80',
    'ELPLP23',
    'ELPLP29',
    'NP14LP',
    'POA-LMP126',
    'ELPLP66',
]



# Search keywords for the second worker thread.
keywords_b = [
    'VIP230W0.8E20.8',
    'VIP240W0.8E20.9N',
    'NP30LP',
    'LMP-C162',
    'VT70LP',
]



# Search keywords for the third worker thread.
keywords_c = [
    'TLPLV4',
    'POA-LMP131',
    'BL-FP240A',
    'VLT-XD3200LP',
    'ET-LAD35',
    'BL-FU240A',
    '20-01032-20',
]

# Search keywords for the fourth worker thread.
keywords_d = [
    'ELPLP76',
    'VLT-HC3800LP',
    'BL-FP240C',
    '5811116765-S',
    'ELPLP69',
    'BL-FP200H',
]


# Search keywords for the fifth worker thread.
keywords_e = [
    '5100MP',
    'RLC-057',
    'ELPLP71',
    'ELPLP64',
    'BL-FS300B',
]


# Regular expression that captures the price text inside the price <span>
# of a search-results page (non-greedy, so each <span> yields one match).
Re_rule = r'<span class="a-size-base a-color-price s-price a-text-bold">(.*?)</span>'

# Browser-like request headers sent with every page request.
headers_am = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.86 Safari/537.36',
}
def Scraper(kw):
    """Fetch and print Amazon search prices for every keyword in *kw*.

    For each keyword two search-result pages are requested: the Amazon-wide
    search and the store-restricted search. The first price matched by
    ``Re_rule`` on each page is printed together with a timestamp. A page
    with no matching price is reported as ``N/A`` instead of raising
    ``IndexError``, and a failed request is reported and skipped, so one bad
    keyword does not end the worker thread. The function pauses one second
    after each keyword.

    Args:
        kw: Iterable of search keyword strings.
    """
    for i in kw:
        # Amazon-wide search page for the keyword.
        url_keyword = 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords={}'.format(i)
        # Store-restricted search page for the same keyword.
        url_Epharos = 'http://www.amazon.com/s/ref=nb_sb_noss?url=srs%3D9143518011%26search-alias%3Dspecialty-aps&field-keywords={}'.format(i)
        try:
            # A timeout keeps a stalled connection from blocking join() forever.
            response = requests.get(url_keyword, headers=headers_am, timeout=30)
            response_a = requests.get(url_Epharos, headers=headers_am, timeout=30)
        except requests.RequestException as exc:
            print('--------------------------{0} request failed at {1}: {2}'.format(i, time.ctime(), exc))
        else:
            price = re.findall(Re_rule, response.text)
            price_e = re.findall(Re_rule, response_a.text)
            # Only the first match on each page is reported.
            market_price = price[0] if price else 'N/A'
            store_price = price_e[0] if price_e else 'N/A'
            print('--------------------------{0}爬去完毕at{1}j结果:\n\n市场价:{2}\n\nEpharos:{3}'.format(i, time.ctime(), market_price, store_price))
        time.sleep(1)


# One worker thread per keyword list. ``args`` must be a tuple, hence the
# trailing comma after the single argument.
threads = []
t1 = threading.Thread(target=Scraper, args=(keywords_a,))
threads.append(t1)
t2 = threading.Thread(target=Scraper, args=(keywords_b,))
threads.append(t2)
t3 = threading.Thread(target=Scraper, args=(keywords_c,))
threads.append(t3)
t4 = threading.Thread(target=Scraper, args=(keywords_d,))
threads.append(t4)
t5 = threading.Thread(target=Scraper, args=(keywords_e,))
threads.append(t5)

if __name__ == '__main__':
    for t in threads:
        # Mark as daemon thread; the attribute replaces the deprecated setDaemon().
        t.daemon = True
        t.start()
    for t in threads:
        # Wait for every worker to finish before reporting completion.
        t.join()
    print("all over %s" % ctime())

  

python多线程爬虫:亚马逊价格

标签:

原文地址:http://www.cnblogs.com/alan-babyblog/p/5456875.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!