码迷,mamicode.com
首页 > 编程语言 > 详细

单线程爬虫VS多线程爬虫的效率对比

时间:2016-04-30 01:01:15      阅读:645      评论:0      收藏:0      [点我收藏+]

标签:

单线程爬虫:

import re
import requests
import time

# Crawl targets: one Amazon brand-refinement URL plus the request headers to
# send with it, per store. The same desktop Chrome User-Agent is used for all.
url_EB = 'http://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=A22XNR713HGDVG&rh=n%3A9063592011%2Ck%3Aprojector&bbn=9063592011&keywords=projector&pickerToList=brandtextbin&ie=UTF8&qid=1461902521'
headers_EB = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.86 Safari/537.36'}


# Adjacent string literals are joined at compile time, so the long URL can be
# wrapped across lines without embedding a newline in its query string (a
# triple-quoted literal would keep the line break as part of the URL).
url_AML = (
    'https://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=A3UJI9WWE6PRP5&rh=i%3Amerchant-items'
    '&pickerToList=brandtextbin&ie=UTF8&qid=1461899728'
)
headers_AML = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.86 Safari/537.36'}


url_DL = 'https://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=AS7ZU4MN0FPOY&rh=i%3Amerchant-items&pickerToList=brandtextbin&ie=UTF8&qid=1461901862'
headers_DL = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.86 Safari/537.36'}


# Store labels, looked up by short key; only used in the progress banners.
name = {
    'a': 'ExclusiveBulbs',
    'b': 'Amazing Lamps',
    'c': 'Dynamic Lamps',
}

# listing_count = re.findall(‘<span class="narrowValue">(.*?)</span‘,data.text)
# f = dict(map(lambda x,y:[x,y],store_name,listing_count))
#
# for k,v in f.items():
#     print(k,v)







def foo_one(url, headers, name, timeout=30):
    """Fetch one refinement page and print every (label, count) pair found.

    Prints a start banner, downloads ``url``, prints each tuple matched by
    the refinementLink/narrowValue pattern, prints an end banner, and then
    pauses for one second before returning.

    Args:
        url: Address of the page to download.
        headers: HTTP headers passed to ``requests.get``.
        name: Label shown in the start banner.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection raises instead of blocking forever.

    Returns:
        None. All results are written to stdout.
    """
    print('--------------------------开始爬去{0}at{1}---------------------------'.format(name, time.ctime()))

    response = requests.get(url, headers=headers, timeout=timeout)
    # Each match is a 2-tuple: the text of the "refinementLink" span and the
    # text of the "narrowValue" span that immediately follows it.
    pairs = re.findall(
        r'<span class="refinementLink">(.*?)</span><span class="narrowValue">(.*?)</span',
        response.text,
    )
    for pair in pairs:
        print(pair)
    print('--------------------------爬去完毕at{}----------------------------'.format(time.ctime()))
    time.sleep(1)






if __name__ == '__main__':
    # Crawl the three stores strictly one after another; each call returns
    # only after its page has been fetched and printed.
    foo_one(url_EB, headers_EB, name['a'])
    foo_one(url_AML, headers_AML, name['b'])
    foo_one(url_DL, headers_DL, name['c'])

输出:00:25:33开始,00:26:02结束 耗时29秒

--------------------------开始爬去ExclusiveBulbsatSat Apr 30 00:25:33 2016---------------------------
(‘A.Shine‘, ‘ (97)‘)
(‘AmpacElectronics‘, ‘ (1,644)‘)
(‘AuraBeam‘, ‘ (33,084)‘)
(‘AWO‘, ‘ (1,206)‘)
(‘Battery1inc‘, ‘ (694)‘)
(‘Comoze Lamps‘, ‘ (6,172)‘)
(‘Compatible Lamp‘, ‘ (317)‘)
(‘Corgi Lamps‘, ‘ (2,124)‘)
(‘CTLAMP‘, ‘ (3,499)‘)
(‘Dell‘, ‘ (191)‘)
(‘Diamond Lamps‘, ‘ (966)‘)
(‘Dynamic‘, ‘ (4)‘)
(‘Eiki‘, ‘ (460)‘)
(‘ePharos‘, ‘ (2,592)‘)
(‘Epson‘, ‘ (1,456)‘)
(‘EREPLACEMENT‘, ‘ (115)‘)
(‘eReplacements‘, ‘ (814)‘)
(‘eWo‘s‘, ‘ (120)‘)
(‘eWorldlamp‘, ‘ (354)‘)
(‘FI Lamps‘, ‘ (5,707)‘)
(‘FL Projector Lamp For Mitsubishi‘, ‘ (1)‘)
(‘For Epson‘, ‘ (3)‘)
(‘Generic‘, ‘ (9,769)‘)
(‘Good Lamp‘, ‘ (819)‘)
(‘HCDZ‘, ‘ (2,746)‘)
(‘Hitachi‘, ‘ (935)‘)
(‘IET Lamps‘, ‘ (2,144)‘)
(‘InFocus‘, ‘ (44)‘)
(‘JVC‘, ‘ (326)‘)
(‘KCL‘, ‘ (3,781)‘)
(‘Lampedia‘, ‘ (618)‘)
(‘Lutema‘, ‘ (1,956)‘)
(‘Mitsubishi‘, ‘ (1,006)‘)
(‘Mogobe‘, ‘ (1,335)‘)
(‘MyProjectorLamps‘, ‘ (473)‘)
(‘NEC‘, ‘ (446)‘)
(‘Nec Computers‘, ‘ (13)‘)
(‘Optoma‘, ‘ (956)‘)
(‘Osram Sylvania‘, ‘ (78)‘)
(‘Panasonic‘, ‘ (820)‘)
(‘Philips‘, ‘ (7,502)‘)
(‘Powerwarehouse‘, ‘ (9,971)‘)
(‘Projector Lamps World‘, ‘ (112)‘)
(‘Pureglare‘, ‘ (369)‘)
(‘Samsung‘, ‘ (1,078)‘)
(‘Sharp‘, ‘ (426)‘)
(‘Shopforbattery‘, ‘ (2,510)‘)
(‘SMART BOARD‘, ‘ (66)‘)
(‘Sony‘, ‘ (990)‘)
(‘TVLampsforless‘, ‘ (14)‘)
(‘Unknown‘, ‘ (722)‘)
--------------------------爬去完毕atSat Apr 30 00:25:57 2016----------------------------
--------------------------开始爬去Amazing LampsatSat Apr 30 00:25:58 2016---------------------------
(‘AWO‘, ‘ (1)‘)
(‘Comoze Lamps‘, ‘ (2)‘)
(‘DNGO‘, ‘ (8)‘)
(‘Electrified‘, ‘ (9)‘)
(‘ELECTRIFIED‘, ‘ (10)‘)
(‘Electrified Discounters‘, ‘ (5)‘)
(‘ELECTRIFIED LAMPS‘, ‘ (1,177)‘)
(‘ELECTRIFIED PRINTHEAD‘, ‘ (24)‘)
(‘ELECTRIFIED PRINTHEADS‘, ‘ (2)‘)
(‘FI Lamps‘, ‘ (2)‘)
(‘Generic‘, ‘ (34)‘)
(‘GloWatt‘, ‘ (1)‘)
(‘KCL‘, ‘ (1)‘)
(‘OEM‘, ‘ (1)‘)
(‘Powerwarehouse‘, ‘ (7)‘)
(‘SKU‘, ‘ (5)‘)
(‘Top Lamp‘, ‘ (1)‘)
(‘Unknown‘, ‘ (1)‘)
(‘USOM‘, ‘ (3)‘)
--------------------------爬去完毕atSat Apr 30 00:26:00 2016----------------------------
--------------------------开始爬去Dynamic LampsatSat Apr 30 00:26:01 2016---------------------------
(‘Battery1inc‘, ‘ (85)‘)
(‘BenQ‘, ‘ (237)‘)
(‘Buslink‘, ‘ (31)‘)
(‘Calumet‘, ‘ (2)‘)
(‘Comoze Lamps‘, ‘ (405)‘)
(‘CTLAMP‘, ‘ (615)‘)
(‘Dell‘, ‘ (82)‘)
(‘Divine Lighting‘, ‘ (36)‘)
(‘DNGO‘, ‘ (63)‘)
(‘Dynamic‘, ‘ (4)‘)
(‘Eiko‘, ‘ (140)‘)
(‘Electrified‘, ‘ (2)‘)
(‘ELECTRIFIED LAMPS‘, ‘ (24)‘)
(‘Electronix Xpress‘, ‘ (418)‘)
(‘ePharos‘, ‘ (502)‘)
(‘Epson‘, ‘ (631)‘)
(‘eReplacements‘, ‘ (119)‘)
(‘FI Lamps‘, ‘ (505)‘)
(‘FL Projector Lamp For Mitsubishi‘, ‘ (1)‘)
(‘G-lamps‘, ‘ (43)‘)
(‘GE‘, ‘ (248)‘)
(‘GE Lighting‘, ‘ (152)‘)
(‘General Electric‘, ‘ (53)‘)
(‘Generic‘, ‘ (1,671)‘)
(‘Genie‘, ‘ (101)‘)
(‘GLAMPS‘, ‘ (2)‘)
(‘Impact‘, ‘ (7)‘)
(‘Industrial Lighting Solutions‘, ‘ (9)‘)
(‘KCL‘, ‘ (280)‘)
(‘Kodak‘, ‘ (1)‘)
(‘Lampedia‘, ‘ (63)‘)
(‘M-Wave‘, ‘ (830)‘)
(‘Mitsubishi‘, ‘ (406)‘)
(‘Mitsubishi DLP TV Bulbs‘, ‘ (29)‘)
(‘Mocpinc‘, ‘ (10)‘)
(‘MyProjectorLamps‘, ‘ (344)‘)
(‘Nec‘, ‘ (19)‘)
(‘Optoma‘, ‘ (161)‘)
(‘Osram‘, ‘ (1,295)‘)
(‘Panasonic‘, ‘ (245)‘)
(‘Philips‘, ‘ (988)‘)
(‘Powerwarehouse‘, ‘ (239)‘)
(‘Projector Lamps World‘, ‘ (45)‘)
(‘Pureglare‘, ‘ (107)‘)
(‘Samsung‘, ‘ (323)‘)
(‘ShopJimmy‘, ‘ (3)‘)
(‘Sony‘, ‘ (141)‘)
(‘Sylvania‘, ‘ (115)‘)
(‘Technical Precision‘, ‘ (10)‘)
(‘Unknown‘, ‘ (167)‘)
(‘Welch Allyn Compatible‘, ‘ (1)‘)
--------------------------爬去完毕atSat Apr 30 00:26:02 2016----------------------------

多线程爬虫:00:32:37开始,00:32:39结束,耗时2秒

import re
import requests

import threading
import time
from time import ctime,sleep

# Crawl targets: one Amazon brand-refinement URL plus the request headers to
# send with it, per store. The same desktop Chrome User-Agent is used for all.
url_EB = 'http://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=A22XNR713HGDVG&rh=n%3A9063592011%2Ck%3Aprojector&bbn=9063592011&keywords=projector&pickerToList=brandtextbin&ie=UTF8&qid=1461902521'
headers_EB = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.86 Safari/537.36'}


# Adjacent string literals are joined at compile time, so the long URL can be
# wrapped across lines without embedding a newline in its query string (a
# triple-quoted literal would keep the line break as part of the URL).
url_AML = (
    'https://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=A3UJI9WWE6PRP5&rh=i%3Amerchant-items'
    '&pickerToList=brandtextbin&ie=UTF8&qid=1461899728'
)
headers_AML = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.86 Safari/537.36'}


url_DL = 'https://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=AS7ZU4MN0FPOY&rh=i%3Amerchant-items&pickerToList=brandtextbin&ie=UTF8&qid=1461901862'
headers_DL = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.86 Safari/537.36'}


# Store labels, looked up by short key; only used in the progress banners.
name = {
    'a': 'ExclusiveBulbs',
    'b': 'Amazing Lamps',
    'c': 'Dynamic Lamps',
}

# listing_count = re.findall(‘<span class="narrowValue">(.*?)</span‘,data.text)
# f = dict(map(lambda x,y:[x,y],store_name,listing_count))
#
# for k,v in f.items():
#     print(k,v)







def foo_one(url, headers, name, timeout=30):
    """Fetch one refinement page and print every (label, count) pair found.

    Prints a start banner, downloads ``url``, prints each tuple matched by
    the refinementLink/narrowValue pattern, and prints an end banner. Both
    banners carry ``name`` so interleaved output from several concurrent
    callers can be told apart.

    Args:
        url: Address of the page to download.
        headers: HTTP headers passed to ``requests.get``.
        name: Label shown in the start and end banners.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection raises instead of blocking forever.

    Returns:
        None. All results are written to stdout.
    """
    print('--------------------------开始爬去{0}at{1}---------------------------'.format(name, time.ctime()))

    response = requests.get(url, headers=headers, timeout=timeout)
    # Each match is a 2-tuple: the text of the "refinementLink" span and the
    # text of the "narrowValue" span that immediately follows it.
    pairs = re.findall(
        r'<span class="refinementLink">(.*?)</span><span class="narrowValue">(.*?)</span',
        response.text,
    )
    for pair in pairs:
        print(pair)
    print('--------------------------爬去完毕{0}at{1}----------------------------'.format(name, time.ctime()))














# One worker thread per store; each runs foo_one against its own URL.
threads = []
t1 = threading.Thread(target=foo_one, args=(url_EB, headers_EB, name['a']))
threads.append(t1)
t2 = threading.Thread(target=foo_one, args=(url_AML, headers_AML, name['b']))
threads.append(t2)
t3 = threading.Thread(target=foo_one, args=(url_DL, headers_DL, name['c']))
threads.append(t3)

if __name__ == '__main__':
    # Start every worker first so the three downloads overlap.
    for t in threads:
        # Attribute form of the deprecated setDaemon(True).
        t.daemon = True
        t.start()

    # Join ALL workers, not just the last one started: daemon threads are
    # killed when the main thread exits, so any worker that is still running
    # once the final join returns would lose its output.
    for t in threads:
        t.join()

    print("all over %s" % ctime())

输出(注意:Amazing Lamps 线程只打印了开始行,其抓取结果和完毕行没有出现在下面的输出中,因此这里的耗时并不包含该店铺的完整抓取):

--------------------------开始爬去ExclusiveBulbsatSat Apr 30 00:32:37 2016---------------------------
--------------------------开始爬去Amazing LampsatSat Apr 30 00:32:37 2016---------------------------
--------------------------开始爬去Dynamic LampsatSat Apr 30 00:32:37 2016---------------------------
(‘A.Shine‘, ‘ (97)‘)
(‘AmpacElectronics‘, ‘ (1,645)‘)
(‘AuraBeam‘, ‘ (33,088)‘)
(‘AWO‘, ‘ (1,209)‘)
(‘Battery1inc‘, ‘ (694)‘)
(‘Comoze Lamps‘, ‘ (6,172)‘)
(‘Compatible Lamp‘, ‘ (317)‘)
(‘Corgi Lamps‘, ‘ (2,123)‘)
(‘CTLAMP‘, ‘ (3,501)‘)
(‘Dell‘, ‘ (191)‘)
(‘Diamond Lamps‘, ‘ (966)‘)
(‘Dynamic‘, ‘ (4)‘)
(‘Eiki‘, ‘ (457)‘)
(‘ePharos‘, ‘ (2,592)‘)
(‘Epson‘, ‘ (1,456)‘)
(‘EREPLACEMENT‘, ‘ (115)‘)
(‘eReplacements‘, ‘ (813)‘)
(‘eWo‘s‘, ‘ (120)‘)
(‘eWorldlamp‘, ‘ (354)‘)
(‘FI Lamps‘, ‘ (5,710)‘)
(‘FL Projector Lamp For Mitsubishi‘, ‘ (1)‘)
(‘For Epson‘, ‘ (3)‘)
(‘Generic‘, ‘ (9,771)‘)
(‘Good Lamp‘, ‘ (819)‘)
(‘HCDZ‘, ‘ (2,748)‘)
(‘Hitachi‘, ‘ (935)‘)
(‘IET Lamps‘, ‘ (2,137)‘)
(‘InFocus‘, ‘ (44)‘)
(‘JVC‘, ‘ (326)‘)
(‘KCL‘, ‘ (3,783)‘)
(‘Lampedia‘, ‘ (618)‘)
(‘Lutema‘, ‘ (1,955)‘)
(‘Mitsubishi‘, ‘ (1,006)‘)
(‘Mogobe‘, ‘ (1,336)‘)
(‘MyProjectorLamps‘, ‘ (473)‘)
(‘NEC‘, ‘ (450)‘)
(‘Nec Computers‘, ‘ (13)‘)
(‘Optoma‘, ‘ (956)‘)
(‘Osram Sylvania‘, ‘ (78)‘)
(‘Panasonic‘, ‘ (820)‘)
(‘Philips‘, ‘ (7,502)‘)
(‘Powerwarehouse‘, ‘ (9,972)‘)
(‘Projector Lamps World‘, ‘ (112)‘)
(‘Pureglare‘, ‘ (369)‘)
(‘Samsung‘, ‘ (1,078)‘)
(‘Sharp‘, ‘ (426)‘)
(‘Shopforbattery‘, ‘ (2,511)‘)
(‘SMART BOARD‘, ‘ (66)‘)
(‘Sony‘, ‘ (990)‘)
(‘TVLampsforless‘, ‘ (14)‘)
(‘Unknown‘, ‘ (722)‘)
--------------------------爬去完毕ExclusiveBulbsatSat Apr 30 00:32:38 2016----------------------------
(‘Battery1inc‘, ‘ (85)‘)
(‘BenQ‘, ‘ (237)‘)
(‘Buslink‘, ‘ (31)‘)
(‘Calumet‘, ‘ (2)‘)
(‘Comoze Lamps‘, ‘ (405)‘)
(‘CTLAMP‘, ‘ (615)‘)
(‘Dell‘, ‘ (82)‘)
(‘Divine Lighting‘, ‘ (36)‘)
(‘DNGO‘, ‘ (63)‘)
(‘Dynamic‘, ‘ (4)‘)
(‘Eiko‘, ‘ (140)‘)
(‘Electrified‘, ‘ (2)‘)
(‘ELECTRIFIED LAMPS‘, ‘ (24)‘)
(‘Electronix Xpress‘, ‘ (418)‘)
(‘ePharos‘, ‘ (502)‘)
(‘Epson‘, ‘ (631)‘)
(‘eReplacements‘, ‘ (119)‘)
(‘FI Lamps‘, ‘ (505)‘)
(‘FL Projector Lamp For Mitsubishi‘, ‘ (1)‘)
(‘G-lamps‘, ‘ (43)‘)
(‘GE‘, ‘ (248)‘)
(‘GE Lighting‘, ‘ (152)‘)
(‘General Electric‘, ‘ (53)‘)
(‘Generic‘, ‘ (1,671)‘)
(‘Genie‘, ‘ (101)‘)
(‘GLAMPS‘, ‘ (2)‘)
(‘Impact‘, ‘ (7)‘)
(‘Industrial Lighting Solutions‘, ‘ (9)‘)
(‘KCL‘, ‘ (280)‘)
(‘Kodak‘, ‘ (1)‘)
(‘Lampedia‘, ‘ (63)‘)
(‘M-Wave‘, ‘ (830)‘)
(‘Mitsubishi‘, ‘ (406)‘)
(‘Mitsubishi DLP TV Bulbs‘, ‘ (29)‘)
(‘Mocpinc‘, ‘ (10)‘)
(‘MyProjectorLamps‘, ‘ (344)‘)
(‘Nec‘, ‘ (19)‘)
(‘Optoma‘, ‘ (161)‘)
(‘Osram‘, ‘ (1,295)‘)
(‘Panasonic‘, ‘ (245)‘)
(‘Philips‘, ‘ (988)‘)
(‘Powerwarehouse‘, ‘ (239)‘)
(‘Projector Lamps World‘, ‘ (45)‘)
(‘Pureglare‘, ‘ (107)‘)
(‘Samsung‘, ‘ (323)‘)
(‘ShopJimmy‘, ‘ (3)‘)
(‘Sony‘, ‘ (141)‘)
(‘Sylvania‘, ‘ (115)‘)
(‘Technical Precision‘, ‘ (10)‘)
(‘Unknown‘, ‘ (167)‘)
(‘Welch Allyn Compatible‘, ‘ (1)‘)
--------------------------爬去完毕Dynamic LampsatSat Apr 30 00:32:39 2016----------------------------
all over Sat Apr 30 00:32:39 2016

  

单线程爬虫VS多线程爬虫的效率对比

标签:

原文地址:http://www.cnblogs.com/alan-babyblog/p/5447946.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!