标签:img adp isp .sh 线程池 url done ade imp
代码
在编写爬虫时,性能的消耗主要在IO请求中,当单进程单线程模式下请求URL时必然会引起等待,从而使得请求整体变慢。
# Thread-pool example: crawling is I/O-bound, so worker threads overlap the
# time each request spends waiting on the network.
import requests
from concurrent.futures import ThreadPoolExecutor  # thread-pool executor


def asyns_url(url):
    """Send a GET request to *url* and print the outcome.

    On success the final URL and the raw body are printed. If the request
    fails, the attempted URL and the error are printed instead.

    Args:
        url: Address to request.
    """
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException as e:
        # ``response`` is never bound when the request itself fails, so
        # report the URL we tried together with the error and stop here.
        print('异常结果', url, e)
        return
    print('获取结果', response.url, response.content)


# A list (not a set) keeps the submission order deterministic.
url_list = [
    'http://www.baidu.com',
    'http://www.google.com',
    'http://dig.chouti.com',
    'http://www.bing.com',
]


def main():
    """Submit every URL in ``url_list`` to a pool of five worker threads."""
    # Leaving the ``with`` block calls ``shutdown(wait=True)``, so this
    # returns only after every submitted request has finished.
    with ThreadPoolExecutor(5) as pool:
        for url in url_list:
            print('开始请求', url)
            pool.submit(asyns_url, url)


if __name__ == '__main__':
    main()
# 3. Multi-process execution
from concurrent.futures import ProcessPoolExecutor

import requests


def fetch_async(url):
    """Send a GET request to *url* and return the response object.

    Args:
        url: Address to request.

    Returns:
        The ``requests`` response for *url*.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    return requests.get(url, timeout=10)


url_list = ['http://www.github.com', 'http://www.bing.com']


def main():
    """Fetch every URL in ``url_list`` using a pool of five worker processes."""
    # Leaving the ``with`` block calls ``shutdown(wait=True)``, so this
    # returns only after every submitted request has finished.
    with ProcessPoolExecutor(5) as pool:
        for url in url_list:
            pool.submit(fetch_async, url)


# Worker processes may re-import this module (spawn start method, the default
# on Windows and macOS); without this guard each worker would try to build
# its own pool on import.
if __name__ == '__main__':
    main()
# 3. Multi-process execution with a completion callback
from concurrent.futures import ProcessPoolExecutor

import requests


def fetch_async(url):
    """Send a GET request to *url* and return the response object.

    Args:
        url: Address to request.

    Returns:
        The ``requests`` response for *url*.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    return requests.get(url, timeout=10)


def callback(future):
    """Print the outcome of a finished fetch.

    Args:
        future: The completed future returned by ``pool.submit``.
    """
    error = future.exception()
    if error is not None:
        # ``future.result()`` would re-raise the worker's exception inside
        # the callback; print the failure instead.
        print(error)
        return
    print(future.result())


url_list = ['http://www.github.com', 'http://www.bing.com']


def main():
    """Fetch every URL in ``url_list`` and print each result as it completes."""
    # Leaving the ``with`` block calls ``shutdown(wait=True)``, so this
    # returns only after every submitted request has finished.
    with ProcessPoolExecutor(5) as pool:
        for url in url_list:
            future = pool.submit(fetch_async, url)
            future.add_done_callback(callback)


# Worker processes may re-import this module (spawn start method, the default
# on Windows and macOS); without this guard each worker would try to build
# its own pool on import.
if __name__ == '__main__':
    main()
标签:img adp isp .sh 线程池 url done ade imp
原文地址:http://www.cnblogs.com/Mjonj/p/7746335.html