用多进程来获取多个网站的源码
import sys
from multiprocessing import Pool

import requests


def get_url(url, timeout=10):
    """Download one page and describe the response.

    Runs inside a pool worker process.

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Seconds to wait on the server before giving up. Without
            a timeout a single stalled server would block the worker, and
            therefore the final ``join()``, indefinitely.

    Returns:
        A dict with the keys ``'url'``, ``'status_code'`` and ``'content'``
        (the decoded response body).

    Any exception raised by ``requests.get`` propagates to the pool, which
    hands it to the ``error_callback`` registered in ``main``.
    """
    ret = requests.get(url, timeout=timeout)
    return {
        'url': url,
        'status_code': ret.status_code,
        'content': ret.text,
    }


def _filename_for(url):
    """Return ``url`` without its ``scheme://`` prefix.

    The original code sliced off a fixed 7 characters, which is only right
    for ``http://``. Splitting on the separator gives the same result for
    ``http://`` URLs and also handles ``https://`` and scheme-less input.
    """
    _scheme, sep, rest = url.partition('://')
    return rest if sep else url


def parser(dic):
    """Report a fetched page and save its body to a file.

    Used as the ``apply_async`` success callback, so it runs in the parent
    process rather than in a worker.

    Args:
        dic: The dict produced by ``get_url``.
    """
    print(dic['url'], dic['status_code'], len(dic['content']))
    with open(_filename_for(dic['url']), 'w', encoding='utf-8') as f:
        f.write(dic['content'])


def report_error(exc):
    """Print a failure raised by a worker instead of dropping it silently."""
    print('request failed:', exc, file=sys.stderr)


def main():
    """Fetch every URL in the list using a pool of four worker processes."""
    url_l = [
        'http://www.baidu.com',
        'http://www.sogou.com',
        'http://www.hao123.com',
        'http://www.yangxiaoer.cc',
        'http://www.python.org',
    ]
    with Pool(4) as p:
        for url in url_l:
            p.apply_async(
                get_url,
                args=(url,),
                callback=parser,
                error_callback=report_error,
            )
        # Leaving the ``with`` block terminates the pool, so wait for all
        # submitted tasks to finish before that happens.
        p.close()
        p.join()


if __name__ == '__main__':
    main()