标签:for star .text 守护 无法 ade ipo requests 信号
class BdSpider(threading.Thread):
    """Worker thread that checks proxy addresses against m.sogou.com.

    Each worker repeatedly takes one proxy address from the shared queue,
    tries to fetch the Sogou mobile home page through it, and appends working
    proxies to the module-level ``allipus`` list.
    """

    def __init__(self, waiting):
        """Store the shared work queue and the mobile User-Agent header.

        Args:
            waiting: Queue of proxy addresses to check; it must provide
                ``get()`` and ``task_done()``.
        """
        super(BdSpider, self).__init__()
        self.waiting = waiting
        self.headers = {
            'User-Agent': "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Mobile Safari/537.36"
        }

    def run(self):
        """Check queued proxies until ``get_html`` reports ``-1``.

        ``task_done()`` is called for every item taken from the queue, even
        when the check raises: ``Queue.join()`` only returns once every
        ``get()`` has been matched by a ``task_done()``, so skipping it leaves
        the thread waiting on ``join()`` blocked forever.
        """
        while True:
            ipone = self.waiting.get()
            try:
                status = self.get_html(ipone)
            finally:
                self.waiting.task_done()
            if status == -1:
                break

    def get_html(self, ipone, num_retries=3):
        """Try to reach m.sogou.com through the proxy ``ipone``.

        On the first successful request the proxy is added to ``allipus``
        (unless it is already there) and no further attempts are made.

        Args:
            ipone: Proxy address; ``"http://"`` is prepended to build the
                proxy URL used for both http and https traffic.
            num_retries: Number of extra attempts after the first failure.

        Returns:
            ``-1`` if more than 50 attempts failed for this proxy, otherwise
            ``None``. With the default ``num_retries`` at most four attempts
            are made, so ``-1`` is not returned.
        """
        global allipus
        global error_counter

        proxy_url = "http://" + str(ipone)
        proxies = {"http": proxy_url, "https": proxy_url}

        # Counted locally so that concurrent workers cannot push each other's
        # tally over the threshold.
        failures = 0
        for _ in range(max(num_retries, 0) + 1):
            try:
                requests.get('https://m.sogou.com/', proxies=proxies, timeout=3, headers=self.headers)
            except Exception:
                # Best-effort probe: any failure only means this attempt did
                # not work. KeyboardInterrupt/SystemExit still propagate.
                failures += 1
                continue
            # Exact membership test; comparing against str(allipus) would also
            # match addresses that are merely substrings of stored ones.
            if ipone not in allipus:
                print("保留ip:" + str(ipone))
                allipus.append(ipone)
            failures = 0
            break

        # Keep the module-level counter in step with the latest check.
        error_counter = failures
        if failures > 50:
            return -1
        return None
# Refresh the pool of validated proxy IPs.
def ipAction(ipsss):
    """Fetch fresh proxy candidates and validate them together with ``ipsss``.

    The candidates are pushed onto a queue that 20 ``BdSpider`` daemon threads
    consume; the call blocks until every queued address has been processed.

    Args:
        ipsss: Iterable of previously known proxy addresses to check again;
            may be empty or ``None``.

    Returns:
        The module-level ``allipus`` list.
    """
    candidates = [known for known in (ipsss or []) if known]

    # NOTE(review): the order token is embedded in this URL; consider loading
    # it from configuration instead of source code.
    url = "http://api.ip.data5u.com/api/get.shtml?order=194469bc039df0eb7580e09541aa0624&num=20&carrier=0&protocol=2&an1=1&an2=2&an3=3&sp1=1&sp2=2&sp3=3&sort=1&system=1&distinct=0&rettype=1&seprator=%0A"
    for _ in range(5):
        # Pause before every request -- presumably to respect the provider's
        # rate limit; confirm against the API terms.
        time.sleep(10)
        # A timeout keeps an unresponsive API from blocking the refresh forever.
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')
        candidates.extend(line for line in str(soup).split("\n") if line)

    wait_list = Queue()
    thread_num = 20
    for keyip in candidates:
        # Strip before testing so whitespace-only lines are not queued as
        # empty addresses.
        keyip = keyip.strip()
        if keyip:
            wait_list.put(keyip)

    for _ in range(thread_num):
        thread = BdSpider(wait_list)
        # Daemon threads do not keep the interpreter alive at exit.
        thread.daemon = True
        thread.start()

    # Blocks until task_done() has been called for every queued address.
    wait_list.join()
    return allipus


# Shared list of proxy addresses that passed validation.
allipus = []
调用 ipAction 时传入一个已有的代理 IP 列表(可以为空),函数返回通过验证的代理 IP 列表。
如果把 self.waiting.task_done() 这一行注释掉,程序会一直卡死,处于无限挂起状态:队列认为取出的任务尚未完成,wait_list.join() 会一直阻塞;即使工作线程被设置为守护线程,主线程也无法结束。
原文参考http://www.vuln.cn/8610
多线程中 Queue 队列的 join() 与 task_done()
标签:for star .text 守护 无法 ade ipo requests 信号
原文地址:https://www.cnblogs.com/dahuag/p/9198618.html