标签:sel for col headers star html ports 日志 ignore
import requests
import time
import threading
from queue import Queue
import random
import sys


def getheaders():
    """Return HTTP request headers carrying a randomly chosen User-Agent.

    Returns:
        dict: ``{"User-Agent": <one of the browser strings listed below>}``.
    """
    # Every entry needs its own trailing comma: adjacent string literals are
    # concatenated implicitly, which previously fused the first two agents
    # into a single bogus header value.
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    ]
    # Pick a random User-Agent for each request.
    return {"User-Agent": random.choice(user_agent_list)}


class Logger(object):
    """File-like stand-in for ``sys.stdout`` that mirrors output to a file.

    Everything written is forwarded to the stream that was ``sys.stdout``
    when the instance was created and is also appended to ``filename``.
    """

    def __init__(self, filename="Default.log"):
        """Remember the current stdout and open ``filename`` for appending.

        Args:
            filename: Path of the log file. It is opened in append mode and
                characters that cannot be encoded are dropped
                (``errors="ignore"``).
        """
        self.terminal = sys.stdout
        # Deliberately kept open for the lifetime of the object: it is used
        # as the process-wide stdout replacement.
        self.log = open(filename, "a", errors="ignore")

    def write(self, message):
        """Write ``message`` to both the original stdout and the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both underlying streams so buffered output is not held back."""
        self.terminal.flush()
        self.log.flush()


# Mirror everything printed from here on into the log file.
sys.stdout = Logger('IpProxy/spiders/a.txt')


def is_enable(ip_port):
    """Probe one proxy and print its address if a request through it succeeds.

    A GET to https://www.baidu.com/ is sent through ``ip_port`` with a
    2-second timeout. Any response at all counts as success (the status code
    is not inspected). On any failure a placeholder file named ``testfile``
    is (re)written in the current directory and nothing is printed.

    Args:
        ip_port: Proxy address as a ``"host:port"`` string.
    """
    proxies = {
        "http": "http://" + ip_port,
        "https": "http://" + ip_port,
    }
    try:
        requests.get('https://www.baidu.com/', headers=getheaders(), proxies=proxies, timeout=2)
    except Exception:
        # Deliberate best-effort: whatever went wrong, the proxy is treated
        # as unusable and skipped. The context manager closes the handle,
        # which the previous bare open() never did.
        with open("testfile", "w") as fh:
            fh.write("这是一个测试文件,用于测试异常!!")
    else:
        print(ip_port)


def main():
    """Check every proxy listed in ``IpProxy/spiders/IP.txt`` concurrently.

    One thread is started per non-blank line of the file; the function
    returns after all of them have finished.
    """
    # Load the proxy addresses, skipping blank lines so no thread is spent
    # probing an empty address.
    with open('IpProxy/spiders/IP.txt') as f:
        ips_ports_list = [line.strip() for line in f if line.strip()]

    t_list = []
    for ip_port in ips_ports_list:
        t = threading.Thread(target=is_enable, args=(ip_port,))
        t.start()
        t_list.append(t)
    for t in t_list:
        t.join()


if __name__ == '__main__':
    main()
标签:sel for col headers star html ports 日志 ignore
原文地址:https://www.cnblogs.com/aotumandaren/p/13663725.html