标签:不可用 lxml exception try rom dai type http arp
# Scrape the proxy table from xicidaili.com and probe every listed proxy.
#
# For each table row that has both an address cell (td[2]) and a port cell
# (td[3]), an HTTP GET is sent to a test URL through that proxy. Proxies that
# answer with status 200 are collected in ``normal_proxies``.
import requests
from lxml import etree

url = 'http://www.xicidaili.com/'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

# Page that is requested through each proxy to decide whether it works.
test_url = 'http://www.baidu.com'

# A timeout keeps the script from hanging forever if the listing site stalls.
response = requests.get(url, headers=headers, timeout=10)

html_ele = etree.HTML(response.text)
tr_list = html_ele.xpath('//table[@id="ip_list"]/tr')

# Both accumulators must live outside the loop: initialising them per row
# would reset the counter to 1 and discard previously found proxies.
normal_proxies = []
count = 1

for tr_ele in tr_list:
    ip_cells = tr_ele.xpath('./td[2]/text()')
    port_cells = tr_ele.xpath('./td[3]/text()')

    # Rows lacking either cell (e.g. header rows) carry no proxy; skip them.
    if not ip_cells or not port_cells:
        continue

    proxies = ip_cells[0].strip() + ':' + port_cells[0].strip()

    print("第%s个。。" % count)
    count += 1

    try:
        # Give the proxy an explicit scheme instead of relying on the HTTP
        # library to guess one from a bare "host:port" string.
        probe = requests.get(
            test_url,
            headers=headers,
            proxies={"http": 'http://' + proxies},
            timeout=1,
        )
    except requests.RequestException:
        # Connection errors, timeouts, malformed proxy addresses, etc.
        print("该代理IP无效:", proxies)
    else:
        if probe.status_code == 200:
            print("该代理IP可用:", proxies)
            normal_proxies.append(proxies)
        else:
            print("该代理IP不可用:", proxies)
标签:不可用 lxml exception try rom dai type http arp
原文地址:https://www.cnblogs.com/gxsmm/p/9508373.html