# Tags: urllib2 lte utf-8 usr find htm table parser for
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# __author__ = 'kiki'
"""Build an HTML report of domain expiry data scraped from tool.chinaz.com.

This is a Python 2 script (it relies on ``urllib2`` and
``sys.setdefaultencoding``).  It reads one domain per line from
``domains.txt`` and writes the report to ``DomainExpireDate.html`` in the
current working directory.
"""
import urllib2
import time
from bs4 import BeautifulSoup
import sys

# Python 2 only: make UTF-8 the implicit codec so the unicode strings returned
# by BeautifulSoup can be mixed with byte strings and written to a plain file.
reload(sys)
sys.setdefaultencoding('utf8')


def getexpiredate(domain):
    """Fetch the lookup page for *domain* and append one table row for it.

    The row is appended to ``DomainExpireDate.html``.  Its first cell is the
    domain itself, followed by one cell per text node found inside every
    ``<div class="fr zTContrig">`` element of the response page.

    Args:
        domain: Domain name to query, as read from ``domains.txt``.

    Raises:
        urllib2.URLError: If the lookup page cannot be fetched.
    """
    url = "http://tool.chinaz.com/DomainDel/?wd=" + domain
    header = {'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)'}
    request = urllib2.Request(url, None, header)
    response = urllib2.urlopen(request, None, timeout=30).read()
    soup = BeautifulSoup(response, "html.parser", from_encoding='utf-8')
    content = soup.find_all('div', attrs={'class': 'fr zTContrig'})

    # Pause between consecutive lookups (presumably to avoid hammering the
    # remote service -- the original script slept here as well).
    time.sleep(2)

    # Use a context manager so the report file is closed after every row; the
    # previous version leaked one open handle per domain.
    with open('DomainExpireDate.html', 'a') as f:
        f.write('<tr align="center"><td>%s</td>\n' % domain)
        for div in content:
            for i in div.strings:
                f.write("<td>%s</td>\n" % i)
        f.write('</tr>\n')


if __name__ == "__main__":
    # Generate the HTML table: start a fresh report with the page header.
    with open('DomainExpireDate.html', 'w') as DomainExpireDate:
        head = '<html lang="en">\n<head>\n<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>\n<title>DomainExpireDate</title>\n<style type="text/css">thead {color:green; height:50px;} tbody{color:blue; height:50px}</style></head>\n<body>\n<table width="1000" border="1" align="center">\n<caption align="top">域名过期时间表</caption>\n'
        # The last header cell is now closed with </th>; it used to end with
        # a stray duplicated </tr>.
        table_head = '<thead><tr align="center" bgcolor="#ccc"><th>Domain</th><th>域名年龄</th><th>域名创建时间</th><th>域名过期时间</th><th>域名删除时间</th><th>删除倒计时</th></tr></thead>\n<tbody>\n'
        DomainExpireDate.write(head)
        DomainExpireDate.write(table_head)

    # domains.txt holds the domains to look up, one per line.
    with open('domains.txt', 'r') as domains:
        for domain in domains.read().splitlines():
            domain = domain.strip()
            if not domain:
                # Skip blank lines instead of querying with an empty name.
                continue
            getexpiredate(domain)

    # Close every element opened by the header, including <table>, which the
    # original footer left open.
    with open('DomainExpireDate.html', 'a') as DomainExpireDate:
        DomainExpireDate.write('</tbody>\n</table>\n</body>\n</html>')
# Tags: urllib2 lte utf-8 usr find htm table parser for
# Original article: https://www.cnblogs.com/qiqi-yhq/p/12028326.html