标签:
代码如下:
"""Save the newest shared Xunlei-member account list from xlfans.com to a file.

The script fetches the site's entry page, locates the link to the newest
"archives" post, downloads that post, cuts out the account section that
follows a day-dependent heading, and writes one entry per line to
``save_path``.
"""
import re
import time
import urllib
import urllib.request

from collections import deque

# Browser-like request headers sent with every HTTP request.
head = {
    'Connection': 'Keep-Alive',
    'Accept': 'text/html, application/xhtml+xml, */*',
    'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
}
visited = set()  # never read by this script; kept so the module-level name survives

url = 'http://xlfans.com'  # entry page; can be swapped for another one
save_path = 'D:\\Program Files\\python\\test.txt'

# Matches the thumbnail link of an account-sharing post on the entry page.
# Group 1 is the 4-character archive id.
ARCHIVE_RE = re.compile(
    r'href=\"http://xlfans.com/archives/(.{4})\" class=\"thumbnail\">(.*) alt=\"迅雷粉 (.*) 迅雷会员账号分享 共享中',
    re.M | re.I,
)

WEEKEND_MARKER = "迅雷粉周末迅雷会员账号"
DEFAULT_MARKER = "迅雷粉专享迅雷会员账号"


def fetch_page(page_url, headers=head):
    """Download *page_url* with *headers* and return the body decoded as UTF-8."""
    request = urllib.request.Request(page_url, None, headers)
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')


def find_latest_archive_id(html):
    """Return the archive id of the first account-sharing post linked in *html*.

    Raises:
        ValueError: if no matching post link is present.
    """
    match = ARCHIVE_RE.search(html)
    if match is None:
        raise ValueError("no account-sharing post link found on the entry page")
    return match.group(1)


def marker_for_weekday(cur_day):
    """Return the heading text that precedes the account list for *cur_day*.

    *cur_day* is the string produced by ``time.strftime("%w")``
    ('0' = Sunday ... '6' = Saturday).
    """
    # NOTE(review): the original comment said "weekend", but '5' and '6' are
    # Friday and Saturday; confirm whether Sunday ('0') should be included.
    if cur_day in ('5', '6'):
        return WEEKEND_MARKER
    return DEFAULT_MARKER


def extract_account_lines(html, marker, max_paragraphs=3):
    """Return the text fragments of the account section that starts at *marker*.

    The section is everything from *marker* onward, cut into paragraphs at
    ``</p>``; up to *max_paragraphs* paragraphs are used, each cut into
    fragments at ``<br />``. The leading fragment of every paragraph except
    the first is dropped.

    Raises:
        ValueError: if *marker* does not occur in *html*.
    """
    start = html.find(marker)
    if start == -1:
        raise ValueError("account section heading %r not found in the post" % marker)

    # Plain string splitting is used here instead of a regex.
    # Slicing tolerates posts that contain fewer than max_paragraphs paragraphs.
    paragraphs = html[start:].split("</p>")[:max_paragraphs]

    lines = []
    for index, paragraph in enumerate(paragraphs):
        fragments = paragraph.split("<br />")
        # NOTE(review): loop nesting was reconstructed from a flattened paste;
        # the first paragraph keeps its leading fragment (the heading itself).
        lines.extend(fragments if index == 0 else fragments[1:])
    return lines


def main():
    """Fetch the newest post and write its account lines to ``save_path``."""
    index_html = fetch_page(url)
    post_url = url + "/archives/" + find_latest_archive_id(index_html)
    post_html = fetch_page(post_url)

    # The day of the week decides which heading introduces the account list.
    marker = marker_for_weekday(time.strftime("%w"))
    lines = extract_account_lines(post_html, marker)

    # Open the file only after parsing succeeded so a failure cannot leave a
    # truncated file behind; UTF-8 is explicit because the text is Chinese.
    with open(save_path, 'w', encoding='utf-8') as f_obj:
        f_obj.writelines(line + "\n" for line in lines)


if __name__ == "__main__":
    main()
标签:
原文地址:http://www.cnblogs.com/kingatnuaa/p/4802715.html