# -*- coding:utf-8 -*-
"""Interactive downloader for 52pojie.cn forum thread listings.

The script prints a numbered menu of forum sections, asks the user to pick
one, and appends the title and link of every thread found on every page of
that section to a text file chosen by the user.
"""
import time

import requests
from bs4 import BeautifulSoup


# Menu of forum sections. Each entry is a single-item dict mapping the section
# name to the URL of its first page; the position in the list is the number
# the user types to select the section.
title_list = [
    {'原创发布区': 'http://www.52pojie.cn/forum-2-1.html'},
    {'逆向资源区': 'http://www.52pojie.cn/forum-4-1.html'},
    {'脱壳破解区': 'http://www.52pojie.cn/forum-5-1.html'},
    {'动画发布区': 'http://www.52pojie.cn/forum-6-1.html'},
    {'悬赏问答区': 'http://www.52pojie.cn/forum-8-1.html'},
    {'水漫金山': 'http://www.52pojie.cn/forum-10-1.html'},
    {'站点公告': 'http://www.52pojie.cn/forum-13-1.html'},
    {'精品软件区': 'http://www.52pojie.cn/forum-16-1.html'},
    {'音乐视频': 'http://www.52pojie.cn/forum-19-1.html'},
    {'编程语言区': 'http://www.52pojie.cn/forum-24-1.html'},
    {'申请专区': 'http://www.52pojie.cn/forum-25-1.html'},
    {'LCG Area': 'http://www.52pojie.cn/forum-28-1.html'},
    {'病毒分析区': 'http://www.52pojie.cn/forum-32-1.html'},
    {'周年庆典活动专区': 'https://www.52pojie.cn/forum-36-1.html'},
    {'招聘求职': 'http://www.52pojie.cn/forum-39-1.html'},
    {'病毒样本区': 'http://www.52pojie.cn/forum-40-1.html'},
    {'安全工具区': 'http://www.52pojie.cn/forum-41-1.html'},
    {'电子书策划制作区': 'http://www.52pojie.cn/forum-42-1.html'},
    {'Key|Patch|共享账号': 'http://www.52pojie.cn/forum-44-1.html'},
    {'病毒救援区': 'http://www.52pojie.cn/forum-50-1.html'},
    {'影视推荐': 'http://www.52pojie.cn/forum-56-1.html'},
    {'LSG Area': 'http://www.52pojie.cn/forum-58-1.html'},
    {'软件调试区': 'http://www.52pojie.cn/forum-59-1.html'},
    {'T恤活动作品区': 'http://www.52pojie.cn/forum-62-1.html'},
    {'移动安全区': 'http://www.52pojie.cn/forum-65-1.html'},
    {'福利经验': 'http://www.52pojie.cn/forum-66-1.html'},
    {'2014CrackMe大赛': 'http://www.52pojie.cn/forum-67-1.html'},
    {'吾爱破解2016安全挑战赛': 'http://www.52pojie.cn/forum-71-1.html'},
    {'站务处理': 'http://www.52pojie.cn/forum-72-1.html'},
]


def get_html(url, timeout=30, retry_delay=10, max_retries=None):
    """Fetch *url* and return the response body as text, retrying on failure.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            raises instead of blocking forever.
        retry_delay: Seconds to sleep between attempts.
        max_retries: Maximum number of retries after the first attempt, or
            ``None`` (the default) to keep retrying indefinitely.

    Returns:
        The decoded response text.

    Raises:
        requests.RequestException: Only when ``max_retries`` is set and has
            been exhausted; the last failure is re-raised.
    """
    failures = 0
    while True:
        try:
            response = requests.get(url, timeout=timeout)
            return response.text
        except requests.RequestException as exc:
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            # Tell the user why the script is pausing instead of failing
            # silently, then try again after the delay.
            print("请求失败,{}秒后重试: {}".format(retry_delay, exc))
            time.sleep(retry_delay)


def get_page(url):
    """Return the total number of pages of the forum section at *url*.

    The count is read from the ``<span>`` inside the fourth ``<label>`` of the
    page, dropping the first three and last two characters of its text.
    NOTE(review): this presumably matches the forum's "go to page" widget;
    confirm against the live markup, since any layout change breaks it.

    Raises:
        IndexError, AttributeError, TypeError, ValueError: If the page does
            not have the expected label/span structure.
    """
    html = get_html(url)
    soup = BeautifulSoup(html, 'lxml')
    label_list = soup.find_all('label')
    page = int(label_list[3].span.string[3:-2])
    return page


def _page_url(url, page):
    """Return *url* with its trailing ``-<n>.html`` page number set to *page*."""
    # Split on the last '-' rather than slicing a fixed 7 characters, so the
    # result is correct whatever the number of digits in the original page.
    return url.rsplit('-', 1)[0] + '-' + str(page) + '.html'


def page_down(url):
    """Download every page of the section at *url* and save its thread links.

    Prints the page count, prompts for an output file name, then for each
    page appends one line with the thread title and one line with its
    absolute link for every ``<a class="s xst">`` element found.

    Args:
        url: URL of the first page of a section (ending in ``-1.html``).
    """
    page = get_page(url)
    print("总页数:" + str(page))
    txt = input("请输入保存到的文件名(注意添加后缀):")
    for j in range(1, page + 1):
        print(("第" + str(j) + "页下载中").center(40, "■"))
        html = get_html(_page_url(url, j))
        soup = BeautifulSoup(html, 'lxml')
        a_list = soup.find_all('a', attrs={'class': 's xst'})

        # Collect the page's output first so the file is opened once per
        # page instead of once per link.
        lines = []
        for a in a_list:
            lines.append(a.get_text() + '\n')
            lines.append("https://www.52pojie.cn/" + a.attrs['href'] + '\n')
        with open(txt, 'a', encoding='utf-8') as f:
            f.writelines(lines)

        print(("第" + str(j) + "页下载完成").center(40, "■"))


def main():
    """Show the section menu, read the user's choice and download it.

    An ``EOFError`` from a closed standard input is not caught, so the
    prompt cannot spin forever when no input is available.
    """
    # Print the menu two entries per row.
    for index, title in enumerate(title_list):
        for key in title:
            label = (str(index) + ':' + key).ljust(20)
            if index % 2 == 1:
                print(label)
            else:
                print(label, end=" ")

    # Keep asking until the input is an integer that indexes title_list.
    while True:
        print()
        try:
            num = int(input('请输入你要浏览的代号:'))
        except ValueError:
            print('输入有误请重新输入')
            continue
        if 0 <= num < len(title_list):
            break
        print('输入有误请重新输入')

    # Look up the section URL and download it.
    dict_t = title_list[num]
    for key in dict_t:
        print(dict_t[key])
        page_down(dict_t[key])


if __name__ == '__main__':
    main()