码迷,mamicode.com
首页 > 编程语言 > 详细

scrapy_novel_python

时间:2018-03-22 13:40:09      阅读:173      评论:0      收藏:0      [点我收藏+]

标签:show   style   .text   loader   nbsp   write   .com   class   开始   

 1 # -*- coding: UTF-8 -*-
 2 from bs4 import BeautifulSoup
 3 import requests,sys
 4 class downloader(object):
 5     def __init__(self):
 6         self.server = http://www.biqukan.com/
 7         self.target = http://www.biqukan.com//1_1094/
 8         self.names = []
 9         self.urls = []
10         self.nums = 0
11     #获取下载链接
12     def get_download_url(self):
13         reg = requests.get(url = self.target)
14         html = reg.text
15         div_bf = BeautifulSoup(html)
16         div = div_bf.find_all(div, class_ = listmain)
17         a_bf = BeautifulSoup(str(div[0]))
18         a = a_bf.find_all(a)
19         self.nums = len(a[15:])
20         for each in a[15:]:
21             self.names.append(each.string)
22             self.urls.append(self.server + each.get(href))
23     #获取章节内容
24     def get_contents(self,target):
25         req = requests.get(url = target)
26         html = req.text
27         bf = BeautifulSoup(html)
28         texts = bf.find_all(div,class_ = showtxt)
29         texts = texts[0].text.replace(\xa0*8,\n\n)
30         return texts
31     #写入文件
32     def writer(self,name,path,text):
33         write_flag = True
34         with open(path,a,encoding=utf-8) as f:
35             f.write(name + \n)
36             f.writelines(text)
37             f.write(\n\n)
if __name__ == '__main__':
    # Script entry point: gather the chapter list, then fetch each chapter
    # in order and append it to a single text file, reporting progress on
    # one self-overwriting console line.
    dl = downloader()
    dl.get_download_url()
    print('《一念永恒》开始下载:')
    for done, (name, url) in enumerate(zip(dl.names, dl.urls), start=1):
        dl.writer(name, '一念永恒.txt', dl.get_contents(url))
        # Scale to a real percentage and count the chapter just written, so
        # the display ends at 100% instead of showing a 0..1 fraction.
        sys.stdout.write("已下载:%.3f%%" % (100.0 * done / dl.nums) + '\r')
        sys.stdout.flush()
    print('《一念永恒》下载完成')

 

scrapy_novel_python

标签:show   style   .text   loader   nbsp   write   .com   class   开始   

原文地址:https://www.cnblogs.com/zysps1/p/novel_scrapy.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!