python爬取小说

时间：2019-12-06 21:55:34 阅读：114 评论：0 收藏：0 [点我收藏+]

标签：python ISE idt __name__ down source url path 结果

运行结果：

技术图片

代码：

import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver


 6 class NovelSpider:
 7     def __init__(self):
 8         self.start_url = ‘https://www.biqukan.com/1_1680/‘
 9  
10     def get_novel(self):
11         response = requests.get(self.start_url)
12         soup = BeautifulSoup(response.text, ‘html.parser‘)
13         div_chapter = soup.find(class_="listmain")
14         chapter_list = div_chapter.find_all(‘a‘)
15         chapter_list = chapter_list[12:]
16         chapter = []
17         chapter_num = len(chapter_list)
18         count = 0
19         print(‘《凡人修仙传仙界篇》开始下载:‘)
20         for cl in chapter_list:
21             chapter_dict = {}
22             chapter_name = cl.get_text()
23             chapter_dict[‘name‘] = chapter_name
24             chapter_url = cl.get(‘href‘)
25             chapter_dict[‘value‘] = ‘https://www.biqukan.com‘ + chapter_url
26             if chapter_dict not in chapter:
27                 chapter.append(chapter_dict)
28             print(f"已下载:{count}/{chapter_num}")
29             self.download_novel(chapter_dict)
30             count += 1
31  
32     def parse_novel(self, url):
33         browser = webdriver.PhantomJS(executable_path=r‘F:\Spider\novelSpider\phantomjs.exe‘)
34         browser.get(url)
35         soup = BeautifulSoup(browser.page_source, ‘html.parser‘)
36         find_txt = soup.find(class_=‘showtxt‘)
37         # print(type(find_txt.get_text()))
38         return find_txt.get_text()
39  
40     def download_novel(self, data): 
41         filename = data[‘name‘]
42         url = data[‘value‘]
43         txt = self.parse_novel(url)
44  
45         path = r"F:\Spider\novelSpider"
46         isExists = os.path.exists(path)
47         if not isExists:
48             os.mkdir(path)
49         else:
50             pass
51  
52         with open(path + f‘\凡人修仙传仙界篇.txt‘, ‘a‘, encoding=‘utf-8‘) as f:
53             f.write(f‘{filename}\n\n‘)
54             f.write(txt)
55             f.write(‘\n======\n\n‘)
56             f.close()
57  
if __name__ == '__main__':
    # Script entry point: crawl the chapter index with the default settings
    # and append every chapter to the output text file.
    ns = NovelSpider()
    ns.get_novel()

python爬取小说

标签：python ISE idt __name__ down source url path 结果

原文地址：https://www.cnblogs.com/huanghuangwei/p/11997460.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行