标签:filename 失败 class 写入文件 文字 with open 其他 html break
最近迷上了听小说,但几个 App 上的有声小说更新太慢,而自己看小说又没法同时做其他事情,所以想了个办法:从网上下载小说,并将文字转换成语音。以后要下载其他小说时,只需修改小说的下载地址即可。特此写在博客上,供以后使用。
一、main.py
from spider import Spider
from txt import Txt
from mp3 import Mp3


def main():
    """Download every chapter and store it as both a text file and an MP3.

    Chapters are numbered in the order the chapter list yields them, and the
    number is embedded in the file name (``%05d-<title>``).  A chapter whose
    text file and MP3 both already exist is skipped, so the script can be
    re-run to resume an interrupted download.
    """
    clsSpider = Spider()
    clsTxt = Txt()
    clsMp3 = Mp3()

    dictTitle = clsSpider.getTitle()
    print(dictTitle)
    if not dictTitle:
        # getTitle() returns None when the chapter list cannot be fetched;
        # iterating over None would raise TypeError, so stop here instead.
        return

    # enumerate() keeps the index in step with the chapter even when a
    # chapter is skipped, replacing the hand-maintained counter.
    for i, (href, title) in enumerate(dictTitle.items()):
        filename = '%05d-%s' % (i, title)

        hasTxt = clsTxt.existTxt(filename)
        hasMp3 = clsMp3.existMp3(filename)
        if hasTxt and hasMp3:
            continue

        content = clsSpider.getContent(href)
        if not content:
            # None (download failed) or empty text: nothing to write.
            continue

        if not hasTxt:
            clsTxt.writeTxt(filename, content)
        if not hasMp3:
            clsMp3.writMp3(filename, content)


if __name__ == '__main__':
    main()
二、spider.py
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


class Spider(object):
    """Fetch the chapter list and chapter text of one novel over HTTP."""

    # Index page of the novel; chapter links are resolved against it.
    BASE_URL = 'https://www.xxxxxx.com/xx/x/xxx/'
    # Seconds to wait for the server; without a timeout a stalled
    # connection would block the whole run indefinitely.
    TIMEOUT = 10

    def __init__(self):
        self.session = requests.session()

    def __del__(self):
        # __init__ may have failed before the session was assigned.
        session = getattr(self, 'session', None)
        if session is not None:
            session.close()

    def _fetchSoup(self, url):
        """Download *url* and return it parsed as a BeautifulSoup tree.

        Raises:
            requests.RequestException: on connection errors, timeouts and
                HTTP error status codes.
        """
        resp = self.session.get(url, timeout=self.TIMEOUT)
        resp.raise_for_status()
        resp.encoding = 'utf-8'
        return BeautifulSoup(resp.text, 'html.parser')

    def getTitle(self):
        """Return a dict mapping each chapter's href to its title.

        The links are taken from ``#list dd a`` on the index page, in
        document order.  Anchors without an ``href`` are ignored.

        Returns:
            dict of href -> stripped link text, or None if the index page
            could not be downloaded.
        """
        try:
            soup = self._fetchSoup(self.BASE_URL)
        except requests.RequestException:
            print('获取章节列表失败!')
            return None

        return {
            a['href']: a.text.strip()
            for a in soup.select('#list dd a')
            if a.has_attr('href')
        }

    def getContent(self, url):
        """Return the stripped text of the ``#content`` element of a chapter.

        Args:
            url: chapter href as returned by getTitle(); it is resolved
                against BASE_URL, so both relative names and absolute
                paths/URLs work.

        Returns:
            The chapter text, or None if the page could not be downloaded
            or contains no ``#content`` element.
        """
        fullUrl = urljoin(self.BASE_URL, url)
        try:
            soup = self._fetchSoup(fullUrl)
        except requests.RequestException:
            print('获取[%s]内容异常!' % fullUrl)
            return None

        node = soup.select_one('#content')
        if node is None:
            # Page loaded but has no chapter body (e.g. layout changed).
            print('获取[%s]内容异常!' % fullUrl)
            return None
        return node.text.strip()
三、txt.py
import os


class Txt(object):
    """Store chapter text as UTF-8 files inside the ``txt`` directory."""

    # Directory (relative to the working directory) that holds the files.
    OUTPUT_DIR = 'txt'

    def __init__(self):
        # Create the output directory on first use; a no-op if it exists.
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)

    def _path(self, title):
        """Return the path of the text file for *title*.

        os.path.join picks the right separator for the platform; a
        hard-coded backslash only works on Windows.
        """
        return os.path.join(self.OUTPUT_DIR, '%s.txt' % title)

    def existTxt(self, title):
        """Return True if the text file for *title* already exists."""
        return os.path.exists(self._path(title))

    def writeTxt(self, title, content):
        """Write *content* (a str) to the text file for *title* as UTF-8.

        An existing file is overwritten.  I/O failures are reported on
        stdout and otherwise ignored so one bad chapter does not stop the
        whole run.
        """
        path = self._path(title)
        try:
            # Binary mode: bytes are written exactly as encoded, with no
            # platform newline translation.
            with open(path, 'wb') as f:
                f.write(content.encode('utf-8'))
        except OSError:
            print('写入文件%s异常!' % path)
四、mp3.py
import os
import time

from aip import AipSpeech


class Mp3(object):
    """Convert chapter text to MP3 files inside the ``mp3`` directory."""

    # Directory (relative to the working directory) that holds the files.
    OUTPUT_DIR = 'mp3'
    # Number of characters sent to the speech service per request.
    # NOTE(review): the service limits request size; confirm 1000
    # characters stays within the limit for the texts being converted.
    CHUNK_SIZE = 1000
    # Attempts per chunk before the chapter is given up on.  Retrying
    # forever would spin endlessly on a permanent error.
    MAX_RETRIES = 5
    # Seconds to wait between attempts.
    RETRY_DELAY = 1.0

    def __init__(self):
        self.client = AipSpeech('xxx', 'xxx', 'xxx')
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)

    def _path(self, title):
        """Return the path of the MP3 file for *title* (portable separator)."""
        return os.path.join(self.OUTPUT_DIR, '%s.mp3' % title)

    def existMp3(self, title):
        """Return True if the MP3 file for *title* already exists."""
        return os.path.exists(self._path(title))

    def _synthesize(self, msg):
        """Return the audio data for *msg*, or None after MAX_RETRIES failures.

        A dict result from the client is treated as an error report and
        printed; any other result is taken to be the audio data.
        """
        for attempt in range(self.MAX_RETRIES):
            if attempt:
                time.sleep(self.RETRY_DELAY)
            try:
                result = self.client.synthesis(msg)
            except Exception as err:
                # Boundary with a third-party client: report and retry.
                # Unlike a bare except, this lets Ctrl-C through.
                print(err)
                continue
            if not isinstance(result, dict):
                return result
            print(result)
        return None

    def writMp3(self, title, content):
        """Synthesize *content* in CHUNK_SIZE pieces and save it as one MP3.

        The file is written only after every chunk has been synthesized,
        so a failed run never leaves a truncated file that existMp3()
        would later mistake for a finished chapter.  Failures are reported
        on stdout and otherwise ignored.
        """
        path = self._path(title)

        audio = []
        for pos in range(0, len(content), self.CHUNK_SIZE):
            data = self._synthesize(content[pos:pos + self.CHUNK_SIZE])
            if data is None:
                print('写入文件%s异常!' % path)
                return
            audio.append(data)

        if not audio:
            # Empty content: do not create an empty file.
            return

        try:
            with open(path, 'wb') as f:
                f.write(b''.join(audio))
        except OSError:
            print('写入文件%s异常!' % path)
标签:filename 失败 class 写入文件 文字 with open 其他 html break
原文地址:https://www.cnblogs.com/qiyueliuguang/p/11109409.html