# Tags (blog metadata, not code): htm dict port move os.path name 一段 png enc
import requests
import re
import os
import time
from html import unescape

# Default root directory under which one folder per faction is created.
# Kept identical to the original hard-coded location; pass ``base_dir`` to
# ``make_camp_file`` to write somewhere else.
BASE_DIR = "C:\\Users\\16609\\Desktop\\blhxS\\"


# Get the faction ("camp") names.
def get_camp(html):
    """Return every faction name matched in one chunk of the index page.

    Args:
        html: A piece of the index page's HTML source.

    Returns:
        A list of the captured link texts (possibly empty).
    """
    return re.findall(
        r'<ul><li><b><a href=".*?" title=".*?">(.*?)</a></b>.*?</li></ul>',
        html,
        re.S,
    )


# Create the folder that corresponds to each faction.
def make_camp_file(list1, base_dir=BASE_DIR):
    """Create one directory per faction and return the directory paths.

    Args:
        list1: A list of non-empty name lists as produced by ``get_camp``;
            the first name of each entry is used as the folder name.
        base_dir: Root directory for the faction folders.

    Returns:
        The list of directory paths, in the same order as ``list1``.
    """
    camp_path = []
    for camp_name in list1:
        file_path = os.path.join(base_dir, camp_name[0])
        camp_path.append(file_path)
        # exist_ok avoids the check-then-create race of the original.
        os.makedirs(file_path, exist_ok=True)
    return camp_path


# Extract the character page URLs contained in one faction section.
def get_Char_url(html):
    """Return the character page URLs found in one faction section.

    Args:
        html: HTML source of a single faction section.

    Returns:
        A list of URLs with HTML entities decoded.
    """
    hrefs = re.findall(r'<p><a href="(.*?)" title=".*?">', html)
    # NOTE(review): the published listing shows ``url.replace(':', ':')``,
    # a no-op; presumably the first argument was an HTML entity such as
    # ``&#58;`` that got rendered. Decoding all entities covers that case and
    # also ``&amp;`` inside query strings -- confirm against the live page.
    return [unescape(href) for href in hrefs]


def _save_images(name_to_url, path):
    """Download every URL in ``name_to_url`` into ``path`` under its key."""
    if not name_to_url:
        return
    # The target folder is not guaranteed to exist yet (get_info only creates
    # it when it has something to write), so create it here.
    os.makedirs(path, exist_ok=True)
    for file_name, pic_url in name_to_url.items():
        # The name comes from a remote page's ``alt`` text: keep only the
        # final path component so it cannot point outside ``path``.
        safe_name = os.path.basename(file_name)
        if not safe_name:
            continue
        pic_resp = requests.get(pic_url)
        with open(os.path.join(path, safe_name), 'wb') as f:
            f.write(pic_resp.content)


# Chibi ("Q-version") character art.
def get_Q_Pic(response, path):
    """Download the images inside the page's ``qchar-container`` block.

    Args:
        response: The ``requests`` response of a character page.
        path: Directory the images are written to; created if missing.
    """
    demo = re.findall(
        r'<div class="qchar-container" data-ship-name="(.*?)">(.*?)</div>',
        response.text,
        re.S,
    )
    if not demo:
        # Page has no chibi container; nothing to download.
        return
    Q_Lipainted = demo[0][1]
    Q_name = re.findall(r'alt="(.*?)"', Q_Lipainted)
    Q_url = re.findall(r'src="(.*?)"', Q_Lipainted)
    _save_images(dict(zip(Q_name, Q_url)), path)


# Character information.
def get_info(response, path, file_name):
    """Append the page's ``label:text`` entries to ``<path>/<file_name>.txt``.

    Args:
        response: The ``requests`` response of a character page.
        path: Directory for the text file; created if missing.
        file_name: Base name (without extension) of the text file.
    """
    Info = re.findall(r'<tr data-key=".*?">(.*)</tr>', response.text, re.S)
    if not Info:
        return
    Info_Speak = re.findall(
        r'<th>(.*?)</th>.*?data-lang="zh">.(.*?)</p>', Info[0], re.S
    )
    # A dict keeps one entry per label (the last occurrence wins).
    Info_Speak_dict = dict(Info_Speak)
    if not Info_Speak_dict:
        return
    os.makedirs(path, exist_ok=True)
    # Open once and append all entries instead of re-opening per entry.
    with open(os.path.join(path, file_name + '.txt'), 'a+', encoding='utf-8') as f:
        for label, text in Info_Speak_dict.items():
            f.write(label + ":" + text + '\n')


# Fetch the HTML of every character page.
def get_html(char_url, path):
    """Fetch each character page and save its text and images.

    Uses the module-level ``Session`` for the page requests.

    Args:
        char_url: Iterable of character page URLs.
        path: The faction directory; a sub-folder per character is used.
    """
    for url in char_url:
        Name = re.findall(r'http://wiki.joyme.com/blhx/(.+)', url)
        if not Name:
            # Not a character page URL of the expected form; skip it rather
            # than crash on Name[0].
            continue
        response = Session.get(url)
        # Trailing empty component keeps the trailing separator of the
        # original ``path + '\\' + name + '\\'`` form.
        file_path = os.path.join(path, Name[0], '')
        get_info(response, file_path, Name[0])
        get_pic(response, file_path)
        get_Q_Pic(response, file_path)


# Character pictures.
def get_pic(response, path):
    """Download the images found inside the page's ``tab_con`` blocks.

    An image whose ``alt`` text is empty is saved as ``Q_GIF.gif``.

    Args:
        response: The ``requests`` response of a character page.
        path: Directory the images are written to; created if missing.
    """
    Pic = re.findall(
        r'<div class="tab_con.*?" style=".*?">.*?<img alt="(.*?)" src="(.*?)".*?</div>',
        response.text,
        re.S,
    )
    Pic_dict = {}
    for alt, src in Pic:
        if alt == '':
            alt = 'Q_GIF.gif'
        Pic_dict[alt] = src
    _save_images(Pic_dict, path)


# Azur Lane wiki page that lists the ships grouped by faction.
url = 'http://wiki.joyme.com/blhx/%E8%88%B0%E5%A8%98%E5%9B%BE%E9%89%B4'
Session = requests.session()
response = Session.get(url)

# Cut the page into sections at each divider image.
camp = re.split('<img alt="分割线.png"', response.text)

# Collect the faction names of every section that has any.
list1 = []
for section in camp:
    s = get_camp(section)
    if s != []:
        list1.append(s)

# Create one folder per faction.
camp_path = make_camp_file(list1)

# Drop the first section: it is the top-of-page markup and contains no
# character names or URLs.
del camp[0]

camp_char = [get_Char_url(section) for section in camp]

# Sections and folders are paired by position, as in the original; zip stops
# at the shorter list instead of raising IndexError when they differ.
if len(camp_char) != len(camp_path):
    print(
        "warning: %d character sections but %d faction folders; "
        "extra entries are ignored" % (len(camp_char), len(camp_path))
    )
for section_urls, section_path in zip(camp_char, camp_path):
    get_html(section_urls, section_path)
# Tags (blog metadata, not code): htm dict port move os.path name 一段 png enc