标签:img init == port ade enum exist param down
import os
import re
from urllib.parse import urlsplit

import requests


class DouTuLaSpider:
    """Scrape image URLs from doutula.com listing pages and save the images.

    Each listing page is fetched from ``self.url`` with a ``page`` query
    parameter; every ``data-original``/``alt`` pair found in the HTML is
    treated as (image URL, image title). Images for page N are written into
    a folder named ``第N页`` relative to the current working directory.
    """

    # Captures (image URL, title) from tags carrying both attributes.
    # ``.`` does not match newlines, so both attributes must share a line.
    _IMAGE_PATTERN = re.compile(r'data-original="(.*?)".*?alt="(.*?)"')

    # Characters removed from titles before they become file names: the
    # punctuation the original pattern stripped (?, !, ,) plus characters
    # that are path separators or reserved on common filesystems.
    # NOTE(review): the fullwidth variants are included on the assumption
    # that the duplicated "?" in the original pattern was a fullwidth mark
    # lost in transcription -- confirm.
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|!,?!,]')

    # Seconds to wait on any single HTTP request before giving up, so that
    # a stalled connection cannot hang the whole run.
    _TIMEOUT = 10

    def __init__(self):
        self.url = 'http://www.doutula.com/article/list/'
        # Attribute name "heade" (sic) is kept so existing callers that read
        # or override it keep working.
        self.heade = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
        }

    def get_imgUrl(self, pagenum):
        """Fetch listing page ``pagenum`` and return its images.

        Prints the final request URL, then returns a list of
        ``(url, title)`` tuples, one per regex match (empty if none).

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        response = requests.get(
            self.url,
            headers=self.heade,
            params={'page': pagenum},
            timeout=self._TIMEOUT,
        )
        print(response.url)
        return self._IMAGE_PATTERN.findall(response.text)

    def download_Img(self, imgList, page):
        """Download every ``(url, title)`` in ``imgList`` into the page folder.

        The folder ``第<page>页`` is created if missing. Files are written via
        joined paths instead of ``os.chdir`` so the process working directory
        is never changed, even when a download raises part-way through.
        An image whose request fails or returns an HTTP error status is
        reported and skipped rather than saved as a bogus file.
        """
        folder = '第%d页' % (page)
        os.makedirs(folder, exist_ok=True)

        for url, title in imgList:
            filename = self._build_filename(title, url)
            print(filename)
            try:
                # Reuse the browser-like headers for the image request too;
                # the original sent none here.
                response = requests.get(
                    url, headers=self.heade, timeout=self._TIMEOUT
                )
                response.raise_for_status()
            except requests.RequestException as exc:
                print('skipped %s: %s' % (url, exc))
                continue
            with open(os.path.join(folder, filename), 'wb') as f:
                f.write(response.content)

    def run(self, page):
        """Scrape and download listing pages 1 through ``page`` inclusive."""
        # A distinct loop name avoids shadowing the ``page`` parameter.
        for pagenum in range(1, page + 1):
            image_list = self.get_imgUrl(pagenum)
            self.download_Img(image_list, pagenum)

    @classmethod
    def _build_filename(cls, title, url):
        """Return a filesystem-safe file name built from ``title`` and ``url``.

        The extension is taken from the URL *path* only, so query strings
        and extension-less URLs cannot leak path fragments into the name.
        If the cleaned title is empty, the URL's own base name is used.
        """
        path = urlsplit(url).path
        stem, ext = os.path.splitext(os.path.basename(path))
        name = cls._UNSAFE_FILENAME_CHARS.sub('', title).strip()
        if not name:
            name = cls._UNSAFE_FILENAME_CHARS.sub('', stem) or 'untitled'
        return name + ext


if __name__ == '__main__':
    doutula = DouTuLaSpider()
    doutula.run(5)
标签:img init == port ade enum exist param down
原文地址:https://www.cnblogs.com/remoting-py/p/10111181.html