标签:text gbk body 应用 tor ons title pass sts
在PY文件中:
from scrapy.selector import Selector
from scrapy.http import HtmlResponse
# Fetch the page, extract the image URLs found in ``data-img`` attributes and
# hand each one to ``baocun`` together with a per-directory target path.
#
# NOTE(review): this snippet uses names that are not defined in the visible
# part of the file: the ``requests``, ``re``, ``time`` and ``os`` modules, the
# ``wz`` path prefix and the ``baocun(url, path)`` helper. Presumably ``wz`` is
# the download root (ending in a path separator) and ``baocun`` saves the URL
# to the given path -- confirm against their definitions.
url = "https://m.mm131.net/"
# A timeout keeps a stalled server from hanging the script forever.
r = requests.get(url, timeout=30)
r.encoding = "gbk"  # may need to be "utf-8" instead, depending on the site
body = r.text

# Parse the document once and reuse the selector for every query.
page = Selector(text=body)
tx = page.xpath('//h2[@class="mm-title"]/text()').extract()
# A query for '//article[@class="post"]/div' used to be assigned to tx1 first
# and was immediately overwritten; only the effective query is kept.
tx1 = page.xpath('//article[@id="post-1"]/div').extract()

# Search the extracted HTML itself rather than the repr() of the list, and
# escape the dot so that only a real ".jpg" suffix matches.
html = "\n".join(tx1)
tx2 = re.findall(r'src="(http.*?\.jpg)"', html)
tx4 = re.findall(r'data-img="(http.*?\.jpg)"', html)
tx3 = re.findall(r'href="(.*?)"', html)
# tx, tx2 and tx3 are not used below; they are kept because later code in the
# file may read these module-level names.

# set() removes duplicate URLs; iteration order is therefore arbitrary.
for aa in set(tx4):
    parts = aa.split("/")
    adir = parts[-2]  # last directory component of the URL
    name = parts[-1]  # file name component of the URL
    time.sleep(3)  # pause between downloads
    target_dir = wz + adir + "/"
    # Creates missing parents too and does not fail if the directory exists.
    os.makedirs(target_dir, exist_ok=True)
    fname = target_dir + adir + name
    print(fname)
    baocun(aa, fname)
标签:text gbk body 应用 tor ons title pass sts
原文地址:https://www.cnblogs.com/xkdn/p/12268259.html