标签:logs tco cep rom close import retrieve web etc
# -*- coding: UTF-8 -*-
"""Download every image linked from a wallhaven "random" listing page.

The script fetches ``url`` with a browser-like User-Agent, extracts all
``.jpg`` / ``.gif`` / ``.png`` links from the returned HTML and saves each
one into ``imagGetPath``.
"""
import os
import re
import socket
import sys
import urllib.request

# Directory the downloaded images are written to.
imagGetPath = "G:\\zxh_python\\Spider\\ImagesDown"

# Page to scrape.
url = "https://alpha.wallhaven.cc/random/"

# Browser-like request headers. The three literals are joined with explicit
# separating spaces so the User-Agent is a well-formed product list.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/59.0.3071.115 Safari/537.36'
}

# Absolute http(s) URL ending in a supported image extension.
_IMAGE_URL_RE = re.compile(r'(https?://[\w./]+\.(jpg|gif|png))')


def saveFile(path, directory=None):
    """Return the local file path that the image at *path* should be saved to.

    The target directory (and any missing parents) is created on demand.

    Args:
        path: Image URL; everything after its last ``/`` becomes the file
            name. A value without any ``/`` is used as the file name as is.
        directory: Folder to save into. Defaults to ``imagGetPath``.

    Returns:
        The joined ``directory`` + file-name path as a string.
    """
    if directory is None:
        directory = imagGetPath
    # makedirs (not mkdir) so a missing parent folder does not abort the run.
    os.makedirs(directory, exist_ok=True)
    filename = path.rsplit('/', 1)[-1]
    return os.path.join(directory, filename)


def findImageLinks(html):
    """Return the sorted, de-duplicated image URLs found in *html*.

    Args:
        html: Decoded page text to search.

    Returns:
        A list of unique URL strings matching ``_IMAGE_URL_RE``.
    """
    # findall yields (full_url, extension) tuples; only the URL is needed.
    return sorted({match[0] for match in _IMAGE_URL_RE.findall(html)})


def main():
    """Fetch the listing page and download every image it links to."""
    request = urllib.request.Request(url=url, headers=headers)
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(request) as response:
        data = response.read()
        # Debugging aids: response.geturl(), response.info(),
        # response.getcode().

    # Decode once at the I/O boundary; matching against str(bytes) would
    # search the escaped b'...' repr instead of the real page text.
    html = data.decode('utf-8', errors='replace')

    for link in findImageLinks(html):
        print(link)
        try:
            urllib.request.urlretrieve(link, saveFile(link))
        except Exception:
            # Best effort: one failed download must not stop the others.
            # Not a bare ``except`` so Ctrl-C / SystemExit still abort.
            print('失败')


if __name__ == "__main__":
    main()
标签:logs tco cep rom close import retrieve web etc
原文地址:http://www.cnblogs.com/codebirdhan/p/7464907.html