标签:win 保存 桌面 header read src 文件 des lib
import re
import urllib2
# Small crawler (Python 2.7): download every .jpg referenced by an <img> tag
# in a locally saved HTML page and store the images in a desktop folder.
# The page source must be saved to 1.txt beforehand.

# Browser-like User-agent sent with every image request.
headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0'}

with open('C:\\Users\\yaxin\\Desktop\\1.txt', 'r') as page_file:
    data = page_file.read()

# Capture each image address up to (but not including) the "jpg" suffix.
# NOTE(review): the attribute quote was restored from a typographic quote in
# the scraped listing; confirm the saved page uses src='...' and not src="...".
pattern = re.compile(r"<img src='(.*?)jpg")
need = pattern.findall(data)

count = 0
for i in need:
    i = i + 'jpg'  # restore the suffix the pattern left out of the capture

    # Download first, so a failed request does not leave an empty file behind.
    request = urllib2.Request(i, headers=headers)
    data_res = urllib2.urlopen(request)
    try:
        data_b = data_res.read()  # raw image bytes
    finally:
        # Python 2 responses are not context managers; close explicitly.
        data_res.close()

    # File name = the four characters preceding the last four of the URL
    # (i[-8:-4]); URLs sharing those characters overwrite each other.
    with open('C:\\Users\\yaxin\\Desktop\\boot\\%s.jpg' % i[-8:-4], 'wb') as f:
        f.write(data_b)

    count = count + 1
    print('done ' + str(count))  # progress: number of images saved so far

print('done')  # end marker
一只小爬虫下载jpg图片到桌面 >>>>>python2.7.x
标签:win 保存 桌面 header read src 文件 des lib
原文地址:http://www.cnblogs.com/yaxin1989/p/6129083.html