"""Download every ``.jpg`` image referenced by a Baidu Tieba thread page."""
import os
import re
import urllib
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# Matches a ``src`` attribute ending in ``.jpg`` that is immediately followed
# by a ``pic_ext`` attribute; group 1 is the image URL.
_IMG_RE = re.compile(r'src="(.+?\.jpg)" pic_ext')


def rename(name):
    """Return *name* with the ``.jpg`` extension appended."""
    return name + '.jpg'


def getHtml(url):
    """Fetch *url* and return the raw response body.

    The response is closed before returning, even if reading fails.
    """
    with closing(urlopen(url)) as page:
        return page.read()


def getImg(html, save_dir="E:\\pic"):
    """Download every image referenced in *html* into *save_dir*.

    Images are saved as ``1.jpg``, ``2.jpg``, ... in the order in which they
    appear in the page, and each file name is printed as it is saved.

    Args:
        html: Page source, either text or raw bytes (as returned by
            ``getHtml``).
        save_dir: Destination folder; created if it does not exist yet.

    Returns:
        A list with the paths of the files written, in download order.
    """
    if isinstance(html, bytes):
        # Only the (ASCII) URLs are of interest, so undecodable bytes in the
        # rest of the page can safely be dropped.
        html = html.decode("utf-8", "ignore")

    imglist = _IMG_RE.findall(html)

    # Write into save_dir through explicit paths rather than os.chdir(), so
    # the working directory of the process is left untouched.
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    saved = []
    for index, imgurl in enumerate(imglist, 1):
        name = rename(str(index))
        print(name)
        # Finish the download before creating the file, so that a failed
        # request does not leave an empty image behind.
        with closing(urlopen(imgurl)) as img:
            data = img.read()
        path = os.path.join(save_dir, name)
        with open(path, 'wb') as f:
            f.write(data)
        saved.append(path)
    return saved


def main():
    """Scrape the sample Tieba thread and save its pictures."""
    html = getHtml("http://tieba.baidu.com/p/3553148164")
    getImg(html)
    print('pic save!')


if __name__ == "__main__":
    main()
爬取的网页是 http://tieba.baidu.com/p/3553148164
图片保存在 E 盘的 pic 文件夹下(即 E:\pic)。
爬取结果如下:
原文地址:http://blog.csdn.net/u013018721/article/details/43280425