"""Download every ``.jpg`` image referenced by a web page.

The script fetches one page, extracts the ``src="...jpg"`` attribute values
with a regular expression and saves each image into the current working
directory as ``0.jpg``, ``1.jpg``, ... in order of appearance.

Runs on both Python 2 and Python 3.
"""
import contextlib
import re
import urllib

try:
    import urllib.request as _request  # Python 3
except ImportError:
    # Python 2: urlopen/urlretrieve live directly on the urllib module.
    _request = urllib

# Non-greedy capture of the value of every src="....jpg" attribute.
_JPG_SRC_RE = re.compile(r'src="(.*?\.jpg)"')


def getHtml(url):
    """Fetch *url* and return the response body as a native ``str``.

    Args:
        url: Address of the page to download.

    Returns:
        The page content. On Python 2 this is the raw byte string, exactly
        as before. On Python 3 the bytes are decoded using the charset
        declared in the response headers (UTF-8 when none is declared or
        the declared one is unknown), with undecodable bytes replaced, so
        the result can be searched with a text regular expression.

    Raises:
        IOError: If the URL cannot be opened (``URLError`` on Python 3 is a
            subclass of ``OSError``/``IOError``).
    """
    # closing() guarantees the connection is released on both Python
    # versions; the Python 2 response object is not a context manager.
    with contextlib.closing(_request.urlopen(url)) as response:
        raw = response.read()
        if isinstance(raw, str):
            # Python 2: read() already returns ``str``.
            return raw
        charset = response.info().get_content_charset() or "utf-8"
    try:
        return raw.decode(charset, "replace")
    except LookupError:
        # The server declared a charset name Python does not know.
        return raw.decode("utf-8", "replace")


def getImage(source):
    """Download every ``.jpg`` referenced by a ``src`` attribute in *source*.

    Args:
        source: HTML text, e.g. the value returned by :func:`getHtml`.

    Returns:
        List of the file names written, in download order
        (``['0.jpg', '1.jpg', ...]``); an empty list when *source* contains
        no matching ``src`` attribute.

    Raises:
        IOError: If one of the image URLs cannot be retrieved.
    """
    saved = []
    for index, image_url in enumerate(_JPG_SRC_RE.findall(source)):
        filename = '%s.jpg' % index
        _request.urlretrieve(image_url, filename)
        saved.append(filename)
    return saved


if __name__ == "__main__":
    # Guarded so importing this module does not trigger network access.
    source = getHtml('http://tieba.baidu.com/p/3237470549')
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(getImage(source))
Python 写的简单有效的爬虫代码，布布扣，bubuko.com
原文地址:http://blog.csdn.net/rainlesvio/article/details/38660987