Python 爬虫爬取美女图片
#coding=utf-8
"""Small multi-threaded image scraper.

Fetches an index page, collects every ``http://...html`` link found in it,
then visits each linked page and downloads every ``.jpg`` referenced by a
``rel=`` attribute into the local ``pic`` directory.

The script runs on both Python 2 and Python 3.
"""
import urllib
import re
import os
import time
import threading

try:  # Python 3 keeps these helpers in urllib.request
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2 exposes them directly on urllib
    from urllib import urlopen, urlretrieve

# Directory (relative to the working directory) that receives the images.
PIC_DIR = 'pic'

# Absolute link to an .html page. The character class stops the match at
# quotes, whitespace and angle brackets, and the lazy quantifier stops at the
# first ".html", so several links on one line are returned separately.
PAGE_URL_PATTERN = r'http://[^\s"\'<>]*?\.html'

# Value of a rel= attribute (optionally quoted) that ends in ".jpg".
# The capture group makes re.findall return just the image URL.
IMG_SRC_PATTERN = r'rel=["\']?([^"\'\s>]+\.jpg)'


def getHtml(url):
    """Fetch *url* and return the response body as text.

    On Python 3 the raw bytes are decoded as UTF-8 with undecodable bytes
    replaced; the URLs this script extracts are ASCII, so they survive even
    if the page actually uses another encoding. On Python 2 the body is
    returned unchanged.
    """
    page = urlopen(url)
    try:
        html = page.read()
    finally:
        # Always release the connection, even if read() fails.
        page.close()
    if not isinstance(html, str):
        html = html.decode('utf-8', 'replace')
    return html


def getImgUrl(html, src):
    """Return every match of the regex *src* in *html* (``re.findall`` rules).

    If *src* contains one capture group, the list holds the captured text
    instead of the whole match.
    """
    return re.findall(src, html)


def getImgPage(html):
    """Return all absolute ``http://...html`` links that appear in *html*."""
    return re.findall(PAGE_URL_PATTERN, html)


def _ensureDir(path):
    """Create *path* if needed; safe when several threads race to do so."""
    try:
        os.makedirs(path)
    except OSError:
        # Another thread may have created it first; only a real failure
        # (e.g. permissions, or a file with that name) is re-raised.
        if not os.path.isdir(path):
            raise


def _splitChunks(items, count):
    """Split *items* into at most *count* contiguous chunks, dropping none."""
    if not items:
        return []
    count = max(1, count)
    size = -(-len(items) // count)  # ceiling division
    return [items[i:i + size] for i in range(0, len(items), size)]


def downloadImg(url):
    """Download every ``.jpg`` referenced via ``rel=`` on the page at *url*.

    Images are stored in ``PIC_DIR`` under a name derived from their URL
    (``:`` and ``/`` replaced by ``_``). Files that already exist are skipped.
    Fetch failures are reported and skipped so that one bad page or image
    does not abort the remaining downloads.
    """
    try:
        html = getHtml(url)
    except (IOError, ValueError) as err:
        print('skip page %s: %s' % (url, err))
        return

    _ensureDir(PIC_DIR)
    for srcurl in getImgUrl(html, IMG_SRC_PATTERN):
        imgName = srcurl.replace(':', '_').replace('/', '_')
        print('download pic %s .........' % srcurl)
        target = os.path.join(PIC_DIR, imgName)
        if os.path.isfile(target):
            continue
        try:
            urlretrieve(srcurl, target)
        except (IOError, ValueError) as err:
            print('failed to download %s: %s' % (srcurl, err))


class MyThread(threading.Thread):
    """Worker thread that downloads the images of a list of page URLs."""

    def __init__(self, urllist):
        threading.Thread.__init__(self)
        self.urllist = urllist

    def run(self):
        for u in self.urllist:
            downloadImg(u)


def main(url='http://www.6188.net/', threadCount=7):
    """Scrape *url* and download its linked galleries with worker threads.

    The page links are divided into at most *threadCount* contiguous chunks,
    one per thread. Every link is assigned to a thread, including when the
    number of links is not a multiple of *threadCount* or is smaller than it.
    """
    html = getHtml(url)
    urllist = getImgPage(html)
    for chunk in _splitChunks(urllist, threadCount):
        # Threads are non-daemon, so the process stays alive until they end.
        MyThread(chunk).start()


if __name__ == '__main__':
    main()
原文地址:http://blog.csdn.net/u013480667/article/details/44986047