标签:blog http io ar os 使用 sp for on
因为平时爱好摄影,经常浏览色影无忌论坛上的获奖摄影作品,所以写了个小脚本用来抓取上面的获奖图片,亲测可用。
# -*- coding: UTF-8 -*-
# Author: Rocky Chen
import os  # kept from the original import list; not used below
import re
import string  # kept from the original import list; not used below
import sys
import time
import urllib
from contextlib import closing

try:  # Python 2 module names, as used by the original script
    import cookielib  # kept from the original import list; not used below
    import urllib2
    urlretrieve = urllib.urlretrieve
except ImportError:  # Python 3: the same functions live in urllib.request
    import http.cookiejar as cookielib
    import urllib.request as urllib2
    urlretrieve = urllib2.urlretrieve


class Download:
    """Scrape the photos reachable from one index page.

    The index page at ``url`` is fetched, every link found inside a
    ``<div class="mshow">`` is followed, and every ``<img class="mimg">``
    on those linked pages is saved into the current working directory.
    """

    USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'

    # Patterns are compiled once instead of on every loop iteration.
    LINK_PATTERN = re.compile(
        r'<div class="mshow"><a href="(.+?)" target="_blank">')
    PHOTO_PATTERN = re.compile(
        r'<img class="mimg" .+? src="(.+?)" .+? border=0/></div>')
    # The dot before "jpg" is escaped (the original matched any character
    # there) and https URLs are accepted as well as http ones.
    FILE_NAME_PATTERN = re.compile(r'https?://.+?/(\w+\.jpg)')

    def __init__(self, url):
        """Remember the index page ``url`` to start scraping from."""
        self.url = url

    def _fetch(self, url):
        """Return the body of ``url`` as text, closing the connection afterwards."""
        request = urllib2.Request(url, headers={'User-Agent': self.USER_AGENT})
        with closing(urllib2.urlopen(request)) as response:
            body = response.read()
        if not isinstance(body, str):
            # Python 3 returns bytes, which the str patterns cannot search.
            # NOTE(review): the page encoding is assumed to be UTF-8 compatible;
            # undecodable bytes are replaced rather than raising - confirm.
            body = body.decode('utf-8', 'replace')
        return body

    def getPhotos(self, delay=4):
        """Download every photo linked from the index page.

        Fetches the photos from each page the index page links to, e.g.
        http://vision.xitek.com/monthly/yuesaipingxuan/201404/14-149893.html

        Args:
            delay: seconds to sleep after each download (default 4, the
                value the original script hard-coded).

        Returns:
            The list of file names that were saved, in download order.
        """
        web_content = self._fetch(self.url)
        print(web_content)
        all_link = self.LINK_PATTERN.findall(web_content)
        print(all_link)
        print("All link done")

        saved = []
        for link in all_link:
            print("One link")
            print(link)
            web_content_each = self._fetch(link)
            print(web_content_each)
            my_photos = self.PHOTO_PATTERN.findall(web_content_each)
            print(my_photos)
            for my_photo in my_photos:
                match = self.FILE_NAME_PATTERN.search(my_photo)
                if match is None:
                    # The original indexed [0] on an empty result here and
                    # aborted the whole run with IndexError.
                    print("Skipping unrecognised photo URL: " + my_photo)
                    continue
                file_name = match.group(1)
                print(file_name)
                urlretrieve(my_photo, file_name)
                saved.append(file_name)
                time.sleep(delay)
        return saved


def Usage():
    """Print the command-line usage hint."""
    print("Usage: xitek_month_match.py http://--Help")


if __name__ == "__main__":
    # sys.argv always contains the script name, so "no arguments" means
    # a length below 2 (the original "< 1" check could never be true).
    if len(sys.argv) < 2:
        Usage()
        sys.exit(1)
    if sys.version_info[0] == 2:
        # Python 2 only: restore setdefaultencoding and switch the implicit
        # str/unicode conversion to UTF-8, as the original script did.
        reload(sys)  # noqa: F821 - builtin on Python 2
        sys.setdefaultencoding('utf-8')
    for arg in sys.argv[1:]:
        print(arg)
        xitek = Download(arg)
        xitek.getPhotos()
    print("Done")
自动抓取一月到十二月的获奖图片
标签:blog http io ar os 使用 sp for on
原文地址:http://blog.csdn.net/yagamil/article/details/41942877