之前见过别人写的抓取图片的 Python 脚本,自己也曾用正则表达式写过;最近接触到 BeautifulSoup,所以拿它来练练手。
# -*- coding: utf8 -*-
"""Download the images of every gallery linked from one tag-listing page.

The script fetches a listing page of http://sexy.faceks.com, follows each
``<a class="img">`` link to a gallery page, and saves every ``<img>`` that
has no ``class`` attribute into the ``sexy`` directory created under the
working directory the script was started from.
"""
import contextlib
import os
import random
import sys
import time

from bs4 import BeautifulSoup

try:  # Python 2
    import urllib2
    from urllib import quote
    urlopen = urllib2.urlopen
except ImportError:  # Python 3
    from urllib.parse import quote
    from urllib.request import urlopen

# Output directory: <cwd at start-up>/sexy, created on first run.
path = os.getcwd()
new_path = os.path.join(path, u'sexy')
if not os.path.isdir(new_path):
    os.mkdir(new_path)

_TAG = u'美女摄影'
_LIST_URL = 'http://sexy.faceks.com/tag/%s?page=%s'
_PARSER = 'html.parser'


def _fetch(url):
    """Return the raw response body of *url*, always closing the connection."""
    with contextlib.closing(urlopen(url)) as response:
        return response.read()


def page_loop(page=1):
    """Download all gallery images reachable from listing page *page*.

    Args:
        page: Number of the tag-listing page to crawl (default 1).

    Raises:
        IOError: If the listing page itself cannot be fetched. Failures on
            individual galleries or images are reported and skipped.
    """
    # The tag is non-ASCII, so it must be percent-encoded (as UTF-8) to form
    # a valid URL.
    url = _LIST_URL % (quote(_TAG.encode('utf-8')), page)
    print(url)
    soup = BeautifulSoup(_fetch(url), _PARSER)

    # First collect the entry link of every gallery on the listing page.
    for girl in soup.find_all('a', attrs={'class': 'img'}):
        girlink = girl.get('href')
        if not girlink:
            continue  # anchor without a target: nothing to follow
        print(girlink)
        try:
            per_soup = BeautifulSoup(_fetch(girlink), _PARSER)
        except (IOError, ValueError) as exc:
            # IOError covers network/HTTP errors; ValueError covers URLs
            # urlopen cannot handle (e.g. relative links).
            print('skipping gallery %s: %s' % (girlink, exc))
            continue

        # Then download every image of this single gallery.
        for img_url in per_soup.find_all('img', attrs={'class': None}):
            girlurl = img_url.get('src')
            if not girlurl:
                continue  # <img> without a src attribute
            print(girlurl)
            try:
                content2 = _fetch(girlurl)
            except (IOError, ValueError) as exc:
                print('skipping image %s: %s' % (girlurl, exc))
                continue
            # File name: current HHMMSS plus a random 4-digit suffix.
            filename = time.strftime('%H%M%S') + str(random.randint(1000, 9999))
            # Write into new_path itself so a later chdir cannot redirect
            # the output.
            with open(os.path.join(new_path, filename), 'wb') as code:
                code.write(content2)


page_loop()
效果图如下:
本文出自 “孜孜不倦的学习着...” 博客,请务必保留此出处http://jonyisme.blog.51cto.com/3690784/1698896
原文地址:http://jonyisme.blog.51cto.com/3690784/1698896