标签:pil host 结束 end ima hostname width 高度 port
# coding: utf-8
"""Scrape Baidu image search results and download them with Selenium + PhantomJS.

NOTE(review): PhantomJS support was removed from recent Selenium releases;
switch to a headless Chrome/Firefox driver when upgrading Selenium.
"""
import re
import time
import unittest
from urllib.request import urlopen  # was a star import; only urlopen is used

from selenium import webdriver
from bs4 import BeautifulSoup  # NOTE(review): unused in this script — kept for compatibility


class baidupic(unittest.TestCase):
    """Search Baidu Images for a keyword and save each result under d:\\mm."""

    def setUp(self):
        # One PhantomJS browser instance per test run.
        self.dv = webdriver.PhantomJS()

    def test_getPic(self):
        """Search, scroll to trigger lazy loading, then download every image found."""
        dv = self.dv
        dv.get("http://image.baidu.com/")
        dv.find_element_by_id("kw").send_keys("美女")
        dv.find_element_by_class_name("s_btn").click()
        time.sleep(1)
        # Scroll to the bottom so lazy-loaded results are fetched;
        # each extra scroll loads another page of images.
        js = "window.scrollTo(0, document.body.scrollHeight)"
        dv.execute_script(js)
        time.sleep(1)
        dv.execute_script(js)
        time.sleep(1)
        # Extract (url, extension, width, height) from the result markup.
        # BUG FIX: the original wrapped this pattern in curly "smart quotes"
        # (‘…‘), which is a SyntaxError; use plain quotes and raw strings.
        pattern = re.compile(
            r'data-objurl="(.*?)" data-thumburl=".*?" data-fromurl=".*?" '
            r'data-fromurlhost=".*?" data-ext="(.*?)" data-saved=".*?" '
            r'data-pi=".*?" data-specialtype=".*?" data-cs=".*?" '
            r'data-width="(.*?)" data-height="(.*?)" data-hostname=',
            re.S)
        items = re.findall(pattern, dv.page_source)
        index = 1
        for item in items:
            print("图片地址:%s\r\n类型:%s\r\n宽度:%s\r\n高度:%s\r\n " %
                  (item[0], item[1], item[2], item[3]))
            try:
                self.saveImg(item[0], "d:\\mm\\%s.%s" % (index, item[1]))
            except Exception:
                # Skip images whose download fails (dead link, timeout, ...).
                # index only advances on success so files are numbered
                # consecutively — same behavior as the original.
                continue
            index += 1

    def saveImg(self, imgURL, fileName):
        """Download imgURL and write the raw bytes to fileName.

        Uses context managers so the HTTP response and the output file are
        closed even when the read or write raises (the original leaked both).
        """
        with urlopen(imgURL) as response, open(fileName, "wb") as f:
            f.write(response.read())

    def tearDown(self):
        # Always shut the browser down, even when the test body failed.
        self.dv.quit()
python+selenium+phantomjs爬百度美女图片
标签:pil host 结束 end ima hostname width 高度 port
原文地址:http://www.cnblogs.com/ylsd80/p/7095835.html