码迷,mamicode.com
首页 > 其他好文 > 详细

005 动态加载实例

时间:2019-04-22 18:17:32      阅读:171      评论:0      收藏:0      [点我收藏+]

标签:实例化   pool   proc   ram   sleep   www   adl   less   page   

技术图片
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep

# Headless-browser demo: open Baidu in Chrome without a visible window, run
# one search, and inspect the rendered page source.

# Options object used to start Chrome in headless (no UI) mode.
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")

# Instantiate the driver.
# NOTE(review): `executable_path=`, `chrome_options=` and `find_element_by_id`
# are the Selenium 3 spellings this script was written against — confirm the
# installed Selenium version still accepts them.
bro = webdriver.Chrome(
    executable_path=r"F:\anaconda\chromedriver.exe",
    chrome_options=chrome_options,
)

try:
    # Send the request.
    bro.get(url="http://www.baidu.com")

    # Optional screenshot of the loaded page:
    # bro.save_screenshot("first.jpg")

    # Locate the search box and type the query into it.
    my_input = bro.find_element_by_id("kw")
    my_input.send_keys("美女")

    # Locate the search button and click it.
    my_button = bro.find_element_by_id("su")
    my_button.click()

    # Give the result page time to render; reading page_source right after
    # click() can still return the pre-search page.
    sleep(2)

    # Source of the page currently displayed by the browser.
    page_text = bro.page_source

    print(type(page_text))
finally:
    # Always shut the browser and the chromedriver process down, even when a
    # lookup above raises.
    bro.quit()
无头浏览器实现
技术图片
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep

# Scroll-loading demo: open a 36kr listing page, scroll to the bottom several
# times so the lazily loaded entries are fetched, then save the rendered HTML.
bro = webdriver.Chrome(r"F:\anaconda\chromedriver.exe")

url = "https://36kr.com/information/contact"

try:
    bro.get(url=url)

    # Scrape the dynamically loaded data: each scroll to the bottom of the
    # document makes the page request its next batch of entries.
    js = "window.scrollTo(0, document.body.scrollHeight)"
    for _ in range(3):
        bro.execute_script(js)
        # Wait after every scroll (including the last one) so the batch it
        # triggered has arrived before page_source is read.
        sleep(2)

    page_text = bro.page_source
    print(page_text)

    with open("./36k.html", "w", encoding="utf-8") as fp:
        fp.write(page_text)
finally:
    # quit() ends the whole driver session; close() only closes the current
    # window and can leave the chromedriver process running.
    bro.quit()
加载滚动条
技术图片
import requests
from lxml import etree
import re
from multiprocessing.dummy import Pool
import random

# Thread-pool demo: collect the direct video URLs from one pearvideo category
# page, then download the video payloads concurrently.
url = "https://www.pearvideo.com/category_8"
headers = {
    # The header name must be spelled "User-Agent"; a misspelled name is sent
    # as an unrelated header and the default client identification is used.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
}

# NOTE(review): verify=False turns off TLS certificate verification for this
# request only — confirm it is still needed, and drop it if not.
response = requests.get(url=url, headers=headers, verify=False).content.decode()
xpath_data = etree.HTML(response)
li_list = xpath_data.xpath('//*[@id="listvideoListUl"]/li')

# Work list for the thread pool: one direct video URL per list item.
video_url_list = []

for li in li_list:
    hrefs = li.xpath('.//div[@class="vervideo-bd"]/a/@href')
    if not hrefs:
        # List item without a detail-page link: nothing to download.
        continue
    v_href = "https://www.pearvideo.com/" + hrefs[0]

    # The detail page embeds the real file address as `srcUrl="...",`.
    d_response = requests.get(url=v_href, headers=headers).content.decode()
    matches = re.findall(r'srcUrl="(.*?)",', d_response, re.S)
    if not matches:
        # Detail page without an embedded srcUrl: skip instead of crashing
        # the whole crawl with an IndexError.
        continue
    video_url_list.append(matches[0])

# Create 5 worker threads.
pool = Pool(5)


def dowmloadVideo(link):
    """Download *link* and return the response body as bytes."""
    return requests.get(url=link, headers=headers).content


# The list returned by map holds the binary data of each downloaded video,
# in the same order as the URLs.
video_url_list = pool.map(dowmloadVideo, video_url_list)

def save_video(data):
    """Write one downloaded video to ``video/<n>.mp4`` and return its path.

    *data* is the raw bytes of the video. The file name is a random number
    between 1 and 1000; the file is opened in exclusive-create mode so that a
    name already taken (by an earlier run or by another worker thread) is
    never overwritten — another number is drawn instead.

    Raises FileExistsError if no free name is found after 100 draws.
    """
    import os  # local import: the script's import block does not include os

    # The target directory is not guaranteed to exist on a first run.
    os.makedirs("video", exist_ok=True)

    for _ in range(100):
        video_name = "video/" + str(random.randint(1, 1000)) + ".mp4"
        try:
            # "xb" fails with FileExistsError instead of truncating an
            # existing file, which makes the name check race-free.
            with open(video_name, "xb") as fp:
                fp.write(data)
        except FileExistsError:
            continue
        return video_name

    raise FileExistsError("no free file name left under video/")

# Run save_video over every item of video_url_list on the pool's worker
# threads; map blocks until all calls have returned.
pool.map(save_video, video_url_list)

# Stop accepting new tasks, then wait for the worker threads to exit.
pool.close()
pool.join()
多线程的实现
技术图片
from selenium import webdriver
from time import sleep

# iframe demo: the Qzone login form lives inside an iframe, so the driver has
# to switch into that frame before the form elements can be located.
bro = webdriver.Chrome(r"F:\anaconda\chromedriver.exe")

try:
    bro.get("https://qzone.qq.com/")
    sleep(1)

    # Switch into the login iframe; element lookups below are resolved
    # inside this frame.
    bro.switch_to.frame("login_frame")

    # Switch the form to account/password login.
    user = bro.find_element_by_id("switcher_plogin")
    user.click()

    sleep(3)
    # Placeholder credentials — replace "*****" with a real account name.
    username = bro.find_element_by_id("u")
    username.send_keys("*****")

    sleep(3)
    # Placeholder credentials — replace "*****" with the real password.
    password = bro.find_element_by_id("p")
    password.send_keys("*****")

    sleep(2)
    login = bro.find_element_by_id("login_button")
    login.click()

    # Keep the browser open long enough to see the result of the login.
    sleep(10)
finally:
    # Always end the driver session, even when an element lookup fails.
    bro.quit()
iframe的实现

 

005 动态加载实例

标签:实例化   pool   proc   ram   sleep   www   adl   less   page   

原文地址:https://www.cnblogs.com/abc23/p/10751549.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!