标签:driver append win32 ror save div googl set url
利用 Python + Selenium 实现对页面的指定元素截图(可截长图元素)
#!/usr/bin/env python
# -*- coding: UTF-8 -*
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
# JavaScript injected by scroll_page(): starting from the top, scroll down in
# fixed steps; once the bottom is reached, step back up and down again so the
# page keeps loading more items, until the document title contains
# "scroll-done".
# NOTE: the line breaks are required - the script uses `//` line comments,
# which would swallow the rest of the code if it were written on one line.
_SCROLL_LOOP_JS = """
(function () {
    var y = 0;
    var step = 1000; //100
    window.scroll(0, 0);
    function f() {
        if (y < document.body.scrollHeight) {
            y += step;
            window.scroll(0, y);
            setTimeout(f, 100); //100 递归循环调用
        } else {
            if(document.title.indexOf("scroll-done") < 0){ //-1 找不到,还没执行下文的driver.execute_script
                y -= step;
                window.scroll(0, y);
                setTimeout(f, 100); //100
                //window.scroll(0, 0);
                //document.title += "scroll-done";
            }
            //else{
            //window.scroll(0, 0);
            //}
        }
    }
    setTimeout(f, 1000); //1000
})();
"""

# JavaScript injected by scroll_page(): after one second, append the
# " scroll-done" marker to the title, which makes the loop above stop.
_MARK_DONE_JS = """
(function () {
    function f() {
        document.title += " scroll-done";
    }
    setTimeout(f, 1000);
})();
"""


def scroll_page(url, browser='chrome'):
    """Open ``url`` in a browser, scroll until the whole feed is loaded.

    The page is scrolled by injected JavaScript until the end-of-list
    element appears, then the scroll loop is stopped by tagging the page
    title with " scroll-done".  The header text of the last list item and
    the final page title are checked against hard-coded expected values.

    Args:
        url: Address of the page to load.
        browser: ``'chrome'`` starts Chrome; any other value starts Firefox.

    Returns:
        The selenium WebDriver instance, left open on the fully scrolled
        page so the caller can keep working with it.

    Raises:
        RuntimeError: If the header of the last list item is not the
            expected text.
        TimeoutException: If one of the WebDriverWait conditions is not met
            in time.

    On any failure the browser is shut down before the exception propagates,
    so a failed run does not leave a stray browser process behind.
    """
    if browser == 'chrome':
        driver = webdriver.Chrome()
    else:
        # Original author's note: context-click through ActionChains
        # misbehaved with Firefox.
        driver = webdriver.Firefox()
    # Original author's note: PhantomJS can capture long screenshots, but
    # loading long pages seemed to take very long.
    # driver = webdriver.PhantomJS()

    try:
        driver.set_window_size(1200, 900)
        driver.get(url)  # Load page
        start = time.time()
        print(driver.title)

        driver.execute_script(_SCROLL_LOOP_JS)

        # Scrolled to the very end once the "no more items" element shows up;
        # otherwise WebDriverWait raises TimeoutException.
        WebDriverWait(driver, 500).until(
            lambda x: x.find_element_by_xpath(
                '//div[@style="text-align:center"]/em'))

        # Stop the scroll loop above (it bounces at the bottom until the
        # title carries the marker).
        driver.execute_script(_MARK_DONE_JS)

        # Expected page structure:
        # <div class="js-infinite-layout">
        #   <div class="js-infinite-item">
        #     Jetbrains公司正式发布Pycharm 5
        # text() cannot be used inside the XPath itself, so read .text here.
        rst = driver.find_element_by_xpath(
            '//div[@class="js-infinite-item"][last()]'
            '//div[@class="header"]').text
        print(rst)
        if rst != u'Jetbrains公司正式发布Pycharm 5':
            raise RuntimeError('wrong!!!')

        print(time.time() - start)
        print(driver.title)

        # The injected script needs time to run, so wait for the marker
        # instead of sleeping; raises TimeoutException otherwise.
        WebDriverWait(driver, 10).until(
            lambda x: x.title == u'编程派 | Coding Python scroll-done')
        print(driver.title)
        return driver
    except BaseException:
        # The driver is not handed to the caller on failure, so nobody else
        # could close the browser.
        driver.quit()
        raise
import win32con
import win32gui
import win32process
import psutil
def get_hwnds_for_pid(pid):
    """Return the handles of visible, enabled windows owned by ``pid``.

    Args:
        pid: Process id to look for.

    Returns:
        A list of window handles, in the order reported by
        ``win32gui.EnumWindows``; empty if the process owns no such window.
    """
    def callback(hwnd, hwnds):
        if win32gui.IsWindowVisible(hwnd) and win32gui.IsWindowEnabled(hwnd):
            _, found_pid = win32process.GetWindowThreadProcessId(hwnd)
            if found_pid == pid:
                hwnds.append(hwnd)
        # Always return True so the enumeration continues.
        return True

    hwnds = []
    win32gui.EnumWindows(callback, hwnds)
    return hwnds


def _window_title_contains(title, text):
    """Tell whether window ``title`` contains ``text``.

    When ``title`` is a byte string, ``text`` is encoded as GBK before the
    comparison (the behaviour of the original code); when it is already
    text, the two are compared directly.
    NOTE(review): GBK presumably matches the ANSI code page of the author's
    Windows installation - confirm for other locales.
    """
    if isinstance(title, bytes):
        return text.encode('gbk') in title
    return text in title


def set_process_foreground(pid_part_name, pid_window_text):
    """Bring the first matching window of a matching process to the front.

    Args:
        pid_part_name: Substring searched in the lower-cased process name,
            e.g. ``'chrome'`` for ``chrome.exe``.
        pid_window_text: Text that must occur in the window title.

    Raises:
        RuntimeError: If no window of a matching process has a title
            containing ``pid_window_text``.
    """
    # Collect (pid, name) of all candidate processes.  Processes may exit or
    # be inaccessible while the list is scanned; those are skipped instead of
    # aborting the whole search.
    targets = []
    for pid in psutil.pids():
        try:
            name = psutil.Process(pid).name()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
        if pid_part_name in name.lower():  # 'chrome.exe' 'EXCEL.EXE'
            targets.append((pid, name))

    for pid, name in targets:
        for hwnd in get_hwnds_for_pid(pid):
            # Example matches seen by the original author:
            #   92292 chrome.exe 137328 编程派 | Coding Python - Google Chrome
            #   EXCEL.EXE 857830 Microsoft Excel - Book1.xlsx
            #   90644 firefox.exe 595556 编程派 | Coding Python scroll-done - Mozilla Firefox
            title = win32gui.GetWindowText(hwnd)
            if _window_title_contains(title, pid_window_text):
                print('%s %s %s %s' % (pid, name, hwnd, title))
                win32gui.SetForegroundWindow(hwnd)
                return
    raise RuntimeError('process not found')
from autopy import key, mouse


def save_result(driver):
    """Save the current page as HTML, via the browser's Save dialog, and as PNG.

    All outputs share a base name derived from the current time (HHMMSS):

    * ``<HHMMSS>.html`` - ``driver.page_source`` written as UTF-8;
    * the browser's own "save page" result, triggered by sending Ctrl+S,
      typing the base name and pressing Return;
    * ``<HHMMSS>.png`` - ``driver.save_screenshot``.

    Args:
        driver: The selenium WebDriver showing the page to save.  It is left
            open when the function returns.
    """
    time_for_filename = time.strftime('%H%M%S')
    with open('%s.html' % time_for_filename, 'wb') as f:
        f.write(driver.page_source.encode('utf-8'))

    # Approaches the original author tried and left disabled:
    #   * context-click the logo image through ActionChains, then press Esc
    #     to dismiss the context menu;
    #   * move the window to (0, 0) and use autopy's mouse to right-click and
    #     then left-click inside it (right before left, to avoid opening a
    #     link by accident).
    # What matters is only that the browser window is in the foreground, so
    # the synthetic key presses below reach it.
    set_process_foreground(driver.name, driver.title)

    key.tap('s', key.MOD_CONTROL)
    time.sleep(1.5)  # give the Save dialog time to open
    key.type_string(time_for_filename)
    time.sleep(0.5)
    key.tap(key.K_RETURN)

    driver.save_screenshot('%s.png' % time_for_filename)
    # driver.close()
    # NOTE(review): closing is disabled in the original; presumably the
    # browser must stay alive while it finishes saving the page - confirm.


if __name__ == "__main__":
    browser = 'chrome'
    # browser = 'firefox'
    url = "http://codingpy.com"
    driver = scroll_page(url, browser)
    save_result(driver)
    print('All DONE')
python 通过 js 控制滚动条拉取全文;通过 psutil 获取进程 pid,通过 win32gui 获取窗口句柄并使程序窗口前置;通过 autopy 模拟键盘完成另存为操作(右键菜单方案在代码中已被注释掉)。
标签:driver append win32 ror save div googl set url
原文地址:http://www.cnblogs.com/my8100/p/7233444.html