# Tags: ast int http 代码 限制 ide 传值 sleep dom
#设置超时时间爬取网页速度相对要快些
#encoding:utf8
from lxml import etree
#xpath
import re
#正则
import time
#时间
import requests
# HTTP client library (not used by the code visible in this file)
from selenium import webdriver
# browser automation driver (WebDriver)
from selenium.webdriver.support.ui import WebDriverWait
#引用超时模块
import random
# standard-library random module (not a custom module; unused in the code visible here)
# Scrape flow: open a listing page in Firefox, click the pager link several
# times while collecting the detail-page links found in column 8 of the
# listing table, then visit every collected link and print the first three
# columns of the table with id "tableData_two".

# Placeholder kept from the original post ("网址" means "URL"); replace it with
# the address of the listing page before running.
url = '(网址)'

# Prefix prepended to every collected (site-relative) href.
BASE_URL = "http://www.sse.com.cn"

# Maximum number of seconds to wait for an element to become visible.
WAIT_TIMEOUT = 10

# How many times the pager link is clicked (the original used range(1, 10)).
PAGE_CLICKS = 9

# Last <li> link of the pager below the listing table.
# NOTE(review): presumably the "next page" link -- confirm against the page.
PAGER_LINK_XPATH = '//*[@id="tableData_"]/div[3]/nav/ul/li[last()]/a'

# href of the link in the 8th column of every listing row.
DETAIL_HREF_XPATH = '//*[@id="tableData_"]/div[2]/table/tbody/tr/td[8]/a/@href'

# Element on the detail page that must be clicked before the table is read.
# NOTE(review): presumably a tab/expander that reveals "tableData_two" -- confirm.
DETAIL_TAB_XPATH = '//*[@id="overview-slide"]/div[1]/div[2]/div/a/span'

# Columns 1-3 of the detail table.
DETAIL_COL1_XPATH = '//*[@id="tableData_two"]/div[2]/table/tbody/tr/td[1]/text()'
DETAIL_COL2_XPATH = '//*[@id="tableData_two"]/div[2]/table/tbody/tr/td[2]/text()'
DETAIL_COL3_XPATH = '//*[@id="tableData_two"]/div[2]/table/tbody/tr/td[3]/div/text()'


def _click_when_displayed(browser, xpath, timeout=WAIT_TIMEOUT):
    """Wait until the element at *xpath* is displayed, then click it.

    The element is looked up once per poll and the same element that passed
    the visibility check is clicked, so the wait and the click can never
    target different nodes.

    Args:
        browser: the WebDriver instance to operate on.
        xpath: XPath expression locating the element.
        timeout: maximum number of seconds to wait.

    Raises:
        selenium.common.exceptions.TimeoutException: raised by
            ``WebDriverWait.until`` when the element is not displayed
            within *timeout* seconds.
    """

    def _displayed_element(drv):
        # The plain string "xpath" is the value of ``By.XPATH``; passing it to
        # ``find_element`` replaces ``find_element_by_xpath`` (removed in
        # Selenium 4) without requiring an extra import.
        element = drv.find_element("xpath", xpath)
        return element if element.is_displayed() else False

    WebDriverWait(browser, timeout).until(_displayed_element).click()


driver = webdriver.Firefox()  # use the Firefox browser
thelist = []  # hrefs of all detail pages collected from the listing
try:
    driver.get(url)
    time.sleep(3)  # give the initial page time to load
    driver.maximize_window()

    for _ in range(PAGE_CLICKS):
        time.sleep(1)
        _click_when_displayed(driver, PAGER_LINK_XPATH)
        # NOTE(review): the page source is read right after the click, exactly
        # as in the original; the rows shown before the first click are never
        # collected, and no wait for the table refresh happens here -- confirm
        # that this is intended.
        listing = etree.HTML(driver.page_source)
        thelist.extend(listing.xpath(DETAIL_HREF_XPATH))

    for href in thelist:
        driver.get(BASE_URL + href)
        driver.maximize_window()
        _click_when_displayed(driver, DETAIL_TAB_XPATH)
        time.sleep(2)  # let the detail table render after the click
        detail = etree.HTML(driver.page_source)
        first_col = detail.xpath(DETAIL_COL1_XPATH)
        second_col = detail.xpath(DETAIL_COL2_XPATH)
        third_col = detail.xpath(DETAIL_COL3_XPATH)
        # zip stops at the shortest column, so a row with a missing cell no
        # longer aborts the run with an IndexError.
        for cell1, cell2, cell3 in zip(first_col, second_col, third_col):
            print(cell1)
            print(cell2)
            print(cell3)
finally:
    # Always close the browser, even when a wait times out.
    driver.quit()
# Tags: ast int http 代码 限制 ide 传值 sleep dom
# Original article: http://www.cnblogs.com/lianghongrui/p/6897176.html