标签:
参考笔记 虫师 http://www.cnblogs.com/fnng/p/3576154.html
# Three small automation demos, run in sequence by main():
#   1. visit_repeatedly  - load one URL over and over in Firefox;
#   2. ListName / fetch_h4_titles - print every <h4> heading of a page;
#   3. search_baidu      - type a query into Baidu's search box and submit it.
#
# NOTE: on Python 2 the Chinese literals below require an encoding declaration
# ("# -*- coding: utf-8 -*-") on the FIRST or SECOND line of the file. The
# "# coding = utf-8" comment that used to sit in the middle of the script had
# no effect (PEP 263), and the reload(sys)/setdefaultencoding hack that went
# with it has been dropped.
import time

try:  # Python 3 module names
    from html.parser import HTMLParser
    from urllib.request import urlopen
except ImportError:  # Python 2 fallbacks
    from HTMLParser import HTMLParser
    from urllib2 import urlopen

M = 100000  # default number of page loads for visit_repeatedly
URL = 'http://www.yyxxww.com/html/2015/edu_0318/3386.html'
PAGE_ENCODING = 'gbk'  # the script has always decoded this page's headings as GBK
BAIDU_URL = "http://www.baidu.com"


def visit_repeatedly(url=URL, times=M, delay=1):
    """Load *url* in Firefox *times* times, sleeping *delay* seconds after each.

    Returns the number of page loads performed. The browser is closed even if
    a page load raises, so a failure does not leave a Firefox process behind.
    """
    # Imported lazily so the parsing helpers stay usable without Selenium.
    from selenium import webdriver

    browser = webdriver.Firefox()  # browser name: use whichever is installed locally
    loads = 0
    try:
        for _ in range(times):
            browser.get(url)
            time.sleep(delay)
            loads += 1
    finally:
        browser.quit()
    return loads


class ListName(HTMLParser):
    """Collect the text of every ``<h4>`` element fed to the parser.

    Attributes:
        is_h4: truthy while the parser is inside an ``<h4>`` element.
        name: list of heading texts, one entry per non-empty ``<h4>``.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.is_h4 = False
        self.name = []
        self._h4_parts = []  # text fragments of the heading being read

    def handle_starttag(self, tag, attrs):
        # HTMLParser has no per-tag hooks; route <h4> to the dedicated method.
        if tag == "h4":
            self.start_h4(attrs)

    def handle_endtag(self, tag):
        if tag == "h4":
            self.end_h4()

    def start_h4(self, attrs):
        """Mark the start of a heading and begin collecting its text."""
        self.is_h4 = True
        self._h4_parts = []

    def end_h4(self):
        """Close the current heading and store its text as ONE entry."""
        # A heading containing inline markup reaches handle_data in several
        # pieces; joining them here keeps one list entry per heading.
        if self.is_h4 and self._h4_parts:
            self.name.append("".join(self._h4_parts))
        self.is_h4 = False
        self._h4_parts = []

    def handle_data(self, text):
        if self.is_h4:
            self._h4_parts.append(text)


def extract_h4_titles(html):
    """Return the list of ``<h4>`` heading texts found in the *html* string."""
    parser = ListName()
    parser.feed(html)
    parser.close()
    return parser.name


def fetch_h4_titles(url=URL, encoding=PAGE_ENCODING):
    """Download *url*, decode it with *encoding* and return its <h4> headings."""
    response = urlopen(url)
    try:
        raw = response.read()
    finally:
        response.close()
    # Only the headings matter, so an undecodable byte elsewhere on the page
    # is replaced instead of aborting the whole extraction.
    return extract_h4_titles(raw.decode(encoding, "replace"))


def search_baidu(query=u"你好", linger=30):
    """Open Baidu in Firefox, type *query* into the search box and submit it.

    *query* must be a text (unicode) string. The earlier version chained
    ``.decode(...).encode(...)`` onto the return value of ``send_keys`` (which
    is ``None``), and that is what broke Chinese input.

    The browser stays open for *linger* seconds so the results can be seen,
    then it is closed (also when an error occurs).
    """
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    browser = webdriver.Firefox()
    try:
        browser.get(BAIDU_URL)
        # find_element(By.ID, ...) replaces the find_element_by_id helper,
        # which newer Selenium releases no longer provide.
        browser.find_element(By.ID, "kw").send_keys(query)
        browser.find_element(By.ID, "su").click()
        time.sleep(linger)
    finally:
        browser.quit()


def main():
    """Run the three demos in the same order as the original script."""
    # 1. Automatically visit a URL many times.
    count = visit_repeatedly()
    print('本次python总共打开了 %d 次' % count)

    # 2. Extract and print the headings.
    for title in fetch_h4_titles():
        print(title)

    # 3. Visit Baidu and fill in the search form.
    search_baidu()


if __name__ == "__main__":
    main()
标签:
原文地址:http://www.cnblogs.com/hdu-2010/p/4617641.html