标签:密码 line list test 帐号 内容 login 保存 css_
抓取微博24小时热门话题榜的前15个话题(含阅读数),并将抓取的内容保存至txt文件中。
#coding=utf-8
# Scrape the top Weibo 24-hour hot topics (rank, title and read count) with
# Selenium and save them to a UTF-8 text file.
import unittest
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By


class Weibo(unittest.TestCase):
    """Scrape the top Weibo 24-hour hot topics and save them to a text file.

    All scraping and file writing is done in ``setUp``; the single test
    method then checks how many topics were collected.
    """

    # Number of hot topics to collect from the ranking page.
    TOPIC_COUNT = 15

    def setUp(self):
        """Start Chrome, scrape the hot topics and write them to disk."""
        self.dr = webdriver.Chrome()
        try:
            self.hot_list = self.get_weibo_hot_topic()
            # get_top_rank_file() returns None; the attribute is kept only
            # because the original script assigned it.
            self.weibo_topic = self.get_top_rank_file()
        except Exception:
            # unittest does not call tearDown() when setUp() raises, so the
            # browser has to be closed here to avoid leaking the process.
            self.dr.quit()
            raise

    def get_weibo_hot_topic(self):
        """Log in to Weibo and collect the hot topics with their read counts.

        Returns:
            A list of ``[rank_and_topic, number]`` pairs (both strings), at
            most ``TOPIC_COUNT`` entries long. Fewer entries are returned
            when the page exposes fewer matching elements.
        """
        self.dr.get('http://weibo.com/')
        sleep(5)  # fixed pause, presumably to let the page finish loading
        self.login('649_xxxx@qq.com', 'kemi_xxxx')  # Weibo account and password (placeholders)
        self.dr.get('http://d.weibo.com/100803?refer=index_hot_new')
        sleep(5)

        # Query the DOM once per selector instead of once per loop iteration.
        titles = self.dr.find_elements(By.CSS_SELECTOR, '.title.W_autocut')  # rank and topic
        numbers = self.dr.find_elements(By.CSS_SELECTOR, '.number')  # read count

        # zip() stops at the shorter sequence, so a short page yields a short
        # list instead of raising IndexError.
        return [
            [title.text, number.text]
            for title, number in zip(titles[:self.TOPIC_COUNT], numbers)
        ]

    def get_top_rank_file(self):
        """Write ``self.hot_list`` to ``<file_title>.txt`` encoded as UTF-8.

        Each entry is preceded by a separator line. Sets ``self.file_title``
        and returns ``None``.
        """
        self.file_title = '微博24小时热门话题'
        separate_line = '~~~~~~~~~~~~~~~~~~~~~~~~\n'  # separator line
        # newline='\n' disables newline translation, so the bytes on disk are
        # the same as the original binary-mode writes on every platform.
        with open(self.file_title + '.txt', 'w',
                  encoding='utf-8', newline='\n') as out:
            for rank_and_topic, number in self.hot_list:
                out.write(separate_line)
                out.write(rank_and_topic + ' ' + '阅读数:' + number + '\n')

    def login(self, username, password):
        """Fill in the login form on the current page and submit it.

        Args:
            username: Account name typed into the ``username`` field.
            password: Password typed into the ``password`` field.
        """
        username_box = self.dr.find_element(By.NAME, 'username')
        username_box.clear()
        username_box.send_keys(username)
        self.dr.find_element(By.NAME, 'password').send_keys(password)
        self.dr.find_element(By.CSS_SELECTOR, '.info_list.login_btn').click()

    def test_weibo_topic(self):
        """Check that the expected number of topics was scraped."""
        self.assertEqual(
            len(self.hot_list), self.TOPIC_COUNT,
            'expected %d hot topics, scraped %d'
            % (self.TOPIC_COUNT, len(self.hot_list)))
        print('抓取完毕')

    def tearDown(self):
        """Close the browser started in ``setUp``."""
        self.dr.quit()


if __name__ == '__main__':
    unittest.main()
网页如下:
生成txt文件如下:
用python+selenium抓取微博24小时热门话题的前15个并保存到txt中
标签:密码 line list test 帐号 内容 login 保存 css_
原文地址:http://www.cnblogs.com/cnkemi/p/6185331.html