标签:content bdr quit eva data- sig header tor elf
代码如下:
# coding: utf-8
"""Log in to Zhihu through a Selenium-driven Chrome and crawl with requests.

The browser session is used only to obtain cookies; the cookies are then
copied into a ``requests.Session`` which fetches the page that is parsed
with lxml.
"""
import json
import sys
import time

import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By


class Zhihu:
    """Fetch a Zhihu page as a signed-in user and print its answer items."""

    def __init__(self, homeurl, account="13060882373", password="XXXXXX"):
        """Store the page URL and the sign-in credentials.

        Args:
            homeurl: URL that ``Crawl`` downloads with the logged-in session.
            account: Value typed into the sign-in form's account field.
            password: Value typed into the sign-in form's password field.
        """
        self.homeurl = homeurl
        self.account = account
        self.password = password

    def GetCookies(self):
        """Sign in through Chrome and return the browser's cookies.

        Returns:
            The value of ``browser.get_cookies()`` taken after the sign-in
            form has been submitted.
        """
        browser = webdriver.Chrome()
        try:
            browser.get("https://www.zhihu.com/signin")
            browser.find_element(
                By.CSS_SELECTOR, ".SignFlow-accountInput.Input-wrapper input"
            ).send_keys(self.account)
            browser.find_element(
                By.CSS_SELECTOR, ".SignFlow-password input"
            ).send_keys(self.password)
            browser.find_element(
                By.CSS_SELECTOR, ".Button.SignFlow-submitButton"
            ).click()
            # Fixed pause after submitting; presumably gives the sign-in
            # request time to finish before the cookies are read.
            time.sleep(3)
            return browser.get_cookies()
        finally:
            # Always close the browser, even when a form element is missing
            # or the page fails to load, so no Chrome process is left behind.
            browser.quit()

    def Crawl(self):
        """Download ``homeurl`` with the login cookies and print each answer.

        For every matched answer element, prints the author name and the
        title read from its ``data-zop`` attribute.
        """
        s = requests.Session()
        s.headers.clear()
        for cookie in self.GetCookies():
            s.cookies.set(cookie['name'], cookie['value'])
        html = s.get(self.homeurl).text
        html_tree = etree.HTML(html)
        items = html_tree.xpath('//*[@id="root"]/div/main/div/div/div[1]/div[2]/div//div[@class="ContentItem AnswerItem"]/@data-zop')
        for item in items:
            # NOTE(review): the original ran eval() on this attribute, which
            # executes text fetched from the network. json.loads assumes the
            # attribute holds JSON -- confirm against a live page.
            content = json.loads(item)
            authorName = content['authorName']
            title = content['title']
            print(authorName + "回答了:" + title)


if __name__ == "__main__":
    zhihu = Zhihu('https://www.zhihu.com/')
    zhihu.Crawl()
Python爬虫 —— 知乎之selenium模拟登录+requests.Session()获取cookies
标签:content bdr quit eva data- sig header tor elf
原文地址:https://www.cnblogs.com/DOLFAMINGO/p/9170429.html