标签:except 搜索 div 输入 like common click imp try
项目1:
基于搜狗微信公众号的关键字搜索
"""Search Sogou's WeChat portal for official accounts matching a keyword.

Prompts for a keyword, submits it on https://weixin.sogou.com/ using a
headless Chrome instance, then walks every result page and prints each
account's name, WeChat id and the label/value details listed with it.
"""
import os

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# chromedriver.exe is expected to sit next to this script.
DRIVER_PATH = os.path.join(os.path.dirname(__file__), 'chromedriver.exe')

# Desktop Chrome user-agent string used to disguise the headless browser.
USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
              ' Chrome/63.0.3239.132 Safari/537.36')


def parse_account(entry):
    """Extract one account from a result ``<li>`` element.

    Args:
        entry: BeautifulSoup tag for a single ``<li>`` of the result list.

    Returns:
        A ``(title, wxid, details)`` tuple, where ``details`` maps each
        ``<dt>`` label to the text of the matching ``<a>`` (preferred) or
        ``<dd>``; ``None`` when the entry lacks the title or id markup.
    """
    title_box = entry.find('p', {'class': 'tit'})
    info_box = entry.find('p', {'class': 'info'})
    title_link = title_box.find('a') if title_box is not None else None
    wxid_label = info_box.find('label') if info_box is not None else None
    if title_link is None or wxid_label is None:
        # Not an account entry (e.g. a nested or decorative <li>); skip it
        # instead of aborting the whole search.
        return None

    details = {}
    for row in entry.find_all('dl'):
        label = row.find('dt')
        value = row.find('a') or row.find('dd')
        if label is None or value is None:
            continue
        # Drop inline <script> tags so their source does not leak into the label.
        for script in label('script'):
            script.extract()
        details[label.text.replace('\n', '')] = value.text.replace('\n', '')
    return title_link.text, wxid_label.text, details


def print_account(title, wxid, details):
    """Print one account in the report format, preceded by a separator line."""
    print('-' * 20)
    print('公众号名称:{}'.format(title))
    print('微信号:{}'.format(wxid))
    for key, value in details.items():
        print('{} {}'.format(key, value))


opt = webdriver.ChromeOptions()
opt.add_argument('--headless')
# Chrome switches need the leading dashes: a bare 'User-Agent=...' argument
# is not a switch, so the override has to be spelled '--user-agent=...'.
opt.add_argument('--user-agent=' + USER_AGENT)
# NOTE: executable_path is the Selenium 3 constructor signature this script
# was written against; Selenium 4 expects a Service object instead.
webphjs = webdriver.Chrome(executable_path=DRIVER_PATH, options=opt)
try:
    webphjs.get('https://weixin.sogou.com/')
    _input = webphjs.find_element(By.ID, 'query')
    search = input('请输入公众号关键字:')
    _input.send_keys(search)
    webphjs.find_element(By.CLASS_NAME, 'swz2').click()

    while True:
        soup = BeautifulSoup(webphjs.page_source, 'lxml')
        result_list = soup.find('ul', {'class': 'news-list2'})
        if result_list is None:
            # No result list on the page: nothing matched the keyword.
            print('查询不到有关于此关键字的内容')
            break

        for entry in result_list.find_all('li'):
            account = parse_account(entry)
            if account is not None:
                print_account(*account)

        try:
            # Wait up to 3 seconds for the "next page" link and click the
            # element the wait returns rather than looking it up again.
            next_link = WebDriverWait(webphjs, 3).until(
                EC.presence_of_element_located((By.ID, 'sogou_next')))
            next_link.click()
        except WebDriverException:
            # TimeoutException (a WebDriverException) means there is no next
            # page; any other driver failure also ends pagination.
            break
finally:
    # quit() ends the session and the chromedriver process even on errors;
    # close() would only close the current window.
    webphjs.quit()
将 Chrome 设置为无头模式后,网站可能会识别出这是自动化测试工具,从而导致访问异常,所以通过启动参数给浏览器指定了一个常规的 User-Agent 进行伪装。
标签:except 搜索 div 输入 like common click imp try
原文地址:https://www.cnblogs.com/lpapython/p/11203372.html