Selenium + Chrome + Firefox + WebDriver 遇到的坑
坑一、Linux 中启动 webdriver 时报错
测试代码为:
-
-
-
-
-
from selenium import webdriver
-
-
driver = webdriver.Firefox()
-
driver.get("https://www.baidu.com")
运行报错信息如下:
-
Traceback (most recent call last):
-
File "maimai_web.py", line 14, in <module>
-
driver = webdriver.Firefox()
-
File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/firefox/webdriver.py", line 152, in __init__
-
-
File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 98, in __init__
-
self.start_session(desired_capabilities, browser_profile)
-
File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 188, in start_session
-
response = self.execute(Command.NEW_SESSION, parameters)
-
File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 256, in execute
-
self.error_handler.check_response(response)
-
File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 194, in check_response
-
raise exception_class(message, screen, stacktrace)
-
selenium.common.exceptions.WebDriverException: Message: Process unexpectedly closed with status 1
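处理方法:该报错通常是因为服务器没有图形界面,Firefox 启动后找不到显示设备而直接退出。可以用 pyvirtualdisplay 创建虚拟显示(依赖系统已安装 Xvfb),并在实例化 webdriver 之前调用 display.start() 启动它: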
-
-
-
-
-
from pyvirtualdisplay import Display
-
from selenium import webdriver
-
-
-
display = Display(visible=0, size=(1920, 1080))
-
-
driver = webdriver.Firefox()
-
driver.get("https://www.baidu.com")
结果:
运行ok,搞定!
坑二、webdriver实例化报错
采用多线程调用 webdriver 时,偶尔会出现如下错误:selenium.common.exceptions.WebDriverException: Message: connection refused
-
Exception in thread Thread-2:
-
Traceback (most recent call last):
-
File "/usr/local/python3.6/lib/python3.6/threading.py", line 916, in _bootstrap_inner
-
-
File "/usr/local/python3.6/lib/python3.6/threading.py", line 864, in run
-
self._target(*self._args, **self._kwargs)
-
File "maimai_tran_account_driver.py", line 591, in debug
-
t = TrainAccount(count,lock)
-
File "maimai_tran_account_driver.py", line 32, in __init__
-
self.chrome = webdriver.Firefox()
-
File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/firefox/webdriver.py", line 152, in __init__
-
-
File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 98, in __init__
-
self.start_session(desired_capabilities, browser_profile)
-
File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 188, in start_session
-
response = self.execute(Command.NEW_SESSION, parameters)
-
File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 256, in execute
-
self.error_handler.check_response(response)
-
File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 194, in check_response
-
raise exception_class(message, screen, stacktrace)
-
selenium.common.exceptions.WebDriverException: Message: connection refused
处理方法:查看 geckodriver.log 中的具体报错信息,据此定位原因。
坑三、模拟器被反爬
原因是通过 webdriver 打开页面时,页面的 JS 会检测 webdriver 特征字段(例如 navigator.webdriver),一旦检测到该字段,请求就会被当作爬虫处理。应对策略如下。
工具:用 mitmproxy 做代理,把响应的 JS 文件中的 webdriver 字段替换为别的字段
部分代码如下:
-
if "/_next/static/js/common_pdd" in flow.request.url:
-
flow.response.text = flow.response.text.replace("webdriver", "userAgent")
坑四、滑动验证码验证失败
同样的代码,chromedriver 能通过验证码,而 firefox 把滑块滑动到正确位置后仍提示失败。最后发现原因是 firefox 拖动滑块的速度太慢,被识别为机器操作。解决方法是加快滑动速度,滑动验证的部分代码如下:
-
def crack_geetest(self, max_retry=10):
-
-
-
l.info("process handle geetest captcha...")
-
-
-
-
-
-
-
img = driver.find_element_by_xpath(‘//div[@class="geetest_canvas_img geetest_absolute"]‘)
-
-
-
-
top, bottom, left, right = location[‘y‘], location[‘y‘] + size[‘height‘], location[‘x‘], location[‘x‘] + \
-
-
return (top, bottom, left, right)
-
-
def get_geetest_image(name):
-
-
-
-
-
full_img_path = ‘./zhilian_screenshot_{}.png‘.format(self.account[‘user_id‘])
-
driver.save_screenshot(filename=full_img_path)
-
image = Image.open(fp=full_img_path, mode=‘r‘)
-
top, bottom, left, right = get_position()
-
print(‘验证码位置:({},{},{},{})‘.format(left, top, right, bottom))
-
t = driver.execute_script(‘var q=document.documentElement.scrollTop; return q;‘)
-
print(‘验证码位置:({},{},{},{})‘.format(left, top - int(t), right, bottom - int(t)))
-
-
captcha = image.crop((left, top - int(t), right, bottom - int(t)))
-
captcha_file_name = ‘./zhilian_captcha_{}_{}.png‘.format(self.account[‘user_id‘], name)
-
captcha.save(captcha_file_name)
-
return captcha, captcha_file_name
-
-
-
-
-
-
-
slider = driver.find_element_by_xpath(‘//div[@class="geetest_slider_button"]‘)
-
-
-
def get_gap(captcha_file_name):
-
-
-
-
-
-
-
res = self.dama2.decode_captcha(6137, captcha_file_name)
-
-
-
-
-
def calculate_tracks(distance):
-
def generate_rand(n, sum_v):
-
Vector = [random.randint(1, 3) for _ in range(n)]
-
Vector = [int(i / sum(Vector) * sum_v) for i in Vector]
-
-
res = sum_v - sum(Vector)
-
-
Vector[random.randint(0, n - 1)] += 1
-
return [0 - i for i in Vector]
-
-
back_dis = random.randint(16, 26)
-
-
-
-
-
-
-
-
while current < distance:
-
-
-
-
-
-
s = v * t + 0.5 * a * (t ** 2)
-
-
-
forward_tracks.append(round(s))
-
-
-
back_tracks = generate_rand(15, back_dis)
-
return {‘forward_tracks‘: forward_tracks, ‘back_tracks‘: back_tracks}
-
-
def move_to_gap(slider, tracks):
-
-
-
-
-
-
-
ActionChains(driver).click_and_hold(slider).perform()
-
-
-
for i in tracks[‘forward_tracks‘]:
-
ActionChains(driver).move_by_offset(i, 0).perform()
-
-
-
-
for i in tracks[‘back_tracks‘]:
-
ActionChains(driver).move_by_offset(i, 0).perform()
-
-
-
-
random_sc = random.randint(3, 8)
-
ActionChains(driver).move_by_offset(0-random_sc, 0).perform()
-
-
ActionChains(driver).move_by_offset(random_sc, 0).perform()
-
-
-
-
ActionChains(driver).release().perform()
-
-
-
-
-
-
print(‘get_geetest_image‘)
-
captcha_obj, captcha_file_name = get_geetest_image(‘2‘)
-
gap = get_gap(captcha_file_name)
-
l.info(‘缺口位置:{}‘.format(gap))
-
print(‘缺口位置:{}‘.format(gap))
-
-
-
-
-
track = calculate_tracks(gap)
-
l.info(‘滑动轨迹:{}‘.format(track))
-
print(‘滑动轨迹:{}‘.format(track))
-
-
-
move_to_gap(slider, track)
-
driver.save_screenshot(‘./zhilian_capresult_{}_{}.png‘.format(self.account[‘user_id‘], retry))
-
-
-
-
result = driver.find_element_by_xpath(‘//div[@class="geetest_result_title"]‘).get_attribute(‘textContent‘)
-
-
-
-
-
-
-
l.info(f‘{retry}/{max_retry} crack geetest.‘)
-
-
l.info("max retry reached, return False")
-
-
-
if ‘秒的速度超过‘ in success or ‘passport.lagou.com/login/login‘ not in driver.current_url:
-
l.info("crack succeeded!")
-
print("crack succeeded!")
-
-
elif ‘拖动滑块将悬浮图像正确拼合‘ in success:
-
-
l.info("crack failed, retry:{}/{}".format(retry, max_retry))
-
driver.find_element_by_xpath(‘//a[@class="geetest_refresh_1"]‘).click()
-
-
-
-
-
-
l.info("crack failed, retry:{}/{}".format(retry, max_retry))
-
来源:https://blog.csdn.net/wenq_yang/article/details/81258932