# Tags: one ace exit team height min username replace print
"""Repost queued QQ Zone posts as Weibo comments through a Selenium-driven Chrome.

Flow, as implemented below:

1. Log in to Weibo by filling the login form through JavaScript.
2. Read pending rows from the ``qqzoneshuoshuo`` MySQL table.
3. For each row: open a link taken from the account's home page, open a
   comment box, submit the cleaned post text plus an advertisement link, then
   decrement the row's ``lefttimes_weibo`` counter.
4. Keep the browser session alive for about 15 minutes and poll again.

NOTE(review): this file was reconstructed from a copy whose line breaks and
indentation had been lost. Places where the nesting had to be guessed are
marked with ``NOTE(review)``.
"""
import os
import random
import sys
import threading
import time

import pymysql
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# MySQL connection settings; read as module globals by mysql_fetch/mysql_write.
h, pt, u, p, db = 'localhost', 3306, 'root', '', 'qqzone'

# Directory that downloaded media is written to (must end with a slash).
img_dir = 'D:/pyaction/toutiao_team_win/dl_img/'

# Seconds before an HTTP download is abandoned; without it requests can block forever.
REQUEST_TIMEOUT = 60

# Seconds to wait before polling the database again when nothing is queued.
EMPTY_QUEUE_DELAY = 60

# SECURITY: the credentials were hard-coded in the original script. They stay
# as defaults so existing setups keep working, but can now be overridden
# through the environment instead of editing the source.
WEIBO_LOGINNAME = os.environ.get('WEIBO_LOGINNAME', 'p.cn')
WEIBO_PASSWORD = os.environ.get('WEIBO_PASSWORD', 'welcome')

LOGIN_URL = 'https://weibo.com/login.php'
HOME_URL = 'https://weibo.com/u/1779073702/home'

LOGIN_JS = (
    'document.getElementById("loginname").value=arguments[0];'
    'document.getElementsByName("password")[0].value=arguments[1];'
    'document.getElementsByClassName("W_btn_a btn_32px")[0].click();'
)
NAVIGATE_JS = 'window.location.href=arguments[0];'
SET_COMMENT_JS = 'document.getElementsByTagName("textarea")[1].value=arguments[0];'

COMMENT_BUTTON_SELECTOR = '.WB_row_line>li:nth-child(3)>a>span>span>span'

PENDING_POSTS_SQL = (
    'SELECT id, words,imgurls,time_site FROM qqzoneshuoshuo '
    'WHERE lefttimes_weibo>0 AND INSTR(imgurls,".mp4")=0 '
    'AND id IN ( SELECT MAX(id) FROM qqzoneshuoshuo GROUP BY id_site) '
    'ORDER BY time_script DESC,id ASC ;'
)
DECREMENT_SQL = 'UPDATE qqzoneshuoshuo SET lefttimes_weibo=lefttimes_weibo-1 WHERE id=%s'

# Taobao item ids used to build the advertisement link
# (original note: hydrating spray, bracelet, face mask, sunscreen spray).
AD_ITEM_IDS = ['567557180229', '565875313425', '545159271159', '546048319163', '567693004121']

MY_TOPIC = ' #doaez朵韵诗磁石娃娃燕窝润颜面膜# '
MY_NAME = '南京同仁堂密龄白藜芦醇-燕窝美妆-DOAEZ朵韵诗-阿静@ '

# The original guarded the time-of-day filter with ``if 1 > 13`` (never true).
# The logic is kept behind this flag so it stays disabled by default.
TIME_FILTER_ENABLED = False


def mysql_fetch(sql, res_type='tuple', args=None):
    """Run a query and return all rows.

    :param sql: SQL text; use ``%s`` placeholders together with ``args``.
    :param res_type: ``'dic'`` returns rows as dicts (DictCursor); anything
        else uses the connection's default cursor.
    :param args: optional parameters passed to ``cursor.execute``.
    :return: the fetched rows, or ``()`` when the connection cannot be opened.
    :raises Exception: errors raised while executing the query propagate.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8mb4')
    except Exception as e:
        print(e)
        return ()
    # try/finally so the connection and cursor are released even if execute() raises.
    try:
        cursor = conn.cursor(pymysql.cursors.DictCursor if res_type == 'dic' else None)
        try:
            cursor.execute(sql, args)
            conn.commit()
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()


def mysql_write(sql, args=None):
    """Run a write statement and commit it.

    :param sql: SQL text; use ``%s`` placeholders together with ``args``.
    :param args: optional parameters passed to ``cursor.execute``.
    :return: ``0`` on success, ``1`` when the connection cannot be opened.
    :raises Exception: errors raised while executing the statement propagate.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8mb4')
    except Exception as e:
        print(e)
        return 1
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
    return 0


def _timestamp_and_thread():
    """Return the ``YYYYmmddHHMMSS`` timestamp followed by the current thread id."""
    stamp = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
    return '%s%s' % (stamp, threading.get_ident())


def _strip_url_chars(name):
    """Remove the characters ``& = ? -`` from a file name."""
    for ch in ('&', '=', '?', '-'):
        name = name.replace(ch, '')
    return name


def spider_webimg_dl_return_local_img_path(img_dir, img_url, media_type='img',
                                           local_default='default.DONOT_REMOVE.png'):
    """Download an image or mp4 and return the local path it was saved to.

    NOTE: nothing in the visible posting loop calls this helper any more; the
    upload code that used it had been commented out in the original.

    :param img_dir: destination directory, including the trailing slash.
    :param img_url: URL to download.
    :param media_type: ``'img'`` or ``'mp4'``.
    :param local_default: file name, relative to ``img_dir``, that is returned
        when an image cannot be downloaded.
    :return: path of the written file. For ``'img'`` (and unknown media types)
        the default image path is returned on failure; for ``'mp4'`` an empty
        string is returned on failure.
    """
    default_path = '%s%s' % (img_dir, local_default)
    if media_type == 'img':
        try:
            req = requests.get(img_url, timeout=REQUEST_TIMEOUT)
            time.sleep(3)
            if req.status_code != 200:
                print('-!=200')
                return default_path
            time.sleep(30)  # throttle between downloads (kept from the original)
            print(img_url)
            data = req.content
            if not data:
                return default_path
            # The original called r.replace(...) and discarded the result, so
            # its filter never ran; apply it to the file name only so the
            # directory part of the path is never altered.
            file_name = _strip_url_chars('%s%s%s' % (
                _timestamp_and_thread(), random.randrange(1000, 9999), '.png'))
            local_path = '%s%s' % (img_dir, file_name)
            print(local_path)
            with open(local_path, 'wb') as f:
                f.write(data)
            # Only report the new path once the file has really been written.
            return local_path
        except Exception as e:
            print(e)
            return default_path
    elif media_type == 'mp4':
        try:
            time.sleep(30)
            print(img_url)
            url_name = img_url.split('.mp4?')[0].split('/')[-1].replace('*', '_')
            local_path = '%s%s%s%s' % (img_dir, _timestamp_and_thread(), url_name, '.mp4')
            print(local_path)
            req = requests.get(img_url, timeout=REQUEST_TIMEOUT)
            time.sleep(3)
            if req.status_code != 200:
                print('-!=200')
                return ''
            data = req.content
            time.sleep(210)  # long pause kept from the original
            if not data:
                return ''
            with open(local_path, 'wb') as f:
                f.write(data)
            return local_path
        except Exception as e:
            print(e)
            return ''
    return default_path


def _should_skip_by_time(content, time_site):
    """Return True when the (currently disabled) time filter rejects a post.

    NOTE(review): presumably ``time_site`` is the timestamp text shown on the
    source site; posts whose text contains '天' or '月' are skipped, and
    greeting posts ('早安' / '晚安') are skipped outside their time of day.
    """
    if '天' in time_site or '月' in time_site:
        return True
    local_hour = int(time.strftime("%H", time.localtime()))
    if local_hour - int(time_site.split(':')[0]) >= 24:
        return True
    if '早安' in content and local_hour >= 11:
        return True
    if '晚安' in content and local_hour <= 20:
        return True
    return False


def _clean_content(content):
    """Trim site boilerplate from a post and blank out quotes and newlines."""
    content = content or ''  # a NULL ``words`` column would otherwise crash .split()
    for marker in ('展开全文', '上传', '浏览'):
        content = content.split(marker)[0]
    for ch in ('"', "'", '\n'):
        content = content.replace(ch, ' ')
    for phrase in ('密龄素材空间', '评论'):
        content = content.replace(phrase, ' ')
    return content


def _pick_ad_url():
    """Return a Taobao item URL chosen from AD_ITEM_IDS by the current time."""
    item_id = AD_ITEM_IDS[int(time.time()) % len(AD_ITEM_IDS)]
    return 'https://item.taobao.com/item.htm?id={}'.format(item_id)


def _login(driver):
    """Open the login page and submit the credentials; exit the process on failure."""
    driver.get(LOGIN_URL)
    # Needs spare memory and CPU so the browser can parse the DOM and run this
    # JavaScript-heavy page.
    time.sleep(10)
    driver.refresh()
    time.sleep(10)
    try:
        # Values are passed as script arguments rather than spliced into the
        # JS source, so quotes or backslashes in them cannot break the script.
        driver.execute_script(LOGIN_JS, WEIBO_LOGINNAME, WEIBO_PASSWORD)
        time.sleep(30)
    except Exception as e:
        print(e)
        # The original called os._exit(1024): on POSIX that status is truncated
        # to 0 (success) and Chrome is left running. sys.exit lets main()'s
        # finally block quit the driver and reports a real failure status.
        sys.exit(1)
    time.sleep(random.randrange(3, 6))
    # Original note: Toutiao has no iframe and scrolls indefinitely; QQ Zone
    # posts live in an iframe with a fixed 20 items and error after 2 scrolls.
    for _ in range(2):
        time.sleep(1)
        driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')


def _open_random_post(driver):
    """Go to the home page and follow one of its ``li>p>a`` links.

    :return: True when a link was opened, False when the page offered none.
    """
    driver.execute_script(NAVIGATE_JS, HOME_URL)
    time.sleep(10)
    time.sleep(10)
    hrefs = [a.get_attribute('href') for a in driver.find_elements(By.CSS_SELECTOR, 'li>p>a')]
    hrefs = [href for href in hrefs if href]
    if not hrefs:
        print('no links found on the home page')
        return False
    # Same candidate set as the original ([time-based, 0, 1]), but index 1 is
    # clamped so a page with a single link no longer raises IndexError.
    index = random.choice([int(time.time()) % len(hrefs), 0, min(1, len(hrefs) - 1)])
    driver.execute_script(NAVIGATE_JS, hrefs[index])
    time.sleep(10)
    driver.refresh()
    time.sleep(random.randrange(3, 6))
    time.sleep(15)
    return True


def _open_comment_box(driver):
    """Scroll down step by step until one of the comment buttons can be clicked.

    :return: True once a click succeeded, False after 20 failed attempts or
        when the page has no comment buttons at all.
    """
    buttons = driver.find_elements(By.CSS_SELECTOR, COMMENT_BUTTON_SELECTOR)
    if not buttons:
        return False
    for step in range(20):
        time.sleep(1)
        driver.execute_script('window.scrollTo(0,{})'.format(step * 50))
        time.sleep(2)
        try:
            # The click can fail because another element <div> obscures the
            # button, or because the chosen index does not exist; scroll on and retry.
            buttons[random.choice([0, 0, 1, 1, 1, 2, 2, 3])].click()
        except Exception as e:
            print(e)
            continue
        return True
    return False


def _submit_comment(driver, text):
    """Type ``text`` into the second textarea, tick 'forward' and submit.

    :return: True when every step ran, False when any step raised.
    """
    try:
        # The page needs a real keyboard event before it reacts to the value
        # that is set through JavaScript.
        driver.find_elements(By.TAG_NAME, 'textarea')[1].send_keys(Keys.SPACE)
        time.sleep(2)
        driver.find_elements(By.TAG_NAME, 'textarea')[1].send_keys(Keys.BACK_SPACE)
        driver.execute_script(SET_COMMENT_JS, text)
        time.sleep(2)
        driver.execute_script("document.getElementsByName('forward')[0].click();")
        time.sleep(2)
        driver.execute_script("document.getElementsByClassName('btn W_fr')[0].childNodes[0].click()")
        time.sleep(2)
    except Exception as e:
        print(e)
        return False
    driver.refresh()
    return True


def _repost_row(driver, row):
    """Post one queued row as a comment and decrement its remaining-post counter."""
    dbid, content, time_site = row['id'], row['words'], row['time_site']
    if TIME_FILTER_ENABLED and _should_skip_by_time(content, time_site):
        return
    # NOTE(review): with the indentation lost it is unclear whether this pause
    # belonged to the disabled filter above; it is kept as a harmless delay.
    time.sleep(10)
    content = _clean_content(content)
    ad_url = _pick_ad_url()
    if not _open_random_post(driver):
        return
    if not _open_comment_box(driver):
        return
    time.sleep(12)
    text = '{}{}{}{}'.format(MY_TOPIC, MY_NAME, content, ad_url)
    if not _submit_comment(driver, text):
        return
    print(DECREMENT_SQL, dbid)
    try:
        if mysql_write(DECREMENT_SQL, (dbid,)) != 0:
            print('could not connect to MySQL; counter not decremented for id', dbid)
    except Exception as e:
        # Best effort, as in the original, but no longer silent.
        print(e)
    driver.refresh()
    # Integer bounds: random.randint(60 * 0.5, ...) passed a float, which
    # raises TypeError on Python 3.12+.
    time.sleep(random.randint(30, 60))


def _keep_session_alive(driver, minutes=15):
    """Refresh the page about once a minute so the session stays logged in."""
    for minute in range(minutes):
        try:
            driver.refresh()
            time.sleep(60)
            time.sleep(random.randint(0, 10))
            print(minute)
        except Exception as e:
            # 145 is kept from the original log output (presumably a line marker).
            print(145, e)


def main():
    """Log in, then repost queued rows forever."""
    driver = webdriver.Chrome()
    try:
        _login(driver)
        while True:
            rows = mysql_fetch(PENDING_POSTS_SQL, 'dic')
            print(rows)
            if not rows:
                # The original looped straight back and hammered the database.
                time.sleep(EMPTY_QUEUE_DELAY)
                continue
            for row in rows:
                _repost_row(driver, row)
            # Original note: refresh after 15 minutes; meanwhile data keeps
            # being stored, and periodic refreshes keep the page alive.
            # NOTE(review): assumed to run once per polling round (after all
            # rows), not after every single post - confirm against the author's copy.
            _keep_session_alive(driver)
    finally:
        driver.quit()


if __name__ == '__main__':
    main()
# Tags: one ace exit team height min username replace print