码迷,mamicode.com
首页 > 其他好文 > 详细

代码与性格

时间:2018-04-20 23:33:19      阅读:211      评论:0      收藏:0      [点我收藏+]

标签:one   ace   exit   team   height   min   username   replace   print   

 

 

技术分享图片

 

 

技术分享图片

 

从代码里能分析出一个人处事的积极与消极、妥协退让与迎面直击。

 

 

from selenium import webdriver
import os
import time
import pymysql
from bs4 import BeautifulSoup
import requests
import threading
from selenium.webdriver.common.keys import Keys

# MySQL connection settings read by mysql_fetch() and mysql_write():
# host, port, user, password, database name.
h, pt, u, p, db = 'localhost', 3306, 'root', '', 'qqzone'


def mysql_fetch(sql, res_type='tuple'):
    """Run a query against the configured MySQL database and return all rows.

    Args:
        sql: SQL statement, handed to ``cursor.execute`` unchanged.
        res_type: ``'dic'`` selects ``pymysql.cursors.DictCursor``; any other
            value uses the connection's default cursor.

    Returns:
        The result of ``cursor.fetchall()``, or an empty tuple when the
        connection could not be opened (the error is printed).

    Raises:
        Any exception raised by ``execute``/``commit``/``fetchall``; only
        connection failures are swallowed.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8mb4')
    except Exception as e:
        print(e)
        return ()
    # From here on the connection must be closed even if the query fails.
    try:
        if res_type == 'dic':
            cursor = conn.cursor(pymysql.cursors.DictCursor)
        else:
            cursor = conn.cursor()
        try:
            cursor.execute(sql)
            conn.commit()
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()


def mysql_write(sql):
    """Execute a data-modifying statement and commit it.

    Args:
        sql: SQL statement, handed to ``cursor.execute`` unchanged.

    Returns:
        0 after the statement was executed and committed, 1 when the
        connection could not be opened (the error is printed).

    Raises:
        Any exception raised by ``execute`` or ``commit``; only connection
        failures are swallowed.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8mb4')
    except Exception as e:
        print(e)
        return 1
    # From here on the connection must be closed even if the statement fails.
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
    return 0


# D:\pyaction\toutiao_team_win

# Directory (with trailing slash) where downloaded media files are written.
# A previous location was 'C:/Users/Administrator/Desktop/1/toutiao_team/dl_img/'.
img_dir = 'D:/pyaction/toutiao_team_win/dl_img/'

import random


def spider_webimg_dl_return_local_img_path(img_dir, img_url, media_type='img',
                                           local_default='default.DONOT_REMOVE.png',
                                           timeout=60):
    """Download an image or video into ``img_dir`` and return the local path.

    The deliberate pauses between steps (3 s, 30 s, 210 s) are kept from the
    original implementation.

    Args:
        img_dir: Target directory, including the trailing separator.
        img_url: URL of the media to download.
        media_type: ``'img'`` or ``'mp4'``; any other value downloads nothing.
        local_default: File name inside ``img_dir`` used as the fallback image.
        timeout: Seconds passed to ``requests.get`` so a stalled server cannot
            block the caller forever.

    Returns:
        The path of the file that was written. For ``'img'`` (and unknown
        media types) the fallback path ``img_dir + local_default`` is returned
        when nothing was saved; for ``'mp4'`` an empty string is returned
        instead.
    """
    fallback = '%s%s' % (img_dir, local_default)
    stamp = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
    thread_id = str(threading.get_ident())

    if media_type == 'img':
        try:
            req = requests.get(img_url, timeout=timeout)
            time.sleep(3)
            if req.status_code != 200:
                print('-!=200')
                return fallback
            time.sleep(30)
            print(img_url)
            data = req.content
            file_name = '%s%s%s' % (stamp, thread_id, str(random.randrange(1000, 9999)))
            # Strip URL punctuation from the file name only, never from the
            # directory part (the original discarded the replace() result).
            for fi in ('&', '=', '?', '-'):
                file_name = file_name.replace(fi, '')
            target = '%s%s%s' % (img_dir, file_name, '.png')
            print(target)
            if not data:
                # Nothing to save: do not hand back a path that does not exist.
                return fallback
            with open(target, 'wb') as f:
                f.write(data)
            return target
        except Exception as e:
            print(e)
            return fallback

    if media_type == 'mp4':
        try:
            time.sleep(30)
            print(img_url)
            target = '%s%s%s%s%s' % (
                img_dir, stamp, thread_id,
                img_url.split('.mp4?')[0].split('/')[-1].replace('*', '_'), '.mp4')
            print(target)
            req = requests.get(img_url, timeout=timeout)
            time.sleep(3)
            if req.status_code != 200:
                print('-!=200')
                return ''
            data = req.content
            time.sleep(210)
            if not data:
                return ''
            with open(target, 'wb') as f:
                f.write(data)
            return target
        except Exception as e:
            print(e)
            return ''

    return fallback


# Start Chrome and open the Weibo login page. Earlier revisions pointed myurl
# at the profile pages https://weibo.com/u/1779073702 and
# https://weibo.com/u/1779073702?is_all=1 instead.
driver = webdriver.Chrome()
myurl = 'https://weibo.com/login.php'
driver.get(myurl)
# Needs spare memory and CPU so the browser can parse the DOM and run this
# JavaScript-heavy page.
time.sleep(10)
driver.refresh()
time.sleep(10)

# Fill in the login form and press the login button from page JavaScript.
# NOTE(review): the account name and password are hard-coded here; consider
# loading them from configuration outside the source file.
js = ('document.getElementById("loginname").value="p.cn";'
      'document.getElementsByName("password")[0].value="welcome";'
      'document.getElementsByClassName("W_btn_a btn_32px")[0].click();')
try:
    driver.execute_script(js)
    time.sleep(30)
except Exception as e:
    print(e)
    # Close the browser and exit with a real failure status. The previous
    # os._exit(1024) is truncated to status 0 on POSIX, skips flushing of
    # buffered output and leaves the Chrome process running.
    driver.quit()
    raise SystemExit(1)

time.sleep(random.randrange(3, 6))
for isc in range(2):
    # Toutiao: not inside an iframe, can be scrolled any number of times.
    # QQ Zone posts: inside an iframe, fixed 20 items, errors on the 2nd scroll.
    # Scrolling costs memory and CPU.
    time.sleep(1)
    js = 'window.scrollTo(0,document.body.scrollHeight)'
    driver.execute_script(js)

# Row filter by post time / greeting keywords. The original disabled it with
# an always-false condition; it stays off here.
time_filter_enabled = False

# Taobao item ids rotated into the advertised link
# (hydrating spray, bracelet, facial mask, sunscreen spray, ...).
ad_url_l = ['567557180229', '565875313425', '545159271159', '546048319163', '567693004121']

# Phrases removed from the scraped text before it is posted.
filter_l = ['密龄素材空间', '评论']

hot_topic_list_url = 'https://weibo.com/u/1779073702/home'
mytopic, myname = ' #doaez朵韵诗磁石娃娃燕窝润颜面膜# ', '南京同仁堂密龄白藜芦醇-燕窝美妆-DOAEZ朵韵诗-阿静@ '

# Endless loop: fetch rows that still have posts left, forward a random Weibo
# post with each row's text as the comment, decrement the row's counter, then
# idle while keeping the page alive.
while True:
    sql = 'SELECT id, words,imgurls,time_site FROM qqzoneshuoshuo WHERE lefttimes_weibo>0 AND  INSTR(imgurls,".mp4")=0 AND id IN ( SELECT MAX(id) FROM qqzoneshuoshuo GROUP BY id_site) ORDER BY time_script DESC,id ASC ;'
    res_content = mysql_fetch(sql, 'dic')
    print(res_content)
    if not res_content:
        # Nothing to post (or the database is unreachable): wait before
        # polling again instead of spinning on the database.
        time.sleep(60)
        continue

    for row in res_content:
        dbid, content, time_site = row['id'], row['words'], row['time_site']
        if time_filter_enabled:
            if '天' in time_site or '月' in time_site:
                continue
            lh = int(time.strftime("%H", time.localtime()))
            if lh - int(time_site.split(':')[0]) >= 24:
                continue
            if '早安' in content and lh >= 11:
                continue
            elif '晚安' in content and lh <= 20:
                continue

        time.sleep(10)
        # Keep only the text before the page's trailing markers, then blank
        # out quotes, newlines and unwanted phrases.
        content = content.split('展开全文')[0].split('上传')[0].split('浏览')[0]
        content = content.replace('"', ' ').replace("'", ' ').replace('\n', ' ')
        for fi in filter_l:
            content = content.replace(fi, ' ')

        ad_this = ad_url_l[int(time.time()) % len(ad_url_l)]
        ad_url = 'https://item.taobao.com/item.htm?id={}'.format(ad_this)

        # Open the home page and collect candidate links to visit.
        driver.execute_script('window.location.href = arguments[0]', hot_topic_list_url)
        time.sleep(20)
        hot_url_l = [
            href for href in
            (a.get_attribute('href') for a in driver.find_elements_by_css_selector('li>p>a'))
            if href
        ]
        if not hot_url_l:
            print('no links found on {}'.format(hot_topic_list_url))
            continue
        # Same bias as before (a time-based index, or one of the first two
        # links), but clamped so a single-link page cannot raise IndexError.
        hot_url_l_index = random.choice(
            [int(time.time()) % len(hot_url_l), 0, min(1, len(hot_url_l) - 1)])

        driver.execute_script('window.location.href = arguments[0]', hot_url_l[hot_url_l_index])
        time.sleep(10)
        driver.refresh()
        time.sleep(random.randrange(3, 6))
        time.sleep(15)

        comment_l = driver.find_elements_by_css_selector('.WB_row_line>li:nth-child(3)>a>span>span>span')
        # Only pick indices that exist on this page.
        click_choices = [n for n in (0, 0, 1, 1, 1, 2, 2, 3) if n < len(comment_l)]
        if not click_choices:
            print('no comment buttons found')
            continue

        # Scroll down in small steps until one of the comment buttons accepts
        # a click; another element may obscure it at some scroll positions.
        ele_clickable = False
        for isc in range(20):
            time.sleep(1)
            driver.execute_script('window.scrollTo(0,{})'.format(isc * 50))
            time.sleep(2)
            try:
                comment_l[random.choice(click_choices)].click()
                ele_clickable = True
                break
            except Exception as e:
                print(e)
                continue
        if not ele_clickable:
            continue

        time.sleep(12)
        mystr = '{}{}{}{}'.format(mytopic, myname, content, ad_url)
        try:
            # The page only reacts to the textarea after a real keyboard event.
            driver.find_elements_by_tag_name("textarea")[1].send_keys(Keys.SPACE)
            time.sleep(2)
            driver.find_elements_by_tag_name("textarea")[1].send_keys(Keys.BACK_SPACE)
            # Pass the text as a script argument so backslashes or other
            # characters in it cannot break the JavaScript.
            driver.execute_script(
                'document.getElementsByTagName("textarea")[1].value = arguments[0]', mystr)
            time.sleep(2)
        except Exception as e:
            print(e)
            continue

        try:
            # Tick the "forward" checkbox, then press the publish button.
            driver.execute_script("document.getElementsByName('forward')[0].click();")
            time.sleep(2)
            driver.execute_script("document.getElementsByClassName('btn W_fr')[0].childNodes[0].click()")
            time.sleep(2)
        except Exception as e:
            # Publishing failed: leave the row's counter untouched and move on.
            print(e)
            continue

        driver.refresh()
        # NOTE: the image-upload flow (download each entry of the row's
        # imgurls with spider_webimg_dl_return_local_img_path and send the
        # local path to the page's upload input) is currently disabled.

        sql = 'UPDATE qqzoneshuoshuo SET lefttimes_weibo=lefttimes_weibo-1 WHERE id={}'.format(dbid)
        print(sql)
        try:
            if mysql_write(sql) != 0:
                print('could not record the post for id {}'.format(dbid))
        except Exception as e:
            print(e)

        driver.refresh()

        # Integer bounds: random.randint rejects floats on Python 3.12+.
        time.sleep(random.randint(30, 60))

    # Idle for roughly 15 minutes before the next query, refreshing
    # periodically to keep the page session alive.
    for si in range(15):
        try:
            driver.refresh()
            time.sleep(60)
            time.sleep(random.randint(0, 10))
            print(si)
        except Exception as e:
            print(145, e)

  

 

代码与性格

标签:one   ace   exit   team   height   min   username   replace   print   

原文地址:https://www.cnblogs.com/yuanjiangw/p/8893738.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!