码迷,mamicode.com
首页 > 其他好文 > 详细

根据以前的脚本改的

时间:2015-04-03 20:48:47      阅读:133      评论:0      收藏:0      [点我收藏+]

标签:

也就是从百度抓点对公司有用的问题,然后能及时了解信息。

没有注释,太简单了,一看就懂哈。

只是写得比较烂的地方是,没有直接从结果里发邮件,而是用多线程生成文件,然后通过邮件发这些文件里的内容。

UI没有,直接干!

主要涉及一个转码,可以直接搜索

#!/usr/bin/python
# coding: UTF-8

import os,sys,time,commands
import urllib
import urllib2
import string
from bs4 import BeautifulSoup
import threading
import smtplib
from email.mime.text import MIMEText

# SMTP account used by send_email(). The values are placeholders and must be
# replaced with real settings before the script is run.
mail_host = "smtp.x.x.com"
mail_user = "xx@xx.xx"
mail_pwd = "xxxx"

# Search keywords, one per line of the "ZhidaoKeyword" file in the current
# working directory. Lines are kept as the raw (byte) strings read from disk.
keywordList = []

with open("ZhidaoKeyword", "r") as f1:
    for line in f1:
        # Strip both "\n" and "\r" so files saved with Windows line endings
        # do not leave a stray carriage return inside the keyword.
        line = line.strip("\r\n")
        # A blank line would otherwise become a search for the empty string.
        if line:
            keywordList.append(line)

def send_email(content, mailto, get_sub):
    """Send an HTML e-mail through the configured SMTP-over-SSL server.

    Uses the module-level ``mail_host``, ``mail_user`` and ``mail_pwd``
    settings and connects on port 465.

    Args:
        content: HTML body, either a unicode string or a UTF-8 byte string.
        mailto: list of recipient addresses.
        get_sub: subject line, either a unicode string or a UTF-8 byte string.

    Returns:
        None. Failures while connecting or sending are printed, not raised.
    """
    # Calling .encode() on a byte string makes Python 2 decode it as ASCII
    # first, which fails for any non-ASCII body; only encode real text.
    if not isinstance(content, bytes):
        content = content.encode("utf8")
    # Same implicit-ASCII problem for u"%s" % <byte string>.
    if isinstance(get_sub, bytes):
        get_sub = get_sub.decode("utf8")

    print("Setting MIMEText")
    msg = MIMEText(content, _subtype="html", _charset="utf8")
    msg["From"] = mail_user
    msg["Subject"] = u"%s" % get_sub
    msg["To"] = ",".join(mailto)

    try:
        print("connecting %s" % mail_host)
        s = smtplib.SMTP_SSL(mail_host, 465)
        try:
            print("login to mail_host")
            s.login(mail_user, mail_pwd)

            print("send email")
            s.sendmail(mail_user, mailto, msg.as_string())
        finally:
            # Close the socket even when login or sendmail fails.
            print("close the connection between the mail server")
            s.close()
    except Exception as e:
        # Best effort: report the problem and let the caller carry on.
        print("Exception: %s" % e)

class SearchZhidao(threading.Thread):
    """Worker thread that searches Baidu Zhidao for a list of keywords.

    For every keyword the browse page is fetched and each question whose
    time label contains the "hours" marker is written as one HTML table row
    to the module-level ``file_object``, which the ``__main__`` section of
    this script opens before starting the threads.
    """

    def __init__(self, keywordList):
        """Store the keywords this thread is responsible for."""
        threading.Thread.__init__(self)
        self.keywordList = keywordList

    def SearchZhidao(self):
        """Fetch the result page for each keyword and write matching rows.

        Any exception raised while handling a keyword is printed together
        with that keyword and processing continues with the next one.
        """
        for keyword in self.keywordList:
            try:
                # Keywords loaded from the file are byte strings; they are
                # assumed to be UTF-8 (the script's own encoding) - confirm
                # against the actual keyword file.
                if isinstance(keyword, bytes):
                    keyword_text = keyword.decode("utf8")
                else:
                    keyword_text = keyword

                # The query parameter is sent GB2312-encoded.
                encode_keyword = urllib.urlencode(
                    {"word": keyword_text.encode("gb2312")})
                url = ("http://zhidao.baidu.com/browse?" + encode_keyword
                       + "&pn=0&cid=0&lm=8960")

                # Decode once and hand text to the parser, so it does not
                # have to guess the encoding from the page's own <meta> tag.
                htmlpage = urllib2.urlopen(url).read().decode("gb2312")
                soup = BeautifulSoup(htmlpage)

                for result_li in soup.findAll("li", {"class": "question-item"}):
                    question_time = result_li.find(
                        "div", {"class": "question-time"})
                    a_click = result_li.find("a")
                    # Skip a malformed item instead of abandoning every
                    # remaining result for this keyword.
                    if question_time is None or a_click is None:
                        continue
                    href = a_click.get("href")
                    if href is None:
                        continue

                    time_text = question_time.get_text()
                    q_time = time_text.split()
                    if len(q_time) > 1 and u"小时" in q_time[1]:
                        # Build the row as unicode and encode once on write;
                        # mixing byte and unicode strings triggers implicit
                        # ASCII conversions that fail on Chinese text.
                        html_output = u"<tr><td>" + keyword_text + u"</td><td>"
                        html_output += (
                            u"<a class='question-title' "
                            u"href='http://zhidao.baidu.com" + href
                            + u"' target='_blank'>")
                        html_output += (
                            a_click.renderContents().decode("utf8")
                            + u"</a></td>")
                        html_output += u"<td>" + time_text + u"</td></tr>"
                        file_object.write(html_output.encode("utf8"))
            except Exception as e:
                print("%s %s" % (e, keyword))
                continue

    def run(self):
        """Thread entry point: process all assigned keywords."""
        self.SearchZhidao()

        

def split_keywords(keywords, parts):
    """Split ``keywords`` into ``parts`` consecutive slices.

    Each slice holds ceil(len(keywords) / parts) items; trailing slices are
    shorter or empty when the list runs out.

    Args:
        keywords: sequence to split.
        parts: number of slices to return (must be positive).

    Returns:
        A list of exactly ``parts`` slices that together cover ``keywords``.
    """
    size = (len(keywords) + parts - 1) // parts
    return [keywords[size * i:size * (i + 1)] for i in range(parts)]


if __name__ == "__main__":

    print("start the programe....")

    # file_object is deliberately a module-level name: the SearchZhidao
    # worker threads write their table rows to it.
    with open("zhidao_html_mail.html", "w") as file_object:
        file_object.write("<!DOCTYPE html><html><head><meta charset='UTF-8'><title>Seo</title></head><body><table>")

        # One worker thread per slice of the keyword list.
        SearchZhidaoThreads = [SearchZhidao(chunk)
                               for chunk in split_keywords(keywordList, 20)]
        for t in SearchZhidaoThreads:
            t.start()
        for t in SearchZhidaoThreads:
            t.join()

        file_object.write("</table></body></html>")
    print("finished this job!")

    # Read the generated page back as text so the mail body is never an
    # undecoded byte string; undecodable bytes are replaced, not fatal.
    with open("zhidao_html_mail.html", "rb") as f2:
        html_mail = f2.read().decode("utf8", "replace")

    time_title = time.strftime("%Y-%m-%d %X", time.localtime(time.time()))
    to_list = ["xxx@xx.xx", "xx@x.xxx"]
    # Unicode literal: a non-ASCII byte-string subject would fail when
    # send_email formats it with u"%s".
    send_email(html_mail, to_list, u"百度知道最近问题" + time_title)

邮件输出样子:

技术分享

根据以前的脚本改的

标签:

原文地址:http://www.cnblogs.com/aguncn/p/4390893.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!