标签:sub readlines action logs etc charset 微博 html getc
# -*- coding: utf-8 -*-
"""Poll a weibo.cn profile and e-mail one of its posts when it changes.

Every 30 seconds the script fetches the profile URL, submits the login form
it finds there, extracts one post plus its timestamp from the response and,
if that text is not yet recorded in ``weibo.txt``, mails it and records it.

The code runs on both Python 2 and Python 3; ``print`` is therefore always
called with exactly one parenthesised argument.
"""

import io
import os
import smtplib
import sys
import time
from email.mime.text import MIMEText

import requests
from lxml import etree

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str <-> unicode conversions use UTF-8.
    # Neither ``reload`` nor ``setdefaultencoding`` exists on Python 3.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

# Text type of the running interpreter (``unicode`` on 2, ``str`` on 3).
_TEXT_TYPE = type(u'')

# File holding one already-notified post per line.
HISTORY_FILE = 'weibo.txt'

# Seconds to wait for weibo.cn before giving up on a request; without a
# timeout a stalled connection would block the polling loop forever.
REQUEST_TIMEOUT = 10


class mailhelper(object):
    """Send plain-text mails through an SMTP account."""

    def __init__(self):
        self.mail_host = "smtp.xxxx.com"  # SMTP server
        self.mail_user = "xxxx"  # user name
        self.mail_pass = "xxxx"  # password
        self.mail_postfix = "xxxx.com"  # domain part of the sender address

    def send_mail(self, to_list, sub, content):
        """Send ``content`` as a UTF-8 plain-text mail.

        Args:
            to_list: list of recipient addresses.
            sub: subject line.
            content: message body.

        Returns:
            True if the message was handed to the server, False if any step
            failed (the error text is printed).
        """
        me = "xxoohelper" + "<" + self.mail_user + "@" + self.mail_postfix + ">"
        msg = MIMEText(content, _subtype='plain', _charset='utf-8')
        msg['Subject'] = sub
        msg['From'] = me
        # Addresses in a header are comma-separated; ';' is not a valid
        # separator and yields a malformed header for several recipients.
        msg['To'] = ", ".join(to_list)
        try:
            server = smtplib.SMTP()
            try:
                server.connect(self.mail_host)
                server.login(self.mail_user, self.mail_pass)
                server.sendmail(me, to_list, msg.as_string())
            finally:
                # Release the connection even when login or sending fails.
                server.close()
        except Exception as e:
            # Best-effort notification: report the failure, do not raise.
            print(str(e))
            return False
        return True


class xxoohelper(object):
    """Log in to weibo.cn and extract one post from a profile page."""

    def __init__(self):
        self.url = 'http://weibo.cn/u/xxxxxxx'  # weibo profile URL to scrape
        self.url_login = 'https://login.weibo.cn/login/'
        # Login POST target; getData() appends the form's action to it.
        self.new_url = self.url_login

    def getSource(self):
        """Return the raw response body of ``self.url``.

        NOTE(review): getData() expects this page to contain the login
        form, i.e. the unauthenticated request is presumably redirected to
        the login page -- confirm against the live site.
        """
        return requests.get(self.url, timeout=REQUEST_TIMEOUT).content

    def getData(self, html):
        """Build the login form payload from the login page ``html``.

        Reads the name of the password field, the hidden ``vk`` value and
        the form action from the page, stores the resulting POST URL in
        ``self.new_url`` and returns the form fields as a dict.

        Raises:
            IndexError: if the page lacks any of those three elements.
        """
        selector = etree.HTML(html)
        # The password field's name is read from the page, not hard-coded.
        password = selector.xpath('//input[@type="password"]/@name')[0]
        vk = selector.xpath('//input[@name="vk"]/@value')[0]
        action = selector.xpath('//form[@method="post"]/@action')[0]
        self.new_url = self.url_login + action
        data = {
            'mobile': 'xxxxx@xxx.com',
            password: 'xxxxxx',
            'remember': 'on',
            'backURL': 'http://weibo.cn/u/xxxxxx',  # change to the weibo profile URL
            'backTitle': u'微博',
            'tryCount': '',
            'vk': vk,
            'submit': u'登录',
        }
        return data

    def getContent(self, data):
        """Submit the login form and return post text plus timestamp.

        Args:
            data: form payload produced by getData().

        Returns:
            The text of the third ``span.ctt`` element with every
            ``http://`` removed, followed by the text of the first
            ``span.ct`` element.

        Raises:
            IndexError: if the response has fewer than three ``span.ctt``
                elements or no ``span.ct`` text (e.g. the login failed).
        """
        newhtml = requests.post(
            self.new_url, data=data, timeout=REQUEST_TIMEOUT).content
        new_selector = etree.HTML(newhtml)
        content = new_selector.xpath('//span[@class="ctt"]')
        # NOTE(review): index 2 presumably skips two non-post spans at the
        # top of the profile page -- verify against the current markup.
        newcontent = _TEXT_TYPE(
            content[2].xpath('string(.)')).replace('http://', '')
        sendtime = new_selector.xpath('//span[@class="ct"]/text()')[0]
        return newcontent + sendtime

    def tosave(self, text):
        """Append ``text`` as one line to the history file."""
        with io.open(HISTORY_FILE, 'a', encoding='utf-8') as f:
            f.write(text + u'\n')

    def tocheck(self, data):
        """Return True if ``data`` is not yet a line of the history file.

        A missing history file means nothing has been recorded, so the
        result is True.
        """
        if not os.path.exists(HISTORY_FILE):
            return True
        with io.open(HISTORY_FILE, 'r', encoding='utf-8') as f:
            existweibo = f.readlines()
        return (data + u'\n') not in existweibo


if __name__ == '__main__':
    mailto_list = ['xxxxx@qq.com']  # mailbox(es) that receive the notification
    helper = xxoohelper()
    while True:
        try:
            source = helper.getSource()
            data = helper.getData(source)
            content = helper.getContent(data)
        except (requests.RequestException, IndexError) as e:
            # A network error or a page without the expected elements must
            # not end the polling loop; report it and retry next cycle.
            print(str(e))
        else:
            if helper.tocheck(content):
                if mailhelper().send_mail(mailto_list, u"女神更新啦", content):
                    print(u"发送成功")
                else:
                    print(u"发送失败")
                # Recorded whether or not the mail went out, so a failed
                # notification is not retried for the same post.
                helper.tosave(content)
                print(content)
            else:
                print(u'pass')
        time.sleep(30)
标签:sub readlines action logs etc charset 微博 html getc
原文地址:http://www.cnblogs.com/themost/p/7101651.html