前言:
某天突发奇想,想要实现一个农历生日提醒。这个时候有了如下的大概思路:
获取农历 ——> 匹配 ——> 提醒
发现实现这个需求最主要的在获取农历这一块,后边两个不是什么巨大挑战。遂查阅一番资料,发现数据库可以实现阳历转阴历功能。在数据库这一块,我是在是小菜,看不懂存储过程。蛋疼!直接放弃改用其他思路。思考了许久,想到可以通过爬虫爬取现成的日历信息,存库再进行匹配。所以,现在思路如下:
爬取农历存库 ——> 匹配姓名表 ——> 提醒
一、爬取日历网站
刚开始是按照每年一个表的思路去爬,代码及注释如下:
首先是建表:
(文件名:reptile\CreateDb.py)
# -*- coding:utf-8 -*- import MySQLdb import os #将敏感信息写入环境变量 通过export去设置value MYSQLDB_HOST=os.environ.get(‘MYSQLDB_HOST‘) MYSQLDB_USER=os.environ.get(‘MYSQLDB_USER‘) MYSQLDB_PASSWD=os.environ.get(‘MYSQLDB_PASSWD‘) db = MySQLdb.connect( host=MYSQLDB_HOST, port=3306, user=MYSQLDB_USER, passwd = MYSQLDB_PASSWD, db=‘Calendar‘, charset = "utf8", ) cursor = db.cursor() #数据库插入 def Insert_mysql(sql): cursor.execute(sql) db.commit() #数据库查询 def Inquire_mysql(sql): cursor.execute(sql) request = cursor.fetchall() return request if __name__ == "__main__": st_sql = "show tables;" cursor.execute(st_sql) request = cursor.fetchall() for year in range(1900,2050): if year in request: print "[*]%d is in database!" else: print "[!]%s No in the Database,create now." % year ct_sql = """CREATE TABLE `%d` ( `DAY` date NOT NULL , `WEEK` varchar(50) NULL , `CONSTELLATON` varchar(50) NULL , `FESTIVAL` varchar(24) NULL , `YEAR` varchar(24) NULL , `LUNARCALENDAR` varchar(60) NULL , `LUNNAR` varchar(24) NULL , `ERSHIBASU` varchar(24) NULL , `JIAZI` varchar(24) NULL , PRIMARY KEY (`DAY`) ) ENGINE=InnoDB DEFAULT CHARACTER SET=utf8 ;""" % year try: cursor.execute(ct_sql) db.commit() print "[.] %s OK!" % year except: print "Error:unable to fecth data" db.close()
接着爬取:
(文件名:reptile\Spider.py)
#coding:utf-8 import re,urllib2 from bs4 import BeautifulSoup from urllib import urlencode class ListMark: def __init__(self,text): self.Text = text def base(self): items = self.Text.split(‘\n‘) return List def day(self): Base = self.base[1] return Base class SiteData: def __init__(self,url): self.Url = url def Data(self): #伪装头 values = {‘Accept‘:‘text/html,application/xhtml+xml,application/xml;q=0.9,image/w ebp,*/*;q=0.8‘, ‘Accept-Language‘:‘zh-CN,zh;q=0.8‘, ‘User-Agent‘:‘Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36‘ } SiteUrl = urllib2.Request(self.Url,headers=values) WebSite = urllib2.urlopen(SiteUrl) HtmlData = WebSite.read() WebSite.close() return HtmlData def Find(self): #BeautifulSoup筛选出我要的内容 HtmlData = self.Data() HtmlSoup = BeautifulSoup(HtmlData,"html5lib") FindALL = HtmlSoup.find_all("a") List=[] for i in FindALL: filter = i.encode(‘gb2312‘).replace(‘ ‘,‘ ‘).replace(‘;‘,‘‘) if "#" in filter: ReplaceText = filter.replace(‘<a href="#" onclick="return(false)" title="‘,‘‘) Extract2 = re.sub(r"\<.*\>","",ReplaceText) Extract1 = re.sub(r"[0-9]+\:[0-9]+","",Extract2) Extract = Extract1.replace(‘ 0-‘,‘‘).replace(‘">‘,‘‘) List.append(Extract) else: pass return List
开始爬取:
(文件名:reptile\Run.py)
#coding:utf-8 import time from CreateDb import db,cursor #将数据库操作的一些方法加进来 from Spider import SiteData #将爬虫操作的方法加进来 def ListMark(list): LIST = [] items = list.split(‘\n‘) List = [items[1],items[2],items[3],items[4],items[6],items[7],items[8]] for i in List: item,value = i.split(‘:‘) LIST.append(value) return LIST def get_item(text): item,value = text.split(‘ ‘) return item def get_value(text): item,value = text.split(‘ ‘) return value def UTF(text): TEXT = text.decode(‘gbk‘).encode(‘utf8‘) return TEXT if __name__ == ‘__main__‘: for year in range(1900,2050): sql = "select DAY from Calendar.%s order by DAY desc limit 1" % year cursor.execute(sql) request = cursor.fetchall() print "[!]process %s" % year try: Month = str(request).split(‘,‘)[1] except: Month = str(request) if Month == "()": print "[!]No data in Table! Crawling now.." for m in range(1,13): url = "http://www.nongli.com/item3/rili_%s-%s.htm" % (year,m) OpenSite = SiteData(url) returnList = OpenSite.Find() for i in returnList: List = ListMark(i) Day = get_item(List[0]) Week = get_value(List[0]) Constellation = List[1] Festival = List[2] Year = List[3][0:4] Lunarcalendar = List[3][4:] Lunnar = List[4] Ershibasu = List[5] Jiazi = List[6] Insert_sql = "insert into Calendar.%s values(‘%s‘,‘%s‘,‘%s‘,‘%s‘,‘%s‘,‘%s‘,‘%s‘,‘%s‘,‘%s‘)" % (year,Day,UTF(Week),UTF(Constellation),UTF(Festival), UTF(Year),UTF(Lunarcalendar),UTF(Lunnar),UTF(Ershibasu),UTF(Jiazi)) try: cursor.execute(Insert_sql) db.commit() print "[ ]Insert %s data ..." % Day except: print "[!]Insert %s Error!!!" % Day db.rollback() time.sleep(0.1) try: if Month == 12: print "[ ] Have data in Table." else: NodataMoth = range(int(Month)+1,13) for M in NodataMoth: url = "http://www.nongli.com/item3/rili_%s-%s.htm" % (year,M) OpenSite = SiteData(url) returnList = OpenSite.Find() for i in returnList: List = ListMark(i) Day = get_item(List[0]) Week = get_value(List[0]) Constellation = List[1] Festival = List[2] Year = List[3][0:4] Lunarcalendar = List[3][4:] Lunnar = List[4] Ershibasu = List[5] Jiazi = List[6] Insert_sql = "insert into Calendar.%s values(‘%s‘,‘%s‘,‘%s‘,‘%s‘,‘%s‘,‘%s‘,‘%s‘,‘%s‘,‘%s‘)" % (year,Day,UTF(Week),UTF(Constellation),UTF(Festival), UTF(Year),UTF(Lunarcalendar),UTF(Lunnar),UTF(Ershibasu),UTF(Jiazi)) try: cursor.execute(Insert_sql) print "[ ]Insert %s data ..." % Day db.commit() except: print "[!]Insert %s Error!!!" % Day db.rollback() time.sleep(0.1) except: print "[!]Unknown Error!" db.close()
效果:
平均1条6条数据 从1900-2049年需要2个半小时以上时间。
爬完仔细一下,一个表才365条记录,1900-2049才150年 完全可以合成一张表。
遂,写脚本,将所有表合体,现在所有日期数据都在数据库里了:
二、将想要提醒的人加入一张表,用于匹配
姓名表比较简单,主要有姓名,性别,日期,农历日期。一些星座等其他信息可以根据需要自己添加。一个字段一个字段添加太过蛋疼,可以用数据库触发器自动填充一些,但是。。我!不!会!遂,写了个脚本用于添加:
(文件名:add_use.py)
#coding:utf-8 import re,sys from reptile.CreateDb import Insert_mysql,Inquire_mysql def UTF(text): TEXT = text.decode(‘UTF-8‘).encode(‘UTF-8‘) return TEXT #新增用户的方法 def add(Name,Sex,Birthday): #自动新增ID,匹配最后一个ID,新ID是最后一个ID+1 Last_id_sql = "SELECT Tb_use.USE_ID FROM Calendar.`Tb_use` ORDER BY Tb_use.USE_ID DESC LIMIT 1" Last_id = Inquire_mysql(Last_id_sql) try: m = re.search(r"[0-9]+",str(Last_id)) n = m.group() id = int(n)+1 except: id = 1 #如果填入为1 则为男性 if Sex == "1": SEX = "男" else: SEX = "女" birthday_sql = "SELECT Calendar.Lunarcalendar FROM Calendar WHERE Calendar.TB_DAY = ‘%s‘;" % Birthday #匹配日历表 自动填充农历信息 Lunarcalendar = Inquire_mysql(birthday_sql) Lunar = Lunarcalendar[0][0].encode(‘UTF-8‘) Insert_sql ="INSERT INTO `Tb_use` (`USE_ID`,`NAME`,`SEX`,`TB_DAY`, `LUNARCALENDAR`) VALUES (‘%s‘,‘%s‘,‘%s‘,‘%s‘,‘%s‘);" % (id,UTF(Name),UTF(SEX),Birthday,Lunar) Insert_mysql(Insert_sql) print "[!]新增新记录,编号为%s\n姓名:%s 性别:%s \n生日:%s 农历:%s" %(id,Name,SEX,Birthday,Lunar) #删除用户的方法可以根据新增用户方法去修改,这里pass def DEL(self): pass #将方法映射到字典 def run(type,Name=None,Sex=None,Birthday=None): RUN = { ‘add‘:lambda:add(Name,Sex,Birthday), ‘del‘:lambda:DEL() } return RUN[type]() if __name__ == "__main__": #实现类似命令行交互式效果 while True: echo = raw_input(">>") Split = echo.split(‘ ‘) if echo == "exit": print "Exit!" break elif echo == "": pass #如果输入信息正确,则运行方法 elif Split[0] == "add" or Split[0] == "del": try: a = run(Split[0],Split[1],Split[2],Split[3]) except: pass else: print "Please Use ‘add‘ to Add user."
三、匹配后发送提醒
(文件名:Run.py)
#coding:utf-8 import time,json,smtplib,os from reptile.CreateDb import Insert_mysql,Inquire_mysql from email.mime.text import MIMEText from email.MIMEMultipart import MIMEMultipart from email.MIMEBase import MIMEBase #今天的日期 Today = time.strftime(‘%Y-%m-%d‘,time.localtime(time.time())) #从数据库从获取今天的ID Today_Sql = "SELECT Calendar.ID FROM `Calendar` WHERE TB_DAY=‘%s‘;" % Today TodayID = Inquire_mysql(Today_Sql)[0][0] #这个方法用于获取今天日期 def Get_today(id): Sql = "SELECT Calendar.TB_DAY FROM `Calendar` WHERE ID=‘%s‘;" % id Today = Inquire_mysql(Sql)[0][0] return Today #用于获取今天有哪些小伙伴生日 def Today_Birthday(id): Text = [] Today_Sql = "SELECT Calendar.LUNARCALENDAR FROM `Calendar` WHERE ID=‘%s‘" % id TodayLunar = Inquire_mysql(Today_Sql)[0][0] Today_Birthday_sql = "SELECT Tb_use.NAME FROM `Tb_use` WHERE LUNARCALENDAR=‘%s‘;" % TodayLunar Today_Birthday = Inquire_mysql(Today_Birthday_sql) if str(Today_Birthday) == ‘()‘: Text = [] else: for i in range(0,len(Today_Birthday)): Text.append(Today_Birthday[i][0]) if Text == []: Str = "" else: Str = ‘\t‘.join(i.encode(‘utf-8‘) for i in Text) return Str #用于获取7天内有哪些小伙伴生日 def Week_Birthday(): Text = [] for item in range(TodayID,TodayID+7): if Today_Birthday(item) == "": pass else: Text.append("%s : %s" % (Get_today(item),Today_Birthday(item))) Str = ‘\n‘.join(i for i in Text) return Str #发邮件提醒 def Mail(Subject,text): MAIL_USER=os.environ.get(‘MAIL_USER‘) MAIL_PASSWD=os.environ.get(‘MAIL_PASSWD‘) sender = ‘cctv<你的邮箱@qq.com>‘ receiver = [‘发送到@139.com‘] subject = Subject smtpserver = ‘smtp.mail.qq.com‘ msg = MIMEMultipart(‘alternative‘) msg[‘Subject‘] = subject html = text part = MIMEText(html,‘html‘,‘utf-8‘) msg.attach(part) smtp = smtplib.SMTP() smtp.connect(‘smtp.mail.qq.com‘) smtp.login(MAIL_USER,MAIL_PASSWD) smtp.sendmail(sender,receiver,msg.as_string()) smtp.quit() if __name__ == ‘__main__‘: #获取今天星期 a = time.localtime() Time = time.strftime("%w",a) #今天生日的小伙伴 tb_text = Today_Birthday(TodayID) #7天内生日的小伙伴 wb_text = Week_Birthday() #如果今天有人生日就发邮件 if tb_text == ‘‘: pass else: Mail(‘TodayBirthday‘,tb_text) #每周一查一下,如果未来7天内有人生日就发邮件 if Time == ‘1‘: if wb_text == ‘‘: pass else: Mail(‘WeekBirthday‘,wb_text) else: pass
最后,加入任务计划
(文件名:Run.sh)
#!/bin/bash export MYSQLDB_USER=**** export MYSQLDB_PASSWD=**** export MYSQLDB_HOST=**** export MAIL_PASSWD=**** export MAIL_USER=**** #我是在虚拟环境下的,所以要用虚拟环境的路径运行 `/home/ubuntu/class/env/bin/python /home/ubuntu/class/LunarBirthday/Run.py`
加入任务计划,大功告成!
本文出自 “猫妖的博客” 博客,请务必保留此出处http://maoyao.blog.51cto.com/8253780/1786994
原文地址:http://maoyao.blog.51cto.com/8253780/1786994