标签:add auto try code str def ror address hang
#!/usr/local/bin/python # -*- coding: utf-8 -*- # Python: 3.5 # Author: wucl(),zhenghai.zhang # Program: 爬取CBO网站上所有电影的名称并写入数据库。 # Version: 0.1 # History: 2017.10.25 import requests,time, pymysql, re, datetime from exchangelib import DELEGATE, Account, Credentials, Message, Mailbox, HTMLBody host = ‘xxx‘ user = ‘xxx‘ passwd = ‘xxx‘ dbme = ‘crawl‘ dbtarget = ‘back_brace‘ table = ‘movie_hotwords‘ tabledelta = ‘movie_hotwords_delta‘ tablesync = ‘slot_value‘ port = 3306 tolist = [‘xxx@xxx.com‘] def get_info(): try: url = ‘http://www.cbooo.cn/Mdata/getMdata_movie?area=50&type=0&year=0&initial=%E5%85%A8%E9%83%A8&pIndex=1‘ pData = requests.get(url).json() return pData[‘tPage‘], pData[‘tCount‘] except: print("获取总页数和总电影数失败") def get_movies(page): try: url = ‘http://www.cbooo.cn/Mdata/getMdata_movie?area=50&type=0&year=0&initial=%E5%85%A8%E9%83%A8&pIndex=‘ + str(page) pData = requests.get(url).json() movies_list = pData[‘pData‘] return movies_list except: print(‘获取第%s页电影列表失败‘ % page) def Movie_insert(host, user, passwd, dbme, port, table, movies_list): conn=pymysql.connect(host=host, user=user, passwd=passwd, db=dbme, port=port, charset="utf8") cur=conn.cursor() new_movies = [] punc = "!??"#$%&'()*+,-/:;<=>@[\]^_`{|}~?????、〃》「」『』【】〔〕〖〗?????〝〞????–—‘’?“”??…?﹏.()::。·" punctuation = punc for movie in movies_list: try: movie[‘MovieName‘] = re.sub(r"[%s]+" % punctuation, "", movie["MovieName"]) cmd = ‘insert into %s(movie_id, movie_name) values("%s", "%s")‘ % (table, movie[‘ID‘], movie[‘MovieName‘]) cur.execute(cmd) new_movies.append(movie) except pymysql.Error: print(" "*20, movie[‘MovieName‘], "already exists, skip……") cur.close() conn.commit() conn.close() return new_movies def Movie_new_and_sync(host, user, passwd, dbme, dbtarget, port, tabledelta, movies_list, tablesync): conn = pymysql.connect(host=host, user=user, passwd=passwd, db=dbme, port=port, charset="utf8") cur = conn.cursor() cur.execute("delete from %s " % dbme+"."+tabledelta) for movie in movies_list: try: cmd = ‘insert into %s(movie_id, movie_name) values("%s", "%s")‘ % (tabledelta, movie[‘ID‘], movie[‘MovieName‘]) cmdsync = ‘insert into %s(slot_type_id, slot_value, create_by, modify_by, gmt_create, gmt_modify, out_value) values("%s", "%s", "%s", "%s", "%s", "%s", "%s")‘ % (dbtarget+"."+tablesync, "xxxxxx", movie[‘MovieName‘], "system", "system", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),"") cur.execute(cmd) cur.execute(cmdsync) except pymysql.Error: print(" " * 20, movie[‘MovieName‘], "already exists, skip……") try: cmdbacktoskill = ‘insert into back_brace.release_task(app_type,app_status,type,ref_id,status,register_id,create_by,modify_by,gmt_create,gmt_modify) values("BACKBRACE","testpass","SLOT","xxxxxx","init","SLOT_BACKBRACE_TESTPASS" ,"zhenghai.zhang","zhenghai.zhang","%s","%s")‘ % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) cmdskilltoskillpro = ‘insert into back_brace.release_task(app_type,app_status,type,ref_id,status,register_id,create_by,modify_by,gmt_create,gmt_modify) values("SKILL","deploy","SLOT","xxxxxx","init","SLOT_SKILL_DEPLOY" ,"zhenghai.zhang","zhenghai.zhang","%s","%s")‘ % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) print(cmdbacktoskill) cur.execute(cmdbacktoskill) print(cmdskilltoskillpro) cur.execute(cmdskilltoskillpro) except pymysql.Error: print("write into back_brace.release_task error!!!") cur.close() conn.commit() conn.close() def Email(to, subject, body): creds = Credentials( username=‘xxxxxx‘, password=‘xxxxxx‘) account = Account( primary_smtp_address=‘xxx@xxx.com‘, credentials=creds, autodiscover=True, access_type=DELEGATE) m = Message( account=account, subject=subject, body=HTMLBody(body), to_recipients=[Mailbox(email_address=to)]) m.send_and_save() if __name__ == ‘__main__‘: update_movies = [] pages, counts = get_info() pages = 1 for i in range(1,pages + 1): print("*"*30,i,"*"*30) movies_list = get_movies(i) new_movies = Movie_insert(host, user, passwd, dbme, port, table, movies_list) for new_movie in new_movies: print(new_movie[‘MovieName‘],"Added") onemovie = {} onemovie["ID"] = new_movie["ID"] onemovie["MovieName"] = new_movie["MovieName"] update_movies.append(onemovie) time.sleep(1) print(update_movies) try: Movie_new_and_sync(host, user, passwd, dbme, dbtarget, port, tabledelta, update_movies, tablesync) # 将增加的电影写入movie_hotwords_delta表中 except: print("Movie update and sync Error!") subject = ‘本次新增电影名称‘ body = "本次新增的电影名称为:<hr>" for movie in update_movies: body += movie["MovieName"] + "<br>" for to in tolist: Email(to, subject, body)
欢迎大侠指点
Python3.5爬取cbooo.cn数据并且同步到mysql中
标签:add auto try code str def ror address hang
原文地址:http://www.cnblogs.com/zhzhang/p/7766928.html