码迷,mamicode.com
首页 > 其他好文 > 详细

世界500强

时间:2019-01-07 17:50:50      阅读:180      评论:0      收藏:0      [点我收藏+]

标签:tpi   int   soc   database   into   type   ***   tags   word   

import psycopg2
import json
from news_project.config.sql_log import log
from news_project.middlewares import Deal_Content

class NewsProjectPipeline(object):
    """Scrapy item pipeline that writes scraped news items to PostgreSQL.

    A single database connection is opened when the spider starts, reused
    for every item, and closed when the spider finishes. Each item is
    inserted into ``bjzs_big_data.baoji_news`` in its own transaction.

    Connection settings come from ``log()``, whose result exposes the
    ``database``, ``user``, ``password``, ``host`` and ``port`` attributes.
    """

    _TABLE = "bjzs_big_data.baoji_news"

    # (table column, item key) pairs, in the order they are inserted.
    _BASE_FIELDS = (
        ("type_cn", "type_cn"),
        ("source", "news"),
        ("level2", "id"),
        ("level1", "pid"),
        ("event_time", "time"),
        ("title", "title"),
        ("url", "title_url"),
        ("content", "content"),
        ("lable", "tags"),
        ("type_no", "type_no"),
    )

    def open_spider(self, spider):
        """Open the database connection used for the whole crawl."""
        self.conn = self._connect()

    def process_item(self, item, spider):
        """Normalise one scraped item and insert it into the news table.

        Args:
            item: Mapping-like scraped item. It must provide the keys
                ``time``, ``title_url``, ``type_cn``, ``type_no``,
                ``content``, ``news``, ``id``, ``pid``, ``title`` and
                ``tags``; ``association_id`` is optional.
            spider: The spider that produced the item (unused).

        Returns:
            The normalised item as a ``dict``, or ``0`` when the item has
            no content and is therefore not stored.

        Raises:
            KeyError: If a required key is missing from the item.
            psycopg2.Error: If the INSERT fails; the transaction is rolled
                back before the error is re-raised.
        """
        item = dict(item)
        # Convert the scraped timestamp into the stored time format.
        item["time"] = Deal_Content().handleTime(item["time"], item["title_url"])
        item = self._normalize_item(item)

        if item["content"] is None:
            # Kept for backward compatibility with the original contract.
            # NOTE(review): Scrapy's documented way to discard an item is to
            # raise scrapy.exceptions.DropItem - consider switching.
            return 0

        sql, params = self._build_insert(item)
        conn = self._ensure_connection()
        try:
            with conn.cursor() as cur:
                cur.execute(sql, params)
            conn.commit()
        except psycopg2.Error:
            # Leave the shared connection usable for the next item.
            conn.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Close the database connection if one was opened."""
        conn = getattr(self, "conn", None)
        if conn is not None:
            conn.close()

    def _connect(self):
        """Create a new PostgreSQL connection from the ``log()`` settings."""
        settings = self.l = log()
        return psycopg2.connect(
            database=settings.database,
            user=settings.user,
            password=settings.password,
            host=settings.host,
            port=settings.port,
        )

    def _ensure_connection(self):
        """Return an open connection, reconnecting if it is missing or closed."""
        conn = getattr(self, "conn", None)
        if conn is None or conn.closed:
            conn = self.conn = self._connect()
        return conn

    @staticmethod
    def _normalize_item(item):
        """Return a copy of ``item`` with blanks as ``None`` and defaults applied."""
        cleaned = {
            key: (None if value == "" else value) for key, value in item.items()
        }
        if cleaned["type_cn"] is None:
            cleaned["type_cn"] = "行业新闻"
        if cleaned["type_no"] is None:
            cleaned["type_no"] = 16
        return cleaned

    @classmethod
    def _build_insert(cls, item):
        """Return the ``(sql, params)`` pair for inserting ``item``.

        Two storage forms exist: items carrying a truthy ``association_id``
        also write that column; all other items omit it.
        """
        fields = cls._BASE_FIELDS
        if item.get("association_id"):
            fields = fields + (("association_id", "association_id"),)
        # Only class-level constants are formatted into the statement; every
        # item value is passed as a bound parameter.
        columns = ",".join(column for column, _ in fields)
        placeholders = ",".join(["%s"] * len(fields))
        sql = "INSERT INTO {}({}) VALUES ({})".format(
            cls._TABLE, columns, placeholders
        )
        params = tuple(item[key] for _, key in fields)
        return sql, params

世界500强

标签:tpi   int   soc   database   into   type   ***   tags   word   

原文地址:https://www.cnblogs.com/yuanjia8888/p/10233834.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!