码迷,mamicode.com
首页 > 编程语言 > 详细

python爬取数据保存入库

时间:2017-01-05 09:52:16      阅读:262      评论:0      收藏:0      [点我收藏+]

标签:exist   table   value   ima   urllib   span   except   .exe   rip   

import urllib2

import re

import MySQLdb

class LatestTest:
    """Scrape the latest posts from toutiao.io and store them in MySQL.

    Each scraped entry is a ``[url, title, email]`` list, where the third
    field is the text of the first ``<span>`` inside the post's meta block
    (the original code stores it in the ``email`` column).
    """

    def __init__(self):
        """Set the listing URL and a browser-like ``User-Agent`` header."""
        self.url = "https://toutiao.io/latest"
        self.UserAgent = ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/48.0.2564.97 Safari/537.36')
        self.header = {'User-Agent': self.UserAgent}

    @staticmethod
    def _parsePosts(html, limit=10):
        """Extract up to ``limit`` ``[url, title, email]`` entries from HTML.

        Args:
            html: Page source of the listing page.
            limit: Maximum number of entries to return (the first ones found).

        Returns:
            A list of three-item lists with surrounding whitespace stripped;
            empty when nothing matches.
        """
        # re.S lets '.' span newlines, since one post covers several lines.
        pattern = re.compile(
            r'<div class="post">.*?class="title">.*?href="(.*?)">(.*?)</a>'
            r'.*?<div class="meta">.*?<span>(.*?)</span>',
            re.S)
        matches = pattern.findall(html)[:limit]
        return [[field.strip() for field in match] for match in matches]

    def getDate(self):
        """Download the listing page and return its first ten entries.

        Returns:
            A list of ``[url, title, email]`` lists (at most ten).

        Raises:
            Whatever ``urllib2.urlopen`` raises when the request fails.
        """
        request = urllib2.Request(self.url, headers=self.header)
        response = urllib2.urlopen(request)
        try:
            respone = response.read()
        finally:
            # Release the HTTP connection even when reading fails.
            response.close()
        return self._parsePosts(respone)

    def savaDateMysql(self, url, title, email):
        """Insert one scraped entry into the ``content`` table.

        Errors are printed rather than raised, so one failed row does not
        stop the caller's loop (best-effort behaviour of the original).

        The table is expected to look like:
        ``content(id int AUTO_INCREMENT PRIMARY KEY, url varchar(100),
        title varchar(100), email varchar(100))``.

        Args:
            url: Link of the post.
            title: Title of the post.
            email: Text taken from the post's meta ``<span>``.
        """
        # Placeholders are filled in by the driver, so scraped text cannot
        # break out of the statement (no string-built SQL).
        sql = "insert into content(url,title,email)values(%s,%s,%s)"
        conn = None
        try:
            # NOTE(review): host and credentials are hard-coded; consider
            # moving them to configuration.
            conn = MySQLdb.connect('192.168.200.23', 'root', 'g6s8m3t7s',
                                   'mysql', charset='utf8')
            cursor = conn.cursor()
            cursor.execute(sql, (url, title, email))
            conn.commit()
        except Exception as e:
            print(e)
        finally:
            # conn stays None when connect() itself failed.
            if conn is not None:
                conn.close()


if __name__ == '__main__':
    # Scrape the latest entries, echo each one and store it in MySQL.
    lat = LatestTest()
    contentlist = lat.getDate()
    try:
        for tent in contentlist:
            url, title, email = [field.strip() for field in tent[:3]]
            print("%s %s %s" % (url, title, email))
            lat.savaDateMysql(url, title, email)
    except Exception as e:
        # Top-level boundary: report the problem instead of a traceback.
        print(e)

    

 

python爬取数据保存入库

标签:exist   table   value   ima   urllib   span   except   .exe   rip   

原文地址:http://www.cnblogs.com/be-come/p/6251073.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!