码迷,mamicode.com
首页 > 数据库 > 详细

python 爬虫爬取几十家门店在美团外卖上的排名,并插入数据库,最后在前端显示

时间:2016-08-04 19:54:03      阅读:1191      评论:0      收藏:0      [点我收藏+]

标签:localhost   python   version   insert   import   web   

爬虫脚本:

#!/usr/bin/env python
# encoding: utf-8

"""
@version: ??
@author: phpergao
@license: Apache Licence 
@file: meituan_paiming.py
@time: 2016/8/1 15:16
"""


import urllib,json,re
import urllib.parse
import http.cookiejar
import urllib.request,datetime,time,SQL
from multiprocessing import Process
import collections

def main(store_name,paiming):
    """Insert one ranking record into ``dbo.meituan_paiming``.

    Args:
        store_name: Shop title as scraped from the listing page. It comes
            from an external web page, so single quotes are doubled before
            it is placed inside the SQL string literal.
        paiming: Position of the shop in the listing; coerced with ``int()``.

    The ``dingwei_address`` column is written as a single space and
    ``updatetime`` as the local time formatted ``YYYY-MM-DD HH:MM:SS``.
    """
    ms = SQL.MSSQL(host='192.168.72.172',user="stdservice",pwd="7数据库密码",db="stddata")

    # A single clock read keeps the date and time parts consistent
    # (two separate reads could straddle midnight).
    sj = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # ExecNonQuery is called with a finished SQL string only, so the values
    # are made safe here: double any embedded single quote in the text value
    # and force the rank to be an integer.
    safe_name = str(store_name).replace("'", "''")
    rank = int(paiming)

    ms.ExecNonQuery('''
INSERT INTO dbo.meituan_paiming

VALUES  ( '{}' , -- store_name - char(20)
          '{}' , -- paiming - int
          ' ' , -- dingwei_address - char(500)
          '{}'  -- updatetime - char(50)
        )
        '''.format(safe_name,rank,sj))


def paiming(url, keyword="72"):
    """Scrape one Meituan Waimai listing page and record matching shops' ranks.

    The page is downloaded through a cookie-aware opener that sends a desktop
    browser User-Agent. Every restaurant ``<div>`` matched by the pattern is
    numbered from 1 in page order; each one whose title contains ``keyword``
    is printed and handed to ``main(title, rank)``.

    Args:
        url: Listing page URL to fetch.
        keyword: Substring a restaurant title must contain to be recorded.
            Defaults to ``"72"``, the value the script originally hard-coded
            (presumably part of the chain's shop names -- TODO confirm).

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Pretend to be a regular browser.
    User_Agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"

    # Build ONE opener that has both cookie support and the browser header.
    # Previously the header was attached to a second opener that was never
    # used, so the request went out with urllib's default User-Agent.
    cj = http.cookiejar.LWPCookieJar()
    cookies_support = urllib.request.HTTPCookieProcessor(cj)
    opener = urllib.request.build_opener(cookies_support, urllib.request.HTTPHandler)
    opener.addheaders = [('User-Agent', User_Agent)]
    urllib.request.install_opener(opener)

    # The timeout keeps a stalled connection from hanging the worker forever.
    ret = opener.open(url, timeout=30).read()
    titles = re.findall(
        r'''<div data-title="(.*?)" data-bulletin=".*?\n*?.*?" data-poiid=".*?" class="restaurant" data-all=".*?"''',
        ret.decode('utf8'),
    )

    sj = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    for rank, title in enumerate(titles, start=1):
        if keyword in title:
            print(title,rank,sj)
            main(title,rank)
            print("ok")
# Meituan Waimai listing pages to poll; each entry is fetched by paiming()
# in its own process from the __main__ block below.
# NOTE(review): the trailing path token looks like a geohash of the delivery
# address the ranking is computed for -- confirm before adding new entries.
urllist = [
    "http://waimai.meituan.com/home/ws0e9gmds0u7",
    "http://waimai.meituan.com/home/ws0edu48zvm8",
    "http://waimai.meituan.com/home/ws0e6v9brqq8",
    "http://waimai.meituan.com/home/ws0ec83j0fbm",
    "http://waimai.meituan.com/home/ws0efv4veqks",
    "http://waimai.meituan.com/home/ws0edg19uxt6",
    "http://waimai.meituan.com/home/ws0esdnh56um",
    "http://waimai.meituan.com/home/ws0e937zdbph",
    "http://waimai.meituan.com/home/ws0e3rfy1wxb",
    "http://waimai.meituan.com/home/ws0eddzptu5e",
    "http://waimai.meituan.com/home/ws0eeh52gdry",
    "http://waimai.meituan.com/home/ws0e7jmpm28g",
    "http://waimai.meituan.com/home/ws0e9pqmfr47",
    "http://waimai.meituan.com/home/ws0edyvqgmrt",
    "http://waimai.meituan.com/home/ws0edjnftj0h",
    "http://waimai.meituan.com/home/ws0e3u0fb2gx",
    "http://waimai.meituan.com/home/ws0ed9dq6x1f",
    "http://waimai.meituan.com/home/ws0eehrxexqc",
    "http://waimai.meituan.com/home/ws0dec6tzjwm",
    "http://waimai.meituan.com/home/ws0e3txh5ym6",
    "http://waimai.meituan.com/home/ws0ecqzp6n82",
    "http://waimai.meituan.com/home/ws0e4g1dxshy",
    "http://waimai.meituan.com/home/ws0g8ejh80rp",
    "http://waimai.meituan.com/home/ws0ec37vje4d",
    "http://waimai.meituan.com/home/ws0eg711k1t1",
    "http://waimai.meituan.com/home/ws0dgmq924yy",
    "http://waimai.meituan.com/home/ws0cff7x3m4u",
    "http://waimai.meituan.com/home/ws0cg5zd5g4y",
    "http://waimai.meituan.com/home/ws0ghenxxz82",
    "http://waimai.meituan.com/home/ws0cfvkuzvtk",
    "http://waimai.meituan.com/home/ws14dtvjhqm4",
    "http://waimai.meituan.com/home/ws100stcewjn",
    "http://waimai.meituan.com/home/ws104zssdsyp",
    "http://waimai.meituan.com/home/ws102hkctrhh",
    "http://waimai.meituan.com/home/ws10m19qgq7h",
    "http://waimai.meituan.com/home/ws1079s3ek0m",
    "http://waimai.meituan.com/home/ws0cq7hwhebm",
    "http://waimai.meituan.com/home/ws10hyydu2f0",
    "http://waimai.meituan.com/home/ws06vy2w07yr"

           ]
if __name__ == '__main__':
    # Start one worker process per listing URL. The 10-second pause between
    # launches staggers the requests instead of firing them all at once.
    workers = []
    for i in urllist:
        p = Process(target=paiming, args=(i,))
        p.start()
        workers.append(p)
        time.sleep(10)

    # Wait for every worker explicitly so the script's exit point is clear.
    for p in workers:
        p.join()


操作数据库的脚本:

#!/usr/bin/env python
# encoding: utf-8

"""
@version: ??
@author: phpergao
@license: Apache Licence 
@file: SQL.py
@time: 2016/7/25 17:56
"""


import pymssql
class MSSQL:
    """
    Thin convenience wrapper around pymssql.

    pymssql can be downloaded from
    http://www.lfd.uci.edu/~gohlke/pythonlibs/#pymssql
    TCP/IP must be enabled in SQL Server Configuration Manager before the
    library can connect.

    Every call opens a fresh connection and closes it again when done.

    Usage:
        ms = MSSQL(host="localhost", user="sa", pwd="123456", db="PythonWeiboStatistics")
        rows = ms.ExecQuery("SELECT id,NickName FROM WeiBoUser")
        ms.ExecNonQuery("insert into WeiBoUser values(%s,%s)", ("2", "3"))
    """

    def __init__(self,host,user,pwd,db):
        """Store the connection settings; no connection is opened here."""
        self.host = host
        self.user = user
        self.pwd = pwd
        self.db = db

    def __GetConnect(self):
        """
        Open a connection (kept in ``self.conn``) and return a cursor on it.

        Raises:
            NameError: If no database name was configured, or if no cursor
                could be obtained from the connection.
        """
        if not self.db:
            # The exception must be instantiated: raising a (class, message)
            # tuple is itself a TypeError on Python 3.
            raise NameError("没有设置数据库信息")
        self.conn = pymssql.connect(host=self.host,user=self.user,password=self.pwd,database=self.db,charset="utf8")
        cur = self.conn.cursor()
        if not cur:
            self.conn.close()
            raise NameError("连接数据库失败")
        return cur

    @staticmethod
    def __Execute(cur,sql,params):
        """Run ``sql`` on ``cur``, binding ``params`` only when they are given."""
        if params is None:
            cur.execute(sql)
        else:
            cur.execute(sql,params)

    def ExecQuery(self,sql,params=None):
        """
        Execute a query and return all rows.

        Args:
            sql: Statement to run.
            params: Optional values bound to the ``%s`` / ``%d`` placeholders
                in ``sql``. Omit it to run ``sql`` verbatim.

        Returns:
            A list of tuples: one list element per row, one tuple element
            per column.

        Example:
            ms = MSSQL(host="localhost",user="sa",pwd="123456",db="PythonWeiboStatistics")
            resList = ms.ExecQuery("SELECT id,NickName FROM WeiBoUser")
            for (id,NickName) in resList:
                print(str(id),NickName)
        """
        cur = self.__GetConnect()
        try:
            self.__Execute(cur,sql,params)
            return cur.fetchall()
        finally:
            # The connection must be closed once the query is done, including
            # when execute()/fetchall() raises.
            self.conn.close()

    def ExecNonQuery(self,sql,params=None):
        """
        Execute a statement that returns no rows, then commit.

        Args:
            sql: Statement to run.
            params: Optional values bound to the ``%s`` / ``%d`` placeholders
                in ``sql``. Omit it to run ``sql`` verbatim.
        """
        cur = self.__GetConnect()
        try:
            self.__Execute(cur,sql,params)
            self.conn.commit()
        finally:
            # Always release the connection; uncommitted work is discarded
            # when the statement fails.
            self.conn.close()

def main(ip,username,password,dbname,cmd):
    """Run ``cmd`` as a query on the given server and print each returned row.

    Args:
        ip: Server host passed to ``MSSQL`` as ``host``.
        username: Login name.
        password: Login password.
        dbname: Database to connect to.
        cmd: SQL query text; its rows are fetched with ``MSSQL.ExecQuery``.
    """
    ms = MSSQL(host=ip,user=username,pwd=password,db=dbname)
    for row in ms.ExecQuery(cmd):
        print(row)


if __name__ == '__main__':
    # Nothing to do when run directly; this module is meant to be imported.
    pass

前端PHP网页脚本:

<?php
// Session / login guard.
// This must run before ANY output is sent: session_start() and header() can
// only modify HTTP headers while the response body has not started, so the
// PHP block sits above the DOCTYPE instead of inside <head>.
include "config.php";
session_start();
if (!isset($_SESSION['username'])) {
    $home_url = 'logIn.php';
    header('Location: ' . $home_url);
    // Stop here; without this the rest of the protected page would still be
    // rendered and sent to the unauthenticated client.
    exit;
}

// Counters initialised to zero. They are not referenced in this file;
// presumably the included templates read them -- TODO confirm.
$ms = 0;
$mt = 0;
$ds = 0;
$dt = 0;
?>
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>主页</title>

<script src="js/jquery-1.9.1.min.js" type="text/javascript"></script>
<script src="js/bootstrap-select.js" type="text/javascript"></script>
<script type="text/javascript" src="grid.js"></script>
    <link id="bs-css" href="css/bootstrap-cerulean.min.css" rel="stylesheet">
    <link href="css/charisma-app.css" rel="stylesheet">      
    <link href="css/bootstrap-select.css" rel="stylesheet">
    <link href=‘bower_components/fullcalendar/dist/fullcalendar.css‘ rel=‘stylesheet‘>
    <link href=‘bower_components/fullcalendar/dist/fullcalendar.print.css‘ rel=‘stylesheet‘ media=‘print‘>
    <link href=‘bower_components/chosen/chosen.min.css‘ rel=‘stylesheet‘>
    <link href=‘bower_components/colorbox/example3/colorbox.css‘ rel=‘stylesheet‘>
    <link href=‘bower_components/responsive-tables/responsive-tables.css‘ rel=‘stylesheet‘>

    <link href=‘bower_components/bootstrap-tour/build/css/bootstrap-tour.min.css‘ rel=‘stylesheet‘>
    <link href=‘css/jquery.noty.css‘ rel=‘stylesheet‘>
    <link href=‘css/noty_theme_default.css‘ rel=‘stylesheet‘>
    <link href=‘css/elfinder.min.css‘ rel=‘stylesheet‘>
    <link href=‘css/elfinder.theme.css‘ rel=‘stylesheet‘>
    <link href=‘css/jquery.iphone.toggle.css‘ rel=‘stylesheet‘>
    <link href=‘css/uploadify.css‘ rel=‘stylesheet‘>
    <link href=‘css/animate.min.css‘ rel=‘stylesheet‘>


<link href="grid.css" type="text/css" rel="stylesheet"> 
<script type="text/javascript" src="laydate/laydate.js"></script>
<script type="text/javascript" src="bower_components/responsive-tables/responsive-tables.js"></script>
<script type="text/javascript"> laydate.skin(‘danlan‘);</script>
<script src="js/jquery.noty.js"></script>





</head>
<body>
    <!-- topbar starts -->
	<?php include 'dropdownmenu.php' ?>
    <!-- topbar ends -->
<div class="ch-container" style="position:relative;top:80px">
    <div class="row">
	
	
	
<?php include 'navi.php'; ?>
		
		
		
        <div id="content" class="col-lg-10 col-sm-10">
            <!-- content starts -->
        <div id="dlayerreport">

</div>    
        
<div class="row">
    <div class="box col-md-12">
        <div class="box-inner">
            <div class="box-header well">
                <h2><i class="glyphicon glyphicon-info-sign"></i>美团店铺排名</h2>
                <div class="box-icon">
                    <a href="orderlist.php" class="btn btn-minimize btn-round btn-default"><i class="glyphicon glyphicon-chevron-up"></i></a>
                </div>
            </div>
            <div class="box-content row">
                <div class="col-lg-7 col-md-12" style="width:100%;">
 <div id="wepaydailysales" class="box-content" style="width:100%;">
 	<form id="grid_form_id">
 		<table class="table table-striped table-bordered bootstrap-datatable datatable responsive">		
			<thead><tr>
			<!--<th class="th1" style="width:5%;">排序</th>-->
			<th class="th1" style="width:15%;">店铺</th>
			<th class="th2"  style="width:15%;">排名</th>
			<th class="th5"  style="width:25%;">更新时间</th>
			<th class="th4"  style="width:25%;"> 定位地址</th>
			
			
			</tr></thead>
<?php
// Render one <tr> per record of dbo.meituan_paiming, best rank first.
// Text columns are converted from GBK to UTF-8 with iconv() and then
// HTML-escaped: shop names originate from a scraped third-party page and
// must never be echoed into the document raw.

$serverName = "localhost";
// NOTE(review): credentials are hard-coded here although config.php is
// included at the top of the page -- consider moving them there.
$connectionInfo = array("UID"=>"stdservice", "PWD"=>"数据库密码", "Database"=>"STDdata");
$conn = sqlsrv_connect($serverName, $connectionInfo);

if ($conn === false) {
    // Connection failed: show a placeholder row instead of calling
    // sqlsrv_query()/sqlsrv_close() on an invalid handle.
    echo '<tr><td colspan="4">数据库连接失败</td></tr>';
} else {
    $queryString = "SELECT * FROM dbo.meituan_paiming ORDER BY paiming aSC";
    if ($result = sqlsrv_query($conn, $queryString)) {
        while ($row = sqlsrv_fetch_array($result, SQLSRV_FETCH_ASSOC)) {
            // Column order matches the table header: shop, rank,
            // update time, located address.
            $cells = array(
                iconv("gbk//ignore", "utf-8", $row['store_name']),
                $row['paiming'],
                iconv("gbk//ignore", "utf-8", $row['updatetime']),
                iconv("gbk//ignore", "utf-8", $row['dingwei_address']),
            );

            // The opening <tr> was previously missing, leaving only a
            // dangling </tr> after each record.
            echo '<tr>';
            foreach ($cells as $cell) {
                // iconv() returns false on failure; the cast turns that
                // into an empty cell.
                echo '<td>' . htmlspecialchars((string) $cell, ENT_QUOTES, 'UTF-8') . '</td>';
            }
            echo '</tr>';
        }
        sqlsrv_free_stmt($result);
    }
    sqlsrv_close($conn);
}

?>

<script language="javascript">
   function show(itemid,itemname) {
$("#itemid").attr("value",itemid);
$("#itemname").html(itemname);
    }
</script>

</table></form></div>
                </div>
            </div>
        </div>
    </div>
</div>

</div>


<div class="modal fade" id="menu" tabindex="-1" role="dialog" aria-labelledby="myModalLabel" aria-hidden="true" style="display: none;">
        <div class="modal-dialog">
            <div class="modal-content">
                <div class="modal-header">
                    <button type="button" class="close" data-dismiss="modal">×</button>
                    <h3 id="itemname"></h3>
                </div>
                <div class="modal-body">
					<form class="form-horizontal" action="action_updateimage.php" method="post"  enctype="multipart/form-data" >
					<fieldset>
					<div class="input-group input-group-lg">
					
					<div style="display:none;" class="col-sm-8"><input type="text" name="itemid" id="itemid" class="form-control" readonly ="readonly"></div>
					</div>   

					<div class="input-group input-group-lg">
					<span class="input-group-addon"><i class="glyphicon glyphicon-folder-open red"></i></span>
					<input type="file" name="FileUpload1" id="FileUpload1"  class="btn btn-primary green" />
					</div>					
					
					
                </div>
				 <div  class="clearfix"></div><br>
						 <p style="text-align:center"> <button type="submit" name="submit" class="btn btn-primary" style="text-align:center">确认</button></p>
				</form>
                <div class="modal-footer">
                    <a href="rider" class="btn btn-default" data-dismiss="modal">关闭</a>	
				</div>
             </div>
         </div>
 </div>



<?php include 'userprof.php'; ?>
<?php include 'footer.php'; ?>
</div>
</body>
</html>


最后执行完成后在前端显示的效果图:

技术分享


python 爬虫爬取几十家门店在美团外卖上的排名,并插入数据库,最后在前端显示

标签:localhost   python   version   insert   import   web   

原文地址:http://9272317.blog.51cto.com/9262317/1834268

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!