百度地图爬取数据

时间：2018-10-22 17:52:44 阅读：656 评论：0 收藏：0 [点我收藏+]

标签：imp tail ocata web 打印 code window parse gae

# -*- coding:utf-8 -*-
import requests
import re
import xlwt
import demjson
import time
import json

class get_location():
    """Find places around Baoji that have no Baidu street-view panorama.

    Workflow (driven by the script at the bottom of the file):

    1. ``get_html`` downloads a JSON catalog of named places.
    2. ``parse`` turns every matching place name into a Baidu Place API
       search URL and stores it in ``self.li``.
    3. ``red_dot_parse`` reads each search result, builds a panorama URL
       from the result's coordinates and hands it to ``Judge``.
    4. ``Judge`` requests the panorama; places for which no JPEG comes back
       are written to an ``.xls`` sheet by ``excel_write``.

    Attributes:
        i: Next free row index in the output sheet.
        li: Place-search URLs produced by ``parse``.
        dict1: Maps a panorama URL to the place name it was built for.
        li_ak: Baidu Maps API key appended to every API URL.
        headers: HTTP headers sent with every request.
        f: The ``xlwt.Workbook`` being filled.
        sheet1: The single worksheet inside ``f``.
        out_path: File the workbook is saved to after every written row.
    """

    # NOTE(review): the API key and session cookie are committed in source.
    # They are kept as defaults for backward compatibility, but should be
    # supplied by the caller (or the environment) in real deployments.
    DEFAULT_AK = 'U0QGae7viQsN0yLBirGsRD90XI0tlcGO'
    DEFAULT_OUT_PATH = r'C:\Users\85740\Desktop\get_ip.xls'

    # City prepended to the ``region`` parameter of most searches.
    CITY = '宝鸡市'

    # Seconds to wait for any single HTTP request before giving up.
    REQUEST_TIMEOUT = 10

    # (keyword, prefix_city): a place whose name contains ``keyword`` is
    # searched; ``prefix_city`` says whether ``region`` is CITY + name or
    # just the name. Order matters: it fixes the order of URLs in ``li``.
    KEYWORD_RULES = (
        ('区', True),
        ('街道', True),
        ('县', False),
        ('镇', True),
        ('公园', True),
        ('法门寺', True),
    )

    def __init__(self, ak=DEFAULT_AK, out_path=DEFAULT_OUT_PATH):
        """Set up request headers, result containers and the workbook.

        :param ak: Baidu Maps API key.
        :param out_path: Path the ``.xls`` workbook is saved to.
        """
        self.i = 0
        self.li = []
        self.dict1 = {}
        self.li_ak = ak
        self.out_path = out_path
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36',
            'Cookie': 'JSESSIONID=15BF07D8D4F4515A5C2247D5606AECB2'
        }
        self.f = xlwt.Workbook(encoding='utf-8')
        self.sheet1 = self.f.add_sheet(u'sheet1', cell_overwrite_ok=True)

    def get_html(self, url, retries=3):
        """Download ``url`` and return the response body as text.

        A non-200 status is retried up to ``retries`` more times with a
        short pause in between; the loop is bounded so a permanently
        failing URL cannot recurse forever.

        :param url: Address to fetch.
        :param retries: Extra attempts after the first non-200 response.
        :return: The decoded body, or ``None`` if the request raised or
            every attempt returned a non-200 status.
        """
        for attempt in range(retries + 1):
            try:
                res = requests.get(url, headers=self.headers,
                                   timeout=self.REQUEST_TIMEOUT)
            except requests.RequestException as e:
                print("问题是", e)
                return None
            if res.status_code == 200:
                # Let requests guess the charset from the body itself.
                res.encoding = res.apparent_encoding
                return res.text
            if attempt < retries:
                time.sleep(0.1)
        print("问题是", "HTTP %s: %s" % (res.status_code, url))
        return None

    def parse(self, response, url_location_detail):
        """Build place-search URLs from the catalog and append them to ``li``.

        The catalog is a JSON array of objects such as
        ``{"id": 610300, "lat": 34.36784, "lng": 107.24291, "name": "宝鸡市"}``.
        Every name containing one of the ``KEYWORD_RULES`` keywords yields
        a URL of the form
        ``<base>?query=<name>&region=<region>&output=json&ak=<key>``.
        A name matching several keywords contributes each distinct URL
        only once, so the same search is not requested repeatedly.

        :param response: Catalog JSON text; ``None``/empty is ignored.
        :param url_location_detail: Base URL of the place-search endpoint.
        :return: None. Results are appended to ``self.li``.
        """
        if not response:
            print("问题是", "没有可解析的目录数据")
            return
        for item in json.loads(response):
            name = item.get('name') if isinstance(item, dict) else None
            if not name:
                continue
            urls_for_item = []
            for keyword, prefix_city in self.KEYWORD_RULES:
                if keyword not in name:
                    continue
                region = self.CITY + name if prefix_city else name
                url = (url_location_detail + "?query=" + name
                       + "&region=" + region
                       + "&output=json&ak=" + self.li_ak)
                if url not in urls_for_item:
                    urls_for_item.append(url)
            self.li.extend(urls_for_item)

    def red_dot_parse(self, response):
        """Check every place in one search result for a panorama.

        For each entry of the ``results`` array that carries a
        ``location`` with ``lat`` and ``lng``, a panorama URL is built,
        remembered in ``dict1`` under the place name and passed to
        ``Judge``. Entries without coordinates are skipped. A failure
        while checking one place is reported and does not stop the rest.

        :param response: Place-search JSON text; ``None``/empty is ignored.
        :return: None.
        """
        if not response:
            return
        try:
            response_json = json.loads(response)
        except ValueError as e:
            print("问题是", e)
            return
        if not isinstance(response_json, dict):
            return
        view_url = "http://api.map.baidu.com/panorama/v2?ak={}&width=512&height=256&location={},{}&fov=180"
        # Error payloads carry no "results" key; treat that as "no places".
        for place in response_json.get("results") or []:
            name = place.get("name")
            location = place.get('location')
            if not isinstance(location, dict):
                continue
            lat = location.get('lat')
            lng = location.get('lng')
            if lat is None or lng is None:
                continue
            # The panorama URL is formatted with longitude first.
            detail_view_url = view_url.format(self.li_ak, lng, lat)
            self.dict1[detail_view_url] = name
            try:
                self.Judge(detail_view_url)
            except (requests.RequestException, ValueError, KeyError, OSError) as e:
                # Best effort: report the failure and move on.
                print("问题是", name, e)

    def Judge(self, detail_view_url):
        """Request a panorama URL and record the place if none exists.

        A JPEG response means a panorama exists and the place name is
        printed. Otherwise the body must be JSON (anything else raises
        ``ValueError``, so unexpected responses are not recorded) and the
        place name is written to the sheet.

        :param detail_view_url: Panorama URL built by ``red_dot_parse``.
        :return: None.
        """
        res = requests.get(detail_view_url, headers=self.headers,
                           timeout=self.REQUEST_TIMEOUT)
        # Look the name up by the key it was stored under; ``res.url`` can
        # differ after redirects or URL normalisation.
        name = self.dict1.get(detail_view_url, detail_view_url)
        content_type = res.headers.get('Content-Type', '')
        if content_type.startswith('image/jpeg'):
            print("有全景的地方", name)
        else:
            res.json()
            self.excel_write(name)

    def excel_write(self, text_ip):
        """Append ``text_ip`` as a new row in column 0 and save the workbook.

        :param text_ip: Cell value (here: the place name).
        :return: None.
        """
        self.sheet1.write(self.i, 0, text_ip)
        self.i += 1
        # Saved after every row so partial results survive a later crash.
        self.f.save(self.out_path)

def main():
    """Fetch the place catalog, then check every matching place for a panorama."""
    catalog_url = "http://imapway.cn:8098/bjzs/video/getVideoCatalog"
    url_location_detail = 'http://api.map.baidu.com/place/v2/search'
    a = get_location()

    # get_html returns None when the download fails; without the catalog
    # there is nothing to search for.
    catalog = a.get_html(catalog_url)
    if catalog is None:
        print("问题是", "无法获取目录数据")
        return
    a.parse(catalog, url_location_detail)

    for search_url in a.li:
        page = a.get_html(search_url)
        if page is None:
            # Skip this search; the failure was already reported.
            continue
        a.red_dot_parse(page)


if __name__ == '__main__':
    main()

百度地图爬取数据

标签：imp tail ocata web 打印 code window parse gae

原文地址：https://www.cnblogs.com/yuanjia8888/p/9831060.html

踩

(0)

评论 · 一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行