码迷,mamicode.com
首页 > 其他好文 > 详细

百度地图爬取数据

时间:2018-10-22 17:52:44      阅读:656      评论:0      收藏:0      [点我收藏+]

标签:imp   tail   ocata   web   打印   code   window   parse   gae   

# -*- coding:utf-8 -*-
import requests
import re
import xlwt
import demjson
import time
import json

class get_location():
    """Build Baidu Place-search URLs from a catalog and record panorama results.

    Workflow (driven by the caller):

    1. ``get_html`` downloads a page or API response as text.
    2. ``parse`` reads a JSON array of catalog entries and appends one Baidu
       Place-search URL to ``self.li`` for every keyword rule an entry's
       name matches.
    3. ``red_dot_parse`` reads a Place-search response, builds a panorama
       URL for each result that has coordinates, and hands it to ``Judge``.
    4. ``Judge`` requests the panorama URL; names whose response is not a
       JPEG image are written to the Excel sheet through ``excel_write``.
    """

    # (keyword, region prefix) pairs tested in order against each entry name.
    # Every matching rule appends its own URL, so a name that matches several
    # keywords is queued several times (same as the original chain of
    # independent ``if`` statements).  County names ("县") are used as the
    # region on their own; all other rules prefix the region with "宝鸡市".
    _REGION_RULES = (
        ("区", "宝鸡市"),
        ("街道", "宝鸡市"),
        ("县", ""),
        ("镇", "宝鸡市"),
        ("公园", "宝鸡市"),
        ("法门寺", "宝鸡市"),
    )

    # Panorama URL template; filled with (ak, lng, lat) in that order.
    _PANORAMA_URL = (
        "http://api.map.baidu.com/panorama/v2"
        "?ak={}&width=512&height=256&location={},{}&fov=180"
    )

    # Seconds to wait for any single HTTP request before giving up.
    _TIMEOUT = 10

    def __init__(self, ak='U0QGae7viQsN0yLBirGsRD90XI0tlcGO',
                 output_path=r'C:\Users\85740\Desktop\get_ip.xls'):
        """Set up request headers, result containers and the Excel workbook.

        :param ak: Baidu API key appended to every generated URL.
        :param output_path: file the workbook is saved to after each row.
        """
        self.i = 0          # next free row in the sheet
        self.li = []        # Place-search URLs collected by parse()
        self.dict1 = {}     # panorama URL -> place name
        # NOTE(review): the default key is committed in source; prefer passing
        # ``ak`` explicitly (e.g. from configuration).
        self.li_ak = ak
        self.output_path = output_path
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36',
            'Cookie': 'JSESSIONID=15BF07D8D4F4515A5C2247D5606AECB2'
        }
        self.f = xlwt.Workbook(encoding='utf-8')
        self.sheet1 = self.f.add_sheet(u'sheet1', cell_overwrite_ok=True)

    def get_html(self, url, max_retries=3):
        """Fetch ``url`` and return the response body as text.

        A non-200 response is retried after a 0.1 s pause, at most
        ``max_retries`` extra times (the original retried by unbounded
        recursion).

        :param url: address to request.
        :param max_retries: number of retries after the first attempt.
        :return: response text, or ``None`` when the request raised or no
            attempt returned status 200.
        """
        attempts = max(max_retries, 0) + 1
        for _ in range(attempts):
            try:
                res = requests.get(url, headers=self.headers,
                                   timeout=self._TIMEOUT)
            except requests.RequestException as e:
                # e carries the reason the request failed.
                print("问题是", e)
                return None
            if res.status_code == 200:
                # Let requests guess the encoding from the body before decoding.
                res.encoding = res.apparent_encoding
                return res.text
            time.sleep(0.1)
        print("问题是", "HTTP %s %s" % (res.status_code, url))
        return None

    def parse(self, response, url_location_detail):
        """Queue a Place-search URL for every catalog name matching a rule.

        :param response: JSON text of an array of objects carrying a
            ``name`` key, e.g. ``[{"id":610300,"name":"宝鸡市", ...}]``.
            ``None`` or empty text is ignored.
        :param url_location_detail: base URL the query string is appended to.
        :return: ``None``; matching URLs are appended to ``self.li``.
        """
        if not response:
            return
        for entry in json.loads(response):
            name = entry.get('name')
            if not name:
                continue
            for keyword, region_prefix in self._REGION_RULES:
                if keyword in name:
                    self.li.append(
                        url_location_detail
                        + "?query=" + name
                        + "&region=" + region_prefix + name
                        + "&output=json&ak=" + self.li_ak
                    )

    def red_dot_parse(self, response):
        """Build a panorama URL for each search result and check it.

        :param response: JSON text of a Place-search reply whose ``results``
            list holds objects with ``name`` and ``location`` (``lat`` /
            ``lng``).  ``None``/empty text and replies without ``results``
            are ignored; results without coordinates are skipped.
        :return: ``None``; ``self.dict1`` maps each panorama URL to its name.
        """
        if not response:
            return
        results = json.loads(response).get("results") or []
        for place in results:
            location = place.get('location') or {}
            lat = location.get('lat')
            lng = location.get('lng')
            if lat is None or lng is None:
                continue
            detail_view_url = self._PANORAMA_URL.format(self.li_ak, lng, lat)
            self.dict1[detail_view_url] = place.get("name")
            try:
                self.Judge(detail_view_url)
            except (requests.RequestException, OSError) as e:
                # Network or file-save failure for one place must not stop
                # the remaining results, but should be visible.
                print("问题是", e)

    def Judge(self, detail_view_url):
        """Request the panorama URL and record the place if no image comes back.

        :param detail_view_url: a URL previously stored in ``self.dict1``.
        :raises KeyError: if ``detail_view_url`` is not a key of ``self.dict1``.
        """
        # Look the name up by the URL we were given: ``res.url`` may differ
        # after requests normalises or redirects it.
        name = self.dict1[detail_view_url]
        res = requests.get(detail_view_url, headers=self.headers,
                           timeout=self._TIMEOUT)
        content_type = res.headers.get('Content-Type', '')
        if content_type.startswith('image/jpeg'):
            print("有全景的地方", name)
            return
        try:
            res.json()
        except ValueError:
            # Neither an image nor a JSON payload: result is indeterminate,
            # so nothing is recorded (the original skipped these too).
            return
        # NOTE(review): the source's indentation was lost; this assumes only
        # places WITHOUT a panorama are written to the sheet - confirm.
        self.excel_write(name)

    def excel_write(self, text_ip):
        """Write ``text_ip`` into column 0 of the next row and save the workbook.

        :param text_ip: value to store in the cell.
        """
        self.sheet1.write(self.i, 0, text_ip)
        self.i += 1
        self.f.save(self.output_path)  # save after every row

if __name__ == '__main__':
    # Catalog endpoint whose JSON array of names seeds the search.
    url = "http://imapway.cn:8098/bjzs/video/getVideoCatalog"
    url_location_detail = 'http://api.map.baidu.com/place/v2/search'
    a = get_location()

    # get_html returns None when the request fails; skip parsing in that case
    # instead of passing None on to json.loads.
    catalog_html = a.get_html(url)
    if catalog_html:
        # Parse the catalog into Place-search URLs (stored in a.li).
        a.parse(catalog_html, url_location_detail)

    for search_url in a.li:
        search_html = a.get_html(search_url)
        if search_html:
            # Extract each result's coordinates and check its panorama.
            a.red_dot_parse(search_html)

百度地图爬取数据

标签:imp   tail   ocata   web   打印   code   window   parse   gae   

原文地址:https://www.cnblogs.com/yuanjia8888/p/9831060.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!