标签:
本文主要讲解如何用Python计算公交(地铁)换乘线路:输入起点和终点,即可得出可行的乘车线路。
先说一下数据的来源:数据直接从网上爬取;也可以跳过这一步,直接下载我的源码获取数据。
# coding=utf-8
import requests
from bs4 import BeautifulSoup
import time
def download_line(url, line_number):
    '''
    Download the station list of one line and save it to ./line/<line_number>.txt.

    The page at ``url`` is fetched and parsed as HTML; for every
    ``<span class="yx_span">`` element that contains a link, the link text is
    written to the output file, one station per line, encoded as UTF-8.
    Prints 'ok' when the server answers 200 and 'connect fail' otherwise.

    :param url: address of the page listing the stations of the line
    :param line_number: label of the line, used as the output file name
    :return: None
    '''
    # Local imports keep this block self-contained.
    import io
    import os

    headers = {
        'Host': 'ditie.114huoche.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        # NOTE(review): a fixed If-None-Match makes this a conditional request;
        # presumably the server may answer 304, which is reported below as
        # 'connect fail' -- confirm whether this header is wanted.
        'If-None-Match': 'W/"aa248d9ab9daa155024a37bbfb5ce775"',
        'Cache-Control': 'max-age=0'
    }
    session = requests.session()
    try:
        resp = session.get(url, headers=headers)
        resp.encoding = 'gb2312'
        if resp.status_code != 200:
            print('connect fail')
            return
        print('ok')
        soup = BeautifulSoup(resp.text, 'html.parser')
        # The output directory is not created anywhere else.
        if not os.path.isdir('./line'):
            os.makedirs('./line')
        # io.open with an explicit encoding writes the same UTF-8 bytes on
        # Python 2 and Python 3.
        with io.open('./line/' + str(line_number) + '.txt', 'w', encoding='utf-8') as f:
            for row in soup.find_all('span', attrs={'class': 'yx_span'}):
                link = row.a
                # link.string is None when the link has no single text child.
                if link is not None and len(link) > 0 and link.string is not None:
                    f.write(link.string + u'\n')
    finally:
        # Release the pooled connections even when the request or parse fails.
        session.close()
# [page URL, output label] for every line to download.  The entries for
# lines 2 and 3 are disabled.
all_address = [
    # ['http://ditie.114huoche.com/ShangHai/x_24/', '2'],
    # ['http://ditie.114huoche.com/ShangHai/x_25/', '3'],
    ['http://ditie.114huoche.com/ShangHai/x_26/', '4w'],
    ['http://ditie.114huoche.com/ShangHai/x_27/', '4n'],
    ['http://ditie.114huoche.com/ShangHai/x_28/', '5'],
    ['http://ditie.114huoche.com/ShangHai/x_29/', '6'],
    ['http://ditie.114huoche.com/ShangHai/x_30/', '7'],
    ['http://ditie.114huoche.com/ShangHai/x_31/', '8'],
    ['http://ditie.114huoche.com/ShangHai/x_32/', '9'],
    ['http://ditie.114huoche.com/ShangHai/x_33/', '10zhu'],
    ['http://ditie.114huoche.com/ShangHai/x_34/', '10zhi'],
    ['http://ditie.114huoche.com/ShangHai/x_35/', '11zhu'],
    ['http://ditie.114huoche.com/ShangHai/x_36/', '11zhi'],
    ['http://ditie.114huoche.com/ShangHai/x_37/', 'cixuanfu'],
    ['http://ditie.114huoche.com/ShangHai/x_38/', '1'],
    ['http://ditie.114huoche.com/ShangHai/x_39/', '12'],
    ['http://ditie.114huoche.com/ShangHai/x_40/', '13'],
    ['http://ditie.114huoche.com/ShangHai/x_43/', '16']
]
for address in all_address:
    print(address)
    page_url, label = address
    download_line(page_url, label)
    # Pause between two downloads.
    time.sleep(10)
下面开始进行地铁线路的计算
先读取数据
def read_line(filename, base_dir='d:/pythondemo/subline/download/line/'):
    '''
    Read one line file and return its stations.

    Each non-empty text line of the file is one station name.  Line endings
    ('\\n' and '\\r\\n') are stripped and blank lines are skipped, so that a
    stray empty line cannot turn into a station with an empty name.

    :param filename: name of the file inside ``base_dir``, e.g. '1.txt'
    :param base_dir: directory holding the line files; the default is the
        location that used to be hard-coded
    :return: list of ``[station_name, []]`` pairs in file order; the empty
        list of each pair is a fresh object that statistics_transfer fills in
    '''
    import os

    with open(os.path.join(base_dir, filename)) as f:
        names = (row.rstrip('\r\n') for row in f)
        return [[name, []] for name in names if name]
统计一个站点在一条线路中出现的次数,以及查找该站点在线路中的位置(下标)
def __count(data, site):
    '''
    Count the entries of ``data`` whose first element equals ``site``.

    :param data: sequence of entries; only element 0 of each entry is compared
    :param site: value to look for
    :return: number of matching entries (0 when there is none)
    '''
    return sum(1 for entry in data if entry[0] == site)
def __index(data, site):
    '''
    Find the position of the first entry whose first element equals ``site``.

    :param data: sequence of entries; only element 0 of each entry is compared
    :param site: value to look for
    :return: index of the first match, or -1 when no entry matches
    '''
    matches = (position for position, entry in enumerate(data) if entry[0] == site)
    return next(matches, -1)
下面就是具体的实现模块,有待改善
def statistics_transfer(data):
    '''
    Work out at which stations, and between which lines, a transfer is possible.

    For every ordered pair of different lines (a, b), each station of ``a``
    whose name also appears on ``b`` gets ``b`` added to its transfer list
    (element 1 of the station entry), and ``[a, b]`` is recorded once in the
    returned pair list.  ``data`` is updated in place; calling the function
    again on the same data does not add duplicate entries.

    :param data: dict mapping a line name to its list of
        ``[station_name, transfer_lines]`` entries
    :return: tuple ``(data, line_data)`` where ``data`` is the same (updated)
        dict and ``line_data`` is the list of ``[from_line, to_line]`` pairs
    '''
    # One set of names per line makes every "is this station on that line?"
    # check a constant-time lookup instead of a scan of the whole line.
    names_by_line = {}
    for name, stations in data.items():
        names_by_line[name] = set(station[0] for station in stations)

    line_data = []
    lines = list(data.keys())
    for line in lines:
        for other in lines:
            if line == other:
                continue
            other_names = names_by_line[other]
            shared = False
            for station in data[line]:
                if station[0] in other_names:
                    shared = True
                    if other not in station[1]:
                        station[1].append(other)
            # Every ordered pair is visited exactly once, so no duplicate
            # check is needed here.
            if shared:
                line_data.append([line, other])
    return data, line_data
def sub_line(data, line, start, end):
    '''
    Return the station names travelled on ``line`` from ``start`` to ``end``.

    Both ends are included.  When ``end`` lies before ``start`` on the line the
    names are returned in reverse line order, so the result always begins with
    ``start`` and finishes with ``end``.  Stations are located by the first
    occurrence of their name.

    :param data: dict mapping a line name to its list of station entries
    :param line: name of the line to cut from
    :param start: name of the first station
    :param end: name of the last station
    :return: list of station names from ``start`` to ``end``
    :raises ValueError: if ``start`` or ``end`` is not a station of ``line``
        (a missing station used to yield a silently wrong slice)
    '''
    names = [station[0] for station in data[line]]
    for site in (start, end):
        if site not in names:
            raise ValueError('station %r is not on line %r' % (site, line))
    start_index = names.index(start)
    end_index = names.index(end)
    if start_index < end_index:
        return names[start_index:end_index + 1]
    # Travelling against the stored order: cut forwards, then reverse.
    return names[end_index:start_index + 1][::-1]
def find_line(data, site, line=()):
    '''
    Find a line that contains the station ``site``.

    Lines are examined in the iteration order of ``data``; the first line that
    is not excluded and has a station named ``site`` is returned.

    :param data: dict mapping a line name to its list of station entries
    :param site: station name to look for
    :param line: line names to skip (defaults to an empty, immutable tuple
        instead of a shared mutable list)
    :return: name of the line found, or -1 when no line contains ``site``
    '''
    for name in data.keys():
        if name in line:
            continue
        if any(station[0] == site for station in data[name]):
            return name
    return -1
def find_transfer(data, start, direction, line=()):
    '''
    Find the first station after ``start`` where a transfer is possible.

    Only ``direction == 1`` (towards the end of the station list) is
    supported; any other direction returns None.  Stations after ``start`` are
    examined in order and the first transfer line that is not excluded by
    ``line`` wins.

    The original guard compared the station *name* with ``len(data)`` instead
    of its index, so it never tested what was intended; the check below uses
    the index and also rejects a station that is not on the line.

    :param data: station entries ``[name, transfer_lines]`` of one line
    :param start: name of the station to start from
    :param direction: search direction; only 1 is handled
    :param line: line names that must not be used as a transfer target
    :return: tuple ``(station_name, transfer_line)``, or None when there is no
        usable transfer or ``start`` is unknown
    '''
    if direction != 1:
        return None
    start_index = next(
        (position for position, station in enumerate(data) if station[0] == start), -1)
    if start_index < 0:
        return None
    for station in data[start_index + 1:]:
        for candidate in station[1]:
            if candidate not in line:
                return station[0], candidate
    return None
def find_all_start_line(line_data, start_line):
    '''
    List the lines that can be reached directly from ``start_line``.

    :param line_data: list of ``[from_line, to_line]`` pairs
    :param start_line: line to start from
    :return: the ``to_line`` of every pair whose ``from_line`` is
        ``start_line``, in the order of ``line_data``
    '''
    reachable = []
    for pair in line_data:
        if pair[0] == start_line:
            reachable.append(pair[1])
    return reachable
def find_transfer_data(line_data, start_line, end_line):
    '''
    Enumerate every sequence of lines leading from ``start_line`` to ``end_line``.

    A sequence never uses the same line twice and stops as soon as it reaches
    ``end_line``; sequences that cannot be extended to ``end_line`` are
    dropped.  Candidates are expanded first-in first-out, which gives the same
    result order as the original "remove the first unfinished candidate and
    append its extensions" loop, without rescanning the candidate list from the
    start after every change.

    :param line_data: list of ``[from_line, to_line]`` pairs
    :param start_line: line the journey starts on
    :param end_line: line the journey must finish on
    :return: list of line sequences, each beginning with ``start_line`` and
        ending with ``end_line``; empty when ``end_line`` cannot be reached
    '''
    from collections import deque

    # Direct neighbours of every line, in the order they appear in line_data.
    neighbours = {}
    for pair in line_data:
        neighbours.setdefault(pair[0], []).append(pair[1])

    pending = deque([start_line, nxt] for nxt in neighbours.get(start_line, []))
    routes = []
    while pending:
        path = pending.popleft()
        if path[-1] == end_line:
            routes.append(path)
            continue
        for nxt in neighbours.get(path[-1], []):
            if nxt not in path:
                pending.append(path + [nxt])
    return routes
def sub_all_site_start_end(data, start, start_line, end_line):
    '''
    Collect the rides on ``start_line`` from ``start`` to each transfer station for ``end_line``.

    Stations before ``start`` are examined first, nearest one first, then
    ``start`` itself and the stations after it.  Each station whose transfer
    list contains ``end_line`` contributes one ride, obtained from sub_line.

    :param data: dict mapping a line name to its list of station entries
    :param start: name of the station the ride starts at
    :param start_line: line being travelled
    :param end_line: line to transfer to
    :return: list of station-name lists, one per usable transfer station
    '''
    stations = data[start_line]
    index = __index(stations, start)
    rides = []
    # Transfer stations that lie before the start: the stretch is cut from the
    # transfer station to the start and then reversed.
    for position in reversed(range(0, index)):
        if end_line in stations[position][1]:
            stretch = sub_line(data, start_line, stations[position][0], start)
            rides.append(stretch[::-1])
    # The start station itself and everything after it.
    for position in range(index, len(stations)):
        if end_line in stations[position][1]:
            rides.append(sub_line(data, start_line, start, stations[position][0]))
    return rides
def get_lines(data, lines, start, end):
    '''
    Build the tree of rides for one sequence of lines, e.g. ['1', '2'].

    For each consecutive pair of lines, every open leaf of the tree is extended
    with the rides returned by sub_all_site_start_end; while the tree is still
    empty the rides start at ``start`` and become the roots.  After the last
    pair, each open leaf gets a single final ride on the last line ending at
    ``end``.

    :param data: dict mapping a line name to its list of station entries
    :param lines: sequence of line names to travel, in order
    :param start: name of the departure station
    :param end: name of the destination station
    :return: list of root LineData nodes (empty when ``lines`` has fewer than
        two entries)
    '''
    roots = []
    last_index = len(lines) - 1
    for index in range(1, len(lines)):
        previous_line = lines[index - 1]
        current_line = lines[index]
        if not roots:
            # No ride recorded yet: start from the departure station.
            for ride in sub_all_site_start_end(data, start, previous_line, current_line):
                roots.append(LineData(previous_line, ride, ride[0], ride[-1], None))
        else:
            leaves = []
            find_all_linedata(roots, end, leaves)
            for leaf in leaves:
                rides = sub_all_site_start_end(data, leaf.end, previous_line, current_line)
                leaf.next_d = [
                    LineData(previous_line, ride, leaf.end, ride[-1], None)
                    for ride in rides
                ]
        if index == last_index:
            # Final leg: stay on the last line until the destination.
            leaves = []
            find_all_linedata(roots, end, leaves)
            for leaf in leaves:
                final_ride = sub_line(data, current_line, leaf.end, end)
                leaf.next_d = LineData(current_line, final_ride, leaf.end, final_ride[-1], None)
    return roots
def find_all_linedata(linedata, end, list):
    '''
    Collect the leaves of a ride tree that have not reached ``end``.

    :param linedata: iterable of nodes; a node whose ``next_d`` is not None is
        descended into by iterating ``next_d``
    :param end: name of the destination station
    :param list: output list; every leaf (``next_d is None``) whose ``end``
        differs from ``end`` is appended to it
    :return: None
    '''
    for node in linedata:
        following = node.next_d
        if following is not None:
            find_all_linedata(following, end, list)
        elif node.end != end:
            list.append(node)
def calculator_line(data, line_data, start, end, line=()):
    '''
    Compute the possible routes from station ``start`` to station ``end``.

    The line of each station is looked up with find_line.  When both stations
    are found on the same line the result is a single direct ride.  Otherwise
    every line sequence returned by find_transfer_data is expanded into rides
    and the complete routes are collected with get_all_line.

    :param data: dict mapping a line name to its list of station entries, as
        returned by statistics_transfer
    :param line_data: list of ``[from_line, to_line]`` pairs
    :param start: name of the departure station
    :param end: name of the destination station
    :param line: line names to ignore when looking up the two stations
        (defaults to an empty, immutable tuple instead of a shared list)
    :return: for a direct ride, a one-element list holding the list of station
        names; otherwise a list of routes, each a list of dicts with the keys
        'start', 'end', 'line' and 'data'.  An empty list when a station is
        unknown or no route exists.
    '''
    start_line = find_line(data, start, line)
    end_line = find_line(data, end, line)
    # find_line signals "not found" with -1.  One unknown station already
    # produced an empty result; two unknown stations used to fail with
    # KeyError on data[-1], so both cases now answer "no route".
    if start_line == -1 or end_line == -1:
        return []
    if start_line == end_line:
        return [sub_line(data, start_line, start, end)]
    trees = [
        get_lines(data, sequence, start, end)
        for sequence in find_transfer_data(line_data, start_line, end_line)
    ]
    lines = []
    get_all_line(trees, end, lines, [])
    return lines
def get_end_number(lineData, end, nuber):
    '''
    Count the leaves below ``lineData`` that arrive at ``end``.

    The original added the value returned by the recursive call to the running
    total although that value already contained the total, so every branch
    after the first was counted too often; the total is now threaded through
    the recursion instead.

    :param lineData: node of a ride tree; ``next_d`` is None for a leaf, a
        single node, or an iterable of nodes
    :param end: name of the destination station
    :param nuber: count accumulated so far (pass 0 to start counting)
    :return: ``nuber`` plus the number of leaves whose ``end`` equals ``end``
    '''
    following = lineData.next_d
    if following is None:
        if lineData.end == end:
            return nuber + 1
        return nuber
    # A single following node is recognised by its own ``next_d`` attribute;
    # anything else is treated as a collection of nodes.
    if hasattr(following, 'next_d'):
        return get_end_number(following, end, nuber)
    for child in following:
        nuber = get_end_number(child, end, nuber)
    return nuber
def _ride_as_dict(node):
    '''Describe one ride node as the dict stored in a finished route.'''
    return {'start': node.start, 'end': node.end, 'line': node.line, 'data': node.ld}


def get_all_line(lineData, end, list, data):
    '''
    Collect every complete route contained in a ride tree.

    A node is recognised by its ``next_d`` attribute (this also accepts
    subclasses, unlike the original exact-type comparison); anything else is
    treated as a collection and each of its items is processed in turn.  A
    route is recorded when a leaf (``next_d is None``) ends at ``end``; leaves
    ending elsewhere are discarded.  ``data`` itself is never modified, every
    branch works on its own copy.

    :param lineData: a ride node, or an arbitrarily nested collection of nodes
    :param end: name of the destination station
    :param list: output list; each finished route is appended as a list of
        dicts with the keys 'start', 'end', 'line' and 'data'
    :param data: rides already travelled before ``lineData`` (a list of dicts)
    :return: None
    '''
    if not hasattr(lineData, 'next_d'):
        for item in lineData:
            get_all_line(item, end, list, data)
        return
    following = lineData.next_d
    if following is None:
        if lineData.end == end:
            list.append(data + [_ride_as_dict(lineData)])
        return
    if hasattr(following, 'next_d'):
        following = [following]
    for child in following:
        # A fresh dict per branch keeps the finished routes independent.
        get_all_line(child, end, list, data + [_ride_as_dict(lineData)])
def print_lineData(lineData):
    '''
    Print the route map.

    NOTE: not implemented -- the body consists of this docstring only, so a
    call does nothing and returns None.

    :param lineData:
    :return:
    '''
class LineData(object):
    '''
    One ride on a single line; a node of the ride tree built by get_lines.

    Attributes:
        line: name of the line travelled.
        ld: station names of the ride, in travel order.
        start: name of the first station.
        end: name of the last station.
        next_d: what follows this ride (None, one node, or a list of nodes).
        number: ``len(ld) - 1``.
    '''

    def __init__(self, line, ld, start, end, next):
        # ``next`` shadows the builtin but is part of the constructor signature.
        self.line, self.ld = line, ld
        self.start, self.end = start, end
        self.next_d = next
        self.number = len(ld) - 1
# Load the lines used for the calculation.
data = dict((name, read_line(name + '.txt')) for name in ('1', '2', '3', '4n', '6'))
data, line_data = statistics_transfer(data)
# Station names are encoded to UTF-8 before being compared with the names
# read from the line files.
# Other destinations tried: 富锦路, 莘庄, 港城路
start = u'徐泾东'.encode('utf-8')
end = u'曹杨路'.encode('utf-8')
value = calculator_line(data, line_data, start, end)
for row in value:
    print('---------------------')
    for r in row:
        if type(r) is dict:
            # One ride of a route with transfers.
            vl = 'start:' + r['start'] + ';end:' + r['end'] + ';line:' + r['line'] + ';data:'
            vl += ''.join('-->' + d for d in r['data'])
            print(vl)
        else:
            # Direct ride: the row is a plain list of station names.
            print(r)
希望大神多多指导,共同改进。数据中没有公交线路,需要的话可以自行添加:按照txt文件的格式新增线路文件即可,不需要修改其他代码。
标签:
原文地址:http://blog.csdn.net/u010154424/article/details/52259402