最近一直在处理交通数据,数据包含时间、车牌和经过的路口地址,数据量较大。本篇根据各车经过路口的时间先后顺序,生成贵阳交通的可通行有向图,即判断相连的交通路口之间是双向通行还是单向通行。
一、关于数据的说明
# Baseline version: build the directed connectivity matrix of Guiyang road
# intersections from per-vehicle sightings ordered by plate and time.
# Entry [a, b] is set to 1 when some vehicle was seen at intersection a and
# then, in its next record, at intersection b.
rm(list = ls(all = TRUE))
gc()

library(RODBC)

# Connect to the MySQL "test" database through the ODBC data source.
channel <- odbcConnect("transport-connector-R", uid = "transport", pwd = "transport")
sqlTables(channel)  # list the tables in the database

# Fetch plate and intersection address for Guiyang vehicles, ordered so that
# consecutive rows of one vehicle are consecutive sightings.
transections_data <- sqlQuery(channel, "select plate,address from transport20140901 where plate like '贵A%' order by plate,time")
odbcClose(channel)

# sqlQuery() returns a character vector of messages (not a data frame) when
# the query fails; stop here instead of failing later with an obscure error.
if (!is.data.frame(transections_data)) {
  stop(
    "Query failed: ", paste(transections_data, collapse = "\n"),
    call. = FALSE
  )
}
if (nrow(transections_data) == 0) {
  stop("Query returned no rows; nothing to build the map from.", call. = FALSE)
}

# Read the pre-sorted list of intersection addresses (one per line). The line
# number of an address is its row/column index in the matrix.
sorted_address <- readLines("/home/wanglinlin/transport/address.txt")

# Initial connectivity matrix: all zeros, one row and column per intersection.
transection_count <- length(sorted_address)
tansport_map <- matrix(0, transection_count, transection_count)

# Look up the position of a target address in the address table.
#
# target:        a single address (character or factor element).
# address_table: character vector of known addresses.
# Returns the index of the first match, or 0 when the address is not in the
# table (also for an NA target or an empty table).
find_address <- function(target, address_table) {
  match(as.character(target), address_table, nomatch = 0L)
}

# Walk over consecutive record pairs. The number of records comes from the
# query result itself rather than from a hard-coded constant.
transport_data_count <- nrow(transections_data)
counter <- transport_data_count - 1
transection_id_one <- find_address(transections_data[1, 2], sorted_address)
for (i in seq_len(counter)) {
  transection_id_two <- find_address(transections_data[i + 1, 2], sorted_address)
  # isTRUE() keeps an NA plate from aborting the loop; the id checks skip
  # addresses that are missing from the address table.
  if (isTRUE(transections_data[i, 1] == transections_data[i + 1, 1]) &&
      transection_id_one > 0 && transection_id_two > 0) {
    tansport_map[transection_id_one, transection_id_two] <- 1
  }
  transection_id_one <- transection_id_two
}

write.table(tansport_map, "/home/wanglinlin/transport/tansport_map_two.txt", row.names = FALSE, col.names = FALSE)
上述代码的循环中有两个操作:<ul><li><code>find_address(transections_data[i+1,2],sorted_address)</code></li><li><code>transections_data[i,1]==transections_data[i+1,1]</code></li></ul>这两个操作分别是在数组中查找字符串的位置(当前路口地址在地址列表中的位置)和比较两个字符串是否相等(两个车牌号是否相同)。二者都是关于字符串的操作,在大量记录上逐条执行相当耗时,因此下面改用 hash 包,把车牌和路口地址散列化为整数编号后再处理。
# Hash-based version: build the directed connectivity matrix of Guiyang road
# intersections, using hash tables to map plates and addresses to integer ids.
# Entry [a, b] is set to 1 when some vehicle was seen at intersection a and
# then, in its next record, at intersection b.
rm(list = ls(all = TRUE))
gc()

library(RODBC)
library(hash)

# Connect to the MySQL "test" database through the ODBC data source.
channel <- odbcConnect("transport-connector-R", uid = "transport", pwd = "transport")
sqlTables(channel)  # list the tables in the database

# Fetch plate and intersection address for Guiyang vehicles, ordered so that
# consecutive rows of one vehicle are consecutive sightings.
transections_data <- sqlQuery(channel, "select plate,address from transport20140901 where plate like '贵A%' order by plate,time")

# Collect all distinct Guiyang plates; they become the keys of the plate hash.
plates <- sqlQuery(channel, "select distinct plate from transport20140901 where plate like '贵A%'")
odbcClose(channel)

# sqlQuery() returns a character vector of messages (not a data frame) when
# a query fails; stop here instead of failing later with an obscure error.
if (!is.data.frame(transections_data) || !is.data.frame(plates)) {
  stop("Query failed; check the ODBC connection and table name.", call. = FALSE)
}
if (nrow(transections_data) == 0) {
  stop("Query returned no rows; nothing to build the map from.", call. = FALSE)
}

# plate -> integer id
plate_list <- as.matrix(plates)[, 1]
plate_count <- length(plate_list)
plate_hash_pairs <- hash(plate_list, seq_len(plate_count))

# Read the pre-sorted list of intersection addresses (one per line) and map
# each address to its line number. The ids are derived from the file length
# instead of a hard-coded count.
sorted_address <- readLines("/home/wanglinlin/transport/address.txt")
sorted_address_hash_pairs <- hash(sorted_address, seq_along(sorted_address))

# Initial connectivity matrix: all zeros, one row and column per intersection.
transection_count <- length(sorted_address)
transport_map <- matrix(0, transection_count, transection_count)

# Convert the key columns to character once, instead of indexing the data
# frame and converting on every loop iteration.
plate_keys <- as.character(transections_data[[1]])
address_keys <- as.character(transections_data[[2]])
# Rows with a missing plate or address cannot be looked up in the hashes.
usable <- !is.na(plate_keys) & !is.na(address_keys)

# The number of records comes from the query result itself.
transport_data_count <- length(plate_keys)
counter <- transport_data_count - 1

# Spot-check the lookups on the first two records.
plate_hash_pairs[[as.character(transections_data[1, 1])]]
plate_hash_pairs[[as.character(transections_data[2, 1])]]
sorted_address_hash_pairs[[as.character(transections_data[1, 2])]]
sorted_address_hash_pairs[[as.character(transections_data[2, 2])]]

for (i in seq_len(counter)) {
  if (!(usable[i] && usable[i + 1])) {
    next
  }
  same_vehicle <- plate_hash_pairs[[plate_keys[i]]] ==
    plate_hash_pairs[[plate_keys[i + 1]]]
  # isTRUE() also covers a plate that is missing from the hash (the
  # comparison is then zero-length).
  if (isTRUE(same_vehicle)) {
    from_id <- sorted_address_hash_pairs[[address_keys[i]]]
    to_id <- sorted_address_hash_pairs[[address_keys[i + 1]]]
    # Skip addresses that are not in the address file.
    if (!is.null(from_id) && !is.null(to_id)) {
      transport_map[from_id, to_id] <- 1
    }
  }
}

write.table(transport_map, "/home/wanglinlin/transport/transport_map.txt", row.names = FALSE, col.names = FALSE)
原文地址:http://blog.csdn.net/gufe_hfding/article/details/46371819