r - R - ggmap - 通过地理编码计算城市之间的最短距离
问题描述
我有一个放在数据框中的城市列表和相关信息,如下所示:
library(plyr)
library(dplyr)
library(ggmap)
library(Imap)
# Sample input: ten cities, each with an id, a month code and a category.
cities <- c(
  "washington, dc", "wilmington, de", "amarillo, tx", "denver, co",
  "needham, ma", "philadelphia, pa", "doylestown, pa", "galveston, tx",
  "tuscaloosa, al", "hollywood, fl"
)
id <- c(
  156952, 154222, 785695, 154423, 971453,
  149888, 1356987, 178946, 169944, 136421
)
# Month codes come in runs: two, two, three and three consecutive rows.
month <- c(rep(201811, 2), rep(201912, 2), rep(202005, 3), rep(202106, 3))
category <- c(
  "home", "work", "home", "home", "home",
  "work", "cell", "home", "work", "cell"
)
# One row per city; columns keep the names of the vectors above.
places <- data.frame(
  cities = cities,
  id = id,
  category = category,
  month = month
)
使用 `Imap` 和 `ggmap` 包,我可以检索每个城市的经度和纬度:
# Geocode every city once and reuse the result for both coordinates; a second
# geocode() call would repeat the same requests against the Google API.
coords <- geocode(location = as.character(places$cities), source = "google")
lat <- coords$lat
lon <- coords$lon
# Append the coordinates as the columns `lat` and `lon`.
places <- cbind(places, lat, lon)
我想做的是以下几点:
- 按月份和类别计算每个城市之间的距离
- 在 `places` 的单独列中返回第二短的距离以及相应的城市和 id
我写了一个 `for` 循环来计算距离:
# Distances in miles from each city to every city in `places`. Element i holds
# the distances measured from row i, in row order. Building the list with
# lapply() creates `dist_list` itself (the loop form assigned into a list that
# was never initialised) and is safe when `places` has zero rows.
dist_list <- lapply(seq_len(nrow(places)), function(i) {
  gdist(
    lon.1 = places$lon[i],
    lat.1 = places$lat[i],
    lon.2 = places$lon,
    lat.2 = places$lat,
    units = "miles"
  )
})
这会产生以下数据:
dput(dist_list)
list(c(0, 98.3464717885451, 1386.25425677199, 1489.87718040776,
383.083760289456, 123.232894969413, 140.284537078237, 1209.23510542932,
706.670452283757, 906.79542720295), c(98.4762434610638, 0, 1472.06660056474,
1560.93398322985, 285.23618862797, 24.9195071209828, 44.8853561530985,
1308.60741637919, 805.755084908157, 983.102810248198), c(1389.07354011351,
1472.06660056474, 0, 356.573530670257, 1712.29111612461, 1493.39302974566,
1497.2125164277, 579.329313217289, 827.577713357261, 1434.82691622332
), c(1492.80130415651, 1560.93398322985, 356.573530670257, 0,
1761.3773163288, 1578.71125031146, 1576.80713231756, 923.725006795209,
1067.04809350934, 1717.32991551111), c(383.551997010915, 285.23618862797,
1712.29111612461, 1761.3773163288, 0, 260.382178510916, 243.947043197789,
1588.85470703957, 1088.38640303169, 1230.47219244291), c(123.395655314093,
24.9195071209827, 1493.39302974566, 1578.71125031146, 260.382178510916,
0, 24.7382114555287, 1333.29925285915, 830.581742827321, 1002.94777739349
), c(140.431447025301, 44.8853561530986, 1497.2125164277, 1576.80713231756,
243.947043197789, 24.7382114555285, 0, 1346.44527983873, 844.827513981938,
1026.98263808807), c(1211.16392416136, 1308.60741637919, 579.329313217289,
923.725006795209, 1588.85470703957, 1333.29925285915, 1346.44527983873,
0, 505.292529136012, 925.512554201542), c(707.73957320737, 805.755084908157,
827.577713357261, 1067.04809350934, 1088.38640303169, 830.581742827321,
844.827513981938, 505.292529136012, 0, 666.837848781548), c(906.880841903584,
983.102810248198, 1434.82691622332, 1717.32991551111, 1230.47219244291,
1002.94777739349, 1026.98263808807, 925.512554201542, 666.837848781548,
0))
所需的结果将如下所示(第一行):
cities id category month lat lon min.dist closest city closest city id
washington, dc 156952 home 201811 38.90719 -77.03687 98.34647 wilmington, de 154222
通过 `Rfast` 包中的 `nth` 函数,我可以获得第二小的距离:
# Second-smallest distance for the first city (the smallest is the 0 to itself).
# The call is namespace-qualified because dplyr, which is attached above, also
# exports nth(); that one returns the element at position 2, not the
# second-smallest value.
Rfast::nth(dist_list[[1]], 2)
我遇到的问题是,我不知道如何将列表中的信息合并到数据框 `places` 中。任何帮助或建议将不胜感激。
解决方案
更新
假设我们只按 `month` 分组,可以试试下面的代码:
# For every row of `df`, find the nearest *other* row and report it.
#
# Args:
#   df: data frame with the columns `lat`, `lon`, `cities` and `id`.
#
# Returns:
#   A data frame with one row per row of `df` and the columns `min.dist`
#   (miles, as computed by gdist()), `closest_city` and `closest_city_id`.
#   A row with no other usable point (for example a single-row group) gets NA
#   in all three columns. A zero-row `df` yields a zero-row result.
f <- function(df) {
  n <- nrow(df)
  if (n == 0L) {
    return(data.frame(
      min.dist = numeric(0),
      closest_city = df$cities[0],
      closest_city_id = df$id[0]
    ))
  }
  nearest <- lapply(seq_len(n), function(i) {
    x <- vapply(
      seq_len(n),
      function(j) {
        # gdist() expects longitude before latitude. Naming the arguments
        # keeps the two from being swapped silently.
        gdist(
          lon.1 = df$lon[i], lat.1 = df$lat[i],
          lon.2 = df$lon[j], lat.2 = df$lat[j],
          units = "miles"
        )
      },
      numeric(1)
    )
    # Never match a row with itself; zero distances are excluded as well.
    x[i] <- Inf
    x[which(x == 0)] <- Inf
    idx <- which.min(x)
    # No finite candidate left: report NA instead of a row at distance Inf.
    if (length(idx) == 0L || !is.finite(x[idx])) {
      idx <- NA_integer_
    }
    data.frame(
      min.dist = x[idx],
      closest_city = df$cities[idx],
      closest_city_id = df$id[idx]
    )
  })
  do.call(rbind, nearest)
}
# Run f() on each month's rows, bind its columns to those rows, then drop the
# grouping so the result is a plain tibble again.
ungroup(
  do(
    group_by(places, month),
    cbind(., f(.))
  )
)
这给出如下输出(注意:此输出是在 `gdist` 的经纬度参数顺序颠倒时生成的,因此其中的距离与问题中的 `dist_list` 不一致,例如 galveston 一行的 11405 英里明显有误):
# A tibble: 10 x 9
cities id category month lat lon min.dist closest_city
<chr> <int> <chr> <int> <dbl> <dbl> <dbl> <chr>
1 washington, dc 156952 home 201811 38.9 -77.0 104. wilmington, de
2 wilmington, de 154222 work 201811 39.7 -75.5 104. washington, dc
3 amarillo, tx 785695 home 201912 35.2 -102. 232. denver, co
4 denver, co 154423 home 201912 39.8 -105. 232. amarillo, tx
5 needham, ma 971453 home 202005 42.3 -71.2 273. doylestown, pa
6 philadelphia, ~ 149888 work 202005 40.0 -75.2 6.81 doylestown, pa
7 doylestown, pa 1356987 cell 202005 40.3 -75.1 6.81 philadelphia, ~
8 galveston, tx 178946 home 202106 29.2 -94.9 11405. hollywood, fl
9 tuscaloosa, al 169944 work 202106 33.2 -87.6 517. hollywood, fl
10 hollywood, fl 136421 cell 202106 26.0 -80.1 517. tuscaloosa, al
# ... with 1 more variable: closest_city_id <int>
根据你已经得到的 `dist_list`,可以试试下面的代码:
# For each distance vector in `dist_list`, pick the smallest non-zero entry and
# look up the matching city and id in `places`; stack the one-row results.
closest <- do.call(
  rbind,
  lapply(dist_list, function(d) {
    # A distance of exactly 0 is the city itself, so take it out of the running.
    d[d == 0] <- Inf
    nearest <- which.min(d)
    data.frame(
      min.dist = min(d),
      closest_city = places$cities[nearest],
      closest_city_id = places$id[nearest]
    )
  })
)
结果如下:
min.dist closest_city closest_city_id
1 98.34647 wilmington, de 154222
2 24.91951 philadelphia, pa 149888
3 356.57353 denver, co 154423
4 356.57353 amarillo, tx 785695
5 243.94704 doylestown, pa 1356987
6 24.73821 doylestown, pa 1356987
7 24.73821 philadelphia, pa 149888
8 505.29253 tuscaloosa, al 169944
9 505.29253 galveston, tx 178946
10 666.83785 tuscaloosa, al 169944
此外,如果您想将上述数据框附加到现有的 `places` 中,可以使用:
# Column-bind `closest` onto `places` and store the result back in `places`.
places <- cbind(places, closest)
推荐阅读
- c++ - 复数算术出错
- unity3d - dontDestroyOnLoad 在不同场景中的问题
- python - 如果字母和数字正确,我如何打印更多的 dict 网格?
- firebase - FLUTTER:如何在流构建器中使用导航器?
- r - 如何从R字符串中提取某些单词?
- reactjs - 尝试将响应数组映射到状态对象时出现未处理的拒绝错误
- java - Vs Code java The import cannot be resolved
- netlogo - 如何让乌龟移动到最近的具有相同类别的补丁?
- html - 如何在 inline-block-div 中垂直和水平居中内容?
- python - 制作一个函数来改变一个变量