首页 > 解决方案 > R - ggmap - 通过地理编码计算城市之间的最短距离

问题描述

我有一个放在数据框中的城市列表和相关信息,如下所示:

library(plyr)
library(dplyr)
library(ggmap)
library(Imap)

# Sample data: ten records, one city per row, each with an id, a category
# label and a numeric month stamp (values look like YYYYMM).
cities <- c(
  "washington, dc", "wilmington, de", "amarillo, tx", "denver, co",
  "needham, ma", "philadelphia, pa", "doylestown, pa", "galveston, tx",
  "tuscaloosa, al", "hollywood, fl"
)

id <- c(
  156952, 154222, 785695, 154423, 971453,
  149888, 1356987, 178946, 169944, 136421
)

month <- c(
  201811, 201811, 201912, 201912, 202005,
  202005, 202005, 202106, 202106, 202106
)

category <- c(
  "home", "work", "home", "home", "home",
  "work", "cell", "home", "work", "cell"
)

# Column names are spelled out; they match the vector names used above.
places <- data.frame(
  cities = cities,
  id = id,
  category = category,
  month = month
)

使用 Imap 和 ggmap 包,我可以检索每个城市的经度和纬度:

# Geocode the cities a single time and reuse the result. Calling geocode()
# once for lat and again for lon repeats every query against the Google
# geocoding service for no benefit.
coords <- geocode(location = places$cities, source = "google")
lat <- coords$lat
lon <- coords$lon

# Append the coordinates as two new columns, lat first and then lon.
places <- cbind(places, lat, lon)

我想做的是以下几点:

  1. 按月份和类别计算每个城市之间的距离
  2. 在 places 的单独列中返回第二短的距离以及对应的城市和 id

我写了一个for循环来计算距离:

# dist_list[[i]] holds the distances (in miles) from row i of `places` to
# every row of `places`, in row order. The list has to exist before the loop
# can assign into it, so it is preallocated here.
dist_list <- vector("list", nrow(places))

# seq_len() keeps the loop from running when `places` has zero rows.
for (i in seq_len(nrow(places))) {
  # NOTE(review): the output recorded for this loop is not symmetric for
  # row 1 (98.35 miles one way, 98.48 the other), which suggests gdist() may
  # not be fully vectorised over lon.2/lat.2 -- confirm against the Imap
  # documentation, or call gdist() once per pair of rows.
  dist_list[[i]] <- gdist(lon.1 = places$lon[i],
                          lat.1 = places$lat[i],
                          lon.2 = places$lon,
                          lat.2 = places$lat,
                          units = "miles")
}

这会产生以下数据:

dput(dist_list)
list(c(0, 98.3464717885451, 1386.25425677199, 1489.87718040776, 
383.083760289456, 123.232894969413, 140.284537078237, 1209.23510542932, 
706.670452283757, 906.79542720295), c(98.4762434610638, 0, 1472.06660056474, 
1560.93398322985, 285.23618862797, 24.9195071209828, 44.8853561530985, 
1308.60741637919, 805.755084908157, 983.102810248198), c(1389.07354011351, 
1472.06660056474, 0, 356.573530670257, 1712.29111612461, 1493.39302974566, 
1497.2125164277, 579.329313217289, 827.577713357261, 1434.82691622332
), c(1492.80130415651, 1560.93398322985, 356.573530670257, 0, 
1761.3773163288, 1578.71125031146, 1576.80713231756, 923.725006795209, 
1067.04809350934, 1717.32991551111), c(383.551997010915, 285.23618862797, 
1712.29111612461, 1761.3773163288, 0, 260.382178510916, 243.947043197789, 
1588.85470703957, 1088.38640303169, 1230.47219244291), c(123.395655314093, 
24.9195071209827, 1493.39302974566, 1578.71125031146, 260.382178510916, 
0, 24.7382114555287, 1333.29925285915, 830.581742827321, 1002.94777739349
), c(140.431447025301, 44.8853561530986, 1497.2125164277, 1576.80713231756, 
243.947043197789, 24.7382114555285, 0, 1346.44527983873, 844.827513981938, 
1026.98263808807), c(1211.16392416136, 1308.60741637919, 579.329313217289, 
923.725006795209, 1588.85470703957, 1333.29925285915, 1346.44527983873, 
0, 505.292529136012, 925.512554201542), c(707.73957320737, 805.755084908157, 
827.577713357261, 1067.04809350934, 1088.38640303169, 830.581742827321, 
844.827513981938, 505.292529136012, 0, 666.837848781548), c(906.880841903584, 
983.102810248198, 1434.82691622332, 1717.32991551111, 1230.47219244291, 
1002.94777739349, 1026.98263808807, 925.512554201542, 666.837848781548, 
0))

所需的结果将如下所示(第一行):

cities          id         category  month      lat        lon   min.dist  closest city  closest city id  
washington, dc  156952     home      201811 38.90719  -77.03687   98.34647  wilmington, de  154222 

通过 Rfast 包中的 nth 函数,我可以获得第二小的距离:

# Qualify the call: dplyr (attached above) also exports nth(), which returns
# the element at position 2 rather than the 2nd smallest value. Rfast::nth()
# is the rank-based version intended here.
Rfast::nth(dist_list[[1]], 2)

我遇到的问题是,我不知道如何将列表中的信息合并到数据框 places 中。任何帮助或建议将不胜感激。

标签: r, list, dataframe, geocoding, ggmap

解决方案


更新

假设我们只按 month 分组,可以试试下面的代码:

# Find the nearest other row for every row of `df`.
#
# Args:
#   df: data frame with columns `cities`, `id`, `lat` and `lon`.
#
# Returns:
#   A data frame with one row per row of `df` and the columns `min.dist`
#   (miles), `closest_city` and `closest_city_id`; NULL when `df` has no
#   rows.
f <- function(df) {
  n <- nrow(df)
  rows <- lapply(seq_len(n), function(i) {
    # One scalar gdist() call per pair of rows. The arguments are passed by
    # name because gdist() expects longitude before latitude; passing
    # (lat, lon, lat, lon) positionally swaps the two and yields wrong
    # distances (e.g. 104 instead of ~98 miles for washington-wilmington,
    # or 11405 miles for galveston-hollywood).
    x <- vapply(
      seq_len(n),
      function(j) {
        gdist(
          lon.1 = df$lon[i], lat.1 = df$lat[i],
          lon.2 = df$lon[j], lat.2 = df$lat[j],
          units = "miles"
        )
      },
      numeric(1)
    )
    # A zero distance is the row itself (or a point at the same
    # coordinates); mask it so it can never be picked as the nearest.
    x <- replace(x, x == 0, Inf)
    idx <- which.min(x)
    data.frame(
      min.dist = min(x),
      closest_city = df$cities[idx],
      closest_city_id = df$id[idx]
    )
  })
  do.call(rbind, rows)
}

# Run f() separately on the rows of each month and put its three result
# columns next to the original columns of that month's rows.
places %>%
  group_by(month) %>%
  do({
    nearest <- f(.)
    cbind(., nearest)
  }) %>%
  ungroup()

结果如下:

# A tibble: 10 x 9
   cities               id category  month   lat    lon min.dist closest_city   
   <chr>             <int> <chr>     <int> <dbl>  <dbl>    <dbl> <chr>
 1 washington, dc   156952 home     201811  38.9  -77.0   104.   wilmington, de
 2 wilmington, de   154222 work     201811  39.7  -75.5   104.   washington, dc
 3 amarillo, tx     785695 home     201912  35.2 -102.    232.   denver, co
 4 denver, co       154423 home     201912  39.8 -105.    232.   amarillo, tx
 5 needham, ma      971453 home     202005  42.3  -71.2   273.   doylestown, pa
 6 philadelphia, ~  149888 work     202005  40.0  -75.2     6.81 doylestown, pa
 7 doylestown, pa  1356987 cell     202005  40.3  -75.1     6.81 philadelphia, ~
 8 galveston, tx    178946 home     202106  29.2  -94.9 11405.   hollywood, fl
 9 tuscaloosa, al   169944 work     202106  33.2  -87.6   517.   hollywood, fl
10 hollywood, fl    136421 cell     202106  26.0  -80.1   517.   tuscaloosa, al
# ... with 1 more variable: closest_city_id <int>

根据你得到的dist_list,我们可以试试下面的代码

# For each distance vector in dist_list, find the nearest other city:
# zero distances (the city itself) are masked with Inf, then the position
# of the smallest remaining distance is used to look up the city and id in
# `places`. The per-vector one-row data frames are stacked into `closest`.
closest <- do.call(
  rbind,
  lapply(dist_list, function(d) {
    d[d == 0] <- Inf
    nearest <- which.min(d)
    data.frame(
      min.dist = min(d),
      closest_city = places$cities[nearest],
      closest_city_id = places$id[nearest]
    )
  })
)

结果如下:

    min.dist     closest_city closest_city_id
1   98.34647   wilmington, de          154222
2   24.91951 philadelphia, pa          149888
3  356.57353       denver, co          154423
4  356.57353     amarillo, tx          785695
5  243.94704   doylestown, pa         1356987
6   24.73821   doylestown, pa         1356987
7   24.73821 philadelphia, pa          149888
8  505.29253   tuscaloosa, al          169944
9  505.29253    galveston, tx          178946
10 666.83785   tuscaloosa, al          169944

此外,如果您想将上述数据框附加到您现有的places,您可以使用

# `closest` has one row per element of dist_list, so its columns are added
# to `places` by position; column names are kept exactly as they are.
places <- data.frame(places, closest, check.names = FALSE)

推荐阅读