r - 使用 for 循环进行地理编码并写入数据框时,只有第一次迭代生效(在 R 中)
问题描述
这是我需要帮助的时候,因为我已经尝试了一切来解决我的 FOR 循环的问题。我想使用 API 从地址进行地理编码,我使用了一个非常清晰的函数,以及更多的数据框来安排每个步骤的结果并检查是否存在问题,但现在我找不到更多...
addresses:是我的数据框,其中包含地址列(address),地理编码的结果也将写入这个数据框
"address_ID","address","accuracy","lon_geop","lat_geop","address_geop","geopID","success"
1,"4 Kiricheneck 9990"
2,"10 Kiricheneck 9990"
3,"26 Kiricheneck 9990"
4,"27 Kiricheneck 9990"
5,"6 Avenue D'oberkorn 4640"
代码:
# Base URL of the GeoPortail geocoding endpoint; the address is appended as the
# value of the `queryString` parameter.
plcUrl <- "https://apiv3.geoportail.lu/geocode/search?queryString="
# Geocode an address through the GeoPortail API and return a one-row data frame
# with the columns lat, lon, accuray, address_geop, success, geopID and status
# (plus accuracy when a match is found).
#
# NOTE(review): this is the code as posted in the question; the notes below
# mark the spots that explain the behaviour described after the listing.
getGeoDetails <- function(address)
{
# NOTE(review): the `address` argument is never used; the query is built from
# the global `addresses$address`, i.e. the whole column on every call.
query <- paste(addresses$address)
# NOTE(review): paste() separates its arguments with a space by default, so a
# space ends up between the URL and the query; strurl has one element per address.
strurl <- as.character(paste(plcUrl,query))
rd <- fromJSON(URLencode(strurl))
# Flatten the parsed reply into a single column of values.
# NOTE(review): nrow = 22 hard-codes the number of flattened fields; presumably
# taken from one sample reply - confirm it holds for every reply.
df <- data.frame(matrix(unlist(rd), nrow = 22, byrow = T),stringsAsFactors = FALSE)
colnames(df)[1] <- "results_geop"
# NOTE(review): the column is spelled `accuray` here but `accuracy` is assigned
# further down, so a matched answer carries both columns.
answer <- data.frame(lat = NA, lon = NA, accuray = NA, address_geop = NA, success = NA, geopID = NA)
# Element 22 of the flattened reply is used as the success flag.
answer$status <- df$results_geop[22]
#return Na's if we didn't get a match
if (df$results_geop[22] != "TRUE")
{
return(answer)
}
#else, extract what we need from the GeoPortail server reply into a dataframe
# The fields are picked by fixed position in the flattened reply.
answer$lat <- df$results_geop[9]
answer$lon <- df$results_geop[8]
answer$accuracy <- df$results_geop[21]
answer$geopID <- df$results_geop[19]
answer$address_geop <- df$results_geop[6]
answer$success <- df$results_geop[22]
return(answer)
}
# Driver script: geocode the rows of `addresses` one at a time and copy the
# results back into the same data frame.
#
# NOTE(review): this is the code as posted in the question; the notes below
# mark why only the first row ends up filled.
#initialise a dataframe to hold the results
geocoded <- data.frame()
startindex <- 1
# NOTE(review): this is a vector holding every row name, not a row count.
row_addresses <- as.numeric(rownames(addresses))
# Start the geocoding process - address by address
# NOTE(review): `:` only uses the first element of `row_addresses`, which is the
# source of the "only the first element used" warning; the loop therefore runs
# over startindex:row_addresses[1].
for (j in startindex:row_addresses)
{
#query the GeoPortail geocoder
# NOTE(review): addresses[j] selects column j of the data frame, not row j.
result = getGeoDetails(addresses[j])
print(result$status)
result$index <- j
#append the answer to the results file
geocoded <- rbind(geocoded, result)
#now we add all the results to the main data
addresses$lat_geop[j] <- geocoded$lat[j]
addresses$lon_geop[j] <- geocoded$lon[j]
addresses$accuracy[j] <- geocoded$accuracy[j]
addresses$address_geop[j] <- geocoded$address_geop[j]
addresses$geopID[j] <- geocoded$geopID[j]
addresses$success[j] <- geocoded$success[j]
# NOTE(review): return() is called here outside of any function; at top level R
# reports "no function to return from" - confirm this is not intended.
return(j)}
最后: 警告信息:在 startindex:row_addresses 中:数值表达式有 5 个元素:仅使用第一个
并且addresses数据框只有第一行效果好,其他都是空的。我试过了:
- 索引:for(i in 1:x)
- 为结果和循环构建空数据框(索引 i):d[i, ] = c(x, y, z)
- break 命令
- next 命令
还没有任何帮助......我的其他 for 循环可以完成这项工作,所以这非常令人沮丧。
解决方案
让我们制作一些可用的数据:
# Sample input: five Luxembourg addresses keyed by a sequential integer ID.
# Strings are kept as character vectors rather than factors.
xdf <- data.frame(
  address_ID = 1:5,
  address = c(
    "4 Kiricheneck 9990",
    "10 Kiricheneck 9990",
    "26 Kiricheneck 9990",
    "27 Kiricheneck 9990",
    "6 Avenue D'oberkorn 4640"
  ),
  stringsAsFactors = FALSE
)
现在,让我们为该端点制作一个适当的 API 包装器:
# Geocode one free-text address with the Luxembourg Geoportail search API.
#
# Args:
#   query: character vector; only the first element is sent as `queryString`.
#
# Returns:
#   A data.frame with one row per result in the reply, holding the selected
#   result fields, the `AddressDetails` columns, `type`/`lng`/`lat` taken from
#   `geomlonlat` and `geom`/`geom_x`/`geom_y` taken from `geom`.
#   An empty data.frame (no rows, no columns), together with a warning, when
#   the reply does not report success or carries no results.
#
# Raises:
#   An error when httr or jsonlite is not installed, and on HTTP errors
#   (via httr::stop_for_status).
geoportail_geocode <- function(query) {

  # Every call below is namespace-qualified, so the packages only need to be
  # installed; nothing is attached to the caller's search path.
  for (pkg in c("httr", "jsonlite")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package '", pkg, "' is required by geoportail_geocode()", call. = FALSE)
    }
  }

  # Fallback operator: use `y` when `x` is empty/NULL.
  `%||%` <- function(x, y) {
    if (length(x) > 0) x else y
  }

  # Extract the idx-th ordinate for each of the n results. Each row gets its
  # own value (instead of recycling the first result's coordinates), and
  # anything that is not a plain numeric point yields NA.
  ordinate <- function(coords, idx, n) {
    if (is.matrix(coords) && is.numeric(coords) &&
        nrow(coords) == n && ncol(coords) >= idx) {
      return(as.numeric(coords[, idx]))
    }
    if (is.list(coords) && !is.data.frame(coords) && length(coords) == n) {
      return(vapply(
        coords,
        function(pt) {
          if (is.numeric(pt) && length(pt) >= idx) as.numeric(pt[idx]) else NA_real_
        },
        numeric(1),
        USE.NAMES = FALSE
      ))
    }
    rep(NA_real_, n)
  }

  res <- httr::GET(
    url = "https://apiv3.geoportail.lu/geocode/search",
    httr::user_agent("geoportail_geocode R function used by me@example.com"), # you should add your email to this string
    query = list(
      queryString = query[1]
    )
  )

  httr::stop_for_status(res) # halts on API/network errors; you may not want this but it's standard practice in API packages

  out <- httr::content(res, as = "text", encoding = "UTF-8")
  out <- jsonlite::fromJSON(out)

  # The reply only looks valid when it reports success AND carries results.
  has_results <- isTRUE(out$success) &&
    is.data.frame(out$results) &&
    nrow(out$results) > 0

  if (!has_results) {
    warning("Error in geocoding")
    return(data.frame(stringsAsFactors = FALSE))
  }

  # STILL ASSUMES Point GEOMETRIES ARE BEING RETURNED; other geometry types
  # end up with NA coordinates rather than wrong ones.
  hits <- out$results
  n_hits <- nrow(hits)

  ret <- hits[, c("ratio", "name", "easting", "address", "northing", "matching street", "accuracy")]
  ret <- cbind.data.frame(ret, hits$AddressDetails)

  ret$type <- hits$geomlonlat$type %||% NA_character_
  ret$lng <- ordinate(hits$geomlonlat$coordinates, 1L, n_hits)
  ret$lat <- ordinate(hits$geomlonlat$coordinates, 2L, n_hits)

  ret$geom <- hits$geom$type %||% NA_character_
  ret$geom_x <- ordinate(hits$geom$coordinates, 1L, n_hits)
  ret$geom_y <- ordinate(hits$geom$coordinates, 2L, n_hits)

  ret
}
我们先对一个地址试一下:
# Geocode the first sample address and print the structure of the returned
# data.frame.
str(geoportail_geocode(xdf$address[1]))
## 'data.frame': 1 obs. of 19 variables:
## $ ratio : num 1
## $ name : chr "4,Kiricheneck 9990 Weiswampach"
## $ easting : num 73344
## $ address : chr "4 Kiricheneck,9990 Weiswampach"
## $ northing : num 133788
## $ matching street : chr "Kiricheneck"
## $ accuracy : int 8
## $ zip : chr "9990"
## $ locality : chr "Weiswampach"
## $ id_caclr_street : chr "8188"
## $ street : chr "Kiricheneck"
## $ postnumber : chr "4"
## $ id_caclr_building: chr "181679"
## $ type : chr "Point"
## $ lng : num 6.08
## $ lat : num 50.1
## $ geom : chr "Point"
## $ geom_x : num 73344
## $ geom_y : num 133788
然后使用 tidyverse 来完成全部操作,并像躲避瘟疫一样避免 for 循环,因为这不是 Java,也不是讨厌的 Python:
# Geocode every address with purrr::map_df (one geoportail_geocode call per
# element, results bound by row), attach the result columns to the input with
# dplyr::bind_cols, and print the structure of the combined data.frame.
# NOTE(review): bind_cols needs matching row counts, so this presumably relies
# on every address yielding exactly one result row - confirm.
str(dplyr::bind_cols(
xdf,
purrr::map_df(xdf$address, geoportail_geocode)
))
## 'data.frame': 5 obs. of 21 variables:
## $ address_ID : int 1 2 3 4 5
## $ address : chr "4 Kiricheneck 9990" "10 Kiricheneck 9990" "26 Kiricheneck 9990" "27 Kiricheneck 9990" ...
## $ ratio : num 1 1 1 1 1
## $ name : chr "4,Kiricheneck 9990 Weiswampach" "10,Kiricheneck 9990 Weiswampach" "26,Kiricheneck 9990 Weiswampach" "27,Kiricheneck 9990 Weiswampach" ...
## $ easting : num 73344 73280 73203 73241 60462
## $ address1 : chr "4 Kiricheneck,9990 Weiswampach" "10 Kiricheneck,9990 Weiswampach" "26 Kiricheneck,9990 Weiswampach" "27 Kiricheneck,9990 Weiswampach" ...
## $ northing : num 133788 133732 133622 133591 65234
## $ matching street : chr "Kiricheneck" "Kiricheneck" "Kiricheneck" "Kiricheneck" ...
## $ accuracy : int 8 8 8 8 8
## $ zip : chr "9990" "9990" "9990" "9990" ...
## $ locality : chr "Weiswampach" "Weiswampach" "Weiswampach" "Weiswampach" ...
## $ id_caclr_street : chr "8188" "8188" "8188" "8188" ...
## $ street : chr "Kiricheneck" "Kiricheneck" "Kiricheneck" "Kiricheneck" ...
## $ postnumber : chr "4" "10" "26" "27" ...
## $ id_caclr_building: chr "181679" "181752" "181672" "181668" ...
## $ type : chr "Point" "Point" "Point" "Point" ...
## $ lng : num 6.08 6.07 6.07 6.07 5.9
## $ lat : num 50.1 50.1 50.1 50.1 49.5
## $ geom : chr "Point" "Point" "Point" "Point" ...
## $ geom_x : num 73344 73280 73203 73241 60462
## $ geom_y : num 133788 133732 133622 133591 65234
如函数代码中的注释所述,stop_for_status 调用会在请求出错时终止该函数,因此您可能更希望改用 warn_for_status,然后检查响应的状态代码,并在失败时返回一个空的 data.frame(stringsAsFactors=FALSE)。
推荐阅读
- vb.net - 如何在没有循环的情况下直接查找多个文本
- javascript - 根据下面给出的逻辑实现父子关系逻辑
- excel - 复制转置粘贴到下一行的基本代码
- mongodb - 将 AWS EC2 Cloud9 连接到 mongoDB Atlas 集群的最佳方法是什么?
- python - ImportError:没有命名的模块
文件夹内的模块 - r - R中的并行计算:多核“分叉”和雪“分叉”有什么区别?
- java - 使用此模板找不到 div xpath
- javascript - 等待与 Promise 或 async/await 的连接
- vue.js - VueJS分页不会改变数据
- postgresql - psycopg2 无法连接到 dockerized Python-Flask 应用程序中的 postgres 数据库