r - 使用 st_join 同时按几何和分组列进行连接
问题描述
如果您有空间点和多边形的时间序列数据,如何在进行空间连接/合并的同时,再按非空间变量(例如年份)做“普通”合并?
我有跨多个年份的点数据,想把它们合并到对应年份的多边形中,然后按年份对 xvar 求和(sum):
# Spatial point data by year: 7 point locations (id), each observed in 2-4
# consecutive years starting in 2016, with a random integer value xvar.
library(sf)
set.seed(10) # fix the RNG so the sampled xvar values are reproducible
df_point <- data.frame(
  id = c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3,
         4, 4, 5, 5,
         6, 6, 7, 7),
  year = c(2016, 2017, 2018, 2019, 2016, 2017, 2018, 2019, 2016, 2017,
           2016, 2017, 2016, 2017,
           2016, 2017, 2016, 2017),
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  xvar = sample(1:10, 18, replace = TRUE)
)
# Build all 18 coordinates as a single MULTIPOINT, then cast it to 18 separate
# POINT geometries, one per row of df_point (repeated rows = same location in
# different years).
df_point$geometry <- st_cast(
  st_sfc(st_multipoint(rbind(
    c(.1, .2), c(.1, .2), c(.1, .2), c(.1, .2),
    c(.3, 1), c(.3, 1), c(.3, 1), c(.3, 1),
    c(1, 1), c(1, 1),
    c(2, 2.1), c(2, 2.1), c(2.2, 2.4), c(2.2, 2.4),
    c(4, 2.1), c(4, 2.1), c(4, 2.2), c(4, 2.2)
  ))),
  "POINT"
)
df_point <- st_as_sf(df_point)
df_point
# Simple feature collection with 18 features and 3 fields
# geometry type: POINT
# dimension: XY
# bbox: xmin: 0.1 ymin: 0.2 xmax: 4 ymax: 2.4
# CRS: NA
# First 10 features:
# id year xvar geometry
# 1 1 2016 9 POINT (0.1 0.2)
# 2 1 2017 10 POINT (0.1 0.2)
# 3 1 2018 7 POINT (0.1 0.2)
# 4 1 2019 8 POINT (0.1 0.2)
# 5 2 2016 6 POINT (0.3 1)
# 6 2 2017 7 POINT (0.3 1)
# 7 2 2018 3 POINT (0.3 1)
# 8 2 2019 8 POINT (0.3 1)
# 9 3 2016 10 POINT (1 1)
# 10 3 2017 7 POINT (1 1)
和多边形数据:
# Polygon data: the same 3 polygons (poly_id 1-3) repeated for each year
# 2016-2019, giving 12 rows.
df_poly <- data.frame(
  poly_id = rep(c(1, 2, 3), times = 4),
  year = rep(2016:2019, each = 3)
)
# A 2 x 2 square with its lower-left corner at the origin.
pol <- st_polygon(list(rbind(c(0, 0), c(2, 0), c(2, 2), c(0, 2), c(0, 0))))
# Three squares: the original plus copies shifted by (2, 2) and by (4, 0.8).
b <- st_sfc(pol, pol + c(2, 2), pol + c(4, .8))
# One copy of the three polygons per year. NOTE: the column name `geomtry`
# (sic) is kept as-is because every printed output on this page shows it.
df_poly$geomtry <- c(b, b, b, b)
df_poly <- st_as_sf(df_poly)
df_poly
# Simple feature collection with 12 features and 2 fields
# geometry type: POLYGON
# dimension: XY
# bbox: xmin: 0 ymin: 0 xmax: 6 ymax: 4
# CRS: NA
# First 10 features:
# poly_id year geomtry
# 1 1 2016 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 2 2 2016 POLYGON ((2 2, 4 2, 4 4, 2 ...
# 3 3 2016 POLYGON ((4 0.8, 6 0.8, 6 2...
# 4 1 2017 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 5 2 2017 POLYGON ((2 2, 4 2, 4 4, 2 ...
# 6 3 2017 POLYGON ((4 0.8, 6 0.8, 6 2...
# 7 1 2018 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 8 2 2018 POLYGON ((2 2, 4 2, 4 4, 2 ...
# 9 3 2018 POLYGON ((4 0.8, 6 0.8, 6 2...
# 10 1 2019 POLYGON ((0 0, 2 0, 2 2, 0 ...
期望的输出:
df_sf_merge
# Simple feature collection with 12 features and 3 fields
# geometry type: POLYGON
# dimension: XY
# bbox: xmin: 0 ymin: 0 xmax: 6 ymax: 4
# CRS: NA
# poly_id year total_sum geomtry
# 1 1 2016 25 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 2 2 2016 32 POLYGON ((2 2, 4 2, 4 4, 2 ...
# 3 3 2016 14 POLYGON ((4 0.8, 6 0.8, 6 2...
# 4 1 2017 24 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 5 2 2017 22 POLYGON ((2 2, 4 2, 4 4, 2 ...
# 6 3 2017 12 POLYGON ((4 0.8, 6 0.8, 6 2...
# 7 1 2018 10 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 8 2 2018 NA POLYGON ((2 2, 4 2, 4 4, 2 ...
# 9 3 2018 NA POLYGON ((4 0.8, 6 0.8, 6 2...
# 10 1 2019 16 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 11 2 2019 NA POLYGON ((2 2, 4 2, 4 4, 2 ...
# 12 3 2019 NA POLYGON ((4 0.8, 6 0.8, 6 2...
对于单个时间点,一般的做法是这样的:
# Single-time-point approach: spatially join the points to the polygons, then
# sum xvar within each polygon. With yearly data the join would also have to
# match on year, and after st_join() the year columns are named year.x and
# year.y, so this attempt does not work as written.
library(dplyr) # provides %>%, group_by() and summarise()
df_sf_merge <- df_poly %>%
  st_join(df_point) %>% # and a merge on year?
  group_by(poly_id, year) %>% # year.x or year.y?
  summarise(total_sum = sum(xvar, na.rm = TRUE))
但这行不通,因为空间连接会把每个多边形与所有年份的点进行匹配,从而产生重复的行:
# Spatial join only: each polygon row is matched to the intersecting points of
# every year (year.x = polygon year, year.y = point year), which is where the
# duplicated rows come from. The result is sorted by point id, then polygon
# year.
df_sf_merge <- dplyr::arrange(st_join(df_poly, df_point), id, year.x)
df_sf_merge
# Simple feature collection with 88 features and 5 fields
# geometry type: POLYGON
# dimension: XY
# bbox: xmin: 0 ymin: 0 xmax: 6 ymax: 4
# CRS: NA
# First 10 features:
# poly_id year.x id year.y xvar geomtry
# 1 1 2016 1 2016 9 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 2 1 2016 1 2017 10 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 3 1 2016 1 2018 7 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 4 1 2016 1 2019 8 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 5 1 2017 1 2016 9 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 6 1 2017 1 2017 10 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 7 1 2017 1 2018 7 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 8 1 2017 1 2019 8 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 9 1 2018 1 2016 9 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 10 1 2018 1 2017 10 POLYGON ((0 0, 2 0, 2 2, 0 ...
我可以事后粗略地删除这些重复行,但我希望一开始就不要产生重复,因为在处理大文件时这会大大拖慢处理速度。
我不确定是否可以同时进行空间连接和普通连接,但我相信应该有更简单的解决办法?
有什么建议么?谢谢
解决方案
一种解决方案是将两个数据框分别拆分为列表,每个列表由各年份的单独数据框组成,然后使用 map2() 逐年配对处理。这样,2016 年的点只会通过 st_join() 连接到 2016 年的多边形,2017 年的点只会连接到 2017 年的多边形,依此类推。
map2_dfr() 与 map2() 相同,只是它会把结果列表合并为一个数据框。
library(dplyr)
library(purrr)

# Split both layers by year so that each year's points are joined only to the
# same year's polygons. The year column is dropped from the point layer first
# so the join does not produce year.x and year.y.
df_point_list <- split(select(df_point, -year), df_point$year)
df_poly_list <- split(df_poly, df_poly$year)

# map2_dfr() pairs the two lists by position, so both must hold exactly the
# same years in the same order; fail loudly instead of mis-pairing years.
stopifnot(identical(names(df_poly_list), names(df_point_list)))

# For each year: join that year's points to that year's polygons, sum xvar per
# polygon, then row-bind the yearly results into one object.
df_sf_merge <- map2_dfr(
  df_poly_list, df_point_list,
  function(polys, pts) {
    polys %>%
      st_join(pts) %>%
      group_by(poly_id, year) %>%
      summarise(
        # A polygon with no matching point has only NA in xvar; report NA
        # (as in the desired output) rather than the 0 that
        # sum(..., na.rm = TRUE) would give.
        total_sum = if (all(is.na(xvar))) {
          NA_integer_
        } else {
          sum(xvar, na.rm = TRUE)
        }
      )
  }
)
df_sf_merge
# Simple feature collection with 12 features and 3 fields
# geometry type: POLYGON
# dimension: XY
# bbox: xmin: 0 ymin: 0 xmax: 6 ymax: 4
# CRS: NA
# First 10 features:
# poly_id year total_sum geomtry
# 1 1 2016 25 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 2 2 2016 32 POLYGON ((2 2, 4 2, 4 4, 2 ...
# 3 3 2016 14 POLYGON ((4 0.8, 6 0.8, 6 2...
# 4 1 2017 24 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 5 2 2017 22 POLYGON ((2 2, 4 2, 4 4, 2 ...
# 6 3 2017 12 POLYGON ((4 0.8, 6 0.8, 6 2...
# 7 1 2018 10 POLYGON ((0 0, 2 0, 2 2, 0 ...
# 8 2 2018 NA POLYGON ((2 2, 4 2, 4 4, 2 ...
# 9 3 2018 NA POLYGON ((4 0.8, 6 0.8, 6 2...
# 10 1 2019 16 POLYGON ((0 0, 2 0, 2 2, 0 ...
推荐阅读
- java - Spring - 根据数据库决定配置文件
- microsoft-graph-api - 使用 Bot 向 Microsoft Teams 会议发送消息
- html - 如何导出 pdf 或发送包含包含边框的 html 的电子邮件?
- react-native - 在设置 redux 状态后呈现经过身份验证和未经身份验证的条件堆栈导航器
- python - 为什么滚动与我的网页抓取效果不佳?
- mqtt - 使用 MQTT 和 Solace 限制/保护主题
- javascript - javascript为谷歌地图latlng添加数组值lat和lng的键
- google-sheets-formula - 如何在有条件的情况下从 Google 表格获取数据到另一个 Google 表格
- python - 带有熊猫df的Python列表,似乎没有复制到新列表?
- angular - 如何在Angular中动态更改输入标签?