首页 > 解决方案 > 用于凸包敏感性分析的空间多边形重叠百分比

问题描述

为了重现性,让我们将我的问题简化如下:我有 100 个空间多边形,代表从总体(100 次)中抽取的 N 个随机样本的凸包,以计算模型对单个值的敏感性。如何计算这些多边形的重叠百分比?理想的解决方案应该是快速的并尽可能少地引入近似值。

我没有特别的理由使用 R 的 GIS 功能,除了我认为这可能是解决问题的最简单方法。

library(sp)
library(raster)
library(sf)
#> Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1

set.seed(11)

dt <- data.frame(x = rnorm(1e3, 10, 3) + sample(-5:5, 1e3, replace = TRUE))
dt$y <- (rnorm(1e3, 3, 4) + sample(-10:10, 1e3, replace = TRUE)) + dt$x

dt <- rbind(dt, data.frame(x = -dt$x, y = dt$y))

plot(dt, asp = 1)

dt.chull <- dt[chull(dt),]
dt.chull <- rbind(dt.chull, dt.chull[1,])

lines(dt.chull, col = "green")

uncert.polys <- lapply(1:100, function(i) {

tmp <- dt[sample(rownames(dt), 1e2),]

# points(tmp, col = "red")

tmp <- tmp[chull(tmp),]
tmp <- rbind(tmp, tmp[1,])

tmp <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(tmp)), ID = i)))

sp::SpatialPolygonsDataFrame(tmp, data = data.frame(id = i, row.names = i))

# lines(tmp, col = "red")

})

polys <- do.call(rbind, uncert.polys)

plot(polys, add = TRUE, border = "red")

我最初的尝试是使用该sf::st_intersection功能:

sf.polys <- sf::st_make_valid(sf::st_as_sf(polys))
all(sf::st_is_valid(sf.polys))
#> [1] TRUE

sf::st_intersection(sf.polys)
#> Error in CPL_nary_intersection(x): Evaluation error: TopologyException: found non-noded intersection between LINESTRING (-9.80706 -0.619557, -7.66331 -3.55177) and LINESTRING (-9.80706 -0.619557, -9.80706 -0.619557) at -9.8070645468969637 -0.61955676978603658.

该错误可能与“几乎重合但不相同”的多边形线有关。已经提出了多种解决方案(12)来解决这个与 GEOS 相关的问题,但我都没有设法处理我的数据:

sf.polys <- sf::st_set_precision(sf.polys, 1e6) 

sf.polys <- sf::st_snap(sf.polys, sf.polys, tolerance = 1e-4)

sf::st_intersection(sf.polys)
#> Error in CPL_nary_intersection(x): Evaluation error: TopologyException: found non-noded intersection between LINESTRING (-13.7114 32.7341, 3.29417 30.3736) and LINESTRING (3.29417 30.3736, 3.29417 30.3736) at 3.2941702528617176 30.373627946201278.

所以,我必须使用光栅化来近似多边形重叠:

GT <- sp::GridTopology(cellcentre.offset = c(round(min(dt$x),1), round(min(dt$y),1)), 
                       cellsize = c(diff(round(range(dt$x), 1))/100, diff(round(range(dt$y), 1))/100),
                       cells.dim = c(100, 100)
)

SG <- sp::SpatialGrid(GT)

tmp <- lapply(seq_along(uncert.polys), function(i) {
  
  out <- sp::over(SG, uncert.polys[[i]])
  out[!is.na(out)] <- 1
  out[is.na(out)] <- 0
  out
})

tmp <- data.frame(overlapping.n = Reduce("+", lapply(tmp, "[[", 1)))
tmp$overlapping.pr <- 100*tmp$overlapping.n/100

uncert.data <- SpatialGridDataFrame(SG, tmp)

## Plot


plot(x = range(dt$x),
     y = range(dt$y), 
     type = "n"
)

plot(raster::raster(uncert.data), col = colorRampPalette(c("white", "red", "blue", "white"))(100), add = TRUE)
plot(polys, add = TRUE, border = adjustcolor("black", alpha.f = 0.2), cex = 0.5)
points(dt, pch = ".", col = "black", cex = 3)
lines(dt.chull, col = "green")

该方法给出了结果,但输出是近似的,需要很长时间来处理。必须有更好的方法来做到这一点。

出于性能比较的目的,这是我目前的解决方案:

gridOverlap <- function(dt, uncert.polys) {
  GT <- sp::GridTopology(cellcentre.offset = c(round(min(dt$x),1), round(min(dt$y),1)), 
                         cellsize = c(diff(round(range(dt$x), 1))/100, diff(round(range(dt$y), 1))/100),
                         cells.dim = c(100, 100)
  )
  
  SG <- sp::SpatialGrid(GT)
  
  tmp <- lapply(seq_along(uncert.polys), function(i) {
    
    out <- sp::over(SG, uncert.polys[[i]])
    out[!is.na(out)] <- 1
    out[is.na(out)] <- 0
    out
  })
  
  tmp <- data.frame(overlapping.n = Reduce("+", lapply(tmp, "[[", 1)))
  tmp$overlapping.pr <- 100*tmp$overlapping.n/100
  
  SpatialGridDataFrame(SG, tmp)
}

system.time(gridOverlap(dt = dt, uncert.polys = uncert.polys))
#   user  system elapsed 
#   3.011   0.083   3.105 

对于较大的数据集,性能很重要(此解决方案在实际应用中需要几分钟时间)。

reprex 包(v0.3.0)于 2020-09-01 创建

标签: rspatialconvex-hullgeos

解决方案


spatstat 这是使用底层包查找内部而没有任何错误的解决方案polyclip

library(spatstat)

# Data from OP
set.seed(11)
dt <- data.frame(x = rnorm(1e3, 10, 3) + sample(-5:5, 1e3, replace = TRUE))
dt$y <- (rnorm(1e3, 3, 4) + sample(-10:10, 1e3, replace = TRUE)) + dt$x
dt <- rbind(dt, data.frame(x = -dt$x, y = dt$y))

# Converted to spatstat classes (`ppp` not strictly necessary -- just a habit)
X <- as.ppp(dt, W = owin(c(-25,25),c(-15,40)))
p1 <- owin(poly = dt[rev(chull(dt)),])

# Plot of data and convex hull
plot(X, main = "")
plot(p1, add = TRUE, border = "green")

# Convex hulls of sampled points in spatstat format
polys <- lapply(1:100, function(i) {
  tmp <- dt[sample(rownames(dt), 1e2),]
  owin(poly = tmp[rev(chull(tmp)),])
})

# Plot of convex hulls
for(i in seq_along(polys)){
  plot(polys[[i]], add = TRUE, border = "red")
}

# Intersection of all convex hulls plotted in transparent blue
interior <- do.call(intersect.owin, polys)
plot(interior, add = TRUE, col = rgb(0,0,1,0.1))

我不清楚你想从这里做什么,但至少这种方法避免了多边形裁剪的错误。

要执行基于网格的解决方案,spatstat我会将窗口转换为二进制图像掩码,然后从那里开始工作:

Wmask <- as.im(Window(X), dimyx = c(200, 200))
masks <- lapply(polys, as.im.owin, xy = Wmask, na.replace = 0)
maskmean <- Reduce("+", masks)/100
plot(maskmean)

速度取决于您选择的分辨率,但我猜它比当前使用sp/的建议要快得多raster(使用与此处相同的逻辑可能会改进很多,所以这将是坚持的另一个选择raster)。


推荐阅读