r - r 循环优化(带有 sf 和 data.table 的 getMinBBox)
问题描述
我有一些问题如何加快我的代码。
我有一个超过 100000 个多边形的 shp,我想为每个多边形计算最小边界框。我在这里找到了这部分的一些代码:https ://rdrr.io/cran/flightplanning/src/R/utils.R
代码一次从一个多边形的矩阵形式的坐标中计算最小边界框。
我使用 loop 和 data.table 但对于更大的 shp(50000+ 多边形),这个过程真的很耗时。
是否可以在 data.table 中使用带有 lapply 的 getMinBBox()?
谢谢你
我的代码:
library(sf)
library(data.table)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc
nc=st_cast(nc,"POLYGON")
cor=as.data.frame(st_coordinates(nc))
cor
setDT(cor)
setDT(nc)
data <- data.table(width=numeric(), height=numeric())
for(i in 1:nrow(nc)){
l=as.matrix(cor[L2==i,1:2])
l=getMinBBox(l)
data=rbind(data,t(l[2:3]))}
nc=cbind(nc,data)
nc
## execution time for each part of code inside loop
library(microbenchmark)
mb=microbenchmark(
l<-as.matrix(cor[L2==1,1:2]),
ll<-getMinBBox(l),
data<-rbind(data,t(ll[2:3]))
)
Unit: microseconds
expr min lq mean median uq max neval cld
l <- as.matrix(cor[L2 == 1, 1:2]) 2144.7 2478.40 2606.981 2574.05 2711.55 4396.3 100 c
ll <- getMinBBox(l) 885.3 976.05 1050.203 1054.10 1103.55 1788.3 100 b
data <- rbind(data, t(ll[2:3])) 152.6 182.95 207.617 201.60 224.05 546.1 100 a
## getMinBBox code from https://rdrr.io/cran/flightplanning/src/R/utils.R
getMinBBox <- function(xy) {
stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
## rotating calipers algorithm using the convex hull
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
## unit basis vectors for all subspaces spanned by the hull edges
hDir <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
## unit basis vectors for the orthogonal subspaces
## rotation by 90 deg -> y' = x, x' = -y
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
## project hull vertices on the subspaces spanned by the hull edges, and on
## the subspaces spanned by their orthogonal complements - in subspace coords
projMat <- rbind(huDir, ouDir) %*% t(hull)
## range of projections and corresponding width/height of bounding rectangle
rangeH <- matrix(numeric(n*2), ncol=2) ## hull edge
rangeO <- matrix(numeric(n*2), ncol=2) ## orthogonal subspace
widths <- numeric(n)
heights <- numeric(n)
for(i in seq(along=numeric(n))) {
rangeH[i, ] <- range(projMat[ i, ])
## the orthogonal subspace is in the 2nd half of the matrix
rangeO[i, ] <- range(projMat[n+i, ])
widths[i] <- abs(diff(rangeH[i, ]))
heights[i] <- abs(diff(rangeO[i, ]))
}
## extreme projections for min-area rect in subspace coordinates
## hull edge leading to minimum-area
eMin <- which.min(widths*heights)
hProj <- rbind( rangeH[eMin, ], 0)
oProj <- rbind(0, rangeO[eMin, ])
## move projections to rectangle corners
hPts <- sweep(hProj, 1, oProj[ , 1], "+")
oPts <- sweep(hProj, 1, oProj[ , 2], "+")
## corners in standard coordinates, rows = x,y, columns = corners
## in combined (4x2)-matrix: reverse point order to be usable in polygon()
## basis formed by hull edge and orthogonal subspace
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
## angle of longer edge pointing up
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
#' Provided an angle, calculate the corresponding minimum bounding box
#'
#' @param vertices the vertices which to get the bounding box from
#' @param alpha the angle to rotate the bounding box
#'
#' @importFrom grDevices chull
getBBoxAngle = function(vertices, alpha) {
centroid = apply(vertices, 2, mean)
angleRadians = (90-alpha) * pi/180
rotatedX = cos(angleRadians) * (vertices[, 1] - centroid[1]) -
sin(angleRadians) * (vertices[, 2] - centroid[2]) +
centroid[1]
rotatedY = sin(angleRadians) * (vertices[, 1] - centroid[1]) +
cos(angleRadians) * (vertices[, 2] - centroid[2]) +
centroid[2]
bBox = cbind(range(rotatedX), range(rotatedY))
height = diff(bBox[,2])
width = diff(bBox[,1])
bBox = rbind(bBox[1,],
c(bBox[1, 1], bBox[2, 2]),
(bBox[2, ]),
c(bBox[2, 1], bBox[1, 2]))
bBoxUnrotatedX = cos(-angleRadians) * (bBox[, 1] - centroid[1]) -
sin(-angleRadians) * (bBox[, 2] - centroid[2]) +
centroid[1]
bBoxUnrotatedY = sin(-angleRadians) * (bBox[, 1] - centroid[1]) +
cos(-angleRadians) * (bBox[, 2] - centroid[2]) +
centroid[2]
unrotatedBbox = cbind(bBoxUnrotatedX, bBoxUnrotatedY)
list(angle=alpha, height=height, width=width, pts=unrotatedBbox)
}
#' Given a xy matrix of points, adjust the
#' points to avoid acute angles < 80 degrees
#'
#' @param xy xy dataframe
#' @param angle angle of the flight lines
#' @param minAngle the minimum angle to below which will be projected
adjustAcuteAngles = function(xy, angle, minAngle = 80) {
xy_mat = as.matrix(xy[,1:2])
rads = angle*pi/180
nPoints = nrow(xy)
# Using cosine rule on vectors
# acos(theta) = (AB_vec x BC_vec) / (|AB| * |BC|)
vectors = xy[-1,]-xy[-nPoints,]
# Final angles for each point
angles = getAngles(xy_mat)
# Mask angles less than minimum
mask = angles <= minAngle
mask[is.na(mask)] = FALSE
# Index of point with angles less than minimum
indices = (seq_along(mask)+1)[mask]
indicesOffset = (indices %% 2 == 1) * 2 - 1
# Calculate perpendicular lines for points
x = xy[indices, 1]
y = xy[indices, 2]
nIndices = length(indices)
a = rep(tan(rads), nIndices)
b = y - a * x
a_perpendicular = -1/a
b_perpendicular = y - a_perpendicular*x
b2 = b_perpendicular
a2 = a_perpendicular
# Intersect previous straight line with the perpendicular
x = xy[indices-indicesOffset, 1]
y = xy[indices-indicesOffset, 2]
a1 = a
b1 = y - a1 * x
xs = (b2-b1)/(a1-a2)
ys = a2*xs+b2
# Replace angled points
xy[indices-indicesOffset, 1] = xs
xy[indices-indicesOffset, 2] = ys
return(xy)
}
#' Create outer curves for the flight lines
#'
#' @param waypoints the waypoints of the flight plan
#' @param angle angle for the flight lines
#' @param flightLineDistance the distance between the flight lines in meters
outerCurvePoints = function(waypoints, angle, flightLineDistance) {
mask = getAngles(waypoints) == 180
mask[is.na(mask)] = TRUE
rads = angle*pi/180
angularCoef = tan(rads)
nWaypoints = nrow(waypoints)
oddPoints = seq(1, nWaypoints, 2)
evenPoints = seq(2, nWaypoints, 2)
offsetX = waypoints[evenPoints,1]-waypoints[oddPoints,1]
intercept = waypoints[oddPoints, 2] - angularCoef*waypoints[oddPoints, 1]
xMove = rep(flightLineDistance / sqrt(1 + angularCoef**2), nWaypoints)
isNegative = rep(offsetX < 0, each=2)
isNegative[seq(1, nWaypoints, 2)] = !isNegative[seq(1, nWaypoints, 2)]
xMove[isNegative] = -xMove[isNegative]
yMove = xMove * angularCoef
xs = waypoints[, 1] + xMove
ys = waypoints[, 2] + yMove
curvePoints = as.data.frame(cbind(xs, ys))
curvePoints$index = seq(1, nWaypoints)
curvePoints$before = (curvePoints$index %% 2) == 1
curvePoints = curvePoints[!c(FALSE, mask),]
return(curvePoints)
}
#' Get angles for each point considering
#' the two neighbors points
#'
#' @param waypoints the waypoints of the flight plan
getAngles = function(waypoints) {
nPoints = nrow(waypoints)
vectors = waypoints[-1,]-waypoints[-nPoints,]
dists = sqrt(apply(vectors ** 2, 1, sum))
ab = vectors[-nPoints+1,]
bc = vectors[-1,]
dist_ab = dists[-nPoints+1]
dist_bc = dists[-1]
dotProds = sapply(seq_len(nPoints-2), function(x) ab[x,] %*% bc[x,])
# Final angles for each point
angles = suppressWarnings(180-round(acos(dotProds/(dist_ab*dist_bc))*180/pi,2))
angles
}
#' Class for Flight Parameters
setClass("Flight Parameters",
slots = c(
flight.line.distance = "numeric",
flight.speed.kmh = "numeric",
front.overlap = "numeric",
gsd = "numeric",
height = "numeric",
ground.height = "numeric",
minimum.shutter.speed = "character",
photo.interval = "numeric"
),
prototype = list(
flight.line.distance = NA_real_,
flight.speed.kmh = NA_real_,
front.overlap = NA_real_,
gsd = NA_real_,
ground.height = NA_real_,
height = NA_real_,
minimum.shutter.speed = NA_character_,
photo.interval = NA_real_
)
)
解决方案
示例代码非常小,如果您查看更大的 data.tables,某些方法可能会更快。在我看来,您可以通过拆分组合方法来节省一些时间,原则上,您可以并行运行,例如通过使用mclapply
而不是lapply
(在 linux/macos 上)。如果您使用rbindlist
而不是rbind
和转置,您可能也会快一点。在我的机器上,这种方法几乎将这个小集合的执行时间减半。mclapply(co, gmb2, mc.cores=4L)
如果单个 L2 组较大,使用或类似方法可能是有益的,但只会损害这个小数据集的性能。如果不改变getMinBBox
函数的代码,这至少会减少一些时间,可能值得一试。
附带说明:使用 openblas 或 Intel 的 MKL 而不是默认的 R 库可以对矩阵计算产生很大影响,因此您绝对应该确保使用它们。
# function code
getMinBBox <- function(xy) {
stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
## rotating calipers algorithm using the convex hull
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
## unit basis vectors for all subspaces spanned by the hull edges
hDir <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
## unit basis vectors for the orthogonal subspaces
## rotation by 90 deg -> y' = x, x' = -y
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
## project hull vertices on the subspaces spanned by the hull edges, and on
## the subspaces spanned by their orthogonal complements - in subspace coords
projMat <- rbind(huDir, ouDir) %*% t(hull)
## range of projections and corresponding width/height of bounding rectangle
rangeH <- matrix(numeric(n*2), ncol=2) ## hull edge
rangeO <- matrix(numeric(n*2), ncol=2) ## orthogonal subspace
widths <- numeric(n)
heights <- numeric(n)
for(i in seq(along=numeric(n))) {
rangeH[i, ] <- range(projMat[ i, ])
## the orthogonal subspace is in the 2nd half of the matrix
rangeO[i, ] <- range(projMat[n+i, ])
widths[i] <- abs(diff(rangeH[i, ]))
heights[i] <- abs(diff(rangeO[i, ]))
}
## extreme projections for min-area rect in subspace coordinates
## hull edge leading to minimum-area
eMin <- which.min(widths*heights)
hProj <- rbind( rangeH[eMin, ], 0)
oProj <- rbind(0, rangeO[eMin, ])
## move projections to rectangle corners
hPts <- sweep(hProj, 1, oProj[ , 1], "+")
oPts <- sweep(hProj, 1, oProj[ , 2], "+")
## corners in standard coordinates, rows = x,y, columns = corners
## in combined (4x2)-matrix: reverse point order to be usable in polygon()
## basis formed by hull edge and orthogonal subspace
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
## angle of longer edge pointing up
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
#> Reading layer `nc' from data source `/home/b/R/x86_64-pc-linux-gnu-library/4.0/sf/shape/nc.shp' using driver `ESRI Shapefile'
#> Simple feature collection with 100 features and 14 fields
#> geometry type: MULTIPOLYGON
#> dimension: XY
#> bbox: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
#> epsg (SRID): 4267
#> proj4string: +proj=longlat +datum=NAD27 +no_defs
height = NA_real_,
minimum.shutter.speed = NA_character_,
photo.interval = NA_real_
)
)
# common code:
lapply(c("sf", "data.table", "microbenchmark"),
require, character.only = TRUE)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc <- st_cast(nc,"POLYGON")
#> Warning in st_cast.sf(nc, "POLYGON"): repeating attributes for all sub-
#> geometries for which they may not be constant
coor <- as.data.frame(st_coordinates(nc))
setDT(coor)
setDT(nc)
# your code (wrapped in a function):
getbox1 <- function(){
data <- data.table(width=numeric(), height=numeric())
for(i in 1:nrow(nc)){
l=as.matrix(coor[L2==i,1:2])
ll=getMinBBox(l)
data=rbind(data,t(ll[2:3]))}
data
}
## split - apply code:
gmb2 <- function(i) getMinBBox(as.matrix(i[,1:2]))[c(2:3)]
getbox2 <- function(){
co <- split(coor, coor$L2)
rbindlist(lapply(co, gmb2))
}
microbenchmark(b1 = getbox1(), b2 = getbox2())
#> Unit: milliseconds
#> expr min lq mean median uq max neval cld
#> b1 7.804903 8.391429 20.539996 9.870947 38.71889 66.95236 100 b
#> b2 3.956369 4.186426 9.919638 4.562312 19.13847 47.27808 100 a
最后的编辑:您可以通过稍微简化一下 getMinBBox 函数来提高性能。
# original function code (removed some comments for brevity)
getMinBBox <- function(xy) {
stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
hDir <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
projMat <- rbind(huDir, ouDir) %*% t(hull)
rangeH <- matrix(numeric(n*2), ncol=2) ## hull edge
rangeO <- matrix(numeric(n*2), ncol=2) ## orthogonal subspace
widths <- numeric(n)
heights <- numeric(n)
for(i in seq(along=numeric(n))) {
rangeH[i, ] <- range(projMat[ i, ])
rangeO[i, ] <- range(projMat[n+i, ])
widths[i] <- abs(diff(rangeH[i, ]))
heights[i] <- abs(diff(rangeO[i, ]))
}
eMin <- which.min(widths*heights)
hProj <- rbind( rangeH[eMin, ], 0)
oProj <- rbind(0, rangeO[eMin, ])
hPts <- sweep(hProj, 1, oProj[ , 1], "+")
oPts <- sweep(hProj, 1, oProj[ , 2], "+")
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
# common code:
lapply(c("sf", "data.table", "microbenchmark", "compiler"),
require, character.only = TRUE)
compiler::enableJIT(3)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc <- st_cast(nc,"POLYGON")
#> Warning in st_cast.sf(nc, "POLYGON"): repeating attributes for all sub-
#> geometries for which they may not be constant
coor <- as.data.frame(st_coordinates(nc))
setDT(coor)
setDT(nc)
getMinBBox2 <- function(xy) {
# stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
# skip checking
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
hDir <- matrixStats::colDiffs(rbind(hull, hull[1, ]))
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
projMat <- tcrossprod(rbind(huDir, ouDir), hull)
HO <- list(seq_len(n), (seq_len(n)+n))
rr <- matrixStats::rowRanges(projMat)
rd <- as.numeric(matrixStats::rowDiffs(rr))
rangeH <- rr[HO[[1]],]
rangeO <- rr[HO[[2]],]
widths <- rd[HO[[1]]]
heights <- rd[HO[[2]]]
eMin <- which.min(widths*heights)
## seems like sweeping is not required here?
# hProj <- rbind( rangeH[eMin, ], 0)
# oProj <- rbind(0, rangeO[eMin, ])
# hPts <- sweep(hProj, 1, oProj[ , 1], "+")
# oPts <- sweep(hProj, 1, oProj[ , 2], "+")
hPts <- rbind(rangeH[eMin, ], rep(rangeO[eMin, 1], 2))
oPts <- rbind(rangeH[eMin, ], rep(rangeO[eMin, 2], 2))
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
colnames(pts) <- c("X", "Y")
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
# your code (wrapped in a function):
getbox1 <- function(){
data <- data.table(width=numeric(), height=numeric())
for(i in 1:nrow(nc)){
l=as.matrix(coor[L2==i,1:2])
ll=getMinBBox(l)
data=rbind(data,t(ll[2:3]))}
data
}
## split - apply code:
gmb2 <- function(i) getMinBBox(as.matrix(i[,1:2]))[c(2:3)]
gmb3 <- function(i) getMinBBox2(as.matrix(i[,1:2]))[c(2:3)]
getbox2 <- function(){
co <- split(coor, coor$L2)
rbindlist(lapply(co, gmb2))
}
getbox3 <- function(){
co <- split(coor, coor$L2)
rbindlist(lapply(co, gmb3))
}
all(identical(getbox1(), getbox2()), identical(getbox2(), getbox3()))
#> [1] TRUE
microbenchmark(b1 = getbox1(), b2 = getbox2(), b3=getbox3())
#> Unit: milliseconds
#> expr min lq mean median uq max neval cld
#> b1 7.852630 8.067991 8.862175 8.269768 8.632328 14.474924 100 c
#> b2 4.020513 4.071012 4.444000 4.166884 4.274048 7.728204 100 b
#> b3 2.871639 2.938294 3.111886 2.986174 3.046096 5.935133 100 a
推荐阅读
- c# - 将 SQL 查询更改为 LINQ、asp.net MVC
- java - 每次给出 Generate JavaDoc 命令时,javadoc 是否会扫描所有类?
- vb.net - 我在 vb.net 中遇到来自 Web 请求的响应字符串不可读的问题
- python - 在多个路由中将变量添加到烧瓶会话
- reactjs - Nextjs 你可能忘记从它定义的文件中导出你的组件,或者你可能混淆了默认和命名导入
- python - 如何在我的 python-2.7 代码中使用 `format` 的地方自动插入 `u`(unicode 指示符)到 `format` 函数调用?
- mysql - 在 MySQL 备份中混淆个人数据
- angular - 为什么我的角度应用程序每次发送发布请求时都会创建套接字连接?
- java - 如何使用 Spring 更新或删除 @CompoundIndex
- sql - 如何使alter命令在MonetDB中生效?