首页 > 解决方案 > r 循环优化(带有 sf 和 data.table 的 getMinBBox)

问题描述

我有一些问题如何加快我的代码。

我有一个超过 100000 个多边形的 shp,我想为每个多边形计算最小边界框。我在这里找到了这部分的一些代码:https ://rdrr.io/cran/flightplanning/src/R/utils.R

代码一次从一个多边形的矩阵形式的坐标中计算最小边界框。

我使用 loop 和 data.table 但对于更大的 shp(50000+ 多边形),这个过程真的很耗时。

是否可以在 data.table 中使用带有 lapply 的 getMinBBox()?

谢谢你

我的代码:

library(sf)
library(data.table)

nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc

nc=st_cast(nc,"POLYGON")
cor=as.data.frame(st_coordinates(nc))
cor
setDT(cor)
setDT(nc)
data <- data.table(width=numeric(), height=numeric())

for(i in 1:nrow(nc)){
    l=as.matrix(cor[L2==i,1:2])
    l=getMinBBox(l)
    data=rbind(data,t(l[2:3]))}

nc=cbind(nc,data)
nc


## execution time for each part of code inside loop
library(microbenchmark)
mb=microbenchmark(
l<-as.matrix(cor[L2==1,1:2]),
ll<-getMinBBox(l),
data<-rbind(data,t(ll[2:3]))
)

Unit: microseconds
                              expr    min      lq     mean  median      uq    max neval cld
 l <- as.matrix(cor[L2 == 1, 1:2]) 2144.7 2478.40 2606.981 2574.05 2711.55 4396.3   100   c
               ll <- getMinBBox(l)  885.3  976.05 1050.203 1054.10 1103.55 1788.3   100  b 
   data <- rbind(data, t(ll[2:3]))  152.6  182.95  207.617  201.60  224.05  546.1   100 a  




## getMinBBox code from https://rdrr.io/cran/flightplanning/src/R/utils.R


getMinBBox <- function(xy) {
  stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)

  ## rotating calipers algorithm using the convex hull
  H    <- grDevices::chull(xy)      ## hull indices, vertices ordered clockwise
  n    <- length(H)      ## number of hull vertices
  hull <- xy[H, ]        ## hull vertices

  ## unit basis vectors for all subspaces spanned by the hull edges
  hDir  <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
  hLens <- sqrt(rowSums(hDir^2))        ## length of basis vectors
  huDir <- diag(1/hLens) %*% hDir       ## scaled to unit length

  ## unit basis vectors for the orthogonal subspaces
  ## rotation by 90 deg -> y' = x, x' = -y
  ouDir <- cbind(-huDir[ , 2], huDir[ , 1])

  ## project hull vertices on the subspaces spanned by the hull edges, and on
  ## the subspaces spanned by their orthogonal complements - in subspace coords
  projMat <- rbind(huDir, ouDir) %*% t(hull)

  ## range of projections and corresponding width/height of bounding rectangle
  rangeH  <- matrix(numeric(n*2), ncol=2)  ## hull edge
  rangeO  <- matrix(numeric(n*2), ncol=2)  ## orthogonal subspace
  widths  <- numeric(n)
  heights <- numeric(n)

  for(i in seq(along=numeric(n))) {
    rangeH[i, ] <- range(projMat[  i, ])

    ## the orthogonal subspace is in the 2nd half of the matrix
    rangeO[i, ] <- range(projMat[n+i, ])
    widths[i]   <- abs(diff(rangeH[i, ]))
    heights[i]  <- abs(diff(rangeO[i, ]))
  }

  ## extreme projections for min-area rect in subspace coordinates
  ## hull edge leading to minimum-area
  eMin  <- which.min(widths*heights)
  hProj <- rbind(   rangeH[eMin, ], 0)
  oProj <- rbind(0, rangeO[eMin, ])

  ## move projections to rectangle corners
  hPts <- sweep(hProj, 1, oProj[ , 1], "+")
  oPts <- sweep(hProj, 1, oProj[ , 2], "+")

  ## corners in standard coordinates, rows = x,y, columns = corners
  ## in combined (4x2)-matrix: reverse point order to be usable in polygon()
  ## basis formed by hull edge and orthogonal subspace
  basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
  hCorn <- basis %*% hPts
  oCorn <- basis %*% oPts
  pts   <- t(cbind(hCorn, oCorn[ , c(2, 1)]))

  ## angle of longer edge pointing up
  dPts <- diff(pts)
  e    <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
  eUp  <- e * sign(e[2])       ## rotate upwards 180 deg if necessary
  deg  <- atan2(eUp[2], eUp[1])*180 / pi     ## angle in degrees

  return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}

#' Provided an angle, calculate the corresponding minimum bounding box
#'
#' @param vertices the vertices which to get the bounding box from
#' @param alpha the angle to rotate the bounding box
#'
#' @importFrom grDevices chull
getBBoxAngle = function(vertices, alpha) {
  centroid = apply(vertices, 2, mean)

  angleRadians = (90-alpha) * pi/180

  rotatedX = cos(angleRadians) * (vertices[, 1] - centroid[1]) -
             sin(angleRadians) * (vertices[, 2] - centroid[2]) +
             centroid[1]
  rotatedY = sin(angleRadians) * (vertices[, 1] - centroid[1]) +
             cos(angleRadians) * (vertices[, 2] - centroid[2]) +
             centroid[2]

  bBox = cbind(range(rotatedX), range(rotatedY))
  height = diff(bBox[,2])
  width = diff(bBox[,1])

  bBox = rbind(bBox[1,],
               c(bBox[1, 1], bBox[2, 2]),
               (bBox[2, ]),
               c(bBox[2, 1], bBox[1, 2]))

  bBoxUnrotatedX = cos(-angleRadians) * (bBox[, 1] - centroid[1]) -
                   sin(-angleRadians) * (bBox[, 2] - centroid[2]) +
                   centroid[1]
  bBoxUnrotatedY = sin(-angleRadians) * (bBox[, 1] - centroid[1]) +
                   cos(-angleRadians) * (bBox[, 2] - centroid[2]) +
                   centroid[2]
  unrotatedBbox = cbind(bBoxUnrotatedX, bBoxUnrotatedY)
  list(angle=alpha, height=height, width=width, pts=unrotatedBbox)
}

#' Given a xy matrix of points, adjust the
#' points to avoid acute angles < 80 degrees
#'
#' @param xy xy dataframe
#' @param angle angle of the flight lines
#' @param minAngle the minimum angle to below which will be projected
adjustAcuteAngles = function(xy, angle, minAngle = 80) {
  xy_mat = as.matrix(xy[,1:2])
  rads = angle*pi/180
  nPoints = nrow(xy)

  # Using cosine rule on vectors
  # acos(theta) = (AB_vec x BC_vec) / (|AB| * |BC|)
  vectors = xy[-1,]-xy[-nPoints,]

  # Final angles for each point
  angles = getAngles(xy_mat)

  # Mask angles less than minimum
  mask = angles <= minAngle
  mask[is.na(mask)] = FALSE
  # Index of point with angles less than minimum
  indices = (seq_along(mask)+1)[mask]

  indicesOffset = (indices %% 2 == 1) * 2 - 1

  # Calculate perpendicular lines for points
  x = xy[indices, 1]
  y = xy[indices, 2]
  nIndices = length(indices)
  a = rep(tan(rads), nIndices)
  b = y - a * x
  a_perpendicular = -1/a
  b_perpendicular = y - a_perpendicular*x


  b2 = b_perpendicular
  a2 = a_perpendicular

  # Intersect previous straight line with the perpendicular
  x = xy[indices-indicesOffset, 1]
  y = xy[indices-indicesOffset, 2]
  a1 = a
  b1 = y - a1 * x
  xs = (b2-b1)/(a1-a2)
  ys = a2*xs+b2

  # Replace angled points
  xy[indices-indicesOffset, 1] = xs
  xy[indices-indicesOffset, 2] = ys
  return(xy)
}

#' Create outer curves for the flight lines
#'
#' @param waypoints the waypoints of the flight plan
#' @param angle angle for the flight lines
#' @param flightLineDistance the distance between the flight lines in meters
outerCurvePoints = function(waypoints, angle, flightLineDistance) {
  mask = getAngles(waypoints) == 180
  mask[is.na(mask)] = TRUE

  rads = angle*pi/180
  angularCoef = tan(rads)
  nWaypoints = nrow(waypoints)

  oddPoints = seq(1, nWaypoints, 2)
  evenPoints = seq(2, nWaypoints, 2)

  offsetX = waypoints[evenPoints,1]-waypoints[oddPoints,1]

  intercept = waypoints[oddPoints, 2] - angularCoef*waypoints[oddPoints, 1]
  xMove = rep(flightLineDistance / sqrt(1 + angularCoef**2), nWaypoints)
  isNegative = rep(offsetX < 0, each=2)
  isNegative[seq(1, nWaypoints, 2)] = !isNegative[seq(1, nWaypoints, 2)]
  xMove[isNegative] = -xMove[isNegative]
  yMove = xMove * angularCoef

  xs = waypoints[, 1] + xMove
  ys = waypoints[, 2] + yMove

  curvePoints = as.data.frame(cbind(xs, ys))
  curvePoints$index = seq(1, nWaypoints)
  curvePoints$before = (curvePoints$index %% 2) == 1
  curvePoints = curvePoints[!c(FALSE, mask),]

  return(curvePoints)
}

#' Get angles for each point considering
#' the two neighbors points
#'
#' @param waypoints the waypoints of the flight plan
getAngles = function(waypoints) {
  nPoints = nrow(waypoints)
  vectors = waypoints[-1,]-waypoints[-nPoints,]
  dists = sqrt(apply(vectors ** 2, 1, sum))

  ab = vectors[-nPoints+1,]
  bc = vectors[-1,]
  dist_ab = dists[-nPoints+1]
  dist_bc = dists[-1]
  dotProds = sapply(seq_len(nPoints-2), function(x) ab[x,] %*% bc[x,])
  # Final angles for each point
  angles = suppressWarnings(180-round(acos(dotProds/(dist_ab*dist_bc))*180/pi,2))
  angles
}

#' Class for Flight Parameters
setClass("Flight Parameters",
         slots = c(
           flight.line.distance  = "numeric",
           flight.speed.kmh      = "numeric",
           front.overlap         = "numeric",
           gsd                   = "numeric",
           height                = "numeric",
           ground.height         = "numeric",
           minimum.shutter.speed = "character",
           photo.interval        = "numeric"
         ),
         prototype = list(
           flight.line.distance  = NA_real_,
           flight.speed.kmh      = NA_real_,
           front.overlap         = NA_real_,
           gsd                   = NA_real_,
           ground.height         = NA_real_,
           height                = NA_real_,
           minimum.shutter.speed = NA_character_,
           photo.interval        = NA_real_
         )
)

标签: rdata.tablesf

解决方案


示例代码非常小,如果您查看更大的 data.tables,某些方法可能会更快。在我看来,您可以通过拆分组合方法来节省一些时间,原则上,您可以并行运行,例如通过使用mclapply而不是lapply(在 linux/macos 上)。如果您使用rbindlist而不是rbind和转置,您可能也会快一点。在我的机器上,这种方法几乎将这个小集合的执行时间减半。mclapply(co, gmb2, mc.cores=4L)如果单个 L2 组较大,使用或类似方法可能是有益的,但只会损害这个小数据集的性能。如果不改变getMinBBox函数的代码,这至少会减少一些时间,可能值得一试。

附带说明:使用 openblas 或 Intel 的 MKL 而不是默认的 R 库可以对矩阵计算产生很大影响,因此您绝对应该确保使用它们。

# function code
    getMinBBox <- function(xy) {
        stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)

        ## rotating calipers algorithm using the convex hull
        H    <- grDevices::chull(xy)      ## hull indices, vertices ordered clockwise
        n    <- length(H)      ## number of hull vertices
        hull <- xy[H, ]        ## hull vertices

        ## unit basis vectors for all subspaces spanned by the hull edges
        hDir  <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
        hLens <- sqrt(rowSums(hDir^2))        ## length of basis vectors
        huDir <- diag(1/hLens) %*% hDir       ## scaled to unit length

        ## unit basis vectors for the orthogonal subspaces
        ## rotation by 90 deg -> y' = x, x' = -y
        ouDir <- cbind(-huDir[ , 2], huDir[ , 1])

        ## project hull vertices on the subspaces spanned by the hull edges, and on
        ## the subspaces spanned by their orthogonal complements - in subspace coords
        projMat <- rbind(huDir, ouDir) %*% t(hull)

        ## range of projections and corresponding width/height of bounding rectangle
        rangeH  <- matrix(numeric(n*2), ncol=2)  ## hull edge
        rangeO  <- matrix(numeric(n*2), ncol=2)  ## orthogonal subspace
        widths  <- numeric(n)
        heights <- numeric(n)

        for(i in seq(along=numeric(n))) {
            rangeH[i, ] <- range(projMat[  i, ])

            ## the orthogonal subspace is in the 2nd half of the matrix
            rangeO[i, ] <- range(projMat[n+i, ])
            widths[i]   <- abs(diff(rangeH[i, ]))
            heights[i]  <- abs(diff(rangeO[i, ]))
        }

        ## extreme projections for min-area rect in subspace coordinates
        ## hull edge leading to minimum-area
        eMin  <- which.min(widths*heights)
        hProj <- rbind(   rangeH[eMin, ], 0)
        oProj <- rbind(0, rangeO[eMin, ])

        ## move projections to rectangle corners
        hPts <- sweep(hProj, 1, oProj[ , 1], "+")
        oPts <- sweep(hProj, 1, oProj[ , 2], "+")

        ## corners in standard coordinates, rows = x,y, columns = corners
        ## in combined (4x2)-matrix: reverse point order to be usable in polygon()
        ## basis formed by hull edge and orthogonal subspace
        basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
        hCorn <- basis %*% hPts
        oCorn <- basis %*% oPts
        pts   <- t(cbind(hCorn, oCorn[ , c(2, 1)]))

        ## angle of longer edge pointing up
        dPts <- diff(pts)
        e    <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
        eUp  <- e * sign(e[2])       ## rotate upwards 180 deg if necessary
        deg  <- atan2(eUp[2], eUp[1])*180 / pi     ## angle in degrees

        return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
    }

#> Reading layer `nc' from data source `/home/b/R/x86_64-pc-linux-gnu-library/4.0/sf/shape/nc.shp' using driver `ESRI Shapefile'
#> Simple feature collection with 100 features and 14 fields
#> geometry type:  MULTIPOLYGON
#> dimension:      XY
#> bbox:           xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
#> epsg (SRID):    4267
#> proj4string:    +proj=longlat +datum=NAD27 +no_defs
            height                = NA_real_,
            minimum.shutter.speed = NA_character_,
            photo.interval        = NA_real_
        )
    )


# common code:
lapply(c("sf", "data.table", "microbenchmark"),
    require, character.only = TRUE)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc <- st_cast(nc,"POLYGON")
#> Warning in st_cast.sf(nc, "POLYGON"): repeating attributes for all sub-
#> geometries for which they may not be constant
coor <- as.data.frame(st_coordinates(nc))
setDT(coor)
setDT(nc)

# your code (wrapped in a function):
getbox1 <- function(){
    data <- data.table(width=numeric(), height=numeric())
    for(i in 1:nrow(nc)){
        l=as.matrix(coor[L2==i,1:2])
        ll=getMinBBox(l)
        data=rbind(data,t(ll[2:3]))}
    data
}

## split - apply code:
gmb2 <- function(i) getMinBBox(as.matrix(i[,1:2]))[c(2:3)]
getbox2 <- function(){
    co <- split(coor, coor$L2)
    rbindlist(lapply(co, gmb2))
}

microbenchmark(b1 = getbox1(), b2 = getbox2())
#> Unit: milliseconds
#>  expr      min       lq      mean   median       uq      max neval cld
#>    b1 7.804903 8.391429 20.539996 9.870947 38.71889 66.95236   100   b
#>    b2 3.956369 4.186426  9.919638 4.562312 19.13847 47.27808   100  a

最后的编辑:您可以通过稍微简化一下 getMinBBox 函数来提高性能。

# original function code (removed some comments for brevity)
getMinBBox <- function(xy) {
    stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
    H    <- grDevices::chull(xy)      ## hull indices, vertices ordered clockwise
    n    <- length(H)      ## number of hull vertices
    hull <- xy[H, ]        ## hull vertices
    hDir  <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
    hLens <- sqrt(rowSums(hDir^2))        ## length of basis vectors
    huDir <- diag(1/hLens) %*% hDir       ## scaled to unit length
    ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
    projMat <- rbind(huDir, ouDir) %*% t(hull)
    rangeH  <- matrix(numeric(n*2), ncol=2)  ## hull edge
    rangeO  <- matrix(numeric(n*2), ncol=2)  ## orthogonal subspace
    widths  <- numeric(n)
    heights <- numeric(n)
    for(i in seq(along=numeric(n))) {
        rangeH[i, ] <- range(projMat[  i, ])
        rangeO[i, ] <- range(projMat[n+i, ])
        widths[i]   <- abs(diff(rangeH[i, ]))
        heights[i]  <- abs(diff(rangeO[i, ]))
    }
    eMin  <- which.min(widths*heights)
    hProj <- rbind(   rangeH[eMin, ], 0)
    oProj <- rbind(0, rangeO[eMin, ])
    hPts <- sweep(hProj, 1, oProj[ , 1], "+")
    oPts <- sweep(hProj, 1, oProj[ , 2], "+")
    basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
    hCorn <- basis %*% hPts
    oCorn <- basis %*% oPts
    pts   <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
    dPts <- diff(pts)
    e    <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
    eUp  <- e * sign(e[2])       ## rotate upwards 180 deg if necessary
    deg  <- atan2(eUp[2], eUp[1])*180 / pi     ## angle in degrees
    return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}


# common code:
lapply(c("sf", "data.table", "microbenchmark", "compiler"),
    require, character.only = TRUE)
compiler::enableJIT(3)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc <- st_cast(nc,"POLYGON")
#> Warning in st_cast.sf(nc, "POLYGON"): repeating attributes for all sub-
#> geometries for which they may not be constant
coor <- as.data.frame(st_coordinates(nc))
setDT(coor)
setDT(nc)

getMinBBox2 <- function(xy) {
    # stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
    # skip checking
    H    <- grDevices::chull(xy)      ## hull indices, vertices ordered clockwise
    n    <- length(H)      ## number of hull vertices
    hull <- xy[H, ]        ## hull vertices
    hDir <- matrixStats::colDiffs(rbind(hull, hull[1, ]))
    hLens <- sqrt(rowSums(hDir^2))        ## length of basis vectors
    huDir <- diag(1/hLens) %*% hDir       ## scaled to unit length
    ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
    projMat <- tcrossprod(rbind(huDir, ouDir), hull)
    HO <- list(seq_len(n), (seq_len(n)+n))
    rr <- matrixStats::rowRanges(projMat)
    rd <- as.numeric(matrixStats::rowDiffs(rr))
    rangeH <- rr[HO[[1]],]
    rangeO <- rr[HO[[2]],]
    widths <- rd[HO[[1]]]
    heights <- rd[HO[[2]]]
    eMin  <- which.min(widths*heights)
    ## seems like sweeping is not required here?
    # hProj <- rbind(   rangeH[eMin, ], 0)
    # oProj <- rbind(0, rangeO[eMin, ])
    # hPts <- sweep(hProj, 1, oProj[ , 1], "+")
    # oPts <- sweep(hProj, 1, oProj[ , 2], "+")
    hPts <- rbind(rangeH[eMin, ], rep(rangeO[eMin, 1], 2))
    oPts <- rbind(rangeH[eMin, ], rep(rangeO[eMin, 2], 2))
    basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
    hCorn <- basis %*% hPts
    oCorn <- basis %*% oPts
    pts   <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
    colnames(pts) <- c("X", "Y")
    dPts <- diff(pts)
    e    <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
    eUp  <- e * sign(e[2])       ## rotate upwards 180 deg if necessary
    deg  <- atan2(eUp[2], eUp[1])*180 / pi     ## angle in degrees
    return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}

# your code (wrapped in a function):
getbox1 <- function(){
    data <- data.table(width=numeric(), height=numeric())
    for(i in 1:nrow(nc)){
        l=as.matrix(coor[L2==i,1:2])
        ll=getMinBBox(l)
        data=rbind(data,t(ll[2:3]))}
    data
}

## split - apply code:
gmb2 <- function(i) getMinBBox(as.matrix(i[,1:2]))[c(2:3)]
gmb3 <- function(i) getMinBBox2(as.matrix(i[,1:2]))[c(2:3)]
getbox2 <- function(){
    co <- split(coor, coor$L2)
    rbindlist(lapply(co, gmb2))
}
getbox3 <- function(){
    co <- split(coor, coor$L2)
    rbindlist(lapply(co, gmb3))
}

all(identical(getbox1(), getbox2()), identical(getbox2(), getbox3()))
#> [1] TRUE
microbenchmark(b1 = getbox1(), b2 = getbox2(), b3=getbox3())
#> Unit: milliseconds
#>  expr      min       lq     mean   median       uq       max neval cld
#>    b1 7.852630 8.067991 8.862175 8.269768 8.632328 14.474924   100   c
#>    b2 4.020513 4.071012 4.444000 4.166884 4.274048  7.728204   100  b 
#>    b3 2.871639 2.938294 3.111886 2.986174 3.046096  5.935133   100 a

推荐阅读