首页 > 解决方案 > 在多列上聚合 sf 对象

问题描述

我正在处理一些地理参考的工业数据。坐标系指某工业项目所在城市的质心。

structure(list(date = c("01dec2013", "01jul2003", "01nov2008", 
"01dec2017", "01dec2017", "01dec2003"), company = c("Shwe Taung", 
"PetroChina Exploration and Development", "Repsol SA", "Repsol SA", 
"Ipsen Pharmaceutical", "Ceva Laval"), parent_company = c("Shwe Taung", 
"China National Petroleum (CNPC)", "Repsol SA", "Repsol SA", 
"Ipsen Pharmaceutical", "Ceva Sante Animale"), Website = c("www.shwetaunggroup.com", 
"www.cnpc.com.cn", "www.repsol.com", "www.repsol.com", "www.ipsen.com", 
"www.ceva.com"), revenues_usd_ml = c(NA, 394554.53, 53215.45, 
53215.45, 1760.671, 967.152), Headcount = c(NA, 1396144L, 24634L, 
24634L, NA, 3500L), r_d_exp = c(NA, NA, 77.67, 77.67, NA, NA), 
    est_year = c(NA, 1988L, 1927L, 1927L, 1929L, 1989L), o_country = c("Myanmar", 
    "China", "Spain", "Spain", "France", "France"), o_state = c("Rangoon (Yangon)", 
    "Beijing Municipality", "Comunidad de Madrid", "Comunidad de Madrid", 
    "Ile-de-France", "Sud-Ouest (FR)"), o_admin = c("Not Specified", 
    "Not Specified", "Madrid", "Madrid", "Ile-de-France", "Not Specified"
    ), o_city = c("Rangoon (Yangon)", "Beijing", "Madrid", "Madrid", 
    "Paris", "Not Specified"), country = c("Algeria", "Algeria", 
    "Algeria", "Algeria", "Algeria", "Algeria"), state = c("Adrar", 
    "Adrar", "Adrar", "Adrar", "Adrar", "Adrar"), region = c("Not Specified", 
    "Not Specified", "Not Specified", "Not Specified", "Not Specified", 
    "Not Specified"), city = c("Adrar", "Adrar", "Reggane", "Reggane", 
    "Sidi Abdallah", "Sidi Abdallah"), free_zone = c("", "", 
    "", "", "", ""), relocation = c("", "", "", "", "", ""), 
    sector = c("Building materials", "Coal, oil & gas", "Coal, oil & gas", 
    "Coal, oil & gas", "Pharmaceuticals", "Healthcare"), sub_sector = c("Cement & concrete products", 
    "Oil & gas extraction", "Oil & gas extraction", "Oil & gas extraction", 
    "Pharmaceutical preparations", "Other (Healthcare)"), cluster = c("Construction", 
    "Energy", "Energy", "Energy", "Life sciences", "Life sciences"
    ), activity = c("Manufacturing", "Extraction", "Extraction", 
    "Extraction", "Manufacturing", "Manufacturing"), fdi_jobs = c(351L, 
    145L, 235L, 227L, 150L, 45L), est_fdi_jobs = c("Yes", "Yes", 
    "Yes", "Yes", "No", "No"), capital = c(139.9, 350, 565, 299.7, 
    29.55, 2.5), est_capital = c("Yes", "No", "No", "Yes", "No", 
    "No"), fdi_type = c("New", "New", "New", "Expansion", "New", 
    "New"), fdi_status = c("Announced", "Announced", "Announced", 
    "Opened", "Announced", "Opened"), source = c("geocode", "geocode", 
    "opencagegeo", "opencagegeo", "opencagegeo", "opencagegeo"
    ), year = c(2013L, 2003L, 2008L, 2017L, 2017L, 2003L), code_d = c(12L, 
    12L, 12L, 12L, 12L, 12L), income_d = c("MIDLW", "MIDLW", 
    "MIDLW", "MIDLW", "MIDLW", "MIDLW"), continent_d = c("Africa", 
    "Africa", "Africa", "Africa", "Africa", "Africa"), lang_d = c("Arabic", 
    "Arabic", "Arabic", "Arabic", "Arabic", "Arabic"), landlocked = c(0L, 
    0L, 0L, 0L, 0L, 0L), iso_d = c("DZA", "DZA", "DZA", "DZA", 
    "DZA", "DZA"), isic = c(26L, 11L, 11L, 11L, 24L, 85L), isic4 = c(2695L, 
    1110L, 1110L, 1110L, 2411L, 8519L), sector_eora = c("Petroleum, Chemical and Non-Metallic Mineral Products", 
    "Mining and Quarrying", "Mining and Quarrying", "Mining and Quarrying", 
    "Petroleum, Chemical and Non-Metallic Mineral Products", 
    "Mining and Quarrying"), n_proj = c(1, 1, 1, 1, 1, 1), geometry = structure(list(
        structure(c(-0.287488, 27.87338), class = c("XY", "POINT", 
        "sfg")), structure(c(-0.287488, 27.87338), class = c("XY", 
        "POINT", "sfg")), structure(c(0.1751507, 26.7207267), class = c("XY", 
        "POINT", "sfg")), structure(c(0.1751507, 26.7207267), class = c("XY", 
        "POINT", "sfg")), structure(c(5.0152099, 35.726173), class = c("XY", 
        "POINT", "sfg")), structure(c(5.0152099, 35.726173), class = c("XY", 
        "POINT", "sfg"))), class = c("sfc_POINT", "sfc"), precision = 0, bbox = structure(c(xmin = -0.287488, 
    ymin = 26.7207267, xmax = 5.0152099, ymax = 35.726173), class = "bbox"), crs = structure(list(
        input = "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0", 
        wkt = "BOUNDCRS[\n    SOURCECRS[\n        GEOGCRS[\"unknown\",\n            DATUM[\"World Geodetic System 1984\",\n                ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n                    LENGTHUNIT[\"metre\",1]],\n                ID[\"EPSG\",6326]],\n            PRIMEM[\"Greenwich\",0,\n                ANGLEUNIT[\"degree\",0.0174532925199433],\n                ID[\"EPSG\",8901]],\n            CS[ellipsoidal,2],\n                AXIS[\"longitude\",east,\n                    ORDER[1],\n                    ANGLEUNIT[\"degree\",0.0174532925199433,\n                        ID[\"EPSG\",9122]]],\n                AXIS[\"latitude\",north,\n                    ORDER[2],\n                    ANGLEUNIT[\"degree\",0.0174532925199433,\n                        ID[\"EPSG\",9122]]]]],\n    TARGETCRS[\n        GEOGCRS[\"WGS 84\",\n            DATUM[\"World Geodetic System 1984\",\n                ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n                    LENGTHUNIT[\"metre\",1]]],\n            PRIMEM[\"Greenwich\",0,\n                ANGLEUNIT[\"degree\",0.0174532925199433]],\n            CS[ellipsoidal,2],\n                AXIS[\"latitude\",north,\n                    ORDER[1],\n                    ANGLEUNIT[\"degree\",0.0174532925199433]],\n                AXIS[\"longitude\",east,\n                    ORDER[2],\n                    ANGLEUNIT[\"degree\",0.0174532925199433]],\n            ID[\"EPSG\",4326]]],\n    ABRIDGEDTRANSFORMATION[\"Transformation from unknown to WGS84\",\n        METHOD[\"Geocentric translations (geog2D domain)\",\n            ID[\"EPSG\",9603]],\n        PARAMETER[\"X-axis translation\",0,\n            ID[\"EPSG\",8605]],\n        PARAMETER[\"Y-axis translation\",0,\n            ID[\"EPSG\",8606]],\n        PARAMETER[\"Z-axis translation\",0,\n            ID[\"EPSG\",8607]]]]"), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(date = NA_integer_, 
company = NA_integer_, parent_company = NA_integer_, Website = NA_integer_, 
revenues_usd_ml = NA_integer_, Headcount = NA_integer_, r_d_exp = NA_integer_, 
est_year = NA_integer_, o_country = NA_integer_, o_state = NA_integer_, 
o_admin = NA_integer_, o_city = NA_integer_, country = NA_integer_, 
state = NA_integer_, region = NA_integer_, city = NA_integer_, 
free_zone = NA_integer_, relocation = NA_integer_, sector = NA_integer_, 
sub_sector = NA_integer_, cluster = NA_integer_, activity = NA_integer_, 
fdi_jobs = NA_integer_, est_fdi_jobs = NA_integer_, capital = NA_integer_, 
est_capital = NA_integer_, fdi_type = NA_integer_, fdi_status = NA_integer_, 
source = NA_integer_, year = NA_integer_, code_d = NA_integer_, 
income_d = NA_integer_, continent_d = NA_integer_, lang_d = NA_integer_, 
landlocked = NA_integer_, iso_d = NA_integer_, isic = NA_integer_, 
isic4 = NA_integer_, sector_eora = NA_integer_, n_proj = NA_integer_
), .Label = c("constant", "aggregate", "identity"), class = "factor"), row.names = c(NA, 
6L), class = c("sf", "data.frame"))

我想计算每个点城市(变量:) n_proj每个部门(变量:)的项目数量(变量:) 。sectorgeometry

我的想法是aggregate超过两列:

FDI_sf_ag1 <- aggregate(FDI_sf["n_proj"] ~ FDI_sf["geometry"] + FDI_sf["sector"], FDI_sf, sum)

但它给我返回了这个错误:

invalid type (list) for variable 'FDI_sf["n_proj"]'

然后,我重试取消列出列:

FDI_sf_ag1 <- aggregate(unlist(FDI_sf["n_proj"]) ~ cbind(unlist(FDI_sf["geometry"]),unlist(FDI_sf["sector"])), FDI_sf, sum)

但它给我带来了另一个错误:

Error in aggregate.data.frame(lhs, mf[-1L], FUN = FUN, ...) : arguments must have same length In addition: Warning message: In cbind(unlist(FDI_sf["geometry"]), unlist(FDI_sf["sector"])) : number of rows of result is not a multiple of vector length (arg 1)

问题:如何聚合n_project几何(sfc 对象)和扇区?

标签: raggregatemultiple-columnssf

解决方案


aggregate(fdi_sf[, "n_proj"], 
with(fdi_sf, list(sector=sector, city=city)),
 sum)
Simple feature collection with 5 features and 3 fields
Attribute-geometry relationship: 0 constant, 1 aggregate, 2 identity
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -0.287488 ymin: 26.72073 xmax: 5.01521 ymax: 35.72617
CRS:           +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
              sector          city n_proj
1 Building materials         Adrar      1
2    Coal, oil & gas         Adrar      1
3    Coal, oil & gas       Reggane      2
4         Healthcare Sidi Abdallah      1
5    Pharmaceuticals Sidi Abdallah      1
                    geometry
1 POINT (-0.287488 27.87338)
2 POINT (-0.287488 27.87338)
3 POINT (0.1751507 26.72073)
4   POINT (5.01521 35.72617)
5   POINT (5.01521 35.72617)

推荐阅读