首页 > 解决方案 > 从值创建列并将其他列作为这些值

问题描述

我有一份栖息地数据,其中有一列名为 habitat_size(在下面的示例数据中为 GARDEN_SIZE),取值为小、中、大和缺失,分别用 S、M、L、X 表示。我希望把这些取值变成列 Small、Medium、Large、Missing;而这些新列中的值必须取自其他栖息地列,同时这些栖息地列的列名要整理到一个单独的列中,每个名称与其对应的值相匹配。

我试过了:

 # Attempt: cast to wide format, one output column per GARDEN_SIZE code.
 # No value.var is given, so dcast picks the value column itself.
 dcast(
   data = habitat_case,
   formula = month + season + year + LOC_ID + GBW_NUM + OBS_DT ~ GARDEN_SIZE
 )

Using LARGE_PONDS as value column: use value.var to override.
  month season year    LOC_ID GBW_NUM     OBS_DT    L    M    S    X
1     8 Summer 2003 LOC569491   76550 2003-08-06 <NA>    X <NA> <NA>
2    11 Autumn 2003 LOC575706   76863 2003-11-06 <NA> <NA>    X <NA>
3    10 Autumn 2008 LOC575706   76863 2008-10-09 <NA> <NA>    X <NA>
4    11 Autumn 2003 LOC575712   76869 2003-11-06 <NA>    X <NA> <NA>
5     8 Summer 2008 LOC575712   76869 2008-08-08 <NA> <NA>    X <NA>
6    11 Autumn 2003 LOC591752   76998 2003-11-06 <NA>    X <NA> <NA>

但是,它只使用其他列之一作为值,我希望是这样的:

month season   year LOC_ID      GBW_NUM OBS_DT   Habitat_type    Large    Medium    Small    Missing
8       Summer 2003 LOC569491   76550 2003-08-06  lawn             A         A         A        X
11      Autumn 2003 LOC575706   76863 2003-11-06  garden_age       A         A         X        A
10      Autumn 2008 LOC575706   76863 2008-10-09  small_pond       1         0         0        1

这是一份可重现的示例数据:

# Reproducible sample: a data.frame literal with 25 rows and 17 columns
# (month, season, year, LOC_ID, GBW_NUM, OBS_DT, GARDEN_SIZE, and ten
# habitat columns from GARDEN_AGE through LARGE_PONDS). OBS_DT is stored as
# day counts with class "Date"; every habitat column is character.
# NOTE(review): the value is not assigned to a name here; the snippets on
# this page refer to the data as `habitat_case` and `df`, so it presumably
# needs to be assigned before running them.
structure(list(month = c(8, 11, 10, 11, 8, 11, 3, 4, 3, 1, 10, 
12, 6, 6, 12, 10, 10, 11, 1, 1, 1, 1, 1, 1, 1), season = c("Summer", 
"Autumn", "Autumn", "Autumn", "Summer", "Autumn", "Spring", "Spring", 
"Spring", "Winter", "Autumn", "Winter", "Summer", "Summer", "Winter", 
"Autumn", "Autumn", "Autumn", "Winter", "Winter", "Winter", "Winter", 
"Winter", "Winter", "Winter"), year = c(2003, 2003, 2008, 2003, 
2008, 2003, 2005, 2005, 2020, 2008, 2010, 2013, 2012, 2018, 2012, 
2016, 2017, 2003, 1995, 1995, 1995, 2003, 1995, 1995, 1995), 
    LOC_ID = c("LOC569491", "LOC575706", "LOC575706", "LOC575712", 
    "LOC575712", "LOC591752", "LOC610239", "LOC614296", "LOC674315", 
    "LOC675786", "LOC794326", "LOC794326", "LOC973627", "LOC973627", 
    "LOC1354719486425", "LOC1314108", "LOC2174194", "LOC1300619774", 
    "LOC1300613011", "LOC1300612920", "LOC1300615617", "LOC1300609972", 
    "LOC1300611058", "LOC1300612277", "LOC1300605354"), GBW_NUM = c("76550", 
    "76863", "76863", "76869", "76869", "76998", "79154", "79846", 
    "85416", "85665", "89078", "89078", "90835", "90835", "97926", 
    "93406", "26517", "72370", "65158", "65056", "68017", "61805", 
    "63019", "64369", "54273"), OBS_DT = structure(c(12270, 12362, 
    14161, 12362, 14099, 12362, 12856, 12903, 18352, 13890, 14889, 
    16040, 15502, 17693, 15679, 17100, 17443, 12362, 9131, 9131, 
    9131, 12078, 9131, 9131, 9131), class = "Date"), GARDEN_SIZE = c("M", 
    "S", "S", "M", "S", "M", "L", "S", "M", "S", "M", "M", "L", 
    "L", "S", "M", "M", "M", "M", "S", "M", "L", "M", "L", "L"
    ), GARDEN_AGE = c("A", "B", "C", "D", "D", "D", "B", "C", 
    "D", "C", "D", "C", "E", "E", "B", "B", "D", "A", "E", "B", 
    "D", "D", "D", "E", "E"), SMALL_WATER_BODY = c("X", "X", 
    "X", "X", "X", "X", "X", "Q", "X", "X", "X", "X", "Q", "Q", 
    "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X"), LARGE_WATER_BODY = c("X", 
    "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", 
    "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X"
    ), STREAM = c("X", "X", "X", "X", "X", "X", "X", "S", "S", 
    "X", "X", "X", "S", "S", "X", "X", "X", "S", "X", "X", "X", 
    "X", "X", "X", "X"), RIVER = c("X", "X", "X", "X", "X", "X", 
    "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", 
    "X", "X", "X", "X", "X", "X", "X"), CANAL = c("X", "X", "X", 
    "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", 
    "X", "X", "X", "X", "X", "X", "X", "X", "X", "X"), SEASHORE = c("X", 
    "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", 
    "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X"
    ), SMALL_PONDS = c("X", "X", "X", "X", "X", "X", "X", "X", 
    "0", "X", "X", "X", "X", "2", "X", "X", "X", "X", "X", "X", 
    "X", "X", "X", "X", "X"), MEDIUM_PONDS = c("X", "X", "X", 
    "X", "X", "X", "X", "X", "0", "X", "X", "X", "X", "X", "X", 
    "X", "X", "X", "X", "X", "X", "X", "X", "X", "X"), LARGE_PONDS = c("X", 
    "X", "X", "X", "X", "X", "X", "X", "0", "X", "X", "X", "X", 
    "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X", "X"
    )), row.names = c(NA, 25L), class = "data.frame")

标签: r

解决方案


您可以先把数据转换为长格式,用 recode 将 GARDEN_SIZE 的代码重新编码为完整名称,然后再转换回宽格式。使用 dplyr 和 tidyr 可以这样做:

library(dplyr)
library(tidyr)

# Reshape the habitat data: one row per observation and habitat type, one
# column per garden-size class (Large, Medium, Small, Missing).
df %>%
  # Stack every habitat column (GARDEN_AGE through LARGE_PONDS) into
  # name/value pairs. The former column names go to Habitat_type so the
  # result matches the layout requested in the question.
  pivot_longer(
    cols = GARDEN_AGE:LARGE_PONDS,
    names_to = "Habitat_type",
    values_to = "value"
  ) %>%
  # Turn the one-letter size codes into a factor with readable labels.
  # Declaring all four levels fixes the column order and lets pivot_wider()
  # create a column even for a class that never occurs in the data.
  # Codes other than L/M/S/X become NA.
  mutate(
    GARDEN_SIZE = factor(
      GARDEN_SIZE,
      levels = c("L", "M", "S", "X"),
      labels = c("Large", "Medium", "Small", "Missing")
    )
  ) %>%
  # Spread the size classes into columns. names_expand = TRUE makes unused
  # factor levels (e.g. Missing) explicit as all-NA columns.
  pivot_wider(
    names_from = GARDEN_SIZE,
    values_from = value,
    names_expand = TRUE
  )

推荐阅读