首页 > 解决方案 > 如何重塑数据框,使每个 id 只有 1 个观察值?

问题描述

我需要重塑(我假设它是某种重塑,就像我在 stata 中所做的那样)这个数据框,以便每个 id 只有 1 个观察值。此外,我需要保留所有其他变量。因此,一行应该有 id 列,每年包含 var1、x、var2 的值(var2 不是绝对必要的。我尝试了很多不同的东西,并且我不断为 id 变量获得相同的长数据. 我为冗长的 dput() 道歉,但如果我只做了前 6 行,就不会有信息。

  structure(list(id= c(1806968L, 1806968L, 1806968L, 1806968L, 
        1806968L, 1806968L, 1806968L, 1806968L, 1806968L, 1806968L, 1806968L, 
        1806968L, 1806968L, 1806968L, 1806968L, 2022610L, 2022610L, 2022610L, 
        2022610L, 2022610L), var1 = c(0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 4877, 5819, 6560, 8262, 0, 0, 0, 0, 0), x = c(25518, 
        25518, 25518, 25518, 25518, 25518, 25518, 25518, 25518, 25518, 
        25518, 25518, 25518, 25518, 25518, 34611, 34611, 34611, 34611, 
        34611), var2 = c(200812L, 200912L, 201012L, 201112L, 201212L, 
        201312L, 201512L, 201612L, 201712L, 201812L, 201912L, 200612L, 
        200512L, 200712L, 201412L, 199612L, 199712L, 199812L, 199912L, 
        200012L), `1987` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0), `1988` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0), `1989` = c(0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1990` = c(0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1991` = c(0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1992` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1993` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1994` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1995` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1996` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1997` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1998` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1999` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2000` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2001` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2002` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2003` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2004` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2005` = c(0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5819, 0, 0, 0, 0, 0, 0, 0), 
            `2006` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4877, 0, 0, 0, 
            0, 0, 0, 0, 0), `2007` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
            0, 0, 0, 6560, 0, 0, 0, 0, 0, 0), `2008` = c(0, 0, 0, 0, 
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2009` = c(0, 
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
            `2010` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
            0, 0, 0, 0), `2011` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
            0, 0, 0, 0, 0, 0, 0, 0, 0), `2012` = c(0, 0, 0, 0, 0, 0, 
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2013` = c(0, 
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
            `2014` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8262, 
            0, 0, 0, 0, 0), `2015` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2016` = c(0, 0, 0, 0, 0, 
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2017` = c(0, 
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
            `2018` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
            0, 0, 0, 0), `2019` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
            0, 0, 0, 0, 0, 0, 0, 0, 0), `2020` = c(0, 0, 0, 0, 0, 0, 
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, 
        -20L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x55b410de6890>, sorted = c("id", 
        "var1", "x", "var2"))

我希望它看起来像这样,每年的值是与那一年对应的 var1 的值。这是我多年来想要的东西。多年来我仍然需要一个专栏。

id <- c(1806968L, 2022610L)
"1987" <- c(0, 8262)
x <- c(25518, 34611)
data <- data.frame(id, `1987`, x)

标签: rdataframereshape

解决方案


目前尚不清楚当 x 为 0 时您是否还想要为 0 的年份。如果这样做,请& var1 > 0在下面删除。

library(tidyr)
library(dplyr)
tmp %>%
      tidyr::pivot_longer(c(starts_with("1"), starts_with("2")), names_to = "year") %>%
      filter(value == var1 & var1 > 0) %>% 
      select(-value)

推荐阅读