首页 > 解决方案 > 在列列表中添加几个滞后/移位

问题描述

我想滞后几列(例如 value_1 + value_2 + x - 见下文),定义它们的滞后数(例如 3)及其命名。这是一些工作乏味/手动代码:

library(dplyr)
library(lubridate)
library(data.table)

haves <- data.frame(
      id = c("a", "a", "a", "a", "a", "a", "b", "b", "b", "b", "b", "b")
    , date = as.Date(c("2015-01-01", "2015-02-01", "2015-03-01", "2015-04-01", "2015-05-01", "2015-06-01", "2015-01-01", "2015-02-01", "2015-03-01", "2015-04-01", "2015-05-01", "2015-06-01"))
    , value_1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
    , value_2 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
    , x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
) 
haves$value_2 <- haves$value_2 + 1
haves$x <- haves$x + 2

haves

wants <- haves %>%
    group_by(id) %>% 
    mutate(
        value_1_lag_1 = lag(value_1, n = 1, order_by = date)
        , value_1_lag_2 = lag(value_1, n = 2, order_by = date)
        , value_1_lag_3 = lag(value_1, n = 3, order_by = date)

        , value_2_lag_1 = lag(value_2, n = 1, order_by = date)
        , value_2_lag_2 = lag(value_2, n = 2, order_by = date)
        , value_2_lag_3 = lag(value_2, n = 3, order_by = date)

        , x_lag_1 = lag(x, n = 1, order_by = date)
        , x_lag_2 = lag(x, n = 2, order_by = date)
        , x_lag_3 = lag(x, n = 3, order_by = date)
    )

wants

有人建议提供了一个解决方案,所以我尝试了数据表方法但没有成功:

setDT(haves)
haves[, sapply(1:3, function(x){paste0('', x, '_lag_', 1:3)}) := shift(.SD, 1:3), 
   by = id, .SDcols = value_1:x][]

它不会产生我的需求。这更接近:

colnames <- colnames(haves)

setDT(haves)
haves[, sapply(1:3, function(x){paste0(colnames[x + 2], x, '_lag_', 1:3)}) := shift(.SD, 1:3), by = id, .SDcols = value_1:x][]

或者,我可以只使用一个循环和一个这样的函数:

appender <- function(df, column, lag){

    df %>%
        group_by(
            id
        ) %>%
        mutate(
            !!paste0(column, "_lag_", lag) := lag(!!rlang::sym(column), n = lag, order_by = date) 
        )
}

temp <- appender(haves, "value_2", 3)

任何帮助将不胜感激。谢谢!

标签: rdplyr

解决方案


这是来自链接答案的改编 dplyr 解决方案。

haves %>%
  group_by(id) %>%
  nest %>%
  mutate(data = map(data, ~arrange(., date))) %>%
  mutate(lags = map(data, function(dat) {
    imap_dfc(dat[-1], ~set_names(map(1:3, lag, x = .x),
                                 paste0(.y, "_lag_", 1:3)))
  })) %>%
  unnest(c(data, lags))

这是你要找的吗?


推荐阅读