首页 > 解决方案 > 从 R 中的另一个数据框创建多个数据框

问题描述

我有一个包含聚合数据的数据框,我想从主数据框的每一行创建多个数据框。

# Aggregated counts: one row per department (dep1..dep10), one column per
# measure. Built in a single base-R data.frame() call, so no helper from
# data.table (setnames) is required and no placeholder matrix is created.
df <- data.frame(
  wait = c(8, 9, 8, 7, 9, 8, 10, 6, 10, 5),
  check = c(7, 7, 7, 7, 8, 7, 9, 6, 10, 5),
  stay = c(6, 10, 6, 6, 8, 7, 6, 6, 8, 4),
  con1 = c(3, 4, 0, 2, 4, 2, 0, 3, 0, 1),
  con2 = c(2, 3, 0, 2, 3, 2, 3, 0, 3, 1),
  con3 = c(1, 3, 0, 2, 1, 3, 0, 1, 2, 0),
  con4 = c(0, 0, 6, 0, 0, 0, 3, 2, 3, 2),
  row.names = paste0("dep", 1:10)
)
df

          wait check stay con1 con2 con3 con4
    dep1     8     7    6    3    2    1    0
    dep2     9     7   10    4    3    3    0
    dep3     8     7    6    0    0    0    6
    dep4     7     7    6    2    2    2    0
    dep5     9     8    8    4    3    1    0
    dep6     8     7    7    2    2    3    0
    dep7    10     9    6    0    3    0    3
    dep8     6     6    6    3    0    1    2
    dep9    10    10    8    0    3    2    3
    dep10    5     5    4    1    1    0    2

这是我根据 df 的第一行手动创建的第一个新数据框。我可以一个一个地手动执行此操作,但我想编写一个循环或函数来加速该过程——生成多个新数据框,例如 dep1、dep2、dep3……dep10。任何帮助将不胜感激。

# Hand-built example of the desired output for the first row of df:
# 8 rows, with 0/1 flags in wait/check/stay and a text label in symp.
# NOTE(review): setnames(), setDT() and `:=` are data.table functions; no
# library(data.table) call is visible in this snippet -- confirm it is loaded.
dep1 <- as.data.frame(matrix(0, ncol = 4, nrow = 8))
# Rename the default matrix column names V1..V4.
setnames(dep1, c("V1", "V2", "V3", "V4"), c("wait", "check", "stay", "symp"))
# Convert to a data.table, keeping the row names in a column called "id".
setDT(dep1, keep.rownames = "id")[]
dep1$wait <- 1
# Set the check flag on rows 1-7 and the stay flag on rows 1-6.
dep1[c(1:7), "check"] = 1
dep1[c(1:6), "stay"] = 1
# symp was created as numeric 0; convert it to character before the label
# assignments below.
dep1[, symp:=as.character(symp)]
dep1[c(1:3), "symp"] = "cond1"
dep1[c(4:5), "symp"] = "cond2"
dep1[c(6), "symp"] = "cond3"
dep1[c(7:8), "symp"] = "clear"
dep1

   id wait check stay  symp
1:  1    1     1    1 cond1
2:  2    1     1    1 cond1
3:  3    1     1    1 cond1
4:  4    1     1    1 cond2
5:  5    1     1    1 cond2
6:  6    1     1    1 cond3
7:  7    1     1    0 clear
8:  8    1     0    0 clear

标签: r function dataframe data.table tidyr

解决方案


这是使用基础 R 实现的一种方法:

#' Expand one row of aggregated counts into a per-case data frame
#'
#' @param x A numeric vector (or a one-row data frame) whose first seven
#'   values are, in order: wait, check, stay, con1, con2, con3, con4.
#' @return A data frame with `wait` rows and four columns: `wait`, `check`
#'   and `stay` are numeric 0/1 flags (the first `check` / `stay` rows are 1),
#'   and `symp` is a character label: "cond1".."cond4" repeated per the four
#'   condition counts, with the remaining rows labelled "clear".
#' @details Stops with an explicit error when the counts are inconsistent
#'   (fewer than seven values, non-numeric, missing, negative, `check` or
#'   `stay` larger than `wait`, or condition counts summing to more than
#'   `wait`). A `wait` of 0 returns a zero-row data frame.
apply_fun <- function(x) {
  # Accept both a named vector (as passed by apply()) and a one-row data frame.
  counts <- unlist(x, use.names = FALSE)
  if (!is.numeric(counts) || length(counts) < 7L) {
    stop("'x' must supply at least 7 numeric counts", call. = FALSE)
  }
  counts <- counts[1:7]
  if (anyNA(counts) || any(counts < 0)) {
    stop("counts in 'x' must be non-missing and non-negative", call. = FALSE)
  }

  n <- counts[1]
  n_check <- counts[2]
  n_stay <- counts[3]
  n_cond <- counts[4:7]
  if (n_check > n || n_stay > n) {
    stop("'check' and 'stay' counts must not exceed 'wait'", call. = FALSE)
  }
  if (sum(n_cond) > n) {
    stop("condition counts must not sum to more than 'wait'", call. = FALSE)
  }

  # Build each column with its final type; flags are 1 for the leading rows.
  case_id <- seq_len(n)
  data.frame(
    wait = rep(1, n),
    check = as.numeric(case_id <= n_check),
    stay = as.numeric(case_id <= n_stay),
    symp = rep(c(paste0("cond", 1:4), "clear"), c(n_cond, n - sum(n_cond))),
    stringsAsFactors = FALSE
  )
}

然后将其应用于每一行

# Build one expanded data frame per row of df, named after that row.
# Rows are iterated explicitly rather than with apply(), which coerces the
# data frame to a matrix first and, for a zero-row df, still calls the
# function once on a dummy row; here a zero-row df yields an empty list.
list_out <- lapply(
  seq_len(nrow(df)),
  function(i) apply_fun(unlist(df[i, , drop = FALSE]))
)
names(list_out) <- rownames(df)
list_out

#$dep1
#  wait check stay  symp
#1    1     1    1 cond1
#2    1     1    1 cond1
#3    1     1    1 cond1
#4    1     1    1 cond2
#5    1     1    1 cond2
#6    1     1    1 cond3
#7    1     1    0 clear
#8    1     0    0 clear

#$dep2
#  wait check stay  symp
#1    1     1    1 cond1
#2    1     1    1 cond2
#3    1     1    1 cond2
#4    1     1    1 cond2
#5    1     1    1 cond3
#6    1     1    1 cond3
#7    1     1    1 cond3
#8    1     0    0 clear
#9    1     0    0 clear
#...
#...

如果您需要将它们作为单独的数据框(例如 dep1、dep2 等),可以使用 list2env。

# Copy every element of list_out into the global environment as a separate
# object named after its list name (dep1, dep2, ...).
list2env(x = list_out, envir = globalenv())

推荐阅读