首页 > 解决方案 > 从列表列表中提取结果并维护列指标

问题描述

我一直在尝试从摘要列表中提取数据并保留列指示符。

这是数据的示例

data = data.frame( Var1 = c("Esp1"), Var2 = c("Tra1", "Tra2", "Tra3"))
data$New[[1]] <- c(list(data.frame( P.value =runif(1, 0.03, 0.08) , DF =runif(1, 1, 4) , ChisQ =runif(1, 0.03, 0.08) )),list(data.frame(factor = c("z"), value= sample(100,1))) , list(data.frame(factor = c(1:4), value= sample(100,4))) , list(data.frame( Group = c("A"), row.names =c("Control", "X1", "X2", "X3", "X4"), Value =sample(100, size=5, replace = TRUE))))
data$New[[2]] <- c(list(data.frame( P.value =runif(1, 0.03, 0.08) , DF =runif(1, 1, 4) , ChisQ =runif(1, 0.03, 0.08) )),list(data.frame(factor = c("z"), value= sample(100,1))) , list(data.frame(factor = c(1:4), value= sample(100,4))) , list(data.frame( Group = c("A"), row.names =c("Control", "X1", "X2", "X3", "X4"), Value =sample(100, size=5, replace = TRUE))))
data$New[[3]] <- c(list(data.frame( P.value =runif(1, 0.03, 0.08) , DF =runif(1, 1, 4) , ChisQ =runif(1, 0.03, 0.08) )),list(data.frame(factor = c("z"), value= sample(100,1))) , list(data.frame(factor = c(1:4), value= sample(100,4))) , list(data.frame( Group = c("A"), row.names =c("Control", "X1", "X2", "X3", "X4"), Value =sample(100, size=5, replace = TRUE))))
names(data$New[[1]]) <- c("Statistics","xx1","xx2",  "groups")
names(data$New[[2]]) <- c("Statistics","xx1","xx2",  "groups")
names(data$New[[3]]) <- c("Statistics","xx1","xx2",  "groups")

我想从每个结果列表中仅提取组(5 行)和统计信息(1 行)选项卡,并将它们与列 Var 1 和 Var2 的相应值一起放在表中。在选项卡组选项卡中,row.names 指示用于分析的处理。

我尝试使用 broom::tidy (它适用于其他结果列表),但此列表分发失败

到目前为止,我已经能够从提取的组及其相关的行名创建表,但无法正确设置正确的 Var1 和 Var2 值。

data.1 <- lapply(data[[3]], function(x) x$groups)
data.2 <- lapply(data.1, function(x) { x$Treatment <-rownames(x);return(x)})
data.group<- do.call(rbind.data.frame, data.2)
rownames(data.group) <- 1:nrow(data.group)

这就是我已经走了多远

> data.group
   Group Value Treatment
1      A    24   Control
2      A    96        X1
3      A    76        X2
4      A    26        X3
5      A    10        X4
6      A    58   Control
7      A    33        X1
8      A    30        X2
9      A    54        X3
10     A    48        X4
11     A    66   Control
12     A    80        X1
13     A    97        X2
14     A    86        X3
15     A    86        X4

此行不起作用,无法读取列 Var1 和 Var2

data.2.1 <- lapply(data.2, function(x) { x$Var1 <-unlist(data$Var1[[(x)]])  ;return(x)})
data.2.2 <- lapply(data.2, function(x) { x$Var2 <-data[[2]][[x]] ;return(x)})

这就是我希望表格输出的方式

    > data.group
       Var1 Var2 Treatment Value Group            P.value                 Df
    1  Esp1 Tra1   Control    70     A 0.0730726366001181 0.0566315333195962
    2  Esp1 Tra1        X1    27     A                                      
    3  Esp1 Tra1        X2     3     A                                      
    4  Esp1 Tra1        X3    16     A                                      
    5  Esp1 Tra1        X4    58     A                                      
    6  Esp1 Tra2   Control     2     A 0.0669188804645091  0.043313137262594
    7  Esp1 Tra2        X1    58     A                                      
    8  Esp1 Tra2        X2    87     A                                      
    9  Esp1 Tra2        X3    12     A                                      
    10 Esp1 Tra2        X4    23     A                                      
    11 Esp1 Tra3   Control    58     A 0.0698359214654192 0.0380288420431316
    12 Esp1 Tra3        X1    80     A                                      
    13 Esp1 Tra3        X2    44     A                                      
    14 Esp1 Tra3        X3   100     A                                      
    15 Esp1 Tra3        X4    78     A                                      
                    ChisQ
    1  0.0551552523346618
    2                    
    3                    
    4                    
    5                    
    6  0.0415172106772661
    7                    
    8                    
    9                    
    10                   
    11 0.0434505424182862
    12                   
    13                   
    14                   
    15                   
    >  

感谢您的回复!

标签: rdataframelapply

解决方案


作为包含不相关信息的数据框,我建议您将它们保存在一个列表中,并仅选择感兴趣的那些Statisticsgroups

data$New <- lapply(data$New,function(x)
                   list(Statistics = x$Statistics, groups = x$groups))
data$New

#[[1]]
#[[1]]$Statistics
#  P.value   DF  ChisQ
#1  0.0747 2.22 0.0345

#[[1]]$groups
#        Group Value
#Control     A    98
#X1          A    76
#X2          A    71
#X3          A    62
#X4          A    25


#[[2]]
#[[2]]$Statistics
#  P.value   DF  ChisQ
#1   0.074 3.71 0.0781

#[[2]]$groups
#        Group Value
#Control     A    31
#X1          A    92
#...
#....

对于更新后的预期输出,我们可以做

list_df <- lapply(data$New,function(x) data.frame(Control = rownames(x$groups), 
  Value = x$groups$Value, Group = x$groups$Group, P.value = x$Statistics$P.value, 
  DF = x$Statistics$DF, ChisQ = x$Statistics$ChisQ))

new_df <- data[rep(seq_len(nrow(data)), sapply(list_df, nrow)), ]
new_df$New <- NULL
cbind(new_df, do.call(rbind, list_df))

或类似但使用tidyverse

data$New <- purrr::map(data$New,function(x) data.frame(Control = rownames(x$groups),
   Value = x$groups$Value, Group = x$groups$Group, P.value = x$Statistics$P.value, 
    DF = x$Statistics$DF, ChisQ = x$Statistics$ChisQ))

data %>% tidyr::unnest(New)

推荐阅读