首页 > 解决方案 > 跨嵌套列表的元素(或不同级别)获取汇总估计

问题描述

我在获取嵌套列表元素的汇总估计时遇到了一些困难。请参阅下面这个高度简化的示例。本质上,我想访问列表中的元素并方便地对它们进行操作(例如,计算列表中各元素的均值等)。有时这些元素处于相同的位置,有时则处于不同的位置。我很确定 tidyverse(或 base R)中有更简单直接的方法,但我一直没能找到。如果您能帮我解决这个问题,我将不胜感激。谢谢!


library(reprex)
library(tidyverse)

# 1) Simulation step: draw one mean age per gender.

# Draw a single simulated mean age for females and for males.
#
# Takes no arguments. Each call consumes two normal draws, female first
# (mean 27, sd 1) and then male (mean 30, sd 1).
#
# Returns a list with one element, `sim`: a 1 x 2 numeric matrix whose columns
# are named `mean_age_female` and `mean_age_male`.
to_estimate <- function() {
  female_draw <- rnorm(1, mean = 27, sd = 1)
  male_draw <- rnorm(1, mean = 30, sd = 1)

  # Naming the cbind() arguments fixes the column names independently of the
  # local variable names.
  list(
    sim = cbind(
      mean_age_female = female_draw,
      mean_age_male = male_draw
    )
  )
}

to_estimate()
#> $sim
#>      mean_age_female mean_age_male
#> [1,]        26.35325      31.04636


# 2) Replicate the simulation; the summaries (mean, sd, percentiles) are
#    computed from `res` further down.
rep <- 4 # number of simulation runs
set.seed(123) # seed before the first draw so the runs are reproducible
# seq_len() yields an empty sequence when `rep` is 0, whereas 1:rep would
# count down through c(1, 0).
res <- lapply(seq_len(rep), function(i) to_estimate())
res
#> [[1]]
#> [[1]]$sim
#>      mean_age_female mean_age_male
#> [1,]        26.43952      29.76982
#> 
#> 
#> [[2]]
#> [[2]]$sim
#>      mean_age_female mean_age_male
#> [1,]        28.55871      30.07051
#> 
#> 
#> [[3]]
#> [[3]]$sim
#>      mean_age_female mean_age_male
#> [1,]        27.12929      31.71506
#> 
#> 
#> [[4]]
#> [[4]]$sim
#>      mean_age_female mean_age_male
#> [1,]        27.46092      28.73494


# Summarizing the results

# Gather one named column of every replicate's `sim` matrix into a single
# numeric vector.
#
# runs:   list of replicates, each holding a matrix `sim`.
# column: name of the `sim` column to extract.
sim_column <- function(runs, column) {
  unlist(lapply(runs, function(run) run$sim[, column]), use.names = FALSE)
}

# mean() summarises only its first argument; further positional arguments are
# matched to `trim` and `na.rm`. The draws therefore have to be combined into
# one vector before any statistic is computed.
age_female <- sim_column(res, "mean_age_female")

means_female <- mean(age_female)
sd_female <- sd(age_female)
upper_female <- quantile(age_female, 0.975)
lower_female <- quantile(age_female, 0.025)

res_female <- cbind(means_female, sd_female, lower_female, upper_female)
# quantile() returns a named value, which cbind() can carry over as a row
# name; clear it.
rownames(res_female) <- NULL

res_female
#>      means_female sd_female lower_female upper_female
#> [1,]     27.39711 0.8835687     26.49126     28.47637


age_male <- sim_column(res, "mean_age_male")

means_male <- mean(age_male)
sd_male <- sd(age_male)
upper_male <- quantile(age_male, 0.975)
lower_male <- quantile(age_male, 0.025)

res_male <- cbind(means_male, sd_male, lower_male, upper_male)
rownames(res_male) <- NULL


# Column names look like "<statistic>_<gender>": the part before "_" becomes
# the output column (".value") and the part after it the `gender` key.
# `values_to` is left out because it is ignored when `names_to` contains
# ".value".
cbind(res_female, res_male) %>%
  as.data.frame() %>%
  pivot_longer(
    cols = everything(),
    names_to = c(".value", "gender"),
    names_sep = "_"
  )
#> # A tibble: 2 x 5
#>   gender means    sd lower upper
#>   <chr>  <dbl> <dbl> <dbl> <dbl>
#> 1 female  27.4 0.884  26.5  28.5
#> 2 male    30.1 1.24   28.8  31.6

#is there a function in tidyverse that can do this automatically such as
# in purrr package with map or modify?


标签: r, list, nested, tidyverse

解决方案


我会在tidyverse中这样做......

library(tidyverse)
# Run the simulation four times and summarise the draws by gender.
# Relies on to_estimate() from the question being defined already.
tibble(RunId = 1:4) %>% # one row per simulation run
  mutate(Results = map(RunId, ~ as_tibble(to_estimate()$sim))) %>% # one-row tibble per run
  unnest(cols = c(Results)) %>% # one column per gender
  pivot_longer(
    cols = -RunId,
    names_to = "gender",
    names_prefix = "mean_age_", # strip the prefix so only "female" / "male" remain
    values_to = "Value"
  ) %>%
  group_by(gender) %>%
  summarize(
    means = mean(Value),
    sd = sd(Value),
    lower = quantile(Value, .025),
    upper = quantile(Value, .975)
  )

……得到如下数据框:

结果


推荐阅读