首页 > 解决方案 > R:如何用循环从多个数据框中提取数据?

问题描述

我已经运行了若干(17 个)元分析(每个分析由一个特定名称标识),现在需要把各个模型的输出提取到一张表中,并添加一列来标明每个分析的名称。我已经手动完成了这项工作,但想知道是否可以写一个循环来实现。

我附上了 17 个分析中的前三个,它们的“名称”分别是“cent”、“dist”和“sqrs”。

# Meta-analyses: one rma.mv fit per outcome measure. Each fit uses the same
# no-intercept drug moderator and the same two random-effects terms, and is
# restricted to the rows of SR_meta belonging to that measure.
res_cent <- rma.mv(
  yi, vi,
  mods = ~ factor(drug) - 1,
  random = list(~ 1 | publication_id, ~ 1 | strain_def),
  data = SR_meta,
  subset = (SR_meta$measure == "cent")
)
res_dist <- rma.mv(
  yi, vi,
  mods = ~ factor(drug) - 1,
  random = list(~ 1 | publication_id, ~ 1 | strain_def),
  data = SR_meta,
  subset = (SR_meta$measure == "dist")
)
res_sqrs <- rma.mv(
  yi, vi,
  mods = ~ factor(drug) - 1,
  random = list(~ 1 | publication_id, ~ 1 | strain_def),
  data = SR_meta,
  subset = (SR_meta$measure == "sqrs")
)

# Build the model-output table for the "cent" measure.
# Start from the coefficient table and move its row names (the moderator
# labels) into a regular first column.
list_cent <- coef(summary(res_cent))
list_cent <- cbind(rownames(list_cent), list_cent, row.names = NULL)
names(list_cent) <- c(
  "Drug", "Estimate", "se", "zval", "p-value", "CI_l", "CI_u"
)

# Drop columns 3 and 4 (se, zval), then clean the moderator labels in three
# passes: remove "factor", remove "drug", remove any remaining punctuation.
df_cent <- list_cent[c(-3, -4)]
df_cent$Drug <- gsub(
  "[[:punct:]]", "",
  gsub("drug*", "", gsub("factor*", "", df_cent$Drug))
)

# Per-drug row counts, with the count column renamed from "freq" to "n_cent".
n_cent <- plyr::count(cent_sum2, vars = "drug")
names(n_cent)[names(n_cent) == "freq"] <- "n_cent"

# Append the count column by position.
df_cent <- cbind(df_cent, n_cent[2])
## The same steps can be repeated for the other two measures, "dist" and "sqrs".

输出是一个数据框,其中包含在荟萃分析中作为因素的药物名称、它们的效应量估计值、p 值、置信区间,以及每个因素对应的测量次数 (n)。我想把所有这些输出汇总到一张表中(即代码末尾名为“matrix_ps”的对象),并添加一列来标明度量(measure)的名称。我已经手动完成了所有步骤(如下),但这样做效率极低。有没有办法写一个循环,依次替换各个度量的名称来执行这些步骤,然后把结果追加到一起?

比如先定义 measures<-c("cent","dist","sqrs"),再写 for(i in measures) 之类的循环——但我不确定接下来该怎么写。

# One summary table per measure. Columns are named directly at construction
# (drug, p, n, estimate, ci_low, ci_up) and the measure label is appended as
# the last column.
matrix_cent <- data.frame(
  drug = df_cent$Drug,
  p = list_cent$`p-value`,
  n = df_cent$n_cent,
  estimate = df_cent$Estimate,
  ci_low = df_cent$CI_l,
  ci_up = df_cent$CI_u
)
matrix_cent$measure <- "cent"

matrix_dist <- data.frame(
  drug = df_dist$Drug,
  p = list_dist$`p-value`,
  n = df_dist$n_dist,
  estimate = df_dist$Estimate,
  ci_low = df_dist$CI_l,
  ci_up = df_dist$CI_u
)
matrix_dist$measure <- "dist"

matrix_sqrs <- data.frame(
  drug = df_sqrs$Drug,
  p = list_sqrs$`p-value`,
  n = df_sqrs$n_sqrs,
  estimate = df_sqrs$Estimate,
  ci_low = df_sqrs$CI_l,
  ci_up = df_sqrs$CI_u
)
matrix_sqrs$measure <- "sqrs"

# Stack the per-measure tables (all 17 measures) into one table.
matrix_ps <- rbind(
  matrix_cent, matrix_dist, matrix_rear, matrix_sqrs, matrix_toa,
  matrix_eca, matrix_eoa, matrix_trans, matrix_dark, matrix_light,
  matrix_stps, matrix_rrs, matrix_time, matrix_toc, matrix_cross,
  matrix_hd, matrix_lat
)

标签: r loops

解决方案


我们没有您的数据,但您可以将所有代码放在一个函数中:

#' Extract a coefficient table with per-drug counts from one fitted model
#'
#' @param x A fitted model for which `coef(summary(x))` returns a data frame
#'   with six columns, in the order estimate, se, zval, p-value, lower CI
#'   bound, upper CI bound.
#' @param y Name to give the per-drug count column in the result.
#' @param count_data Data frame with a `drug` column; its rows are counted
#'   per drug. Defaults to the global `cent_sum2`, which is what this function
#'   always used before. Pass the data set that belongs to the model in `x`
#'   when summarising any other measure, otherwise the counts describe the
#'   wrong data.
#' @return A data frame with columns `Drug`, `Estimate`, `p-value`, `CI_l`,
#'   `CI_u`, plus the count column named by `y`.
get_result <- function(x, y, count_data = cent_sum2) {
  # Move the row names (moderator labels) into a regular first column.
  coefs <- coef(summary(x))
  coefs <- setNames(
    cbind(rownames(coefs), coefs, row.names = NULL),
    c("Drug", "Estimate", "se", "zval", "p-value", "CI_l", "CI_u")
  )

  # Drop columns 3 and 4 (se, zval).
  out <- coefs[-c(3, 4)]

  # Clean the labels in three passes. Note "factor*" matches "facto" followed
  # by zero or more "r" (likewise "drug*"), and the last pass strips every
  # punctuation character, including any that are part of a drug name.
  out$Drug <- gsub("factor*", "", out$Drug)
  out$Drug <- gsub("drug*", "", out$Drug)
  out$Drug <- gsub("[[:punct:]]", "", out$Drug)

  counts <- plyr::count(count_data, vars = "drug")
  names(counts)[names(counts) == "freq"] <- y

  # The counts are attached by position, not matched on drug name, so a
  # differing row count would silently misalign or recycle them.
  # NOTE(review): this also assumes both tables list drugs in the same order;
  # confirm against the data.
  if (nrow(counts) != nrow(out)) {
    stop(
      "Number of drugs in `count_data` (", nrow(counts),
      ") does not match the number of model coefficients (", nrow(out), ").",
      call. = FALSE
    )
  }

  cbind(out, counts[2])
}

现在,假设您所有的分析对象都遵循以 'res_' 开头的命名模式,您可以执行:

library(purrr)

# Collect every object whose name starts with "res_" into a named list.
# The pattern is anchored so names that merely contain "res_" are not picked up.
list_models <- mget(ls(pattern = "^res_"))

# Extract each model's table, label it with its measure (the object name
# without the "res_" prefix), and stack the tables row-wise. A shared count
# column name ("n") keeps the columns aligned across tables. Joining the
# tables instead would match on the estimate, p-value and CI columns, which
# differ between analyses, and so would return essentially no rows.
result <- imap(list_models, function(model, model_name) {
  out <- get_result(model, "n")
  out$measure <- sub("^res_", "", model_name)
  out
})
result <- do.call(rbind, unname(result))
rownames(result) <- NULL

推荐阅读