首页 > 解决方案 > 森林图,对多个变量进行排序和汇总

问题描述

我有以下数据:

# Example input data, written out as a dput()-style literal.
# 61 rows (row.names = c(NA, -61L)) with columns:
#   ref      - factor of study labels (27 levels, e.g. "Hunter 2015")
#   yi       - numeric point estimate per row
#   ci.lb    - numeric lower bound of the interval
#   ci.ub    - numeric upper bound of the interval
#   TypeTwo  - factor with levels "BAR" and "FOO"
#   Variable - character outcome name: "Death" (26 rows),
#              "Vein Problems" (19 rows) or "Gas" (16 rows)
# The object carries class c("escalc", "data.frame") plus escalc-style
# attributes (yi.names, ci.lb.names, ci.ub.names, digits).
# NOTE(review): the "escalc" class presumably comes from the metafor
# package -- confirm before relying on metafor-specific methods.
# NOTE(review): rows 1 and 2 are both "Hunter 2015" / "Death" but differ in
# TypeTwo (FOO vs BAR), so that study appears twice within one outcome.
DF<-structure(list(ref = structure(c(15L, 15L, 16L, 19L, 2L, 12L, 
11L, 23L, 6L, 21L, 5L, 13L, 8L, 22L, 26L, 27L, 20L, 17L, 9L, 
7L, 24L, 25L, 18L, 1L, 3L, 14L, 16L, 12L, 23L, 6L, 21L, 8L, 22L, 
26L, 27L, 20L, 17L, 9L, 7L, 24L, 25L, 18L, 4L, 1L, 14L, 16L, 
19L, 2L, 11L, 23L, 21L, 8L, 26L, 27L, 17L, 9L, 7L, 24L, 1L, 10L, 
14L), .Label = c("Bob 2012", "Bob 2016", "Arnez 2004", 
"Smithy 2013", "Smithy 2014", "Smithy 2016", "Carole 2011", "Craig 2014", 
"Fansa 2008", "Johnson 2010", "Joel 2017", "Joelo 2016", 
"Bob2 2017", "Bob2 2020", "Hunter 2015", "Hush 2016", 
"Lock 2012", "Mcdoo 2012", "Nick 2018", "Park 2015", "Joe 2012", 
"Sour 2017", "Shoe 2008", "Vega 2009", "West 2004", 
"West2016", "Smith 2016"), class = "factor"), yi = c(1, 
0.909090909090909, 1, 1, 0.98780487804878, 0.933333333333333, 
0.882352941176471, 0.980519480519481, 0.977272727272727, 1, 1, 
0.98019801980198, 0.959183673469388, 1, 1, 0.982758620689655, 
0.96969696969697, 0.6875, 1, 1, 1, 1, 1, 1, 0.75, 0.969811320754717, 
0, 0.0333333333333333, 0.064935064935065, 0.0227272727272727, 
0, 0.0204081632653061, 0.142857142857143, 0.0384615384615384, 
0.120689655172414, 0.0303030303030303, 0.0625, 0, 0.0625000000000001, 
0.148148148148148, 0.333333333333333, 0.0322580645161291, 0.0625, 
0, 0.0150943396226415, 0, 0.027027027027027, 0.0182926829268293, 
0.0588235294117647, 0.0324675324675325, 0.0416666666666667, 0.0408163265306122, 
0.192307692307692, 0.103448275862069, 0.0625, 0.03125, 0, 0, 
0.037037037037037, 0.0526315789473685, 0.0264150943396226), ci.lb = c(0.968401784273333, 
0.745137584391619, 0.957452087056599, 0.954039295289784, 0.963597464688465, 
0.809439442909756, 0.67719312002544, 0.951199930155904, 0.905001120558666, 
0.929555376052338, 0.880663089089027, 0.941246506281999, 0.880901216198665, 
0.880663089089027, 0.934891169467222, 0.927453022366531, 0.874486623056924, 
0.435962472420225, 0.946947080517241, 0.946947080517241, 0.937267052265125, 
0.861434988827223, 0.945257646596841, 0.937267052265125, 0.384687131024181, 
0.945252480837292, 0, 0, 0.0306637200529119, 0, 0, 0, 0.00329369106613314, 
0, 0.0474215778277017, 0, 0, 0, 0.00112833931883988, 0.0347070885129207, 
0.0895601878163022, 0, 0, 0, 0.00321663954072449, 0, 0, 0.00226571557474109, 
0, 0.00919930409127839, 0, 0.000687698884629828, 0.0597984369364536, 
0.0359775093204114, 0, 0, 0, 0, 0, 0, 0.00995385402759386), ci.ub = c(1, 
0.998207039140277, 1, 1, 0.999812850010077, 0.998780552481617, 
0.997483360196549, 0.997584224395838, 1, 1, 1, 0.999688395336243, 
0.99931230111537, 1, 1, 1, 1, 0.895437964404381, 1, 1, 1, 1, 
1, 1, 0.992197756884658, 0.987557818958737, 0.0425479129434015, 
0.137561603224075, 0.11002605111172, 0.0949988794413338, 0.070444623947662, 
0.0855442043005818, 0.384071178226987, 0.157747957353967, 0.21886405934029, 
0.125513376943076, 0.249041832299857, 0.0530529194827593, 0.179245839830917, 
0.311107006224451, 0.628630049521593, 0.133296666133586, 0.249041832299857, 
0.062732947734875, 0.0340807242282984, 0.0425479129434015, 0.112387275591248, 
0.0458586358986253, 0.235428911558493, 0.0674429497369029, 0.170235847270992, 
0.119098783801335, 0.36946549674197, 0.197004260641943, 0.249041832299857, 
0.129288064181111, 0.0530529194827593, 0.062732947734875, 0.152166113984736, 
0.212219059832308, 0.0497506289906541), TypeTwo = structure(c(2L, 
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L), .Label = c("BAR", 
"FOO"), class = "factor"), Variable = c("Death", "Death", 
"Death", "Death", "Death", "Death", "Death", "Death", 
"Death", "Death", "Death", "Death", "Death", "Death", 
"Death", "Death", "Death", "Death", "Death", "Death", 
"Death", "Death", "Death", "Death", "Death", "Death", 
"Vein Problems", "Vein Problems", "Vein Problems", 
"Vein Problems", "Vein Problems", "Vein Problems", 
"Vein Problems", "Vein Problems", "Vein Problems", 
"Vein Problems", "Vein Problems", "Vein Problems", 
"Vein Problems", "Vein Problems", "Vein Problems", 
"Vein Problems", "Vein Problems", "Vein Problems", 
"Vein Problems", "Gas", "Gas", "Gas", 
"Gas", "Gas", "Gas", "Gas", "Gas", "Gas", 
"Gas", "Gas", "Gas", "Gas", "Gas", "Gas", 
"Gas")), row.names = c(NA, -61L), yi.names = "yi", ci.lb.names = "ci.lb", ci.ub.names = "ci.ub", digits = c(est = 4, 
se = 4, test = 4, pval = 4, ci = 4, var = 4, sevar = 4, fit = 4, 
het = 4), class = c("escalc", "data.frame"))

我用这段代码创建了一个森林图:

# Forest plot of every study: point estimate plus horizontal interval,
# coloured by TypeTwo, with one facet row per Variable.
ggplot(
  DF,
  aes(
    x = yi,
    y = ref,
    xmin = ci.lb,
    xmax = ci.ub,
    color = TypeTwo,
    group = TypeTwo
  )
) +
  geom_point() +
  geom_errorbarh(height = .1, size = .5) +
  # Dashed reference line at x = 0.
  geom_vline(
    xintercept = 0,
    color = "black",
    linetype = "dashed",
    alpha = .5
  ) +
  # Free scales/space: each facet shows only its own studies and is sized
  # in proportion to how many rows it holds.
  facet_grid(Variable ~ ., scales = "free", space = "free") +
  labs(title = "Forest Plot Combined", x = "Effect Size", y = "Study")

看起来像这样: 在此处输入图像描述

我想重新排列图表,使同属“Foo”的研究彼此相邻、同属“Bar”的研究彼此相邻(如下图所示)。另外,我在另一个数据集中有单独的“汇总”数据,希望(如果可行的话)在每个分面的底部为每个组添加一行汇总结果。如何添加这些新行,同时让它们留在各自的分面内?

在此处输入图像描述

我完全不知道该怎么做,请帮忙!

标签: r ggplot2 forestplot

解决方案


解决这个问题的方法有很多,这里给出其中一种。请注意,由于同一项研究会出现在多个分面中,而且它在各分面中的 TypeTwo 并不一致,我们必须用一点技巧才能在每个分面内分别排序。

我还按效应量大小进行了排序,因为这样更美观,也是森林图的常见做法。

你的数据中 Hunter 2015 有一处错误:它在 Death 变量下有两个效应量(一个属于 FOO,一个属于 BAR),这就是为什么图中该行同时出现红色和绿色两条误差线。

这里用一些虚构的数据来充当各变量的平均效应:

library(tidyverse)

# Placeholder summary rows, one per outcome in `Variable`.
# The numbers are made up for illustration, not computed from the studies.
avg <- data.frame(
  Variable = c("Death", "Gas", "Vein Problems"),
  yi       = c(0.9, 0.1, 0.1),
  ci.lb    = c(0.5, 0.0, 0.0),
  ci.ub    = c(1.0, 0.5, 0.5),
  # Length-one values below are recycled to all three rows.
  TypeTwo  = "mean effect",
  ref      = ""
)
# Stack the summary rows under the study rows, sort by TypeTwo (descending)
# and then by yi, and derive `ref2` as the y-axis ordering key.
# `ref2` pastes the study label and Variable together so a study that
# appears under several Variable values gets a separate level for each;
# fct_inorder() freezes the factor levels in the sorted row order.
DF2 <- DF %>%
  bind_rows(avg) %>%
  arrange(desc(TypeTwo), yi) %>%
  mutate(ref2 = fct_inorder(paste(ref, Variable)))

# Forest plot using the ordered key `ref2` on the y axis, with one facet
# row per Variable and colour by TypeTwo.
ggplot(
  DF2,
  aes(
    x = yi,
    y = ref2,
    xmin = ci.lb,
    xmax = ci.ub,
    color = TypeTwo,
    group = TypeTwo
  )
) +
  geom_point() +
  geom_errorbarh(height = .1, size = .5) +
  # Dashed reference line at x = 0.
  geom_vline(
    xintercept = 0,
    color = "black",
    linetype = "dashed",
    alpha = .5
  ) +
  facet_grid(Variable ~ ., scales = "free", space = "free") +
  # Show the plain study label (`ref`) for each `ref2` level on the axis.
  scale_y_discrete(breaks = DF2$ref2, labels = DF2$ref) +
  labs(title = "Forest Plot Combined", x = "Effect Size", y = "Study")

在此处输入图像描述


推荐阅读