首页 > 解决方案 > 如何使用组在 geom_barplot 上手动添加 pvalue?

问题描述

我有这个数据框

bat=rep(c("A", "B", "C", "D"),3)
loc=c(rep("ab",4), rep("te",4), rep("po", 4))
value=c(17,8,14,20,2,9,11,18,5,17,7,14)
tot=rep(c(30,45,36,58),3)
say=rep()

dt=data_frame(bat, loc, value, tot) %>% mutate(pct=value/tot*100) %>% mutate(se=sqrt(((pct*(100-pct))/(tot)))) %>% mutate(value2=tot-value)

> dt
# A tibble: 12 x 7
   bat   loc   value   tot   pct    se value2
   <chr> <chr> <dbl> <dbl> <dbl> <dbl>  <dbl>
 1 A     ab       17    30 56.7   9.05     13
 2 B     ab        8    45 17.8   5.70     37
 3 C     ab       14    36 38.9   8.12     22
 4 D     ab       20    58 34.5   6.24     38
 5 A     te        2    30  6.67  4.55     28
 6 B     te        9    45 20     5.96     36
 7 C     te       11    36 30.6   7.68     25
 8 D     te       18    58 31.0   6.07     40
 9 A     po        5    30 16.7   6.80     25
10 B     po       17    45 37.8   7.23     28
11 C     po        7    36 19.4   6.60     29
12 D     po       14    58 24.1   5.62     44

我使用以下代码在每个位置的每对蝙蝠之间进行了 Fisher 测试。

dt_ab=dt %>% filter(loc=="ab")
dt_po=dt %>% filter(loc=="po")
dt_te=dt %>% filter(loc=="te")


# ab

idx = t(combn(seq_along(dt_ab$bat),2))

res_ab = lapply(1:nrow(idx),function(i){
  test = fisher.test(dt_ab[idx[i,],c("value","value2")])
  data.frame(
    group1 = dt_ab$bat[idx[i,1]],
    group2 = dt_ab$bat[idx[i,2]],
    loc=dt_ab$loc[idx[i,1]],
    odds_ratio = as.numeric(test$estimate),
    p = as.numeric(test$p.value)
  )
})

res_ab = do.call(rbind,res_ab)
res_ab

# po

idx = t(combn(seq_along(dt_po$bat),2))

res_po = lapply(1:nrow(idx),function(i){
  test = fisher.test(dt_po[idx[i,],c("value","value2")])
  data.frame(
    group1 = dt_po$bat[idx[i,1]],
    group2 = dt_po$bat[idx[i,2]],
    loc=dt_po$loc[idx[i,1]],
    odds_ratio = as.numeric(test$estimate),
    p = as.numeric(test$p.value)
  )
})

res_po = do.call(rbind,res_po)
res_po

# te

idx = t(combn(seq_along(dt_te$bat),2))

res_te = lapply(1:nrow(idx),function(i){
  test = fisher.test(dt_te[idx[i,],c("value","value2")])
  data.frame(
    group1 = dt_te$bat[idx[i,1]],
    group2 = dt_te$bat[idx[i,2]],
    loc=dt_te$loc[idx[i,1]],
    odds_ratio = as.numeric(test$estimate),
    p = as.numeric(test$p.value)
  )
})

res_te= do.call(rbind,res_te)
res_te


res=NULL
res=rbind(res_ab, res_po, res_te)

res
  group1 group2 loc odds_ratio            p
1       A      B  ab  5.8817685 0.0009288055
2       A      C  ab  2.0321378 0.2157927844
3       A      D  ab  2.4578341 0.0678462682
4       B      C  ab  0.3445393 0.0451456088
5       B      D  ab  0.4143084 0.0750566107
6       C      D  ab  1.2066231 0.6665182345
7       A      B  po  0.3341536 0.0700086821
8       A      C  po  0.8309230 1.0000000000
9       A      D  po  0.6317547 0.5859688210
10      B      C  po  2.4870606 0.0896596469
11      B      D  po  1.8959538 0.1934540389
12      C      D  po  0.7608147 0.7994419705
13      A      B  te  0.2899040 0.1823689517
14      A      C  te  0.1664441 0.0270616991
15      A      D  te  0.1614577 0.0141330017
16      B      C  te  0.5722244 0.3084931936
17      B      D  te  0.5586957 0.2609751992
18      C      D  te  0.9780082 1.0000000000

现在我想在下面的条形图上手动添加每对的 pvalue 和奇数比,我看到我们可以使用 stat_pvalue_manual 函数来做到这一点,但我在使用它时遇到了一些困难。请有人可以查看我的代码并告诉我出了什么问题。下面是我获得的代码和绘图。

bp_tab_pct<- ggplot(dt, aes(x=loc, y=pct, fill=cat))+
  geom_bar(stat = "identity",position="dodge") + 
  scale_fill_manual(values = c("A"= "#5977FF", "B"="#FF7F50", "C"=  "#00CED1", "D" = "#FFFF33"))+
  theme(plot.title = element_text(color="black", size=15, face="bold.italic")) +
  labs(fill = "") +
  ylim(c(0,100))+
  ggtitle(label = "")+
  geom_text(aes(y = pct+se+2, label = paste(round(pct,2),"%", "\n","(",value,"/",tot,")")), color = "black",size=5, position = position_dodge(width = 0.9))+
  theme(plot.title = element_text(color="black", size=15, face="bold.italic"))+
  theme(axis.text.x = element_text(size=16), axis.text.y = element_text(size=14))+
  theme(axis.title.y = element_text(size=16), axis.title.x = element_blank(),) +
  geom_errorbar(aes(ymin=pct-se, ymax=pct+se), width=.2,
                position=position_dodge(.9))  +
  stat_pvalue_manual(res,  y.position = 35, step.increase = 0.1,
                     label = "p", inherit.aes = FALSE)

在此处输入图像描述

我也尝试使用组的 facet insted,但我仍然没有我想要的情节。似乎 stat_pvalue_manual 函数只考虑前 10 行,我不知道为什么。

bp_tab_pct<- ggplot(dt, aes(x=bat, y=pct, fill=bat))+
  facet_wrap(~loc) +
  geom_bar(stat = "identity",position="dodge") + 
  scale_fill_manual(values = c("A"= "#5977FF", "B"="#FF7F50", "C"=  "#00CED1", "D" = "#FFFF33"))+
  theme(plot.title = element_text(color="black", size=15, face="bold.italic")) +
  labs(fill = "") +
  ylim(c(0,100))+
  ggtitle(label = "")+
  geom_text(aes(y = pct+se+2, label = paste(round(pct,2),"%", "\n","(",value,"/",tot,")")), color = "black",size=5, position = position_dodge(width = 0.9))+
  theme(plot.title = element_text(color="black", size=15, face="bold.italic"))+
  theme(axis.text.x = element_text(size=16), axis.text.y = element_text(size=14))+
  theme(axis.title.y = element_text(size=16), axis.title.x = element_blank(),) +
  geom_errorbar(aes(ymin=pct-se, ymax=pct+se), width=.2,
                position=position_dodge(.9))  +
  stat_pvalue_manual(res,  y.position = 35, step.increase = 0.1,
                     label = "p", inherit.aes = FALSE)


在此处输入图像描述

标签: rggplot2statisticsbar-chartp-value

解决方案


我明白了这个问题。这是由于 ylim 掩盖了 y 轴上的最高值。现在我希望能够调整每个 pvalue 的 y 位置,就像我对指示到栏的 pct 所做的那样,但我会为此发布另一个问题

在此处输入图像描述


推荐阅读