首页 > 解决方案 > group_by %>% anova_test 在 DFn DFd 中不一致

问题描述

我有一个不平衡的重复测量设计,我想为每个时间段(即曲线)运行单独的方差分析,然后 Bonferroni 纠正结果。

这是数据,其中曲线是重复测量:

T_data <-structure(list(mod_id = c(1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 
1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 
7, 8, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 
1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 
7, 8, 1, 2, 3, 4, 5, 6, 7, 8), Curve = structure(c(3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L), .Label = c("First", "Second", "Third", "Fourth", "Fifth", 
"Sixth"), class = "factor"), Treatment = structure(c(2L, 2L, 
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L), .Label = c("GH", "T1", "T2", "T3"), class = "factor"), 
    Topt = c(28.85, 29.83, 29.89, 28.26, 29.2, 29.1, 31.06, 32.24, 
    33.03, 31.1, 32.51, 31.91, 31.42, 31.92, 32.02, 33.75, 32.87, 
    32.76, 28.15, 28.2, 30.89, 29.62, 29.74, 29.36, 29.41, 28.36, 
    29.41, 32.53, 33.03, 31.44, 31.15, 32.15, 32.87, 30.79, 32.75, 
    32.75, 35.02, 30.34, 33.68, 35.01, 32.61, 31.16, 32.11, 30.28, 
    30, 31.86, 29.49, 28.96, 31.29, 32.11, 30.98, 31.92, 31.41, 
    31.09, 32.9, 32.54, 33.16, 33.99, 34.18, 34.14, 28.67, 26.96, 
    27.9, 24.8, 30.76, 28.56, 29.05, 27.08, 29.32, 32.96, 34.25, 
    34.25, 32.17, 31.4, 31.09, 34.68, 33.65, 33.96, 33.04, 33.12, 
    34, 33.18, 34.3, 34.46, 34.02), A_at_Topt = c(20.36, 18.25, 
    18.62, 15.51, 21.39, 16.95, 21.73, 14.43, 16.29, 16.52, 17.65, 
    18.68, 22.13, 21.77, 20.97, 17.75, 19.83, 18.32, 12.6, 17.72, 
    16.91, 19.22, 19.05, 20.49, 16.36, 16.81, 16.48, 21.29, 19.92, 
    18.2, 16.09, 21.56, 19.56, 17.09, 16.71, 20.65, 20.2, 25.19, 
    21.46, 22.63, 22.18, 21.9, 17.86, 16.34, 17.85, 16.25, 20.65, 
    22.92, 19.16, 17.77, 19.5, 20.1, 21.5, 24.58, 22.88, 14.97, 
    20.52, 22.77, 19.96, 16.91, 17.82, 18, 13.13, 16.43, 13.09, 
    11.07, 7.2, 12.87, 12.99, 17.28, 17.04, 21.78, 19.2, 16.42, 
    18.35, 12.51, 18.72, 17.01, 17.75, 19.62, 19.28, 15.32, 19.24, 
    17.22, 17.6)), row.names = c(NA, -85L), class = c("tbl_df", 
"tbl", "data.frame"))

我遇到的这个问题与 'rstatix' 包和 anova_test() 函数有关。它对 Topt 变量运行得很好。

library(rstatix)
Topt_bonf <- T_data %>%
  group_by(Curve) %>%
  anova_test(dv = Topt, wid = mod_id, within = Treatment) %>%
  get_anova_table() %>%
  adjust_pvalue(method = "bonferroni")
Topt_bonf

这给出了:

曲线 DFn DFD
第三 2 10
第四 2 12
第五 2 10
第六 2 14

但是,相同的代码对 Aopt 变量给出了一个奇怪的结果,其中 DFn 和 DFd 对于 Curve = "Fifth" 不正确。

Aopt_bonf <- T_data %>%
  group_by(Curve) %>%
  anova_test(dv = A_at_Topt, wid = mod_id, within = Treatment) %>%
  get_anova_table() %>%
  adjust_pvalue(method = "bonferroni")
Aopt_bonf

这给出了:

曲线 DFn DFD
第三 2 10
第四 2 12
第五 1.07 5.35
第六 2 14

有任何想法吗?谢谢

标签: ranovarstatix

解决方案


您看到的问题不是使用group_by(). 相反,这是由于 Greenhouse-Geisser 校正。所以,让我们取一个“T_data”的子集来简化。在这里,我们将查看 Curve == 'Fifth' 的数据子集。

# subset
T_data_sub <- T_data[T_data$Curve %in% "Fifth",]

将其与您的原始代码一起使用,anova_test()是基于car::Anova()函数执行类型 III ANOVA。

> anova_test(data = T_data_sub, dv = A_at_Topt , wid = mod_id, within = Treatment)
ANOVA Table (type III tests)

$ANOVA
     Effect DFn DFd     F     p p<.05   ges
1 Treatment   2  10 0.726 0.508       0.079

$`Mauchly's Test for Sphericity`
     Effect     W     p p<.05
1 Treatment 0.131 0.017     *

$`Sphericity Corrections` # HERE IS WHERE THE DIFFERENT DEGREES OF FREEDOM COME FROM
     Effect   GGe     DF[GG] p[GG] p[GG]<.05   HFe     DF[HF] p[HF] p[HF]<.05
1 Treatment 0.535 1.07, 5.35  0.44           0.562 1.12, 5.62 0.446 

由于 Mauchly 球形检验的 p 值显着,因此使用 Greenhouse-Geisser 校正调整了自由度。这就是为什么您的第二个屏幕截图与第一个屏幕截图具有不同的自由度。

如果您改为指定公式来指定模型,则anova_test()执行基于 stats:aov() 的 Type II Anova。

> anova_test(data = T_data_sub, A_at_Topt ~ Treatment, wid = mod_id, within = Treatment) 
Coefficient covariances computed by hccm()
ANOVA Table (type II tests)

     Effect DFn DFd     F    p p<.05   ges
1 Treatment   2  15 0.642 0.54       0.079

请注意,在这种情况下,使用公式时,widandwithin参数不会影响输出。它只是在做单向方差分析。

> anova_test(data = T_data_sub, A_at_Topt ~ Treatment) 
Coefficient covariances computed by hccm()
ANOVA Table (type II tests)

     Effect DFn DFd     F    p p<.05   ges
1 Treatment   2  15 0.642 0.54       0.079

推荐阅读