首页 > 解决方案 > R - 以其他 2 列为条件的几列之间的 T 检验

问题描述

我想针对 df 中的每个物种，检验 5 个变量（WING、WINGPRI、WEIGHT、BEAK 和 TARSUS）在性别之间的差异。我是初学者，已经试过几十种写法，但要么不断报错，要么代码给不出我想要的输出。

下面是我目前最接近的尝试：

library(tidyverse)
library(broom)

# Attempted pipeline: reshape the five measurements to long form, collect the
# values of each SEX/SPECIES/variable group into a list-column, then try to
# t-test females against males.
# NOTE(review): as written this does not run; see the notes on the steps below.
df %>% 
      select(SPECIES, SEX, WING, WINGPRI, WEIGHT, BEAK, TARSUS) %>% 
      # Long format: one row per bird and measurement (columns variable/value).
      gather(key = variable, value=value, -SEX, -SPECIES) %>% 
      group_by(SEX, SPECIES, variable) %>%
      # One list-column cell holding all values of each group.
      summarise(value = list(value)) %>%
      # NOTE(review): `na.rm=TRUE` is not a pipeline step. `%>%` binds tighter
      # than `=`, so this line splits the chain and `TRUE` is what gets piped
      # into rowid_to_column() -- most likely the source of
      # "is.data.frame(df) is not TRUE". Confirm by removing this line.
      na.rm=TRUE %>%
      tibble::rowid_to_column() %>%
      # NOTE(review): this spreads by SPECIES, so the new columns are named
      # after species, not after the sexes "F"/"M".
      spread(SPECIES, value) %>%
      group_by(variable) %>% 
      # NOTE(review): "F" and "M" below are string literals, not columns;
      # unlist("F") is just the character "F", so t.test() gets no numbers.
      mutate(p_value = t.test(unlist("F"), unlist("M"))$p.value,
             t_value = t.test(unlist("F"), unlist("M"))$statistic)

但是在解决了许多其他错误之后，我始终无法解决这个错误：`Error: is.data.frame(df) is not TRUE`

无论是修正这段代码的办法，还是用完全不同的代码来实现我的目标，任何建议我都非常感谢。

> dput(sample)
structure(list(RING = c("A264874", "A432586", "O92477", "B9124", 
"C95571", "A395011", "C88213", "C58443", "A95422", "C58409"), 
    SPECIES = c("CARDUELIS CARDUELIS", "SYLVIA ATRICAPILLA", 
    "ESTRILDA ASTRILD", "ALCEDO ATTHIS", "CHLORIS CHLORIS", "FRINGILLA COELEBS", 
    "SYLVIA ATRICAPILLA", "CHLORIS CHLORIS", "SYLVIA ATRICAPILLA", 
    "PARUS MAJOR"), SEX = c("U", "M", "F", "F", "F", "F", "F", 
    "M", "F", "M"), AGE = c(2L, 3L, 3L, 3L, 4L, 2L, 4L, 4L, 6L, 
    3L), FAT = c(0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 1L, 0L), WEIGHT = c(148, 
    185, 85, 32, 225, 20, 245, 22, 197, 19), WING = c(775, 69, 
    45, 76, 82, 84, 77, 83, 69, 72), WINGPRI = c(58L, NA, 32L, 
    NA, NA, NA, 57L, 64L, 52L, 54L), BEAK = c(156, 132, 86, NA, 
    NA, 138, 125, 13, 145, 125), TARSUS = c(148, 199, 146, NA, 
    NA, 178, 18, 177, 207, 205), BROODPATCH = c(0L, NA, 0L, 0L, 
    0L, NA, 0L, 0L, 0L, 0L), MUSCLE = c(2L, 3L, 2L, 2L, 2L, 2L, 
    2L, 1L, 2L, 2L), PROGRAM = c("MAI", "MAI", "MAI", "MIGRA<c7><c3>O", 
    "PEEC", "MAI", "MAI", "PEEC", "MAI", "MIGRA<c7><c3>O")), .Names = c("RING", 
"SPECIES", "SEX", "AGE", "FAT", "WEIGHT", "WING", "WINGPRI", 
"BEAK", "TARSUS", "BROODPATCH", "MUSCLE", "PROGRAM"), row.names = c(NA, 
-10L), class = c("tbl_df", "tbl", "data.frame"))

标签: r, automation, tidyverse

解决方案


可以考虑使用 base 包：它随每次 R 安装一起提供，并在每个 R 会话中自动加载，无需调用 `library()`。具体来说，使用其中的 `by`（`tapply` 的面向对象封装）按 SPECIES 对数据框取子集，再用 `sapply`（或其类型稳定的对应函数 `vapply`）为每个子集构建一个包含 t 统计量和 p 值的矩阵。

由于 OP 的示例数据观测值太少，无法进行 `t.test`，下面先生成一个模拟数据集：

# Fix the RNG state so the simulated data (and the results printed further
# down) are reproducible.
set.seed(10102018)

# Simulated stand-in for the real data: 500 birds with a species drawn from
# the species present in `df`, a random sex, and five uniform measurements
# scaled to the range 0-100.
species_df <- local({
  n_obs <- 500
  species_pool <- unique(df$SPECIES)

  # Draw order matters under the fixed seed: SPECIES first, then SEX, then
  # the five measurements in column order.
  species <- sample(species_pool, n_obs, replace = TRUE)
  sex <- sample(c("F", "M"), n_obs, replace = TRUE)
  draw_measure <- function() runif(n_obs) * 100

  data.frame(
    SPECIES = species,
    SEX = sex,
    WING = draw_measure(),
    WINGPRI = draw_measure(),
    WEIGHT = draw_measure(),
    BEAK = draw_measure(),
    TARSUS = draw_measure(),
    stringsAsFactors = FALSE
  )
})

# NAMED LIST OF MATRICES
# For every species, run a two-sample t-test (females vs. males) on each
# measurement column. by() returns one element per species; each element is a
# 2 x 5 numeric matrix with rows `p_value` / `t_value.t` and one column per
# measurement.
mat_list <- by(species_df, species_df$SPECIES, function(sub) {
  measures <- c("WING", "WINGPRI", "WEIGHT", "BEAK", "TARSUS")

  # Typed NA template. It is used both as the fallback value and as the
  # vapply() prototype, so a failed test yields exactly the same row names
  # and type (double) as a successful one.
  na_result <- c(p_value = NA_real_, t_value.t = NA_real_)

  # %in% never yields NA, so birds with a missing or other SEX code (e.g. "U")
  # are simply left out of both groups.
  is_female <- sub$SEX %in% "F"
  is_male <- sub$SEX %in% "M"

  # vapply() rather than sapply(): same matrix result, but the shape and type
  # of every per-column result are enforced.
  vapply(measures, function(col) {
    # `[[` extracts a plain vector from data.frames and tibbles alike;
    # sub[rows, col] would return a one-column tibble for tibble input,
    # which t.test() cannot handle.
    values <- sub[[col]]
    tryCatch({
      output <- t.test(values[is_female], values[is_male])
      c(p_value = output$p.value, t_value.t = unname(output$statistic))
    },
    # Best-effort by design: any warning or error (e.g. too few observations
    # in a group) is reported as NA for that species/measurement.
    warning = function(w) na_result,
    error = function(e) na_result)
  }, FUN.VALUE = na_result)
})

输出

# Auto-print the by() result: one p-value / t-statistic matrix per species.
mat_list

# species_df$SPECIES: ALCEDO ATTHIS
#                WING    WINGPRI     WEIGHT      BEAK     TARSUS
# p_value   0.7273165  0.8382798  0.3180979 0.6450270  0.3856571
# t_value.t 0.3501749 -0.2048995 -1.0055505 0.4629014 -0.8733496
# ---------------------------------------------------------------------------------------- 
# species_df$SPECIES: CARDUELIS CARDUELIS
#                WING    WINGPRI    WEIGHT      BEAK    TARSUS
# p_value   0.5200729  0.8520463 0.3370721 0.8189008 0.1212502
# t_value.t 0.6470729 -0.1873091 0.9678003 0.2299977 1.5716422
# ---------------------------------------------------------------------------------------- 
# species_df$SPECIES: CHLORIS CHLORIS
#                 WING    WINGPRI     WEIGHT       BEAK    TARSUS
# p_value    0.1115453  0.5689228 0.94825726  0.5989776 0.9108546
# t_value.t -1.6129915 -0.5725928 0.06514506 -0.5284384 0.1124033
# ---------------------------------------------------------------------------------------- 
# species_df$SPECIES: ESTRILDA ASTRILD
#                 WING   WINGPRI    WEIGHT      BEAK    TARSUS
# p_value   0.09291222 0.7700545 0.6859697 0.1958938 0.6452502
# t_value.t 1.70719717 0.2935269 0.4062293 1.3054498 0.4624954
# ---------------------------------------------------------------------------------------- 
# species_df$SPECIES: FRINGILLA COELEBS
#                 WING   WINGPRI    WEIGHT      BEAK   TARSUS
# p_value   0.06157204 0.8636649 0.2183259 0.4757378 0.274626
# t_value.t 1.89924201 0.1723255 1.2416417 0.7170863 1.101813
# ---------------------------------------------------------------------------------------- 
# species_df$SPECIES: PARUS MAJOR
#                 WING   WINGPRI    WEIGHT       BEAK    TARSUS
# p_value   0.96688923 0.5857059 0.1140328  0.5055508 0.5747242
# t_value.t 0.04168846 0.5481212 1.6046303 -0.6694396 0.5643418
# ---------------------------------------------------------------------------------------- 
# species_df$SPECIES: SYLVIA ATRICAPILLA
#                 WING   WINGPRI    WEIGHT      BEAK     TARSUS
# p_value    0.4350621 0.5446387 0.7073097 0.3911381  0.7631614
# t_value.t -0.7851506 0.6091449 0.3770283 0.8628441 -0.3024993

推荐阅读