r - R - 以其他 2 列为条件的几列之间的 T 检验
问题描述
我正在尝试针对 df 中的每个物种,检验两性在 5 个变量(WING、WINGPRI、WEIGHT、BEAK 和 TARSUS)上的差异。我是初学者,已经尝试了几十种写法,但要么不断报错,要么代码给不出我想要的输出。
下面是我目前最接近的尝试:
library(tidyverse)
library(broom)
# Attempted pipeline: reshape the five measurements to long form, collect the
# values of each SEX/SPECIES/variable group into a list-column, then try to
# t-test F against M.
df %>%
select(SPECIES, SEX, WING, WINGPRI, WEIGHT, BEAK, TARSUS) %>%
# Stack the five measurement columns into variable/value pairs.
gather(key = variable, value=value, -SEX, -SPECIES) %>%
group_by(SEX, SPECIES, variable) %>%
# One row per group, holding that group's values as a single list element.
summarise(value = list(value)) %>%
# NOTE(review): `na.rm=TRUE` is not a pipeline step. As written, the pipe
# appears to continue from the literal TRUE, so rowid_to_column() below would
# receive TRUE instead of a data frame -- the likely source of the reported
# "is.data.frame(df) is not TRUE" error; confirm by removing this line.
na.rm=TRUE %>%
tibble::rowid_to_column() %>%
# NOTE(review): this spreads on SPECIES, yet the t-test below refers to "F" and
# "M" (SEX values) -- spreading on SEX was presumably intended.
spread(SPECIES, value) %>%
group_by(variable) %>%
# NOTE(review): unlist("F") / unlist("M") pass the literal strings, not columns
# named F and M, so t.test() is handed character data here.
mutate(p_value = t.test(unlist("F"), unlist("M"))$p.value,
t_value = t.test(unlist("F"), unlist("M"))$statistic)
但是在解决了许多错误之后,我始终无法解决这个错误:`Error: is.data.frame(df) is not TRUE`
无论是修正这段代码的办法,还是用完全不同的代码实现我想做的事情,任何建议我都非常感谢。
> dput(sample)
structure(list(RING = c("A264874", "A432586", "O92477", "B9124",
"C95571", "A395011", "C88213", "C58443", "A95422", "C58409"),
SPECIES = c("CARDUELIS CARDUELIS", "SYLVIA ATRICAPILLA",
"ESTRILDA ASTRILD", "ALCEDO ATTHIS", "CHLORIS CHLORIS", "FRINGILLA COELEBS",
"SYLVIA ATRICAPILLA", "CHLORIS CHLORIS", "SYLVIA ATRICAPILLA",
"PARUS MAJOR"), SEX = c("U", "M", "F", "F", "F", "F", "F",
"M", "F", "M"), AGE = c(2L, 3L, 3L, 3L, 4L, 2L, 4L, 4L, 6L,
3L), FAT = c(0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 1L, 0L), WEIGHT = c(148,
185, 85, 32, 225, 20, 245, 22, 197, 19), WING = c(775, 69,
45, 76, 82, 84, 77, 83, 69, 72), WINGPRI = c(58L, NA, 32L,
NA, NA, NA, 57L, 64L, 52L, 54L), BEAK = c(156, 132, 86, NA,
NA, 138, 125, 13, 145, 125), TARSUS = c(148, 199, 146, NA,
NA, 178, 18, 177, 207, 205), BROODPATCH = c(0L, NA, 0L, 0L,
0L, NA, 0L, 0L, 0L, 0L), MUSCLE = c(2L, 3L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L), PROGRAM = c("MAI", "MAI", "MAI", "MIGRA<c7><c3>O",
"PEEC", "MAI", "MAI", "PEEC", "MAI", "MIGRA<c7><c3>O")), .Names = c("RING",
"SPECIES", "SEX", "AGE", "FAT", "WEIGHT", "WING", "WINGPRI",
"BEAK", "TARSUS", "BROODPATCH", "MUSCLE", "PROGRAM"), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
解决方案
可以考虑使用 base——它随每次 R 安装一同提供,并在每个 R 会话中自动加载,因此无需调用 `library()`。具体来说,用其中的 `by`(`tapply` 的面向对象包装器)按 SPECIES 对数据框取子集,再用 `sapply`(或类型检查更严格的 `vapply`)构建一个由 t 统计量和 p 值组成的矩阵。
因为 OP 的数据没有足够的观测值来运行 `t.test`,下面随机生成一个示例数据集:
# Simulated stand-in for the real data: 500 rows with a random species (drawn
# from the species present in `df`), a random sex, and five measurement
# columns of uniform random numbers scaled by 100. Seeded for reproducibility.
set.seed(10102018)
species_df <- local({
  n_obs <- 500

  # One measurement column: n_obs uniform draws multiplied by 100.
  draw_measure <- function() runif(n_obs) * 100

  # Columns are generated in the order listed, which fixes the random stream.
  data.frame(
    SPECIES = sample(unique(df$SPECIES), n_obs, replace = TRUE),
    SEX = sample(c("F", "M"), n_obs, replace = TRUE),
    WING = draw_measure(),
    WINGPRI = draw_measure(),
    WEIGHT = draw_measure(),
    BEAK = draw_measure(),
    TARSUS = draw_measure(),
    stringsAsFactors = FALSE
  )
})
# NAMED LIST OF MATRICES
# For each species, run a two-sample t-test (F vs. M) on every measurement
# column. The result is a `by` object holding one 2 x 5 numeric matrix per
# species: rows `p_value` and `t_value.t`, one column per measurement.
mat_list <- by(species_df, species_df$SPECIES, function(species_rows) {
  measures <- c("WING", "WINGPRI", "WEIGHT", "BEAK", "TARSUS")

  # Shape of one column's result; doubles as the fallback when the test
  # errors or warns. Passing it to vapply() as the template pins the row
  # names and the numeric type no matter which columns succeed or fail.
  no_result <- c(p_value = NA_real_, t_value.t = NA_real_)

  # vapply() is a type-checked sapply(): every result must match `no_result`.
  vapply(measures, function(measure) {
    tryCatch({
      females <- species_rows[species_rows$SEX == "F", measure]
      males <- species_rows[species_rows$SEX == "M", measure]
      output <- t.test(females, males)
      c(p_value = output$p.value, t_value.t = unname(output$statistic))
    },
    # Best effort: any warning or error yields NA for that column.
    warning = function(w) no_result,
    error = function(e) no_result)
  }, FUN.VALUE = no_result)
})
输出
mat_list
# species_df$SPECIES: ALCEDO ATTHIS
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.7273165 0.8382798 0.3180979 0.6450270 0.3856571
# t_value.t 0.3501749 -0.2048995 -1.0055505 0.4629014 -0.8733496
# ----------------------------------------------------------------------------------------
# species_df$SPECIES: CARDUELIS CARDUELIS
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.5200729 0.8520463 0.3370721 0.8189008 0.1212502
# t_value.t 0.6470729 -0.1873091 0.9678003 0.2299977 1.5716422
# ----------------------------------------------------------------------------------------
# species_df$SPECIES: CHLORIS CHLORIS
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.1115453 0.5689228 0.94825726 0.5989776 0.9108546
# t_value.t -1.6129915 -0.5725928 0.06514506 -0.5284384 0.1124033
# ----------------------------------------------------------------------------------------
# species_df$SPECIES: ESTRILDA ASTRILD
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.09291222 0.7700545 0.6859697 0.1958938 0.6452502
# t_value.t 1.70719717 0.2935269 0.4062293 1.3054498 0.4624954
# ----------------------------------------------------------------------------------------
# species_df$SPECIES: FRINGILLA COELEBS
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.06157204 0.8636649 0.2183259 0.4757378 0.274626
# t_value.t 1.89924201 0.1723255 1.2416417 0.7170863 1.101813
# ----------------------------------------------------------------------------------------
# species_df$SPECIES: PARUS MAJOR
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.96688923 0.5857059 0.1140328 0.5055508 0.5747242
# t_value.t 0.04168846 0.5481212 1.6046303 -0.6694396 0.5643418
# ----------------------------------------------------------------------------------------
# species_df$SPECIES: SYLVIA ATRICAPILLA
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.4350621 0.5446387 0.7073097 0.3911381 0.7631614
# t_value.t -0.7851506 0.6091449 0.3770283 0.8628441 -0.3024993
推荐阅读
- javascript - 如何在 React js 中进入(导航)下一页
- arrays - 使用离子角度计算并获取数组的总和
- shopware - Shopware 6、管理、订单列表、更改订单日期格式
- node.js - 使用猫鼬在nodejs中保存文件类型内容
- next.js - Next JS 中动态生成的静态页面
- javascript - NextJs/JS - 在 Uncaught SyntaxError 上出现错误:Unexpected identifier onchange function
- macos - 如何在 MacOS 上重置推送通知权限警报
- windows - 调用带有特殊符号的文件失败
- r - 如何将十进制度分转换为十进制度
- reactjs - 使用 math.js 中的 parser.evaluate() 是否应该担心安全性?