r - R dplyr across:为函数 t.test 和 varTest 动态指定参数
问题描述
我正在编写一些 dplyr 的 across 语句,想用函数 t.test 和 varTest 计算一些 p 值。用于计算的 x= 列在 df_vars 中,mu= 和 sigma.squared= 参数值在 df_mu_sigma 中。
我需要的数据的硬编码版本在 df_sumry 中。如果在运行代码时变量名总是相同的,那么这样的事情就足够了。然而,事实并非如此。
我需要的非硬编码版本的开头是在 df_sumry2 中。但这并不能产生正确的结果,因为 mu= 和 sigma.squared= 的值不是动态指定的。df_sumry2 中只有前两个 p 值是正确的。在那之后它们总是错误的,因为代码总是使用 mpg 变量的值。
如何始终如一地为 mu 和 sigma.squared 插入正确的值?
library(dplyr)
library(magrittr)
library(EnvStats)
# Columns under analysis.
df_vars <- select(mtcars, mpg, cyl, disp, hp)

# Reference mean and standard deviation of every column, computed on a
# seeded random sample of 12 rows. across() names the results
# <column>_mean and <column>_std.
set.seed(9302)
df_mu_sigma <- df_vars %>%
  slice_sample(n = 12) %>%
  summarise(across(everything(), list(mean = mean, std = sd)))
# Hard-coded reference summary. For each column: its mean and standard
# deviation, plus the p-value of t.test() against the reference mean and of
# varTest() against the reference variance (std^2) stored in df_mu_sigma.
df_sumry <- df_vars %>%
  summarise(
    # mpg
    mpg_mean = mean(mpg),
    mpg_mean_prob = t.test(mpg, mu = df_mu_sigma[["mpg_mean"]])$p.value,
    mpg_std = sd(mpg),
    mpg_std_prob = varTest(
      mpg,
      sigma.squared = df_mu_sigma[["mpg_std"]]^2
    )$p.value,
    # cyl
    cyl_mean = mean(cyl),
    cyl_mean_prob = t.test(cyl, mu = df_mu_sigma[["cyl_mean"]])$p.value,
    cyl_std = sd(cyl),
    cyl_std_prob = varTest(
      cyl,
      sigma.squared = df_mu_sigma[["cyl_std"]]^2
    )$p.value,
    # disp
    disp_mean = mean(disp),
    disp_mean_prob = t.test(disp, mu = df_mu_sigma[["disp_mean"]])$p.value,
    disp_std = sd(disp),
    disp_std_prob = varTest(
      disp,
      sigma.squared = df_mu_sigma[["disp_std"]]^2
    )$p.value,
    # hp
    hp_mean = mean(hp),
    hp_mean_prob = t.test(hp, mu = df_mu_sigma[["hp_mean"]])$p.value,
    hp_std = sd(hp),
    hp_std_prob = varTest(
      hp,
      sigma.squared = df_mu_sigma[["hp_std"]]^2
    )$p.value
  )
# Column names to summarise, read from df_vars so they are not hard-coded.
vars_num <- names(df_vars)
# First attempt at a non-hard-coded summary: applies mean, sd and the two
# test p-values to every column listed in vars_num.
# NOTE(review): both test functions below always read the mpg reference
# values (mpg_mean / mpg_std) from df_mu_sigma, whichever column is being
# processed, so only the mpg p-values agree with df_sumry.
df_sumry2 <- df_vars %>%
summarize(
across(
all_of(vars_num),
list(mean = mean,
# t.test p-value; mu is fixed to df_mu_sigma$mpg_mean for every column.
mean_prob = function(x) t.test(x, mu = df_mu_sigma$mpg_mean)$p.value,
std = sd,
# varTest p-value; sigma.squared is fixed to df_mu_sigma$mpg_std^2.
std_prob = function(x) varTest(x, sigma.squared = df_mu_sigma$mpg_std^2)$p.value)
)
)
解决方案
我似乎想出了解决我自己问题的方法。我很高兴看到替代解决方案,因为它们可能比我的更好。
library(dplyr)
library(magrittr)
library(EnvStats)
# Columns under analysis.
df_vars <- mtcars %>%
  select(mpg, cyl, disp, hp)

# Fix the RNG seed so the 12-row sample drawn by slice_sample() -- and with it
# every reference value and p-value derived from it -- is reproducible.
set.seed(9302)

# Reference mean and standard deviation of every column, computed on a random
# sample of 12 rows. across() names the results <column>_mean and
# <column>_std.
df_mu_sigma <- mtcars %>%
  select(mpg, cyl, disp, hp) %>%
  slice_sample(n = 12) %>%
  summarize(
    across(
      everything(),
      list(mean = mean, std = sd)
    )
  )
# Hard-coded reference result that the dynamic version is checked against.
# Each column contributes four values: mean, t.test() p-value against the
# reference mean, standard deviation, and varTest() p-value against the
# reference variance (std^2) from df_mu_sigma.
df_sumry <- summarise(
  df_vars,
  mpg_mean = mean(mpg),
  mpg_mean_prob = t.test(mpg, mu = df_mu_sigma[["mpg_mean"]])$p.value,
  mpg_std = sd(mpg),
  mpg_std_prob =
    varTest(mpg, sigma.squared = df_mu_sigma[["mpg_std"]]^2)$p.value,
  cyl_mean = mean(cyl),
  cyl_mean_prob = t.test(cyl, mu = df_mu_sigma[["cyl_mean"]])$p.value,
  cyl_std = sd(cyl),
  cyl_std_prob =
    varTest(cyl, sigma.squared = df_mu_sigma[["cyl_std"]]^2)$p.value,
  disp_mean = mean(disp),
  disp_mean_prob = t.test(disp, mu = df_mu_sigma[["disp_mean"]])$p.value,
  disp_std = sd(disp),
  disp_std_prob =
    varTest(disp, sigma.squared = df_mu_sigma[["disp_std"]]^2)$p.value,
  hp_mean = mean(hp),
  hp_mean_prob = t.test(hp, mu = df_mu_sigma[["hp_mean"]])$p.value,
  hp_std = sd(hp),
  hp_std_prob =
    varTest(hp, sigma.squared = df_mu_sigma[["hp_std"]]^2)$p.value
)
# Column names to summarise, read from df_vars so they are not hard-coded.
vars_num <- names(df_vars)
library(glue)

# Dynamic equivalent of df_sumry: for every column in vars_num compute its
# mean, its standard deviation, and the p-values of t.test() / varTest()
# against that column's own reference values in df_mu_sigma.
#
# The reference value is looked up by name via cur_column(), the documented
# dplyr helper that returns the name of the column across() is currently
# processing. This replaces ensym(x), which only yields the column name when
# across() happens to call the function with the bare column symbol -- an
# internal detail, not a documented guarantee.
df_sumry2 <- df_vars %>%
  summarize(
    across(
      all_of(vars_num),
      list(
        mean = mean,
        mean_prob = function(x) {
          # e.g. "mpg_mean" while across() is processing the mpg column
          mu_name <- glue("{cur_column()}_mean")
          t.test(x, mu = df_mu_sigma[[mu_name]])$p.value
        },
        std = sd,
        std_prob = function(x) {
          # e.g. "mpg_std"; squared below because varTest() takes a variance
          sigma_name <- glue("{cur_column()}_std")
          varTest(x, sigma.squared = df_mu_sigma[[sigma_name]]^2)$p.value
        }
      )
    )
  )

# Check the dynamic result against the hard-coded one.
all.equal(df_sumry, df_sumry2)
推荐阅读
- algorithm - 不可变数组中归并排序的空间复杂度
- sql - 创建具有多个表的动态视图
- html - HTML电子邮件段落格式不起作用
- c# - 如何在正文中发送带有 DB 行的事务性电子邮件
- sql-server - 使用变量运行动态 SQL 时的单引号数量
- python - 使用唯一列值作为键将 Pandas Dataframe 转换为 to_dict()
- r - 如何为组中具有目标值的行设置子集,如果没有,则为同一组中的不同行设置子集?
- hibernate - 我可以在 entityManager.persist 时用其他 ID 覆盖 @SequenceGenerator 值吗
- arrays - 数组函数 VB
- ios - 嵌入式 UITextView 的圆角——什么是(错误地)保持圆角不透明?