r - 将“总体”列添加到 R 中的汇总表
问题描述
我正在为一份健康调查数据创建汇总统计表。样本来自 4 个不同区域(西部、中西部、东北部和南部),在数据中编码为区域 1-4(Region 1 至 Region 4)。现在我已经可以创建一张表来显示每个区域的统计数据,但我还想再添加一列,用来显示样本中所有个体的总体平均值或中位数。我怎样才能做到这一点?下面附上了我目前已经完成的代码。谢谢!
# The data frame: a 10-row sample in dput()-style `structure(...)` form.
# Columns: AGE, FAMSIZE, HYPERTEN, ALC, region_group, PSU,
# PERWEIGHT_MERGE, SAMPWEIGHT_MERGE, STRATA.
# region_group holds the labels "Region 1" .. "Region 4".
# NOTE(review): this expression is not assigned to any name here; the code
# further down refers to `sample_survey` -- presumably built from this data,
# confirm before running.
structure(list(AGE = c(40L, 23L, 24L, 18L, 30L, 33L, 32L, 63L,
22L, 24L), FAMSIZE = c(2L, 2L, 2L, 3L, 2L, 6L, 2L, 1L, 2L, 1L
), HYPERTEN = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), ALC = c(0, 2,
3, 0, 2, 0, 3, 0, 2, 2), region_group = c("Region 4", "Region 3",
"Region 4", "Region 3", "Region 1", "Region 2", "Region 1", "Region 2",
"Region 4", "Region 4"), PSU = c(2L, 1L, 2L, 2L, 2L, 1L, 2L,
2L, 1L, 2L), PERWEIGHT_MERGE = c(31.2, 615.2, 37.6, 1626, 44,
149.8, 745.2, 984.2, 1512, 399.6), SAMPWEIGHT_MERGE = c(65, 860.4,
94.4, 9146, 170.8, 310.4, 755.2, 1053.4, 3964.4, 706.2), STRATA = c(6296L,
6165L, 6296L, 6224L, 6045L, 6083L, 6029L, 6073L, 6287L, 6247L
)), row.names = c(NA, 10L), class = "data.frame")
# Code for the table: summarise the survey by region, then transpose.
# `sample_survey` is grouped by region_group and summarised with
# survey_mean() / survey_median() for ALC, HYPERTEN, FAMSIZE and AGE.
# NOTE(review): `sample_survey` is not constructed in this snippet --
# presumably a srvyr survey design object; confirm how it was built.
# NOTE(review): the "Family ... Size" name literal is split across two source
# lines, so the resulting column name contains a line break (it shows up as
# "Family \n Size" in the printed table).
out1<-sample_survey %>%
group_by(region_group) %>%
summarise("Number of drinks (mean)"=survey_mean(ALC),
"Number of drinks (median)"=survey_median(ALC),"Hypertension"=survey_mean(HYPERTEN), "Family
Size"=survey_mean(FAMSIZE), "Age"=survey_median(AGE))
# Transpose so that each statistic becomes a row and each region a column,
# then print the result.
out1=t(out1)
out1
[,1] [,2] [,3] [,4]
region_group "Region 1" "Region 2" "Region 3" "Region 4"
Number of drinks (mean) "1.663778" "2.131566" "1.744107" "2.009594"
Number of drinks (mean)_se "0.1375124" "0.1245772" "0.0957500" "0.1199982"
Number of drinks (median) "1" "2" "1" "2"
Number of drinks (median)_se "0.0000000" "0.2531528" "0.0000000" "0.2533324"
Hypertension "0.1340147" "0.1685102" "0.1834528" "0.1225418"
Hypertension_se "0.01623974" "0.01529678" "0.01463019" "0.01475651"
Family \n Size "3.121062" "2.883905" "3.107202" "3.265012"
Family \n Size_se "0.11668906" "0.07435704" "0.08004129" "0.11138869"
Age "30" "27" "30" "28"
Age_se "1.3615690" "1.0126110" "0.7616152" "0.7599972"
解决方案
虽然我很确定有一些包可以开箱即用地实现这一目标,但一种可行的做法是:对整个数据集(不分组)再做一次同样的汇总,然后在转置之前把两张表按行绑定在一起。为了减少代码重复,我把汇总代码放进了一个辅助函数中。可以试试下面的代码:
library(dplyr)
library(srvyr)
# Helper: compute the table's summary statistics for a survey object.
#
# d: the survey object to summarise. In this script it is the result of
#    as_survey_design(), either grouped with group_by() or ungrouped.
#
# Returns the result of summarise(): mean and median of ALC, mean of
# HYPERTEN, mean of FAMSIZE and median of AGE, under the display names
# used in the final table.
mysum <- function(d) {
  summarise(
    d,
    "Number of drinks (mean)" = survey_mean(ALC),
    "Number of drinks (median)" = survey_median(ALC),
    "Hypertension" = survey_mean(HYPERTEN),
    "Family Size" = survey_mean(FAMSIZE),
    "Age" = survey_median(AGE)
  )
}
# Turn the data into a survey design object, using region_group as strata.
sample_survey <- as_survey_design(sample_survey, strata = region_group)

# Code for the table
# Per-region summary: the design is grouped by region_group before summarising.
out1 <- mysum(group_by(sample_survey, region_group))

# Whole-sample summary: same statistics without grouping, labelled "Total"
# so it lines up with the per-region rows.
out2 <- mutate(mysum(sample_survey), region_group = "Total")

# Stack the regional and overall rows, then transpose so that statistics
# become rows and regions (plus "Total") become columns.
t(bind_rows(out1, out2))
#> [,1] [,2] [,3]
#> region_group "Region 1" "Region 2" "Region 3"
#> Number of drinks (mean) "2.50" "0.00" "1.00"
#> Number of drinks (mean)_se "0.5000000" "0.0000000" "1.0000000"
#> Number of drinks (median) "2" "0" "0"
#> Number of drinks (median)_se "0.03935085" "0.00000000" "0.07870171"
#> Hypertension "0.0" "0.5" "0.0"
#> Hypertension_se "0.0" "0.5" "0.0"
#> Family Size "2.00" "3.50" "2.50"
#> Family Size_se "0.0000000" "2.5000000" "0.5000000"
#> Age "30" "33" "18"
#> Age_se "0.07870171" "1.18052560" "0.19675427"
#> [,4] [,5]
#> region_group "Region 4" "Total"
#> Number of drinks (mean) "1.75" "1.40"
#> Number of drinks (mean)_se "0.6291529" "0.3366502"
#> Number of drinks (median) "2" "2"
#> Number of drinks (median)_se "0.47133552" "0.50276759"
#> Hypertension "0.0" "0.1"
#> Hypertension_se "0.0" "0.1"
#> Family Size "1.75" "2.30"
#> Family Size_se "0.2500000" "0.5196152"
#> Age "24" "24"
#> Age_se "2.82801315" "2.02169598"
推荐阅读
- php - 当我从外部文件调用它时使用裁剪 php 调整图像大小的问题不在在线服务器上显示图像
- sql - SQL 查询 - 查找具有基数大于一的字段的记录
- laravel - Laravel:在 Blade 文件中,有没有办法在允许再次单击之前等待表单提交的响应?
- javascript - 链接到另一个页面的特定 div;只能在该页面上链接到该 div
- c# - 每次打开模态时都重新加载新数据
- node.js - 如何验证字段是电子邮件或用户名
- python - 有条件地格式化数据框突出显示
- java - 如何将字符串转换为日期(无法解析的日期:“03062019”)
- mysql - 如何改进/简化包含大量子查询的查询?
- c++ - 数组的大小可以是 2/9 还是 3/9?