首页 > 解决方案 > 将“总体”列添加到 R 中的汇总表

问题描述

我正在创建一个关于来自 4 个不同区域(西部、中西部、东北部和南部)样本的健康调查数据的汇总统计表,编码为区域 1-4。现在我可以创建一个表格来显示每个区域的统计数据,但我还想添加一个额外的列来显示样本中所有个体的总体平均值或中位数。我怎样才能做到这一点?我已经包含了到目前为止我已经完成的代码。谢谢!

#the data frame 
structure(list(AGE = c(40L, 23L, 24L, 18L, 30L, 33L, 32L, 63L, 
22L, 24L), FAMSIZE = c(2L, 2L, 2L, 3L, 2L, 6L, 2L, 1L, 2L, 1L
), HYPERTEN = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), ALC = c(0, 2, 
3, 0, 2, 0, 3, 0, 2, 2), region_group = c("Region 4", "Region 3", 
"Region 4", "Region 3", "Region 1", "Region 2", "Region 1", "Region 2", 
"Region 4", "Region 4"), PSU = c(2L, 1L, 2L, 2L, 2L, 1L, 2L, 
2L, 1L, 2L), PERWEIGHT_MERGE = c(31.2, 615.2, 37.6, 1626, 44, 
149.8, 745.2, 984.2, 1512, 399.6), SAMPWEIGHT_MERGE = c(65, 860.4, 
94.4, 9146, 170.8, 310.4, 755.2, 1053.4, 3964.4, 706.2), STRATA = c(6296L, 
6165L, 6296L, 6224L, 6045L, 6083L, 6029L, 6073L, 6287L, 6247L
)), row.names = c(NA, 10L), class = "data.frame") 

#Code for the table 
out1<-sample_survey %>% 
  group_by(region_group) %>% 
  summarise("Number of drinks (mean)"=survey_mean(ALC),
            "Number of drinks (median)"=survey_median(ALC),"Hypertension"=survey_mean(HYPERTEN), "Family 
            Size"=survey_mean(FAMSIZE), "Age"=survey_median(AGE))

out1=t(out1)
out1 

                             [,1]         [,2]         [,3]         [,4]        
region_group                 "Region 1"   "Region 2"   "Region 3"   "Region 4"  
Number of drinks (mean)      "1.663778"   "2.131566"   "1.744107"   "2.009594"  
Number of drinks (mean)_se   "0.1375124"  "0.1245772"  "0.0957500"  "0.1199982" 
Number of drinks (median)    "1"          "2"          "1"          "2"         
Number of drinks (median)_se "0.0000000"  "0.2531528"  "0.0000000"  "0.2533324" 
Hypertension                 "0.1340147"  "0.1685102"  "0.1834528"  "0.1225418" 
Hypertension_se              "0.01623974" "0.01529678" "0.01463019" "0.01475651"
Family \n            Size    "3.121062"   "2.883905"   "3.107202"   "3.265012"  
Family \n            Size_se "0.11668906" "0.07435704" "0.08004129" "0.11138869"
Age                          "30"         "27"         "30"         "28"        
Age_se                       "1.3615690"  "1.0126110"  "0.7616152"  "0.7599972"  

标签: rdatatablecluster-analysismultiple-columnssurvey

解决方案


虽然我很确定有一些包可以开箱即用地实现这一目标,但实现这一目标的一种方法是重复整个数据集的摘要并在转置之前将表绑定在一起。为此,为了减少代码重复,我将汇总代码放在辅助函数中。尝试这个:

library(dplyr)
library(srvyr)

# Helper function
mysum <- function(d) {
  d %>%
    summarise(
      "Number of drinks (mean)" = survey_mean(ALC),
      "Number of drinks (median)" = survey_median(ALC),
      "Hypertension" = survey_mean(HYPERTEN), 
      "Family Size" = survey_mean(FAMSIZE), 
      "Age" = survey_median(AGE)
    )
}

sample_survey <- sample_survey %>%
  as_survey_design(strata = region_group)

#Code for the table 
out1<-sample_survey %>% 
  group_by(region_group) %>% 
  mysum()

out2<-sample_survey %>% 
  mysum() %>% 
  mutate(region_group = "Total")

bind_rows(out1, out2) %>% 
  t()
#>                              [,1]         [,2]         [,3]        
#> region_group                 "Region 1"   "Region 2"   "Region 3"  
#> Number of drinks (mean)      "2.50"       "0.00"       "1.00"      
#> Number of drinks (mean)_se   "0.5000000"  "0.0000000"  "1.0000000" 
#> Number of drinks (median)    "2"          "0"          "0"         
#> Number of drinks (median)_se "0.03935085" "0.00000000" "0.07870171"
#> Hypertension                 "0.0"        "0.5"        "0.0"       
#> Hypertension_se              "0.0"        "0.5"        "0.0"       
#> Family Size                  "2.00"       "3.50"       "2.50"      
#> Family Size_se               "0.0000000"  "2.5000000"  "0.5000000" 
#> Age                          "30"         "33"         "18"        
#> Age_se                       "0.07870171" "1.18052560" "0.19675427"
#>                              [,4]         [,5]        
#> region_group                 "Region 4"   "Total"     
#> Number of drinks (mean)      "1.75"       "1.40"      
#> Number of drinks (mean)_se   "0.6291529"  "0.3366502" 
#> Number of drinks (median)    "2"          "2"         
#> Number of drinks (median)_se "0.47133552" "0.50276759"
#> Hypertension                 "0.0"        "0.1"       
#> Hypertension_se              "0.0"        "0.1"       
#> Family Size                  "1.75"       "2.30"      
#> Family Size_se               "0.2500000"  "0.5196152" 
#> Age                          "24"         "24"        
#> Age_se                       "2.82801315" "2.02169598"

推荐阅读