r - 如何使用 gt() 包将组百分比添加到表中?
问题描述
在另一篇文章中,我概述了一种将总体百分比添加到 gt 包所生成表格中的方法(如何自动将总体百分比添加到 gt() 包中的 row_summary?)。我确定的解决方案是:每添加一个总体行百分比,就单独调用一次 summary_rows() 函数。
但是,即使是这种相当笨重的解决方案,在应用于各组的总体百分比时也不起作用,如下面的工作示例所示。有什么解决办法吗?
# Build the baseline data: ten random letters, each assigned to group A or B,
# two random counts per row, the row total n, and each count's share of n.
set.seed(1)
df <- tibble(
  some_letter = sample(letters, size = 10, replace = FALSE),
  some_group = sample(c("A", "B"), size = 10, replace = TRUE),
  num1 = sample(100:200, size = 10, replace = FALSE),
  num2 = sample(100:200, size = 10, replace = FALSE),
  n = num1 + num2
) %>%
  # Divide every num* column by the row total; results land in pct_num*.
  mutate(across(starts_with("num"), ~ .x / n, .names = "pct_{col}"))
> df
# A tibble: 10 x 7
some_letter some_group num1 num2 n pct_num1 pct_num2
<chr> <chr> <int> <int> <int> <dbl> <dbl>
1 g A 194 148 342 0.567 0.433
2 j A 121 159 280 0.432 0.568
3 n B 164 200 364 0.451 0.549
4 u A 112 118 230 0.487 0.513
5 e B 125 180 305 0.410 0.590
6 s A 137 164 301 0.455 0.545
7 w B 101 175 276 0.366 0.634
8 m B 135 110 245 0.551 0.449
9 l A 180 167 347 0.519 0.481
10 b B 131 137 268 0.489 0.511
# Target: the weighted group percentages to be added to the table in gt().
# The counts are summed within each group first and the group sums are then
# divided by the group total n, so the percentages are weighted by n.
df %>%
  group_by(some_group) %>%
  # across() replaces the deprecated summarise_at()/funs() pair.
  summarise(across(c(num1, num2, n), sum), .groups = "drop") %>%
  mutate(across(starts_with("num"), ~ .x / n, .names = "pct_{col}"))
# A tibble: 2 x 6
some_group num1 num2 n pct_num1 pct_num2
<chr> <int> <int> <int> <dbl> <dbl>
1 A 744 756 1500 0.496 0.504
2 B 656 802 1458 0.450 0.550
# Build the gt() table and try to hand the group-specific pct_num1 values to
# summary_rows(). The outcome is that the last value passed is recycled
# across all groups instead of each group showing its own percentage.
gt(df, groupname_col = "some_group", rowname_col = "some_letter") %>%
  summary_rows(
    groups = TRUE,
    columns = vars(num1, num2, n),
    fns = list(TOTAL = "sum")
  ) %>%
  summary_rows(
    groups = TRUE,
    columns = vars(pct_num1),
    fns = list(TOTAL = ~ c(0.493, 0.454))
  )
解决方案
正如我在您的另一个问题“如何自动将总体百分比添加到 gt() 包中的行摘要中?”里回答的那样,gt 包允许您逐个单元格地控制摘要行中显示的所有信息。缺点是表格的代码会变得非常冗长。
为了节省篇幅,我使用了一个比您的示例更短的示例,但该解决方案同样适用于您的问题。
library(dplyr)
library(gt)

# Small example data: two letters in each of groups A and B, with two counts
# per row.
df2_ex <- tribble(
  ~some_letter, ~some_group, ~num1, ~num2,
  "c", "A", 1, 2,
  "d", "A", 3, 4,
  "x", "B", 5, 6,
  "y", "B", 7, 8
) %>%
  # rowwise() makes sum(c_across(...)) a per-row total of the num* columns.
  rowwise() %>%
  mutate(
    pct_num1 = num1 / sum(c_across(starts_with("num"))),
    pct_num2 = num2 / sum(c_across(starts_with("num")))
  )
df2_ex
#> # A tibble: 4 x 6
#> # Rowwise:
#> some_letter some_group num1 num2 pct_num1 pct_num2
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 c A 1 2 0.333 0.667
#> 2 d A 3 4 0.429 0.571
#> 3 x B 5 6 0.455 0.545
#> 4 y B 7 8 0.467 0.533
按 some_group 列分组后,各组摘要行所需的数据如下
# Group-level summary: sum num1 and num2 within each some_group, then compute
# each sum's share of the group's combined total.
df2_ex_grouped <- df2_ex %>%
  group_by(some_group) %>%
  # across() replaces the superseded summarise_at().
  summarise(across(c(num1, num2), sum), .groups = "drop") %>%
  # rowwise() makes sum(c_across(...)) a per-row total of the num* columns.
  rowwise() %>%
  mutate(
    pct_num1 = num1 / sum(c_across(starts_with("num"))),
    pct_num2 = num2 / sum(c_across(starts_with("num")))
  )
df2_ex_grouped
#> # A tibble: 2 x 5
#> # Rowwise:
#> some_group num1 num2 pct_num1 pct_num2
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 A 4 6 0.4 0.6
#> 2 B 12 14 0.462 0.538
最后,为了完整起见,我用同样的方法加入了总计摘要(grand summary)
# Grand summary: drop the row-wise grouping, sum num1 and num2 over all rows,
# then compute each sum's share of the combined total.
df2_ex_total <- df2_ex %>%
  ungroup() %>%
  # across() replaces the superseded summarise_at().
  summarise(across(c(num1, num2), sum)) %>%
  # rowwise() makes sum(c_across(...)) a per-row total of the num* columns.
  rowwise() %>%
  mutate(
    pct_num1 = num1 / sum(c_across(starts_with("num"))),
    pct_num2 = num2 / sum(c_across(starts_with("num")))
  )
df2_ex_total
#> # A tibble: 1 x 4
#> # Rowwise:
#> num1 num2 pct_num1 pct_num2
#> <dbl> <dbl> <dbl> <dbl>
#> 1 16 20 0.444 0.556
获取所需表格的代码如下所示。请注意,我使用了两种方法来确定应该出现在摘要行相应单元格中的值:
- 使用 base R 的下标索引从 df2_ex_grouped 中取值
- 使用 filter() 和 pull()
选择您喜欢的那一种即可。
您的代码中缺少的部分是:应当指定 summary_rows() 函数要应用于 some_group 列的哪个取值,而不是使用 groups = TRUE。希望这个答案能解决您的问题。
# Render the table. The percentage summary cells cannot be computed by one
# function shared across groups, so summary_rows() is called once per group
# and percentage column, each time returning the precomputed value from
# df2_ex_grouped. The grand summary takes its percentages from df2_ex_total.
df2_ex %>%
  gt(groupname_col = "some_group", rowname_col = "some_letter") %>%
  # Per-group totals of the raw counts. This call appeared twice in a row in
  # the original code; a single call is sufficient.
  summary_rows(
    groups = TRUE, columns = vars(num1, num2),
    fns = list(TOTAL = "sum"),
    formatter = fmt_number, decimals = 0
  ) %>%
  # Group A: values looked up by position with base R indexing. This relies
  # on "A" being the first row of df2_ex_grouped.
  summary_rows(
    groups = "A", columns = vars(pct_num1),
    fns = list(TOTAL = ~ df2_ex_grouped$pct_num1[1]),
    formatter = fmt_number, decimals = 4
  ) %>%
  summary_rows(
    groups = "A", columns = vars(pct_num2),
    fns = list(TOTAL = ~ df2_ex_grouped$pct_num2[1]),
    formatter = fmt_number, decimals = 4
  ) %>%
  # Group B, pct_num1: again looked up by position (second row).
  summary_rows(
    groups = "B", columns = vars(pct_num1),
    fns = list(TOTAL = ~ df2_ex_grouped$pct_num1[2]),
    formatter = fmt_number, decimals = 4
  ) %>%
  # Group B, pct_num2: the alternative lookup, by group name via
  # filter() + pull().
  summary_rows(
    groups = "B", columns = vars(pct_num2),
    fns = list(
      TOTAL = ~ (
        df2_ex_grouped %>%
          filter(some_group == "B") %>%
          pull(pct_num2)
      )
    ),
    formatter = fmt_number, decimals = 4
  ) %>%
  # Grand totals of the raw counts.
  grand_summary_rows(
    columns = vars(num1, num2),
    fns = list(`grand TOTAL` = "sum"),
    formatter = fmt_number, decimals = 0
  ) %>%
  # Grand-total percentages taken from the one-row df2_ex_total.
  grand_summary_rows(
    columns = vars(pct_num1),
    fns = list(`grand TOTAL` = ~ (df2_ex_total$pct_num1)),
    formatter = fmt_number, decimals = 3
  ) %>%
  grand_summary_rows(
    columns = vars(pct_num2),
    fns = list(`grand TOTAL` = ~ (df2_ex_total$pct_num2)),
    formatter = fmt_number, decimals = 3
  )
由reprex 包(v0.3.0)于 2020 年 11 月 14 日创建
推荐阅读
- python - 根据 groupby 输出更新列值
- python - TensorFlow:将 GRUCell 权重从 compat.v1 转换为 tensorflow 2
- video - 使用 ffmpeg 获取多个视频输入的多个缩略图
- c# - 无法从 Azure 上的 Visual Studio Test 运行测试程序集
- docker - Docker golang busybox“没有这样的文件或目录”错误
- laravel - 错误 500:Mailgun 的服务器当前无法访问
- flutter - 使用多个构造函数创建自定义小部件
- php - PHP S3Client uploadAsync 方法的测试错误处理
- javascript - 如何在Angular中切片一个json数组
- python - 如何运行泊松过程模拟?