r - mutate 和 case_when 可变数量的变量时的替代代码
问题描述
在下面的例子中
# Example data: 18 samples in two groups (A, B). Within each group the rows are
# three "sample", three "std", then three more "sample" measurements of B1.
df <- data.frame(
  Sample = seq_len(18),
  Group = rep(c("A", "B"), each = 9),
  B1 = c(
    34.4, 34.2, 34.1, 32.2, 32.8, 32.9, 30.8, 31.2, 30.5,
    32.4, 32.4, 32.1, 31.2, 31.8, 31.9, 30.8, 30.2, 30.5
  ),
  S1 = rep(rep(c("sample", "std", "sample"), each = 3), times = 2)
)
我计算每个 Group 中 S1 == "std" 时 B1 的最大值:
# For every Group, store the largest B1 among its "std" rows in a variable
# named maxB_<Group> (maxB_A and maxB_B for the example data).
for (Var in unique(df$Group)) {
  # Base subsetting avoids depending on which filter() is attached; na.rm = TRUE
  # also drops the NA elements that an NA in S1 or Group would index.
  assign(
    paste0("maxB_", Var),
    max(df$B1[df$S1 == "std" & df$Group == Var], na.rm = TRUE)
  )
}
我想根据下面这段代码在 df 中创建一个新列 df$Bdl:
# Mark non-standard samples whose B1 exceeds their group's "std" maximum with
# "Y". The result is assigned back to df so that df$Bdl exists for the next
# step (the mutate() call must also be closed for the snippet to parse).
df <- df %>%
  mutate(Bdl = case_when(
    S1 != "std" & Group == "A" & B1 > maxB_A ~ "Y",
    S1 != "std" & Group == "B" & B1 > maxB_B ~ "Y"
  ))
# Remaining non-standard samples get "N"; "std" rows keep NA.
df$Bdl <- ifelse(df$S1 != "std" & is.na(df$Bdl), "N", df$Bdl)
得到以下df
> df
Sample Group B1 S1 Bdl
1 1 A 34.4 sample Y
2 2 A 34.2 sample Y
3 3 A 34.1 sample Y
4 4 A 32.2 std <NA>
5 5 A 32.8 std <NA>
6 6 A 32.9 std <NA>
7 7 A 30.8 sample N
8 8 A 31.2 sample N
9 9 A 30.5 sample N
10 10 B 32.4 sample Y
11 11 B 32.4 sample Y
12 12 B 32.1 sample Y
13 13 B 31.2 std <NA>
14 14 B 31.8 std <NA>
15 15 B 31.9 std <NA>
16 16 B 30.8 sample N
17 17 B 30.2 sample N
18 18 B 30.5 sample N
>
当我有超过 2 个组时,我想使用此代码,而不必在 mutate 部分中添加所有这些代码/条件。我想出了以下方法,但我不确定这是否是最好的
# Per-group threshold: the largest B1 among the "std" rows of each Group.
test <- df %>%
  filter(S1 %in% "std") %>%
  group_by(Group) %>%
  summarise(maxB = max(B1, na.rm = TRUE))
# Attach the threshold to every row (explicit key, so the join does not depend
# on whichever columns happen to share a name) and flag rows above it with "Y".
df2 <- df %>%
  left_join(test, by = "Group") %>%
  mutate(Bdl = case_when(B1 > maxB ~ "Y"))
# Remaining non-standard samples get "N"; "std" rows keep NA.
df2$Bdl <- ifelse(df2$S1 != "std" & is.na(df2$Bdl), "N", df2$Bdl)
解决方案
您不需要 for 循环或临时变量,可以直接在 mutate 中处理:
# Within each Group, flag every non-standard sample whose B1 exceeds the
# largest "std" B1 of that group; the "std" rows themselves get NA.
df %>%
  group_by(Group) %>%
  mutate(
    # max() reduces the standards to a single threshold per group. Comparing
    # against B1[S1 == "std"] directly would recycle the individual standard
    # values across the rows instead of using their maximum.
    Bdl = if_else(S1 == "std", NA, B1 > max(B1[S1 == "std"], na.rm = TRUE))
  ) %>%
  ungroup()
# # A tibble: 18 x 5
# Sample Group B1 S1 Bdl
# <int> <chr> <dbl> <chr> <lgl>
# 1 1 A 34.4 sample TRUE
# 2 2 A 34.2 sample TRUE
# 3 3 A 34.1 sample TRUE
# 4 4 A 32.2 std NA
# 5 5 A 32.8 std NA
# 6 6 A 32.9 std NA
# 7 7 A 30.8 sample FALSE
# 8 8 A 31.2 sample FALSE
# 9 9 A 30.5 sample FALSE
# 10 10 B 32.4 sample TRUE
# 11 11 B 32.4 sample TRUE
# 12 12 B 32.1 sample TRUE
# 13 13 B 31.2 std NA
# 14 14 B 31.8 std NA
# 15 15 B 31.9 std NA
# 16 16 B 30.8 sample FALSE
# 17 17 B 30.2 sample FALSE
# 18 18 B 30.5 sample FALSE
我通常推荐/更喜欢存储 logical 值而不是 "Y"/"N",但如果您需要这些字母,可以很容易地调整:
# Same per-group flag, stored as "Y"/"N" instead of a logical: "std" rows get
# NA, samples above the group's largest "std" B1 get "Y", all others "N".
df %>%
  group_by(Group) %>%
  mutate(Bdl = case_when(
    S1 == "std" ~ NA_character_,
    # max() gives one threshold per group; without it the individual "std"
    # values would be recycled across the rows of the group.
    B1 > max(B1[S1 == "std"], na.rm = TRUE) ~ "Y",
    TRUE ~ "N"
  )) %>%
  ungroup()