首页 > 解决方案 > 将 select_helpers 与 dplyr::coalesce 一起使用

问题描述

我有一个非常宽的数据框(比这里为 reprex 提供的数据大得多)。

使用下面提供的数据(已赋值给 my_wide_data),我想将 dplyr::coalesce 与 dplyr 的选择辅助函数(例如 dplyr::starts_with)一起使用。

# dput output assigned to my_wide_data    
structure(list(myvar1 = c(10L, 3L, 11L, 2L, 4L, 5L, 2L, 6L, 1L, 
                              4L, 12L, 9L, 12L, 2L, 3L, 1L, 2L, 8L, 1L, 2L, 3L, 3L, 8L, 11L, 
                              10L, 6L, 3L, 10L, 5L, 2L, 8L, 3L, 1L, 6L, 2L, 1L, 8L, 4L, 10L, 
                              3L, 1L, 4L, 2L, 12L, 3L, 2L, 5L, 1L, 3L, 5L, 3L, 2L, 12L, 3L, 
                              6L, 11L, 12L, 2L, 6L, 10L, 3L, 10L, 3L, 2L, 2L, 2L, 2L, 3L, 6L, 
                              3L, 6L, 10L, 1L, 3L, 3L, 6L, 2L, 3L, 3L, 3L, 2L, 3L, 2L, 10L, 
                              3L, 3L, 4L, 1L, 3L, 2L, 3L, 9L, 1L, 1L, NA, 5L, 1L, 8L, 3L, 10L, 
                              3L, 3L, 4L, 7L, 10L, 2L, 2L, 11L, 6L, 11L, 6L, 4L, 4L, 12L, 6L, 
                              6L, 1L, 2L, 11L, 2L, 2L, 11L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 9L, 
                              2L, 1L, 1L, 4L, 2L, 8L, 2L, 10L, 6L, 3L, 1L, 6L, 2L, 10L, 3L, 
                              5L, 6L, 3L, 4L, 10L, 9L, 3L, 4L, 3L, 2L, 3L, 9L, 3L, 3L, 1L, 
                              10L, 4L, 4L, 6L, 2L, 7L, 3L, 2L, 3L, 1L, 3L, 3L, 3L, 7L, 2L, 
                              2L, 6L, 2L, 4L, 3L, 3L, 4L, 2L, 4L, 2L, 5L, 5L, 3L, 6L, 5L, 4L, 
                              5L, 4L, 4L, 10L, 1L, 9L, 4L, 4L, 4L, 4L, 8L, 6L, 5L), myvar2 = c(24L, 
                                                                                               24L, 27L, 8L, 9L, 15L, 1L, 27L, 3L, 23L, 28L, 10L, 24L, 5L, 14L, 
                                                                                               17L, 16L, 28L, 29L, 16L, 3L, 13L, 7L, 13L, 18L, 25L, 10L, 10L, 
                                                                                               15L, 27L, 21L, 17L, 25L, 25L, 15L, 25L, 21L, 13L, 9L, 28L, 1L, 
                                                                                               13L, 19L, 21L, 23L, 15L, NA, 29L, 12L, 25L, 1L, 5L, 12L, 7L, 
                                                                                               15L, 25L, 4L, 8L, 30L, 25L, 8L, NA, 6L, 16L, 14L, 7L, 20L, 26L, 
                                                                                               19L, 10L, 1L, 15L, 30L, 7L, 16L, 23L, 24L, 21L, 8L, 1L, 1L, 10L, 
                                                                                               26L, 28L, 5L, 7L, 21L, 10L, 13L, 26L, 14L, 5L, 22L, 18L, NA, 
                                                                                               NA, 9L, 20L, 17L, 23L, 3L, 13L, 7L, 5L, 6L, 9L, 8L, 15L, 9L, 
                                                                                               10L, 15L, 13L, NA, 30L, 22L, 14L, 9L, 16L, 6L, 13L, 19L, 15L, 
                                                                                               1L, 7L, 19L, 25L, 10L, NA, 8L, 25L, 5L, 2L, 16L, 8L, 19L, 18L, 
                                                                                               27L, 2L, NA, 16L, 29L, 4L, 7L, 27L, 24L, 5L, 6L, 17L, 16L, 13L, 
                                                                                               11L, NA, 12L, 9L, 8L, 1L, NA, 5L, 12L, 3L, 3L, 10L, 16L, 16L, 
                                                                                               5L, 24L, 10L, 17L, 23L, 19L, 12L, 12L, 18L, 6L, 1L, 3L, 15L, 
                                                                                               26L, 28L, 28L, 27L, 3L, 18L, 22L, 13L, 11L, 30L, 24L, 1L, 25L, 
                                                                                               21L, 7L, 14L, 16L, 9L, 3L, 28L, 11L, 17L, 11L, 25L, 23L, 7L, 
                                                                                               21L), myvar3 = c(78L, 79L, 78L, 78L, 79L, 78L, 79L, 77L, 79L, 
                                                                                                                79L, 76L, 78L, 78L, 79L, 79L, 79L, 79L, 78L, 79L, 79L, 79L, 79L, 
                                                                                                                78L, 78L, 78L, 79L, 79L, 78L, 78L, 79L, 78L, 79L, 79L, 78L, 79L, 
                                                                                                                79L, 78L, 78L, 78L, 79L, 79L, 79L, 79L, 78L, 79L, 79L, 73L, 79L, 
                                                                                                                79L, 79L, 79L, 79L, 72L, 79L, 78L, 78L, 78L, 79L, 78L, 78L, 79L, 
                                                                                                                78L, 79L, 79L, 79L, 79L, 79L, 78L, 78L, 79L, 78L, 78L, 79L, 79L, 
                                                                                                                79L, 76L, 79L, 78L, 79L, 79L, 79L, 79L, 79L, 75L, 79L, 79L, 79L, 
                                                                                                                79L, 79L, 79L, 79L, 78L, 79L, 79L, 77L, 78L, 79L, 78L, 79L, 78L, 
                                                                                                                79L, 79L, 79L, 78L, 78L, 79L, 79L, 78L, 78L, 78L, 78L, 79L, 79L, 
                                                                                                                78L, 78L, 76L, 79L, 76L, 77L, 79L, 79L, 78L, 79L, 79L, 79L, 79L, 
                                                                                                                79L, 79L, 79L, 78L, 78L, 79L, 78L, 79L, 79L, 78L, 79L, 78L, 79L, 
                                                                                                                79L, 79L, 79L, 79L, 78L, 79L, 79L, 77L, 79L, 79L, 78L, 78L, 79L, 
                                                                                                                78L, 79L, 79L, 79L, 78L, 79L, 79L, 79L, 78L, 79L, 79L, 78L, 79L, 
                                                                                                                78L, 79L, 79L, 78L, 79L, 79L, 79L, 79L, 79L, 79L, 79L, 78L, 79L, 
                                                                                                                78L, 79L, 79L, 79L, 79L, 79L, 78L, 79L, 79L, 79L, 79L, 79L, 79L, 
                                                                                                                79L, 78L, 79L, 78L, 79L, 78L, 79L, 79L, 79L, 79L, 76L, 78L, 79L
                                                                                               )), class = "data.frame", row.names = c(NA, -204L)) -> my_wide_data

换句话说,我不想像下面这样逐个列出所有变量:

# Explicit version: every column to coalesce is listed by hand.
# Inside mutate() the new column must be named with `=`; using `<-` would
# name the column after the whole deparsed expression instead of coalesce_var.
my_wide_data %>%
  mutate(coalesce_var = coalesce(myvar1, myvar2, myvar3))

我希望能够写成类似下面这样:

# Wished-for syntax from the question: pick the columns with a selection helper
# instead of naming them one by one.
# NOTE(review): presumably this does not run as written, since starts_with()
# is meant to be used inside a selecting function such as select() -- confirm.
# `=` (not `<-`) is used so that the new column would be named coalesce_var.
my_wide_data %>%
  mutate(coalesce_var = coalesce(starts_with("my")))

问题:是否可以在 dplyr 内部或 tidyverse 的其他包中实现类似的做法?

标签: r dplyr

解决方案


以下方法可行:它利用了可以通过 !!! 将一个列表拼接(splice)为 coalesce(...) 的多个参数这一特性

# Two numeric vectors whose NA gaps complement each other.
vecs <- list(c(1, 2, NA, NA, 5), c(NA, NA, 3, 4, 5))
# `!!!` splices the list so that each element becomes a separate argument
# to coalesce().
coalesce(!!!vecs)

您可以将 select 与选择辅助函数结合使用,再用 !!! 把选出的数据框(即各列组成的列表)拼接为 coalesce 的参数

# Select every column whose name starts with "my", then splice those columns
# into coalesce() as individual arguments; the result is stored in coalesce_var.
mutate(
  my_wide_data,
  coalesce_var = coalesce(!!!select(my_wide_data, starts_with("my")))
)

#     myvar1 myvar2 myvar3 coalesce_var
# 1       10     24     78           10
# 2        3     24     79            3
# 3       11     27     78           11
# 4        2      8     78            2
# 5        4      9     79            4
# etc

编辑:这是另一种写法,我更喜欢这一种

library(rlang)
library(tidyselect)
# vars_select() picks the column names starting with "my", syms() turns those
# names into symbols, and `!!!` splices the symbols into coalesce() as
# separate arguments.
mutate(
  my_wide_data,
  coalesce_var = coalesce(
    !!!syms(vars_select(names(my_wide_data), starts_with("my")))
  )
)

推荐阅读