首页 > 解决方案 > 在为多个搜索字符串搜索多个列时如何使用str_detect

问题描述

我希望将我的函数迁移到新引入的 `across()`。

我有一个使用 `filter_at()` 的函数。

但是，我无法用 `across()` 复现同样的结果，如下所示：

library(tidyverse)

# Example data: customer name, free-text description, a transaction code and
# an integer value. The search terms may appear in either text column and in
# any letter case.
raw_df <- tibble::tibble(
  cust_name = c(
    "Cisco", "bad_cs", "Ibm", "bad_ib",
    "oraCle", "mSft", "noth", "noth"
  ),
  other_desc = c(
    "nothing", "cisCo", "nothing", "ibM",
    "Oracle", "nothing", "Msft", "oracle"
  ),
  trans = c("a", "s", "d", "f", "g", "k", "j", "l"),
  val = c(100L, 101L, 102L, 102L, 103L, 103L, 104L, 104L)
)


# Terms to look for in the name/description columns; they are collapsed into
# a single "a|b" alternation pattern where the filter is built.
search_string <- c("ibm", "cisco")


# Reference result, done using `filter_at()` + `any_vars()`:
# keep a row when either text column matches any search term (ignoring
# letter case), then drop duplicate rows.
raw_df %>%
  filter_at(
    vars(cust_name, other_desc),
    any_vars(
      str_detect(
        .,
        regex(paste(search_string, collapse = "|"), ignore_case = TRUE)
      )
    )
  ) %>%
  unique()
  
  
# Not able to replicate result with `across`
# Attempt 1 (does not work as intended): the parenthesis right after `.`
# closes str_detect(), so regex(...) is handed to across() as a separate
# argument and the lambda calls str_detect() without any pattern.
raw_df %>% 
  filter(across(
    .cols = c(cust_name, other_desc), 
    .fns = ~str_detect(.), regex(paste(search_string, collapse = "|"), ignore_case = TRUE)))



# Attempt 2 (does not work as intended): `str_detect` is passed to filter()
# as a bare function object instead of being called on the columns,
# any_of() receives two unquoted column names (it is meant for a character
# vector of names), and regex(...) sits in the argument slot that follows
# the column selection in across().
raw_df %>% 
  filter(str_detect,
         across(any_of(cust_name, other_desc),
         regex(paste(search_string, collapse = "|"), ignore_case = TRUE)))

标签: r across

解决方案


结合 `across()` 和 `Reduce()`，选出任一列匹配任一模式的行。

library(dplyr)
library(stringr)

# Collapse the search terms into a single alternation pattern ("ibm|cisco").
pat <- paste(search_string, collapse = "|")

# across() yields one logical column per searched column; Reduce() ORs them
# element-wise so a row is kept when any of the columns matches.
raw_df %>%
  filter(
    Reduce(
      `|`,
      across(
        c(cust_name, other_desc),
        function(col) str_detect(col, regex(pat, ignore_case = TRUE))
      )
    )
  )

但是，我认为在这里使用 `if_any()` 更合适，因为它正是为处理此类情况而设计的：

# if_any() keeps a row when the predicate is TRUE for at least one of the
# selected columns, so no manual Reduce() over the columns is needed.
raw_df %>%
  filter(
    if_any(
      c(cust_name, other_desc),
      function(col) str_detect(col, regex(pat, ignore_case = TRUE))
    )
  )

# cust_name other_desc trans   val
#  <chr>     <chr>      <chr> <int>
#1 Cisco     nothing    a       100
#2 bad_cs    cisCo      s       101
#3 Ibm       nothing    d       102
#4 bad_ib    ibM        f       102

推荐阅读