首页 > 解决方案 > R - 逗号分隔的列表之间是否至少有一个匹配项?

问题描述

这是我目前正在使用的数据:

    structure(list(PortfolioID = c("z036527321", "z036527321", "z036527321", 
"z036526431", "z036526421", "z036526421", "z036526301", "z036525571", 
"z036525571", "z036525571", "z036525391", "z036525391", "z036525391", 
"z036525391", "z036525391", "z036525391", "z036525391", "z036525341", 
"z036525341", "z036524141", "z036524061", "z036523701", "z036523701", 
"z036523701", "z036523701", "z036523701", "z036523701", "z036523701", 
"z036523701", "z036523701", "z036522711", "z036521591", "z036521591", 
"z036521541", "z036521541", "z036521431", "z036521431", "z036521431", 
"z036521431", "z036521431", "z036521431", "z036521431", "z036521431", 
"z036521431", "z036521431", "z036520791", "z036520791", "z036520791", 
"z036520791", "z036520671", "z036520671", "z036520671", "z036520671", 
"z036520671", "z036520051", "z036520041", "z036519831", "z036519831", 
"z036519831", "z036519831", "z036519221", "z036519191", "z036518281", 
"z036518281", "z036517381", "z036516621", "z036516571", "z036516571", 
"z036516571", "z036516571", "z036516571", "z036516571", "z036516571", 
"z036516571", "z036516541", "z036516541", "z036516541", "z036515311", 
"z036515311", "z036515311", "z036515311", "z036515301", "z036515301", 
"z036515301", "z036515301", "z036515301", "z036515301", "z036515301", 
"z036515301", "z036515301", "z036515301", "z036515301", "z036515301", 
"z036515301", "z036515301", "z036515301", "z036515301", "z036515301", 
"z036515301", "z036515301"), `Enterprise Customer Party ID` = c("100600263, 100782628, 100753805, 100770731, 101360754, 103126734", 
"100600263, 100782628, 100753805, 100770731, 101360754, 103126734", 
"100600263, 100782628, 100753805, 100770731, 101360754, 103126734", 
"100175650, 100434367, 100600263, 103507556, 101519578, 103134259, 103134510", 
"100022778, 100570834, 100600263, 103514678, 101514778", "100022778, 100570834, 100600263, 103514678, 101514778", 
"100189878, 100600263, 101199515, 100746736", "101065177, 103502728, 101392726, 101404531", 
"101065177, 103502728, 101392726, 101404531", "101491312, 101404531, 103493687", 
"100363017, 100379794, 100600263, 100616003, 101296443, 103133788", 
"100363017, 100379794, 100600263, 100616003, 101296443", "100363017, 100379794, 100600263, 100616003, 101296443, 103130494", 
"100363017, 100379794, 100600263, 100616003, 101296443, 103130494", 
"100363017, 100379794, 100600263, 100616003, 101296443, 103130494", 
"100379794, 100363017, 100600263, 100616003, 101296443", "100379794, 100363017, 100600263, 100616003, 101296443, 103130494", 
"100175650, 100219612, 100434367, 100600263, 103510210, 103124599", 
"100175650, 100219612, 100600263, 103510210, 103124599, 103124680", 
"100189878, 100386081, 100556993, 100951842, 103521916", "100255427, 100599889, 100600263, 100787593, 101318358, 101466804", 
"100541378, 100600263, 100917340, 100991230", "100600263, 100640734, 100917340, 100991230, 103135413, 103489659", 
"100600263, 100640734, 100917340, 100991230, 103489659", "100600263, 100640734, 100917340, 100991230, 103489659", 
"100541378, 100600263, 100917340, 100991230", "100600263, 100640734, 100917340, 100991230, 103135413, 103489659", 
"100541378, 100600263, 100917340, 100991230", "100600263, 100640734, 100917340, 100991230, 103489659", 
"100600263, 100640734, 100917340, 100991230, 103489659", "100600263, 100987563, 101475026, 101513471, 101558213", 
"100180069, 100600263, 100714430, 101380893", "100180069, 100600263, 100693475, 100714430, 101380893", 
"100600263, 100647941, 100714430, 101497006, 101380893, 103663428", 
"100600263, 100693475, 100714430, 101497006, 101380893", "100513394, 100600263, 101250322, 100888261, 101484512", 
"100513394, 100600263, 101250322, 101488513", "100513394, 100600263, 100680615, 101250322", 
"100513394, 100600263, 100748241, 101250322", "100259186, 100513394, 100600263, 100647941, 101250322", 
"100325289, 100513394, 100600263, 101250322", "100513394, 100600263, 101250322, 101270920", 
"100358051, 100513394, 100600263, 101250322", "100513394, 100600263, 100845917, 101250322", 
"100513394, 100566608, 100600263, 101250322", "100154267, 100219612, 100600263, 101213228, 103498394, 103147666", 
"100154267, 100219612, 100600263, 101213228, 103498394, 103147666", 
"100154267, 100219612, 100600263, 101213228, 103498394, 103147666", 
"100154267, 100219612, 100600263, 101213228, 103498394, 103147666", 
"100386081, 100600263, 100746736, 100856914", "100386081, 100600263, 100746736, 100856914", 
"100386081, 100600263, 100746736, 100856914", "100386081, 100600263, 100746736, 100856914", 
"100386081, 100600263, 100746736, 100856914", "100482176, 100600263, 100615160, 100784263, 101374469", 
"100600263, 101360754, 103124701, 101436015", "100600263, 100655349, 100684621, 100823164, 101254987, 100770731, 100896421", 
"100600263, 100655349, 100684621, 100823164, 101254987, 100896421", 
"100600263, 100655349, 100684621, 100823164, 101254987, 100770731, 100896421", 
"100600263, 100684621, 100655349, 100823164, 101254987, 100770731, 100896421", 
"100513394, 100569469, 100600263, 100693475, 101344508", "100513394, 100600263, 101250322, 103124606, 103143455", 
"100178006, 103513498, 101404531", "100178006, 103513498, 101404531", 
"100532433, 100600263, 100685774, 100787593, 101344508, 103524479", 
"100110952, 100600263, 100879526, 103147348, 103147682, 103147890", 
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888", 
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888", 
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888", 
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888", 
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888", 
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888", 
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888", 
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888", 
"100566667, 100600263, 100603186, 103130050", "100566667, 100600263, 100603186, 103531703, 103128007", 
"100566667, 100600263, 100603186, 103531703, 103128007", "100505868, 100600263, 100879526, 101436015, 103139491, 103482495", 
"100505868, 100600263, 100879526, 103139491, 101436015, 103482495", 
"100505868, 100600263, 100879526, 101436015, 103139491, 103482495", 
"100505868, 100600263, 100879526, 101436015, 103139491, 103482495", 
"100386081, 100600263, 100746736, 103145053", "100386081, 100600263, 101050657, 100746736", 
"100386081, 100600263, 101050657, 100746736", "100386081, 100600263, 100746736, 101050657", 
"100386081, 100600263, 100746736, 103145053", "100386081, 100600263, 100746736, 101381649", 
"100386081, 100600263, 101050657, 100746736", "100386081, 100600263, 100746736, 103145053", 
"100386081, 100600263, 101050657, 100746736", "100386081, 100600263, 100746736, 103145053", 
"100019889, 100386081, 100587086, 100600263, 100746736, 103516226, 103629951, 101540702, 103143165", 
"100386081, 100600263, 100746736, 101381649", "100386081, 100600263, 100746736, 101381649", 
"100386081, 100587086, 100600263, 100746736, 101540702, 103147108", 
"100386081, 100600263, 100746736, 101381649", "100019889, 100386081, 100587086, 100600263, 100746736, 103516226, 103629951, 101540702, 103143165", 
"100386081, 100600263, 100746736, 101381649", "100019889, 100386081, 100587086, 100600263, 100746736, 103516226, 101540702, 103629951, 103143165", 
"100189878, 100386081, 100951842, 101381649")), row.names = c(NA, 
-100L), class = c("tbl_df", "tbl", "data.frame"))

我需要知道是否有办法找出在所有相同的投资组合 ID 中 B 列中是否至少有一个匹配值。例如,对于 PortfolioID z036527321,我们可以看到列表中肯定有一个匹配项(100600263 和许多其他匹配项)。将有一些 PortfolioID,它们之间没有一个匹配项。我所需要的只是某种指标。如果所有投资组合都有一个匹配值,则为 1,否则为 0。我真的很感谢提前的帮助。谢谢!

标签: r

解决方案


按'PortfolioID'分组后,拆分Enterprise Customer Party ID列,使用 获取intersect元素listreduce检查其length并转换为逻辑is_greater_than

library(dplyr)
library(purrr)
library(magrittr)
df1 %>% 
    group_by(PortfolioID) %>% 
    mutate(new = reduce(strsplit(`Enterprise Customer Party ID`, 
           ",\\s+"), intersect) %>%
        length %>%
        is_greater_than(0)) %>%
    ungroup

-输出

# A tibble: 100 x 3
   PortfolioID `Enterprise Customer Party ID`                                              new  
   <chr>       <chr>                                                                       <lgl>
 1 z036527321  100600263, 100782628, 100753805, 100770731, 101360754, 103126734            TRUE 
 2 z036527321  100600263, 100782628, 100753805, 100770731, 101360754, 103126734            TRUE 
 3 z036527321  100600263, 100782628, 100753805, 100770731, 101360754, 103126734            TRUE 
 4 z036526431  100175650, 100434367, 100600263, 103507556, 101519578, 103134259, 103134510 TRUE 
 5 z036526421  100022778, 100570834, 100600263, 103514678, 101514778                       TRUE 
 6 z036526421  100022778, 100570834, 100600263, 103514678, 101514778                       TRUE 
 7 z036526301  100189878, 100600263, 101199515, 100746736                                  TRUE 
 8 z036525571  101065177, 103502728, 101392726, 101404531                                  TRUE 
 9 z036525571  101065177, 103502728, 101392726, 101404531                                  TRUE 
10 z036525571  101491312, 101404531, 103493687                                             TRUE 
# … with 90 more rows

推荐阅读