r - R - 逗号分隔的列表之间是否至少有一个匹配项?
问题描述
这是我目前正在使用的数据:
structure(list(PortfolioID = c("z036527321", "z036527321", "z036527321",
"z036526431", "z036526421", "z036526421", "z036526301", "z036525571",
"z036525571", "z036525571", "z036525391", "z036525391", "z036525391",
"z036525391", "z036525391", "z036525391", "z036525391", "z036525341",
"z036525341", "z036524141", "z036524061", "z036523701", "z036523701",
"z036523701", "z036523701", "z036523701", "z036523701", "z036523701",
"z036523701", "z036523701", "z036522711", "z036521591", "z036521591",
"z036521541", "z036521541", "z036521431", "z036521431", "z036521431",
"z036521431", "z036521431", "z036521431", "z036521431", "z036521431",
"z036521431", "z036521431", "z036520791", "z036520791", "z036520791",
"z036520791", "z036520671", "z036520671", "z036520671", "z036520671",
"z036520671", "z036520051", "z036520041", "z036519831", "z036519831",
"z036519831", "z036519831", "z036519221", "z036519191", "z036518281",
"z036518281", "z036517381", "z036516621", "z036516571", "z036516571",
"z036516571", "z036516571", "z036516571", "z036516571", "z036516571",
"z036516571", "z036516541", "z036516541", "z036516541", "z036515311",
"z036515311", "z036515311", "z036515311", "z036515301", "z036515301",
"z036515301", "z036515301", "z036515301", "z036515301", "z036515301",
"z036515301", "z036515301", "z036515301", "z036515301", "z036515301",
"z036515301", "z036515301", "z036515301", "z036515301", "z036515301",
"z036515301", "z036515301"), `Enterprise Customer Party ID` = c("100600263, 100782628, 100753805, 100770731, 101360754, 103126734",
"100600263, 100782628, 100753805, 100770731, 101360754, 103126734",
"100600263, 100782628, 100753805, 100770731, 101360754, 103126734",
"100175650, 100434367, 100600263, 103507556, 101519578, 103134259, 103134510",
"100022778, 100570834, 100600263, 103514678, 101514778", "100022778, 100570834, 100600263, 103514678, 101514778",
"100189878, 100600263, 101199515, 100746736", "101065177, 103502728, 101392726, 101404531",
"101065177, 103502728, 101392726, 101404531", "101491312, 101404531, 103493687",
"100363017, 100379794, 100600263, 100616003, 101296443, 103133788",
"100363017, 100379794, 100600263, 100616003, 101296443", "100363017, 100379794, 100600263, 100616003, 101296443, 103130494",
"100363017, 100379794, 100600263, 100616003, 101296443, 103130494",
"100363017, 100379794, 100600263, 100616003, 101296443, 103130494",
"100379794, 100363017, 100600263, 100616003, 101296443", "100379794, 100363017, 100600263, 100616003, 101296443, 103130494",
"100175650, 100219612, 100434367, 100600263, 103510210, 103124599",
"100175650, 100219612, 100600263, 103510210, 103124599, 103124680",
"100189878, 100386081, 100556993, 100951842, 103521916", "100255427, 100599889, 100600263, 100787593, 101318358, 101466804",
"100541378, 100600263, 100917340, 100991230", "100600263, 100640734, 100917340, 100991230, 103135413, 103489659",
"100600263, 100640734, 100917340, 100991230, 103489659", "100600263, 100640734, 100917340, 100991230, 103489659",
"100541378, 100600263, 100917340, 100991230", "100600263, 100640734, 100917340, 100991230, 103135413, 103489659",
"100541378, 100600263, 100917340, 100991230", "100600263, 100640734, 100917340, 100991230, 103489659",
"100600263, 100640734, 100917340, 100991230, 103489659", "100600263, 100987563, 101475026, 101513471, 101558213",
"100180069, 100600263, 100714430, 101380893", "100180069, 100600263, 100693475, 100714430, 101380893",
"100600263, 100647941, 100714430, 101497006, 101380893, 103663428",
"100600263, 100693475, 100714430, 101497006, 101380893", "100513394, 100600263, 101250322, 100888261, 101484512",
"100513394, 100600263, 101250322, 101488513", "100513394, 100600263, 100680615, 101250322",
"100513394, 100600263, 100748241, 101250322", "100259186, 100513394, 100600263, 100647941, 101250322",
"100325289, 100513394, 100600263, 101250322", "100513394, 100600263, 101250322, 101270920",
"100358051, 100513394, 100600263, 101250322", "100513394, 100600263, 100845917, 101250322",
"100513394, 100566608, 100600263, 101250322", "100154267, 100219612, 100600263, 101213228, 103498394, 103147666",
"100154267, 100219612, 100600263, 101213228, 103498394, 103147666",
"100154267, 100219612, 100600263, 101213228, 103498394, 103147666",
"100154267, 100219612, 100600263, 101213228, 103498394, 103147666",
"100386081, 100600263, 100746736, 100856914", "100386081, 100600263, 100746736, 100856914",
"100386081, 100600263, 100746736, 100856914", "100386081, 100600263, 100746736, 100856914",
"100386081, 100600263, 100746736, 100856914", "100482176, 100600263, 100615160, 100784263, 101374469",
"100600263, 101360754, 103124701, 101436015", "100600263, 100655349, 100684621, 100823164, 101254987, 100770731, 100896421",
"100600263, 100655349, 100684621, 100823164, 101254987, 100896421",
"100600263, 100655349, 100684621, 100823164, 101254987, 100770731, 100896421",
"100600263, 100684621, 100655349, 100823164, 101254987, 100770731, 100896421",
"100513394, 100569469, 100600263, 100693475, 101344508", "100513394, 100600263, 101250322, 103124606, 103143455",
"100178006, 103513498, 101404531", "100178006, 103513498, 101404531",
"100532433, 100600263, 100685774, 100787593, 101344508, 103524479",
"100110952, 100600263, 100879526, 103147348, 103147682, 103147890",
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888",
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888",
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888",
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888",
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888",
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888",
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888",
"100086400, 100600263, 100991230, 103524505, 103125230, 103135888",
"100566667, 100600263, 100603186, 103130050", "100566667, 100600263, 100603186, 103531703, 103128007",
"100566667, 100600263, 100603186, 103531703, 103128007", "100505868, 100600263, 100879526, 101436015, 103139491, 103482495",
"100505868, 100600263, 100879526, 103139491, 101436015, 103482495",
"100505868, 100600263, 100879526, 101436015, 103139491, 103482495",
"100505868, 100600263, 100879526, 101436015, 103139491, 103482495",
"100386081, 100600263, 100746736, 103145053", "100386081, 100600263, 101050657, 100746736",
"100386081, 100600263, 101050657, 100746736", "100386081, 100600263, 100746736, 101050657",
"100386081, 100600263, 100746736, 103145053", "100386081, 100600263, 100746736, 101381649",
"100386081, 100600263, 101050657, 100746736", "100386081, 100600263, 100746736, 103145053",
"100386081, 100600263, 101050657, 100746736", "100386081, 100600263, 100746736, 103145053",
"100019889, 100386081, 100587086, 100600263, 100746736, 103516226, 103629951, 101540702, 103143165",
"100386081, 100600263, 100746736, 101381649", "100386081, 100600263, 100746736, 101381649",
"100386081, 100587086, 100600263, 100746736, 101540702, 103147108",
"100386081, 100600263, 100746736, 101381649", "100019889, 100386081, 100587086, 100600263, 100746736, 103516226, 103629951, 101540702, 103143165",
"100386081, 100600263, 100746736, 101381649", "100019889, 100386081, 100587086, 100600263, 100746736, 103516226, 101540702, 103629951, 103143165",
"100189878, 100386081, 100951842, 101381649")), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
我需要知道是否有办法找出在所有相同的投资组合 ID 中 B 列中是否至少有一个匹配值。例如,对于 PortfolioID z036527321,我们可以看到列表中肯定有一个匹配项(100600263 和许多其他匹配项)。将有一些 PortfolioID,它们之间没有一个匹配项。我所需要的只是某种指标。如果所有投资组合都有一个匹配值,则为 1,否则为 0。我真的很感谢提前的帮助。谢谢!
解决方案
按'PortfolioID'分组后,拆分Enterprise Customer Party ID
列,使用 获取intersect
元素list
,reduce
检查其length
并转换为逻辑is_greater_than
library(dplyr)
library(purrr)
library(magrittr)
df1 %>%
group_by(PortfolioID) %>%
mutate(new = reduce(strsplit(`Enterprise Customer Party ID`,
",\\s+"), intersect) %>%
length %>%
is_greater_than(0)) %>%
ungroup
-输出
# A tibble: 100 x 3
PortfolioID `Enterprise Customer Party ID` new
<chr> <chr> <lgl>
1 z036527321 100600263, 100782628, 100753805, 100770731, 101360754, 103126734 TRUE
2 z036527321 100600263, 100782628, 100753805, 100770731, 101360754, 103126734 TRUE
3 z036527321 100600263, 100782628, 100753805, 100770731, 101360754, 103126734 TRUE
4 z036526431 100175650, 100434367, 100600263, 103507556, 101519578, 103134259, 103134510 TRUE
5 z036526421 100022778, 100570834, 100600263, 103514678, 101514778 TRUE
6 z036526421 100022778, 100570834, 100600263, 103514678, 101514778 TRUE
7 z036526301 100189878, 100600263, 101199515, 100746736 TRUE
8 z036525571 101065177, 103502728, 101392726, 101404531 TRUE
9 z036525571 101065177, 103502728, 101392726, 101404531 TRUE
10 z036525571 101491312, 101404531, 103493687 TRUE
# … with 90 more rows