r - 从多列中选择组中的值
问题描述
我的数据具有以下结构。对于每个国家和 id,从货件列(china 和 hk)中,如果一个组同时具有 china 和 hk,如果一个组有 china,则新列货件应该有 HK,如果一个组有,则新列货件应该有 China HK , 新列发货应该有 HK 如果 NA 应该是 NA
M、Q 和 Y 列类似。根据哪一列具有给定组的值,它应该在单列期间。
我在下面包含了我的尝试。但想知道是否有更好的方法来做到这一点。而不是真或假,我如何包含shipment_hk
or shipment_china
。这样它对用户来说是可读的。此外,在 ifelse 条件下,季度值似乎不起作用。
library(dplyr)
sample_df %>% tibble::as.tibble() %>%
dplyr::group_by(country,id) %>%
dplyr::mutate(periods = ifelse(Monthly == "Monthly", "monthly", ifelse(Quarterly == "Quarterly", "quarterly", ifelse(Yearly == "Yearly", "yearly", "")))) %>%
dplyr::mutate(shipment = any(shipment_hk %in% "send to hk")) %>%
dplyr::select(country,id, type,periods,shipment)
#> # A tibble: 20 x 5
#> # Groups: country, id [7]
#> country id type periods shipment
#> <chr> <chr> <chr> <chr> <lgl>
#> 1 group_1 2.1 "" <NA> FALSE
#> 2 group_1 2.1 "bar" monthly FALSE
#> 3 group_1 2.1 "chocolate" monthly FALSE
#> 4 group_1 2.17 "" <NA> FALSE
#> 5 group_1 2.17 "bar" monthly FALSE
#> 6 group_1 2.17 "chocolate" monthly FALSE
#> 7 group_1 2.2 "" <NA> TRUE
#> 8 group_1 2.2 "" <NA> TRUE
#> 9 group_1 2.2 "bar" <NA> TRUE
#> 10 group_1 2.2 "chocolate" <NA> TRUE
#> 11 group_2 1 "" <NA> TRUE
#> 12 group_2 1 "" <NA> TRUE
#> 13 group_2 1 "bar" monthly TRUE
#> 14 group_2 2.1 "" <NA> FALSE
#> 15 group_2 2.1 "bar" monthly FALSE
#> 16 group_2 2.12.1 "" <NA> TRUE
#> 17 group_2 2.12.1 "" <NA> TRUE
#> 18 group_2 2.12.1 "donut" <NA> TRUE
#> 19 group_2 2.12.2 "" <NA> FALSE
#> 20 group_2 2.12.2 "bar" <NA> FALSE
由reprex 包于 2020-11-03 创建(v0.3.0)
dput(sample_df)
structure(list(country = c("group_1", "group_1", "group_1", "group_1",
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1",
"group_2", "group_2", "group_2", "group_2", "group_2", "group_2",
"group_2", "group_2", "group_2", "group_2", "group_3", "group_3",
"group_3", "group_3", "group_3", "group_3"), id = c("2.1", "2.1",
"2.1", "2.17", "2.17", "2.17", "2.2", "2.2", "2.2", "2.2", "1",
"1", "1", "2.1", "2.1", "2.12.1", "2.12.1", "2.12.1", "2.12.2",
"2.12.2", "2.17", "2.17", "2.17", "2.18", "2.18", "2.18"), type = c("",
"bar", "chocolate", "", "bar", "chocolate", "", "", "bar", "chocolate",
"", "", "bar", "", "bar", "", "", "donut", "", "bar", "tiles",
"tiles", "tiles", "tiles", "tiles", "tiles"), shipment_china = c("send to china",
NA, NA, "send to china", NA, NA, "send to china", NA, NA, NA,
"send to china", NA, NA, "send to china", NA, "send to china",
NA, NA, "send to china", NA, NA, NA, NA, NA, NA, NA), shipment_hk = c(NA,
NA, NA, NA, NA, NA, NA, "send to hk", NA, NA, NA, "send to hk",
NA, NA, NA, NA, "send to hk", NA, NA, NA, NA, NA, NA, NA, NA,
NA), Monthly = c(NA, "Monthly", "Monthly", NA, "Monthly", "Monthly",
NA, NA, NA, NA, NA, NA, "Monthly", NA, "Monthly", NA, NA, NA,
NA, NA, NA, "Monthly", NA, NA, NA, NA), Quarterly = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Quarterly",
NA, "Quarterly", NA, NA, NA, "Quarterly", NA, NA), Yearly = c(NA,
NA, NA, NA, NA, NA, NA, NA, "Yearly", "Yearly", NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA,
-26L))
解决方案
一种使用coalesce
和使用case_when
而不是嵌套的方法ifelse
:
library(dplyr)
sample_df %>%
group_by(country, id) %>%
mutate(periods = coalesce(Monthly, Quarterly, Yearly),
shipment = case_when(
any(shipment_hk == "send to hk") ~ "send to hk",
any(shipment_china == "send to china") ~ "send to china",
TRUE ~ NA_character_
))
shipment_hk
这将在shipment_china
您的新shipment
列中优先考虑。
推荐阅读
- python - 划分列表元素并将它们附加到新列表
- appium - WebDriverAgent 文件夹缺少 appium 1.15.1
- amazon-web-services - AWS Lambda 函数:它不显示现有角色的列表
- mysql - 查看 Libreoffice Base 对 mysql 的查询
- python - 如何使用 Django Filters 过滤同一字段的多个值?
- python - 从 Python 中的父文件夹导入模块
- c# - 无法使用 SharePoint 客户端 API 列出文件/文件夹
- sql - 如何使用 LINQ 使用一个表获取另一个表的值
- python - 使用 selenium 捕获网络请求
- authentication - 在 cakephp 3 中对多个模型使用身份验证