首页 > 解决方案 > 从多列中选择组中的值

问题描述

我的数据具有以下结构。对于每个国家和 id,从货件列(china 和 hk)中,如果一个组同时具有 china 和 hk,如果一个组有 china,则新列货件应该有 HK,如果一个组有,则新列货件应该有 China HK , 新列发货应该有 HK 如果 NA 应该是 NA

M、Q 和 Y 列类似。根据哪一列具有给定组的值,它应该在单列期间。

我在下面包含了我的尝试。但想知道是否有更好的方法来做到这一点。而不是真或假,我如何包含shipment_hkor shipment_china。这样它对用户来说是可读的。此外,在 ifelse 条件下,季度值似乎不起作用。

library(dplyr)
sample_df %>% tibble::as.tibble() %>% 
dplyr::group_by(country,id) %>% 
dplyr::mutate(periods = ifelse(Monthly == "Monthly", "monthly", ifelse(Quarterly == "Quarterly", "quarterly", ifelse(Yearly == "Yearly", "yearly", "")))) %>%
dplyr::mutate(shipment = any(shipment_hk %in% "send to hk")) %>% 
dplyr::select(country,id, type,periods,shipment)
#> # A tibble: 20 x 5
#> # Groups:   country, id [7]
#>    country id     type        periods shipment
#>    <chr>   <chr>  <chr>       <chr>   <lgl>   
#>  1 group_1 2.1    ""          <NA>    FALSE   
#>  2 group_1 2.1    "bar"       monthly FALSE   
#>  3 group_1 2.1    "chocolate" monthly FALSE   
#>  4 group_1 2.17   ""          <NA>    FALSE   
#>  5 group_1 2.17   "bar"       monthly FALSE   
#>  6 group_1 2.17   "chocolate" monthly FALSE   
#>  7 group_1 2.2    ""          <NA>    TRUE    
#>  8 group_1 2.2    ""          <NA>    TRUE    
#>  9 group_1 2.2    "bar"       <NA>    TRUE    
#> 10 group_1 2.2    "chocolate" <NA>    TRUE    
#> 11 group_2 1      ""          <NA>    TRUE    
#> 12 group_2 1      ""          <NA>    TRUE    
#> 13 group_2 1      "bar"       monthly TRUE    
#> 14 group_2 2.1    ""          <NA>    FALSE   
#> 15 group_2 2.1    "bar"       monthly FALSE   
#> 16 group_2 2.12.1 ""          <NA>    TRUE    
#> 17 group_2 2.12.1 ""          <NA>    TRUE    
#> 18 group_2 2.12.1 "donut"     <NA>    TRUE    
#> 19 group_2 2.12.2 ""          <NA>    FALSE   
#> 20 group_2 2.12.2 "bar"       <NA>    FALSE

reprex 包于 2020-11-03 创建(v0.3.0)


dput(sample_df)
structure(list(country = c("group_1", "group_1", "group_1", "group_1", 
"group_1", "group_1", "group_1", "group_1", "group_1", "group_1", 
"group_2", "group_2", "group_2", "group_2", "group_2", "group_2", 
"group_2", "group_2", "group_2", "group_2", "group_3", "group_3", 
"group_3", "group_3", "group_3", "group_3"), id = c("2.1", "2.1", 
"2.1", "2.17", "2.17", "2.17", "2.2", "2.2", "2.2", "2.2", "1", 
"1", "1", "2.1", "2.1", "2.12.1", "2.12.1", "2.12.1", "2.12.2", 
"2.12.2", "2.17", "2.17", "2.17", "2.18", "2.18", "2.18"), type = c("", 
"bar", "chocolate", "", "bar", "chocolate", "", "", "bar", "chocolate", 
"", "", "bar", "", "bar", "", "", "donut", "", "bar", "tiles", 
"tiles", "tiles", "tiles", "tiles", "tiles"), shipment_china = c("send to china", 
NA, NA, "send to china", NA, NA, "send to china", NA, NA, NA, 
"send to china", NA, NA, "send to china", NA, "send to china", 
NA, NA, "send to china", NA, NA, NA, NA, NA, NA, NA), shipment_hk = c(NA, 
NA, NA, NA, NA, NA, NA, "send to hk", NA, NA, NA, "send to hk", 
NA, NA, NA, NA, "send to hk", NA, NA, NA, NA, NA, NA, NA, NA, 
NA), Monthly = c(NA, "Monthly", "Monthly", NA, "Monthly", "Monthly", 
NA, NA, NA, NA, NA, NA, "Monthly", NA, "Monthly", NA, NA, NA, 
NA, NA, NA, "Monthly", NA, NA, NA, NA), Quarterly = c(NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Quarterly", 
NA, "Quarterly", NA, NA, NA, "Quarterly", NA, NA), Yearly = c(NA, 
NA, NA, NA, NA, NA, NA, NA, "Yearly", "Yearly", NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA, 
-26L))

标签: rdplyrdata.table

解决方案


一种使用coalesce和使用case_when而不是嵌套的方法ifelse

library(dplyr)

sample_df %>%
  group_by(country, id) %>%
  mutate(periods = coalesce(Monthly, Quarterly, Yearly),
         shipment = case_when(
           any(shipment_hk == "send to hk") ~ "send to hk",
           any(shipment_china == "send to china") ~ "send to china",
           TRUE ~ NA_character_
         ))

shipment_hk这将在shipment_china您的新shipment列中优先考虑。


推荐阅读