首页 > 解决方案 > 强大的 R 函数

问题描述

基于下面定义的手臂,我在dplyr::mutate函数下编写了这些语句来定义事件。我想知道您是否可以帮助我编写一个可以减少代码行的函数,因为有些代码是重复的?谢谢

Arm- 1 (September 1, 2017- August 31, 2018)
 arm_1_event_1  <- c(as.Date("2017-09-01"), as.Date("2017-11-30"))
 arm_1_event_2  <- c(as.Date("2017-12-01"), as.Date("2018-02-28"))
 arm_1_event_3  <- c(as.Date("2018-03-01"), as.Date("2018-05-31"))
 arm_1_event_4  <- c(as.Date("2018-06-01"), as.Date("2018-08-31"))
Arm- 2 (September 1, 2018- August 31, 2019)
  arm_2_event_1  <- c(as.Date("2018-09-01"), as.Date("2018-11-30"))
  arm_2_event_2  <- c(as.Date("2018-12-01"), as.Date("2019-02-28"))
  arm_2_event_3  <- c(as.Date("2019-03-01"), as.Date("2019-05-31"))
  arm_2_event_4  <- c(as.Date("2019-06-01"), as.Date("2019-08-31"))
Arm- 3 (September 1, 2019- August 31, 2020)
  arm_3_event_1  <- c(as.Date("2019-09-01"), as.Date("2019-11-30"))
  arm_3_event_2  <- c(as.Date("2019-12-01"), as.Date("2020-02-29"))
  arm_3_event_3  <- c(as.Date("2020-03-01"), as.Date("2020-05-31"))
  arm_3_event_4  <- c(as.Date("2020-06-01"), as.Date("2020-08-31"))
dplyr::mutate(
 arms_and_events   =         NA_character_,
 arms_and_events   =         dplyr::case_when(
                                                                                    ((date_application_received >= arm_1_event_1[1] & 
date_application_received <= arm_1_event_1[2])  ~ "arm_1_event_1"),
                                                                                    ((date_application_received >= arm_1_event_2[1] & 
date_application_received <= arm_1_event_2[2])  ~ "arm_1_event_2"),
                                                                                    ((date_application_received >= arm_1_event_3[1] & 
date_application_received <= arm_1_event_3[2])  ~ "arm_1_event_3"),
                                                                                    ((date_application_received >= arm_1_event_4[1] & 
date_application_received <= arm_1_event_4[2])  ~ "arm_1_event_4"),
                                                                                    ((date_application_received >= arm_2_event_1[1] & 
date_application_received <= arm_2_event_1[2])  ~ "arm_2_event_1"),
                                                                                    ((date_application_received >= arm_2_event_2[1] &  
date_application_received <= arm_2_event_2[2])  ~ "arm_2_event_2"),
                                                                                    ((date_application_received >= arm_2_event_3[1] &  
date_application_received <= arm_2_event_3[2])  ~ "arm_2_event_3"),
                                                                                    ((date_application_received >= arm_2_event_4[1] 
& date_application_received <= arm_2_event_4[2]) ~ "arm_2_event_4"),
                                                                                    ((date_application_received >= arm_3_event_1[1] & 
date_application_received <= arm_3_event_1[2])   ~ "arm_3_event_1"),
                                                                                    ((date_application_received >= arm_3_event_2[1] & 
date_application_received <= arm_3_event_2[2])   ~ "arm_3_event_2"),
                                                                                    ((date_application_received >= arm_3_event_3[1] & 
date_application_received <= arm_3_event_3[2])   ~ "arm_3_event_3"),
                                                                                    ((date_application_received >= arm_3_event_4[1] & 
date_application_received <= arm_3_event_4[2])   ~ "arm_3_event_4"),
                                                                                    
((date_voucher_issued       >= arm_1_event_1[1] & 
date_voucher_issued       <= arm_1_event_1[2])   ~ "arm_1_event_1"),

((date_voucher_issued       >= arm_1_event_2[1] & 
date_voucher_issued       <= arm_1_event_2[2]) ~ "arm_1_event_2"),

((date_voucher_issued       >= arm_1_event_3[1] & 
date_voucher_issued       <= arm_1_event_3[2]) ~ "arm_1_event_3"),

((date_voucher_issued       >= arm_1_event_4[1] & 
date_voucher_issued       <= arm_1_event_4[2]) ~ "arm_1_event_4"),
                                                                                    
                                                                                    
((date_voucher_issued       >= arm_2_event_1[1] & 
date_voucher_issued       <= arm_2_event_1[2]) ~ "arm_2_event_1"),
                                                                                    
((date_voucher_issued       >= arm_2_event_2[1] & 
date_voucher_issued       <= arm_2_event_2[2]) ~ "arm_2_event_2"),

((date_voucher_issued       >= arm_2_event_3[1] & 
date_voucher_issued       <= arm_2_event_3[2]) ~ "arm_2_event_3"),
                                                                                    
((date_voucher_issued       >= arm_2_event_4[1] & 
date_voucher_issued       <= arm_2_event_4[2]) ~ "arm_2_event_4"),
                                                                                    
                                                                                    
((date_voucher_issued       >= arm_3_event_1[1] & 
date_voucher_issued       <= arm_3_event_1[2]) ~ "arm_3_event_1"),
                                                                                    
((date_voucher_issued       >= arm_3_event_2[1] & 
date_voucher_issued       <= arm_3_event_2[2]) ~ "arm_3_event_2"),

((date_voucher_issued       >= arm_3_event_3[1] & 
date_voucher_issued       <= arm_3_event_3[2]) ~ "arm_3_event_3"),
                                                                                    
((date_voucher_issued       >= arm_3_event_4[1] & 
date_voucher_issued       <= arm_3_event_4[2]) ~ "arm_3_event_4")
              
       )

标签: rfunctiondatedplyr

解决方案


您的整个代码(包括第一个块中的分配)可以简化为一个简单的函数:

arm_and_event <- function(d)
{
  df <- data.frame(arm   = rep(1:3, each = 4), 
                   event = rep(1:4, 3),
                   start = as.Date(c("2017-09-01", "2017-12-01", "2018-03-01", 
                                     "2018-06-01", "2018-09-01", "2018-12-01", 
                                     "2019-03-01", "2019-06-01", "2019-09-01", 
                                     "2019-12-01", "2020-03-01", "2020-06-01")),
                   stop  = as.Date(c("2017-11-30", "2018-02-28", "2018-05-31", 
                                     "2018-08-31", "2018-11-30", "2019-02-28", 
                                     "2019-05-31", "2019-08-31", "2019-11-30", 
                                     "2020-02-29", "2020-05-31", "2020-08-31")))
  
  matches <- sapply(d, function(x) which(x >= df$start & x <= df$stop))  
  paste0("arm", df$arm[matches], "_event", df$event[matches])
}

因此,例如,如果我们有一个这样的日期向量:

date_application_received <- as.Date(c("2017-09-30", "2019-01-05"))

然后我们可以这样做:

arm_and_event(date_application_received)
#> [1] "arm1_event1" "arm2_event2"

额外的好处是我们的全局工作空间没有被大量变量污染,并且代码更易于维护。


推荐阅读