首页 > 解决方案 > 如何创建在某些条件下计算另一列的列?R

问题描述

下面,数据已经被重塑,并列出了输入和预期输出。

数据

structure(list(record_id = c(110101, 110101, 110101, 110101, 
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101, 
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101, 
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101, 
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101, 
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101, 
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101, 
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101
), start = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 
31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 
47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59), stop = c(1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 
52, 53, 54, 55, 56, 57, 58, 59, 60), `treatment (type)` = c(1, 
1, 1, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 3, 3, 0, 3, 3, 3, 
0, 2, 2, 2, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), n_interruption_periods = c(0, 
0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 
4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), n_interruption_periods_3days = c(0, 
0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), n_interruption_days_3days = c(0, 
0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 
6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7)), row.names = c(NA, 
-60L), class = c("tbl_df", "tbl", "data.frame"))

解释

输入 `start` 和 `stop` 是天数。每日的治疗情况记录在 `treatment` 列中:0 = 不治疗(即治疗中断),1:3 分别表示治疗 A/B/C。

输出 基于该treatment列,我想每天计算:

  1. n_interruption_periods: 中断周期的总和/数量,与中断持续时间无关
  2. n_interruption_periods_3days:总和/中断次数,条件是您只应在持续时间 >= 3 天时计算。不关注少于 3 天的中断
  3. n_interruption_days_3days: 累计总和/中断天数,中断仅从中断的第 3 天开始计算。

问题 我想创建一个脚本,根据 `treatment` 变量自动计算上述三个输出变量。

希望你能帮忙

祝好

响应操作

以下是说明问题的部分数据:

structure(list(record_id = c(110001, 110002, 110002, 110002, 
110001), day_count = c(732, 0, 1, 2, 733), day_count_stop = c(733, 
1, 2, 3, 734), oac_class = c(0, 1, 1, 1, 1), n_interruption_periods = c(1, 
1, 0, 0, 1), n_interruption_periods_3days = c(1, 1, 0, 0, 1)), row.names = c(NA, 
-5L), groups = structure(list(record_id = c(110001, 110002), 
    .rows = structure(list(c(1L, 5L), 2:4), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

使用建议的代码,有两个问题:

  1. 我认为结果向量没有被分配到正确的位置。在这里可以看到,110002 第一行的 `n_interruption_periods` 和 `n_interruption_periods_3days` 是从 110001 的结果延续而来的。

  2. 当我尝试计算第三个向量时,收到如下错误:`Error in while (any(d != 0)) { : 需要 TRUE/FALSE 值的地方出现了缺失值`

祝好

标签: r dplyr

解决方案


编辑:完全删除所有内容并重新开始。

为了你好,我真心希望有人能给出一个不那么繁琐的答案,不过下面这些函数应该可以正常工作。

# Running count of treatment interruptions that have started so far.
#
# TreatmentVector: daily treatment codes, where 0 marks a day without
#   treatment. The vector is read as one continuous sequence of days.
# Returns a numeric vector of the same length as TreatmentVector; element i is
#   the number of runs of zeros that begin at or before position i.
FindFirstVector <- function(TreatmentVector) {
  # Positions of the untreated days (which() silently drops NA comparisons)
  untreated <- which(TreatmentVector == 0)
  # An untreated day opens a new interruption unless the untreated day just
  # before it in the list sits exactly one position earlier
  step_from_previous <- c(0, diff(untreated))
  period_starts <- untreated[step_from_previous != 1]
  # Flag the first day of every interruption, then accumulate the flags
  start_flags <- numeric(length(TreatmentVector))
  start_flags[period_starts] <- 1
  cumsum(start_flags)
}


# Running count of treatment interruptions that have lasted at least 3 days.
#
# TreatmentVector: daily treatment codes, where 0 marks a day without
#   treatment. The vector is read as one continuous sequence of days.
# Returns a numeric vector of the same length as TreatmentVector. The count
#   increases on the third day of each run of zeros that is 3 or more days
#   long; shorter runs are ignored.
FindSecondVector <- function(TreatmentVector) {
  # Positions of the untreated days (which() silently drops NA comparisons)
  untreated <- which(TreatmentVector == 0)
  # First and last position of every run of consecutive untreated days
  first_day <- untreated[c(0, diff(untreated)) != 1]
  last_day <- untreated[c(diff(untreated), 2) > 1]
  # Keep only the runs longer than 2 days
  is_long <- (last_day - first_day + 1) > 2
  # The interruption is counted from its third day, hence the offset of 2
  third_day <- first_day[is_long] + 2
  flags <- numeric(length(TreatmentVector))
  flags[third_day] <- 1
  cumsum(flags)
}


# Building third vector ---------------------------------------------------
# Cumulative number of interruption days, counted from day 3 of each
# interruption onwards.
#
# TreatmentVector: daily treatment codes, where 0 marks a day without
#   treatment. The vector is read as one continuous sequence of days. NA
#   entries are treated as "not an interruption day" and therefore end a run
#   of zeros.
# Returns a numeric vector of the same length as TreatmentVector. Within each
#   run of zeros the first two days add nothing and every later day adds 1.
#
# The earlier index-based version failed with "missing value where TRUE/FALSE
# needed" when the input contained no zero at all, because subsetting an empty
# index vector with TRUE yields NA and that NA reached the while() condition.
# This version works on run lengths and has no such special case; it also
# returns numeric(0) for empty input.
FindThirdVector <- function(TreatmentVector) {
  # TRUE on untreated days; NA comparisons count as FALSE
  untreated <- !is.na(TreatmentVector) & TreatmentVector == 0
  # 1-based position of every day inside its run of identical values
  runs <- rle(untreated)
  day_in_run <- sequence(runs$lengths)
  # Only the third and later days of an untreated run are counted
  cumsum(as.numeric(untreated & day_in_run >= 3))
}

然后运行以下代码即可得到这三个向量:

# `dat` is presumably the data frame shown in the question (it is not defined
# in this snippet). Each helper treats its input as one continuous sequence of
# days, so when the data holds several record_id values the helpers should be
# applied per group (for example inside dplyr::group_by(record_id) followed by
# dplyr::mutate()) rather than on the whole column, otherwise counts carry
# over from one record_id to the next.
FindFirstVector(dat[["treatment (type)"]])
FindSecondVector(dat[["treatment (type)"]])
FindThirdVector(dat[["treatment (type)"]])

推荐阅读