r - 如何创建在某些条件下计算另一列的列?R
问题描述
下面,数据已经被重塑,并列出了输入和预期输出。
数据
structure(list(record_id = c(110101, 110101, 110101, 110101,
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101,
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101,
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101,
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101,
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101,
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101,
110101, 110101, 110101, 110101, 110101, 110101, 110101, 110101
), start = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59), stop = c(1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 59, 60), `treatment (type)` = c(1,
1, 1, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 3, 3, 0, 3, 3, 3,
0, 2, 2, 2, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), n_interruption_periods = c(0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), n_interruption_periods_3days = c(0,
0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), n_interruption_days_3days = c(0,
0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7)), row.names = c(NA,
-60L), class = c("tbl_df", "tbl", "data.frame"))
解释
输入
`start` 和 `stop` 是天数。每日的治疗情况记录在 `treatment` 列中:0 = 未治疗(即治疗中断),1:3 = 治疗 A/B/C。
输出
基于 `treatment` 列,我想为每一天计算:
- `n_interruption_periods`:中断周期的累计数量,与中断持续时间无关。
- `n_interruption_periods_3days`:中断周期的累计数量,但只统计持续时间 >= 3 天的中断;少于 3 天的中断不计入。
- `n_interruption_days_3days`:中断天数的累计和,每次中断仅从其第 3 天开始计数。
问题
我想创建一个脚本,根据 `treatment` 变量自动计算上述输出变量。
希望你能帮忙
祝好
响应操作
以下是说明问题的部分数据:
structure(list(record_id = c(110001, 110002, 110002, 110002,
110001), day_count = c(732, 0, 1, 2, 733), day_count_stop = c(733,
1, 2, 3, 734), oac_class = c(0, 1, 1, 1, 1), n_interruption_periods = c(1,
1, 0, 0, 1), n_interruption_periods_3days = c(1, 1, 0, 0, 1)), row.names = c(NA,
-5L), groups = structure(list(record_id = c(110001, 110002),
.rows = structure(list(c(1L, 5L), 2:4), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
使用建议的代码,有两个问题:
1. 我认为结果向量没有被分配到正确的位置。在这里可以看到,110002 的第一行数据中,`n_interruption_periods` 和 `n_interruption_periods_3days` 的结果是从 110001 延续过来的。
2. 当我尝试计算第三个向量时,收到此错误:`while (any(d != 0)) { :` 需要 TRUE/FALSE 的地方出现了缺失值。
祝好
解决方案
编辑:完全删除所有内容并重新开始。
为你着想,我真心希望有人能给出一个不那么凌乱的答案,不过下面这些函数应该可以正常工作。
# Running count of treatment interruptions.
#
# TreatmentVector: vector of daily treatment codes, where 0 marks a day
#   without treatment. NA entries are skipped by which() and therefore break
#   a run of zeros.
# Returns a numeric vector as long as TreatmentVector; element i holds the
#   number of interruption periods (runs of consecutive zeros) that have
#   started on or before position i.
FindFirstVector <- function(TreatmentVector) {
  # Positions of the untreated days.
  zero_pos <- which(TreatmentVector == 0)
  # A zero opens a new interruption unless the previous zero sits directly
  # before it (index gap of exactly 1). The leading 0 makes the very first
  # zero count as an opener.
  opens_run <- c(0, diff(zero_pos)) != 1
  # Put a 1 on every opening day and accumulate to get the running count.
  flags <- replace(numeric(length(TreatmentVector)), zero_pos[opens_run], 1)
  cumsum(flags)
}
# Running count of treatment interruptions that last at least `min_days` days.
#
# TreatmentVector: vector of daily treatment codes, where 0 marks a day
#   without treatment. NA entries are skipped by which() and therefore break
#   a run of zeros.
# min_days: minimum length (in days, whole number >= 1) an interruption must
#   reach before it is counted. Defaults to 3, the threshold that was
#   previously hard-coded.
# Returns a numeric vector as long as TreatmentVector; the count increases by
#   one on the `min_days`-th day of every interruption that is at least
#   `min_days` days long.
FindSecondVector <- function(TreatmentVector, min_days = 3) {
  stopifnot(
    is.numeric(min_days), length(min_days) == 1L, !is.na(min_days),
    min_days >= 1, min_days == round(min_days)
  )
  # Positions of the untreated days.
  zero_pos <- which(TreatmentVector == 0)
  # First day of each run of zeros: a zero not directly preceded by a zero.
  run_start <- zero_pos[c(0, diff(zero_pos)) != 1]
  # Last day of each run: a zero not directly followed by a zero. The
  # trailing 2 makes the final zero always close its run.
  run_end <- zero_pos[c(diff(zero_pos), 2) > 1]
  run_len <- run_end - run_start + 1
  # Day on which a long-enough run reaches the threshold.
  qualifies_at <- run_start[run_len >= min_days] + (min_days - 1)
  flags <- numeric(length(TreatmentVector))
  flags[qualifies_at] <- 1
  cumsum(flags)
}
# Building third vector ---------------------------------------------------
# Running count of interruption days, counted from the `min_days`-th day of
# each interruption onwards.
#
# TreatmentVector: vector of daily treatment codes, where 0 marks a day
#   without treatment. NA entries are skipped by which() and therefore break
#   a run of zeros.
# min_days: day within an interruption (whole number >= 1) from which days
#   start to be counted. Defaults to 3, the threshold that was previously
#   hard-coded; interruptions shorter than `min_days` contribute nothing.
# Returns a numeric vector as long as TreatmentVector holding the cumulative
#   number of counted interruption days up to and including each position.
FindThirdVector <- function(TreatmentVector, min_days = 3) {
  stopifnot(
    is.numeric(min_days), length(min_days) == 1L, !is.na(min_days),
    min_days >= 1, min_days == round(min_days)
  )
  flags <- numeric(length(TreatmentVector))
  # Positions of the untreated days.
  zero_pos <- which(TreatmentVector == 0)
  if (length(zero_pos) == 0L) {
    # No interruption at all: the count stays at zero throughout.
    return(flags)
  }
  # A zero opens a new run unless the previous zero sits directly before it.
  opens_run <- c(0, diff(zero_pos)) != 1
  run_start <- zero_pos[opens_run]
  # Index of the run each zero belongs to, then its 1-based day within it.
  run_id <- cumsum(opens_run)
  day_in_run <- zero_pos - run_start[run_id] + 1
  # Count every untreated day that is the `min_days`-th or later of its run.
  flags[zero_pos[day_in_run >= min_days]] <- 1
  cumsum(flags)
}
然后运行以下代码,即可得到这三个向量:
# Each call passes the `treatment (type)` column of `dat` to one helper; at
# top level the returned vector is printed, not stored in `dat`.
# NOTE(review): the whole column is processed as one continuous series. If
# `dat` holds several record_id values, the counts presumably need to be
# computed per record (rows ordered by day within each record), e.g.
#   ave(dat$`treatment (type)`, dat$record_id, FUN = FindFirstVector)
# -- TODO confirm against the real data.
FindFirstVector(dat$`treatment (type)`)
FindSecondVector(dat$`treatment (type)`)
FindThirdVector(dat$`treatment (type)`)
推荐阅读
- sql - 选择邮政编码并放入新列
- selenium-chromedriver - 如何在 beforeSession webdriverIO+Cucumber 中从 chromeOptions 设置参数
- android - 为什么 Kendo Dropdownlist 不适用于 chrome for android?
- c# - DataGrid DateTime IValueConverter,仅编辑时间,保留日期
- python - python中的多个循环
- php - 什么是在 PHP7 中使用匿名类的好例子
- mysql - 如果不满足条件,我如何在选定范围内包含上一行
- javascript - Vue搜索过滤器复选框问题
- javascript - 将数据 (d3) 投影到传单地图上的问题
- javascript - 如何获得类型化反应组件属性的代码完成?