r - 如何使用 tidyverse 根据某些条件添加新列?
问题描述
我正在尝试根据受访者是否健康创建一个新列。
这是我拥有的数据类型:
# Example input, pasted as dput() output: a 20-row tibble with three distinct
# `id` values (9 + 9 + 2 rows). Each row pairs one `comorbidities` name with a
# `bolean_yes_no` flag; only row 4 ("obesity", first id) is "Yes".
# The symptom columns are two-level factors ("No"/"Yes"); `gender` is a
# three-level factor.
test <- structure(list(`cutree(hc_diana, k = 4)` = c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), id = c("117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15",
"117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15",
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3",
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "8a594e9340",
"8a594e9340"), covid_tested = c("positive", "positive", "positive",
"positive", "positive", "positive", "positive", "positive", "positive",
"positive", "positive", "positive", "positive", "positive", "positive",
"positive", "positive", "positive", "positive", "positive"),
age = c(51, 51, 51, 51, 51, 51, 51, 51, 51, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28), gender = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("Female", "Male", "Other"), class = "factor"),
number_morbidities = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1), chills = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("No", "Yes"), class = "factor"), cough = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
diarrhoea = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), fatigue = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
headache = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), loss_smell_taste = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
muscle_ache = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), nasal_congestion = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
nausea_vomiting = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), shortness_breath = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
sore_throat = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), sputum = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("No", "Yes"), class = "factor"), temperature = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
loss_appetite = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), chest_pain = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
itchy_eyes = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), joint_pain = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
comorbidities = c("asthma", "diabetes_type_one", "diabetes_type_two",
"obesity", "hypertension", "heart_disease", "lung_condition",
"liver_disease", "kidney_disease", "asthma", "diabetes_type_one",
"diabetes_type_two", "obesity", "hypertension", "heart_disease",
"lung_condition", "liver_disease", "kidney_disease", "asthma",
"diabetes_type_one"), bolean_yes_no = c("No", "No", "No",
"Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No")), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
我有 20 行数据,其中有 3 个唯一 ID,但是,我想根据几个条件创建新列:
- 如果他们有合并症 -> 则只在患有该合并症的那一行,把合并症的名称填入新列,其余所有行都应为 NA
- 如您所见,第二个 id 没有任何合并症,因此我想新增一个“健康”类别来标记该患者,而与该患者有关的其余行显示为 NA。第三位受访者也是如此。
如何使用 tidyverse 做到这一点?
下面是我期望的新列的示例,请查看最后一列,它概括了上述要点。
# Desired result, pasted as dput() output: the same 20 rows without the
# cluster, `covid_tested`, `age` and `gender` columns, plus a final column
# `morbiditiy_healthy` (spelled this way in this literal). That column holds
# "obesity" on the single "Yes" row (row 4), "healthy" on the first row of
# each id that has no "Yes" row (rows 10 and 19), and NA everywhere else.
structure(list(id = c("117dbbbf15", "117dbbbf15", "117dbbbf15",
"117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15",
"117dbbbf15", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3",
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3",
"8a594e9340", "8a594e9340"), number_morbidities = c(1, 1, 1,
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1), chills = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), cough = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), diarrhoea = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), fatigue = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), headache = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), loss_smell_taste = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"), muscle_ache = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), nasal_congestion = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), nausea_vomiting = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), shortness_breath = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), sore_throat = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), sputum = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), temperature = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), loss_appetite = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), chest_pain = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), itchy_eyes = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), joint_pain = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), comorbidities = c("asthma",
"diabetes_type_one", "diabetes_type_two", "obesity", "hypertension",
"heart_disease", "lung_condition", "liver_disease", "kidney_disease",
"asthma", "diabetes_type_one", "diabetes_type_two", "obesity",
"hypertension", "heart_disease", "lung_condition", "liver_disease",
"kidney_disease", "asthma", "diabetes_type_one"), bolean_yes_no = c("No",
"No", "No", "Yes", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "No", "No"), morbiditiy_healthy = c(NA,
NA, NA, "obesity", NA, NA, NA, NA, NA, "healthy", NA, NA, NA,
NA, NA, NA, NA, NA, "healthy", NA)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
解决方案
我们按 'id' 分组,然后在 `mutate` 中用 `case_when` 创建 'morbidity_healthy' 列:先检查 'bolean_yes_no' 列是否为 'Yes',如果为 TRUE,则取相应的 'comorbidities';如果该组中没有(`!`)任何(`any`)'Yes',并且 `row_number()` 为 1,则该行返回 'healthy';其余行保持为 NA。
library(dplyr)
# Build `morbidity_healthy` per respondent (`id`):
#   * a row flagged "Yes" gets its own `comorbidities` value;
#   * if the respondent has no "Yes" row at all, only the first row of that
#     respondent gets "healthy";
#   * every other row matches no condition and is left as NA by case_when().
test %>%
  group_by(id) %>%
  mutate(
    morbidity_healthy = case_when(
      bolean_yes_no == "Yes" ~ comorbidities,
      !any(bolean_yes_no == "Yes") & row_number() == 1L ~ "healthy"
    )
  ) %>%
  # Drop the grouping so the result is a plain tibble again and later verbs
  # do not silently operate per `id`.
  ungroup()
推荐阅读
- javascript - 将数据从 HTML 页面发送到 Google 表格
- hadoop - 如何hdfs复制带有后缀的文件到文件名
- json - 识别类似于json的格式
- reactjs - 用vite,如何处理SSR动态路由
- oracle12c - OUD 12c 与 OIM 12c 集成
- python - python 以什么顺序执行打印语句?
- python - 如何使用 pytest 参数化流列表
- python - 根据烧瓶中 SQL 表的值更改图标颜色
- php - PHP - 整个站点在等待 cURL 上传请求的响应时被阻止。有没有办法避免这种情况?
- javascript - 生成的excel中出现额外的行