首页 > 解决方案 > 如何使用 tidyverse 根据某些条件添加新列?

问题描述

我正在尝试根据受访者是否健康创建一个新列。

这是我拥有的数据类型:

test <- structure(list(`cutree(hc_diana, k = 4)` = c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), id = c("117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", 
"117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", 
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", 
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "8a594e9340", 
"8a594e9340"), covid_tested = c("positive", "positive", "positive", 
"positive", "positive", "positive", "positive", "positive", "positive", 
"positive", "positive", "positive", "positive", "positive", "positive", 
"positive", "positive", "positive", "positive", "positive"), 
    age = c(51, 51, 51, 51, 51, 51, 51, 51, 51, 28, 28, 28, 28, 
    28, 28, 28, 28, 28, 28, 28), gender = structure(c(1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("Female", "Male", "Other"), class = "factor"), 
    number_morbidities = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 1, 1), chills = structure(c(1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("No", "Yes"), class = "factor"), cough = structure(c(2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    diarrhoea = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), fatigue = structure(c(2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    headache = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), loss_smell_taste = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"), 
    muscle_ache = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), nasal_congestion = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    nausea_vomiting = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), shortness_breath = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    sore_throat = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), sputum = structure(c(1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("No", "Yes"), class = "factor"), temperature = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    loss_appetite = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), chest_pain = structure(c(1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    itchy_eyes = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), joint_pain = structure(c(1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    comorbidities = c("asthma", "diabetes_type_one", "diabetes_type_two", 
    "obesity", "hypertension", "heart_disease", "lung_condition", 
    "liver_disease", "kidney_disease", "asthma", "diabetes_type_one", 
    "diabetes_type_two", "obesity", "hypertension", "heart_disease", 
    "lung_condition", "liver_disease", "kidney_disease", "asthma", 
    "diabetes_type_one"), bolean_yes_no = c("No", "No", "No", 
    "Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No", 
    "No", "No", "No", "No", "No", "No", "No")), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"))

我有 15 行,其中有 3 个唯一 ID,但是,我想根据几个条件获取新列:

  1. 如果他们有合并症-> 则仅选择具有该合并症的行并将其添加到具有其名称的新列中,但所有其他列都应具有 NA
  2. 如您所见,第二个 id 没有任何合并症,因此我想要一个新类别并将其视为“健康”类别,而与该患者有关的其余行显示为 NA。第三响应者也是如此。

如何使用 tidyverse 做到这一点?

我希望新列的外观示例在这里,请检查总结上述要点的最后一列。

structure(list(id = c("117dbbbf15", "117dbbbf15", "117dbbbf15", 
"117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", 
"117dbbbf15", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", 
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", 
"8a594e9340", "8a594e9340"), number_morbidities = c(1, 1, 1, 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1), chills = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), cough = structure(c(2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), diarrhoea = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), fatigue = structure(c(2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), headache = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), loss_smell_taste = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"), muscle_ache = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), nasal_congestion = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), nausea_vomiting = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), shortness_breath = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), sore_throat = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), sputum = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), temperature = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), loss_appetite = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), chest_pain = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), itchy_eyes = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), joint_pain = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), comorbidities = c("asthma", 
"diabetes_type_one", "diabetes_type_two", "obesity", "hypertension", 
"heart_disease", "lung_condition", "liver_disease", "kidney_disease", 
"asthma", "diabetes_type_one", "diabetes_type_two", "obesity", 
"hypertension", "heart_disease", "lung_condition", "liver_disease", 
"kidney_disease", "asthma", "diabetes_type_one"), bolean_yes_no = c("No", 
"No", "No", "Yes", "No", "No", "No", "No", "No", "No", "No", 
"No", "No", "No", "No", "No", "No", "No", "No", "No"), morbiditiy_healthy = c(NA, 
NA, NA, "obesity", NA, NA, NA, NA, NA, "healthy", NA, NA, NA, 
NA, NA, NA, NA, NA, "healthy", NA)), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame"))

标签: rdplyr

解决方案


我们按'id'分组,创建'morbidity_healthy' case_when,我们在'bolean_yes_no'列中检查'Yes',如果它是TRUE,则得到相应的'comorbidities',并且if没有(!any'Yes'和row_number为 1,然后返回该行的“健康”

library(dplyr)
test %>% 
    group_by(id) %>%
    mutate(morbidity_healthy = case_when(bolean_yes_no == 'Yes' ~ comorbidities,
      (!any(bolean_yes_no == 'Yes')) & row_number()==1 ~ 'healthy')) 

推荐阅读