r - 如何仅将所有合并症字符串提取到新列中
问题描述
我正在尝试得到一个只包含合并症字符串以及 "None" 类别的新列。我使用的是 R,最好是基于 tidyverse 的解法。您会看到,其中有几行包含我不感兴趣的多余字符串。下面是我的数据样例。
# Sample input (dput output): a 20-row tibble with columns id,
# health_care_worker, how_unwell, health_cnd and health_1.
# In health_1, rows 4, 9 and 18 hold a long comma-separated string in which the
# comorbidity names are mixed with unrelated survey fields; every other row is
# "None". health_cnd uses NA instead of "None" for most of the remaining rows.
structure(list(id = c("1", "2", "3", "4", "5", "6", "7", "9",
"8", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"20"), health_care_worker = c("No", "No", "No", "No", "Yes",
"No", "No", "Yes", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No"), how_unwell = c(1, 6, 1, 1, 1,
6, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), health_cnd = c("None",
NA, NA, "Diabetes Type 2,No,Yes,Yes,No,4,No,Showing Symptoms But Not Tested,Mild,Yes,No,No,No,Spanish,No, No,No,Yes,No 3bad24c8-0ac9-4269-aa53-5e8d41b03142,35,Female,Rio de Janeiro",
NA, NA, NA, NA, "High Blood Pressure (hypertension),No,Yes,No,No,0,No,Self-Isolating With No Symptoms,None,No,No,No,No,Portuguese,No, No,No,No,Yes 2656b3f2-d916-43e1-96b2-1d371d8c7b12,58,Female,Belém/ Pará",
NA, NA, NA, NA, NA, NA, NA, NA, "High Blood Pressure (hypertension),No,Yes,No,No,15,No,Showing Symptoms But Not Tested,Moderate,Yes,No,No,No,Spanish,No, No,No,Yes,No 41cf840a-cfcc-441f-a995-f6b75ecee967,22,Male,Agb,India,2020-08-04 05:25:00,No,No,No,No,No,No,No,1,None,N",
NA, NA), health_1 = c("None", "None", "None", "Diabetes Type 2,No, Asthma, Yes,Yes,No,4,No,Showing Symptoms But Not Tested,Mild,Yes,No,No,No,Spanish,No, No,No,Yes,No 3bad24c8-0ac9-4269-aa53-5e8d41b03142,35,Female,Rio de Janeiro",
"None", "None", "None", "None", "High Blood Pressure (hypertension),No, Obesity, Yes,No,No,0,No,Self-Isolating With No Symptoms,None,No,No,No,No,Portuguese,No, No,No,No,Yes 2656b3f2-d916-43e1-96b2-1d371d8c7b12,58,Female,Belém/ Pará",
"None", "None", "None", "None", "None", "None", "None", "None",
"High Blood Pressure (hypertension),No,Lung-condition, Yes,No,No,15,No,Showing Symptoms But Not Tested,Moderate,Yes,No,No,No,Spanish,No, No,No,Yes,No 41cf840a-cfcc-441f-a995-f6b75ecee967,22,Male,Agb,India,2020-08-04 05:25:00,No,No,No,No,No,No,No,1,None,N",
"None", "None")), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
这就是我希望得到的新列的样子。
# Desired result (dput output): a 20-row tibble with columns id,
# health_care_worker, how_unwell, health_1 and the new copy_health_column.
# copy_health_column holds only the comorbidity names found in health_1
# (rows 4, 9 and 18) and "None" for every other row.
structure(list(id = c("1", "2", "3", "4", "5", "6", "7", "9",
"8", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"20"), health_care_worker = c("No", "No", "No", "No", "Yes",
"No", "No", "Yes", "No", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No"), how_unwell = c(1, 6, 1, 1, 1,
6, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), health_1 = c("None",
"None", "None", "Diabetes Type 2,No, Asthma, Yes,Yes,No,4,No,Showing Symptoms But Not Tested,Mild,Yes,No,No,No,Spanish,No, No,No,Yes,No 3bad24c8-0ac9-4269-aa53-5e8d41b03142,35,Female,Rio de Janeiro",
"None", "None", "None", "None", "High Blood Pressure (hypertension),No, Obesity, Yes,No,No,0,No,Self-Isolating With No Symptoms,None,No,No,No,No,Portuguese,No, No,No,No,Yes 2656b3f2-d916-43e1-96b2-1d371d8c7b12,58,Female,Belém/ Pará",
"None", "None", "None", "None", "None", "None", "None", "None",
"High Blood Pressure (hypertension),No,Lung-condition, Yes,No,No,15,No,Showing Symptoms But Not Tested,Moderate,Yes,No,No,No,Spanish,No, No,No,Yes,No 41cf840a-cfcc-441f-a995-f6b75ecee967,22,Male,Agb,India,2020-08-04 05:25:00,No,No,No,No,No,No,No,1,None,N",
"None", "None"), copy_health_column = c("None", "None", "None",
"Diabetes Type 2, Asthma", "None", "None", "None", "None", "High Blood Pressure (hypertension), Obesity",
"None", "None", "None", "None", "None", "None", "None", "None",
"High Blood Pressure (hypertension),Lung-condition", "None",
"None")), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame"))
现在,我的原始数据有超过 10 万个数据点。因此,我希望我能得到一个适用于更大数据集的解决方案。
解决方案
library(tidyverse)

# Comorbidity names to pull out of the free-text `health_1` column.
# Parentheses are escaped because the pattern is a regular expression.
# Extend this alternation if the full data set contains further conditions.
comorbidity_pattern <- paste(
  "Diabetes Type 2",
  "Asthma",
  "Obesity",
  "Lung-condition",
  "High Blood Pressure \\(hypertension\\)",
  sep = "|"
)

# Collapse the matches found in one cell into a single string.
# - NA input stays NA (str_extract_all() yields NA for a missing cell).
# - A cell with no comorbidity match becomes "None", matching the desired
#   output, instead of an empty string.
collapse_matches <- function(found) {
  if (anyNA(found)) {
    return(NA_character_)
  }
  if (length(found) == 0L) {
    return("None")
  }
  paste(found, collapse = ",")
}

# str_extract_all() is vectorised over the whole column, so no rowwise() is
# needed; this scales to large data and returns an ungrouped tibble.
df %>%
  mutate(
    copy_health_column = vapply(
      str_extract_all(health_1, comorbidity_pattern),
      collapse_matches,
      character(1)
    )
  )
推荐阅读
- flutter - 无法重建 StreamProvider 以更新其当前数据
- google-kubernetes-engine - 使用 GKE 工作负载标识时出现 MetadataServerException
- java - 从 Windows 10 平台上的 USB 设备读取 Java usb4java
- javascript - 我如何创建一个简单的 onclick 添加函数()
- python - Google App Engine 上的 Golang 应用程序调用 Python 脚本
- django - Django rest 从桥模型中获取数据
- intellij-idea - 在 JetBrains IDE 中自动填充“Find in Path”光标下的单词的任何快捷方式?
- bash - 使用文件夹中最新的 7 倍更新 JPG 制作 gif(每周延时拍摄!)
- linux - 如何重定向带括号的 URL
- php - 如何获得可评分的总用户数?