r - 基于来自其他列的特定字符的新数据集列(在 R 中)
问题描述
在我的数据集中,我想根据另外两列中的字符创建一个新列。如果 longDesciptions.desc.en_US 中包含单词 Plage,并且 externalCode 以数字 1 开头,则在新列中填入值 A。如果 longDesciptions.desc.en_US 中不包含单词 Plage,但 externalCode 以数字 1 开头,则在新列中填入值 B。否则,将其留空或设为 NA。
# Sample data for the question: 6 rows, 48 character columns. Row 1 looks like
# a secondary header row (e.g. "sectionId", "US English"). The 13 populated
# columns are spelled out here; the 35 all-NA columns are appended below.
df <- data.frame(
  X.OPERATOR. = c(" Clear and Delete", rep(NA, 4), "<p>Je voornaamste taken:</p>"),
  externalCode = c(
    "Job Profile.GUID", "1008141", "1008168", "1008170", "1008170", NA
  ),
  longDesciptions.sectionId = c(
    "sectionId", "199624017", "200226564", "200226592", "200226594", NA
  ),
  longDesciptions.sectionType = c(
    "sectionType", rep("LONGDESCRIPTION", 4), NA
  ),
  longDesciptions.desc.en_US = c(
    "US English",
    "Class: 06, Plage: C, Function code:",
    "Class: 03",
    "Class: 03",
    "<p>Als Legal Counsel maak je deel uit van het departement Secretariaat-Generaal. Je ondersteunt zowel de secretaris-generaal en de directie alsook de verschillende entiteiten van Elia groep, zowel op nationaal als internationaal niveau.</p>",
    NA
  ),
  longDesciptions.desc.defaultValue = c(
    "Default Value", "Class: 06, Plage: C, Function code:",
    "Class: 03", "Class: 03", NA, NA
  ),
  longDesciptions.desc.en_GB = c(
    "English (United Kingdom)", "Class: 06, Plage: C, Function code:",
    "Class: 03", "Class: 03", NA, NA
  ),
  longDesciptions.desc.de_DE = c("German (Germany)", rep(NA, 5)),
  longDesciptions.desc.fr_FR = c(
    "French (France)", "Classe: 06, Plage: C, Code de la fonction:",
    "Classe: 03", "Classe: 03", NA, NA
  ),
  longDesciptions.desc.nl_NL = c(
    "Dutch (Netherlands)", "Klasse: 06, Plage: C, Functiecode:",
    "Klasse: 03", "Klasse: 03", NA, NA
  ),
  longDesciptions.status = c(
    "status(Valid Values : A/I A for Active I for Inactive )",
    "A", "A", "A", NA, NA
  ),
  longDesciptions.externalCode = c(
    "externalCode", "1035137", "1035330", "1035330", NA, NA
  ),
  longDesciptions.subModule = c("subModule", rep(NA, 5)),
  stringsAsFactors = FALSE
)
# Append the empty columns NA., NA..1, ..., NA..34 (all NA_character_), in the
# same order as the original dput() output.
df[c("NA.", paste0("NA..", 1:34))] <- NA_character_
我已经尝试过这段代码,但它不起作用:
# Failing attempt from the question, kept verbatim: it stops with
# "missing value where TRUE/FALSE needed".
# NOTE(review): the sample data is assigned to `df`; `df2` is not defined in
# the visible text - presumably the same data under another name.
df2[,49] <- NA # set column 49 to NA; the new result column is written here
names(df2)[49] <- "JobDescrip"
for (i in 1 : nrow(df2)) {
# `==` compares against the literal strings "^1" and "^P"; it does not treat
# them as regular expressions. A comparison involving NA yields NA, and `if`
# stops with an error when its condition is NA.
# NOTE(review): this tests longDesciptions.sectionId, whereas the question text
# asks about the word "Plage" in the en_US description column.
if (df2$externalCode[i] == '^1' && df2$longDesciptions.sectionId[i]==
'^P') {
df2[i,49] <- "A"
}
# This is a separate `if`, not an `else if`: its if/else runs for every row
# and overwrites whatever the block above wrote to column 49.
if (df2$externalCode[i] == '^1') {
df2[i,49] <- "B"
}
else {
df2[i,49] <- ""
}
}
Error in if (df2$externalCode[i] == "^1" && df2$longDesciptions.sectionId[i] == :
missing value where TRUE/FALSE needed
我知道这种类型的问题已经被问过很多次,但我找不到对我的数据可行的解决方案。任何帮助,将不胜感激!
解决方案
这是您可以考虑的一种 tidyverse 方法。我建议使用向量化的方法来代替循环。
在这种情况下,您可以使用 dplyr 中的 mutate 添加新列,并使用 case_when 而不是多个 if 语句来表达判断逻辑。如果第一个条件的求值结果为假,则测试第二个条件,依此类推。
如果您使用 grepl,就可以检查字符串是否包含“Plage”(您也可以考虑改用其他正则表达式模式)。使用 substr 可以查看字符串中指定位置的字符。
library(dplyr)
# Derive job_descrip from the first character of externalCode and the en_US
# description. case_when() checks the conditions from top to bottom and uses
# the first one that is TRUE for each row.
mutate(
  df,
  job_descrip = case_when(
    # Code starts with "1" and the description mentions "Plage".
    substr(externalCode, 1, 1) == "1" &
      grepl("Plage", longDesciptions.desc.en_US) ~ "A",
    # Code starts with "1" but the condition above did not match.
    substr(externalCode, 1, 1) == "1" ~ "B",
    # Every remaining row, including rows whose externalCode is NA.
    TRUE ~ NA_character_
  )
)
推荐阅读
- java - 设置日期为 30/12 或 31/12 时的 Java 日历错误
- python - 如何解决Django中的属性错误?
- validation - 如何将动态电子邮件重用于 jmeter 中的另一个请求
- java - java项目如何利用wavm(网络组装虚拟机)?
- branch.io - Branch.io:如果 MAU 超出许可限制会怎样?
- laravel-5 - 如何配置 WebSocket 服务器以接受安全连接请求
- java - Websphere 9.0.0 无法从 Linux 中的 WAS 管理员连接 Db2 数据库
- c# - 如何设置小型扩展器标题并扩大 WPF 中的内容?
- wordpress - wordpress .htaccess www 到 nonwww (不工作)
- json - 如何使用声明的变量来创建 json 元素