r - 如何选择未出现在第二个 tibble 中的 tibble 行
问题描述
我有一个 tibble 和该 tibble 的一个子集(它们分别叫做 mytrees 和 findOut)。我想创建第三个 tibble,mytreesCorr,它只包含 mytrees 中那些与 findOut 的任何行都不匹配的行。如果我想根据某一列过滤掉行,比如 Árvore(抱歉,它不是英文,但它用作 ID),我会这样做:
# Keep the rows of mytrees whose Árvore value is absent from findOut.
# `!` negates the logical mask (a unary `-` would turn it into 0/-1 integer
# indices), and the trailing comma makes it a row index, not a column index.
mytreesCorr <- mytrees[!(mytrees$Árvore %in% findOut$Árvore), ]
但我想根据整行进行过滤,而不仅仅是根据这个用作索引的 ID 列。这是 mytrees:
> dput(mytrees)
structure(list(Dia = c("26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020", "26/10/2020",
"27/10/2020", "26/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020", "27/10/2020",
"27/10/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020", "9/11/2020",
"9/11/2020"), Árvore = c("398", "414", "415", "416", "416",
"417", "417", "419", "432", "432", "434", "435", "436", "439",
"441", "442", "443", "451", "452", "454", "459", "482", "484",
"485", "486", "489", "490", "492", "496", "497", "498", "502",
"503", "504", "505", "506", "507", "508", "513", "513", "514",
"514", "515", "516", "518", "519", "520", "521", "525", "526",
"534", "535", "536", "538", "547", "547", "552", "555", "556",
"557", "559", "564", "565", "568", "569", "572", "573", "581",
"582", "584", "585", "586", "588", "593", "596", "597", "603",
"604", "605", "606", "608", "612", "614", "369", "616", "373",
"386", "399", "415", "434", "435", "436", "441", "442", "448",
"449", "451", "451", "452", "454", "456", "463", "464", "482",
"484", "485", "485", "486", "487", "491", "492", "492", "493",
"495", "496", "497", "500", "501", "503", "505", "507", "508",
"510", "513", "514", "515", "515", "516", "517", "519", "519",
"520", "526", "528", "547", "548", "549", "555", "560", "564",
"572", "573", "575", "581", "582", "586", "596", "599", "604",
"606", "607", "612", "613", "614", "575", "607", "513", "569",
"565", "559", "567", "514", "555", "520", "564", "560", "557",
"556", "537", "536", "521", "513", "508", "511", "548", "599",
"515", "504", "609", "500", "497", "502", "499", "608", "509",
"498", "496", "540", "497", "500", "593", "581", "567", "516",
"517", "571", "516", "568", "560", "554", "568", "555", "546",
"547", "548", "406", "538", "549", "543", "550", "504", "534",
"536", "535", "521", "526", "485", "524", "538", "482", "483",
"612", "589", "489", "490", "487", "615", "496", "497", "498",
"457", "613", "454", "449", "616", "443", "614", "442", "452",
"453", "441", "450", "452", "461", "459", "462", "286", "468",
"469", "458", "474", "284", "463", "251"), `Circunferência (cm)` = c(28.5,
21.2, 107, 44.2, 44.1, 21.1, 21.6, 17.8, 44.7, 34.1, 28.5, 78.7,
52.5, 117, 15.5, 100, 29, 70.5, 69.4, 242.5, 68.5, 49, 30.4,
24.5, 46.8, 21.6, 163.4, 61, 80.5, 38, 21, 74.5, 95.5, 48.7,
86, 106.5, 54, 85.4, 45.2, 41.1, 19.2, 19.1, 88, 19.7, 259, 21.5,
26.2, 35.3, 47.2, 38, 25, 81.5, 22.3, 178.8, 48.3, 48.6, 35.3,
47, 86, 24.4, 71, 212, 47.3, 19.7, 173, 257.7, 26, 43.8, 85.2,
130, 77.5, 63.2, 201, 319, 33, 21.6, 137.2, 84.7, 40.8, 65, 73.2,
50.2, 104.3, 47, 86, 62, 158, 138, 117.5, 30, 78, 52, 17.5, 54,
80.5, 100, 71, 70.5, 69, 238, 17, 74, 22, 42, 26.5, 24, 23, 49,
185, 113.5, 62, 63, 55, 95, 80, 40, 78.5, 212, 97, 89, 32, 86,
80, 44, 20, 87, 86.5, 20, 89, 24.5, 23, 26, 40, 40, 48, 41, 116,
48, 134, 213, 252, 26, 113.5, 46, 85, 64.5, 33, 222, 85, 88,
32, 51, 88, 108.5, 27, 29.5, 43.5, 107.8, 48, 71.5, 136, 20.5,
46.9, 27.2, 211, 130, 24.1, 89, 68.5, 22.5, 35.4, 43, 85, 63.4,
210, 222, 98, 315.4, 63, 79, 38.1, 75.5, 98, 76, 215, 21.2, 80.4,
19.5, 38.4, 81, 334, 43.5, 136.4, 20, 89.5, 16.3, 198, 21, 131,
23.5, 22, 48.5, 240, 48, 41, 238.4, 178.2, 123, 43.5, 132.1,
259, 25, 22, 51, 35.2, 39.5, 24.3, 25.4, 179, 50.5, 134.7, 51,
260, 23, 199, 188.5, 67, 80.2, 39, 22, 248, 87, 238, 99.2, 87,
30, 106, 98, 69.8, 218, 16, 27, 70.3, 170, 66.5, 35, 238, 38.3,
233.3, 132.5, 19.5, 324.5, 69.7, 677.2)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -254L), spec = structure(list(
cols = list(Dia = structure(list(), class = c("collector_character",
"collector")), Árvore = structure(list(), class = c("collector_character",
"collector")), `Circunferência (cm)` = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"))
这是 findOut(我只给出结果,而不是重现生成它的过程):
> dput(findOut)
structure(list(Dia = c("27/10/2020", "27/10/2020", "26/10/2020",
"9/11/2020", "9/11/2020"), Árvore = c("442", "482", "504", "504",
"548"), `Circunferência (cm)` = c(54, 42, 48.7, 315.4, 210),
Range = c(46, 8.5, 266.7, 266.7, 169)), row.names = c(NA,
-5L), groups = structure(list(Árvore = c("442", "482", "504",
"548"), .rows = structure(list(1L, 2L, 3:4, 5L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
解决方案
以下是根据您的描述,使用 base 和 dplyr 过滤数据的一些方法:
# load functions ----
# Bind the magrittr pipe locally so the script can use %>% without
# attaching the whole package.
`%>%` <- magrittr::`%>%`

# some data ----
# dat1: 100 rows, IDs 1..100, plus a random value column X.
dat1 <- dplyr::tibble(
  ID = 1:100,
  X = sample(1:1000, size = 100, replace = TRUE)
)
# dat2: 20 distinct IDs sampled from 1..100, plus a random value column Y.
dat2 <- dplyr::tibble(
  ID = sample(1:100, size = 20, replace = FALSE),
  Y = sample(1:1000, size = 20, replace = TRUE)
)

# filter data ----
# Each variant below keeps the rows of dat1 whose ID does not occur in dat2.

# base
# Logical row index; the trailing comma selects rows rather than columns.
dat1[!(dat1$ID %in% dat2$ID), ]

# dplyr: filter ####
dat1 %>%
  dplyr::filter(!(ID %in% dat2$ID))

# dplyr: anti_join ####
# `by` takes a character vector, so matching on several columns (up to the
# whole row) only requires listing every column that must agree.
dat1 %>%
  dplyr::anti_join(dat2, by = "ID")
推荐阅读
- parallel-processing - 使用并行化时自定义度量解析外部函数时出错
- sockets - ObjectInputStream readObject 意外关闭套接字
- python - 用户在 Python 中输入保留关键字。发生错误
- c++ - 从文本文件中读取数据并删除所有换行符并在 C++ 中的控制台中显示
- python-3.x - 将一行拆分为多行以反映列中的更改(保持所有列值相同但一个)
- c# - 使用 C# 创建 Azure DataDisk
- r - 在一行中查找两个最高值,然后在同一列但不同行中查找值
- c# - 使用 RegularExpressionValidator 确保文本框至少有一个数字
- javascript - 公共资产在产品构建(webpack)上是 404
- python - 抓取href链接并从这些链接中抓取