首页 > 解决方案 > 如何按多次出现的值删除行

问题描述

我有一个数据集,我在同一列中合并了两个数据框。这样做之后,我发现该列的值在一天/一周中LOC_ID不止一个。ID这是 BTO 数据,我认为这是因为他们更新了栖息地指标并为同一个花园发送了多个栖息地数据。

我想存储最新的,我猜这是一组重复 ID 值中最新出现的 ID 值。

可重现的代码:

structure(list(year = c(2009, 2009, 2009, 2009, 2009, 2009, 2009, 
2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 
2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 
2009), week = c(52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 
52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 
52, 52), POSTCODE = c("IP21 4TR", "IP22 4BD", "IP22 4BD", "IP22 4BE", 
"IP22 4YW", "IP22 5SB", "IP22 5SB", "IP24 2YW", "IP24 3EP", "IP24 3HG", 
"IP24 3NF", "IP25 6QU", "IP25 6QU", "IP25 6TD", "IP25 7DB", "IP25 7EY", 
"IP25 7HG", "IP25 7SX", "IP26 5HH", "IP26 5JD", "IP27 0RQ", "IP27 0RQ", 
"NR10 4EA", "NR10 4EA", "NR10 4PF", "NR11 6JP", "NR11 7QB", "NR11 7QB", 
"NR11 7QG", "NR11 8PA"), OBS_DT = structure(c(14605, 14605, 14605, 
14605, 14605, 14605, 14605, 14605, 14605, 14605, 14605, 14605, 
14605, 14605, 14605, 14605, 14605, 14605, 14605, 14605, 14605, 
14605, 14605, 14605, 14605, 14605, 14605, 14605, 14605, 14605
), class = "Date"), LOC_ID = c("LOC633232", "LOC1153084541859", 
"LOC1153084541859", "LOC1300605020", "LOC1049970899816", "LOC628053", 
"LOC628053", "LOC1300611642", "LOC710372", "LOC676185", "LOC696817", 
"LOC1300602161", "LOC1300602161", "LOC1039890913279", "LOC287351", 
"LOC13006219110", "LOC1300605191", "LOC1133275915797", "LOC614479", 
"LOC1300611662", "LOC575712", "LOC575712", "LOC575593", "LOC575593", 
"LOC1300604079", "LOC1300604818", "LOC606431", "LOC606431", "LOC1300612920", 
"LOC1300604135"), HOW_MANY = c(14L, 6L, 6L, 9L, 3L, 15L, 15L, 
9L, 13L, 9L, 10L, 8L, 8L, 15L, 14L, 6L, 4L, 8L, 8L, 7L, 11L, 
11L, 16L, 16L, 7L, 3L, 13L, 13L, 15L, 8L), GARDEN_SIZE = c("M", 
"S", "S", "M", "S", "L", "L", "M", "X", "X", "M", "M", "L", "S", 
"L", "X", "M", "L", "M", "L", "M", "S", "L", "L", "L", "M", "L", 
"L", "S", "M"), GARDEN_AGE = c("B", "D", "D", "C", "C", "D", 
"E", "B", "X", "X", "D", "E", "E", "D", "E", "X", "D", "E", "D", 
"A", "D", "D", "E", "E", "E", "D", "E", "D", "B", "C"), SMALL_DECID_TREES = c("C", 
"C", "C", "B", "A", "D", "D", "D", "X", "X", "C", "D", "E", "C", 
"E", "X", "D", "E", "A", "X", "C", "C", "D", "E", "C", "D", "D", 
"E", "C", "E"), LARGE_DECID_TREES = c("B", "A", "A", "A", "B", 
"C", "D", "B", "X", "X", "B", "C", "D", "C", "D", "X", "B", "E", 
"A", "D", "B", "B", "C", "E", "C", "C", "E", "E", "X", "C")), row.names = c(NA, 
-30L), groups = structure(list(year = c(2009, 2009, 2009, 2009, 
2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 
2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009), week = c(52, 
52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 
52, 52, 52, 52, 52, 52, 52), POSTCODE = c("IP21 4TR", "IP22 4BD", 
"IP22 4BE", "IP22 4YW", "IP22 5SB", "IP24 2YW", "IP24 3EP", "IP24 3HG", 
"IP24 3NF", "IP25 6QU", "IP25 6TD", "IP25 7DB", "IP25 7EY", "IP25 7HG", 
"IP25 7SX", "IP26 5HH", "IP26 5JD", "IP27 0RQ", "NR10 4EA", "NR10 4PF", 
"NR11 6JP", "NR11 7QB", "NR11 7QG", "NR11 8PA"), OBS_DT = structure(c(14605, 
14605, 14605, 14605, 14605, 14605, 14605, 14605, 14605, 14605, 
14605, 14605, 14605, 14605, 14605, 14605, 14605, 14605, 14605, 
14605, 14605, 14605, 14605, 14605), class = "Date"), .rows = structure(list(
    1L, 2:3, 4L, 5L, 6:7, 8L, 9L, 10L, 11L, 12:13, 14L, 15L, 
    16L, 17L, 18L, 19L, 20L, 21:22, 23:24, 25L, 26L, 27:28, 29L, 
    30L), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", 
"list"))), row.names = c(NA, 24L), class = c("tbl_df", "tbl", 
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"))

它的外观和我想要的输出:

original:
 year  week POSTCODE OBS_DT     LOC_ID           HOW_MANY GARDEN_SIZE GARDEN_AGE SMALL_DECID_TREES LARGE_DECID_TREES
   <dbl> <dbl> <chr>    <date>     <chr>               <int> <chr>       <chr>      <chr>             <chr>            
 1  2009    52 IP21 4TR 2009-12-27 LOC633232              14 M           B          C                 B                
 2  2009    52 IP22 4BD 2009-12-27 LOC1153084541859        6 S           D          C                 A                
 3  2009    52 IP22 4BD 2009-12-27 LOC1153084541859        6 S           D          C                 A                
 4  2009    52 IP22 4BE 2009-12-27 LOC1300605020           9 M           C          B                 A                
 5  2009    52 IP22 4YW 2009-12-27 LOC1049970899816        3 S           C          A                 B                
 6  2009    52 IP22 5SB 2009-12-27 LOC628053              15 L           D          D                 C                
 7  2009    52 IP22 5SB 2009-12-27 LOC628053              15 L           E          D                 D                
 8  2009    52 IP24 2YW 2009-12-27 LOC1300611642           9 M           B          D                 B                
 9  2009    52 IP24 3EP 2009-12-27 LOC710372              13 X           X          X                 X                
10  2009    52 IP24 3HG 2009-12-27 LOC676185               9 X           X          X                 X             

New Output:

 year  week POSTCODE OBS_DT     LOC_ID           HOW_MANY GARDEN_SIZE GARDEN_AGE SMALL_DECID_TREES LARGE_DECID_TREES
   <dbl> <dbl> <chr>    <date>     <chr>               <int> <chr>       <chr>      <chr>             <chr>            
 1  2009    52 IP21 4TR 2009-12-27 LOC633232              14 M           B          C                 B                         
 2  2009    52 IP22 4BD 2009-12-27 LOC1153084541859        6 S           D          C                 A                
 3  2009    52 IP22 4BE 2009-12-27 LOC1300605020           9 M           C          B                 A                
 4  2009    52 IP22 4YW 2009-12-27 LOC1049970899816        3 S           C          A                 B                
 5  2009    52 IP22 5SB 2009-12-27 LOC628053              15 L           E          D                 D                
 6  2009    52 IP24 2YW 2009-12-27 LOC1300611642           9 M           B          D                 B                
 7  2009    52 IP24 3EP 2009-12-27 LOC710372              13 X           X          X                 X                
 8  2009    52 IP24 3HG 2009-12-27 LOC676185               9 X           X          X                 X             

标签: r

解决方案


也许你可以试试

subset(df,as.logical(ave(LOC_ID, year, week, FUN = Negate(duplicated))))

推荐阅读