首页 > 解决方案 > 通过使用 R 选择上一行中的值来填充缺失值

问题描述

在下表中，第三列的取值只有三种字符（A、B 或 C），其余单元格为空：

我想按下面这种方式，用上一个非空单元格中的值来填充空单元格，但值为 C 时除外（C 后面的空单元格保持为空）：

数据框为:

# Example data: 23 rows with a numeric ID, a "desc<i>" label, and a V2 column
# that holds "A", "B", "C", or an empty string for a blank cell.
df <- data.frame(
  ID = as.numeric(seq_len(23)),
  V1 = paste0("desc", seq_len(23)),
  V2 = c(
    "A", "", "",
    "B", "", "", "",
    "C", "",
    "A", "", "", "",
    "B", "", "",
    "C", "",
    "A",
    "B", "",
    "C", ""
  )
)

标签: r

解决方案


library(data.table)
library(magrittr)
# Example data: 23 rows with a numeric ID, a "desc<i>" label, and a V2 column
# that holds "A", "B", "C", or an empty string for a blank cell.
df <- data.frame(
  ID = as.numeric(seq_len(23)),
  V1 = paste0("desc", seq_len(23)),
  V2 = c(
    "A", "", "",
    "B", "", "", "",
    "C", "",
    "A", "", "", "",
    "B", "", "",
    "C", "",
    "A",
    "B", "",
    "C", ""
  )
)

# Treat empty strings as missing so they can be carried forward.
df[df == ""] <- NA

# Fill each blank V2 cell with the last non-missing value above it, except
# when that value is "C": blanks that follow a "C" stay NA.
setDT(df)[, V2 := {
  # na.rm = FALSE keeps leading NAs in place, so the carried vector always has
  # one element per row (the default na.rm = TRUE would drop them and make the
  # assignment fail when the first cell is blank).
  carried <- zoo::na.locf(V2, na.rm = FALSE)
  # Only cells that were originally blank may stay NA; explicitly entered
  # values, including consecutive "C" entries, are preserved as-is.
  ifelse(is.na(V2) & carried %in% "C", NA_character_, carried)
}] %>%
  .[]
#>     ID     V1   V2
#>  1:  1  desc1    A
#>  2:  2  desc2    A
#>  3:  3  desc3    A
#>  4:  4  desc4    B
#>  5:  5  desc5    B
#>  6:  6  desc6    B
#>  7:  7  desc7    B
#>  8:  8  desc8    C
#>  9:  9  desc9 <NA>
#> 10: 10 desc10    A
#> 11: 11 desc11    A
#> 12: 12 desc12    A
#> 13: 13 desc13    A
#> 14: 14 desc14    B
#> 15: 15 desc15    B
#> 16: 16 desc16    B
#> 17: 17 desc17    C
#> 18: 18 desc18 <NA>
#> 19: 19 desc19    A
#> 20: 20 desc20    B
#> 21: 21 desc21    B
#> 22: 22 desc22    C
#> 23: 23 desc23 <NA>
#>     ID     V1   V2

由 reprex 包（v2.0.1）创建于 2021-08-18


推荐阅读