首页 > 解决方案 > 如何在网络分析中处理空值/NA

问题描述

这个问题基本上是我之前在此处发布的问题的扩展。
在这类情况下应该如何处理空值/NA?示例场景和数据如下:

# Example data for the question: three identifier columns, a display name,
# and the grouping the asker expects (`desired_output`).
# Note that `id_2` is NA in rows 8 and 14.
df1 <- data.frame(
  id_1 = c(
    "ABC", "ABC", "BCD", "CDE", "DEF", "EFG", "GHI", "HIJ",
    "IJK", "JKL", "GHI", "KLM", "LMN", "MNO", "NOP"
  ),
  id_2 = c(
    "1A", "2A", "3A", "1A", "4A", "5A", "6A", NA,
    "9A", "10A", "7A", "12A", "13A", NA, "15A"
  ),
  id_3 = c(
    "Z3", "Z2", "Z1", "Z4", "Z1", "Z5", "Z5", "Z6",
    "Z7", "Z8", "Z6", "Z8", "Z9", "Z9", "Z1"
  ),
  # "StackOverflow1" ... "StackOverflow15"
  Name = paste0("StackOverflow", 1:15),
  desired_output = c(
    1L, 1L, 2L, 1L, 2L, 3L, 3L, 3L,
    4L, 5L, 3L, 5L, 6L, 6L, 2L
  ),
  stringsAsFactors = FALSE
)

df1
   id_1 id_2 id_3            Name desired_output
1   ABC   1A   Z3  StackOverflow1              1
2   ABC   2A   Z2  StackOverflow2              1
3   BCD   3A   Z1  StackOverflow3              2
4   CDE   1A   Z4  StackOverflow4              1
5   DEF   4A   Z1  StackOverflow5              2
6   EFG   5A   Z5  StackOverflow6              3
7   GHI   6A   Z5  StackOverflow7              3
8   HIJ <NA>   Z6  StackOverflow8              3
9   IJK   9A   Z7  StackOverflow9              4
10  JKL  10A   Z8 StackOverflow10              5
11  GHI   7A   Z6 StackOverflow11              3
12  KLM  12A   Z8 StackOverflow12              5
13  LMN  13A   Z9 StackOverflow13              6
14  MNO <NA>   Z9 StackOverflow14              6
15  NOP  15A   Z1 StackOverflow15              2

但是,链接帖子中建议的三种方法在这里都不起作用,并且会报错。

请给出建议。

标签: r igraph network-analysis

解决方案


更新

如果某一行中有多个 NA,可以尝试下面的代码(需要先加载 igraph 包:library(igraph)):

# Assign a connected-component id (GRP) to every row of `df`, tolerating NA in
# either of the two "to" columns. Relies on `df` existing and on igraph being
# attached (membership / components / graph_from_data_frame).
# Wrapped in local() so the intermediate objects do not leak into the
# calling environment; the resulting data frame is returned (and printed).
local({
  # Stack columns 2 and 3 into a single "to" column, one row per (row, column).
  stacked <- reshape(
    df,
    direction = "long",
    idvar = c("id_1", "Name"),
    varying = 2:3,
    v.names = "to"
  )

  # Keep only the edge endpoints.
  edge_list <- stacked[c("id_1", "to")]

  # A missing endpoint is replaced by the row's own id_1, so the row still
  # contributes its id_1 vertex to the graph instead of an NA endpoint.
  edge_list$to <- ifelse(is.na(edge_list$to), edge_list$id_1, edge_list$to)

  # Component membership, indexed below by the id_1 values of each row.
  component_of <- membership(components(graph_from_data_frame(edge_list)))

  transform(df, GRP = component_of[id_1])
})

结果如下:

   id_1 id_2 id_3            Name GRP
1   ABC   1A   Z3  StackOverflow1   1
2   ABC   2A   Z2  StackOverflow2   1
3   BCD   3A   Z1  StackOverflow3   2
4   CDE   1A   Z4  StackOverflow4   1
5   DEF   4A   Z1  StackOverflow5   2
6   EFG   5A   Z5  StackOverflow6   3
7   GHI   6A   Z5  StackOverflow7   3
8   HIJ <NA> <NA>  StackOverflow8   4
9   IJK   9A   Z7  StackOverflow9   5
10  JKL  10A   Z8 StackOverflow10   6
11  GHI   7A   Z6 StackOverflow11   3
12  KLM  12A   Z8 StackOverflow12   6
13  LMN  13A <NA> StackOverflow13   7
14  MNO <NA> <NA> StackOverflow14   8
15  NOP  15A   Z1 StackOverflow15   2

虚拟数据

> dput(df)
structure(list(id_1 = c("ABC", "ABC", "BCD", "CDE", "DEF", "EFG", 
"GHI", "HIJ", "IJK", "JKL", "GHI", "KLM", "LMN", "MNO", "NOP"
), id_2 = c("1A", "2A", "3A", "1A", "4A", "5A", "6A", NA, "9A",
"10A", "7A", "12A", "13A", NA, "15A"), id_3 = c("Z3", "Z2", "Z1",
"Z4", "Z1", "Z5", "Z5", NA, "Z7", "Z8", "Z6", "Z8", NA, NA, "Z1"
), Name = c("StackOverflow1", "StackOverflow2", "StackOverflow3",
"StackOverflow4", "StackOverflow5", "StackOverflow6", "StackOverflow7",
"StackOverflow8", "StackOverflow9", "StackOverflow10", "StackOverflow11",
"StackOverflow12", "StackOverflow13", "StackOverflow14", "StackOverflow15"
)), row.names = c(NA, -15L), class = "data.frame")

上一个答案

也许您可以先把 id_2 中的 NA 值替换为 id_1 中对应的值,然后再按照前一个问题中的答案进行操作。

你可以试试这个

# Assign a connected-component id (GRP) to every row of `df` when only `id_2`
# can be NA. Relies on `df` existing and on igraph being attached
# (membership / components / graph_from_data_frame).
# Wrapped in local() so the intermediate objects do not leak into the
# calling environment; the resulting data frame is returned (and printed).
local({
  # Fill missing id_2 values with the row's own id_1 before reshaping.
  filled <- df
  filled$id_2 <- ifelse(is.na(filled$id_2), filled$id_1, filled$id_2)

  # Stack columns 2 and 3 into a single "to" column, one row per (row, column).
  stacked <- reshape(
    filled,
    direction = "long",
    idvar = c("id_1", "Name"),
    varying = 2:3,
    v.names = "to"
  )

  # Build the graph from the (id_1, to) pairs and look up each component.
  edge_list <- stacked[c("id_1", "to")]
  component_of <- membership(components(graph_from_data_frame(edge_list)))

  # The original (unfilled) df is what gets the new column.
  transform(df, GRP = component_of[id_1])
})

结果如下:

   id_1 id_2 id_3            Name GRP
1   ABC   1A   Z3  StackOverflow1   1
2   ABC   2A   Z2  StackOverflow2   1
3   BCD   3A   Z1  StackOverflow3   2
4   CDE   1A   Z4  StackOverflow4   1
5   DEF   4A   Z1  StackOverflow5   2
6   EFG   5A   Z5  StackOverflow6   3
7   GHI   6A   Z5  StackOverflow7   3
8   HIJ <NA>   Z6  StackOverflow8   3
9   IJK   9A   Z7  StackOverflow9   4
10  JKL  10A   Z8 StackOverflow10   5
11  GHI   7A   Z6 StackOverflow11   3
12  KLM  12A   Z8 StackOverflow12   5
13  LMN  13A   Z9 StackOverflow13   6
14  MNO <NA>   Z9 StackOverflow14   6
15  NOP  15A   Z1 StackOverflow15   2

推荐阅读