首页 > 解决方案 > 计算网络 R 中每个节点的特定大小的路径和循环

问题描述

我有一个网络,想计算大小为 4 的循环(长度为 4 的路径从同一节点开始和结束)和大小为 3 的路径从一个节点开始,然后按节点将它们列在数据集中。

g <- read.table(text= "supplier buyer
                              a    b
                              b    c
                              c    d
                              c    e
                              a    e
                              e    f
                              b    f",header=TRUE)
g <- graph.data.frame(g, directed=F)
adjacency <- as.matrix(as_adjacency_matrix(g))

我已经参考了这篇文章来计算周期,但它计算整个图表的周期,而不是按节点。

我还阅读了和的文档。但是,这些仅允许指定最大长度 ( )。我也没有完全理解输出:kpath.censuskcycle.censuslength <=3, rather than ==3

library(sna)
node_path<-sna::kpath.census(adjacency, maxlen = 3, mode = "graph", 
             tabulate.by.vertex = TRUE, dyadic.tabulation = "sum")
node_kcycle <- sna::kcycle.census(adjacency, maxlen = 4, mode = "graph", tabulate.by.vertex = TRUE, cycle.comembership = "sum")

> node_path$path.count
  Agg  a  b  c  e  f d
1  14  4  6  6  6  4 2
2  22 10 14 14 14 10 4
3  32 20 28 24 28 20 8
> node_kcycle$cycle.count
  Agg a b c e f d
2   0 0 0 0 0 0 0
3   0 0 0 0 0 0 0
4   3 2 3 2 3 2 0

我在下面绘制了网络,据我计算,节点a应该有 4 个长度为 4 的循环和 6 个长度为 3 的路径。但是输出 node_kcycle$cycle.countnode_path$path.count提供不同的数字。

在此处输入图像描述

有没有更好的方法来计算每个节点的大小为 4 和长度为 3 的循环?我想产生这样的输出:

   cycles4   path3
a    4         6
b
c      ....
d
e
f

编辑:我已经弄清楚了我对kpath.census输出的困惑:它计算了包括该节点在内的所有路径,而不是该节点开始,正如我想要的那样。不过,仍然不确定如何计算从节点开始的路径

EDIT2:这个函数all_simple_paths(g, from = V(g))计算简单路径,我想我会尝试从这个函数中提取 3 的路径。仍在努力寻找大小为 4 的循环。

标签: r

解决方案


这是一个很长的答案,因为我尝试使用 R6 并自己定义一些东西,而不是使用任何可用的路径相关包

定义控制逻辑的路径类

library(dplyr)
library(purrr)
library(R6)

# R6 Class
path <- R6Class(
  classname = "point",
  private = list(
    .points = c(),
    .is_circle = FALSE,
    .path_length = 0,
    check_path = function() {
      private$.path_length <- length(private$.points)
    }
  ),
  active = list(
    points = function(value) {
      if (missing(value)) {
        private$.points
      } else {
        stop("No manual edit allowed")
      }
    },
    is_circle = function(value) {
      if (missing(value)) {
        private$.is_circle
      } else {
        stop("No manual edit allowed")
      }
    },
    path_length = function(value) {
      if (missing(value)) {
        private$.path_length
      } else {
        stop("No manual edit allowed")
      }
    }
  ),
  public = list(
    initialize = function(supplier, buyer) {
      private$.points <- c(supplier, buyer)
      private$check_path()
    },
    # This function will add a 2 points path into this path object
    # handle some logics about cirle & duplicated path
    add_2_points_path = function(addition_path) {
      stopifnot(addition_path$path_length == 2)
      if (length(intersect(addition_path$points, private$.points)) == 2) {
        if (first(private$.points) %in% addition_path$points & 
            last(private$.points) %in% addition_path$points &
            private$.path_length > 2) {
          private$.is_circle <- TRUE
        } else {
          stop("No new node introduce")
        }
      } else {
        if (first(addition_path$points) == last(private$.points)) {
          private$.points <- c(private$.points, addition_path$points[2])
        } else if (last(addition_path$points) == last(private$.points)) {
          private$.points <- c(private$.points, addition_path$points[1])
        } else {
          stop("No common points for connection. No connection build")
        }
      }
      private$check_path()
    },
    add_point = function(new_point) {
      stopifnot(is.character(new_point) & length(new_point) == 1)
      private$.points <- c(private$.points, new_point)
      private$check_path()
    },
    get_start_point = function() {
      first(private$.points)
    },
    get_end_point = function() {
      last(private$.points)
    },
    print = function(...) {
      cat("Path: ", private$.points, "\n")
      cat("Length: ", private$.path_length, "\n")
      cat("Is circle: ", private$.is_circle, "\n")
    }
  ),
  cloneable = TRUE
)

使用原始数据运行路径构建器

# original data
g
#>   supplier buyer
#> 1        a     b
#> 2        b     c
#> 3        c     d
#> 4        c     e
#> 5        a     e
#> 6        e     f
#> 7        b     f

# Define maximum loops to count. The maximum path_length
# will be loop_count + 1
loop_count <- 4
paths <- pmap(g, path$new)
index <- 2
for (index in 2:loop_count) {
  current_paths_level <- keep(.x = paths,
    .p = function(x) { x$path_length == index})
  for (i_path in current_paths_level) {
    possible_paths <- pmap(g, path$new)
    for (i_possible_paths in possible_paths) {
      new_path <- i_path$clone()
      tryCatch({
        new_path$add_2_points_path(i_possible_paths)
        if (new_path$is_circle) {
          paths <- keep(.x = paths,
            .p = function(x) { !identical(x$points, new_path$points) })
        }
        paths <- append(paths, new_path)
      }, error = function(e) {
        # skip error handling just ignore the path not able to connect
      })
    }
  }
}

# Filter duplicated paths
identical_path <- function(path_1, path_2) {
  identical <- FALSE
  if (path_1$path_length <= 3 | path_2$path_length <= 3) {
    identical <- length(intersect(path_1$points, path_2$points)) == path_1$path_length &
      path_1$path_length == path_2$path_length 
  } else {
    identical <- identical(path_1$points, path_2$points)
  }
  identical
}
path_filter <- rep(FALSE, length(paths))
for(i in 1:(length(paths) - 1)) {
  for(j in (i+1):length(paths)) {
    path_filter[j] <- path_filter[j] | identical_path(paths[[i]], paths[[j]])
  }
}
paths <- paths[!path_filter]

结果的一些样本

sample(paths, 10)
#> [[1]]
#> Path:  a b 
#> Length:  2 
#> Is circle:  FALSE 
#> 
#> [[2]]
#> Path:  b c e a 
#> Length:  4 
#> Is circle:  TRUE 
#> 
#> [[3]]
#> Path:  d c e f b 
#> Length:  5 
#> Is circle:  FALSE 
#> 
#> [[4]]
#> Path:  c e a b f 
#> Length:  5 
#> Is circle:  FALSE 
#> 
#> [[5]]
#> Path:  c b f 
#> Length:  3 
#> Is circle:  FALSE 
#> 
#> [[6]]
#> Path:  f b a e c 
#> Length:  5 
#> Is circle:  FALSE 
#> 
#> [[7]]
#> Path:  a e c b 
#> Length:  4 
#> Is circle:  TRUE 
#> 
#> [[8]]
#> Path:  a e f 
#> Length:  3 
#> Is circle:  FALSE 
#> 
#> [[9]]
#> Path:  e a b c d 
#> Length:  5 
#> Is circle:  FALSE 
#> 
#> [[10]]
#> Path:  a b f 
#> Length:  3 
#> Is circle:  FALSE

最后做统计

# Define the counting functions
counting <- function(starting_point, list_paths) {
  starting_paths <- keep(paths,
    function(x) { x$get_start_point() == starting_point })
  circle4 <- length(keep(starting_paths,
    function(x) { x$is_circle & x$path_length == 4}))
  path3 <- length(keep(starting_paths,
    function(x) { x$path_length == 4}))
  return(tibble(point = starting_point,
    circle4 = circle4, path3 = path3))
}

map_dfr(unique(c(g$supplier, g$buyer)), counting, list_paths = paths)
#> # A tibble: 6 x 3
#>   point circle4 path3
#>   <chr>   <int> <int>
#> 1 a           4     6
#> 2 b           6     6
#> 3 c           4     4
#> 4 e           6     6
#> 5 d           0     4
#> 6 f           4     6

更新类以获得一个不错的print功能

reprex 包(v1.0.0)于 2021-04-06 创建


推荐阅读