我想展示细胞的比例如何从健康状态变化到疾病状态。我想展示从健康到疾病的“流动”,而不仅仅是两个单独的堆叠条形图,但我不确定这种类型的可视化是否有专门的名称,而且我在网上找不到很多示例。我想在 R 中实现这一点。它看起来几乎是桑基图(Sankey diagram)和弦图(chord diagram)的混合体。

我希望你们中的一些人对我可以使用哪些包在 R 中实现这一点有一些想法。


正如 Ian Campbell 在评论中指出的那样,这种图被称为冲积图(alluvial plot),使用 ggalluvial 包就可以得到非常接近的效果。不过,只用 ggplot2 中的 geom_ribbon 和 geom_text 也可以几乎完全重现这种图。


但是,这有点棘手。首先,我们需要一种方法来生成从一侧到另一侧的漂亮平滑曲线。以下函数采用开始和结束级别(作为 0 和 1 之间的数字)。它还允许可选地增加或减少任一侧列的宽度:

#' Build one smooth S-shaped edge running from the left side to the right.
#'
#' The edge is flat from x = 0 to x = `width` and from x = 100 - `width` to
#' x = 100; in between it follows a scaled normal CDF from `p1` to `p2`.
#'
#' @param p1 Level of the edge on the left side; clamped to [0, 1].
#' @param p2 Level of the edge on the right side; clamped to [0, 1].
#' @param width Width of each flat end section on the 0-100 x scale; clamped
#'   to [0, 50].
#' @param len Number of points used for the curved middle section.
#' @return A list with numeric vectors `x` and `y`, each of length `len + 4`.
ribbon_line <- function(p1, p2, width = 10, len = 100) {
  # The clamping below collapses its input to a scalar, so reject anything
  # that is not a single non-missing value instead of silently reducing it.
  stopifnot(
    length(p1) == 1L, length(p2) == 1L, length(width) == 1L,
    !anyNA(c(p1, p2, width))
  )

  width <- min(max(width, 0), 50)
  p1 <- min(max(p1, 0), 1)
  p2 <- min(max(p2, 0), 1)

  # Normal CDF over [-2.5, 2.5] rescaled so it runs from (almost) p1 to p2.
  curve <- pnorm(seq(-2.5, 2.5, length.out = len)) * (p2 - p1) + p1

  # Two leading and two trailing points pin the flat sections at p1 and p2.
  yvals <- c(p1, p1, curve, p2, p2)
  xvals <- c(0, seq(width, 100 - width, length.out = len + 2), 100)
  list(x = xvals, y = yvals)
}


#' Assemble the data frame describing a single ribbon.
#'
#' @param uppers Length-two vector: upper edge level on the left and right.
#' @param lowers Length-two vector: lower edge level on the left and right.
#' @param group Label stored in the `group` column of every row.
#' @param width Passed through to `ribbon_line()`.
#' @return A data frame with columns `x`, `ymax`, `ymin` and `group`.
ribbon_df <- function(uppers, lowers, group, width = 10) {
  top_edge <- ribbon_line(uppers[1], uppers[2], width)
  bottom_edge <- ribbon_line(lowers[1], lowers[2], width)

  data.frame(
    x = top_edge$x,
    ymax = top_edge$y,
    ymin = bottom_edge$y,
    group = group,
    stringsAsFactors = FALSE
  )
}


#' Build the plotting data for a set of ribbons, one per group.
#'
#' @param left_bottom,right_bottom Lower edge level of each ribbon on the left
#'   and right side.
#' @param left_top,right_top Upper edge level of each ribbon on the left and
#'   right side.
#' @param groups Group label for each ribbon.
#' @param width Column width, passed to `ribbon_df()` and used to centre the
#'   percentage labels inside the columns.
#' @return A list with four data frames: `left` and `right` (the first two and
#'   last two rows of every ribbon), `bands` (all ribbon rows) and `text`
#'   (position and text of the percentage labels).
multi_ribbons <- function(left_bottom, right_bottom, left_top, right_top,
                          groups, width = 10) {
  n <- length(left_bottom)
  if (length(right_bottom) != n || length(left_top) != n ||
      length(right_top) != n) {
    stop("Left and right columns different length")
  }
  if (length(groups) != n) {
    stop("Group length has to be same length as columns")
  }
  if (n == 0L) {
    stop("At least one group is required")
  }

  bands <- lapply(seq_along(groups), function(i) {
    ribbon_df(c(left_top[i], right_top[i]),
              c(left_bottom[i], right_bottom[i]),
              groups[i], width)
  })

  # The first two rows of a ribbon span the left column, the last two rows
  # span the right column.
  left_cols <- lapply(bands, function(x) x[1:2, ])
  right_cols <- lapply(bands, function(x) x[nrow(x) - 1:0, ])

  res <- list(left = do.call(rbind, left_cols),
              right = do.call(rbind, right_cols),
              bands = do.call(rbind, bands))

  # Labels sit at the vertical midpoint of each column segment and show the
  # segment height as a percentage.
  text_y <- c((res$left$ymax + res$left$ymin) / 2,
              (res$right$ymax + res$right$ymin) / 2)
  text_x <- c(rep(width / 2, nrow(res$left)),
              rep(100 - width / 2, nrow(res$right)))
  text_labels <- paste0(round(c(res$left$ymax - res$left$ymin,
                                res$right$ymax - res$right$ymin), 3) * 100,
                        "%")
  res$text <- data.frame(x = text_x, y = text_y, labels = text_labels)

  # Return the whole list explicitly: the value of the assignment above is
  # only the text data frame.
  res
}


#' Draw an alluvial-style plot of how category proportions differ between
#' two conditions.
#'
#' Relies on ggplot2 being attached and on `multi_ribbons()`.
#'
#' @param yvar Vector with exactly two distinct values (the two conditions);
#'   the first level is drawn on the left, the second on the right.
#' @param xvar Vector of categories, the same length as `yvar`.
#' @param width Width of the two side columns on the 0-100 x scale.
#' @return A ggplot object.
alluvial <- function(yvar, xvar, width = 20) {
  tab <- table(yvar, xvar)
  if (nrow(tab) != 2L) {
    stop("`yvar` must have exactly two distinct values")
  }
  x_labs <- rownames(tab)
  y_labs <- colnames(tab)

  # Proportions within one condition, always named by category (a plain
  # `tab[i, ]` can lose its names when there is only a single category).
  row_props <- function(i) {
    counts <- stats::setNames(as.vector(tab[i, ]), y_labs)
    counts / sum(counts)
  }

  # Stack categories from smallest to largest on each side.
  left <- cumsum(sort(row_props(1L)))
  right <- cumsum(sort(row_props(2L)))
  left_lower <- c(0, left[-length(left)])
  names(left_lower) <- names(left)
  right_lower <- c(0, right[-length(right)])
  names(right_lower) <- names(right)

  # Put the right-hand values in the same category order as the left-hand
  # ones, so element i on both sides describes the same category.
  right_order <- match(names(left), names(right))
  right <- right[right_order]
  right_lower <- right_lower[right_order]

  # Forward the caller's width rather than a fixed value, so the ribbons and
  # the labels (which also use `width`) stay aligned.
  df_list <- multi_ribbons(left_lower, right_lower, left, right,
                           names(left), width = width)

  # Manual scales need at least one colour per category; interpolate the
  # base palette when there are more categories than base colours.
  base_cols <- c("#e64b35", "#806249", "#00a087", "#3c5488")
  n_groups <- length(y_labs)
  group_cols <- if (n_groups > length(base_cols)) {
    grDevices::colorRampPalette(base_cols)(n_groups)
  } else {
    base_cols
  }

  # `left` holds two rows per category; take the first row of each. This keeps
  # one label per category even when several categories share a midpoint
  # (for example, several categories with zero share on the left).
  first_rows <- seq(1L, nrow(df_list$left), by = 2L)
  group_labels <- data.frame(x = rep(-5, n_groups),
                             y = df_list$text$y[first_rows],
                             labs = df_list$left$group[first_rows])

  column_titles <- data.frame(x = c(width / 2, 100 - width / 2),
                              y = c(1.05, 1.05),
                              labels = factor(x_labs, levels = x_labs))

  ggplot(df_list$bands, aes(x = x, ymin = ymin, ymax = ymax, fill = group)) +
    geom_ribbon(alpha = 0.5) +
    geom_ribbon(alpha = 1, data = df_list$left) +
    geom_ribbon(alpha = 1, data = df_list$right) +
    geom_text(data = df_list$text, inherit.aes = FALSE, colour = "white",
              aes(x = x, y = y, label = labels), size = 8) +
    geom_text(data = column_titles, inherit.aes = FALSE,
              mapping = aes(x = x, y = y, label = labels), size = 12) +
    geom_text(data = group_labels,
              mapping = aes(x = x, y = y, colour = labs, label = labs),
              inherit.aes = FALSE, size = 8, hjust = 1) +
    scale_fill_manual(values = group_cols) +
    scale_colour_manual(values = group_cols) +
    coord_cartesian(xlim = c(-15, 101)) +
    theme_void() + theme(legend.position = "none")
}


# Show the first 20 rows of the example data. `df` is not defined in the
# visible part of this file; the printed output below shows it has a
# `condition` column and a `variable` column.
head(df, 20)
#>     condition    variable
#> 110   Disease      Immune
#> 149   Disease  Fibroblast
#> 133   Disease      Immune
#> 184   Disease Endothelial
#> 137   Disease      Immune
#> 200   Disease Endothelial
#> 30     Health      Immune
#> 11     Health      Immune
#> 63     Health  Fibroblast
#> 88     Health Endothelial
#> 42     Health  Fibroblast
#> 38     Health  Fibroblast
#> 106   Disease      Immune
#> 139   Disease      Immune
#> 6      Health  Epithelial
#> 21     Health      Immune
#> 27     Health      Immune
#> 181   Disease Endothelial
#> 95     Health Endothelial
#> 108   Disease      Immune


# Plot the example data: condition on the two sides, cell type as the bands.
alluvial(df$condition, df$variable)


# Random data: a two-level grouping against four categories. No seed is set
# here, so the plot differs from run to run.
alluvial(sample(c(TRUE, FALSE), 200, replace = TRUE), 
         sample(LETTERS[1:4], 200, replace = TRUE))



# Random data again, this time with twenty categories.
alluvial(sample(c(TRUE, FALSE), 200, replace = TRUE), 
         sample(LETTERS[1:20], 200, replace = TRUE))

