首页 > 解决方案 > 为什么 fct_inorder 没有按出现顺序显示我的因子?

问题描述

我对 R 和编程都非常陌生,所以请多多包涵!我到处寻找解决方案,但尝试过的方法要么没有奏效,要么我看不懂,所以非常感谢您能提供的任何帮助。

我正在尝试制作一个点图,让点的颜色对应倍数变化(fold change)所在的取值区间,点的大小对应 p 值。我首先读入一个 csv 文件,其中包含基因、浓度、倍数变化和 p 值四列。下面是一些示例数据,以及我一直在尝试的代码:我想生成一个点图,x 轴为浓度,y 轴为基因,每个点的颜色对应其倍数变化(我希望像下面代码中那样,为每个取值区间指定一种颜色),点的大小对应 p 值。

# Example data: four parallel vectors that are combined into one data frame.
genes <- c('ABC', 'CDE', 'EFG', 'HIJ', 'JKL', 'LMN', 'ABC', 'CDE', 'EFG', 'HIJ', 'JKL', 'LMN', 'ABC', 'CDE', 'EFG', 'HIJ', 'JKL', 'LMN', 'ABC', 'CDE', 'EFG', 'HIJ', 'JKL', 'LMN', 'ABC', 'CDE', 'EFG', 'HIJ', 'JKL', 'LMN', 'ABC', 'CDE', 'EFG', 'HIJ', 'JKL', 'LMN')
concentration <- c('PR1.2', 'PR1.2','PR1.2', 'PR1.2','PR1.2', 'PR1.2','PR2.4', 'PR2.4','PR2.4', 'PR2.4','PR2.4','PR2.4', 'PR3.6', 'PR3.6','PR3.6', 'PR3.6','PR3.6','PR3.6','PR1.2T', 'PR1.2T','PR1.2T', 'PR1.2T','PR1.2T', 'PR1.2T', 'PR2.4T', 'PR2.4T','PR2.4T', 'PR2.4T','PR2.4T', 'PR2.4T', 'PR3.6T', 'PR3.6T','PR3.6T', 'PR3.6T','PR3.6T','PR3.6T')
foldchange <- c(577.19, 2.642, -697.90, 50.23, 12.582, -30.542, -15.376, 30.674, -1.973, -5.324, -132.761, 146.678, 500.19, 2.233, -656.90, 49.23, 13.582, -80.542, 577.19, 2.642, -697.90, 50.23, 12.582, -30.542, 577.19, 2.642, -697.90, 50.23, 12.582, -30.542, 577.19, 2.642, -697.90, 50.23, 12.582, -30.542)
# NOTE(review): pval holds 18 values while the three vectors above hold 36.
# Recycling is silent when the lengths divide evenly, so the 18 values would
# be used twice - confirm that this is intended.
pval <- c(4, 2, 2, 2, 3, 3, 2, 3,1,1,4,4, 4, 2, 2, 2, 3, 3)
# NOTE(review): `foldcahnge` is not defined above (the vector is named
# `foldchange`), so this call cannot succeed as written.
data <- data.frame(genes, concentration, foldcahnge, pval)

# NOTE(review): the reordered factors are stored in f and g, but the ggplot()
# call further down maps `concentration` and `genes` from `data`; f and g are
# never referenced again in this snippet.
f <- fct_inorder(genes)
g <- fct_inorder(concentration)

# Build the dot plot with ggplot2: concentration on x, genes on y, point size
# from pval and point colour from binned fold change.
# NOTE(review): this snippet does not parse as written. Visible problems:
#   - cut(foldchange) is closed before the vector of bin edges, so that vector
#     is not passed to cut(), and the geom_point() line ends with one closing
#     parenthesis too many;
#   - the bin edges contain 5 twice (presumably -5 was meant after -2);
#   - the fourth palette key lacks its closing quote and reads (500,100]
#     (presumably (50,100] was meant);
#   - commas are missing after the gray98, blue, blue3 and darkblue entries;
#   - labels has 16 entries while values has 17;
#   - the final scale_size( call is never closed.
p <- ggplot(data, aes(x=concentration, y=genes, size=pval)) + labs(y="Gene") +
     geom_point(alpha = 0.9) + geom_point(aes(colour = cut(foldchange), c(Inf, 500, 250, 
                                         100, 50, 25, 10, 5, 2, -2, 5, -10, -25, -50, 
                                         -100, -250, -500, -Inf)))) + 
     scale_color_manual(name = "fold change", values= c("(500, Inf]" = "firebrick4", 
                                                        "(250,500]" = "firebrick", 
                                                        "(100,250]" = "red3",
                                                        "(500,100] = "red2", 
                                                        "(25,50]" = "red",
                                                        "(10,25]" = "firebrick2", 
                                                        "(5,10]" = "firebrick1", 
                                                        "(2,5]" = "rosybrown1", 
                                                        "(-2,2]" = "gray98"
                                                        "(-5,-2]" = "lightskyblue",
                                                        "(-10,-5]" = "deepskyblue", 
                                                        "(-25,-10]" = "dodgerblue2", 
                                                        "(-50,-25]" = "dodgerblue4", 
                                                        "(-100,-50]" = "blue"
                                                        "(-250,-100]" = "blue3"
                                                        "(-500,-250]" = "darkblue"
                                                        "(-Inf,-500]" = "navy"),
                     labels = c("500", "250", "100", "50", "25", "10", "5", "2", "-2", "-5", "-10", "-25", "-50", "-100", "-250", "-500"))
p + scale_size(range = c(1,4), breaks = c(1,2,3,4), labels=c("pval >0.05", "0.01<pval<0.05", "0.001<pval<0.01", "pval<0.001")

在这里,同一种药物以相同的浓度作用于不同的细胞系:浓度后面不带任何后缀的是一种细胞系,带“T”后缀的是“T”细胞系。我遇到的问题是,其中一个浓度的顺序不对,而其他浓度都正常。运行代码后得到的点图中,列的顺序是“PR1.2”、“PR2.4”、“PR2.4T”、“PR3.6”、“PR1.2T”、“PR3.6T”。我希望它们按细胞系排列:“PR1.2”、“PR2.4”、“PR3.6”、“PR1.2T”、“PR2.4T”、“PR3.6T”。谁能解释一下为什么浓度的顺序不正确,以及该如何解决?提前致谢!

标签: r ggplot2 forcats

解决方案


您的代码中有一些拼写错误 - 如果您更正这些错误并进行一些小的更改,我认为您的代码按预期工作:

library(ggplot2)
library(forcats)

# Gene identifiers: the same six genes, listed once per concentration
# (six repetitions, 36 values in total). fct_inorder() orders the factor
# levels by first appearance rather than alphabetically.
genes <- fct_inorder(
  f = rep(c("ABC", "CDE", "EFG", "HIJ", "JKL", "LMN"), times = 6),
  ordered = TRUE
)
# Concentration labels: each label is repeated six times in a row (once per
# gene), first the three plain concentrations, then the three "T" ones.
# fct_inorder() keeps the levels in exactly this order of appearance.
concentration <-
  fct_inorder(
    f = rep(
      c("PR1.2", "PR2.4", "PR3.6", "PR1.2T", "PR2.4T", "PR3.6T"),
      each = 6
    ),
    ordered = TRUE
  )
# Fold-change values, 36 in total (six per group of rows).
# The first six values recur unchanged as the last 18 values, so that
# profile is written once and repeated; local() keeps the helper variable
# out of the global environment.
foldchange <- local({
  shared_profile <- c(577.19, 2.642, -697.90, 50.23, 12.582, -30.542)
  c(
    shared_profile,
    -15.376, 30.674, -1.973, -5.324, -132.761, 146.678,
    500.19, 2.233, -656.90, 49.23, 13.582, -80.542,
    rep(shared_profile, times = 3)
  )
})

# Significance codes (1-4) used for the point size; 18 values.
pval <- c(
  4, 2, 2, 2, 3, 3,
  2, 3, 1, 1, 4, 4,
  4, 2, 2, 2, 3, 3
)
# Combine the parallel vectors into one data frame, one row per
# gene/concentration pair.
data <- data.frame(genes, concentration, foldchange, pval)

# Bin edges for the fold change. -Inf and Inf are part of the edges so that
# values beyond +/-500 fall into their own bins; without them cut() returns
# NA for those values and the two outermost colours can never be used.
fc_breaks <- c(
  -Inf, -500, -250, -100, -50, -25, -10, -5, -2,
  2, 5, 10, 25, 50, 100, 250, 500, Inf
)

# One colour per bin, lowest bin first (blue = negative, red = positive).
# The names are taken from cut() itself instead of being typed by hand, so
# they always match the factor levels that the plot produces.
fc_colours <- c(
  "navy", "darkblue", "blue3", "blue",
  "dodgerblue4", "dodgerblue2", "deepskyblue", "lightskyblue",
  "gray98",
  "rosybrown1", "firebrick1", "firebrick2", "red",
  "red2", "red3", "firebrick", "firebrick4"
)
names(fc_colours) <- levels(cut(0, breaks = fc_breaks))

# now make dot plot using ggplot2
# x = concentration, y = gene, point size = pval code, colour = fold-change
# bin. The colour legend uses the interval labels of the bins; it is reversed
# so that the largest fold changes are listed first.
p <- ggplot(data, aes(x = concentration, y = genes, size = pval)) +
  geom_point(alpha = 0.9) +
  geom_point(aes(colour = cut(foldchange, breaks = fc_breaks))) +
  labs(y = "Gene") +
  scale_color_manual(
    name = "fold change",
    values = fc_colours,
    guide = guide_legend(reverse = TRUE)
  )

# Printing the result draws the plot; the size legend translates the pval
# codes 1-4 into the p-value ranges they stand for.
p + scale_size(
  breaks = c(1, 2, 3, 4),
  labels = c("pval >0.05", "0.01<pval<0.05", "0.001<pval<0.01", "pval<0.001")
)

示例_1.png

由于格式的原因,很难在原始代码中找到拼写错误 - 如果您使用的是 RStudio,可以通过菜单 Code > Reformat Code 或 Code > Reindent Lines 重新格式化/重新缩进代码,这样缺少逗号之类的问题就更容易发现。

编辑

如果你想指定一个因子的顺序(例如“基因”),你可以使用 base R 的 factor() 而不是 forcats 的 fct_inorder()。例如:

# Gene identifiers: the same six genes, listed once per concentration
# (six repetitions, 36 values in total). The level order is given explicitly
# and is the reverse of the order in which the genes appear in the data.
genes <- factor(
  x = rep(c("ABC", "CDE", "EFG", "HIJ", "JKL", "LMN"), times = 6),
  levels = c("LMN", "JKL", "HIJ", "EFG", "CDE", "ABC"),
  ordered = TRUE
)
# Concentration labels: each label is repeated six times in a row (once per
# gene), first the three plain concentrations, then the three "T" ones.
# fct_inorder() keeps the levels in exactly this order of appearance.
concentration <-
  fct_inorder(
    f = rep(
      c("PR1.2", "PR2.4", "PR3.6", "PR1.2T", "PR2.4T", "PR3.6T"),
      each = 6
    ),
    ordered = TRUE
  )
# Fold-change values, 36 in total (six per group of rows).
# The first six values recur unchanged as the last 18 values, so that
# profile is written once and repeated; local() keeps the helper variable
# out of the global environment.
foldchange <- local({
  shared_profile <- c(577.19, 2.642, -697.90, 50.23, 12.582, -30.542)
  c(
    shared_profile,
    -15.376, 30.674, -1.973, -5.324, -132.761, 146.678,
    500.19, 2.233, -656.90, 49.23, 13.582, -80.542,
    rep(shared_profile, times = 3)
  )
})

# Significance codes (1-4) used for the point size; 18 values.
pval <- c(
  4, 2, 2, 2, 3, 3,
  2, 3, 1, 1, 4, 4,
  4, 2, 2, 2, 3, 3
)
# Combine the parallel vectors into one data frame, one row per
# gene/concentration pair.
data <- data.frame(genes, concentration, foldchange, pval)

# Bin edges for the fold change. -Inf and Inf are part of the edges so that
# values beyond +/-500 fall into their own bins; without them cut() returns
# NA for those values and the two outermost colours can never be used.
fc_breaks <- c(
  -Inf, -500, -250, -100, -50, -25, -10, -5, -2,
  2, 5, 10, 25, 50, 100, 250, 500, Inf
)

# One colour per bin, lowest bin first (blue = negative, red = positive).
# The names are taken from cut() itself instead of being typed by hand, so
# they always match the factor levels that the plot produces.
fc_colours <- c(
  "navy", "darkblue", "blue3", "blue",
  "dodgerblue4", "dodgerblue2", "deepskyblue", "lightskyblue",
  "gray98",
  "rosybrown1", "firebrick1", "firebrick2", "red",
  "red2", "red3", "firebrick", "firebrick4"
)
names(fc_colours) <- levels(cut(0, breaks = fc_breaks))

# now make dot plot using ggplot2
# x = concentration, y = gene, point size = pval code, colour = fold-change
# bin. The colour legend uses the interval labels of the bins; it is reversed
# so that the largest fold changes are listed first.
p <- ggplot(data, aes(x = concentration, y = genes, size = pval)) +
  geom_point(alpha = 0.9) +
  geom_point(aes(colour = cut(foldchange, breaks = fc_breaks))) +
  labs(y = "Gene") +
  scale_color_manual(
    name = "fold change",
    values = fc_colours,
    guide = guide_legend(reverse = TRUE)
  )

# Printing the result draws the plot; the size legend translates the pval
# codes 1-4 into the p-value ranges they stand for.
p + scale_size(
  breaks = c(1, 2, 3, 4),
  labels = c("pval >0.05", "0.01<pval<0.05", "0.001<pval<0.01", "pval<0.001")
)

示例_2.png


推荐阅读