r - 为什么 fct_inorder 没有按出现顺序排列我的因子水平?
问题描述
总的来说,我对 R 和编程都非常陌生,所以请多多包涵!我到处寻找解决方案,但我尝试过的方法要么不起作用,要么我看不懂,所以非常感谢您能提供的任何帮助。
我正在尝试制作一个点图,使点的颜色对应于一系列倍数变化(fold change)的取值范围,点的大小对应于 p 值。我首先读入一个包含四列(基因、浓度、倍数变化和 p 值)的 csv 文件。下面是一些示例数据,以及我用来生成点图的代码:x 轴为浓度,y 轴为基因,每个点的颜色对应于倍数变化(但我想为每个数值范围指定一种对应的颜色,如下面的代码所示),点的大小对应于 p 值。
genes <- c('ABC', 'CDE', 'EFG', 'HIJ', 'JKL', 'LMN', 'ABC', 'CDE', 'EFG', 'HIJ', 'JKL', 'LMN', 'ABC', 'CDE', 'EFG', 'HIJ', 'JKL', 'LMN', 'ABC', 'CDE', 'EFG', 'HIJ', 'JKL', 'LMN', 'ABC', 'CDE', 'EFG', 'HIJ', 'JKL', 'LMN', 'ABC', 'CDE', 'EFG', 'HIJ', 'JKL', 'LMN')
concentration <- c('PR1.2', 'PR1.2','PR1.2', 'PR1.2','PR1.2', 'PR1.2','PR2.4', 'PR2.4','PR2.4', 'PR2.4','PR2.4','PR2.4', 'PR3.6', 'PR3.6','PR3.6', 'PR3.6','PR3.6','PR3.6','PR1.2T', 'PR1.2T','PR1.2T', 'PR1.2T','PR1.2T', 'PR1.2T', 'PR2.4T', 'PR2.4T','PR2.4T', 'PR2.4T','PR2.4T', 'PR2.4T', 'PR3.6T', 'PR3.6T','PR3.6T', 'PR3.6T','PR3.6T','PR3.6T')
foldchange <- c(577.19, 2.642, -697.90, 50.23, 12.582, -30.542, -15.376, 30.674, -1.973, -5.324, -132.761, 146.678, 500.19, 2.233, -656.90, 49.23, 13.582, -80.542, 577.19, 2.642, -697.90, 50.23, 12.582, -30.542, 577.19, 2.642, -697.90, 50.23, 12.582, -30.542, 577.19, 2.642, -697.90, 50.23, 12.582, -30.542)
pval <- c(4, 2, 2, 2, 3, 3, 2, 3,1,1,4,4, 4, 2, 2, 2, 3, 3)
data <- data.frame(genes, concentration, foldcahnge, pval)
f <- fct_inorder(genes)
g <- fct_inorder(concentration)
# now make dot plot using ggplot2
p <- ggplot(data, aes(x=concentration, y=genes, size=pval)) + labs(y="Gene") +
geom_point(alpha = 0.9) + geom_point(aes(colour = cut(foldchange), c(Inf, 500, 250,
100, 50, 25, 10, 5, 2, -2, 5, -10, -25, -50,
-100, -250, -500, -Inf)))) +
scale_color_manual(name = "fold change", values= c("(500, Inf]" = "firebrick4",
"(250,500]" = "firebrick",
"(100,250]" = "red3",
"(500,100] = "red2",
"(25,50]" = "red",
"(10,25]" = "firebrick2",
"(5,10]" = "firebrick1",
"(2,5]" = "rosybrown1",
"(-2,2]" = "gray98"
"(-5,-2]" = "lightskyblue",
"(-10,-5]" = "deepskyblue",
"(-25,-10]" = "dodgerblue2",
"(-50,-25]" = "dodgerblue4",
"(-100,-50]" = "blue"
"(-250,-100]" = "blue3"
"(-500,-250]" = "darkblue"
"(-Inf,-500]" = "navy"),
labels = c("500", "250", "100", "50", "25", "10", "5", "2", "-2", "-5", "-10", "-25", "-50", "-100", "-250", "-500"))
p + scale_size(range = c(1,4), breaks = c(1,2,3,4), labels=c("pval >0.05", "0.01<pval<0.05", "0.001<pval<0.01", "pval<0.001")
在这里,同一种药物的相同浓度被用于不同的细胞系:浓度后面不带任何后缀表示一种细胞系,带“T”表示另一种(“T”)细胞系。我遇到的问题是,其中一个浓度的顺序不对,而其他浓度都正常。当我运行代码时,得到的点图中列的顺序是“PR1.2”、“PR2.4”、“PR2.4T”、“PR3.6”、“PR1.2T”、“PR3.6T”。我希望它们按细胞系顺序排列:“PR1.2”、“PR2.4”、“PR3.6”、“PR1.2T”、“PR2.4T”、“PR3.6T”。谁能解释为什么我的浓度顺序不正确,以及如何解决?提前致谢!
解决方案
您的代码中有一些拼写错误 - 如果您更正这些错误并进行一些小的更改,我认为您的代码就能按预期工作:
library(ggplot2)
library(forcats)
# Gene labels: the same six genes are listed once per concentration (6 x 6 = 36
# rows). fct_inorder() sets the factor levels in order of first appearance.
genes <- fct_inorder(
  f = rep(c("ABC", "CDE", "EFG", "HIJ", "JKL", "LMN"), times = 6),
  ordered = TRUE
)

# Concentration labels: six consecutive rows per concentration, so the level
# order becomes PR1.2, PR2.4, PR3.6, PR1.2T, PR2.4T, PR3.6T.
concentration <- fct_inorder(
  f = rep(
    c("PR1.2", "PR2.4", "PR3.6", "PR1.2T", "PR2.4T", "PR3.6T"),
    each = 6
  ),
  ordered = TRUE
)

# Fold changes, one line per concentration (genes ABC ... LMN in each line).
foldchange <- c(
  577.19, 2.642, -697.90, 50.23, 12.582, -30.542,
  -15.376, 30.674, -1.973, -5.324, -132.761, 146.678,
  500.19, 2.233, -656.90, 49.23, 13.582, -80.542,
  577.19, 2.642, -697.90, 50.23, 12.582, -30.542,
  577.19, 2.642, -697.90, 50.23, 12.582, -30.542,
  577.19, 2.642, -697.90, 50.23, 12.582, -30.542
)

# Only 18 p-value codes are given; data.frame() recycles them to fill the 36
# rows because 36 is a whole multiple of 18.
pval <- c(
  4, 2, 2, 2, 3, 3,
  2, 3, 1, 1, 4, 4,
  4, 2, 2, 2, 3, 3
)

# Assemble the plotting data; column names equal the variable names.
data <- data.frame(
  genes = genes,
  concentration = concentration,
  foldchange = foldchange,
  pval = pval
)
# now make dot plot using ggplot2
# Bin edges for the fold-change colour scale. -Inf and Inf are included so that
# values below -500 or above 500 fall into a bin instead of becoming NA in cut().
fold_breaks <- c(
  -Inf, -500, -250, -100, -50, -25, -10, -5, -2,
  2, 5, 10, 25, 50, 100, 250, 500, Inf
)

# One colour per bin, ordered from the most negative to the most positive bin.
fold_colours <- c(
  "navy", "darkblue", "blue3", "blue", "dodgerblue4", "dodgerblue2",
  "deepskyblue", "lightskyblue", "gray98", "rosybrown1", "firebrick1",
  "firebrick2", "red", "red2", "red3", "firebrick", "firebrick4"
)

# Name the palette with the exact labels cut() generates, so the colour keys
# cannot drift out of sync with the factor levels (no hand-typed interval names).
names(fold_colours) <- levels(cut(data$foldchange, breaks = fold_breaks))

# Dot plot: x = concentration, y = gene, point size = p-value code, and
# point colour = fold-change bin. A single point layer carries both the
# transparency and the colour mapping.
p <- ggplot(data, aes(x = concentration, y = genes, size = pval)) +
  geom_point(
    aes(colour = cut(foldchange, breaks = fold_breaks)),
    alpha = 0.9
  ) +
  labs(y = "Gene") +
  scale_color_manual(
    name = "fold change",
    values = fold_colours,
    # Legend entries are the bin names, listed from highest to lowest bin.
    breaks = rev(names(fold_colours)),
    # Keep bins that have no data in the legend.
    drop = FALSE
  )

# Print the plot with readable labels for the four p-value codes.
p + scale_size(
  breaks = c(1, 2, 3, 4),
  labels = c("pval >0.05", "0.01<pval<0.05", "0.001<pval<0.01", "pval<0.001")
)
由于格式的原因,很难在原始代码中找到拼写错误 - 如果您使用的是 RStudio,可以通过菜单 Code > Reformat Code 或 Code > Reindent Lines 重新格式化/重新缩进您的代码,这样像缺少逗号这类问题就更容易被发现。
编辑
如果你想手动指定一个因子的水平顺序(例如“基因”),可以使用 base R 的 factor(),而不是 forcats 的 fct_inorder()。例如:
# Gene labels as an ordered factor with an explicitly chosen level order
# (LMN first, ABC last) instead of order of first appearance.
genes <- factor(
  x = rep(c("ABC", "CDE", "EFG", "HIJ", "JKL", "LMN"), times = 6),
  levels = c("LMN", "JKL", "HIJ", "EFG", "CDE", "ABC"),
  ordered = TRUE
)
# Concentration labels: six consecutive rows per concentration. fct_inorder()
# sets the levels in order of first appearance, i.e. PR1.2, PR2.4, PR3.6,
# PR1.2T, PR2.4T, PR3.6T.
concentration <- fct_inorder(
  f = rep(
    c("PR1.2", "PR2.4", "PR3.6", "PR1.2T", "PR2.4T", "PR3.6T"),
    each = 6
  ),
  ordered = TRUE
)

# Fold changes, one line per concentration (genes ABC ... LMN in each line).
foldchange <- c(
  577.19, 2.642, -697.90, 50.23, 12.582, -30.542,
  -15.376, 30.674, -1.973, -5.324, -132.761, 146.678,
  500.19, 2.233, -656.90, 49.23, 13.582, -80.542,
  577.19, 2.642, -697.90, 50.23, 12.582, -30.542,
  577.19, 2.642, -697.90, 50.23, 12.582, -30.542,
  577.19, 2.642, -697.90, 50.23, 12.582, -30.542
)

# Only 18 p-value codes are given; data.frame() recycles them to fill the 36
# rows because 36 is a whole multiple of 18.
pval <- c(
  4, 2, 2, 2, 3, 3,
  2, 3, 1, 1, 4, 4,
  4, 2, 2, 2, 3, 3
)

# Assemble the plotting data; column names equal the variable names.
data <- data.frame(
  genes = genes,
  concentration = concentration,
  foldchange = foldchange,
  pval = pval
)
# now make dot plot using ggplot2
# Bin edges for the fold-change colour scale. -Inf and Inf are included so that
# values below -500 or above 500 fall into a bin instead of becoming NA in cut().
fold_breaks <- c(
  -Inf, -500, -250, -100, -50, -25, -10, -5, -2,
  2, 5, 10, 25, 50, 100, 250, 500, Inf
)

# One colour per bin, ordered from the most negative to the most positive bin.
fold_colours <- c(
  "navy", "darkblue", "blue3", "blue", "dodgerblue4", "dodgerblue2",
  "deepskyblue", "lightskyblue", "gray98", "rosybrown1", "firebrick1",
  "firebrick2", "red", "red2", "red3", "firebrick", "firebrick4"
)

# Name the palette with the exact labels cut() generates, so the colour keys
# cannot drift out of sync with the factor levels (no hand-typed interval names).
names(fold_colours) <- levels(cut(data$foldchange, breaks = fold_breaks))

# Dot plot: x = concentration, y = gene, point size = p-value code, and
# point colour = fold-change bin. A single point layer carries both the
# transparency and the colour mapping.
p <- ggplot(data, aes(x = concentration, y = genes, size = pval)) +
  geom_point(
    aes(colour = cut(foldchange, breaks = fold_breaks)),
    alpha = 0.9
  ) +
  labs(y = "Gene") +
  scale_color_manual(
    name = "fold change",
    values = fold_colours,
    # Legend entries are the bin names, listed from highest to lowest bin.
    breaks = rev(names(fold_colours)),
    # Keep bins that have no data in the legend.
    drop = FALSE
  )

# Print the plot with readable labels for the four p-value codes.
p + scale_size(
  breaks = c(1, 2, 3, 4),
  labels = c("pval >0.05", "0.01<pval<0.05", "0.001<pval<0.01", "pval<0.001")
)
推荐阅读
- python - 有没有在边缘中间用箭头绘制边缘的功能?
- spring - Spring boot 无法构建 Hibernate SessionFactory 不显示错误
- windows - 在 Windows 上,Git Bash vs Windows Power Shell vs 命令提示符有什么区别
- javascript - CSS 滚动捕捉延迟多长时间?
- vba - 使用 Excel VBA 从 Iseries (AS400) 获取数据的宏
- sql-server - 仅选择完成所有其他活动的行
- c# - Linq 查询自引用对象以获取无子元素
- angularjs - 在 AngularJS 文件中使用常量
- r - R中的精确Cochran Q检验
- node.js - 如何在浏览器中查看我的 Compute Engine 应用?