首页 > 解决方案 > 使用 ggplot2 (R) 按重复实验为箱线图中的单个数据点着色

问题描述

我正在尝试使用箱线图显示已重复 x 次的实验中的数据。我想显示每个单独的数据点。此外,为了帮助人们了解单个重复的效果,我想根据重复对数据点进行着色。
以下代码有效,但它不允许我为数据点着色。
一如既往地感谢任何帮助

下面的示例数据和代码

  #   conditions    repeats fold
  #  A  1   7.11415E-05
  #  B  1   0.094383838
  #  C  1   2.13914E-05
  #  D  1   1
  #  E  1   0.418905744
  #  F  1   0.62318476
  #  A  2   0.00069782
  #  B  2   0.006064881
  #  C  2   0.00090097
  #  D  2   1
  #  E  2   1.198034711
  #  F  2   1.20836637

library(ggplot2)

# Load the tab-separated data file from the current working directory.
# Columns used below: conditions, repeats, fold.
data <- read.delim("data_example.txt", header = TRUE, sep = "\t")
data

# Fix the order in which the conditions appear on the x axis.
data$conditions <- factor(
  data$conditions,
  levels = c("A", "B", "C", "D", "E", "F")
)

# Eight-colour palette; defined here but not applied by this plot.
cbbPalette <- c(
  "#000000", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)

# `hue` is not a ggplot2 aesthetic, so the former `hue = repeats` mapping had
# no effect on the plot and has been removed. To colour the points by repeat,
# map `colour = factor(repeats)` inside the point layer only: a discrete
# aesthetic mapped in ggplot() also defines the grouping of the boxplots.
ggplot(data, aes(conditions, fold)) +
  # Every observation is drawn by the point layer, so the boxplot's own
  # outlier markers are suppressed (shape NA hides them completely).
  geom_boxplot(outlier.shape = NA) +
  # Zero jitter width/height: points sit at their exact x/y positions.
  geom_jitter(
    position = position_jitter(width = 0, height = 0),
    alpha = 1,
    size = 3,
    show.legend = FALSE  # replaces the deprecated `show_guide` argument
  ) +
  ylab("Y-AXIS") +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(face = "bold", size = 16, vjust = 1),
    axis.text.x = element_text(face = "bold", size = 12, color = "black"),
    axis.text.y = element_text(face = "bold", size = 14, color = "black"),
    legend.position = "none"
  )

编辑:

> dput(data)
structure(list(conditions = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 
1L, 2L, 3L, 4L, 5L, 6L), .Label = c("A", "B", "C", "D", "E", 
"F"), class = "factor"), repeats = c(1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L), fold = c(7.11415e-05, 0.094383838, 2.13914e-05, 
1, 0.418905744, 0.62318476, 0.00069782, 0.006064881, 0.00090097, 
1, 1.198034711, 1.20836637)), .Names = c("conditions", "repeats", 
"fold"), row.names = c(NA, -12L), class = "data.frame")

编辑2:

我的代码给出:

在此处输入图像描述

添加“col=factor(repeats)”给出: 在此处输入图像描述

我想要的是(如果可能的话,允许我控制颜色): 在此处输入图像描述

标签: r ggplot2 boxplot

解决方案


下面的代码实现了我想要的效果……

library(ggplot2)

# Load the tab-separated data file from the current working directory.
# Columns used below: conditions, repeats, fold.
data <- read.delim("data_example.txt", header = TRUE, sep = "\t")

# Fix the order of the conditions on the x axis. The levels must match the
# values in the `conditions` column exactly; any value not listed becomes NA.
# NOTE(review): the example data in the question labels the conditions A-F,
# so these descriptive labels presumably match the real data file -- confirm.
data$conditions <- factor(
  data$conditions,
  levels = c(
    "luciferase",
    "HPV18 E6-E7",
    "luciferase + Dox",
    "HPV18 E6-E7 + Dox",
    "HPV18 E6TTL-E7 + Dox",
    "HPV18 E6-E7TTL + Dox"
  )
)

# Eight-colour palette used for the per-repeat dot fill.
cbbPalette <- c(
  "#000000", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)

ggplot(data, aes(conditions, fold)) +
  # Every observation is drawn by the dot layer, so the boxplot's own outlier
  # markers are suppressed (shape NA hides them completely).
  geom_boxplot(outlier.shape = NA, width = 0.3) +
  # The fill mapping lives in this layer only, so the boxplots stay grouped by
  # condition while the dots are coloured by repeat.
  # Mapping `fill` splits the dots into one group per repeat; without
  # `stackgroups`, equal values from different repeats are drawn on top of
  # each other and hide one another. `stackgroups = TRUE` stacks across the
  # groups, and `binpositions = "all"` aligns the bins between groups so the
  # stacking lines up.
  geom_dotplot(
    aes(fill = factor(repeats)),
    binaxis = "y",
    stackdir = "center",
    stackgroups = TRUE,
    binpositions = "all",
    alpha = 1,
    dotsize = 0.5
  ) +
  ylab("fold E6*I mRNA level") +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(face = "bold", size = 16, vjust = 1),
    axis.text.x = element_text(
      face = "bold", size = 12, color = "black", angle = 90, hjust = 1
    ),
    axis.text.y = element_text(face = "bold", size = 14, color = "black"),
    # Blank panel with plain black axis lines.
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  scale_fill_manual(values = cbbPalette) +
  guides(fill = guide_legend(title = "Biological repeats")) +
  scale_y_continuous(breaks = seq(0, 1.2, by = 0.2), expand = c(0, 0)) +
  # Dashed red reference line at fold = 1.
  geom_hline(yintercept = 1, linetype = "dashed", color = "red")

在此处输入图像描述


推荐阅读