r - 如何为数值数据绘制箱线图,同时排除在另一列中标记为“0”的案例?
问题描述
我为单个因素制作了箱线图,如下所示:
# Draws one box plot of RPSdata$Survival.One.Year at a single constant x
# position (factor(0)). Note that y is read from RPSdata directly rather than
# from a column of dataframe2. The `...` presumably stands for geom_boxplot()
# arguments the asker omitted.
ggplot(data = dataframe2, aes(x=factor(0), y = RPSdata$Survival.One.Year)) + geom_boxplot(...)
数据框很简单:
# Builds a one-column data frame from the Survival.One.Year column of RPSdata.
dataframe2 <- data.frame(RPSdata$Survival.One.Year)
我想制作相同的箱线图,但仅包括在 RPSdata$Survival.Complete.Sense 列中编码为“1”的案例
非常感谢!我是 R 新手,任何帮助都将不胜感激。
数据样本:
> dput(head(RPSdata, 5))
structure(list(ID.Rank = 1:5, ID.Participant = c("8571762481",
"7351340719", "7396795819", "3790978753", "6450996320"), Population.Risk = structure(c(1L,
2L, 3L, 2L, 2L), .Label = c("1", "2", "3", "4", "5", "6"), class = "factor"),
Personal.Risk = c(50, 60, 30, 40, 10), Comparative.Risk.Age = structure(c(2L,
NA, 3L, 4L, 3L), .Label = c("1", "2", "3", "4", "5"), class = "factor"),
Comparative.Risk.Current = structure(c(NA, 3L, 3L, NA, NA
), .Label = c("1", "2", "3", "4", "5"), class = "factor"),
Comparative.Risk.Ex = structure(c(2L, 3L, NA, NA, 3L), .Label = c("1",
"2", "3", "4", "5"), class = "factor"), Score.Exposure = structure(c(1L,
1L, 1L, 2L, 1L), .Label = c("1", "2", "4", "5"), class = "factor"),
RF.Age = structure(c(1L, NA, 1L, 1L, 2L), .Label = c("0",
"1", "2"), class = "factor"), RF.Pollution = structure(c(1L,
NA, 3L, 2L, 2L), .Label = c("0", "1", "2"), class = "factor"),
RF.Asbestos = structure(c(1L, NA, 1L, 1L, 1L), .Label = c("1",
"2"), class = "factor"), RF.Asthma = structure(c(2L, NA,
3L, 2L, 1L), .Label = c("0", "1", "2"), class = "factor"),
RF.BMI = structure(c(2L, NA, 1L, 2L, 3L), .Label = c("0",
"1", "2"), class = "factor"), RF.Gene = structure(c(2L, NA,
3L, 3L, 3L), .Label = c("0", "1", "2"), class = "factor"),
RF.COPD = structure(c(2L, NA, 2L, 2L, 2L), .Label = c("0",
"1", "2"), class = "factor"), RF.History = structure(c(2L,
NA, 1L, 1L, 2L), .Label = c("0", "1", "2"), class = "factor"),
RF.Diet = structure(c(3L, NA, 1L, 2L, 3L), .Label = c("0",
"1", "2"), class = "factor"), RF.Radon = structure(c(2L,
NA, 1L, 3L, 3L), .Label = c("0", "1", "2"), class = "factor"),
RF.Smoking = structure(c(2L, NA, 2L, 2L, 2L), .Label = c("0",
"1", "2"), class = "factor"), RF.Second.Smoke = structure(c(3L,
NA, 1L, 3L, 2L), .Label = c("0", "1", "2"), class = "factor"),
Survival.One.Year = c(80, 20, NA, NA, 90), Survival.Five.Year = c(60,
50, NA, 30, 50), Survival.Ten.Year = c(40, 20, NA, NA, 2),
Worry.Frequency = structure(c(1L, 3L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4"), class = "factor"), Worry.Intensity = structure(c(1L,
2L, 2L, 2L, 1L), .Label = c("1", "2", "3", "4"), class = "factor"),
Mental.Health.One = structure(c(1L, 3L, 2L, 1L, 1L), .Label = c("0",
"1", "2", "3"), class = "factor"), Mental.Health.Two = structure(c(1L,
2L, 2L, 1L, 1L), .Label = c("0", "1", "2", "3"), class = "factor"),
Mental.Health.Three = structure(c(1L, 1L, 1L, 1L, 1L), .Label = c("0",
"1", "2", "3"), class = "factor"), Mental.Health.Four = structure(c(2L,
2L, 1L, 1L, 1L), .Label = c("0", "1", "2", "3"), class = "factor"),
PHQ.4 = structure(c(2L, 5L, 3L, 1L, 1L), .Label = c("0",
"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11",
"12"), class = "factor"), PHQ4.Anx = structure(c(1L, 4L,
3L, 1L, 1L), .Label = c("0", "1", "2", "3", "4", "5", "6"
), class = "factor"), PHQ4.Dep = structure(c(2L, 2L, 1L,
1L, 1L), .Label = c("0", "1", "2", "3", "4", "5", "6"), class = "factor"),
PHQ4.Bin = structure(c(1L, 2L, 1L, 1L, 1L), .Label = c("0",
"1", "2", "3"), class = "factor"), Dep.Bin = structure(c(1L,
1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
Anx.Bin = structure(c(1L, 2L, 1L, 1L, 1L), .Label = c("0",
"1"), class = "factor"), Survival.Compelete.Sense = structure(c(2L,
1L, 1L, 1L, 2L), .Label = c("0", "1"), class = "factor"),
Survival.Semi.Sense = c(1L, 0L, 0L, 1L, 1L)), row.names = c(NA,
5L), class = "data.frame")
>
解决方案
根据问题描述,不需要第二个 data.frame,只需要 RPSdata 这一个即可。
解决办法是对数据取子集,条件是该列的值必须等于 1。
library(ggplot2)

# Box plot of one-year survival, restricted to the cases coded "1" in the
# sense-check column.
# NOTE: in the dput() sample of RPSdata the column is spelled
# `Survival.Compelete.Sense` (sic), so that exact spelling is used here;
# `Survival.Complete.Sense` does not exist in that data and would raise an
# "object not found" error.
# subset() drops rows where the condition is FALSE or NA.
ggplot(
  data = subset(RPSdata, Survival.Compelete.Sense == 1),
  mapping = aes(x = Survival.Compelete.Sense, y = Survival.One.Year)
) +
  geom_boxplot()
另一种选择是使用 dplyr 包:先用 filter 筛选数据,再将结果通过管道传给 ggplot。
我还将 x 轴对应的列强制转换为因子。
library(dplyr)
library(ggplot2)

# Same plot via a dplyr pipeline: keep only the cases coded "1", make sure the
# x-axis column is a factor, then pipe the result into ggplot().
# NOTE: in the dput() sample of RPSdata the column is spelled
# `Survival.Compelete.Sense` (sic), so that exact spelling is used here;
# `Survival.Complete.Sense` does not exist in that data.
RPSdata %>%
  filter(Survival.Compelete.Sense == 1) %>%
  # factor() on an existing factor rebuilds it with only the levels still
  # present, so the filtered-out "0" level is dropped.
  mutate(Survival.Compelete.Sense = factor(Survival.Compelete.Sense)) %>%
  ggplot(aes(Survival.Compelete.Sense, Survival.One.Year)) +
  geom_boxplot()
推荐阅读
- laravel - “找不到类 'ConsoleTVs\Charts\Facades\Charts'”
- java - SAP Hybris - Tomcat 忽略内存设置
- c# - visual studio C# listview 保存和加载问题
- android - 如何恢复在 IntentService 中创建和启动的 Mediarecorder 对象引用
- regex - Get the first character using Regex
- google-sheets - 我需要一个有多个条件的countif
- amazon-web-services - Powershell 写入 AWS S3
- reactjs - 为什么要在升级 Material UI 时升级 React
- mysql - 来自 MySQL 数据库的查询在 node.js 和 JSON.stringify() 中返回 [Object] [Object] 似乎不起作用
- docker - Docker如何将文件复制到Windows?