首页 > 解决方案 > 包裹在函数中的 geom_smooth() 值的子集

问题描述

我一直无法让我的函数在 R 中工作

这是我的测试数据:

# Test data: 19 rows. `sample` is a factor with levels P1-P20 (P17 absent),
# `my_col1` and `my_col3` are numeric, and `my_col2` is a two-level factor
# ("F"/"M"). `my_col3` is 0 in every "F" row.
df.summary <- structure(list(sample = structure(c(1L, 11L, 13L, 14L, 15L, 16L, 
17L, 18L, 19L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 12L), .Label = c("P1", 
"P10", "P11", "P12", "P13", "P14", "P15", "P16", "P18", "P19", 
"P2", "P20", "P3", "P4", "P5", "P6", "P7", "P8", "P9"), class = "factor"), 
    my_col1 = c(0.18933457306591, 0.235931461802108, 0.189103550993512, 
    0.125949595916727, 0.0534753960389538, 0.147040309859083, 
    0.0911609796692189, 0.175136203125972, 0.116254981602728, 
    0.133480302179393, 0.109994771038499, 0.149204159468607, 
    0.105682126016057, 0.0967607072540045, 0.172893104456964, 
    0.115091434919033, 0.0653509609616037, 0.113300972345115, 
    0.0801326785643683), my_col2 = structure(c(1L, 1L, 1L, 2L, 
    2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
    ), .Label = c("F", "M"), class = "factor"), my_col3 = c(0, 
    0, 0, 20.9715009722175, 13.3519208510716, 24.0257081096482, 
    19.2584928826721, 0, 0, 22.3923771843906, 16.6293335002717, 
    26.5622107372171, 0, 0, 0, 0, 0, 0, 0)), class = "data.frame", row.names = c(NA, 
-19L))

library(ggplot2)
## read data in 
## df.summary <- read.csv('data_test.csv',header = TRUE,sep=';', check.names = FALSE)

# Builds a scatter plot of two columns of `my_df`, adds a linear fit computed
# on the rows where `my_col2 == "M"`, and saves the figure with ggsave().
#   my_df                      data frame to plot
#   my_col1, my_col3, my_col2  column names, passed as strings by the caller
#   output                     file name handed to ggsave()
plot_correlation <- function(my_df, my_col1, my_col3, my_col2, output) {


  # The string argument is used as a column selector here: the column it names
  # is rescaled by a factor of 100.
  my_df[, my_col1] <- my_df[, my_col1] * 100

  # NOTE(review): inside aes() and subset() the bare names my_col1, my_col2 and
  # my_col3 are looked up as columns of the data before the function arguments
  # are considered, so these layers refer to columns literally named that way
  # rather than to the columns named by the arguments.
  lm_plot <- ggplot(my_df, aes(my_col1, my_col3)) +
    geom_point(data = my_df, aes(colour = my_col2), size = 2.5) +
    scale_color_manual(values=c("violetred1", "royalblue1", "gold")) +
    labs(x = "", y = "") +
    geom_abline(intercept = 0, slope = 1,linetype="dotted") +
    geom_smooth(data=subset(my_df, my_col2 == "M"),method="lm", color="royalblue1")

  # `my_output` is just an alias for `output`; it is passed straight to ggsave().
  my_output <- output
  ggsave(filename=my_output, plot=lm_plot,width = 9, height = 9, pointsize = 10)
}

plot_correlation(df.summary,'my_col1','my_col3','my_col2','test_outfig.pdf')

这段代码生成了这张图:(在此处输入图像描述)

而下面这段代码:

# The same plot built at top level, outside any function.
# NOTE(review): `my_col1` is unquoted in the indexing on the next line, so it
# must already exist as a variable holding a column name (presumably
# "my_col1") -- confirm.
df.summary[,my_col1] <- df.summary[,my_col1]*100
ggplot(df.summary, aes(my_col1,my_col3)) +
  geom_point(data = df.summary, aes(colour = my_col2), size = 2.5) +
  scale_color_manual(values=c("violetred1", "royalblue1", "gold")) +
  labs(x = "", y = "") +
  geom_abline(intercept = 0, slope = 1,linetype="dotted") +
  geom_smooth(data=subset(df.summary, my_col2 == "M"), method="lm", color="royalblue1")

生成了这张图(这正是我想要的):

在此处输入图像描述

看起来(也许我错了)在函数内部,R 无法把函数参数与我的列名对应起来,我不知道正确的语法是什么……

标签: r, ggplot2, subset

解决方案


将 `aes` 替换为 `aes_string`。您的代码之所以看起来能部分工作,是因为变量名(`my_col1` 等)恰好与变量的值(`"my_col1"` 等)相同。由于您想通过函数参数来指定列名,您需要使用 tidyeval,或者使用 `aes_string`——它接受字符串值,而不是不带引号的符号。

此外,没有必要在函数体中把 `output` 复制给 `my_output`。

library("ggplot2")
# Reprex data: 19 rows. `sample` is a factor with levels P1-P20 (P17 absent),
# `my_col1` and `my_col3` are numeric, and `my_col2` is a two-level factor
# ("F"/"M"). `my_col3` is 0 in every "F" row.
df.summary <- structure(list(sample = structure(c(1L, 11L, 13L, 14L, 15L, 16L, 
17L, 18L, 19L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 12L), .Label = c("P1", 
"P10", "P11", "P12", "P13", "P14", "P15", "P16", "P18", "P19", 
"P2", "P20", "P3", "P4", "P5", "P6", "P7", "P8", "P9"), class = "factor"), 
    my_col1 = c(0.18933457306591, 0.235931461802108, 0.189103550993512, 
    0.125949595916727, 0.0534753960389538, 0.147040309859083, 
    0.0911609796692189, 0.175136203125972, 0.116254981602728, 
    0.133480302179393, 0.109994771038499, 0.149204159468607, 
    0.105682126016057, 0.0967607072540045, 0.172893104456964, 
    0.115091434919033, 0.0653509609616037, 0.113300972345115, 
    0.0801326785643683), my_col2 = structure(c(1L, 1L, 1L, 2L, 
    2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
    ), .Label = c("F", "M"), class = "factor"), my_col3 = c(0, 
    0, 0, 20.9715009722175, 13.3519208510716, 24.0257081096482, 
    19.2584928826721, 0, 0, 22.3923771843906, 16.6293335002717, 
    26.5622107372171, 0, 0, 0, 0, 0, 0, 0)), class = "data.frame", row.names = c(NA, 
-19L))

#' Scatter plot of two columns with a linear fit for the "M" group
#'
#' Every column is selected through its string argument, so the function works
#' for any column names, not only for columns that happen to be called
#' `my_col1`, `my_col2` and `my_col3`.
#'
#' @param my_df A data frame.
#' @param my_col1 Name (string) of the numeric column mapped to the x axis; its
#'   values are multiplied by 100 before plotting.
#' @param my_col3 Name (string) of the numeric column mapped to the y axis.
#' @param my_col2 Name (string) of the grouping column mapped to point colour;
#'   the rows where it equals "M" feed the regression line.
#' @return A ggplot object.
plot_correlation <- function(my_df, my_col1, my_col3, my_col2) {
  stopifnot(
    is.data.frame(my_df),
    all(c(my_col1, my_col3, my_col2) %in% names(my_df))
  )

  my_df[[my_col1]] <- my_df[[my_col1]] * 100

  # Pick the "M" rows through the string argument. subset(my_df, my_col2 == "M")
  # would instead look for a column literally named `my_col2`. which() drops
  # NA comparisons, matching subset()'s treatment of NA as FALSE.
  m_rows <- my_df[which(my_df[[my_col2]] == "M"), , drop = FALSE]

  # aes_string() takes the column names as strings; it is used for the colour
  # mapping as well, so that layer also follows the `my_col2` argument.
  # (aes_string() is soft-deprecated since ggplot2 3.0.0; the tidy-eval
  # equivalent is aes(.data[[my_col1]], .data[[my_col3]]).)
  ggplot(my_df, aes_string(my_col1, my_col3)) +
    geom_point(aes_string(colour = my_col2), size = 2.5) +
    scale_color_manual(values = c("violetred1", "royalblue1", "gold")) +
    labs(x = "", y = "") +
    geom_abline(intercept = 0, slope = 1, linetype = "dotted") +
    geom_smooth(data = m_rows, method = "lm", color = "royalblue1")
}
plot_correlation(df.summary,'my_col1','my_col3','my_col2')

由 reprex 包(v0.3.0)创建于 2019 年 12 月 16 日


推荐阅读