首页 > 解决方案 > ggstatsplot 中的“提供的非有限值”

问题描述

我正在使用 ggstatsplot 来获得我的统计分析的可视化表示。

我有很多数据集,它们的结构都非常相似。其中一些可以正常运行,而另一些则不行。data1 是一个可以正常运行的示例,而 data2 则会报错。

 # Working example: 55 observations over six treatment groups
 # (5 negative controls followed by 10 observations per remaining group).
 data1 <- structure(
   list(
     # Group codes 1..6 in run-length form, labelled with the treatment names.
     treatment = factor(
       rep(1:6, times = c(5L, 10L, 10L, 10L, 10L, 10L)),
       labels = c(
         "negative_ctrl", "positive_ctrl", "treatmentA",
         "treatmentB", "treatmentC", "treatmentD"
       )
     ),
     value = c(
       # negative_ctrl (5 values)
       1.74501, 2.04001, 1.89501, 1.84001, 1.89501,
       # positive_ctrl (10 values)
       9.75001, 8.50001, 8.80001, 11.50001, 10.25001,
       7.90001, 9.25001, 11.45001, 7.75001, 7.75001,
       # treatmentA (10 values)
       7.55001, 8.70001, 8.20001, 6.95001, 6.60001,
       7.40001, 7.15001, 8.25001, 9.20001, 8.95001,
       # treatmentB (10 values)
       6.45001, 6.05001, 5.40001, 7.95001, 6.80001,
       4.65001, 6.40001, 6.40001, 6.70001, 5.40001,
       # treatmentC (10 values)
       3.20001, 2.70001, 4.30001, 4.10001, 3.60001,
       4.00001, 3.00001, 4.70001, 3.10001, 3.50001,
       # treatmentD (10 values)
       6.45001, 5.45001, 4.90001, 7.25001, 4.55001,
       4.70001, 6.25001, 5.65001, 6.00001, 5.10001
     )
   ),
   row.names = c(NA, -55L),
   class = c("tbl_df", "tbl", "data.frame")
 )

# Failing example: 52 observations over the same six treatment groups.
# treatmentA has only 7 observations here, and all five negative_ctrl
# values are the same number.
data2 <- structure(
  list(
    # Group codes 1..6 in run-length form, labelled with the treatment names.
    treatment = factor(
      rep(1:6, times = c(5L, 10L, 7L, 10L, 10L, 10L)),
      labels = c(
        "negative_ctrl", "positive_ctrl", "treatmentA",
        "treatmentB", "treatmentC", "treatmentD"
      )
    ),
    value = c(
      # negative_ctrl (5 values, all equal)
      1.00001, 1.00001, 1.00001, 1.00001, 1.00001,
      # positive_ctrl (10 values)
      6.77501, 5.68751, 5.99201, 8.24501, 7.01251,
      4.79501, 5.99126, 8.26276, 5.35376, 5.38751,
      # treatmentA (7 values)
      4.60251, 5.38901, 4.85201, 4.44401, 5.20501,
      6.20701, 5.77001,
      # treatmentB (10 values)
      4.05201, 3.65126, 3.02401, 4.68351, 3.90001,
      2.56951, 3.70001, 3.61901, 3.96401, 2.93601,
      # treatmentC (10 values)
      1.53901, 1.40801, 2.05601, 2.08501, 1.89701,
      1.79501, 1.50001, 2.09151, 1.53551, 1.57501,
      # treatmentD (10 values)
      3.88851, 3.09151, 2.75501, 4.40626, 2.42001,
      2.60951, 3.83501, 3.37151, 3.70001, 2.92701
    )
  ),
  row.names = c(NA, -52L),
  class = c("tbl_df", "tbl", "data.frame")
)

我对这两个数据集调用最基本的分析,代码如下:

library(Rmpfr)
library(ggstatsplot)

# Baseline call on data1: `treatment` supplies the groups (x) and `value`
# the measurements (y). According to the post, this call renders a plot.
# NOTE(review): `messages = FALSE` presumably silences the package's console
# messages -- confirm the installed ggstatsplot version still accepts it.
ggstatsplot::ggbetweenstats(
     data = data1, 
     x = treatment, 
     y = value,
     messages = FALSE )

# The same call on data2. According to the post, this is the call that
# stops with "non-finite value supplied by optim".
ggstatsplot::ggbetweenstats(
     data = data2, 
     x = treatment, 
     y = value,
     messages = FALSE )

对于 data1 我得到这个:

在此处输入图像描述

对于 data2 我得到:

> Error in stats::optim(par = 1.1 * rep(lambda, 2), fn = function(x) { : non-finite value supplied by optim

起初我认为问题可能出在阴性对照中的几个零值上,所以我先把它们稍微调高了一点,后来又统一加了 1,以确保问题不是由数值范围引起的。我能看到的唯一差异是:对于处理 A(第 3 个水平),data2 中只有 7 个测量值,而 data1 中有 10 个(由于样本失败,不得不删除了一些 NA)。然而,在这两个数据集中,阴性对照(第 1 个水平)都只有 5 个值,而且我认为在这类分析中,各组样本量不同并不构成问题。

标签: r ggplot2

解决方案


在这种情况下,先尝试画一些基础图形是个好主意,例如用箱线图来定位问题:

所以比较两个数据集:

# One base-graphics boxplot per data set, to compare the spread of `value`
# within each `treatment` group.
boxplot(value ~ treatment, data = data1)
boxplot(value ~ treatment, data = data2)

data2 中有一个处理组("negative_ctrl")完全没有变异,标准差(SD)为 0。我猜测该函数内部执行了某种需要数据存在变异的检验。您需要阅读该函数的文档来确认问题是否出在这里,不过您可以通过删除这类处理组,或者人为加入极小的变异来得到图形,例如:

# Workaround 1: run without negative_ctrl, i.e. drop the rows of the group
# whose values are all identical before plotting.
# Only the rows are removed here; the unused factor level is left in place.
ggstatsplot::ggbetweenstats(
  data = data2[data2$treatment != "negative_ctrl",], 
  x = treatment, 
  y = value,
  messages = FALSE )

# Workaround 2 (this is a hack): add some tiny fake variation to force the
# analysis through. Work on a copy so that data2 itself is left untouched,
# then overwrite the first negative_ctrl observation.
data3 <- data2
data3$value[match("negative_ctrl", data3$treatment)] <- 1.0001
# Same call as before, now on the perturbed copy (data3) instead of data2.
ggstatsplot::ggbetweenstats(
  data = data3, 
  x = treatment, 
  y = value,
  messages = FALSE )

推荐阅读