首页 > 解决方案 > [R] 在确认所有因子变量都至少有 2 个唯一值之后,glm 模型仍然报“contrasts 错误”(contrasts can be applied only to factors with 2 or more levels)

问题描述

我不确定为什么在我的数据集中运行包含所有变量的基本模型时仍然收到此消息:

我的数据,带有匿名变量:

set.seed(1234)

#dput(df)
structure(list(outcome_1= structure(c(2L, 1L, 1L, 2L, 
1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
1L, 2L, 1L, NA, 2L, 1L), .Label = c("0", "1"), class = "factor"), 
    outcome_2= structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L, 
    2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, NA, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 
    1L, 1L, NA, 2L, 1L), .Label = c("0", "1"), class = "factor"), 
    outcome_3= structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L, 
    2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, NA, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
    1L, 1L, NA, 1L, 1L), .Label = c("0", "1"), class = "factor"), 
    bl_ep = c(16, 92, 10, 40, 19, 1, 16, 10, 22, 28, 8, 11, 6, 
    47, 12, 1, 9, 20, 2, 14, 72, 28, 5, 16, 61, 12, 24, 22, 44, 
    44, 16, 36, 62, 10, 16, 10, 89, 22, 5, 38, 8, 11), bl_days = c(12, 
    28, 10, 25, 19, 1, 10, 9, 13, 28, 4, 11, 6, 20, 12, 1, 8, 
    16, 2, 12, 27, 28, 5, 13, 24, 10, 18, 18, 16, 16, 10, 28, 
    22, 5, 15, 8, 28, 15, 5, 22, 7, 11), score_1 = c(11, 
    19, 17, 17, 12, 14, 8, 12, 14, 15, 14, 13, 12, 14, 15, 5, 
    11, 14, 14, 13, 16, 11, 11, 14, 20, 14, 12, 11, 17, 15, 14, 
    18, 15, 14, 12, 10, 17, 16, 11, 13, 18, 17), score_2 = c(1.1, 
    1.6, 1.6, 2.8, 1.9, 3.3, 4, 3.8, 1.8, 1.4, 2, 3.55, 1.6, 
    1.8, 2.4, 3.7, 1.4, 2.9, 3.55, 2.5, 1.6, 3.2, 3.5, 2.4, 3.1, 
    2.3, 3.8, 3.9, 1.1, 1.7, 2.3, 1.5, 1.9, 3.3, 3, 2.9, 1.6, 
    3.1, 3.7, 2.8, 1.2, 1.9), score_3 = c(1, 
    1.22222222222222, 1.11111111111111, 1.88888888888889, 1.44444444444444, 
    1.44444444444444, 3.22222222222222, 2.77777777777778, 1.11111111111111, 
    1, 1, 2.83333333333333, 1.22222222222222, 1.875, 1.55555555555556, 
    2.66666666666667, 1, 2.25, 1.72222222222222, 2.05555555555556, 
    1.22222222222222, 2, 2, 1.77777777777778, 1.33333333333333, 
    1.11111111111111, 2.5, 2.55555555555556, 1, 1.22222222222222, 
    1.77777777777778, 1.22222222222222, 2.44444444444444, 1.55555555555556, 
    1.77777777777778, 1.66666666666667, 1.11111111111111, 2.33333333333333, 
    2.88888888888889, 1.55555555555556, 1, 1.25), score_4 = c(1.31428571428571, 
    1.37142857142857, 1.08571428571429, 1.83809523809524, 1.37142857142857, 
    1.8952380952381, 4, 3.88571428571429, 3.02857142857143, 2.12222222222222, 
    1.43333333333333, 3.39047619047619, 1.74285714285714, 1.67619047619048, 
    2.02857142857143, 3.48571428571429, 1.24761904761905, 3.73333333333333, 
    3.08571428571429, 2.56666666666667, 1.74285714285714, 2.6952380952381, 
    3.45714285714286, 2.27619047619048, 1.9047619047619, 2.62857142857143, 
    3.74285714285714, 3.74285714285714, 1.24761904761905, 1.39047619047619, 
    1.83809523809524, 2.74285714285714, 4, 1.77142857142857, 
    3.42857142857143, 3.2, 1.65714285714286, 2.55238095238095, 
    2.38095238095238, 2.40952380952381, 2.07619047619048, 2.56666666666667
    ), score_5 = c(1, 1, 1, 1, 1.33333333333333, 
    1, 3.33333333333333, 3.66666666666667, 1.66666666666667, 
    1.66666666666667, 2, 2.5, 1.66666666666667, 1, 1.33333333333333, 
    3, 1, 1.66666666666667, 2.16666666666667, 2.16666666666667, 
    1.33333333333333, 2.66666666666667, 3, 2.66666666666667, 
    1.33333333333333, 2.66666666666667, 3, 1.33333333333333, 
    1, 1, 1, 1, 1, 1.33333333333333, 3, 3.66666666666667, 1.66666666666667, 
    1.33333333333333, 2.33333333333333, 1.66666666666667, 2, 
    2), sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
    1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("F", "M"), class = "factor"), age = c(64, 
    66, 51, 69, 60, 65, 65, 69, 50, 78, 75, 78, 35, 77, 69, 48, 
    65, 72, 60, 64, 78, 71, 58, 55, 55, 57, 81, 76, 56, 71, 56, 
    73, 69, 51, 43, 77, 31, 64, 69, 63, 38, 71), childbirth = structure(c(2L, 
    2L, 2L, 1L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, NA, 1L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L), .Label = c("N", 
    "Y"), class = "factor"), x1= c(3, 2, 2, NA, 
    3, 2, 3, NA, 3, 3, 2, 2, NA, 2, 5, 2, 2, 2, 4, 3, 2, 2, 3, 
    NA, 2, 3, NA, NA, 2, 2, 2, 2, 2, 2, 3, 2, 1, NA, 2, 2, 1, 
    3), x2= c(0, 0, 0, NA, 1, 0, 0, NA, 0, 0, 
    0, 0, NA, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, NA, NA, 
    0, 0, 0, 0, 0, 0, 0, 0, 1, NA, 0, 0, 0, 0), x3= structure(c(4L, 
    1L, 1L, 2L, 1L, 1L, 1L, NA, 4L, 1L, 1L, 4L, NA, 4L, 1L, 4L, 
    4L, 4L, 4L, 3L, 1L, 1L, 1L, 2L, 4L, 1L, NA, 2L, 1L, 4L, 1L, 
    1L, 4L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 4L, 1L), .Label = c("N", 
    "NA", "UNK", "Y"), class = "factor"), x4= structure(c(4L, 
    1L, 1L, 2L, 1L, 1L, 1L, NA, 1L, 1L, 4L, 1L, NA, 1L, 1L, 4L, 
    3L, 1L, 4L, 4L, 1L, 4L, 4L, 2L, 1L, 4L, NA, 2L, 4L, 1L, 4L, 
    1L, 1L, 4L, 4L, 1L, 4L, 2L, 4L, 1L, 4L, 4L), .Label = c("N", 
    "NA", "UNK", "Y"), class = "factor"), x5= structure(c(2L, 
    2L, 2L, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L, NA, 2L, 2L, 1L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L, 
    2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L), .Label = c("N", 
    "Y"), class = "factor"), x6= structure(c(2L, 2L, 2L, 1L, 
    1L, 2L, 2L, NA, 1L, 1L, 1L, 2L, NA, 2L, 2L, 1L, 2L, 2L, 1L, 
    2L, 2L, 2L, 1L, 1L, 2L, 2L, NA, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 
    1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L), .Label = c("N", "Y"), class = "factor"), 
    x7= structure(c(1L, 1L, 1L, 1L, 1L, 
    1L, 1L, NA, 1L, 1L, 1L, 1L, NA, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 3L, 1L, NA, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 
    3L, 1L, 1L, 1L, 1L, 2L, 3L), .Label = c("N", "NA", "Y"), class = "factor"), 
    x8= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, 1L, 
    2L, 2L, 2L, NA, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 
    1L, 1L, NA, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 
    2L, 2L, 2L), .Label = c("N", "Y"), class = "factor"), x9= structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 
    1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 
    1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("N", 
    "Y"), class = "factor"), x10= structure(c(1L, 2L, 2L, 
    1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 
    1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 
    1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"), 
    x11= structure(c(1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 
    2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 
    2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"), 
    x12= structure(c(1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 
    1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 
    1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"), 
    x13= structure(c(2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 
    2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 2L, 1L, 1L), .Label = c("N", "Y"), class = "factor"), 
    x14= c(41, 7, 8, 9, 7, 2, 1, 5, 9, 6, 6, 8, 
    14, 2, 4, NA, 11, 9, 31, 13, 8, 2, 11, 20, 8, 7, 6, 8, 2, 
    12, 32, 1, 2, 38, 10, 17, 5, 28, 31, 10, 3, 6), x15= structure(c(3L, 
    4L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 5L, 1L, 3L, 3L, 
    3L, 3L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 1L, 2L, 2L, 3L, 3L, 3L, 
    2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L), .Label = c("IATRO", 
    "IDIO", "OBST", "OBST/IDIO", "TRAUM"), class = "factor"), 
    x16= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 
    1L, 1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"), 
    x17= structure(c(2L, 2L, 1L, 2L, 1L, 2L, 
    2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 
    2L, 1L, 1L, 1L, 1L, 1L), .Label = c("N", "Y"), class = "factor"), 
    x18= c(31.8, 20, 30.9, 23.3, 22.5, 23.1, 23.6, 25.9, 22.8, 
    25.2, 30.2, 23.4, 22.2, 29, 24.8, 32.7, 20.8, 28.5, 24.6, 
    23, 23.4, 21.1, 24.9, 18, 21.7, 27.6, 27, 29, 32.9, 26, 29.3, 
    27.1, 22.7, 19.7, 25, 22.3, 21.3, 17.5, 20.9, 20.1, 25.1, 
    22.1), x19= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"), 
    x20 = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 
    1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 
    1L, 1L, 1L, 2L, 2L), .Label = c("NO", "YES"), class = "factor"), 
    x21= structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 
    1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 
    1L, 1L, 1L, 1L, 1L), .Label = c("NO", "YES"), class = "factor")), row.names = c(NA, 
-42L), class = c("tbl_df", "tbl", "data.frame"))

 logit1 <-glm(outcome_1~., data = df, family = "binomial")

这产生了 logit 模型的经典错误消息:

#Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) : 
#  contrasts can be applied only to factors with 2 or more levels

好的,于是我仔细检查了所有因子变量是否确实都有 1 个以上的唯一值,并用下面的代码进行了验证:

sapply(lapply(df, unique), length)

返回结果显示所有变量都有 2 个或更多唯一值。但当我再次运行模型时,仍然出现相同的错误消息。

我什至尝试运行我在网上找到的一种解决方案:

values_count <- sapply(lapply(df, unique), length)

logit1 <-
  lm(outcome_1~ ., df[ , values_count > 1])

这是怎么回事?是不是有某个变量表面上看起来有多个唯一值、实际上却没有,而我没有发现?

谢谢!

标签: r glm

解决方案


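对于较简单的模型,回归在所提供的数据上可以正常运行(完整模型之所以报错,是因为 glm 默认会丢弃含有 NA 的行;丢弃之后,某些因子——例如 sex,其所有 "M" 行都含有缺失值——就只剩下一个水平了;另外 unique() 会把 NA 也算作一个唯一值),例如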

logit1 <-glm(outcome_1~ sex + age, data = df, family = "binomial")

这是一个变量很多的小型数据集,即使变量之间确实存在有意义的关系,计算机也无法将其提取出来。请先绘制一些探索性数据图,并思考您的结果变量与其他变量之间可能存在怎样的(生物学)关系,从而提出可以用数据检验的假设。说到底,您认为哪些测量值真正会影响患者的预后?


推荐阅读