首页 > 解决方案 > 无法使用 lrm 函数拟合惩罚逻辑回归模型

问题描述

我正在使用 rms 包中的 lrm 函数进行惩罚逻辑回归。我的数据如下:

> dput(cs_data_train[1:50,])
structure(list(DataCRMSanoflore.Year_Sales = structure(c(1L, 
2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
2L), .Label = c("2015", "2016", "2017"), class = "factor"), DataCRMSanoflore.HOURS_INSCR = c(14L, 
18L, 17L, 16L, 11L, 22L, 23L, 17L, 9L, 21L, 18L, 19L, 12L, 11L, 
17L, 16L, 21L, 20L, 14L, 19L, 22L, 17L, 22L, 13L, 19L, 13L, 21L, 
16L, 23L, 19L, 11L, 21L, 11L, 22L, 20L, 13L, 11L, 17L, 15L, 12L, 
15L, 21L, 17L, 14L, 10L, 17L, 10L, 12L, 18L, 13L), DataCRMSanoflore.Month_Sales = structure(c(9L, 
2L, 5L, 9L, 4L, 7L, 3L, 9L, 7L, 12L, 3L, 3L, 12L, 3L, 3L, 6L, 
3L, 4L, 5L, 8L, 8L, 1L, 4L, 10L, 9L, 5L, 4L, 9L, 2L, 12L, 9L, 
4L, 4L, 3L, 6L, 8L, 6L, 4L, 12L, 5L, 6L, 9L, 7L, 9L, 1L, 9L, 
7L, 11L, 11L, 4L), .Label = c("01", "02", "03", "04", "05", "06", 
"07", "08", "09", "10", "11", "12"), class = "factor"), DataCRMSanoflore.Date_Sales = structure(c(3L, 
10L, 22L, 23L, 26L, 13L, 12L, 2L, 25L, 11L, 10L, 9L, 4L, 10L, 
18L, 9L, 9L, 1L, 14L, 24L, 4L, 2L, 2L, 22L, 17L, 4L, 14L, 22L, 
2L, 5L, 29L, 13L, 2L, 10L, 25L, 5L, 10L, 1L, 6L, 20L, 7L, 9L, 
1L, 3L, 17L, 22L, 3L, 9L, 20L, 13L), .Label = c("01", "02", "03", 
"04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", 
"15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", 
"26", "27", "28", "29", "30", "31"), class = "factor"), DataCRMSanoflore.HOURS_INSCR.1 = c(14L, 
18L, 17L, 16L, 11L, 22L, 23L, 17L, 9L, 21L, 18L, 19L, 12L, 11L, 
17L, 16L, 21L, 20L, 14L, 19L, 22L, 17L, 22L, 13L, 19L, 13L, 21L, 
16L, 23L, 19L, 11L, 21L, 11L, 22L, 20L, 13L, 11L, 17L, 15L, 12L, 
15L, 21L, 17L, 14L, 10L, 17L, 10L, 12L, 18L, 13L), DataCRMSanoflore.Year_Creation_Sales = structure(c(1L, 
2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 
2L), .Label = c("2015", "2016", "2017"), class = "factor"), DataCRMSanoflore.Month_Creation_Sales = structure(c(9L, 
2L, 10L, 10L, 9L, 7L, 12L, 9L, 7L, 12L, 3L, 4L, 2L, 6L, 3L, 6L, 
10L, 4L, 5L, 8L, 3L, 1L, 4L, 11L, 9L, 5L, 4L, 9L, 2L, 12L, 10L, 
4L, 4L, 3L, 10L, 8L, 6L, 4L, 12L, 8L, 6L, 2L, 10L, 5L, 1L, 9L, 
8L, 11L, 11L, 4L), .Label = c("01", "02", "03", "04", "05", "06", 
"07", "08", "09", "10", "11", "12"), class = "factor"), DataCRMSanoflore.Day_Creation_Sales = structure(c(11L, 
15L, 2L, 31L, 26L, 23L, 5L, 2L, 25L, 16L, 10L, 13L, 7L, 3L, 18L, 
9L, 8L, 27L, 18L, 24L, 6L, 2L, 4L, 16L, 17L, 12L, 15L, 22L, 10L, 
5L, 1L, 14L, 2L, 10L, 5L, 5L, 10L, 25L, 6L, 5L, 28L, 8L, 10L, 
18L, 17L, 22L, 31L, 9L, 21L, 22L), .Label = c("01", "02", "03", 
"04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", 
"15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", 
"26", "27", "28", "29", "30", "31"), class = "factor"), DataCRMSanoflore.Year_Validation_Sales = structure(c(1L, 
2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 
2L), .Label = c("2015", "2016", "2017"), class = "factor"), DataCRMSanoflore.Month_Validation_Sales = structure(c(9L, 
2L, 10L, 11L, 10L, 7L, 12L, 9L, 7L, 12L, 3L, 4L, 2L, 6L, 3L, 
6L, 10L, 4L, 5L, 8L, 3L, 1L, 4L, 11L, 9L, 5L, 4L, 9L, 2L, 12L, 
10L, 4L, 4L, 3L, 10L, 8L, 6L, 4L, 12L, 8L, 6L, 2L, 10L, 5L, 1L, 
9L, 9L, 11L, 11L, 4L), .Label = c("01", "02", "03", "04", "05", 
"06", "07", "08", "09", "10", "11", "12"), class = "factor"), 
    DataCRMSanoflore.Day_Validation_Sales = structure(c(14L, 
    16L, 3L, 3L, 1L, 27L, 6L, 5L, 27L, 21L, 19L, 27L, 8L, 5L, 
    21L, 10L, 9L, 30L, 26L, 27L, 7L, 4L, 15L, 17L, 18L, 13L, 
    20L, 29L, 11L, 7L, 2L, 16L, 3L, 20L, 6L, 6L, 13L, 29L, 8L, 
    6L, 30L, 9L, 12L, 20L, 18L, 29L, 1L, 10L, 23L, 25L), .Label = c("01", 
    "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", 
    "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", 
    "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"
    ), class = "factor"), DataCRMSanoflore.AGE_CUSTUMER = c(37L, 
    23L, 34L, 32L, 45L, 52L, 44L, 55L, 37L, 29L, 33L, 29L, 30L, 
    37L, 56L, 48L, 44L, 42L, 45L, 33L, 37L, 53L, 55L, 60L, 57L, 
    33L, 51L, 32L, 35L, 54L, 41L, 47L, 59L, 33L, 45L, 35L, 36L, 
    28L, 42L, 24L, 32L, 39L, 33L, 36L, 49L, 56L, 45L, 39L, 54L, 
    55L), DataCRMSanoflore.MEAN_PURCHASE = c(71.75, 50.7142857142857, 
    18.6666666666667, 0, 0, 54.7, 0.666666666666667, 38, 6.5, 
    0, 83.3333333333333, 44.3333333333333, 25.7777777777778, 
    24.1818181818182, 23.3846153846154, 35.5294117647059, 21.6363636363636, 
    1.125, 6, 8.66666666666667, 18.4, 16.9285714285714, 0, 0, 
    36.5, 21.5, 18.5714285714286, 28.125, 101.333333333333, 0, 
    2, 0, 20.9166666666667, 69.1428571428571, 16.6666666666667, 
    1.5, 87.1666666666667, 48.25, 13.3333333333333, 20.5833333333333, 
    12, 0, 23, 15.1428571428571, 0, 30.4375, 30.3076923076923, 
    24.625, 23.4285714285714, 20.0833333333333), DataCRMSanoflore.NUMBER_GIFTS = c(1L, 
    1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 4L, 3L, 
    4L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 3L, 2L, 1L, 1L, 1L, 
    1L, 2L, 2L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 1L, 4L, 1L, 1L, 1L, 
    2L, 5L, 2L, 2L), SENSIBILITE = c(4L, 4L, 1L, 3L, 1L, 1L, 
    2L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 3L, 3L, 4L, 1L, 1L, 1L, 4L, 
    1L, 1L, 4L, 1L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 
    1L, 4L, 1L, 3L, 2L, 1L, 3L, 4L, 1L, 1L, 4L, 3L, 1L, 4L), 
    IMPERFECTIONS = c(4L, 3L, 1L, 2L, 1L, 1L, 4L, 1L, 1L, 1L, 
    3L, 1L, 2L, 1L, 3L, 2L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 
    3L, 3L, 3L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 3L, 1L, 2L, 
    3L, 1L, 2L, 2L, 1L, 1L, 3L, 3L, 1L, 3L), BRILLANCE = c(2L, 
    2L, 1L, 4L, 1L, 1L, 4L, 1L, 1L, 1L, 4L, 1L, 4L, 1L, 4L, 4L, 
    4L, 1L, 1L, 1L, 4L, 1L, 1L, 3L, 1L, 4L, 4L, 4L, 4L, 1L, 1L, 
    1L, 1L, 4L, 1L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 4L, 4L, 1L, 1L, 
    4L, 4L, 1L, 4L), GRAIN_PEAU = c(4L, 4L, 1L, 4L, 1L, 1L, 2L, 
    1L, 1L, 1L, 4L, 1L, 2L, 1L, 2L, 4L, 4L, 1L, 1L, 1L, 3L, 1L, 
    1L, 2L, 1L, 2L, 4L, 4L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
    2L, 1L, 4L, 4L, 1L, 2L, 4L, 1L, 1L, 4L, 3L, 1L, 4L), RIDES_VISAGE = c(2L, 
    2L, 1L, 4L, 1L, 1L, 4L, 1L, 1L, 1L, 4L, 1L, 2L, 1L, 4L, 2L, 
    4L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 2L, 4L, 2L, 2L, 1L, 1L, 
    1L, 1L, 2L, 1L, 1L, 1L, 4L, 1L, 2L, 4L, 1L, 2L, 4L, 1L, 1L, 
    4L, 4L, 1L, 4L), ALLERGIES = c(2L, 2L, 1L, 2L, 1L, 1L, 2L, 
    1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 
    1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
    2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 3L, 2L, 1L, 2L), MAINS = c(4L, 
    4L, 1L, 4L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 3L, 
    3L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 3L, 4L, 4L, 3L, 1L, 1L, 
    1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 4L, 3L, 1L, 3L, 4L, 1L, 1L, 
    3L, 3L, 1L, 4L), PEAU_CORPS = c(3L, 3L, 1L, 2L, 1L, 1L, 2L, 
    1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 3L, 1L, 1L, 1L, 2L, 1L, 
    1L, 3L, 1L, 3L, 3L, 2L, 3L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 
    3L, 1L, 3L, 2L, 1L, 2L, 4L, 1L, 1L, 3L, 3L, 1L, 3L), INTERET_ALIM_NATURELLE = c(4L, 
    4L, 1L, 2L, 1L, 1L, 4L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 4L, 2L, 
    2L, 1L, 1L, 1L, 2L, 1L, 1L, 4L, 1L, 4L, 2L, 2L, 2L, 1L, 1L, 
    1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 4L, 4L, 1L, 4L, 2L, 1L, 1L, 
    4L, 2L, 1L, 2L), INTERET_ORIGINE_GEO = c(4L, 2L, 1L, 2L, 
    1L, 1L, 5L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 5L, 2L, 1L, 1L, 
    1L, 2L, 1L, 1L, 2L, 1L, 2L, 5L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
    1L, 1L, 1L, 2L, 1L, 5L, 5L, 1L, 4L, 2L, 1L, 1L, 2L, 2L, 1L, 
    2L), INTERET_VACANCES = c(4L, 2L, 1L, 3L, 1L, 1L, 2L, 1L, 
    1L, 1L, 3L, 1L, 2L, 1L, 3L, 4L, 3L, 1L, 1L, 1L, 2L, 1L, 1L, 
    3L, 1L, 4L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 
    1L, 2L, 2L, 1L, 4L, 3L, 1L, 1L, 2L, 2L, 1L, 2L), INTERET_ENVIRONNEMENT = c(5L, 
    5L, 1L, 5L, 1L, 1L, 5L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 3L, 
    3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 
    1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 3L, 5L, 1L, 5L, 3L, 1L, 1L, 
    3L, 5L, 1L, 3L), INTERET_COMPOSITION = c(2L, 2L, 1L, 4L, 
    1L, 1L, 4L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 
    1L, 2L, 1L, 1L, 4L, 1L, 4L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
    1L, 1L, 1L, 4L, 1L, 2L, 4L, 1L, 4L, 2L, 1L, 1L, 2L, 2L, 1L, 
    2L), DataCRMSanoflore.Nb_achats = c(4, 7, 3, 3, 4, 10, 3, 
    4, 14, 4, 6, 6, 9, 22, 26, 17, 22, 8, 3, 9, 10, 14, 3, 7, 
    12, 6, 14, 16, 3, 3, 3, 3, 12, 7, 3, 6, 6, 12, 18, 12, 15, 
    6, 21, 7, 6, 16, 13, 16, 14, 12), OUTCOME = structure(c(1L, 
    2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
    1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor")), .Names = c("DataCRMSanoflore.Year_Sales", 
"DataCRMSanoflore.HOURS_INSCR", "DataCRMSanoflore.Month_Sales", 
"DataCRMSanoflore.Date_Sales", "DataCRMSanoflore.HOURS_INSCR.1", 
"DataCRMSanoflore.Year_Creation_Sales", "DataCRMSanoflore.Month_Creation_Sales", 
"DataCRMSanoflore.Day_Creation_Sales", "DataCRMSanoflore.Year_Validation_Sales", 
"DataCRMSanoflore.Month_Validation_Sales", "DataCRMSanoflore.Day_Validation_Sales", 
"DataCRMSanoflore.AGE_CUSTUMER", "DataCRMSanoflore.MEAN_PURCHASE", 
"DataCRMSanoflore.NUMBER_GIFTS", "SENSIBILITE", "IMPERFECTIONS", 
"BRILLANCE", "GRAIN_PEAU", "RIDES_VISAGE", "ALLERGIES", "MAINS", 
"PEAU_CORPS", "INTERET_ALIM_NATURELLE", "INTERET_ORIGINE_GEO", 
"INTERET_VACANCES", "INTERET_ENVIRONNEMENT", "INTERET_COMPOSITION", 
"DataCRMSanoflore.Nb_achats", "OUTCOME"), row.names = c(22L, 
33L, 40L, 48L, 54L, 59L, 74L, 78L, 87L, 89L, 104L, 115L, 121L, 
141L, 159L, 161L, 163L, 165L, 196L, 202L, 211L, 222L, 272L, 300L, 
318L, 325L, 327L, 349L, 374L, 380L, 392L, 393L, 394L, 398L, 427L, 
440L, 449L, 456L, 470L, 477L, 479L, 490L, 505L, 508L, 514L, 520L, 
528L, 531L, 534L, 543L), class = "data.frame")

然后,当我想使用此代码拟合模型时:

fit = lrm(OUTCOME ~ .-1,data = cs_data_train,x=T, y=T)

它给出了一个错误:

lrm.fit 中的奇异信息矩阵(rank= 148 )。违规变量:DataCRMSanoflore.HOURS_INSCR.1
lrm(OUTCOME ~ . - 1, data = cs_data_train, x = T, y = T) 中的错误:无法使用“lrm.fit”拟合模型

我搜索了但我无法解决这个问题。谢谢您的帮助!

编辑:

正如下面的评论所说,我需要在每对高度相关的变量中删除其中一个。所以我写了这段代码:

> highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=(0.7),verbose = FALSE)
> print(highlyCorrelated)
 [1] 21 20 26 15 18 17 22 16 25 19 23 24  6  9  7 10 28  2
> important_var=colnames(DATA_BASE[,-highlyCorrelated])
> important_var
[1] "DataCRMSanoflore.Year_Sales"         "DataCRMSanoflore.Date_Sales"         "DataCRMSanoflore.HOURS_INSCR.1"     
[4] "DataCRMSanoflore.Day_Creation_Sales" "DataCRMSanoflore.MEAN_PURCHASE"      "OUTCOME"                            
> DATA_BASE<-DATA_BASE[,-highlyCorrelated]
> str(DATA_BASE)
'data.frame':   5775 obs. of  6 variables:
 $ DataCRMSanoflore.Year_Sales        : num  2 1 2 1 2 1 1 1 1 2 ...
 $ DataCRMSanoflore.Date_Sales        : num  13 3 10 22 23 26 13 1 12 2 ...
 $ DataCRMSanoflore.HOURS_INSCR.1     : num  17 14 18 17 16 11 22 14 23 17 ...
 $ DataCRMSanoflore.Day_Creation_Sales: num  13 11 15 2 31 26 23 1 5 2 ...
 $ DataCRMSanoflore.MEAN_PURCHASE     : num  0 71.8 50.7 18.7 0 ...
 $ OUTCOME                            : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 2 2 1 1 ...

但是我得到了同样的错误

lrm(OUTCOME ~ . - 1, data = train, x = T, y = T) 中的错误:无法使用“lrm.fit”拟合模型

这真的很奇怪!

请问我该如何解决?

标签: r

解决方案


推荐阅读