首页 > 解决方案 > 为什么在 R 中使用 glmnet 时出现此错误

问题描述

当试图预测我得到这个错误

  error in evaluating the argument 'x' in selecting a method for function 'as.matrix': Cholmod error 'X and/or Y have wrong dimensions' at file ../MatrixOps/cholmod_sdmult.c, line 90

到目前为止,这是我的代码

library(data.table)
library(caret)
library(Metrics)
library(glmnet)
library(plotmo)
library(lubridate)

#Reading in the necessary data needed for this project

train <- fread("project/volume/data/processed/start_train.csv")
test<-fread("project/volume/data/processed/start_test.csv")
example_sub<-fread("project/volume/data/processed/example_submission.csv")
card_tab <- fread("project/volume/data/processed/card_tab.csv")


#Merging the card_tab dataset with both my train and test datasets to add more variables to each

train = merge(train, card_tab, by = "id")
test = merge(test, card_tab, by = "id")

train$power = as.numeric(train$power)
train$toughness = as.numeric(train$toughness)
test$power = as.numeric(test$power)
test$toughness = as.numeric(test$toughness)

train$powerovertough = train$power/train$toughness
test$powerovertough = test$power/test$toughness

train$current_date<-as_date(train$current_date)
train<-train[order(-current_date)]

test$current_date<-as_date(test$current_date)
test<-test[order(-current_date)]

#Handling NA values in both train and test. The NA values will be set to 0

train[is.na(train)] <- 0
test[is.na(test)] <- 0

# Specifying which columns of our model we will be dropping and applying it to our train and test datasets

drops<- c('id','future_date','current_date','card_name','power','loyalty','cmc','type','colors','mana_cost','subtypes', 'text','set','set_name')

train<-train[, !drops, with = FALSE]
test<-test[, !drops, with = FALSE]


#Saving the response variable in the train dataset

train_y<-train$future_price

test$future_price<-0

#Using dummies to train model

dummies <- dummyVars(future_price ~ ., data = train)
train<-predict(dummies, newdata = train)
test<-predict(dummies, newdata = test)

train<-data.table(train)
test<-data.table(test)

#Cross validating the model to fin the best lamda value

train<-as.matrix(train)
test<-as.matrix(test)

gl_model<-cv.glmnet(train, train_y, alpha = 1,family="gaussian")

bestlam<-gl_model$lambda.min

# Fitting the full model 

gl_model<-glmnet(train, train_y, alpha = 1,family="gaussian")

plot_glmnet(gl_model)

saveRDS(gl_model,"./project/volume/models/gl_model.model")

test<-as.matrix(test)

#use the full model
pred<-predict(gl_model,s=bestlam, newx = test)

我正在尝试为我的测试集预测 future_price。错误是说我的尺寸错误,但我不知道是什么导致它们不同。我尝试观察数据集,因为它在代码中运行,它们似乎具有相同的变量。

这是输入

> dput(head(train))
structure(c(0.25, 0.1, 0.1, 0.1, 0.25, 0.25, 1, 0, 0, 1, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 3, 0, 2, 0, 0, 0, 0.333333333333333, 
0, 1, 0), .Dim = c(6L, 20L), .Dimnames = list(NULL, c("current_price", 
"typesArtifact", "typesArtifact Creature", "typesCreature", "typesEnchantment", 
"typesEnchantment Artifact", "typesEnchantment Creature", "typesInstant", 
"typesLand", "typesPlaneswalker", "typesSorcery", "supertypes", 
"supertypesBasic", "supertypesLegendary", "rarityCommon", "rarityMythic", 
"rarityRare", "rarityUncommon", "toughness", "powerovertough"
)))
> dput(head(test))
structure(c(0.15, 0.16, 2, 0.39, 0.16, 0.19, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 
0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 
1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0), .Dim = c(6L, 18L), .Dimnames = list(
    NULL, c("current_price", "typesArtifact", "typesArtifact Creature", 
    "typesCreature", "typesEnchantment", "typesInstant", "typesLand", 
    "typesPlaneswalker", "typesSorcery", "supertypes", "supertypesBasic", 
    "supertypesLegendary", "rarityCommon", "rarityMythic", "rarityRare", 
    "rarityUncommon", "toughness", "powerovertough")))

标签: rmachine-learning

解决方案


推荐阅读