首页 > 解决方案 > 如何在R中使用带有外生变量的var进行预测

问题描述

我有以下数据:

library(data.table)
modelling_dt_train <- structure(list(`1` = c(54593L, 74481L, 85566L, 97637L, 101081L, 
                                             184089L, 158895L, 153780L, 153681L, 157188L, 142216L, 136437L, 
                                             135501L, 111264L, 123259L, 110397L, 146034L, 162900L, 132499L, 
                                             121516L, 119651L, 114045L, 112551L, 123209L, 134930L, 132147L, 
                                             151327L, 155666L, 158538L, 205766L, 200407L, 219588L, 231954L, 
                                             179884L, 159121L, 156148L, 136191L, 132956L, 202086L, 141047L, 
                                             118490L, 116595L, 127620L, 135962L, 137419L, 127334L, 158804L, 
                                             139142L, 181773L, 228278L, 272373L, 186666L, 148791L, 143608L, 
                                             169634L, 188149L, 239867L, 332543L, 253463L, 240574L, 237245L, 
                                             275466L, 262755L, 241538L, 303377L), 
                                     `2` = c(148181L, 186894L, 
                                             243357L, 227298L, 195640L, 412137L, 363152L, 355169L, 296208L, 
                                             328993L, 281652L, 308027L, 316254L, 249293L, 320821L, 220521L, 
                                             284411L, 263807L, 258093L, 261060L, 320153L, 311547L, 279734L, 
                                             258453L, 269697L, 313700L, 255285L, 232495L, 305346L, 393256L, 
                                             390655L, 527039L, 529056L, 450689L, 425190L, 372144L, 303765L, 
                                             324658L, 365035L, 285178L, 230985L, 251308L, 290378L, 279595L, 
                                             294676L, 391377L, 445682L, 364056L, 441207L, 516852L, 673401L, 
                                             415677L, 304000L, 266365L, 311924L, 314192L, 407313L, 664519L, 
                                             456920L, 384978L, 351644L, 432627L, 409624L, 386330L, 487679L
                                     ), 
                                     `3` = c(60217L, 66492L, 66675L, 76400L, 117252L, 264527L, 
                                             256384L, 241815L, 187115L, 193106L, 177620L, 140833L, 188291L, 
                                             110069L, 163581L, 107650L, 118319L, 118821L, 122383L, 117267L, 
                                             134962L, 121227L, 124952L, 111740L, 137493L, 163895L, 60653L, 
                                             69311L, 88810L, 128620L, 132077L, 153399L, 162989L, 151866L, 
                                             127325L, 122813L, 115284L, 103765L, 113185L, 101607L, 92379L, 
                                             98646L, 94376L, 98069L, 98972L, 103074L, 142199L, 123497L, 141823L, 
                                             205582L, 251187L, 109603L, 80711L, 80799L, 84175L, 104965L, 181221L, 
                                             245377L, 201378L, 235504L, 188925L, 214614L, 220312L, 191591L, 
                                             203292L)), 
                                .Names = c("1", "2", "3"), class = c("data.table", 
                                                                     "data.frame"), row.names = c(NA, -65L))

modelling_x_train <- structure(list(`1` = c(1982134L, 1968327L, 2019222L, 2025126L, 
                                            2033065L, 2188202L, 2066808L, 2070103L, 2041154L, 2201142L, 2105848L, 
                                            2067669L, 2005707L, 2239632L, 2435928L, 2363759L, 2444016L, 2556139L, 
                                            2807283L, 2674632L, 2687984L, 2889011L, 2839239L, 2712064L, 2928420L, 
                                            2889533L, 3106868L, 2746471L, 2953436L, 3225171L, 2926874L, 2914124L, 
                                            3210355L, 2847523L, 2890636L, 3268445L, 2941468L, 2931027L, 2906610L, 
                                            3222324L, 2833093L, 2978953L, 3196315L, 3055240L, 3210672L, 3368890L, 
                                            3046191L, 2960181L, 3341146L, 3227672L, 3062702L, 3197227L, 3445476L, 
                                            3441273L, 3651232L, 3566179L, 3619685L, 3716756L, 3600666L, 3732533L, 
                                            3695464L, 3857145L, 3700072L, 3608183L, 3904237L),
                                    `2` = c(4082316L, 
                                            4644387L, 5230567L, 5115720L, 4729153L, 5658227L, 5492034L, 5443022L, 
                                            5094415L, 5939637L, 5354626L, 5509783L, 5438960L, 4912936L, 5736293L, 
                                            5167632L, 5244341L, 5580274L, 5750346L, 5358527L, 5916955L, 6129790L, 
                                            5245982L, 5801479L, 5683117L, 5721551L, 6972176L, 7072498L, 7979325L, 
                                            8324202L, 7434885L, 8189438L, 8062609L, 7658496L, 8066643L, 8528136L, 
                                            7515745L, 8276800L, 8227022L, 6523804L, 5780869L, 6481060L, 6912797L, 
                                            6276934L, 6592158L, 6908732L, 6067945L, 6459707L, 6910377L, 6645470L, 
                                            6538196L, 6694136L, 7484290L, 7299620L, 8532078L, 7713988L, 7256825L, 
                                            8237839L, 7834919L, 7725377L, 7291804L, 8224205L, 7784470L, 7514557L, 
                                            8164590L), 
                                    `3` = c(3181556L, 3232260L, 3272852L, 3233534L, 2876956L, 
                                            2979204L, 3275916L, 3345278L, 2951867L, 2976889L, 3289397L, 2955148L, 
                                            3306653L, 1861934L, 2239827L, 2207356L, 2335514L, 2387791L, 2592206L, 
                                            2371527L, 2586856L, 2447660L, 2322218L, 2342827L, 2666258L, 2627928L, 
                                            2525534L, 2521129L, 2573991L, 2752528L, 2538251L, 2676848L, 2802139L, 
                                            2702108L, 2630417L, 2778233L, 2725544L, 2723849L, 2795745L, 1954820L, 
                                            1842684L, 2132844L, 2182141L, 2041725L, 2355857L, 2414334L, 2350885L, 
                                            2367547L, 2436918L, 2328244L, 2390647L, 2460700L, 3081623L, 2877487L, 
                                            3025104L, 3108909L, 3172441L, 3267766L, 3354357L, 3273165L, 3322516L, 
                                            3342817L, 3413854L, 3217624L, 2736617L)), 
                               .Names = c("1", "2", 
                                          "3"), class = c("data.table", "data.frame"), row.names = c(NA, 
                                                                                                     -65L))

其中modelling_dt_train是 3 个产品的时间序列,是相同modelling_x_train产品的外生变量(也是时间序列)。

我正在VAR使用以下代码估计模型

library(vars)
 x <- log(modelling_dt_train)
 x <- x[,lapply(.SD,function(x){ifelse(is.infinite(x),0,x)})]
 modelling_x_train <- log(modelling_x_train)
 modelling_x_train <- modelling_x_train[,lapply(.SD,function(x){ifelse(is.infinite(x),0,x)})]

x_mat <- as.matrix(x)
dx <- x_mat

var = VAR(dx, p=p,  exogen = modelling_x_train, season = 18)

到目前为止一切都很好,但是当我想预测 12 个周期的值时,使用

predict(var, newdata = modelling_dt_test, dumvar = modelling_x_test, n.ahead = 12)

我收到一个错误:

Error in predict.varest(var, newdata = modelling_dt_test, dumvar = modelling_x_test,  : 

Column names of dumvar do not coincide with exogen.

我使用的thenewdata和 the是以前相同产品的一些未来值,但时间更远dumvar

modelling_x_test <- structure(list(`1` = c(4447896L, 4779229L, 4628391L, 4737933L, 
                                           5102152L, 4838918L, 4955183L, 5258605L, 5084001L, 4798945L, 5204015L, 
                                           5129690L, 5101568L), 
                                   `2` = c(6108187L, 6733956L, 7065148L, 7111155L, 
                                           6513151L, 7622806L, 7062042L, 7206067L, 7144091L, 7412266L, 6752614L, 
                                           7705255L, 7487054L), 
                                   `3` = c(1716975L, 2022198L, 2122109L, 2155489L, 
                                           2428639L, 2433860L, 2717315L, 2471655L, 2795100L, 2908946L, 2581813L, 
                                           2633578L, 2666302L)),
                              .Names = c("1", "2", "3"), class = c("data.table", 
                                                                   "data.frame"), row.names = c(NA, -13L))

modelling_dt_test <- structure(list(`1` = c(244876L, 275993L, 256180L, 321256L, 316042L, 
                                            275097L, 250842L, 245543L, 233386L, 218958L, 254270L, 238804L, 
                                            234079L), 
                                    `2` = c(375278L, 429496L, 478816L, 532311L, 442922L, 
                                            485787L, 460750L, 501956L, 454178L, 425800L, 413112L, 434328L, 
                                            446069L), 
                                    `3` = c(119577L, 139870L, 127951L, 125017L, 138176L, 
                                            114517L, 129880L, 120941L, 159176L, 157890L, 149554L, 144210L, 
                                            165979L)), 
                               .Names = c("1", "2", "3"), class = c("data.table", 
                                                                    "data.frame"), row.names = c(NA, -13L))

编辑

在这里的源代码中,第 58 行有这个检查。但是这条线检查的是 if colnames(data.all)(which are "X1" "X2" "X3" "X1.l1" "X2.l1" "X3.l1" "const" "sd1" "sd2" "sd3" "sd4" "sd5" "sd6" "sd7" "sd8" "sd9" "sd10" "sd11" "sd12" "sd13" "sd14" "sd15" "sd16" "sd17" "X1.1" "X2.1" "X3.1",所以在我看来系数) 是否等于colnames(modelling_x_test)which are "1" "2" "3"(产品)。这些怎么可能相等

标签: rtime-seriesvarpredictionforecasting

解决方案


推荐阅读