r - autoarima 训练和测试,在 r 中按 id 分组
问题描述
我正在尝试使用 auto.arima 预测时间序列。我需要的是拆分训练和测试数据以查看模型指标。我的日期范围在 2016 年 12 月到 2020 年 1 月之间。我需要将火车数据保存到 2018 年 12 月,然后再进行测试。
除此之外,我还需要每个 id 的度量 RMSE 和 MSE。这是我的数据示例:
x<- tibble::tribble(
~ID, ~Date, ~Value,
1L, "01-12-2016", 48L,
1L, "01-01-2017", 10055L,
1L, "01-02-2017", 650L,
1L, "01-03-2017", 8255L,
1L, "01-04-2017", 3680L,
1L, "01-05-2017", 2180L,
1L, "01-06-2017", 2790L,
1L, "01-07-2017", 3805L,
1L, "01-08-2017", 2811L,
1L, "01-09-2017", -225L,
1L, "01-10-2017", -232L,
1L, "01-11-2017", -243L,
1L, "01-12-2017", -217L,
1L, "01-01-2018", -256L,
1L, "01-02-2018", -277L,
1L, "01-03-2018", -3L,
1L, "01-04-2018", -247L,
1L, "01-05-2018", 88L,
1L, "01-06-2018", -260L,
1L, "01-07-2018", -228L,
1L, "01-08-2018", -285L,
1L, "01-09-2018", -321L,
1L, "01-10-2018", -265L,
1L, "01-11-2018", -302L,
1L, "01-12-2018", -11968L,
1L, "01-01-2019", 5435L,
1L, "01-02-2019", 6694L,
1L, "01-03-2019", 4750L,
1L, "01-04-2019", 3747L,
1L, "01-05-2019", 3727L,
1L, "01-06-2019", 3252L,
1L, "01-07-2019", 1691L,
1L, "01-08-2019", 2489L,
1L, "01-09-2019", -182L,
1L, "01-10-2019", 3926L,
1L, "01-11-2019", 326L,
1L, "01-12-2019", -1047L,
1L, "01-01-2020", 14L,
2L, "01-12-2016", -241L,
2L, "01-01-2017", -262L,
2L, "01-02-2017", -231L,
2L, "01-03-2017", -203L,
2L, "01-04-2017", -226L,
2L, "01-05-2017", -223L,
2L, "01-06-2017", -300L,
2L, "01-07-2017", -259L,
2L, "01-08-2017", -241L,
2L, "01-09-2017", -225L,
2L, "01-10-2017", -227L,
2L, "01-11-2017", -243L,
2L, "01-12-2017", -217L,
2L, "01-01-2018", -256L,
2L, "01-02-2018", -277L,
2L, "01-03-2018", 0L,
2L, "01-04-2018", -247L,
2L, "01-05-2018", -274L,
2L, "01-06-2018", -264L,
2L, "01-07-2018", -227L,
2L, "01-08-2018", -275L,
2L, "01-09-2018", -325L,
2L, "01-10-2018", -269L,
2L, "01-11-2018", -306L,
2L, "01-12-2018", -264L,
2L, "01-01-2019", -308L,
2L, "01-02-2019", -332L,
2L, "01-03-2019", -260L,
2L, "01-04-2019", -300L,
2L, "01-05-2019", -302L,
2L, "01-06-2019", -291L,
2L, "01-07-2019", -284L,
2L, "01-08-2019", -288L,
2L, "01-09-2019", -272L,
2L, "01-10-2019", 0L,
2L, "01-11-2019", 0L,
2L, "01-12-2019", -17107L,
2L, "01-01-2020", 3500L,
3L, "01-12-2016", 1940L,
3L, "01-01-2017", 1753L,
3L, "01-02-2017", 2758L,
3L, "01-03-2017", 2539L,
3L, "01-04-2017", -9078L,
3L, "01-05-2017", 5215L,
3L, "01-06-2017", 1796L,
3L, "01-07-2017", -8424L,
3L, "01-08-2017", 19868L,
3L, "01-09-2017", 10707L,
3L, "01-10-2017", 8985L,
3L, "01-11-2017", 3058L,
3L, "01-12-2017", 2469L,
3L, "01-01-2018", 21L,
3L, "01-02-2018", 1039L,
3L, "01-03-2018", 2875L,
3L, "01-04-2018", -2678L,
3L, "01-05-2018", 1515L,
3L, "01-06-2018", 2651L,
3L, "01-07-2018", -5014L,
3L, "01-08-2018", 299L,
3L, "01-09-2018", 1755L,
3L, "01-10-2018", 5009L,
3L, "01-11-2018", 2857L,
3L, "01-12-2018", 2909L,
3L, "01-01-2019", 1353L,
3L, "01-02-2019", 2337L,
3L, "01-03-2019", 3019L,
3L, "01-04-2019", -531L,
3L, "01-05-2019", -1055L,
3L, "01-06-2019", 1706L,
3L, "01-07-2019", -507L,
3L, "01-08-2019", 2234L,
3L, "01-09-2019", 890L,
3L, "01-10-2019", 94L,
3L, "01-11-2019", -1781L,
3L, "01-12-2019", 102590L,
3L, "01-01-2020", 471L,
4L, "01-12-2016", 2658L,
4L, "01-01-2017", 2344L,
4L, "01-02-2017", 2728L,
4L, "01-03-2017", -58L,
4L, "01-04-2017", -226L,
4L, "01-05-2017", -5L,
4L, "01-06-2017", -300L,
4L, "01-07-2017", -259L,
4L, "01-08-2017", -241L,
4L, "01-09-2017", -225L,
4L, "01-10-2017", -229L,
4L, "01-11-2017", -243L,
4L, "01-12-2017", -217L,
4L, "01-01-2018", -245L,
4L, "01-02-2018", -277L,
4L, "01-03-2018", -155L,
4L, "01-04-2018", 5437L,
4L, "01-05-2018", 2866L,
4L, "01-06-2018", 3091L,
4L, "01-07-2018", 3669L,
4L, "01-08-2018", 311L,
4L, "01-09-2018", 4120L,
4L, "01-10-2018", 2357L,
4L, "01-11-2018", -4759L,
4L, "01-12-2018", 4220L,
4L, "01-01-2019", 2730L,
4L, "01-02-2019", 2515L,
4L, "01-03-2019", 2560L,
4L, "01-04-2019", 2864L,
4L, "01-05-2019", 1935L,
4L, "01-06-2019", 938L,
4L, "01-07-2019", 3268L,
4L, "01-08-2019", 3232L,
4L, "01-09-2019", 3347L,
4L, "01-10-2019", 4241L,
4L, "01-11-2019", -247L,
4L, "01-12-2019", 179L,
4L, "01-01-2020", 2542L
)
x<-as.data.frame(x)
我尝试使用 dplyr:
x %>%
group_by(ID) %>%
do(fit=auto.arima(.$Value,seasonal = F,stepwise = F,approximation = F),
fit_forecast=forecast(auto.arima(.$Value,seasonal = F,stepwise = F,approximation = F),h=12))
但我不知道如何添加训练和测试步骤以及指标。有谁知道如何解决它?谢谢 !
解决方案
我已经在https://community.rstudio.com/t/autoarima-train-and-test-grouped-by-id-in-r/66400回答了同样的问题,但这里再次记录。
像这样使用 tsibble 和 fable 包更容易做到这一点。
library(dplyr)
library(tsibble)
library(lubridate)
library(fable)
# Turn x into a tsibble object
x <- x %>%
mutate(Date = yearmonth(dmy(Date))) %>%
as_tsibble(index = Date, key = ID)
# Use filter to create training set
# Then fit non-seasonal ARIMA models
fit <- x %>%
filter(Date <= yearmonth("2018 Dec")) %>%
model(ARIMA(Value ~ PDQ(0,0,0), stepwise=FALSE, approximation=FALSE))
# Now forecast the test set and compute RMSE and MSE
fit %>%
forecast(h = 13) %>%
accuracy(x) %>%
mutate(MSE = RMSE^2) %>%
select(ID, RMSE, MSE)
#> # A tibble: 4 x 3
#> ID RMSE MSE
#> <int> <dbl> <dbl>
#> 1 1 9696. 94011195.
#> 2 2 4792. 22964718.
#> 3 3 27899. 778326636.
#> 4 4 1776. 3153398.
由reprex 包(v0.3.0)于 2020-05-18 创建
推荐阅读
- oracle11g - 我有外键约束的问题
- python - 如何为每列制作具有不同行的多列
- javascript - 使用 javascript/Jquery 在 Gridview 中启用/禁用文本框的问题
- c# - 如何从另一个方法调用取消正在运行的方法。时间:2019-04-01 标签:c#.core 2.2
- javascript - 如何按元素值对对象数组进行排序?
- apache - AMPPS - 文件 /Applications/AMPPS/ampps/softaculous/includes/enduser.php 已损坏
- linux - 如何使用 .pgpass 文件中给出的密码连接到 postgres?
- bash - 从共享库文件中的 bash 函数中获取两个值
- python - Networkx:创建具有给定社区数量的有向无标度图
- scala - 当没有明确给出返回类型时,为什么可以用返回 String 的方法覆盖返回 Unit 的方法?