首页 > 解决方案 > 预测:在 R 中计算 MAPE 时,长度为 3 的列表没有意义

问题描述

在这个数据中

timeseries=structure(list(Data = structure(c(10L, 14L, 18L, 22L, 26L, 29L, 
                                             32L, 35L, 38L, 1L, 4L, 7L, 11L, 15L, 19L, 23L, 27L, 30L, 33L, 
                                             36L, 39L, 2L, 5L, 8L, 12L, 16L, 20L, 24L, 28L, 31L, 34L, 37L, 
                                             40L, 3L, 6L, 9L, 13L, 17L, 21L, 25L), .Label = c("01.01.2018", 
                                                                                              "01.01.2019", "01.01.2020", "01.02.2018", "01.02.2019", "01.02.2020", 
                                                                                              "01.03.2018", "01.03.2019", "01.03.2020", "01.04.2017", "01.04.2018", 
                                                                                              "01.04.2019", "01.04.2020", "01.05.2017", "01.05.2018", "01.05.2019", 
                                                                                              "01.05.2020", "01.06.2017", "01.06.2018", "01.06.2019", "01.06.2020", 
                                                                                              "01.07.2017", "01.07.2018", "01.07.2019", "01.07.2020", "01.08.2017", 
                                                                                              "01.08.2018", "01.08.2019", "01.09.2017", "01.09.2018", "01.09.2019", 
                                                                                              "01.10.2017", "01.10.2018", "01.10.2019", "01.11.2017", "01.11.2018", 
                                                                                              "01.11.2019", "01.12.2017", "01.12.2018", "01.12.2019"), class = "factor"), 
                          client = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                               1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                               2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                               1L), .Label = c("Horns", "Kornev"), class = "factor"), stuff = structure(c(1L, 
                                                                                                                          1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 
                                                                                                                          3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 
                                                                                                                          2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("chickens", 
                                                                                                                                                                          "hooves", "Oysters"), class = "factor"), Sales = c(374L, 
                                                                                                                                                                                                                             12L, 120L, 242L, 227L, 268L, 280L, 419L, 12L, 172L, 336L, 
                                                                                                                                                                                                                             117L, 108L, 150L, 90L, 117L, 116L, 146L, 120L, 211L, 213L, 
                                                                                                                                                                                                                             67L, 146L, 118L, 152L, 122L, 201L, 497L, 522L, 65L, 268L, 
                                                                                                                                                                                                                             441L, 247L, 348L, 445L, 477L, 62L, 226L, 476L, 306L)), .Names = c("Data", 
                                                                                                                                                                                                                                                                                               "client", "stuff", "Sales"), class = "data.frame", row.names = c(NA, 
                                                                                                                                                                                                                                                                                                                                                                -40L))

按组创建预测

# Build the per-series key by concatenating the client and product names.
timeseries[["group"]] <- paste0(timeseries[["client"]], timeseries[["stuff"]])

# Distinct keys, in order of first appearance in the (still unsorted) data.
groups <- unique(timeseries[["group"]])

# Parse the dd.mm.yyyy strings into Date values and sort rows chronologically.
timeseries[["date"]] <- as.Date(
  as.character(timeseries[["Data"]]),
  format = "%d.%m.%Y"
)
timeseries <- timeseries[order(timeseries[["date"]]), ]

# With rows sorted by date, the first row matching each key carries that
# group's earliest date. Store it as a list of c("YEAR", "MONTH") strings,
# one element per entry of `groups` (same order as `groups`).
start_dates <- strsplit(
  format(timeseries[["date"]][match(groups, timeseries[["group"]])], "%Y %m"),
  " "
)


# One data.frame per group key, collected in a named list.
listed <- split(timeseries, f = timeseries[["group"]])
# Show only the structure of the list to keep the printed output short.
str(listed)

library("forecast")
library("lubridate")
# Convert each group's Sales column into a monthly ts object.
#
# `listed` comes from split(), which orders its elements by sorted group name,
# whereas `start_dates` follows the first-appearance order of `groups`. Those
# two orders differ for this data, so the start date is looked up by group
# name rather than by position; otherwise a series can be given another
# group's start (year, month).
#
# The result stays an unnamed list, in the same order as `listed`.
listed_ts <- lapply(names(listed), function(group_name) {
  start_ym <- as.integer(start_dates[[match(group_name, groups)]])
  ts(listed[[group_name]][["Sales"]], start = start_ym, frequency = 12)
})

listed_ts

# Fit one ARIMA model per series with the mean term disallowed.
# FALSE is spelled out because the shorthand F is an ordinary variable that
# can be reassigned.
listed_arima <- lapply(listed_ts, function(x) auto.arima(x, allowmean = FALSE))

# Forecast 5 periods ahead for each fitted model.
listed_forecast <- lapply(listed_arima, function(x) forecast(x, h = 5))
listed_forecast

do.call(rbind, listed_forecast)

# Fitted (in-sample) values of every model, one list element per series.
lapply(listed_arima, fitted)

# As a side comment, note that the call above is equivalent to
lapply(listed_arima, function(x) fitted(x))

# For the same reason, auto.arima can be handed to lapply() by name.
# NOTE(review): this refit uses auto.arima() defaults (no allowmean = FALSE)
# and overwrites the models fitted above - confirm that this is intended.
listed_arima <- lapply(listed_ts, auto.arima)

所以我想计算 MAPE 使用 library("MLmetrics")

让我们检查帮助

?MAPE(y_pred, y_true)

y_true 是时间序列数据,y_pred 是 lapply(listed_arima, fitted) 的结果

所以我这样做

MAPE(lapply(listed_arima, fitted), timeseries)

并得到错误

Error in Ops.data.frame(y_true, y_pred) : list of length 3 not meaningful

怎么了?为什么我不能使用 MLmetrics 库(library("MLmetrics"))中的这个函数计算 MAPE 指标?如何为每个组计算 MAPE?作为输出,我想要一个像我的示例中那样的数据框(图:在此处输入图像描述)

如何达到这个输出?

标签: r, dplyr, time-series

解决方案


你需要的是

# Apply MAPE pairwise: the i-th vector of fitted values against the i-th
# group's observed Sales (split() yields one Sales vector per group).
mapply(
  FUN = MAPE,
  lapply(listed_arima, fitted),
  split(timeseries$Sales, timeseries$group)
)
# [1] 3.4659421 0.8926123 0.2577634

通过这种方式,我们将 MAPE 应用于列表 lapply(listed_arima, fitted) 和 split(timeseries$Sales, timeseries$group) 的每一对对应元素。

下面这个调用的问题在于:

MAPE(lapply(listed_arima, fitted), timeseries)

lapply(listed_arima, fitted) 是一个列表,而 MAPE 的第一个参数必须是一个向量;此外,timeseries 是一个数据框,而不仅仅是其中的一列。


推荐阅读