首页 > 解决方案 > 多元输入多步 LSTM 时间序列预测模型的奇怪问题

问题描述

我根据教程(https://machinelearningmastery.com/how-to-develop-lstm-models-for-multi-step-time-series-forecasting-of-household-power-consumption/ )为我的数据集开发了多变量输入多步 LSTM 时间序列预测模型。

然而,我遇到了一个非常奇怪的问题,即当我使用较小的样本(50 个训练样本,10 个测试样本)运行代码时,预测是正确的。但是当我使用完整样本(4000 个样本用于训练,1000 个样本用于测试)运行实验时,预测包含 NaN 值,这会导致错误。

然后,当我按下面的代码加入数据缩放、relu 激活函数和正则化之后,可以在完整样本(4000 个样本用于训练,1000 个样本用于测试)上得到预测结果,但预测仍然不正确:我想预测 96 步,而所有步的预测值都是同一个数字。

你能给出一个有用的建议来处理预测准确性问题吗?

import time
from math import sqrt
from numpy import split
from numpy import array
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import LSTM
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
import csv
import numpy
from sklearn.preprocessing import MinMaxScaler
from numpy import save
from timeit import default_timer as timer

def scale(train, test):
    """Rescale train and test to the range [-1, 1].

    Both inputs are cast to float. The scaler is fitted on ``train`` only
    and then applied to ``train`` and ``test``, each reshaped to
    ``(shape[0], shape[1])`` before being transformed.

    Returns:
        Tuple ``(scaler, train_scaled, test_scaled)``. The fitted scaler is
        returned so the caller can invert the transform later.
    """
    train_f = train.astype(float)
    test_f = test.astype(float)
    # fit() returns the scaler itself, so construction and fitting chain.
    fitted = MinMaxScaler(feature_range=(-1, 1)).fit(train_f)

    def _to_scaled(block):
        # Force a (rows, columns) layout, then apply the fitted scaling.
        rows, cols = block.shape[0], block.shape[1]
        return fitted.transform(block.reshape(rows, cols))

    # Tuple elements are evaluated left to right: train first, then test.
    return fitted, _to_scaled(train_f), _to_scaled(test_f)

# split a multivariate dataset into scaled train/test sets of 97-row windows


def split_dataset(data):
    """Slice, scale and window the raw data into train and test sets.

    Rows 0..387029 form the training set and rows 387030..433880 the test
    set; only columns 10..25 are kept in both. The two sets are scaled with
    ``scale`` and then cut into consecutive windows of 97 rows.

    Returns:
        Tuple ``(scaler, train_scaled, test_scaled)`` where the two data
        items are arrays of 97-row windows and ``scaler`` is the fitted
        scaler returned by ``scale``.
    """
    feature_cols = slice(10, 26)
    train_rows = slice(0, 387030)
    test_rows = slice(387030, 433881)

    raw_train = data[train_rows, feature_cols]
    raw_test = data[test_rows, feature_cols]
    scaler, flat_train, flat_test = scale(raw_train, raw_test)

    # split() is asked for len/97 equal sections, i.e. windows of 97 rows;
    # it requires the row count to divide evenly into that many sections.
    train_scaled = array(split(flat_train, len(flat_train) / 97))
    test_scaled = array(split(flat_test, len(flat_test) / 97))
    return scaler, train_scaled, test_scaled

# create a list of configs to try


def model_configs():
    """Build the grid of model configurations to evaluate.

    Each configuration is a list ``[n_nodes, n_epochs, n_batch]``. The grid
    is the full cross product of the options below, ordered with the node
    count varying slowest and the batch size fastest. The number of
    configurations is printed before returning.

    Returns:
        List of ``[n_nodes, n_epochs, n_batch]`` lists.
    """
    node_options = (100, 200, 300)
    epoch_options = (50, 100)
    batch_options = (64,)

    grid = [
        [nodes, epochs, batch]
        for nodes in node_options
        for epochs in epoch_options
        for batch in batch_options
    ]
    print('Total configs: %d' % len(grid))
    return grid

# evaluate one or more weekly forecasts against expected values


def evaluate_forecasts(actual, predicted):
    """Score forecasts against observed values using RMSE.

    The squared error is computed once as a vectorized array operation and
    reused for both the per-step and the overall score, instead of looping
    over every element in Python.

    Args:
        actual: array-like indexed as ``[window, step, feature]``.
        predicted: array-like with exactly the same shape as ``actual``.

    Returns:
        Tuple ``(score, scores)``. ``score`` is the RMSE over every element.
        ``scores`` is a list holding the RMSE (over all windows and
        features) of steps 0, 97, 194, ... of the step axis.

    Raises:
        ValueError: if the shapes differ, the inputs are empty, or either
            input contains NaN or infinity.
    """
    actual = numpy.asarray(actual, dtype=float)
    predicted = numpy.asarray(predicted, dtype=float)

    # Fail loudly instead of silently broadcasting or returning NaN scores.
    if actual.shape != predicted.shape:
        raise ValueError(
            'actual and predicted must have the same shape, got %s and %s'
            % (actual.shape, predicted.shape))
    if actual.size == 0:
        raise ValueError('cannot evaluate empty forecasts')
    if not (numpy.isfinite(actual).all() and numpy.isfinite(predicted).all()):
        raise ValueError('actual/predicted contain NaN or infinite values')

    squared_error = (actual - predicted) ** 2

    # NOTE(review): the stride of 97 means only steps 0, 97, 194, ... are
    # scored, so windows of at most 97 steps yield a single entry. Confirm
    # whether a score for every step was intended.
    scores = [
        sqrt(squared_error[:, step, :].mean())
        for step in range(0, squared_error.shape[1], 97)
    ]

    # Overall RMSE across all windows, steps and features.
    score = sqrt(squared_error.mean())
    return score, scores


# convert history into inputs and outputs


def to_supervised(train, n_steps_in, n_steps_out=97, overlop=97):
    """Turn windowed history into (input, output) training pairs.

    The windows in ``train`` are first flattened into one continuous
    sequence of rows. Starting at row 0 and advancing ``overlop`` rows at a
    time, each sample takes ``n_steps_in`` rows as input and the following
    ``n_steps_out`` rows as output. Starts whose output would run past the
    end of the sequence are dropped.

    Args:
        train: 3-D array; its first two axes are merged into one row axis.
        n_steps_in: number of rows in each input sample.
        n_steps_out: number of rows in each output sample.
        overlop: stride, in rows, between the starts of consecutive samples.

    Returns:
        Tuple ``(X, y)`` of arrays built from the input and output samples.
    """
    flat = train.reshape((train.shape[0] * train.shape[1], train.shape[2]))
    total_rows = len(flat)
    span = n_steps_in + n_steps_out

    # Starts only increase, so filtering on "fits in the data" keeps the
    # same starts as stopping at the first one that does not fit.
    starts = [s for s in range(0, total_rows, overlop)
              if s + span <= total_rows]

    inputs = [flat[s:s + n_steps_in, :] for s in starts]
    outputs = [flat[s + n_steps_in:s + span, :] for s in starts]
    return array(inputs), array(outputs)

# train the model


def build_model(train, n_input, config):
    """Build and fit an encoder-decoder LSTM on the training windows.

    Args:
        train: windowed training data, passed to ``to_supervised``.
        n_input: number of input rows per training sample.
        config: sequence ``(n_nodes, n_epochs, n_batch)``.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    n_nodes, n_epochs, n_batch = config

    # Build the supervised samples and read the layer sizes from them.
    train_x, train_y = to_supervised(train, n_input)
    n_timesteps = train_x.shape[1]
    n_features = train_x.shape[2]
    n_outputs = train_y.shape[1]
    # Targets laid out as [samples, timesteps, features].
    train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], n_features))

    # NOTE(review): relu is unbounded, unlike the LSTM default tanh; if
    # training diverges to NaN, consider tanh or gradient clipping - verify.
    layers = [
        # Encoder: reads the whole input sequence into one vector.
        LSTM(n_nodes,
             activation='relu',
             input_shape=(n_timesteps, n_features),
             recurrent_dropout=0.6),
        # Repeat the encoded vector once per output step.
        RepeatVector(n_outputs),
        # Decoder: emits one hidden state per output step.
        LSTM(n_nodes,
             activation='relu',
             return_sequences=True,
             recurrent_dropout=0.6),
        # Per-step dense head producing n_features values per step.
        TimeDistributed(Dense(n_nodes, activation='relu')),
        TimeDistributed(Dense(n_features)),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss='mse', optimizer='adam')

    # Train silently (verbose=0).
    model.fit(train_x, train_y,
              epochs=n_epochs, batch_size=n_batch, verbose=0)
    return model

# make a forecast


def forecast(model, history, n_input):
    """Predict the next output sequence from the latest ``n_input`` rows.

    Args:
        model: object exposing ``predict(x, verbose=0)``.
        history: sequence of equally shaped 2-D windows.
        n_input: number of most recent rows fed to the model.

    Returns:
        The first (and only) element of the model's batch prediction.
    """
    stacked = array(history)
    n_windows, n_steps, n_features = (
        stacked.shape[0], stacked.shape[1], stacked.shape[2])
    # Merge all windows into one continuous sequence of rows.
    flat = stacked.reshape((n_windows * n_steps, n_features))
    recent = flat[-n_input:, :]
    # The model expects a batch axis: [1, n_input, n_features].
    batch = recent.reshape((1, recent.shape[0], recent.shape[1]))
    return model.predict(batch, verbose=0)[0]

# evaluate a single model


def evaluate_model(train, test, n_input, cfg):
    """Fit one configuration and score it with walk-forward validation.

    A model is trained on ``train``. For every window in ``test`` a forecast
    is made from the history so far, after which the true window is added
    to the history. Predictions and test data are mapped back to the
    original scale before scoring.

    Note: the inverse scaling uses ``scaler``, which is not a parameter; it
    is read from module scope and must be defined before this is called.

    Args:
        train: scaled training windows.
        test: scaled test windows.
        n_input: number of input rows used for each forecast.
        cfg: sequence ``[n_nodes, n_epochs, n_batch]``.

    Returns:
        Tuple ``(cfg[0], cfg[1], cfg[2], score, scores, run_time)`` where
        ``run_time`` is the elapsed time in seconds, training included.
    """
    start = timer()

    model = build_model(train, n_input, cfg)

    # Walk-forward validation: predict a window, then reveal the truth.
    history = list(train)
    forecasts = []
    for observed in test:
        forecasts.append(forecast(model, history, n_input))
        history.append(observed)
    forecasts = array(forecasts)

    # Flatten to 2-D rows so the scaler can undo the scaling.
    flat_pred = forecasts.reshape(
        (forecasts.shape[0] * forecasts.shape[1], forecasts.shape[2]))
    flat_pred = scaler.inverse_transform(flat_pred)

    flat_test = test.reshape((test.shape[0] * test.shape[1], test.shape[2]))
    flat_test = scaler.inverse_transform(flat_test)

    # Cut both back into windows of 97 rows for scoring.
    pred_windows = array(split(flat_pred, len(flat_pred) / 97))
    test_windows = array(split(flat_test, len(flat_test) / 97))

    score, scores = evaluate_forecasts(test_windows, pred_windows)
    run_time = timer() - start
    return cfg[0], cfg[1], cfg[2], score, scores, run_time


# Load the preprocessed data; the first CSV column becomes the index.
dataset = read_csv('data_preproccess_5.csv', header=0, index_col=0)

# Scale the data and cut it into train/test windows. `scaler` must stay a
# module-level name because evaluate_model() reads it from module scope.
scaler, train_scaled, test_scaled = split_dataset(dataset.values)

# Each forecast is conditioned on the previous 7 * 97 rows.
n_input = 7 * 97

# Hyper-parameter grid to evaluate.
cfg_list = model_configs()

# One result tuple per configuration, in grid order.
scores = [evaluate_model(train_scaled, test_scaled, n_input, cfg)
          for cfg in cfg_list]

提供一些样本数据 样本数据

标签: tensorflow keras time-series lstm recurrent-neural-network

解决方案


如果您的模型是多步输出,可以先把预测值和真实值重塑(reshape)为一维数组,然后再计算误差。

我的拆分数据集

`trainX, trainY, testX, testY`

获取预测结果

`trainPredict = model.predict(trainX)`
`testPredict = model.predict(testX)`

重塑预测值和真实值

`trainY = trainY.reshape(-1, )`
`trainPredict = trainPredict.reshape(-1, )`

`testY = testY.reshape(-1, )`
`testPredict = testPredict.reshape(-1, )`

计算均方根误差

`print('Train Root mean squared error: {}'.format(math.sqrt(mean_squared_error(trainY, trainPredict))))`
`print('Test Root mean squared error: {}'.format(math.sqrt(mean_squared_error(testY, testPredict))))`

推荐阅读