首页 > 解决方案 > CNN和LSTM的组合用于时间序列数据

问题描述

我正在尝试运行 CNN(卷积神经网络)和 LSTM(长期短期记忆)的组合,但没有找到适合两者的数据的正确重塑。我认为 LSTM 需要 [samples, timesteps, features],但它在这里作为输入不起作用。

我收到一个错误:

ValueError: Negative dimension size caused by subtracting 3 from 1 for '{{node conv1d/conv1d/Conv2D}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](conv1d/conv1d/Reshape, conv1d/conv1d/ExpandDims_1)' with input shapes: [?,1,1,24], [1,3,24,64].

数据取自: https ://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data

它的形状为:

日期 土地平均温度
1750-01-01 1.2
ETC..

完整的代码是:

import tensorflow as tf

def preprocessing(data,n_in=1, n_out=1):
from sklearn.model_selection import train_test_split


    def series_to_supervised(df, n_in=1, n_out=1, 
       dropnan=True):
       

        cols = list()
        # input sequence (t-n, ... t-1)
        for i in range(n_in, 0, -1):
          cols.append(df.shift(i))
        # forecast sequence (t, t+1, ... t+n)
        for i in range(0, n_out):
          cols.append(df.shift(-i))
        # put it all together
        agg = pd.concat(cols, axis=1)
        # drop rows with NaN values
        if dropnan:
          agg.dropna(inplace=True)
        return agg.values

land_temp = pd.DataFrame(data['LandAverageTemperature'].values)


ma_vals = data['LandAverageTemperature'].expanding(min_periods=12).mean()
ma_vals_inter = ma_vals.interpolate(limit_direction='both')

df = series_to_supervised(ma_vals_inter, n_in=n_in, n_out=n_out, dropnan=True)
df = pd.DataFrame(df)

X, y = df.iloc[:, :-n_out], df.iloc[:, -n_out:]

percent = 0.8
if n_out == 1:
    y = y.iloc[:, 0]
lim = int(percent * X.shape[0])

X_train, X_test, y_train, y_test = X[:lim], X[lim:], y[:lim], y[ lim:]  # train_test_split( X, y, test_size=0.4, random_state=0)
return X_train, X_test, y_train, y_test

def lstm_cnn2(X_train,y_train,config,n_in,n_out=1,batch_size=1,epochs=1000,verbose=0,n_features=1):
input_y = y_train.values.reshape(y_train.shape[0], 1)
n_timesteps, n_features, n_outputs = X_train.shape[0], 
X_train.shape[1], input_y.shape[1]
# reshape output into [samples, timesteps, features]
input_x = X_train.values.reshape((X_train.shape[0], 1, 
n_features))

# define model
model = Sequential()
model.add(Conv1D(64, 3, activation='relu', input_shape=(n_timesteps,1,n_features))) 
model.add(Conv1D(64, 3, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(RepeatVector(n_outputs))
model.add(LSTM(200, activation='relu', 
return_sequences=True))
model.add(TimeDistributed(Dense(100, 
activation='relu')))
model.add(TimeDistributed(Dense(1)))
model.compile(loss='mse', optimizer='adam')
# fit network
model.fit(input_x, input_y, epochs=epochs, 
batch_size=batch_size, verbose=verbose)
return model

if __name__ == '__main__':

file_location='./GlobalTemperatures.csv'
data = pd.read_csv(file_location)
data['dt'] = pd.to_datetime(data['dt'])
n_out = 1
n_in = 12 * 2

X_train, X_test, y_train, y_test = 
preprocessing(data,n_in,n_out)

config = 128, 64, 32, 3 * 48, 48, 24, 100, 20  # lstm 
 model configuration
verbose, epochs, batch_size = 0, 1, 16

model_lstm = lstm_cnn2(X_train, y_train, config, 
n_in,batch_size=batch_size)

标签: time-seriesconv-neural-networklstmforecasting

解决方案


推荐阅读