LSTM, multiple binary-array inputs, and handling overfitting

Problem description

I am currently working on a space-weather model that uses coronal-hole information from the most recent 3 days to predict the next day's maximum Kp index
(the data set covers about 4,300 days in total).

For the input, three arrays of 136 elements each are used (one array represents one day, so three days of data). For example,

inputArray_day1 = [0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
inputArray_day2 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0] 
inputArray_day3 = [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
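
A Keras LSTM expects input of shape (samples, timesteps, features), so the three daily arrays go in as one sample of shape (3, 136). A minimal sketch of how such samples could be built with a sliding window over the full data set (the names daily_arrays and the random placeholder data are illustrative assumptions, not from the original code):

import numpy as np

# Assumed placeholder: ~4300 days of 136-element binary coronal-hole arrays.
n_days, n_features = 4300, 136
daily_arrays = np.random.randint(0, 2, size=(n_days, n_features))

# Each sample stacks 3 consecutive days; the target is the Kp index of day 4.
window = 3
x = np.stack([daily_arrays[i:i + window] for i in range(n_days - window)])
print(x.shape)  # (4297, 3, 136) -> (samples, timesteps, features)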

The output is a single one-hot vector of length 28 that represents the maximum Kp index of day 4. I use the dictionaries below to convert easily between Kp index and one-hot vector.

kp2idx = {0.0:0, 0.3:1, 0.7:2, 1.0:3, 1.3:4, 1.7:5, 2.0:6, 2.3:7, 2.7:8, 3.0:9, 3.3:10, 3.7:11, 4.0:12, 4.3:13,
       4.7:14, 5.0:15, 5.3:16, 5.7:17, 6.0:18, 6.3:19, 6.7:20, 7.0:21, 7.3:22, 7.7:23, 8.0:24, 8.3:25, 8.7:26, 9.0:27}
idx2kp = {0:0.0, 1:0.3, 2:0.7, 3:1.0, 4:1.3, 5:1.7, 6:2.0, 7:2.3, 8:2.7, 9:3.0, 10:3.3, 11:3.7, 12:4.0, 13:4.3,
       14:4.7, 15:5.0, 16:5.3, 17:5.7, 18:6.0, 19:6.3, 20:6.7, 21:7.0, 22:7.3, 23:7.7, 24:8.0, 25:8.3, 26:8.7, 27:9.0}
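
For reference, a small sketch of the conversion these dictionaries support (the helper names kp_to_onehot / onehot_to_kp are made up for illustration):

import numpy as np

def kp_to_onehot(kp):
  # Map a Kp value (e.g. 4.3) to a length-28 one-hot vector via kp2idx.
  onehot = np.zeros(len(kp2idx))
  onehot[kp2idx[kp]] = 1.0
  return onehot

def onehot_to_kp(vec):
  # Map a predicted probability vector back to a Kp value via idx2kp.
  return idx2kp[int(np.argmax(vec))]

print(kp_to_onehot(4.3))                 # vector with index 13 set to 1.0
print(onehot_to_kp(kp_to_onehot(4.3)))   # 4.3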

The model consists of two LSTM layers with dropout.

import tensorflow as tf

def fit_lstm2(X, Y, Xv, Yv, n_batch, nb_epoch, n_neu1, n_neu2, dropout):
  # Two stacked LSTM layers with dropout, followed by a 28-way softmax
  # over the possible Kp classes.
  model = tf.keras.Sequential()
  model.add(tf.keras.layers.LSTM(n_neu1, batch_input_shape=(n_batch, X.shape[1], X.shape[2]), return_sequences=True))
  model.add(tf.keras.layers.Dropout(dropout))
  model.add(tf.keras.layers.LSTM(n_neu2))
  model.add(tf.keras.layers.Dropout(dropout))
  model.add(tf.keras.layers.Dense(28, activation='softmax'))
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', 'mse'])

  # Train one epoch at a time so the LSTM states can be reset between epochs.
  for i in range(nb_epoch):
    print('epochs : ' + str(i))
    model.fit(X, Y, epochs=1, batch_size=n_batch, verbose=1, shuffle=False,
              callbacks=[custom_hist], validation_data=(Xv, Yv))
    model.reset_states()

  return model
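
Note that custom_hist is not defined in the snippet above; it is presumably a history-recording callback created elsewhere. A minimal sketch of what it might look like, assuming it only accumulates per-epoch metrics across the repeated model.fit(epochs=1) calls (the class name and attributes are assumptions):

import tensorflow as tf

class CustomHistory(tf.keras.callbacks.Callback):
  # Collects loss/accuracy across repeated one-epoch fit() calls.
  def __init__(self):
    super().__init__()
    self.train_loss, self.val_loss = [], []
    self.train_acc, self.val_acc = [], []

  def on_epoch_end(self, epoch, logs=None):
    logs = logs or {}
    self.train_loss.append(logs.get('loss'))
    self.val_loss.append(logs.get('val_loss'))
    self.train_acc.append(logs.get('accuracy'))
    self.val_acc.append(logs.get('val_accuracy'))

custom_hist = CustomHistory()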

I tried various neuron counts and dropout rates, for example

n_batch = 1
nb_epochs = 100
n_neu1 = [128,64,32,16]
n_neu2 = [64,32,16,8]
n_dropout = [0.2,0.4,0.6,0.8]

for dropout in n_dropout:
  for i in range(len(n_neu1)):
    model = fit_lstm2(x_train,y_train,x_val,y_val,n_batch, nb_epochs,n_neu1[i],n_neu2[i],dropout)

The problem is that the prediction accuracy never exceeds 10%, and the model starts to overfit very soon after training begins.

Below are some plots of the training history. (Sorry about the legend placement.)

Honestly, I have no idea why the validation accuracy never goes up and why overfitting starts so quickly. Is there a better way to use the input data? I mean, should I normalize or standardize the input?

Please help; any comments or suggestions would be greatly appreciated.

Tags: python, keras, normalization, lstm, multiple-input
