python - 如何使用 tf.keras 拟合 LSTM 模型
问题描述
我在尝试拟合模型时遇到了一个我无法确定原因的错误:
KeyError: 168
环境:keras 2.6.0,tensorflow 2.6.0。我的数据已经过 one-hot 编码(OneHotEncoder),并使用 MinMaxScaler 进行了归一化,其中没有任何 NaN 或 inf 值。我的代码示例如下:
x_train,x_test,y_train,y_test=train_test_split(features,target,test_size=0.2,random_state=123,shuffle=False)
from keras.preprocessing.sequence import TimeseriesGenerator
win_length = 168
batch_size = 32
num_features = 40
train_generator = TimeseriesGenerator(x_train,y_train,length=win_length,sampling_rate=1,batch_size=batch_size)
test_generator = TimeseriesGenerator(x_test,y_test,length=win_length,sampling_rate=1,batch_size=batch_size)
import tensorflow as tf
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(128,input_shape=(win_length,num_features),return_sequences=True))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
model.add(tf.keras.layers.LSTM(128,return_sequences=True))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.LSTM(64,return_sequences=False))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(1))
model.summary()
Model: "sequential_2"
Layer (type) Output Shape Param #
lstm_6 (LSTM) (None, 168, 128) 86528
leaky_re_lu_4 (LeakyReLU) (None, 168, 128) 0
lstm_7 (LSTM) (None, 168, 128) 131584
leaky_re_lu_5 (LeakyReLU) (None, 168, 128) 0
dropout_4 (Dropout) (None, 168, 128) 0
lstm_8 (LSTM) (None, 64) 49408
dropout_5 (Dropout) (None, 64) 0
dense_2 (Dense) (None, 1) 65
Total params: 267,585 Trainable params: 267,585 Non-trainable params: 0
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, mode='min')
model.compile(loss=tf.losses.MeanSquaredError(),
optimizer=tf.optimizers.Adam(),
metrics=[tf.metrics.MeanAbsoluteError()])
history = model.fit(train_generator,epochs=50,validation_data=test_generator,shuffle=False,callbacks=[early_stopping])
然后我得到了错误:
Epoch 1/50
136/136 [==============================] - ETA: 0s - loss: 0.0014 - mean_absolute_error: 0.0243
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2897 try:
-> 2898 return self._engine.get_loc(casted_key)
2899 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 168
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-86-b78994610c64> in <module>
3 optimizer=tf.optimizers.Adam(),
4 metrics=[tf.metrics.MeanAbsoluteError()])
----> 5 history = model.fit(train_generator,epochs=50,validation_data=test_generator,shuffle=False,callbacks=[early_stopping])
~/anaconda3/envs/python3/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1212 use_multiprocessing=use_multiprocessing,
1213 model=self,
-> 1214 steps_per_execution=self._steps_per_execution)
1215 val_logs = self.evaluate(
1216 x=val_x,
~/anaconda3/envs/python3/lib/python3.6/site-packages/keras/engine/data_adapter.py in get_data_handler(*args, **kwargs)
1381 if getattr(kwargs["model"], "_cluster_coordinator", None):
1382 return _ClusterCoordinatorDataHandler(*args, **kwargs)
-> 1383 return DataHandler(*args, **kwargs)
1384
1385
~/anaconda3/envs/python3/lib/python3.6/site-packages/keras/engine/data_adapter.py in __init__(self, x, y, sample_weight, batch_size, steps_per_epoch, initial_epoch, epochs, shuffle, class_weight, max_queue_size, workers, use_multiprocessing, model, steps_per_execution, distribute)
1148 use_multiprocessing=use_multiprocessing,
1149 distribution_strategy=tf.distribute.get_strategy(),
-> 1150 model=model)
1151
1152 strategy = tf.distribute.get_strategy()
~/anaconda3/envs/python3/lib/python3.6/site-packages/keras/engine/data_adapter.py in __init__(self, x, y, sample_weights, shuffle, workers, use_multiprocessing, max_queue_size, model, **kwargs)
922 max_queue_size=max_queue_size,
923 model=model,
--> 924 **kwargs)
925
926 @staticmethod
~/anaconda3/envs/python3/lib/python3.6/site-packages/keras/engine/data_adapter.py in __init__(self, x, y, sample_weights, workers, use_multiprocessing, max_queue_size, model, **kwargs)
792 # Since we have to know the dtype of the python generator when we build the
793 # dataset, we have to look at a batch to infer the structure.
--> 794 peek, x = self._peek_and_restore(x)
795 peek = self._standardize_batch(peek)
796 peek = _process_tensorlike(peek)
~/anaconda3/envs/python3/lib/python3.6/site-packages/keras/engine/data_adapter.py in _peek_and_restore(x)
926 @staticmethod
927 def _peek_and_restore(x):
--> 928 return x[0], x
929
930 def _handle_multiprocessing(self, x, workers, use_multiprocessing,
~/anaconda3/envs/python3/lib/python3.6/site-packages/keras_preprocessing/sequence.py in __getitem__(self, index)
372 samples = np.array([self.data[row - self.length:row:self.sampling_rate]
373 for row in rows])
--> 374 targets = np.array([self.targets[row] for row in rows])
375
376 if self.reverse:
~/anaconda3/envs/python3/lib/python3.6/site-packages/keras_preprocessing/sequence.py in <listcomp>(.0)
372 samples = np.array([self.data[row - self.length:row:self.sampling_rate]
373 for row in rows])
--> 374 targets = np.array([self.targets[row] for row in rows])
375
376 if self.reverse:
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/series.py in __getitem__(self, key)
880
881 elif key_is_scalar:
--> 882 return self._get_value(key)
883
884 if is_hashable(key):
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/series.py in _get_value(self, label, takeable)
988
989 # Similar to Index.get_value, but we do not fall back to positional
--> 990 loc = self.index.get_loc(label)
991 return self.index._get_values_for_loc(self, loc, label)
992
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2898 return self._engine.get_loc(casted_key)
2899 except KeyError as err:
-> 2900 raise KeyError(key) from err
2901
2902 if tolerance is not None:
KeyError: 168
我的数据是按小时汇总的,窗口长度 168 对应一整周(168 小时)。我应该如何正确地拟合我的模型?
解决方案
在看不到数据的情况下,我最好的猜测是问题出在 y_test 上,它看起来是一个 pd.Series。拆分之后,y_test 很可能仍然保留着 target 原来的索引值,因为 y_test 只是 target 的一个切片。如果我没猜错,那么 y_test 这个 Series 中第一个(也就是最小的)索引值可能是 ~17,512。您可以使用以下方法检验该假设:
print(y_test.index.min())
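TimeseriesGenerator 通过 self.targets[row] 取目标值,而对 pd.Series 来说这是按索引标签查找,不是按位置查找。当它查找标签 168 时找不到,因为 y_test 的索引是从大约 17K 开始的。如果确实是这种情况,下面这行代码应该可以解决问题: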
y_test = y_test.copy().reset_index(drop=True)
推荐阅读
- html - 如何在投票按钮下方定位投票计数?
- ios - “if #available(iOS 13.0, *)”是如何实现的,对运行时有什么影响?
- javascript - 使用 JavaScript 在 Html 中动态添加单选和下拉列表并返回数据
- c++ - glew和glfw的opengl初始化问题
- angular - Firebase 云消息传递 fcm_options.link 网址在点击时未打开
- java - 在 Websphere 8.0.0.13 中设置 Oracle JDBC 提供程序
- javascript - Vue.js - 点击跨度也点击输入。如何防止?
- javascript - 使用动态 URL 查询参数提供静态目录
- django - Django:覆盖表单。如何缩短字段的映射?
- javascript - 预处理 Javascript 以删除对象属性名称引号