首页 > 解决方案 > IndexError:在 Tensorflow 训练期间列出超出范围的索引

问题描述

我有一个简单的模型训练如下:

simple_lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(8, input_shape=x_train.shape[-2:]),
    tf.keras.layers.Dense(1)
])
simple_lstm_model.compile(optimizer='adam', loss='mae')
simple_lstm_model.summary()

epochs = 10

history = simple_lstm_model.fit(
    (x_train, y_train),
    epochs=epochs,
    validation_data=(x_test, y_test),
    validation_steps=50
)

这是我得到的错误:

Train on 494797 samples, validate on 164933 samples
Epoch 1/10
    32/494797 [..............................] - ETA: 1:22:12
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-24-36add9c522be> in <module>
     16     epochs=epochs,
     17     validation_data=(X_test, y_test),
---> 18     validation_steps=50
     19 )

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    817         max_queue_size=max_queue_size,
    818         workers=workers,
--> 819         use_multiprocessing=use_multiprocessing)
    820
    821   def evaluate(self,

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    340                 mode=ModeKeys.TRAIN,
    341                 training_context=training_context,
--> 342                 total_epochs=epochs)
    343             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
    344

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
    126         step=step, mode=mode, size=current_batch_size) as batch_logs:
    127       try:
--> 128         batch_outs = execution_function(iterator)
    129       except (StopIteration, errors.OutOfRangeError):
    130         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
     96     # `numpy` translates Tensors to values in Eager mode.
     97     return nest.map_structure(_non_none_constant_value,
---> 98                               distributed_function(input_fn))
     99
    100   return execution_function

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
    566         xla_context.Exit()
    567     else:
--> 568       result = self._call(*args, **kwds)
    569
    570     if tracing_count == self._get_tracing_count():

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds)
    604       # In this case we have not created variables on the first call. So we can
    605       # run the first trace but we should fail if variables are created.
--> 606       results = self._stateful_fn(*args, **kwds)
    607       if self._created_variables:
    608         raise ValueError("Creating variables on a non-first call to a function"

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in __call__(self, *args, **kwargs)
   2360     """Calls a graph function specialized to the inputs."""
   2361     with self._lock:
-> 2362       graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
   2363     return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access
   2364

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _maybe_define_function(self, args, kwargs)
   2701
   2702       self._function_cache.missed.add(call_context_key)
-> 2703       graph_function = self._create_graph_function(args, kwargs)
   2704       self._function_cache.primary[cache_key] = graph_function
   2705       return graph_function, args, kwargs

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   2591             arg_names=arg_names,
   2592             override_flat_arg_shapes=override_flat_arg_shapes,
-> 2593             capture_by_value=self._capture_by_value),
   2594         self._function_attributes,
   2595         # Tell the ConcreteFunction to clean up its graph once it goes out of

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
    976                                           converted_func)
    977
--> 978       func_outputs = python_func(*func_args, **func_kwargs)
    979
    980       # invariant: `func_outputs` contains only Tensors, CompositeTensors,

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in wrapped_fn(*args, **kwds)
    437         # __wrapped__ allows AutoGraph to swap in a converted function. We give
    438         # the function a weak reference to itself to avoid a reference cycle.
--> 439         return weak_wrapped_fn().__wrapped__(*args, **kwds)
    440     weak_wrapped_fn = weakref.ref(wrapped_fn)
    441

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in distributed_function(input_iterator)
     83     args = _prepare_feed_values(model, input_iterator, mode, strategy)
     84     outputs = strategy.experimental_run_v2(
---> 85         per_replica_function, args=args)
     86     # Out of PerReplica outputs reduce or pick values to return.
     87     all_outputs = dist_utils.unwrap_output_dict(

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/distribute/distribute_lib.py in experimental_run_v2(self, fn, args, kwargs)
    761       fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(),
    762                                 convert_by_default=False)
--> 763       return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    764
    765   def reduce(self, reduce_op, value, axis):

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/distribute/distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
   1817       kwargs = {}
   1818     with self._container_strategy().scope():
-> 1819       return self._call_for_each_replica(fn, args, kwargs)
   1820
   1821   def _call_for_each_replica(self, fn, args, kwargs):

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/distribute/distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
   2162         self._container_strategy(),
   2163         replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
-> 2164       return fn(*args, **kwargs)
   2165
   2166   def _reduce_to(self, reduce_op, value, destinations):

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
    290   def wrapper(*args, **kwargs):
    291     with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED):
--> 292       return func(*args, **kwargs)
    293
    294   if inspect.isfunction(func) or inspect.ismethod(func):

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics, standalone)
    431       y,
    432       sample_weights=sample_weights,
--> 433       output_loss_metrics=model._output_loss_metrics)
    434
    435   if reset_metrics:

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
    310           sample_weights=sample_weights,
    311           training=True,
--> 312           output_loss_metrics=output_loss_metrics))
    313   if not isinstance(outs, list):
    314     outs = [outs]

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
    251               output_loss_metrics=output_loss_metrics,
    252               sample_weights=sample_weights,
--> 253               training=training))
    254       if total_loss is None:
    255         raise ValueError('The model cannot be run '

~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
    165
    166         if hasattr(loss_fn, 'reduction'):
--> 167           per_sample_losses = loss_fn.call(targets[i], outs[i])
    168           weighted_losses = losses_utils.compute_weighted_loss(
    169               per_sample_losses,

IndexError: list index out of range

我所有的数据集似乎都是正确的,所以在这一点上,我不知道我的问题出在哪里。

以下是我的数据集中的一些样本(全长为 494797 个样本):

# x_train variable (each sample has 10 float numbers)
[[[ 831.259198    832.184729    832.85326538  833.14926758  833.21054993
    833.55166626  835.30004883  835.31390381  833.2000885   829.34616394]]

 [[ 826.76694946  826.03945923  827.20136719  827.67208862  826.49060669
    848.25384216  884.94091492  873.46332703  855.76559143  843.89425354]]

 [[ 839.61763     831.93538818  826.70728149  829.41572266  830.15981445
    829.41616211  829.06277466  828.33288574  829.6803772   829.8375061 ]]]
# y_train variable (only "0" or "1" values)
[[1]
 [1]
 [0]]

标签: pythontensorflowlstm

解决方案


您致电的格式Sequential.fit不正确;前两个参数应该是xand y,而不是 tuple (x, y),即:

history = simple_lstm_model.fit(
    x_train, y_train,
    epochs=epochs,
    validation_data=(x_test, y_test),
    validation_steps=50
)

推荐阅读