首页 > 解决方案 > TensorFlow 嵌入层触发 InvalidArgumentError

问题描述

我正在为推荐系统构建一个双塔(two-tower)神经网络。按照我对错误日志的理解,在训练数据集的第 32 行第 0 列上,它遇到了一个不在 [0, 853515) 范围内的值 329416640。我完全不知道 329416640 这个值对应什么。

我使用以下数据集。

在此处输入图像描述

数据集已按 80/20 的比例拆分为训练集和测试集。训练数据形状:(4728919, 3) 测试数据形状:(1182230, 3)

然后我确定我的 vocab_lengths 如下:

# Vocabulary sizes: the number of distinct client IDs and product IDs.
# NOTE(review): a distinct-value count only bounds the embedding indices if the
# IDs are encoded as contiguous integers starting at 0 — confirm upstream.
client_vocab_len = int(client_data["CLI_ID"].nunique())
libelle_vocab_len = int(items_data["prod_id"].nunique())
print(client_vocab_len)  # = 853514
print(libelle_vocab_len)  # = 1484

我的模型架构如下所示:

# Two-branch model: one branch per ID input (client, product); the two branch
# outputs are concatenated and passed through a dense head with a single output.
# NOTE(review): `tf` is used as if it exposed `layers` and `Model` directly
# (presumably an alias for tensorflow.keras) — confirm against the imports.

#user input network
input_clients = tf.layers.Input(shape=[1])
dense = tf.layers.Dense(32, activation="relu")
yy = dense(input_clients)
# NOTE(review): the Embedding below is fed `yy`, the Dense layer's float output,
# not the client IDs held by `input_clients`. An embedding lookup needs integer
# indices in [0, client_vocab_len + 1); Dense activations are unbounded, which
# is consistent with an out-of-range index error at training time. Connecting
# the Embedding directly to `input_clients` is the likely fix — confirm.
embed_clients = tf.layers.Embedding(client_vocab_len+1, 15)(yy)
clients_out = tf.layers.Flatten()(embed_clients)

#product input network
input_products = tf.layers.Input(shape=[1])
# `dense` is rebound to a new layer instance here, so the two branches do not
# share Dense weights.
dense = tf.layers.Dense(32, activation="relu")
xx = dense(input_products)
# NOTE(review): same pattern as the client branch — the Embedding receives the
# Dense output `xx` instead of the product IDs in `input_products`.
embed_products = tf.layers.Embedding(libelle_vocab_len+1, 15)(xx)
products_out = tf.layers.Flatten()(embed_products)


# Head: concatenate both flattened branches, two 128-unit ReLU layers, then a
# single ReLU output unit (so predictions cannot be negative).
conc_layer = tf.layers.Concatenate()([clients_out, products_out])
x = tf.layers.Dense(128, activation='relu')(conc_layer)
#x = tf.layers.Dropout(0.3)(x)
x = tf.layers.Dense(128, activation='relu')(x)
# Chained assignment: both `x_out` and `x` are bound to the output tensor.
x_out = x = tf.layers.Dense(1, activation='relu')(x)
# The model takes [client input, product input] and returns `x_out`.
model = tf.Model([input_clients, input_products], x_out)

在此处输入图像描述

当我尝试训练我的模型时,我收到以下错误日志:

# Create the training callbacks, both watching the validation loss.
# NOTE(review): EarlyStopping / ReduceLROnPlateau are presumably the Keras
# callbacks of the same name, imported elsewhere — confirm.
# Early stopping is configured with patience=3 on 'val_loss'.
early_stop = EarlyStopping(monitor='val_loss',patience=3) 
# Learning-rate reduction is configured with factor=0.75 and patience=2
# on 'val_loss'.
lr_reduce = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.75,
                              patience=2,
                              verbose=2,
                              mode='auto')


# Train on the CLI_ID and prod_id columns of Xtrain with QTY as the target;
# Xtest supplies the validation data in the same layout.
# NOTE(review): the ID columns are passed as-is; whether their values fit the
# embedding sizes chosen for the model is not checked here — confirm.
# NOTE(review): PlotLossesKeras is not defined in this snippet (presumably a
# live loss-plotting callback imported elsewhere) — confirm.
hist = model.fit([Xtrain.CLI_ID, Xtrain.prod_id], Xtrain.QTY, 
                 batch_size=128, 
                 epochs=15, 
                 verbose=1,
                 validation_data=([Xtest.CLI_ID, Xtest.prod_id], Xtest.QTY),
                 callbacks=[early_stop,lr_reduce,PlotLossesKeras()])



---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-166-d767717c6437> in <module>
      8 
      9 
---> 10 hist = model.fit([Xtrain.CLI_ID, Xtrain.prod_id], Xtrain.QTY, 
     11                  batch_size=128,
     12                  epochs=15,

~/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
   1182                 _r=1):
   1183               callbacks.on_train_batch_begin(step)
-> 1184               tmp_logs = self.train_function(iterator)
   1185               if data_handler.should_sync:
   1186                 context.async_wait()

~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
    883 
    884       with OptionalXlaContext(self._jit_compile):
--> 885         result = self._call(*args, **kwds)
    886 
    887       new_tracing_count = self.experimental_get_tracing_count()

~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
    948         # Lifting succeeded, so variables are initialized and we can run the
    949         # stateless function.
--> 950         return self._stateless_fn(*args, **kwds)
    951     else:
    952       _, _, _, filtered_flat_args = \

~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
   3037       (graph_function,
   3038        filtered_flat_args) = self._maybe_define_function(args, kwargs)
-> 3039     return graph_function._call_flat(
   3040         filtered_flat_args, captured_inputs=graph_function.captured_inputs)  # pylint: disable=protected-access
   3041 

~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1961         and executing_eagerly):
   1962       # No tape is watching; skip to running the function.
-> 1963       return self._build_call_outputs(self._inference_function.call(
   1964           ctx, args, cancellation_manager=cancellation_manager))
   1965     forward_backward = self._select_forward_and_backward_functions(

~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py in call(self, ctx, args, cancellation_manager)
    589       with _InterpolateFunctionError(self):
    590         if cancellation_manager is None:
--> 591           outputs = execute.execute(
    592               str(self.signature.name),
    593               num_outputs=self._num_outputs,

~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     57   try:
     58     ctx.ensure_initialized()
---> 59     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
     60                                         inputs, attrs, num_outputs)
     61   except core._NotOkStatusException as e:

InvalidArgumentError:  indices[32,0] = 329416640 is not in [0, 853515)
     [[node model_27/embedding_55/embedding_lookup (defined at <ipython-input-166-d767717c6437>:10) ]] [Op:__inference_train_function_28184]

Errors may have originated from an input operation.
Input Source operations connected to node model_27/embedding_55/embedding_lookup:
 model_27/embedding_55/embedding_lookup/27907 (defined at /Users/jonathankhalifa/opt/anaconda3/lib/python3.8/contextlib.py:113)

Function call stack:
train_function


标签: python tensorflow keras deep-learning

解决方案


推荐阅读