python - TensorFlow 嵌入层触发 InvalidArgumentError
问题描述
我正在为推荐系统构建一个双塔(two-tower)神经网络。我对错误日志的理解是:在训练数据集的第 32 行第 0 列,它发现了一个不在 [0, 853515) 范围内的值 329416640。我完全不知道 329416640 这个值对应什么。
我使用以下数据集。
数据集已按 80/20 的比例拆分为训练集和测试集。训练数据形状:(4728919, 3) 测试数据形状:(1182230, 3)
然后我确定我的 vocab_lengths 如下:
# Vocabulary sizes: the number of distinct client IDs and product IDs.
client_vocab_len = int(client_data.CLI_ID.nunique())
libelle_vocab_len = int(items_data.prod_id.nunique())
print(client_vocab_len)  # = 853514
print(libelle_vocab_len)  # = 1484
我的模型架构如下所示:
# Two-tower model: one tower per ID input; the tower outputs are concatenated
# and passed through a small fully connected head that emits one value.
# NOTE(review): `tf` exposes `layers` and `Model` directly here, so it is
# presumably an alias for tensorflow.keras -- confirm against the imports.
#user input network
input_clients = tf.layers.Input(shape=[1])
dense = tf.layers.Dense(32, activation="relu")
yy = dense(input_clients)
# NOTE(review): the Embedding below is applied to `yy`, the Dense output
# (32 float activations per sample), not to the raw client ID. Embedding uses
# its input as integer lookup indices, so these activations become indices.
# This is very likely the source of
# "indices[32,0] = 329416640 is not in [0, 853515)": 853515 equals
# client_vocab_len + 1, and [32, 0] would be (sample 32 of the batch,
# Dense unit 0) rather than a row/column of the training table.
embed_clients = tf.layers.Embedding(client_vocab_len+1, 15)(yy)
clients_out = tf.layers.Flatten()(embed_clients)
#product input network
input_products = tf.layers.Input(shape=[1])
# Rebinds `dense` to a new layer; the client tower keeps its own layer object.
dense = tf.layers.Dense(32, activation="relu")
xx = dense(input_products)
# NOTE(review): same pattern as the client tower -- Dense activations are
# passed to Embedding as indices instead of the product IDs.
embed_products = tf.layers.Embedding(libelle_vocab_len+1, 15)(xx)
products_out = tf.layers.Flatten()(embed_products)
# Merge both towers, then two hidden layers of 128 units each.
conc_layer = tf.layers.Concatenate()([clients_out, products_out])
x = tf.layers.Dense(128, activation='relu')(conc_layer)
#x = tf.layers.Dropout(0.3)(x)
x = tf.layers.Dense(128, activation='relu')(x)
# Chained assignment: both `x_out` and `x` are bound to the single-unit output.
x_out = x = tf.layers.Dense(1, activation='relu')(x)
model = tf.Model([input_clients, input_products], x_out)
当我尝试训练我的模型时,我收到以下错误日志:
# Callbacks, both watching val_loss: stop training after 3 epochs without
# improvement, and multiply the learning rate by 0.75 after 2 such epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=3)
lr_reduce = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.75,
    patience=2,
    verbose=2,
    mode='auto',
)

# Train on (client ID, product ID) -> quantity, validating on the test split.
hist = model.fit(
    [Xtrain.CLI_ID, Xtrain.prod_id],
    Xtrain.QTY,
    batch_size=128,
    epochs=15,
    verbose=1,
    validation_data=([Xtest.CLI_ID, Xtest.prod_id], Xtest.QTY),
    callbacks=[early_stop, lr_reduce, PlotLossesKeras()],
)
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-166-d767717c6437> in <module>
8
9
---> 10 hist = model.fit([Xtrain.CLI_ID, Xtrain.prod_id], Xtrain.QTY,
11 batch_size=128,
12 epochs=15,
~/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1182 _r=1):
1183 callbacks.on_train_batch_begin(step)
-> 1184 tmp_logs = self.train_function(iterator)
1185 if data_handler.should_sync:
1186 context.async_wait()
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
883
884 with OptionalXlaContext(self._jit_compile):
--> 885 result = self._call(*args, **kwds)
886
887 new_tracing_count = self.experimental_get_tracing_count()
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
948 # Lifting succeeded, so variables are initialized and we can run the
949 # stateless function.
--> 950 return self._stateless_fn(*args, **kwds)
951 else:
952 _, _, _, filtered_flat_args = \
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
3037 (graph_function,
3038 filtered_flat_args) = self._maybe_define_function(args, kwargs)
-> 3039 return graph_function._call_flat(
3040 filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access
3041
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1961 and executing_eagerly):
1962 # No tape is watching; skip to running the function.
-> 1963 return self._build_call_outputs(self._inference_function.call(
1964 ctx, args, cancellation_manager=cancellation_manager))
1965 forward_backward = self._select_forward_and_backward_functions(
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py in call(self, ctx, args, cancellation_manager)
589 with _InterpolateFunctionError(self):
590 if cancellation_manager is None:
--> 591 outputs = execute.execute(
592 str(self.signature.name),
593 num_outputs=self._num_outputs,
~/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
57 try:
58 ctx.ensure_initialized()
---> 59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
InvalidArgumentError: indices[32,0] = 329416640 is not in [0, 853515)
[[node model_27/embedding_55/embedding_lookup (defined at <ipython-input-166-d767717c6437>:10) ]] [Op:__inference_train_function_28184]
Errors may have originated from an input operation.
Input Source operations connected to node model_27/embedding_55/embedding_lookup:
model_27/embedding_55/embedding_lookup/27907 (defined at /Users/jonathankhalifa/opt/anaconda3/lib/python3.8/contextlib.py:113)
Function call stack:
train_function
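解决方案
Embedding 层要求输入是整数索引,并且取值必须落在 [0, input_dim) 之内。上面的模型先把 ID 送入 Dense(32, activation="relu"),再把 Dense 的浮点输出交给 Embedding,于是这些激活值被当作索引去查表;报错中的 329416640 很可能就是这样产生的。indices[32,0] 指的是当前批次中的第 32 个样本、Dense 的第 0 个输出单元,而不是训练数据表的第 32 行第 0 列。修复方法是去掉 Embedding 之前的 Dense 层,让 Input 直接连接 Embedding;同时确认 CLI_ID 和 prod_id 已被编码为从 0 开始的连续整数(例如使用 pandas.factorize 或 Keras 的 IntegerLookup 层),否则原始 ID 本身也可能超出 [0, input_dim) 的范围。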
推荐阅读
- c++ - C++ 拆分字符串反馈
- vue.js - ref vue绑定的访问方法
- python - 将项目从 python 2 转换为 3
- wordpress - 如何在 wordpress 中使用 typist.js?
- debian - Debian 10 Buster 中缺少 chromedriver
- javascript - 如何路由到默认并使用剩余的 url 字符串作为客户端上的参数?
- sql - Oracle 复合触发器:尝试获取日期列的最大值时遇到 PLS-00103
- python - 排序/格式化计数器输出
- javascript - 如何仅从带有正则表达式的字符串中获取数字?
- python - 使用临时环境变量运行 Python 脚本