tensorflow - 模型中无法识别 Input_shape
问题描述
我在训练时对原始模型做了一些修改,随后收到了这个错误:「我们目前不支持对在第一层中没有设置 input_shape/input_dim 的 Sequential 模型、或子类化(subclassed)模型使用分布策略(distribution strategy)」。我是初学者,请问现在该怎么办?input_shape/input_dim 应该如何设置,又应该添加在哪里?
"""
class SASRec(tf.keras.Model):
    """SASRec-style sequential recommendation model with a parallel GRU branch.

    A stack of self-attention encoder blocks and a GRU both run over the same
    embedded item sequence; a learned sigmoid gate mixes the two branch
    outputs position-wise.  Pairwise (positive vs. negative) item logits are
    produced from the last sequence position, and the training loss — a
    BPR-style pairwise log-loss plus a weighted auxiliary GRU loss — is
    registered via ``self.add_loss`` inside ``call``.

    NOTE(review): as a subclassed model, Keras cannot infer an input shape up
    front; features that need a known input spec (e.g. distribution
    strategies) require explicit ``Input`` layers, as ``summary`` below does.
    """

    def __init__(self, item_fea_col, blocks=1, num_heads=1, ffn_hidden_unit=128, max_relative_position=2, dropout=0., maxlen=40, norm_training=True,
                 causality=False, embed_reg=1e-6):
        """Create all sub-layers.

        Args:
            item_fea_col: dict describing the item feature column; must provide
                'feat_num' (vocabulary size) and 'embed_dim' (embedding size).
            blocks: number of stacked ``EncoderLayer`` attention blocks.
            num_heads: attention heads per ``EncoderLayer``.
            ffn_hidden_unit: hidden width of the encoder feed-forward network;
                also passed to ``DenseA``.
            max_relative_position: stored on the instance but never read inside
                this class; presumably consumed by relative-position attention
                elsewhere — TODO confirm against ``EncoderLayer``.
            dropout: dropout rate for the sequence embedding and the encoder.
            maxlen: maximum input sequence length.
            norm_training: layer-normalisation training flag forwarded to
                ``EncoderLayer``.
            causality: whether the encoder applies a causal attention mask.
            embed_reg: L2 regularisation coefficient for the item embeddings.
        """
        super(SASRec, self).__init__()
        # sequence length
        self.maxlen = maxlen
        # item feature columns
        self.item_fea_col = item_fea_col
        # embed_dim
        self.embed_dim = self.item_fea_col['embed_dim']
        # d_model must be the same as embedding_dim, because of residual connection
        self.d_model = self.embed_dim
        self.max_relative_position = max_relative_position
        # item embedding; mask_zero=True makes item id 0 act as padding
        self.item_embedding = Embedding(input_dim=self.item_fea_col['feat_num'],
                                        input_length=1,
                                        output_dim=self.item_fea_col['embed_dim'],
                                        mask_zero=True,
                                        embeddings_initializer='random_uniform',
                                        embeddings_regularizer=l2(embed_reg))
        # self.pos_embedding = Embedding(input_dim=self.maxlen,
        #                                input_length=1,
        #                                output_dim=self.embed_dim,
        #                                mask_zero=False,
        #                                embeddings_initializer='random_uniform',
        #                                embeddings_regularizer=l2(embed_reg))
        self.dropout = Dropout(dropout)
        # attention block(s)
        self.encoder_layer = [EncoderLayer(self.d_model, num_heads, ffn_hidden_unit,
                                           dropout, norm_training, causality) for b in range(blocks)]
        # GRU branch over the same embedded sequence (fixed internal dropout 0.3)
        self.GRU_layer1 = GRU(self.item_fea_col['embed_dim'], activation='tanh', use_bias=True, dropout=0.3,
                              return_sequences=True)
        # gate network: Dense1 -> Dense2 yields a scalar mixing weight in (0, 1)
        self.Dense1 = Dense(300, use_bias=True)
        self.Dense2 = Dense(1, activation='sigmoid', use_bias=True)
        # projection applied to the GRU outputs (project-local layer — semantics
        # defined elsewhere; confirm against DenseA)
        self.Dense3 = DenseA(self.d_model, ffn_hidden_unit)

    def call(self, inputs, training=None):
        """Compute pairwise candidate logits and register the training loss.

        Args:
            inputs: 5-tuple of integer-id tensors
                ``(seq_inputs, pos_inputs, neg_inputs, gru_neg_inputs,
                gru_pos_inputs)``; per ``summary`` the shapes are
                (None, maxlen), (None, 1), (None, 1), (None, maxlen),
                (None, maxlen).
            training: standard Keras training flag (not used directly here).

        Returns:
            Tensor of shape (None, 2): ``[pos_logits, neg_logits]``
            concatenated on the last axis.

        Side effects:
            Adds the combined loss (pairwise log-loss + ``d * aux_loss``) via
            ``self.add_loss``.
        """
        # inputs
        seq_inputs, pos_inputs, neg_inputs, gru_neg_inputs, gru_pos_inputs = inputs  # (None, maxlen), (None, 1), (None, 1)
        # mask: 1.0 where the id is a real item, 0.0 at padding (id == 0)
        mask = tf.expand_dims(tf.cast(tf.not_equal(seq_inputs, 0), dtype=tf.float32), axis=-1)  # (None, maxlen, 1)
        # seq info
        seq_embed = self.item_embedding(seq_inputs)  # (None, maxlen, dim)
        gru_neg_embed = self.item_embedding(gru_neg_inputs)
        gru_pos_embed = self.item_embedding(gru_pos_inputs)
        # --- GRU branch ---
        gru_inputs = seq_embed
        gru_outputs = self.GRU_layer1(gru_inputs)
        gru_outputs = self.Dense3(gru_outputs)
        # dot-product scores of GRU states against pos/neg target embeddings
        gru_pos_logits = tf.reduce_sum(gru_outputs * gru_pos_embed, axis=-1)
        gru_neg_logits = tf.reduce_sum(gru_outputs * gru_neg_embed, axis=-1)
        # NOTE(review): scores are clipped and log-transformed here, then fed
        # through sigmoid + log again inside aux_loss — the double log looks
        # intentional but unusual; confirm.
        gru_pos_logits = tf.math.log(tf.clip_by_value(gru_pos_logits, 1e-8, tf.reduce_max(gru_pos_logits)))
        gru_neg_logits = tf.math.log(tf.clip_by_value(gru_neg_logits, 1e-8, tf.reduce_max(gru_neg_logits)))
        # auxiliary BPR-style loss on the GRU branch
        aux_loss = tf.reduce_mean(- tf.math.log(tf.nn.sigmoid(gru_pos_logits)) -
                                  tf.math.log(1 - tf.nn.sigmoid(gru_neg_logits))) / 2
        # pos encoding (disabled)
        # pos_encoding = positional_encoding(seq_inputs, self.embed_dim)
        # pos_encoding = tf.expand_dims(self.pos_embedding(tf.range(self.maxlen)), axis=0)
        # seq_embed += pos_encoding
        seq_embed = self.dropout(seq_embed)
        # --- self-attention branch ---
        att_outputs = seq_embed  # (None, maxlen, dim)
        att_outputs *= mask
        for block in self.encoder_layer:
            att_outputs = block([att_outputs, mask])  # (None, seq_len, dim)
            att_outputs *= mask  # re-zero padded positions after every block
        # tf.keras.losses.cosine_similarity returns the NEGATIVE cosine, so
        # a = -cos + 1 lies in [0, 2]; its mean d below scales aux_loss.
        cos_similarity = tf.keras.losses.cosine_similarity(seq_embed, gru_outputs)
        a = cos_similarity + 1
        # user_info = tf.reduce_mean(att_outputs, axis=1)  # (None, dim)
        # item info
        pos_info = self.item_embedding(pos_inputs)  # (None, 1, dim)
        neg_info = self.item_embedding(neg_inputs)  # (None, 1/100, dim)
        # --- gate fusing the GRU and attention branches ---
        inputs1 = tf.concat([seq_embed, gru_outputs, gru_pos_embed], axis=-1)
        out = self.Dense1(inputs1)
        sim = self.Dense2(out)  # mixing weight in (0, 1)
        outputs = sim * gru_outputs + (1 - sim) * att_outputs
        # user representation = fused output at the last sequence position
        user_info = tf.expand_dims(outputs[:, -1], axis=1)  # (None, 1, dim)
        pos_logits = tf.reduce_sum(user_info * pos_info, axis=-1)  # (None, 1)
        neg_logits = tf.reduce_sum(user_info * neg_info, axis=-1)  # (None, 1)
        # loss: pairwise log-loss + cosine-weighted auxiliary GRU loss
        d = tf.reduce_mean(a)
        losses = tf.reduce_mean(- tf.math.log(tf.nn.sigmoid(pos_logits)) -
                                tf.math.log(1 - tf.nn.sigmoid(neg_logits))) / 2 + d * aux_loss
        self.add_loss(losses)
        logits = tf.concat([pos_logits, neg_logits], axis=-1)
        return logits

    def summary(self):
        """Print a layer summary by wrapping ``call`` in a functional model.

        Subclassed models have no defined input spec until first called, so
        this builds explicit ``Input`` placeholders and a throwaway functional
        ``tf.keras.Model`` purely for ``summary()``.
        """
        seq_inputs = Input(shape=(self.maxlen,), dtype=tf.int32)
        pos_inputs = Input(shape=(1,), dtype=tf.int32)
        neg_inputs = Input(shape=(1,), dtype=tf.int32)
        gru_neg_inputs = Input(shape=(self.maxlen,), dtype=tf.int32)
        gru_pos_inputs = Input(shape=(self.maxlen,), dtype=tf.int32)
        tf.keras.Model(inputs=[seq_inputs, pos_inputs, neg_inputs, gru_neg_inputs, gru_pos_inputs],
                       outputs=self.call([seq_inputs, pos_inputs, neg_inputs, gru_neg_inputs, gru_pos_inputs])).summary()
"""
解决方案
您是把输入数据直接提供给了模型,而该模型并不是用 Keras 函数式 API 构建的。正确的做法是:在您的第一层(看起来是 Embedding 层)和实际数据之间增加一个 Input 层,即显式指定输入形状。
推荐阅读
- r - 小标题里面的小标题到列表
- c++ - 将 jpeg 文件读取为无符号字符数组
- java - 使用 AfterInstall 和 Inno Setup 时如何运行包含空格的路径(捆绑的 JRE)的命令
- html - 使用叠加按钮构建网格图块
- php - php 中缺少类 php_com_dotnet
- ios - UICollectionView 重新加载数据而不调用 reloadData()
- javascript - 保存平面列表中的参数导航,并将其显示在另一个屏幕上
- javascript - ReactJS 中的异步函数阻塞组件
- docx4j - 通过 docx4j-export-fo 在 TOC 中获取错误的页码
- .net - 使用 CefSharp 从 JavaScript 调用 C# 是否有一种不那么痛苦的方法?