python - ConcatOp:输入的尺寸应该匹配:shape[0] = [1,24] vs. shape[1] = [256,8] [Op:ConcatV2] 名称:concat
问题描述
我训练了一个 RNN 分类器。这是我的代码:
class RNNSentimentClassifier: def init (self, vocab_size, sequence_length): # 定义超参数 self.learning_rate = 0.2 # 应该差不多 self.training_epochs = 10 # 训练多长时间 - 选择适合上课时间 self.display_epoch_freq = 5 # 测试和打印统计数据的频率 self.dim = 24 # RNN 的隐藏状态的维度 self.embedding_dim = 8 # 学习的词嵌入的维度 self.batch_size = 256 # 有点随意 - 可以调整,但经常调整速度,而不是准确性 self.vocab_size = vocab_size # 由上面的文件阅读器定义 self.sequence_length = sequence_length # 由上面的文件阅读器定义 self.l2_lambda = 0.001
self.trainable_variables = []
# Define the parameters
self.E = tf.Variable(tf.random.normal([self.vocab_size, self.embedding_dim], stddev=0.1))
self.trainable_variables.append(self.E)
self.W_cl = tf.Variable(tf.random.normal([self.dim, 5], stddev=0.1))
self.b_cl = tf.Variable(tf.random.normal([5], stddev=0.1))
self.trainable_variables.append(self.W_cl)
self.trainable_variables.append(self.b_cl)
# TODO 1: Define the RNN parameters
self.W = tf.Variable(tf.random.normal([self.dim + self.embedding_dim, self.dim], stddev=0.1))
self.b = tf.Variable(tf.random.normal([self.dim], stddev=0.1))
self.trainable_variables.append(self.W)
self.trainable_variables.append(self.b)
def model_1(self,x):
# Split up the inputs into individual tensors
self.x_slices = tf.split(x, self.sequence_length, 1)
#print('x slices')
#print(self.x_slices)
# Define the start state of the RNN
self.h_zero = tf.zeros([1, self.dim])
# TODO 2: Write a (very short) Python function that defines one step of an RNN
def step_1(x, h_prev):
#add your code here
x_e=tf.nn.embedding_lookup(self.E,x)
#print(x_e.shape)
#print(h_prev.shape)
concat=tf.concat([h_prev,x_e],1)
#concat=tf.concat(h_prev,x_e)
h = tf.tanh(tf.matmul(concat,self.W)+self.b)
return h
# TODO 3: Unroll the RNN using a for loop, and obtain the sentence representation with the final hidden state
sentence_representation = None
sentence_representation = self.h_zero
# tf.print(sentence_representation)
#for slice in self.x_slices:
# tf.print(slice)
for slice in self.x_slices:
slice_1=tf.reshape(slice,shape=[-1])
sentence_representation=step_1(slice_1, sentence_representation)
# Compute the logits using one last linear layer
logits = tf.matmul(sentence_representation, self.W_cl) + self.b_cl
return logits
def train(self, training_data, dev_set):
def get_minibatch(dataset, start_index, end_index):
indices = range(start_index, end_index)
vectors = np.vstack([dataset[i]['index_sequence'] for i in indices])
labels = [dataset[i]['label'] for i in indices]
return vectors, labels
print('Training.')
# Training cycle
for epoch in range(self.training_epochs):
random.shuffle(training_set)
avg_cost = 0.
total_batch = int(len(training_set) / self.batch_size)
# Loop over all batches in epoch
for i in range(total_batch):
# Assemble a minibatch of the next B examples
minibatch_vectors, minibatch_labels = get_minibatch(training_set,
self.batch_size * i,
self.batch_size * (i + 1))
# Run the optimizer to take a gradient step, and also fetch the value of the
# cost function for logging
with tf.GradientTape() as tape:
logits = self.model_1(minibatch_vectors)
# Define the L2 cost
self.l2_cost = self.l2_lambda * (tf.reduce_sum(tf.square(self.W)) +
tf.reduce_sum(tf.square(self.W_cl)))
# Define the cost function (here, the softmax exp and sum are built in)
total_cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=minibatch_labels, logits=logits) + self.l2_cost)
# This performs the main SGD update equation with gradient clipping
optimizer = tf.optimizers.SGD(self.learning_rate)
gradients = tape.gradient(total_cost, self.trainable_variables)
gvs = zip(gradients, self.trainable_variables)
capped_gvs = [(tf.clip_by_norm(grad, 5.0), var) for grad, var in gvs if grad is not None]
optimizer.apply_gradients(capped_gvs)
# Compute average loss
avg_cost += total_cost / total_batch
# Display some statistics about the step
# Evaluating only one batch worth of data -- simplifies implementation slightly
if (epoch+1) % self.display_epoch_freq == 0:
tf.print("Epoch:", (epoch+1), "Cost:", avg_cost, \
"Dev acc:", evaluate_classifier(self.classify, dev_set[0:256]), \
"Train acc:", evaluate_classifier(self.classify, training_set[0:256]))
def classify(self, examples):
# This classifies a list of examples
vectors = np.int32(np.vstack([example['index_sequence'] for example in examples]))
logits = self.model_1(vectors)
return np.argmax(logits, axis=1)
我收到以下错误:ConcatOp:输入的尺寸应该匹配:shape[0] = [1,24] vs. shape[1] = [256,8] [Op:ConcatV2] name: concat
请问我应该怎么做才能解决这个问题?
解决方案
推荐阅读
- python - 不以数字开头的模式的正则表达式
- sas - PROC FORMAT 可以用于跨多列的组内求和吗?
- c# - 在调用 chart.chartdata.activate() 方法时隐藏 excel
- c# - 清除订阅:什么会破坏这个自清洁事件发布者?
- python - 使用 Flask 从 API 解析 JSON 数据?
- javascript - 如何将 td 引用到同一 tr 中具有相似类的另一个 td?
- python - 相对导入:子包导入子包
- git - 如果忘记创建新的 Git 功能分支怎么办?
- swift - 将字符串动态转换为 JSONDecoder Decodable.Protocol 类型
- javascript - 如果单击了具有特定 ID 的按钮,则在卸载前禁用 JS