python - 张量流中辍学层的奇怪行为
问题描述
我用 tensorflow 制作了一个 CNN 模型,它实现了 dropouts 层。我在网络函数中传递了 is_training 参数,因此 dropout 将在测试阶段被禁用,我意识到当我禁用它时错误明显更高。如果我用 dropout 函数(这不是逻辑)测试模型,我得到的平均误差为 0.01,而当我通过指定 is_training 为 False 来测试它时(但仍然用 dropout 训练它)我得到一个0.8 的误差。我不明白我的错误在哪里
这是模型函数:
def conv_net(x, arch, is_training=False):
# MNIST data input is a 1-D vector of 784 features (28*28 pixels)
# Reshape to match picture format [Height x Width x Channel]
# Tensor input becomes 4-D: [Batch Size, Height, Width, Channel]
x = tf.reshape(x, shape=[-1, 28, 28, 1])
### YOUR CODE STARTS HERE ###
# Convolution Layer with F1 filters, a kernel size of K1 and ReLU activations
pad = 'same'
conv1 = tf.layers.conv2d(x, arch['conv1'][0], arch['conv1'][1], activation=tf.nn.relu)
conv2 = tf.layers.conv2d(conv1, arch['conv2'][0], arch['conv2'][1], activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(conv2, arch['pool1'][0], arch['pool1'][0])
drop1 = tf.layers.dropout(pool1, arch['dropout1'], training=is_training)
conv3 = tf.layers.conv2d(drop1, arch['conv3'][0], arch['conv3'][1], activation=tf.nn.relu) # # TODO: add padding
drop1_2 = tf.layers.dropout(conv3, arch['dropout1'], training=is_training)
conv4 = tf.layers.conv2d(drop1_2, arch['conv4'][0], arch['conv4'][1], activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(conv4, arch['pool2'][0], arch['pool2'][0])
drop2 = tf.layers.dropout(pool2, arch['dropout2'], training=is_training)
flat = tf.contrib.layers.flatten(drop2)
fc1 = tf.layers.dense(flat, arch['N'])
out = tf.layers.dense(fc1, n_classes)
### YOUR CODE ENDS HERE ###
return out
以及训练功能:
def train_test_model(hypers, save_final_model=False):
# Running the training session
print("Starting training session...")
with tf.Session() as sess:
# Run the initializer
sess.run(init)
total_batch = int(mnist.train.num_examples / hypers.batch_size)
# Training cycle
try:
for epoch in range(hypers.n_epochs):
avg_cost = 0.
# Loop over all batches
for i in range(total_batch):
batch_x, batch_y = mnist.train.next_batch(hypers.batch_size)
# Run optimization op (backprop) and cost op (to get loss value)
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# Display logs per epoch step
if epoch % display_step == 0:
# Test model
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# Calculate accuracy
# ORIGINAL:
# accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# train_err = 1-accuracy.eval({x: mnist.train.images, y: mnist.train.labels})
# valid_err = 1-accuracy.eval({x: mnist.validation.images, y: mnist.validation.labels})
# WITH BATCHES FOR LESS MEM ALLOC
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
train_acc = 0
for i in range(total_batch):
batch_x, batch_y = mnist.train.next_batch(hypers.batch_size)
train_acc += accuracy.eval(feed_dict={x:batch_x,
y:batch_y})
train_acc /= total_batch
train_err = 1 - train_acc
valid_err = 1 - accuracy.eval({x: mnist.validation.images, y: mnist.validation.labels})
# Display accuracy
print("Epoch:", '%05d' % (epoch + 1), ", cost=",
"{:.9f}".format(avg_cost), ", train_err=", "{:.4f}".format(train_err), ", valid_err=",
"{:.4f}".format(valid_err))
if epoch % 5 == 0:
v = input('Do you want to stop the model? [Y/n]')
if 'y' in v.lower():
raise KeyboardInterrupt
except KeyboardInterrupt:
hypers.n_epochs = epoch
print("SIGINT Received, interrupting the training")
print("\nOptimization Finished!\n")
# Test model
correct_prediction = tf.equal(tf.argmax(test_pred, 1), tf.argmax(y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# modified to batches
train_acc = 0
for i in range(total_batch):
batch_x, batch_y = mnist.train.next_batch(hypers.batch_size)
train_acc += accuracy.eval(feed_dict={x: batch_x,
y: batch_y})
train_acc /= total_batch
train_err = 1 - train_acc
#
valid_err = 1 - accuracy.eval({x: mnist.validation.images, y: mnist.validation.labels})
print("Optimized for ", '%05d' % (epoch + 1), "epochs, to obtain training error", "{:.4f}".format(train_err),
", and validation error", "{:.4f}".format(valid_err))
confusion = tf.confusion_matrix(tf.argmax(pred, 1), tf.argmax(y, 1))
print("\nValidation Confusion matrix:\n",
confusion.eval({x: mnist.validation.images, y: mnist.validation.labels}))
解决方案
推荐阅读
- python - 从 Flask-SQLAlchemy 获取 UTC 中的 DateTime 字段
- office-ui-fabric - Office UI Fabric React:下拉组件
- sitecore - 使字段只读,除了在 Sitecore 中具有特定角色的用户之外的所有用户?
- javascript - 将文件名设置为 Blob 文件
- php - 最好的插入方式。或 , 显示货币时为整数
- javascript - 鼠标悬停时如何更改段落文本?
- asp.net - (403) IIS 上出现禁止错误,但 IIS Express 工作正常
- python - 具有一个值的 Python 直方图
- django - 如何保护我的 .pyc 文件不会从 docker 中提取出来
- python - 有没有使用 Python 脚本读取 .one(OneNote 文件)?