python - 张量流中辍学层的奇怪行为

问题描述

我用 tensorflow 制作了一个 CNN 模型，它实现了 dropouts 层。我在网络函数中传递了 is_training 参数，因此 dropout 将在测试阶段被禁用，我意识到当我禁用它时错误明显更高。如果我用 dropout 函数（这不是逻辑）测试模型，我得到的平均误差为 0.01，而当我通过指定 is_training 为 False 来测试它时（但仍然用 dropout 训练它）我得到一个0.8 的误差。我不明白我的错误在哪里

这是模型函数：

def conv_net(x, arch, is_training=False):

    # MNIST data input is a 1-D vector of 784 features (28*28 pixels)
    # Reshape to match picture format [Height x Width x Channel]
    # Tensor input becomes 4-D: [Batch Size, Height, Width, Channel]
    x = tf.reshape(x, shape=[-1, 28, 28, 1])

    ### YOUR CODE STARTS HERE ###

    # Convolution Layer with F1 filters, a kernel size of K1 and ReLU activations
    pad = 'same'

    conv1 = tf.layers.conv2d(x, arch['conv1'][0], arch['conv1'][1], activation=tf.nn.relu)
    conv2 = tf.layers.conv2d(conv1, arch['conv2'][0], arch['conv2'][1], activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(conv2, arch['pool1'][0], arch['pool1'][0])
    drop1 = tf.layers.dropout(pool1, arch['dropout1'], training=is_training)

    conv3 = tf.layers.conv2d(drop1, arch['conv3'][0], arch['conv3'][1], activation=tf.nn.relu)  # # TODO: add padding
    drop1_2 = tf.layers.dropout(conv3, arch['dropout1'], training=is_training)
    conv4 = tf.layers.conv2d(drop1_2, arch['conv4'][0], arch['conv4'][1], activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(conv4, arch['pool2'][0], arch['pool2'][0])

    drop2 = tf.layers.dropout(pool2, arch['dropout2'], training=is_training)

    flat = tf.contrib.layers.flatten(drop2)

    fc1 = tf.layers.dense(flat, arch['N'])

    out = tf.layers.dense(fc1, n_classes)
    ### YOUR CODE ENDS HERE ###

    return out

以及训练功能：

def train_test_model(hypers, save_final_model=False):
    # Running the training session
    print("Starting training session...")
    with tf.Session() as sess:

        # Run the initializer
        sess.run(init)
        total_batch = int(mnist.train.num_examples / hypers.batch_size)
        # Training cycle
        try:
            for epoch in range(hypers.n_epochs):
                avg_cost = 0.

                # Loop over all batches
                for i in range(total_batch):
                    batch_x, batch_y = mnist.train.next_batch(hypers.batch_size)
                    # Run optimization op (backprop) and cost op (to get loss value)
                    _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                                  y: batch_y})
                    # Compute average loss
                    avg_cost += c / total_batch
                # Display logs per epoch step
                if epoch % display_step == 0:

                    # Test model
                    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))

                    # Calculate accuracy
                    # ORIGINAL:
                    # accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
                    # train_err = 1-accuracy.eval({x: mnist.train.images, y: mnist.train.labels})
                    # valid_err = 1-accuracy.eval({x: mnist.validation.images, y: mnist.validation.labels})

                    # WITH BATCHES FOR LESS MEM ALLOC
                    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
                    train_acc = 0
                    for i in range(total_batch):
                        batch_x, batch_y = mnist.train.next_batch(hypers.batch_size)
                        train_acc += accuracy.eval(feed_dict={x:batch_x,
                                                              y:batch_y})
                    train_acc /= total_batch

                    train_err = 1 - train_acc
                    valid_err = 1 - accuracy.eval({x: mnist.validation.images, y: mnist.validation.labels})
                    # Display accuracy
                    print("Epoch:", '%05d' % (epoch + 1), ", cost=",
                          "{:.9f}".format(avg_cost), ", train_err=", "{:.4f}".format(train_err), ", valid_err=",
                          "{:.4f}".format(valid_err))

                if epoch % 5 == 0:
                    v = input('Do you want to stop the model? [Y/n]')
                    if 'y' in v.lower():
                        raise KeyboardInterrupt

        except KeyboardInterrupt:
            hypers.n_epochs = epoch
            print("SIGINT Received, interrupting the training")



        print("\nOptimization Finished!\n")

        # Test model
        correct_prediction = tf.equal(tf.argmax(test_pred, 1), tf.argmax(y, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        # modified to batches
        train_acc = 0
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(hypers.batch_size)
            train_acc += accuracy.eval(feed_dict={x: batch_x,
                                                  y: batch_y})
        train_acc /= total_batch
        train_err = 1 - train_acc
        #
        valid_err = 1 - accuracy.eval({x: mnist.validation.images, y: mnist.validation.labels})
        print("Optimized for ", '%05d' % (epoch + 1), "epochs, to obtain training error", "{:.4f}".format(train_err),
              ", and validation error", "{:.4f}".format(valid_err))
        confusion = tf.confusion_matrix(tf.argmax(pred, 1), tf.argmax(y, 1))
        print("\nValidation Confusion matrix:\n",
              confusion.eval({x: mnist.validation.images, y: mnist.validation.labels}))

标签： pythontensorflow

python - 张量流中辍学层的奇怪行为

问题描述

解决方案

推荐阅读