ValueError when adding BatchNormalization and Activation layers to a custom layer in Keras

Problem description

I created a custom layer in Keras in which I define the forward propagation by calling conv_forward():

from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, Add
from tensorflow.keras.models import Model
from tensorflow.keras import metrics


import tensorflow as tf

tf.keras.backend.clear_session()  # For easy reset of notebook state.

print(tf.__version__)


def conv_forward(A_prev, W, b, hparameters): # forward prop with a for loop
    """
    Implements the forward propagation for a convolution function

    Arguments:
    A_prev -- output activations of the previous layer, numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
    W -- Weights, numpy array of shape (f, f, n_C_prev, n_C)
    b -- Biases, numpy array of shape (1, 1, 1, n_C)
    hparameters -- python dictionary containing "expand" and "channels"

    Returns:
    A_prev -- conv output, tensor of shape (m, n_H, n_W, n_C)
    """
    expand = hparameters["expand"]
    channels = hparameters["channels"]
    depth_batch = channels // expand

    # Conv2D for 1 step of gradual update
    for i in range(depth_batch):
        # if you don't add b or don't use a registered parameter, TensorFlow raises:
        # "Gradients do not exist for variables ['layer/Variable:0'] when minimizing the loss"
        Z = tf.nn.conv2d(A_prev, W, [1, 1, 1, 1], "SAME") + b
        A_prev = tf.concat([A_prev[:, :, :, :i*expand ], Z, A_prev[:, :, :, i*expand + expand : ]], 3)

    return A_prev


class Gunn2D(layers.Layer): # custom layer definition 

  def __init__(self, input_channels, expansion_rate=32):
    super(Gunn2D, self).__init__()
    self.input_channels = input_channels
    self.expansion_rate = expansion_rate
    self.hparameters = {"expand": self.expansion_rate, "channels": self.input_channels}

  def build(self, input_shape):
    self.w = self.add_weight(shape=(3, 3, self.input_channels, self.expansion_rate), initializer='random_normal', trainable=True)
    self.b = self.add_weight(shape=(1, 1, 1, self.expansion_rate), initializer='random_normal', trainable=True)

  def call(self, inputs):
    output = conv_forward(inputs, self.w, self.b, self.hparameters)
    return output 


def GunnModel(input_shape):
    """
    Implementation of the Model.

    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    model -- a Model() instance in Keras
    """

    X_input = Input(input_shape)
    Gunn2D_layer = Gunn2D(6, 2)  # At instantiation, we don't know on what inputs this is going to get called
    X = Gunn2D_layer(X_input)  # using the Custom Keras layer
    print('After gunnlayer : {}'.format(X.get_shape()))

    X = Flatten()(X)
    X = Dense(3, activation='softmax', name = 'fc1')(X)

    model = Model(inputs = X_input, outputs = X, name = 'GunnModel')

    return model


I create the model and fit it:

X_train = tf.ones((50, 5, 5, 6))
X_test = tf.ones((20, 5, 5, 6))
Y_train = tf.ones((50, 3))
Y_test = tf.ones((20, 3))

gunnModel = GunnModel(X_train.shape[1:])
gunnModel.compile(optimizer = "adam", loss='categorical_crossentropy', metrics=[metrics.categorical_accuracy])
gunnModel.fit(x = X_train , y = Y_train, epochs = 5, steps_per_epoch = (X_train.shape[0]//10))
preds = gunnModel.evaluate(x=X_test, y=Y_test)
print()
print ("Loss = " + str(preds[0]))
print ("Test Accuracy = " + str(preds[1]))

It trains successfully and gives the following output:

Epoch 1/5
5/5 [==============================] - 0s 3ms/step - loss: 3.3864 - categorical_accuracy: 1.0000
Epoch 2/5
5/5 [==============================] - 0s 2ms/step - loss: 3.3766 - categorical_accuracy: 0.0000e+00
Epoch 3/5
5/5 [==============================] - 0s 3ms/step - loss: 3.3967 - categorical_accuracy: 0.0000e+00
Epoch 4/5
5/5 [==============================] - 0s 2ms/step - loss: 3.4462 - categorical_accuracy: 0.8000
Epoch 5/5
5/5 [==============================] - 0s 2ms/step - loss: 3.5673 - categorical_accuracy: 1.0000
1/1 [==============================] - 0s 2ms/step - loss: 3.6945 - categorical_accuracy: 1.0000

Loss = 3.69450306892395
Test Accuracy = 1.0

This is a dummy program just to get the layer working, so don't worry about the accuracy.

Let's print the summary:

gunnModel.summary()

Output:

Model: "GunnModel"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 5, 5, 6)]         0         
_________________________________________________________________
gunn2d (Gunn2D)              (None, 5, 5, 6)           110       
_________________________________________________________________
flatten (Flatten)            (None, 150)               0         
_________________________________________________________________
fc1 (Dense)                  (None, 3)                 453       
=================================================================
Total params: 563
Trainable params: 563
Non-trainable params: 0
_________________________________________________________________

Now, when I add more functionality to the custom layer, specifically BatchNormalization and Activation inside conv_forward(), it gives me an error.

The new definition of conv_forward():

def conv_forward(A_shortcut, W1, b1, W2, b2, W3, b3, hparameters):
    """
    Implements the forward propagation for a convolution function

    Arguments:
    A_shortcut -- output activations of the previous layer, tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
    W1, W2, W3 -- weights, tensors of shape (f, f, n_C_prev, n_C)
    b1, b2, b3 -- biases, tensors of shape (1, 1, 1, n_C)
    hparameters -- python dictionary containing "expand" and "channels"

    Returns:
    A -- conv output, numpy array of shape (m, n_H, n_W, n_C)
    """
    expand = hparameters["expand"]
    channels = hparameters["channels"]
    depth_batch = channels // expand

    # Conv2D for 1 step of gradual update
    # Note: if you don't add b or don't use a registered parameter, TensorFlow raises:
    # "Gradients do not exist for variables ['layer/Variable:0'] when minimizing the loss"
    A = tf.identity(A_shortcut)
    for i in range(depth_batch):
        Z = tf.nn.conv2d(A, W1, [1, 1, 1, 1], "VALID") + b1
        A = tf.concat([A[:, :, :, :i*expand ], Z, A[:, :, :, i*expand + expand : ]], 3)
    A = BatchNormalization(axis = 3 , name = 'Gunn_BN_1')(A)
    A = Activation('relu')(A)

    for i in range(depth_batch):
        Z = tf.nn.conv2d(A, W2, [1, 1, 1, 1], "SAME") + b2
        A = tf.concat([A[:, :, :, :i*expand ], Z, A[:, :, :, i*expand + expand : ]], 3)
    A = BatchNormalization(axis = 3 , name = 'Gunn_BN_2')(A)
    A = Activation('relu')(A)

    for i in range(channels):
        Z = tf.nn.conv2d(A, W3, [1, 1, 1, 1], "VALID") + b3
        A = tf.concat([A[:, :, :, :i ], Z, A[:, :, :, i + 1 : ]], 3)
    A = BatchNormalization(axis = 3 , name = 'Gunn_BN_3')(A)

    # Add shortcut value to main path. This implements the identity block in Residual Network.
    A = Add()([A , A_shortcut])
    print('Resnet : {}'.format(A.shape))

    return A

Error:

Resnet : (None, 5, 5, 6)
After gunnlayer : (None, 5, 5, 6)
Epoch 1/5
Resnet : (10, 5, 5, 6)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-28-d6d9fedc335a> in <module>()
      6 gunnModel = GunnModel(X_train.shape[1:])
      7 gunnModel.compile(optimizer = "adam", loss='categorical_crossentropy', metrics=[metrics.categorical_accuracy])
----> 8 gunnModel.fit(x = X_train , y = Y_train, epochs = 5, steps_per_epoch = (X_train.shape[0]//10))
      9 preds = gunnModel.evaluate(x=X_test, y=Y_test)
     10 print()

9 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
    966           except Exception as e:  # pylint:disable=broad-except
    967             if hasattr(e, "ag_error_metadata"):
--> 968               raise e.ag_error_metadata.to_exception(e)
    969             else:
    970               raise

ValueError: in user code:

    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:505 train_function  *
        outputs = self.distribute_strategy.run(
    <ipython-input-1-3e0d6e941353>:75 call  *
        output = conv_forward(inputs, self.w1, self.b1, self.w2, self.b2, self.w3, self.b3, self.hparameters)
    <ipython-input-27-56c3c46e1785>:37 conv_forward  *
        A = BatchNormalization(axis = 3 , name = 'Gunn_BN_1')(A)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:897 __call__  **
        self._maybe_build(inputs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:2416 _maybe_build
        self.build(input_shapes)  # pylint:disable=not-callable
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/normalization.py:400 build
        experimental_autocast=False)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:577 add_weight
        caching_device=caching_device)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/tracking/base.py:743 _add_variable_with_custom_getter
        **kwargs_for_getter)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer_utils.py:141 make_variable
        shape=variable_shape if variable_shape else None)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:259 __call__
        return cls._variable_v1_call(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:220 _variable_v1_call
        shape=shape)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
        return captured_getter(captured_previous, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2562 creator
        return next_creator(**kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
        return captured_getter(captured_previous, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2562 creator
        return next_creator(**kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
        return captured_getter(captured_previous, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2562 creator
        return next_creator(**kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py:66 getter
        return captured_getter(captured_previous, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py:511 invalid_creator_scope
        "tf.function-decorated function tried to create "

    ValueError: tf.function-decorated function tried to create variables on non-first call.

I even tried decorating the for loops with @tf.function, but the problem persists. I suspect the failure happens right after the BatchNormalization is applied: during training the 'Resnet' print statement shows up, but the next print statement, 'After gunnlayer', never does. Both are printed during model creation.
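
For what it's worth, the print behaviour on its own is expected under tf.function tracing: a plain Python print() only runs while the graph is being traced, while tf.print() runs on every step. A minimal sketch (not part of the original code) illustrating the difference:

import tensorflow as tf

@tf.function
def step(x):
    print("tracing")        # Python-level: runs only when the function is (re)traced
    tf.print("executing")   # graph-level: runs on every call
    return x * 2

step(tf.constant(1.0))  # prints "tracing" and "executing"
step(tf.constant(2.0))  # prints only "executing"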

Tags: python, tensorflow, keras, deep-learning, conv-neural-network

Solution
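
The traceback points at the root cause: BatchNormalization(...) and Activation(...) are instantiated inside conv_forward(), which is executed inside the traced train_function, so fresh Keras layers (and fresh variables) are created on every training step and TensorFlow raises "tf.function-decorated function tried to create variables on non-first call." A likely fix, sketched here under the assumption that the rest of Gunn2D stays as in the question: create the sub-layers once in __init__() and only call them inside call().

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import BatchNormalization, Activation, Add

class Gunn2D(layers.Layer):

    def __init__(self, input_channels, expansion_rate=32):
        super(Gunn2D, self).__init__()
        self.input_channels = input_channels
        self.expansion_rate = expansion_rate
        # Create the Keras sub-layers once; their variables are built on the first call only.
        self.bn1 = BatchNormalization(axis=3, name='Gunn_BN_1')
        self.relu = Activation('relu')
        self.add = Add()

    def build(self, input_shape):
        self.w1 = self.add_weight(shape=(3, 3, self.input_channels, self.expansion_rate),
                                  initializer='random_normal', trainable=True)
        self.b1 = self.add_weight(shape=(1, 1, 1, self.expansion_rate),
                                  initializer='random_normal', trainable=True)
        # w2/b2, w3/b3 and bn2/bn3 would follow the same pattern for the remaining blocks.

    def call(self, inputs, training=None):
        expand = self.expansion_rate
        A = tf.identity(inputs)
        # Gradual channel update, as in the question's conv_forward()
        for i in range(self.input_channels // expand):
            Z = tf.nn.conv2d(A, self.w1, [1, 1, 1, 1], "SAME") + self.b1
            A = tf.concat([A[:, :, :, :i*expand], Z, A[:, :, :, i*expand + expand:]], 3)
        A = self.bn1(A, training=training)  # reuse the layer instead of re-creating it
        A = self.relu(A)
        # ...the second and third convolution blocks follow the same pattern...
        return self.add([A, inputs])        # identity shortcut for the ResNet-style block

Because the BatchNormalization, Activation and Add layers are now attributes of Gunn2D, their variables are created exactly once during the first call, so later traced calls no longer try to create variables, and the BN parameters also show up in model.summary().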

