首页 > 解决方案 > 如何在实现自己的池化层时处理由于输入形状(如 (None, 1024, 1024, 4))而导致的“TypeError”?

问题描述

我天真地尝试实现自己的池化和取消池化功能,以便在池化时保留索引,以便在取消池化时将值映射到索引。这是代码。

from keras.layers import Input, Layer
from keras.models import Model
from keras.layers.convolutional import Conv2D
import keras.backend as K

class MaxPool2DWithIndices(Layer):
    def __init__(self, filter_size=(2,2), strides=(2,2), padding='valid', **kwargs):
        super(MaxPool2DWithIndices, self).__init__(**kwargs)
        self.filter_size = filter_size
        self.strides = strides
        self.padding = padding

    def __call__(self, inputs):
        '''
        inputs: (n, H, W, d), ie batch of n inputs of size (H, W, d)
        '''
        print (inputs.shape)
        inputs_shape = inputs.shape #K.array(inputs.shape)
        pooled_H = (inputs_shape[1]-self.filter_size[0])//self.strides[0]+1
        pooled_W = (inputs_shape[2]-self.filter_size[1])//self.strides[1]+1
        #if type(inputs_shape[0]) is not int: inputs_shape[0] = 1
        mpooled = np.zeros((inputs_shape[0], pooled_H, pooled_W, inputs_shape[3]))
        indices = np.zeros((inputs_shape[0], pooled_H, pooled_W, inputs_shape[3]))
        for n in range(0, inputs_shape[0], 2): # for each example
            for i in range(0, inputs_shape[1], 2):
                for j in range(0, inputs_shape[2], 2):
                    for k in range(inputs_shape[3]):
                        mpooled[n, i//2, j//2, k] = np.max(inputs[n, i:i+2, j:j+2, k])
                        indices[n, i//2, j//2, k] = np.argmax(inputs[n, i:i+2, j:j+2, k])
        return [mpooled, indices]

class MaxUnPool2DWithIndices(Layer):
    def __init__(self, filter_size=(2,2), strides=(2,2), padding='valid'):
        super(MaxUnPool2DWithIndices, self).__init__(**kwargs)
        self.indices = indices
        self.filter_size = filter_size
        self.strides = strides
        self.padding = padding

    def __call__(self, inputs, indices):
        '''
        inputs: (n, H, W, d), ie batch of n inputs of size (H, W, d)
        '''
        inputs_shape = inputs.shape #K.array(inputs.shape)
        unpooled_H = (inputs_shape[1]-1)*self.strides[0]+self.filter_size[0]
        unpooled_W = (inputs_shape[2]-1)*self.strides[1]+self.filter_size[1]
        #if type(inputs_shape[0]) is not int: inputs_shape[0] = 1
        max_unpooled = np.zeros((inputs_shape[0], unpooled_H, unpooled_W, inputs_shape[3]))
        for n in range(inputs_shape[0]): # for each example
            for i in range(0, unpooled_H, 2):
                for j in range(0, unpooled_W, 2):
                    for k in range(inputs_shape[2]):
                        if self.indices[n, i//2, j//2, k] == 0:
                            max_unpooled[n, i+0, j+0, k] = inputs[n, i//2, j//2, k]
                        elif self.indices[n, i//2, j//2, k] == 1:
                            max_unpooled[n, i+0, j+1, k] = inputs[n, i//2, j//2, k]
                        elif self.indices[n, i//2, j//2, k] == 2:
                            max_unpooled[n, i+1, j+0, k] = inputs[n, i//2, j//2, k]
                        else: # it is 3
                            max_unpooled[n, i+1, j+1, k] = inputs[n, i//2, j//2, k]
        return max_unpooled

让以下模型作为复制错误的示例:

input_layer = Input(shape=(1024, 1024, 1))

encoded_out = Conv2D(4, (3, 3), activation='relu', padding="same")(input_layer)
encoded_out, indices1 = MaxPool2DWithIndices(filter_size=(2,2), strides=(2,2), padding='valid')(encoded_out)

decoded_out = MaxUnPool2DWithIndices(filter_size=(2,2), strides=(2,2), padding='valid')(encoded_out, indices1)
decoded_out = Conv2D(4, (3, 3), activation='relu', padding="same")(decoded_out)

model = Model(input_layer, decoded_out)

上面的代码输出以下错误:

(None, 1024, 1024, 4)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-77-5a2cf359f3ac> in <module>
      2 
      3 encoded_out = Conv2D(4, (3, 3), activation='relu', padding="same")(input_layer)
----> 4 encoded_out, indices1 = MaxPool2DWithIndices(filter_size=(2,2), strides=(2,2), padding='valid')(encoded_out)
      5 
      6 decoded_out = MaxUnPool2DWithIndices(filter_size=(2,2), strides=(2,2), padding='valid')(encoded_out, indices1)

<ipython-input-76-8a54043c249e> in __call__(self, inputs)
     20         pooled_W = (inputs_shape[2]-self.filter_size[1])//self.strides[1]+1
     21         #if type(inputs_shape[0]) is not int: inputs_shape[0] = 1
---> 22         mpooled = np.zeros((inputs_shape[0], pooled_H, pooled_W, inputs_shape[3]))
     23         indices = np.zeros((inputs_shape[0], pooled_H, pooled_W, inputs_shape[3]))
     24         for n in range(0, inputs_shape[0], 2): # for each example

TypeError: 'NoneType' object cannot be interpreted as an integer

这当然是由于事先不知道批量大小。我该如何解决这个问题?

标签: conv-neural-networkmax-pooling

解决方案


这些__call__函数需要一些 if-else 条件来处理 input.shape=(None, H, W, d) 或 input.shape=(None, None, None, d) 等情况。解决了!


推荐阅读