How to convert the following CNN written in TensorFlow into a PyTorch model?

Problem Description

I have an existing CNN model written in TensorFlow, and I am porting my project to PyTorch. I am new to PyTorch. I believe I got most of the conversion right, but I am getting this error: invalid argument 0: Sizes of tensors must match except in dimension 3. Got 144 and 72 in dimension 1.

# My tensorflow network that works perfectly
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, Activation, concatenate

def FluidNet(nClasses, nClasses1, input_height=128, input_width=128):
    ## input_height and width must be divisible by 32 because maxpooling with filter size = (2,2) is operated 5 times,
    ## which makes the input_height and width 2^5 = 32 times smaller
    assert input_height%32 == 0
    assert input_width%32 == 0
    IMAGE_ORDERING =  "channels_last" 

    img_input = Input(shape=(input_height,input_width, 6), name='combined_input') ## Assume 128,128,6

    ## Block 1 128x128
    x = Conv2D(18, (2, 2), activation='relu', padding='same', name='block1_conv1', data_format=IMAGE_ORDERING )(img_input)
    x = Conv2D(18, (2, 2), activation='relu', padding='same', name='block1_conv2', data_format=IMAGE_ORDERING )(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', data_format=IMAGE_ORDERING )(x)
    f1 = x

    # Block 2 64x64
    x = Conv2D(36, (2, 2), activation='relu', padding='same', name='block2_conv1', data_format=IMAGE_ORDERING )(x)
    x = Conv2D(36, (2, 2), activation='relu', padding='same', name='block2_conv2', data_format=IMAGE_ORDERING )(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', data_format=IMAGE_ORDERING )(x)
    f2 = x

    # Block 3 32x32
    x = Conv2D(72, (2, 2), activation='relu', padding='same', name='block3_conv1', data_format=IMAGE_ORDERING )(x)
    x = Conv2D(72, (2, 3), activation='relu', padding='same', name='block3_conv2', data_format=IMAGE_ORDERING )(x)
    x = Conv2D(72, (2, 2), activation='relu', padding='same', name='block3_conv3', data_format=IMAGE_ORDERING )(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', data_format=IMAGE_ORDERING )(x)
    pool3 = x

    # Block 4 16x16
    x = Conv2D(144, (2, 2), activation='relu', padding='same', name='block4_conv1', data_format=IMAGE_ORDERING )(x)
    x = Conv2D(144, (2, 2), activation='relu', padding='same', name='block4_conv2', data_format=IMAGE_ORDERING )(x)
    x = Conv2D(144, (2, 2), activation='relu', padding='same', name='block4_conv3', data_format=IMAGE_ORDERING )(x)
    pool4 = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', data_format=IMAGE_ORDERING )(x)

    # Block 5 8x8
    x = Conv2D(144, (2, 2), activation='relu', padding='same', name='block5_conv1', data_format=IMAGE_ORDERING )(pool4)
    x = Conv2D(144, (2, 2), activation='relu', padding='same', name='block5_conv2', data_format=IMAGE_ORDERING )(x)
    x = Conv2D(144, (2, 2), activation='relu', padding='same', name='block5_conv3', data_format=IMAGE_ORDERING )(x)
    pool5 = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool', data_format=IMAGE_ORDERING )(x)

    # Block Transpose <DECODER> : Depth
    #1st deconv layer 4x4
    x = (Conv2DTranspose( 72, kernel_size=(4,4) ,  strides=(2,2) , padding='same', dilation_rate = (1,1), use_bias=False, data_format=IMAGE_ORDERING, name="Transpose_pool5" ) (pool5))

    # concatenate x and pool4 for 2nd deconv layer 8x8
    x = concatenate ([x, pool4],axis = 3)
    x = (Conv2DTranspose( 36 , kernel_size=(6,6) ,  strides=(2,2) ,padding='same', dilation_rate = (1,1), use_bias=False, data_format=IMAGE_ORDERING, name="Transpose_pool4")(x))

    # concatenate x and pool3 for 3rd deconv layer 16x16
    x = concatenate ([x, pool3],axis = 3)    
    x= (Conv2DTranspose( 18 , kernel_size=(4,4) ,  strides=(2,2) , padding='same',dilation_rate = (1,1), use_bias=False, data_format=IMAGE_ORDERING, name="Transpose_pool3" )(x))

    # concatenate x and f2 for 4th deconv layer
    x = concatenate ([x, f2],axis = 3)    
    x = (Conv2DTranspose( 9 , kernel_size=(4,4) ,  strides=(2,2) ,  padding='same',dilation_rate = (1,1), use_bias=False, data_format=IMAGE_ORDERING, name="Transpose_pool2" )(x))

    # concatenate x and f1 for 5th deconv layer

    x = concatenate ([x, f1],axis = 3)    
    x = (Conv2DTranspose( nClasses + nClasses1 , kernel_size=(3,3) ,  strides=(2,2) , padding='same',dilation_rate = (1,1), use_bias=False, data_format=IMAGE_ORDERING, name="Transpose_pool1" )(x))

    o = x
    o = (Activation('sigmoid', name="depth_out"))(o)

    x2 = (Conv2DTranspose( 72, kernel_size=(4,4) ,  strides=(2,2) , padding='same', dilation_rate = (1,1), use_bias=False, data_format=IMAGE_ORDERING, name="Transpose_pool5_2" ) (pool5))

    x2 = concatenate ([x2, pool4],axis = 3)
    x2 = (Conv2DTranspose( 36 , kernel_size=(6,6) ,  strides=(2,2) ,padding='same', dilation_rate = (1,1), use_bias=False, data_format=IMAGE_ORDERING, name="Transpose_pool4_2")(x2))

    x2 = concatenate ([x2, pool3],axis = 3)    
    x2= (Conv2DTranspose( 18 , kernel_size=(4,4) ,  strides=(2,2) , padding='same',dilation_rate = (1,1), use_bias=False, data_format=IMAGE_ORDERING, name="Transpose_pool3_2" )(x2))

    x2 = concatenate ([x2, f2],axis = 3)    
    x2 = (Conv2DTranspose( 9 , kernel_size=(4,4) ,  strides=(2,2) ,  padding='same',dilation_rate = (1,1), use_bias=False, data_format=IMAGE_ORDERING, name="Transpose_pool2_2" )(x2))

    x2 = concatenate ([x2, f1],axis = 3)    
    x2 = (Conv2DTranspose( 7 , kernel_size=(3,3) ,  strides=(2,2) , padding='same',dilation_rate = (1,1), use_bias=False, data_format=IMAGE_ORDERING, name="Transpose_pool1_2" )(x2))

    o2 = x2
    o2 = (Activation('sigmoid', name="scale_out"))(o2)

    singleOut = concatenate([o,o2],axis = 3, name="single_out")

    #model creation
    model = Model(img_input, singleOut)

    return model


# My pytorch network that gives an error
import torch
import torch.nn as nn
import torch.nn.functional as F

class FluidNet(nn.Module):
    def __init__(self, nClasses=4, nClasses1=7):
        super(FluidNet, self).__init__()

        # conv layers: (in_channels, out_channels, kernel_size, stride, padding)
        self.conv1_1 = nn.Conv2d(6, 18, kernel_size=2, padding=1)
        self.conv1_2 = nn.Conv2d(18, 18, kernel_size=2, padding=1)

        self.conv2_1 = nn.Conv2d(18, 36, kernel_size=2, padding=1)
        self.conv2_2 = nn.Conv2d(36, 36, kernel_size=2, padding=1)

        self.conv3_1 = nn.Conv2d(36, 72, kernel_size=2, padding=1)
        self.conv3_2 = nn.Conv2d(72, 72, kernel_size=2, padding=1)
        self.conv3_3 = nn.Conv2d(72, 72, kernel_size=2, padding=1)

        self.conv4_1 = nn.Conv2d(72, 144, kernel_size=2, padding=1)
        self.conv4_2 = nn.Conv2d(144, 144, kernel_size=2, padding=1)
        self.conv4_3 = nn.Conv2d(144, 144, kernel_size=2, padding=1)

        self.conv5_1 = nn.Conv2d(144, 144, kernel_size=2, padding=1)
        self.conv5_2 = nn.Conv2d(144, 144, kernel_size=2, padding=1)
        self.conv5_3 = nn.Conv2d(144, 144, kernel_size=2, padding=1)

        # max pooling (kernel_size, stride)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Block Transpose <DECODER> : Depth and Normal : nClasses = 4
        self.up5_a = nn.ConvTranspose2d(144, 72, 4, stride=2, padding=2, bias=False)
        self.up4_a = nn.ConvTranspose2d(72, 36, 6, stride=2, padding=2, bias=False)
        self.up3_a = nn.ConvTranspose2d(36, 18, 4, stride=2, padding=2, bias=False)
        self.up2_a = nn.ConvTranspose2d(18, 9, 4, stride=2, padding=2, bias=False)
        self.up1_a = nn.ConvTranspose2d(9, nClasses, 3, stride=2, padding=2, bias=False)

        # Block Transpose <DECODER> : Image+Ref+Scale : nClasses1 = 7
        self.up5_b = nn.ConvTranspose2d(144, 72, 4, stride=2, padding=2, bias=False)
        self.up4_b = nn.ConvTranspose2d(72, 36, 6, stride=2, padding=2, bias=False)
        self.up3_b = nn.ConvTranspose2d(36, 18, 4, stride=2, padding=2, bias=False)
        self.up2_b = nn.ConvTranspose2d(18, 9, 4, stride=2, padding=2, bias=False)
        self.up1_b = nn.ConvTranspose2d(9, nClasses1, 3, stride=2, padding=2, bias=False)

    def forward(self, x, training=True):
        x = F.relu(self.conv1_1(x))
        x = F.relu(self.conv1_2(x))
        x = self.pool(x)
        pool1 = x

        x = F.relu(self.conv2_1(x))
        x = F.relu(self.conv2_2(x))
        x = self.pool(x)
        pool2 = x

        x = F.relu(self.conv3_1(x))
        x = F.relu(self.conv3_2(x))
        x = F.relu(self.conv3_3(x))
        x = self.pool(x)
        pool3 = x

        x = F.relu(self.conv4_1(x))
        x = F.relu(self.conv4_2(x))
        x = F.relu(self.conv4_3(x))
        x = self.pool(x)
        pool4 = x

        x = F.relu(self.conv5_1(x))
        x = F.relu(self.conv5_2(x))
        x = F.relu(self.conv5_3(x))
        x = self.pool(x)
        pool5 = x

        x = self.up5_a(x)
        print(x.size())
        print(pool4.size())
        x = torch.cat((x, pool4), 3)
        x = self.up4_a(x)
        x = torch.cat((x, pool3), 3)
        x = self.up3_a(x)
        x = torch.cat((x, pool2), 3)
        x = self.up2_a(x)
        x = torch.cat((x, pool1), 3)
        x = self.up1_a(x)
        o = x
        o = torch.sigmoid(o)

        x1 = self.up5_a(pool5)
        x1 = torch.cat((x1, pool4), 3)
        x1 = self.up4_a(x1)
        x1 = torch.cat((x1, pool3), 3)
        x1 = self.up3_a(x1)
        x1 = torch.cat((x1, pool2), 3)
        x1 = self.up2_a(x1)
        x1 = torch.cat((x1, pool1), 3)
        x1 = self.up1_a(x1)
        o1 = x1
        o1 = torch.sigmoid(o1)

        return o, o1

These print statements in my PyTorch code (print(x.size()), print(pool4.size())) output: torch.Size([1, 72, 10, 10]) and torch.Size([1, 144, 10, 10]). Please help me with the conversion. Thank you.

Tags: tensorflow, deep-learning, pytorch

Solution


It looks like you are trying to concatenate the output channels/feature maps along dimension 3, but in PyTorch they live in dimension 1. Keras with data_format="channels_last" lays tensors out as NHWC, so the channel axis is 3; PyTorch's nn.Conv2d works on NCHW tensors, so the channel axis is 1. Your print output shows exactly this: torch.Size([1, 72, 10, 10]) and torch.Size([1, 144, 10, 10]) agree in every dimension except dimension 1, which is why torch.cat((x, pool4), 3) fails with "Got 144 and 72 in dimension 1".

Two follow-on changes are needed once you switch to dim 1. First, each ConvTranspose2d must accept the concatenated channel count as its in_channels (for example the layer after the first skip connection receives 72 + 144 = 216 channels, not 72); Keras hides this because Conv2DTranspose infers its input channels from the previous layer. Second, the second decoder branch in your forward reuses the up*_a layers where it presumably should call up5_b through up1_b.
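Below is a minimal sketch of one decoder branch with these fixes applied. It assumes the encoder produces the channel counts and spatial sizes of your Keras model for a 128x128 input (f1: 18@64x64, f2: 36@32x32, pool3: 72@16x16, pool4: 144@8x8, pool5: 144@4x4), and the paddings are chosen so each transposed conv exactly doubles the spatial size, mimicking strides=(2,2), padding='same':

import torch
import torch.nn as nn

# Sketch of the depth branch (names mirror your up*_a layers).
# Each ConvTranspose2d's in_channels includes the channels added by the
# torch.cat that precedes it.
nClasses = 4  # note: your Keras model outputs nClasses + nClasses1 channels in this branch
up5_a = nn.ConvTranspose2d(144, 72, 4, stride=2, padding=1, bias=False)
up4_a = nn.ConvTranspose2d(72 + 144, 36, 6, stride=2, padding=2, bias=False)
up3_a = nn.ConvTranspose2d(36 + 72, 18, 4, stride=2, padding=1, bias=False)
up2_a = nn.ConvTranspose2d(18 + 36, 9, 4, stride=2, padding=1, bias=False)
up1_a = nn.ConvTranspose2d(9 + 18, nClasses, 3, stride=2, padding=1,
                           output_padding=1, bias=False)

# Dummy encoder outputs with the shapes the Keras model produces for 128x128 input
f1 = torch.randn(1, 18, 64, 64)
f2 = torch.randn(1, 36, 32, 32)
pool3 = torch.randn(1, 72, 16, 16)
pool4 = torch.randn(1, 144, 8, 8)
pool5 = torch.randn(1, 144, 4, 4)

x = up5_a(pool5)                  # -> [1, 72, 8, 8]
x = torch.cat((x, pool4), dim=1)  # channels: 72 + 144 = 216
x = up4_a(x)                      # -> [1, 36, 16, 16]
x = torch.cat((x, pool3), dim=1)  # 36 + 72 = 108
x = up3_a(x)                      # -> [1, 18, 32, 32]
x = torch.cat((x, f2), dim=1)     # 18 + 36 = 54
x = up2_a(x)                      # -> [1, 9, 64, 64]
x = torch.cat((x, f1), dim=1)     # 9 + 18 = 27
o = torch.sigmoid(up1_a(x))       # -> [1, nClasses, 128, 128]
print(o.shape)

The second branch is identical except that it uses the up*_b layers and ends in nClasses1 output channels; the two sigmoid outputs can then be concatenated along dim 1 to reproduce the Keras "single_out".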

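A separate mismatch explains why your prints show 10x10 where the Keras model would have 8x8 at pool4: Keras padding='same' keeps the spatial size even for an even kernel like (2, 2) by padding asymmetrically (the extra row/column goes on the bottom/right), whereas nn.Conv2d(..., kernel_size=2, padding=1) pads symmetrically and grows every feature map by one pixel per conv. One way to reproduce the Keras behaviour for these stride-1 convs is to pad explicitly with F.pad; the Conv2dSame wrapper below is a hypothetical helper, not part of torch (recent PyTorch versions also accept padding='same' directly on stride-1 convs):

import torch
import torch.nn as nn
import torch.nn.functional as F

class Conv2dSame(nn.Module):
    """Stride-1 conv mimicking Keras padding='same' for an even kernel size."""
    def __init__(self, in_ch, out_ch, kernel_size=2):
        super().__init__()
        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size)  # no built-in padding
        # TF/Keras puts the odd extra pixel on the right/bottom: (left, right, top, bottom)
        self.pad = (0, kernel_size - 1, 0, kernel_size - 1)

    def forward(self, x):
        return self.conv(F.pad(x, self.pad))

x = torch.randn(1, 6, 128, 128)
print(Conv2dSame(6, 18)(x).shape)  # torch.Size([1, 18, 128, 128])

With 'same'-style convs throughout the encoder, pool4 comes out 8x8 instead of 10x10, and the decoder sketch above lines up with the skip connections.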
