ValueError: logits and labels must have the same shape ((22, 8, 8, 2) vs (22, 2))

Problem description

I am trying to use the GoogLeNet from [Machine-Learning-Collection][1] on my own binary dataset. I am getting the following error:

ValueError: logits and labels must have the same shape ((22, 8, 8, 2) vs (22, 2))

Even though I changed the following to fit the model:

  1. Used binary_crossentropy instead of categorical_crossentropy
  2. Used tf.keras.utils.to_categorical(y_train.reshape((-1,1)), num_classes=2, dtype='float32') on y_train and y_test to get binary one-hot labels
  3. Reshaped X_train and X_test to 4 dimensions
  4. Used 2 classes on the 5000 samples
  5. Changed the softmax activation to sigmoid

I still cannot get the code to work. I would appreciate it if anyone could help me. The code is shown below.
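
The two shapes in the error message can be reproduced in isolation: a Dense layer applied to a 4-D feature map only acts on the last axis and keeps the spatial dimensions, while to_categorical on a binary label vector produces a 2-D array. A minimal, hypothetical sketch (not taken from the code below):

import numpy as np
import tensorflow as tf

# Dense on a 4-D tensor keeps the spatial axes -> (22, 8, 8, 2)
feature_map = tf.zeros((22, 8, 8, 1024))
logits = tf.keras.layers.Dense(units=2)(feature_map)

# to_categorical on 22 binary labels -> (22, 2)
labels = np.random.randint(0, 2, size=(22,))
one_hot = tf.keras.utils.to_categorical(labels, num_classes=2)

print(logits.shape, one_hot.shape)  # (22, 8, 8, 2) vs (22, 2)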

My preprocessing and training code (it uses the googlenet file, which in turn uses the conv blocks given at the end):

# disable tensorflow debugging messages (must be set before tensorflow is imported)
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from googlenet import GoogLeNet
from absl import app, logging
import numpy as np
import random
from sklearn.model_selection import train_test_split
from tensorflow import keras
import tensorflow as tf


def crop_image(arr, w, h):
    return arr[0:w, 0:h]
if __name__ == "__main__":
    

    datadir = 'D:\\raw-dataset-4'

    categories = []

    for c in os.listdir(datadir):
        if "label" not in c:
            categories.append(c)

    print(categories)
    
    training_data = []

    for cat in categories:
        path = os.path.join(datadir, cat)
        class_num = categories.index(cat)
        counter = 0
        print(path)
        print(class_num)
        for img in os.listdir(path):
            img_array = np.load(os.path.join(path, img))
            cropped_img_array = crop_image(img_array, 256, 256)
            training_data.append([cropped_img_array, class_num])
            counter = counter + 1
            if counter > 15:
                break

    print("Printing the training data")
    print(np.array(training_data))
    print(np.array(training_data).shape)

    logging.info("Training data array done.")
    X = []
    y = []

    random.shuffle(training_data)

    logging.info("Training data suffled.")
    
    for features, label in training_data:
        X.append(features)
        y.append(label)

    logging.info("Convert into numpy done.")

    X = np.array(X)
    y = np.array(y)
    

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)



    X_train = X_train.astype('float32').reshape(-1, 256, 256, 1) / 255.0
    X_test = X_test.astype('float32').reshape(-1, 256, 256, 1) / 255.0
    # y_train = np.asarray(y_train).astype('float32').reshape((-1,1))
    # y_test = np.asarray(y_test).astype('float32').reshape((-1,1))



    y_train = tf.keras.utils.to_categorical(y_train.reshape((-1,1)), num_classes=2, dtype='float32')
    y_test = tf.keras.utils.to_categorical(y_test.reshape((-1,1)), num_classes=2, dtype='float32')
    

    # print(y_train)
    # print(y_train.reshape(-1, 2))
    # print(tf.keras.utils.to_categorical(
    # y_train, num_classes=None, dtype='int'))



    model = GoogLeNet(input_shape=X_train.shape[1:], classes=2)
    model.summary()

    print(f' {np.array(X_train).shape} \n')
    print(f' {np.array(y_train).shape} \n')
    print(f' {np.array(X_test).shape} \n')
    print(f' {np.array(y_test).shape} \n')

    model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
    model.fit(X_train, y_train, verbose=2)

    # model.evaluate(X_train, y_test, batch_size=5, verbose=2)
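
One hypothetical check (not part of the original post) that shows which output produces the 4-D side of the comparison: the returned model has three outputs (the main branch plus the two auxiliary classifiers defined below), and their shapes can be printed right after construction. This assumes the googlenet module shown below imports and builds as in the post.

model = GoogLeNet(input_shape=(256, 256, 1), classes=2)
for out in model.outputs:
    # the main branch ends in (None, 8, 8, 2); the two auxiliary outputs are (None, 2)
    print(out.name, out.shape)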

The GoogLeNet comes from the GitHub repository (I changed softmax to sigmoid and the number of classes to 2):

# Tensorflow v.2.3.1

"""
Programmed by the-robot <https://github.com/the-robot>
"""

from block import (
    auxiliary_block,
    convolution_block,
    inception_block,
)

from tensorflow.keras.layers import (
    AveragePooling2D,
    Dense,
    Dropout,
    Input,
    MaxPooling2D,
)
from tensorflow.keras import Model
import tensorflow as tf
import typing

tf.config.run_functions_eagerly(True)

@tf.function
def GoogLeNet(input_shape: typing.Tuple[int] = (224, 224, 3), classes: int = 2) -> Model:
    """
    Implementation of the popular GoogLeNet aka Inception v1 architecture.
    Refer to the original paper, page 6 - table 1 for inception block filter sizes.
    Arguments:
    input_shape -- shape of the images of the dataset
    classes     -- number of classes for classification
    Returns:
    model       -- a Model() instance in Keras
    """

    # convert input shape into tensor
    X_input = Input(input_shape)

    # NOTE: auxiliary layers are only used in training phase to improve performance
    #       because they act as regularization and prevent vanishing gradient problem
    auxiliary1 = None # to store auxiliary layers classification value
    auxiliary2 = None

    # layer 1 (convolution block)
    X = convolution_block(
        X = X_input,
        filters = 64,
        kernel_size = 7,
        stride = 2,
        padding = "same",
    )

    # layer 2 (max pool)
    X = MaxPooling2D(
        pool_size = (3, 3),
        padding = "same",
        strides = (2, 2),
    )(X)

    # layer 3 (convolution block)
    # 1x1 reduce
    X = convolution_block(
        X,
        filters = 64,
        kernel_size = 1,
        stride = 1,
        padding = "same",
    )
    X = convolution_block(
        X,
        filters = 192,
        kernel_size = 3,
        stride = 1,
        padding = "same",
    )

    # layer 4 (max pool)
    X = MaxPooling2D(
        pool_size = (3, 3),
        padding = "same",
        strides = (2, 2),
    )(X)

    # layer 5 (inception 3a)
    X = inception_block(
        X,
        filters_1x1 = 64,
        filters_3x3_reduce = 96,
        filters_3x3 = 128,
        filters_5x5_reduce = 16,
        filters_5x5 = 32,
        pool_size = 32,
    )

    # layer 6 (inception 3b)
    X = inception_block(
        X,
        filters_1x1 = 128,
        filters_3x3_reduce = 128,
        filters_3x3 = 192,
        filters_5x5_reduce = 32,
        filters_5x5 = 96,
        pool_size = 64,
    )

    # layer 7 (max pool)
    X = MaxPooling2D(
        pool_size = (3, 3),
        padding = "same",
        strides = (2, 2),
    )(X)

    # layer 8 (inception 4a)
    X = inception_block(
        X,
        filters_1x1 = 192,
        filters_3x3_reduce = 96,
        filters_3x3 = 208,
        filters_5x5_reduce = 16,
        filters_5x5 = 48,
        pool_size = 64,
    )

    # First Auxiliary Softmax Classifier
    auxiliary1 = auxiliary_block(X, classes = classes)

    # layer 9 (inception 4b)
    X = inception_block(
        X,
        filters_1x1 = 160,
        filters_3x3_reduce = 112,
        filters_3x3 = 224,
        filters_5x5_reduce = 24,
        filters_5x5 = 64,
        pool_size = 64,
    )

    # layer 10 (inception 4c)
    X = inception_block(
        X,
        filters_1x1 = 128,
        filters_3x3_reduce = 128,
        filters_3x3 = 256,
        filters_5x5_reduce = 24,
        filters_5x5 = 64,
        pool_size = 64,
    )

    # layer 11 (inception 4d)
    X = inception_block(
        X,
        filters_1x1 = 112,
        filters_3x3_reduce = 144,
        filters_3x3 = 288,
        filters_5x5_reduce = 32,
        filters_5x5 = 64,
        pool_size = 64,
    )

    # Second Auxiliary Softmax Classifier
    auxiliary2 = auxiliary_block(X, classes = classes)

    # layer 12 (inception 4e)
    X = inception_block(
        X,
        filters_1x1 = 256,
        filters_3x3_reduce = 160,
        filters_3x3 = 320,
        filters_5x5_reduce = 32,
        filters_5x5 = 128,
        pool_size = 128,
    )

    # layer 13 (max pool)
    X = MaxPooling2D(
        pool_size = (3, 3),
        padding = "same",
        strides = (2, 2),
    )(X)

    # layer 14 (inception 5a)
    X = inception_block(
        X,
        filters_1x1 = 256,
        filters_3x3_reduce = 160,
        filters_3x3 = 320,
        filters_5x5_reduce = 32,
        filters_5x5 = 128,
        pool_size = 128,
    )

    # layer 15 (inception 5b)
    X = inception_block(
        X,
        filters_1x1 = 384,
        filters_3x3_reduce = 192,
        filters_3x3 = 384,
        filters_5x5_reduce = 48,
        filters_5x5 = 128,
        pool_size = 128,
    )

    # layer 16 (average pool)
    X = AveragePooling2D(
        pool_size = (7, 7),
        padding = "same",
        strides = (1, 1),
    )(X)

    # layer 17 (dropout 40%)
    X = Dropout(rate = 0.4)(X)

    # layer 18 (fully-connected layer with sigmoid activation, changed from softmax)
    X = Dense(units = classes, activation='sigmoid')(X)

    model = Model(X_input, outputs = [X, auxiliary1, auxiliary2], name='GoogLeNet/Inception-v1')
    return model
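
A rough shape trace shows why, for a 256x256 input, the main output of this model is (batch, 8, 8, 2): the stride-2 convolution and the four stride-2 max pools reduce 256 -> 128 -> 64 -> 32 -> 16 -> 8, and AveragePooling2D with pool_size (7, 7), strides (1, 1) and "same" padding leaves an 8x8 map, so the final Dense keeps the spatial axes. An illustrative sketch of layers 16-18 only (the 1024 channels match the inception 5b output, 384 + 384 + 128 + 128):

import tensorflow as tf

x = tf.zeros((22, 8, 8, 1024))  # feature map entering layer 16 for a 256x256x1 input
x = tf.keras.layers.AveragePooling2D((7, 7), strides=(1, 1), padding="same")(x)
x = tf.keras.layers.Dropout(rate=0.4)(x)
x = tf.keras.layers.Dense(units=2, activation="sigmoid")(x)
print(x.shape)  # (22, 8, 8, 2)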

The conv blocks:

# Tensorflow v.2.3.1

"""
Programmed by the-robot <https://github.com/the-robot>
"""

from tensorflow.keras.layers import (
    Activation,
    AveragePooling2D,
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    MaxPooling2D,
    concatenate,
)
import tensorflow as tf
import typing

@tf.function
def convolution_block(
    X: tf.Tensor,
    filters: int,
    kernel_size: int,
    stride: int = 1,
    padding: str = 'valid',
) -> tf.Tensor:
    """
    Convolution block for GoogLeNet.
    Arguments:
    X           -- input tensor of shape (m, H, W, filters)
    filters      -- defining the number of filters in the CONV layers
    kernel_size -- integer, specifying the shape of the middle CONV's window for the main path
    stride      -- integer specifying the stride to be used
    padding     -- padding type, same or valid. Default is valid
    Returns:
    X           -- output of the convolution block, tensor of shape (H, W, filters)
    """

    X = Conv2D(
        filters = filters,
        kernel_size = (kernel_size, kernel_size),
        strides = (stride, stride),
        padding = padding,
    )(X)
    # batch normalization is not in original paper because it was not invented at that time
    # however I am using it here because it will improve the performance
    X = BatchNormalization()(X)
    X = Activation("relu")(X)

    return X

@tf.function
def inception_block(
    X: tf.Tensor,
    filters_1x1: int,
    filters_3x3_reduce: int,
    filters_3x3: int,
    filters_5x5_reduce: int,
    filters_5x5: int,
    pool_size: int,
) -> tf.Tensor:
    """
    Inception block for GoogLeNet.
    Arguments:
    X                  -- input tensor of shape (m, H, W, filters)
    filters_1x1        -- number of filters for (1x1 conv) in first branch 
    filters_3x3_reduce -- number of filters for (1x1 conv) dimensionality reduction before (3x3 conv) in second branch
    filters_3x3        -- number of filters for (3x3 conv) in second branch
    filters_5x5_reduce -- number of filters for (1x1 conv) dimensionality reduction before (5x5 conv) in third branch
    filters_5x5        -- number of filters for (5x5 conv) in third branch
    pool_size          -- number of filters for (1x1 conv) after 3x3 max pooling in fourth branch 
    Returns:
    X                  -- output of the inception block, tensor of shape (H, W, filters)
    """

    # first branch
    conv_1x1 = convolution_block(
        X,
        filters = filters_1x1,
        kernel_size = 1,
        padding = "same"
    )

    # second branch
    conv_3x3 = convolution_block(
        X,
        filters = filters_3x3_reduce,
        kernel_size = 1,
        padding = "same"
    )
    conv_3x3 = convolution_block(
        conv_3x3,
        filters = filters_3x3,
        kernel_size = 3,
        padding = "same"
    )

    # third branch
    conv_5x5 = convolution_block(
        X,
        filters = filters_5x5_reduce,
        kernel_size = 1,
        padding = "same"
    )
    conv_5x5 = convolution_block(
        conv_5x5,
        filters = filters_5x5,
        kernel_size = 5,
        padding = "same"
    )

    # fourth branch
    pool_projection = MaxPooling2D(
        pool_size = (2, 2),
        strides = (1, 1),
        padding = "same",
    )(X)
    pool_projection = convolution_block(
        pool_projection,
        filters = pool_size,
        kernel_size = 1,
        padding = "same"
    )

    # concat by channel/filter
    return concatenate(inputs = [conv_1x1, conv_3x3, conv_5x5, pool_projection], axis = 3)

@tf.function
def auxiliary_block(
    X: tf.Tensor,
    classes: int,
) -> tf.Tensor:
    """
    Auxiliary block for GoogLeNet.
    Refer to the original paper, page 8 for the auxiliary layer specification.
    Arguments:
    X       -- input tensor of shape (m, H, W, filters)
    classes -- number of classes for classification
    Return:
    X       -- output of the auxiliary block, tensor of shape (H, W, filters)
    """

    X = AveragePooling2D(
        pool_size = (5, 5),
        padding = "same",
        strides = (3, 3),
    )(X)
    X = convolution_block(
        X,
        filters = 128,
        kernel_size = 1,
        stride = 1,
        padding = "same",
    )
    X = Flatten()(X)
    X = Dense(units = 1024, activation = "relu")(X)
    X = Dropout(rate = 0.7)(X)
    X = Dense(units = classes)(X)
    X = Activation("softmax")(X)

    return X
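
Note the contrast with the main branch above: the auxiliary classifier flattens its pooled feature map before the final Dense, so each auxiliary output is 2-D, (batch, classes). A minimal, hypothetical illustration of that last step (the input shape here is arbitrary):

import tensorflow as tf

x = tf.zeros((22, 6, 6, 128))  # pooled feature map, illustrative shape
x = tf.keras.layers.Flatten()(x)  # (22, 4608)
x = tf.keras.layers.Dense(units=2, activation="softmax")(x)
print(x.shape)  # (22, 2)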

Thanks.

  [1]: https://github.com/aladdinpersson/Machine-Learning-Collection/tree/master/ML/TensorFlow/CNN_architectures/GoogLeNet

Tags: python, tensorflow, conv-neural-network

Solution

