python - ValueError:logits 和标签必须具有相同的形状 ((22, 8, 8, 2) vs (22, 2))
问题描述
我正在尝试在我自己的二进制数据集上使用 GoogLeNet [Machine-Learning-Collection][1]。我收到以下错误:
ValueError: logits and labels must have the same shape ((22, 8, 8, 2) vs (22, 2))
即使我更改了以下内容以适应模型:
- 用于
binary_crossentropy
代替“categorical_crossentropy” - 使用
tf.keras.utils.to_categorical(y_train.reshape((-1,1)), num_classes=2, dtype='float32')
andy_train
和y_test
来拟合二进制一 - 重塑
X_train
,x_test
在 4 维 - 在 5000 上使用了 2 个类
- 更改了 sigmoid 中的 softmax 函数
我仍然无法让代码工作。如果有人可以帮助我,我将不胜感激。代码如下所示。
我的预处理和训练代码(它使用谷歌网络文件,而该文件又使用最后指定的 Conv 块)。
# disable tensorflow debugging messages
import os
from tensorflow.python.keras.layers.core import Flatten
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from googlenet import GoogLeNet
from absl import app, logging
import numpy as np
import random
from sklearn.model_selection import train_test_split
from tensorflow import keras
import tensorflow as tf
def crop_image(arr, w, h):
return arr[0:w, 0:h]
if __name__ == "__main__":
datadir = 'D:\\raw-dataset-4'
categories = []
for c in os.listdir(datadir):
if "label" not in c:
categories.append(c)
print(categories)
training_data = []
for cat in categories:
path = os.path.join(datadir, cat)
class_num = categories.index(cat)
counter = 0
print(path)
print(class_num)
for img in os.listdir(path):
img_array = np.load(os.path.join(path, img))
cropped_img_array = crop_image(img_array, 256, 256)
training_data.append([cropped_img_array, class_num])
counter = counter + 1
if counter > 15:
break
print("Printing the training data")
print(np.array(training_data))
print(np.array(training_data).shape)
logging.info("Training data array done.")
X = []
y = []
random.shuffle(training_data)
logging.info("Training data suffled.")
for features, label in training_data:
X.append(features)
y.append(label)
logging.info("Convert into numpy done.")
X = np.array(X)
y = np.array(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_train = X_train.astype('float32').reshape(-1, 256, 256, 1) / 255.0
X_test = X_test.astype('float32').reshape(-1, 256, 256, 1) / 255.0
# y_train = np.asarray(y_train).astype('float32').reshape((-1,1))
# y_test = np.asarray(y_test).astype('float32').reshape((-1,1))
y_train = tf.keras.utils.to_categorical(y_train.reshape((-1,1)), num_classes=2, dtype='float32')
y_test = tf.keras.utils.to_categorical(y_test.reshape((-1,1)), num_classes=2, dtype='float32')
# print(y_train)
# print(y_train.reshape(-1, 2))
# print(tf.keras.utils.to_categorical(
# y_train, num_classes=None, dtype='int'))
model = GoogLeNet(input_shape=X_train.shape[1:], classes=2)
model.summary()
print(f' {np.array(X_train).shape} \n')
print(f' {np.array(y_train).shape} \n')
print(f' {np.array(X_test).shape} \n')
print(f' {np.array(y_test).shape} \n')
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
model.fit(X_train, y_train, verbose=2)
# model.evaluate(X_train, y_test, batch_size=5, verbose=2)
GoogLeNet 在 GitHub Repository 中找到(我将 softmax 修改为 sigmoid,类数修改为 2):
# Tensorflow v.2.3.1
"""
Programmed by the-robot <https://github.com/the-robot>
"""
from block import (
auxiliary_block,
convolution_block,
inception_block,
)
from tensorflow.keras.layers import (
AveragePooling2D,
Dense,
Dropout,
Input,
MaxPooling2D,
)
from tensorflow.keras import Model
import tensorflow as tf
import typing
tf.config.run_functions_eagerly(True)
@tf.function
def GoogLeNet(input_shape: typing.Tuple[int] = (224, 224, 3), classes: int = 2) -> Model:
"""
Implementation of the popular GoogLeNet aka Inception v1 architecture.
Refer to the original paper, page 6 - table 1 for inception block filter sizes.
Arguments:
input_shape -- shape of the images of the dataset
classes -- number of classes for classification
Returns:
model -- a Model() instance in Keras
"""
# convert input shape into tensor
X_input = Input(input_shape)
# NOTE: auxiliary layers are only used in trainig phase to improve performance
# because they act as regularization and prevent vanishing gradient problem
auxiliary1 = None # to store auxiliary layers classification value
auxiliary2 = None
# layer 1 (convolution block)
X = convolution_block(
X = X_input,
filters = 64,
kernel_size = 7,
stride = 2,
padding = "same",
)
# layer 2 (max pool)
X = MaxPooling2D(
pool_size = (3, 3),
padding = "same",
strides = (2, 2),
)(X)
# layer 3 (convolution block)
# 1x1 reduce
X = convolution_block(
X,
filters = 64,
kernel_size = 1,
stride = 1,
padding = "same",
)
X = convolution_block(
X,
filters = 192,
kernel_size = 3,
stride = 1,
padding = "same",
)
# layer 4 (max pool)
X = MaxPooling2D(
pool_size = (3, 3),
padding = "same",
strides = (2, 2),
)(X)
# layer 5 (inception 3a)
X = inception_block(
X,
filters_1x1 = 64,
filters_3x3_reduce = 96,
filters_3x3 = 128,
filters_5x5_reduce = 16,
filters_5x5 = 32,
pool_size = 32,
)
# layer 6 (inception 3b)
X = inception_block(
X,
filters_1x1 = 128,
filters_3x3_reduce = 128,
filters_3x3 = 192,
filters_5x5_reduce = 32,
filters_5x5 = 96,
pool_size = 64,
)
# layer 7 (max pool)
X = MaxPooling2D(
pool_size = (3, 3),
padding = "same",
strides = (2, 2),
)(X)
# layer 8 (inception 4a)
X = inception_block(
X,
filters_1x1 = 192,
filters_3x3_reduce = 96,
filters_3x3 = 208,
filters_5x5_reduce = 16,
filters_5x5 = 48,
pool_size = 64,
)
# First Auxiliary Softmax Classifier
auxiliary1 = auxiliary_block(X, classes = classes)
# layer 9 (inception 4b)
X = inception_block(
X,
filters_1x1 = 160,
filters_3x3_reduce = 112,
filters_3x3 = 224,
filters_5x5_reduce = 24,
filters_5x5 = 64,
pool_size = 64,
)
# layer 10 (inception 4c)
X = inception_block(
X,
filters_1x1 = 128,
filters_3x3_reduce = 128,
filters_3x3 = 256,
filters_5x5_reduce = 24,
filters_5x5 = 64,
pool_size = 64,
)
# layer 11 (inception 4d)
X = inception_block(
X,
filters_1x1 = 112,
filters_3x3_reduce = 144,
filters_3x3 = 288,
filters_5x5_reduce = 32,
filters_5x5 = 64,
pool_size = 64,
)
# Second Auxiliary Softmax Classifier
auxiliary2 = auxiliary_block(X, classes = classes)
# layer 12 (inception 4e)
X = inception_block(
X,
filters_1x1 = 256,
filters_3x3_reduce = 160,
filters_3x3 = 320,
filters_5x5_reduce = 32,
filters_5x5 = 128,
pool_size = 128,
)
# layer 13 (max pool)
X = MaxPooling2D(
pool_size = (3, 3),
padding = "same",
strides = (2, 2),
)(X)
# layer 14 (inception 5a)
X = inception_block(
X,
filters_1x1 = 256,
filters_3x3_reduce = 160,
filters_3x3 = 320,
filters_5x5_reduce = 32,
filters_5x5 = 128,
pool_size = 128,
)
# layer 15 (inception 5b)
X = inception_block(
X,
filters_1x1 = 384,
filters_3x3_reduce = 192,
filters_3x3 = 384,
filters_5x5_reduce = 48,
filters_5x5 = 128,
pool_size = 128,
)
# layer 16 (average pool)
X = AveragePooling2D(
pool_size = (7, 7),
padding = "same",
strides = (1, 1),
)(X)
# layer 17 (dropout 40%)
X = Dropout(rate = 0.4)(X)
# layer 18 (fully-connected layer with softmax activation)
X = Dense(units = classes, activation='sigmoid')(X)
model = Model(X_input, outputs = [X, auxiliary1, auxiliary2], name='GoogLeNet/Inception-v1')
return model
转换块
# Tensorflow v.2.3.1
"""
Programmed by the-robot <https://github.com/the-robot>
"""
from tensorflow.keras.layers import (
Activation,
AveragePooling2D,
BatchNormalization,
Conv2D,
Dense,
Dropout,
Flatten,
MaxPooling2D,
concatenate,
)
import tensorflow as tf
import typing
@tf.function
def convolution_block(
X: tf.Tensor,
filters: int,
kernel_size: int,
stride: int = 1,
padding: str = 'valid',
) -> tf.Tensor:
"""
Convolution block for GoogLeNet.
Arguments:
X -- input tensor of shape (m, H, W, filters)
filters -- defining the number of filters in the CONV layers
kernel_size -- integer, specifying the shape of the middle CONV's window for the main path
stride -- integer specifying the stride to be used
padding -- padding type, same or valid. Default is valid
Returns:
X -- output of the identity block, tensor of shape (H, W, filters)
"""
X = Conv2D(
filters = filters,
kernel_size = (kernel_size, kernel_size),
strides = (stride, stride),
padding = padding,
)(X)
# batch normalization is not in original paper because it was not invented at that time
# however I am using it here because it will improve the performance
X = BatchNormalization()(X)
X = Activation("relu")(X)
return X
@tf.function
def inception_block(
X: tf.Tensor,
filters_1x1: int,
filters_3x3_reduce: int,
filters_3x3: int,
filters_5x5_reduce: int,
filters_5x5: int,
pool_size: int,
) -> tf.Tensor:
"""
Inception block for GoogLeNet.
Arguments:
X -- input tensor of shape (m, H, W, filters)
filters_1x1 -- number of filters for (1x1 conv) in first branch
filters_3x3_reduce -- number of filters for (1x1 conv) dimensionality reduction before (3x3 conv) in second branch
filters_3x3 -- number of filters for (3x3 conv) in second branch
filters_5x5_reduce -- number of filters for (1x1 conv) dimensionality reduction before (5x5 conv) in third branch
filters_5x5 -- number of filters for (5x5 conv) in third branch
pool_size -- number of filters for (1x1 conv) after 3x3 max pooling in fourth branch
Returns:
X -- output of the identity block, tensor of shape (H, W, filters)
"""
# first branch
conv_1x1 = convolution_block(
X,
filters = filters_1x1,
kernel_size = 1,
padding = "same"
)
# second branch
conv_3x3 = convolution_block(
X,
filters = filters_3x3_reduce,
kernel_size = 1,
padding = "same"
)
conv_3x3 = convolution_block(
conv_3x3,
filters = filters_3x3,
kernel_size = 3,
padding = "same"
)
# third branch
conv_5x5 = convolution_block(
X,
filters = filters_5x5_reduce,
kernel_size = 1,
padding = "same"
)
conv_5x5 = convolution_block(
conv_5x5,
filters = filters_5x5,
kernel_size = 5,
padding = "same"
)
# fourth branch
pool_projection = MaxPooling2D(
pool_size = (2, 2),
strides = (1, 1),
padding = "same",
)(X)
pool_projection = convolution_block(
pool_projection,
filters = pool_size,
kernel_size = 1,
padding = "same"
)
# concat by channel/filter
return concatenate(inputs = [conv_1x1, conv_3x3, conv_5x5, pool_projection], axis = 3)
@tf.function
def auxiliary_block(
X: tf.Tensor,
classes: int,
) -> tf.Tensor:
"""
Auxiliary block for GoogLeNet.
Refer to the original paper, page 8 for the auxiliary layer specification.
Arguments:
X -- input tensor of shape (m, H, W, filters)
classes -- number of classes for classification
Return:
X -- output of the identity block, tensor of shape (H, W, filters)
"""
X = AveragePooling2D(
pool_size = (5, 5),
padding = "same",
strides = (3, 3),
)(X)
X = convolution_block(
X,
filters = 128,
kernel_size = 1,
stride = 1,
padding = "same",
)
X = Flatten()(X)
X = Dense(units = 1024, activation = "relu")(X)
X = Dropout(rate = 0.7)(X)
X = Dense(units = classes)(X)
X = Activation("softmax")(X)
return X
解决方案
推荐阅读
- reactjs - 使用 Django websocket 和 React 添加 Google Speech
- python - 用户如何在 python 中为 justdial 抓取网页时选择城市名称
- c++11 - 如何在 C++ 中将派生类型指针序列化为接口类型对象?
- javascript - 错误:无法读取未定义的 REACT JS 的属性“地图”
- javascript - 未捕获的 SyntaxError:main.js 中的令牌无效或意外
- flutter - dartdoc 未找到/不工作。飞镖安装坏了?
- python - psutil 工作负载不一致
- mysql - 选项文件的Mysql位置
- google-apps-script - Problems to get only the value of a row within a 2D array Google Apps Script
- azure - 错误:请求的资源需要用户身份验证:在 AzureCLI 任务构建管道中