python - Keras model.fit ValueError: Shapes (None, 43) and (None, 1, 1, 43) are incompatible(形状 (None, 43) 和 (None, 1, 1, 43) 不兼容)
问题描述
编辑:我在 ResNet50 和 Dense 层之间添加了一个 Flatten() 层,错误消失了
总的来说,我对 Keras 和机器学习都非常陌生。我的目标是使用带有 ImageNet 预训练权重的 Keras ResNet50 模型,对 GTSRB 数据集中不同类型的交通标志进行分类。我正在使用 Google Colab,加载和预处理所有图像/标签的代码是由我的教授编写的。模型可以正常编译和预测,我遇到的问题出在训练上:我使用 model.fit() 函数训练模型时会报错。
这些图像是 32x32 图像,所以我将 input_tensor 设置为 (32, 32, 3) 的形状。有 43 个标签可供分类,我使用的是独热(one-hot)编码,所以我在 ResNet50 之后添加了一个大小为 43 的 Dense 层。我不确定 ValueError 到底出现在哪里,但我很确定这与标签有关。我真的不知道形状的问题是什么,我一直在查看文档和其他 Stack Overflow 答案,但我找不到任何适用于此的东西(我的理解有限)。
以下是错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-21-2f55b5d19997> in <module>()
6
7 resnet.compile(optimizer='adam', loss=keras.losses.CategoricalCrossentropy(from_logits=True), metrics=['acc'])
----> 8 resnet.fit(x=imgs_train, y=labels_train, batch_size=BATCH_SIZE, epochs=NUM_EPOCH, validation_data=(imgs_val, labels_val))
9
10 predictions = resnet.predict(x=imgs_test)
9 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
992 except Exception as e: # pylint:disable=broad-except
993 if hasattr(e, "ag_error_metadata"):
--> 994 raise e.ag_error_metadata.to_exception(e)
995 else:
996 raise
ValueError: in user code:
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:853 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:842 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:835 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:789 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py:201 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.7/dist-packages/keras/losses.py:141 __call__
losses = call_fn(y_true, y_pred)
/usr/local/lib/python3.7/dist-packages/keras/losses.py:245 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/losses.py:1666 categorical_crossentropy
y_true, y_pred, from_logits=from_logits, axis=axis)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/backend.py:4839 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/tensor_shape.py:1161 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 43) and (None, 1, 1, 43) are incompatible
这是我的代码:
# Mount Google Drive at /content/drive so the dataset paths below resolve.
# NOTE(review): `drive` is not imported anywhere in this listing; presumably
# `from google.colab import drive` was run in an earlier Colab cell - confirm.
drive.mount('/content/drive')
# Import libraries
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.resnet50 import ResNet50, decode_predictions
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import cv2
from itertools import chain
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
### Load data
# Project root, relative to the directory the notebook runs in
PROJECT_ROOT_DIR = 'drive/My Drive/Colab Notebooks/Assignment1/'
# The dataset lives in the 'Data/' folder directly under the project root
DATA_DIR = f'{PROJECT_ROOT_DIR}Data/'
# Function for loading the dataset
# Code from advml-traffic-sign (https://github.com/inspire-group/advml-traffic-sign)
def load_dataset_GTSRB(n_channel=3, train_file_name=None):
    """
    Load the GTSRB train/validation/test splits from pickle files in DATA_DIR.

    Images are handled channels-last, i.e. as a
    (datasize) x (height) x (width) x (channels) numpy array (the grayscale
    conversion below indexes the channel on the last axis). Each pixel is
    divided by 255 and cast to float32, which yields the range [0, 1] for
    8-bit image data. Labels are returned exactly as stored in the files.

    Parameters
    ----------
    n_channel : int
        3 keeps the stored channels as they are; any other value converts
        the images to a single grayscale channel.
    train_file_name : string or None
        Name of the training pickle file inside DATA_DIR. 'train.p' is used
        when None.

    Returns
    -------
    A tuple (x_train, y_train, x_val, y_val, x_test, y_test).
    """

    def load_pickled_data(file, columns):
        """
        Loads pickled training and test data.

        Parameters
        ----------
        file : string
            Name of the pickle file.
        columns : list of strings
            List of columns in pickled data we're interested in.

        Returns
        -------
        A tuple of datasets for given columns.
        """
        # NOTE: pickle.load can execute arbitrary code while unpickling;
        # only point this at dataset files from a trusted source.
        with open(file, mode='rb') as f:
            dataset = pickle.load(f)
        return tuple(dataset[c] for c in columns)

    def preprocess(x, n_channel):
        """
        Preprocess dataset: turn images into grayscale if specified, normalize
        input space to [0,1], reshape array to appropriate shape for NN model
        """
        if n_channel != 3:
            # Convert to grayscale, e.g. single Y channel: weighted sum of
            # channels 0, 1 and 2 taken from the last axis.
            x = (0.299 * x[:, :, :, 0] + 0.587 * x[:, :, :, 1] +
                 0.114 * x[:, :, :, 2])
            # Restore a trailing channel axis of size 1
            x = x[:, :, :, np.newaxis]
        # Scale features to be in [0, 1] (shared by both branches)
        return (x / 255.).astype(np.float32)

    columns = ['features', 'labels']

    # Load pickle dataset; fall back to the default training file name
    train_file = 'train.p' if train_file_name is None else train_file_name
    x_train, y_train = load_pickled_data(DATA_DIR + train_file, columns)
    x_val, y_val = load_pickled_data(DATA_DIR + 'valid.p', columns)
    x_test, y_test = load_pickled_data(DATA_DIR + 'test.p', columns)

    # Preprocess loaded data (labels are passed through untouched)
    x_train = preprocess(x_train, n_channel)
    x_val = preprocess(x_val, n_channel)
    x_test = preprocess(x_test, n_channel)
    return x_train, y_train, x_val, y_val, x_test, y_test
# --- Dataset constants (GTSRB) ---
NUM_LABELS = 43              # how many label classes there are
BATCH_SIZE = 32              # samples per batch
HEIGHT = WIDTH = 32          # input images are 32 pixels high and wide
N_CHANNEL = 3                # channels per image
OUTPUT_DIM = NUM_LABELS      # output dimension equals the label count

# --- Training hyperparameters ---
NUM_EPOCH = 100              # epochs to train for
LR = 1e-4                    # learning rate

# Model input shape and image shape are the same (height, width, channels)
INPUT_SHAPE = (HEIGHT, WIDTH, N_CHANNEL)
IMG_SHAPE = INPUT_SHAPE
# Fetch the image/label arrays of the three splits
(imgs_train, labels_train,
 imgs_val, labels_val,
 imgs_test, labels_test) = load_dataset_GTSRB(n_channel=N_CHANNEL)

# Report the array shapes of the train, validation and test splits
print('Images train shape: {} - Labels train shape: {}'.format(
    imgs_train.shape, labels_train.shape))
print('Images validation shape: {} - Labels validation shape: {}'.format(
    imgs_val.shape, labels_val.shape))
print('Images test shape: {} - Labels test shape: {}'.format(
    imgs_test.shape, labels_test.shape))

# Sanity check on the pixel statistics (values should lie in [0, 1])
print('\nMax pixel value', imgs_train.max())
print('Min pixel value', imgs_train.min())
print('Average pixel value', imgs_train.mean())
print('Data type', imgs_train[0].dtype)
# Read the sign names (second column of signnames.csv)
signnames = pd.read_csv(DATA_DIR + 'signnames.csv').values[:, 1]

# Plot a 3x3 grid of randomly chosen training images to check that the data
# makes sense (note that the quality of some of the images is pretty low).
# This runs before the one-hot conversion: labels_train[idx] is used
# directly as an index into signnames.
plt.figure(figsize=(16, 10))
for n in range(9):
    # Scalar random index into the training set (upper bound is exclusive)
    idx = np.random.randint(0, len(imgs_train))
    plt.subplot(3, 3, n + 1)
    plt.imshow(imgs_train[idx])
    plt.title('Label:' + str(signnames[labels_train[idx]]))
    plt.axis('off')
# One-hot encode the labels of every split (NUM_LABELS classes) so they can
# be fed to the models
labels_train, labels_test, labels_val = (
    keras.utils.to_categorical(split_labels, NUM_LABELS)
    for split_labels in (labels_train, labels_test, labels_val)
)
print('Labels train shape: {}'.format(labels_train.shape))
### Creating the model and training
# NOTE: this is the model definition that raises the ValueError quoted above.
# The Dense layer is applied directly to the output of the headless
# (include_top=False) ResNet50, and the loss reports the prediction shape as
# (None, 1, 1, 43) while the one-hot labels have shape (None, 43).
resnet = Sequential([
ResNet50(weights='imagenet', include_top=False, input_tensor=Input(shape=(WIDTH, HEIGHT, N_CHANNEL))),
Dense(NUM_LABELS, activation='softmax')
])
# Freeze the pretrained ResNet50 (layer 0); only the Dense layer stays trainable
resnet.layers[0].trainable = False
# NOTE(review): the Dense layer already applies softmax while the loss is
# created with from_logits=True; these two settings look inconsistent - confirm
# which one is intended. LR defined earlier is not passed to the optimizer.
resnet.compile(optimizer='adam', loss=keras.losses.CategoricalCrossentropy(from_logits=True), metrics=['acc'])
# Train on the training split, validating on the validation split each epoch
resnet.fit(x=imgs_train, y=labels_train, batch_size=BATCH_SIZE, epochs=NUM_EPOCH, validation_data=(imgs_val, labels_val))
# predictions = resnet.predict(x=imgs_test)
这是数据:https://drive.google.com/drive/folders/1dbr0bYFX40HoAJetAIDSh7Zsc-O5JbGS?usp=sharing
如果有人读到这个,谢谢。我很感激你的时间。
解决方案
我在 ResNet50 和 Dense 层之间添加了一个 Flatten() 层,错误消失了!这是现在创建模型的代码:
# Headless ResNet50 -> Flatten -> NUM_LABELS-way Dense classifier.
# Flatten collapses the non-batch dimensions of the ResNet50 output, so the
# Dense layer yields (None, 43) and matches the one-hot labels instead of
# the (None, 1, 1, 43) reported in the error.
# NOTE(review): the output layer applies softmax, so the loss passed to
# compile() should presumably use from_logits=False - confirm.
resnet = Sequential(
    [
        ResNet50(
            weights='imagenet',
            include_top=False,
            # Same (height, width, channels) order as INPUT_SHAPE
            input_tensor=Input(shape=(HEIGHT, WIDTH, N_CHANNEL)),
        ),
        # Only Dense is imported by name from tensorflow.keras.layers in the
        # question's listing, so Flatten is reached through `keras` to avoid
        # a NameError.
        keras.layers.Flatten(),
        Dense(NUM_LABELS, activation='softmax'),
    ]
)
推荐阅读
- javascript - ¿ 是否有与 SVG 一起使用的球形几何 XML 命名空间……?
- mysql - 从被劫持的域名转移 wordpress
- xcode8 - 3D 对象在移动相机时悬停
- google-cloud-platform - 无法创建 Windows 虚拟机
- api - 如何使用从 API 获取的数据显示来自 VueX 的对象
- java - 在 JUnit 中测试没有睡眠的潜在死锁
- angular - 如何在 Angular 中使用 jwt/interceptor 进行登录/注册方法
- sql - 使用信息单元格而不是列标题
- mysql - 错误 1067 'created_at' 的默认值无效
- javascript - 如何使用 webpack 正确创建 Node.js 模块