python - Keras 中 NPY 格式的 4 通道图像的图像分类
问题描述
我正在使用卷积神经网络构建一个 5 类图像分类器,数据集包含 3000 张图像。我的输入数据以 .npy 文件格式存储,每个数组的形状为 (1080,1080,4),由 RGB 图像和边缘特征组合而成(共 4 通道)。使用下面的 DataGenerator 类训练模型时,训练准确率(acc train = 97%)和验证准确率(acc val = 51%)之间存在巨大差距。下面是我的代码摘录。谁能帮我解决这个问题?我该如何提高验证准确率?
有什么方法可以使用 ImageDataGenerator 提供的数据增强功能?
非常感谢,任何帮助表示赞赏。
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of ``.npy`` samples and labels.

    Every sample is loaded from the path ``'data' + ID + '.npy'`` and copied
    into a batch array of shape ``(n_samples, *dim, n_channels)``. Labels are
    looked up in ``labels`` by sample ID and encoded with
    ``keras.utils.to_categorical`` over ``n_classes`` classes.
    """

    def __init__(self, list_IDs, labels, batch_size=16, dim=(1080,1080), n_channels=4,
                 n_classes=5, shuffle=True):
        """Store the configuration and build the index order for the first epoch.

        Args:
            list_IDs: Sequence of sample IDs; each ID is used both to build
                the file path and to look up the label.
            labels: Mapping (or indexable) from sample ID to integer class.
            batch_size: Number of samples per batch.
            dim: Leading dimensions of one sample, without the channel axis.
            n_channels: Size of the trailing channel axis of one sample.
            n_classes: Number of classes passed to ``to_categorical``.
            shuffle: When truthy, the sample order is reshuffled every time
                ``on_epoch_end`` runs.
        """
        # Newer Keras versions expect Sequence subclasses to initialise the
        # base class; on older versions this is a harmless no-op.
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch.

        A trailing partial batch is dropped by the integer division.
        """
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair for batch number ``index``."""
        # Positions (into list_IDs) that belong to this batch.
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        # Translate positions into sample IDs.
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Rebuild the index order; reshuffle it in place when shuffling is on."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the samples for the given IDs and one-hot encode their labels.

        Args:
            list_IDs_temp: IDs of the samples that make up one batch.

        Returns:
            Tuple ``(X, y)`` where ``X`` has shape
            ``(len(list_IDs_temp), *dim, n_channels)`` and ``y`` is the
            ``to_categorical`` encoding of the labels.
        """
        # Size the buffers by the number of IDs actually received, so no
        # uninitialised np.empty rows can leak out as data or labels.
        n_samples = len(list_IDs_temp)
        # NOTE(review): np.empty defaults to float64; float32 would halve the
        # batch memory for 1080x1080x4 samples - confirm before changing.
        X = np.empty((n_samples, *self.dim, self.n_channels))
        y = np.empty(n_samples, dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            # Each loaded array must match (*dim, n_channels) to be assignable.
            X[i,] = np.load('data' + ID + '.npy')
            y[i] = self.labels[ID]

        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)
还有模型:
# CNN classifier for 1080 x 1080 inputs with 4 channels: five
# convolution / ReLU / max-pooling stages followed by a dense head.
model = Sequential([
    # Stage 1 (16 filters); this layer also declares the input shape.
    Convolution2D(16, (3, 3), input_shape=(1080, 1080, 4)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Stage 2 (32 filters).
    Convolution2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Stage 3 (64 filters).
    Convolution2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Stage 4 (64 filters).
    Convolution2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Stage 5 (64 filters).
    Convolution2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Collapse the feature maps into one vector per sample for the dense head.
    Flatten(),
    # Fully-connected layer with 128 units.
    Dense(128),
    Activation('relu'),
    # Dropout with rate 0.5 ahead of the output layer.
    Dropout(0.5),
    # Five output units, one per class, turned into probabilities by softmax.
    Dense(5),
    Activation('softmax'),
])
model.summary()

# Compile with Adam and categorical cross-entropy; accuracy is reported as 'acc'.
# NOTE(review): the `lr` / `decay` argument names depend on the installed
# Keras version - confirm they are still accepted.
opt = Adam(lr=0.001, decay=1e-6)
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['acc'],
)
解决方案
推荐阅读
- arrays - 来自 ReactJS 的多附加 Laravel
- excel - 如果空白或非空白,Excel IF 公式具有不同的结果
- asp.net-core - Microsoft.AspNetCore.OData:服务文档的相对 @odata.context 链接
- azure-pipelines - Azure 管道,分支工作,但在批准后无法重新分支
- amazon-elastic-beanstalk - Elastic Beanstalk env.yaml 包含无效密钥
- c# - C#、统一和 Mqtt
- php - PHP 读取 HTTP/2 响应正文
- windows - Win10设置记事本为默认浏览器
- python - 斐波那契中缺少函数调用(带有动态编程记忆)
- javascript - 在 rhandsontable() 中添加自定义渲染器会删除类型显示