python - 准确度通过 Epochs 提高,但恢复到评估的初始准确度
问题描述
我正在开展一个项目,以在 EEG 数据集上使用神经网络重新创建研究结果。在我从事该项目的整个过程中,我一直在反复遇到问题,即模型在整个 epoch 中都有一些准确性改进,但对于评估,准确性总是返回到初始值,特别是 1/NUM_CLASSES。其中 NUM_CLASSES 是分类类别的数量。老实说,我被困在这一点上,我认为该模型过度拟合并试图调整我的数据预处理以进行补偿,但运气不佳。
代码如下:
# Filters out warnings
import warnings
warnings.filterwarnings("ignore")
# Imports, as of 3/10, all are necessary
import numpy as np
import tensorflow as tf
from keras import layers
from keras import backend as K
from keras.models import Model
from keras.optimizers import Adam, SGD
from keras.callbacks import Callback
from keras.layers import Conv3D, Input, Dense, Activation, BatchNormalization, Flatten, Add, Softmax
from sklearn.model_selection import StratifiedKFold
from DonghyunMBCNN import MultiBranchCNN
# Global Variables
# Directory of the processed data; must have been converted and cropped first
# (see dataProcessing.py and crop.py).
DATA_DIR = "../datasets/BCICIV_2a_cropped/"
# Which trial subject will be trained (used to build the .npy filenames).
SUBJECT = 1
# Number of classification categories; for motor imagery there are 4.
NUM_CLASSES = 4
# Number of timesteps in each input array.
TIMESTEPS = 240
# X-dimension of the dataset (spatial electrode grid width).
XDIM = 7
# Y-dimension of the dataset (spatial electrode grid height).
YDIM = 6
# Delta-loss requirement below which the learning rate is lowered.
LOSS_THRESHOLD = 0.01
# Initial learning rate for the optimizer.
INIT_LR = 0.01
# Which NLL (Negative Log Likelihood) loss to use: "NLL1", "NLL2", or "SCCE".
LOSS_FUNCTION = 'NLL2'
# Which optimizer is in use, either "ADAM" or "SGD".
OPTIMIZER = 'SGD'
# Whether training output should be printed (Keras verbose flag).
VERBOSE = 1
# Determines whether K-Fold cross-validation is used.
USE_KFOLD = False
# Number of k-fold splits; must be at least 2.
KFOLD_NUM = 2
# Which model structure is used: '1' -> Create_Model, '2' -> Donghyun's model.
USE_STRUCTURE = '2'
# Number of epochs to train for (non-k-fold path only; k-fold uses 30).
EPOCHS = 10
# Receptive field kernel sizes (small / medium / large branch).
SRF_SIZE = (2, 2, 1)
MRF_SIZE = (2, 2, 3)
LRF_SIZE = (2, 2, 5)
# Strides for each receptive field branch.
SRF_STRIDES = (2, 2, 1)
MRF_STRIDES = (2, 2, 2)
LRF_STRIDES = (2, 2, 4)
# This is meant to handle the reduction of the learning rate, current is not accurate, I have been unable to access the loss information from each Epoch
# The expectation is that if the delta loss is < threshold, learning rate *= 0.1. Threshold has not been set yet.
class LearningRateReducerCb(Callback):
    """Reduce the learning rate 10x when the training loss plateaus.

    Stated intent (module comment above): if the epoch-over-epoch drop in
    training loss is below LOSS_THRESHOLD, multiply the learning rate by 0.1.
    """

    def __init__(self):
        # Per-metric history accumulated from the epoch-end logs.
        self.history = {}

    def on_epoch_end(self, epoch, logs=None):
        # BUG FIX: the original used the mutable default `logs={}`, which is
        # shared across calls; use a None sentinel instead.
        logs = logs or {}
        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)
        losses = self.history.get('loss', [])
        if len(losses) >= 2:
            delta = losses[-2] - losses[-1]
            # BUG FIX: the original reduced the LR when the loss *improved by
            # more than* the threshold — the opposite of the documented intent.
            # Reduce on plateau: improvement smaller than the threshold.
            if delta < LOSS_THRESHOLD:
                old_lr = self.model.optimizer.lr.read_value()
                new_lr = old_lr*0.1
                print("\nEpoch: {}. Reducing Learning Rate from {} to {}".format(epoch, old_lr, new_lr))
                self.model.optimizer.lr.assign(new_lr)
# The Negative Log Likelihood function
def Loss_FN1(y_true, y_pred, sample_weight=None):
    """Per-sample summed binary cross-entropy (alternative NLL variant)."""
    per_element = K.binary_crossentropy(y_true, y_pred)
    return K.sum(per_element, axis=-1)
# Second NLL function, generally seems to work better
def Loss_FN2(y_true, y_pred, sample_weight=None):
    """Gaussian negative log-likelihood.

    Assumes y_pred packs [mu, log_sigma] along axis 1: the first half of the
    columns are means, the second half are log standard deviations.
    """
    half = int(int(y_pred.shape[1])/2)
    mu = y_pred[:, :half]
    logsigma = y_pred[:, half:]
    # -1/2 * sum(((y - mu)/sigma)^2): squared Mahalanobis term.
    sq_term = -0.5*K.sum(K.square((y_true-mu)/K.exp(logsigma)), axis=1)
    # -sum(log sigma): log-determinant of the diagonal covariance.
    trace_term = -K.sum(logsigma, axis=1)
    # -d/2 * log(2*pi): normalization constant.
    const_term = -0.5*half*np.log(2*np.pi)
    log_likelihood = sq_term + trace_term + const_term
    return K.mean(-log_likelihood)
# Loads given data into two arrays, x and y, while also ensuring that all values are formatted as float32s
def load_data(data_dir, num):
    """Load subject `num`'s cropped data (x) and labels (y) as float32 arrays.

    Reads `<data_dir>A0<num>TD_cropped.npy` and `<data_dir>A0<num>TK_cropped.npy`.
    """
    prefix = data_dir + "A0" + str(num)
    x = np.load(prefix + "TD_cropped.npy").astype(np.float32)
    y = np.load(prefix + "TK_cropped.npy").astype(np.float32)
    return x, y
def create_receptive_field(size, strides, model, name):
    """Build one receptive-field branch: two Conv3D stages, two Dense stages,
    and a softmax classification head over NUM_CLASSES.

    Conv layers are named name+'1' and name+'2' so branches stay distinguishable.
    """
    x = model
    # Two convolutional stages: 32 then 64 filters, each BN + ELU.
    for idx, n_filters in enumerate((32, 64), start=1):
        x = Conv3D(kernel_size=size, strides=strides, filters=n_filters,
                   padding='same', name=name + str(idx))(x)
        x = BatchNormalization()(x)
        x = Activation('elu')(x)
    x = Flatten()(x)
    # Two fully-connected stages of width 32, each BN + ReLU.
    for _ in range(2):
        x = Dense(32)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
    return Dense(NUM_CLASSES, activation='softmax')(x)
def Create_Model():
    """Assemble the multi-branch 3D CNN: a shared conv stem feeding three
    receptive-field branches (small/medium/large) whose outputs are summed
    and softmaxed.
    """
    inputs = Input(shape=(1, XDIM, YDIM, TIMESTEPS))
    # Shared stem: one strided Conv3D + BN + ELU.
    stem = Conv3D(kernel_size=(3, 3, 5), strides=(2, 2, 4), filters=16,
                  name="Conv1")(inputs)
    stem = BatchNormalization()(stem)
    stem = Activation('elu')(stem)
    branches = [
        create_receptive_field(SRF_SIZE, SRF_STRIDES, stem, 'SRF'),
        create_receptive_field(MRF_SIZE, MRF_STRIDES, stem, 'MRF'),
        create_receptive_field(LRF_SIZE, LRF_STRIDES, stem, 'LRF'),
    ]
    # NOTE(review): each branch already ends in a softmax Dense layer, so this
    # applies softmax a second time to the summed probabilities — confirm that
    # the double softmax is intentional.
    summed = Add()(branches)
    out = Softmax()(summed)
    return Model(inputs=inputs, outputs=out)
def _build_loss():
    """Resolve LOSS_FUNCTION to a loss callable/identifier; raise on unknown values.

    BUG FIX: the original if/elif chain silently fell through on an unknown
    value, producing a NameError later at compile time.
    """
    if LOSS_FUNCTION == 'NLL1':
        return Loss_FN1
    if LOSS_FUNCTION == 'NLL2':
        return Loss_FN2
    if LOSS_FUNCTION == 'SCCE':
        return 'sparse_categorical_crossentropy'
    raise ValueError("Unknown LOSS_FUNCTION: %r" % (LOSS_FUNCTION,))

def _build_optimizer():
    """Return a FRESH optimizer instance with learning rate INIT_LR.

    BUG FIX: the original created one optimizer at module level and reused it
    for every model compiled in the k-fold loop. A Keras optimizer holds
    per-variable slot state tied to one model, so sharing it across models
    corrupts that state; build a new instance per model instead.
    """
    if OPTIMIZER == 'ADAM':
        return Adam(learning_rate=INIT_LR)
    if OPTIMIZER == 'SGD':
        return SGD(learning_rate=INIT_LR)
    raise ValueError("Unknown OPTIMIZER: %r" % (OPTIMIZER,))

def _build_model():
    """Instantiate the network selected by USE_STRUCTURE."""
    if USE_STRUCTURE == '1':
        return Create_Model()
    if USE_STRUCTURE == '2':
        return MultiBranchCNN(TIMESTEPS, YDIM, XDIM, NUM_CLASSES)
    raise ValueError("Unknown USE_STRUCTURE: %r" % (USE_STRUCTURE,))

loss_function = _build_loss()
# Kept for backward compatibility with the original module-level name.
opt = _build_optimizer()

X, Y = load_data(DATA_DIR, SUBJECT)

if USE_KFOLD:
    seed = 4
    kfold = StratifiedKFold(n_splits=KFOLD_NUM, shuffle=True, random_state=seed)
    cvscores = []
    for train, test in kfold.split(X, Y):
        MRF_model = _build_model()
        # Fresh optimizer per fold (see _build_optimizer).
        MRF_model.compile(loss=loss_function, optimizer=_build_optimizer(), metrics=['accuracy'])
        # Training for 30 epochs on the fold's training rows only.
        MRF_model.fit(X[train], Y[train], epochs=30, verbose=VERBOSE)
        # Evaluate on the held-out fold.
        scores = MRF_model.evaluate(X[test], Y[test], verbose=VERBOSE)
        print("%s: %.2f%%" % (MRF_model.metrics_names[1], scores[1]*100))
        cvscores.append(scores[1]*100)
    print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
else:
    MRF_model = _build_model()
    MRF_model.compile(loss=loss_function, optimizer=opt, metrics=['accuracy'])
    MRF_model.fit(X, Y, epochs=EPOCHS, verbose=VERBOSE)
    # NOTE(review): this evaluates on the same data it trained on — there is
    # no held-out split in this branch.
    _, acc = MRF_model.evaluate(X, Y, verbose=VERBOSE)
    print("Accuracy: %.2f" % (acc*100))
数据来自 BCICIV 2A 数据集,该数据集由 25 个通道组成。忽略 3 个 EOG 通道,留下 22 个通道。这 22 个通道被格式化为 7x6 - 0 填充阵列,以提供更具空间相关性的表示。我们使用滑动窗口方法来补偿小数据集,然后在每次试验中运行通道平均以进一步处理数据。训练结果如下。
Epoch 1/10
666/666 [==============================] - 13s 17ms/step - loss: 4.0290 - accuracy: 0.3236
Epoch 2/10
666/666 [==============================] - 12s 18ms/step - loss: 3.9622 - accuracy: 0.3434
Epoch 3/10
666/666 [==============================] - 14s 21ms/step - loss: 3.9747 - accuracy: 0.3481
Epoch 4/10
666/666 [==============================] - 14s 21ms/step - loss: 3.9373 - accuracy: 0.3720
Epoch 5/10
666/666 [==============================] - 14s 21ms/step - loss: 3.9412 - accuracy: 0.3710
Epoch 6/10
666/666 [==============================] - 14s 21ms/step - loss: 3.9191 - accuracy: 0.3829
Epoch 7/10
666/666 [==============================] - 14s 21ms/step - loss: 3.9234 - accuracy: 0.3936
Epoch 8/10
666/666 [==============================] - 14s 21ms/step - loss: 3.8973 - accuracy: 0.3983
Epoch 9/10
666/666 [==============================] - 14s 21ms/step - loss: 3.8780 - accuracy: 0.4022
Epoch 10/10
666/666 [==============================] - 14s 21ms/step - loss: 3.8647 - accuracy: 0.3900
666/666 [==============================] - 5s 8ms/step - loss: 4.1935 - accuracy: 0.2500
Accuracy: 25.00
忽略较差的准确度,训练后准确度降至 25.00 的事实令人担忧。我有一半觉得我错过了一些简单的东西,但一直无法解决问题。
欢迎任何建议或问题,非常感谢!
解决方案
我可以想到您观察到的差异的两个潜在原因,但我现在没有时间测试它们:
- 您正在使用的两个优化器(SGD 和 Adam)都是用行的子集(mini-batch)进行训练,而不是一次用整个数据集。这会导致您观察到的不一致。
- BatchNorm 在训练时和评估时的工作方式不同。
两种情况都指向同一个方向:在训练期间对准确性和损失的评估是对每批结果的汇总估计,在这种情况下过于乐观。
对于测试 1,您可以尝试在 fit 中将 batch_size 设置为 len(X)。请注意,您可能会耗尽内存,并且它肯定会很慢(可能非常慢)。
对于测试 2,您可以尝试删除 BatchNorm 步骤。
如果你在这些思路上工作,请随时通知我!
推荐阅读
- php - 如何防止图片标题出现在主页(帖子)内容中?
- powershell - 如何调查powershell中的未知方法?
- spring - 按 id=1 查找弹簧数据 jpa
- excel - 从 2 个 Excel 工作表中交替合并行
- sql - 在预期条件的上下文中指定的非布尔类型的表达式 (SQLSERVER 2017)
- c# - 使用 ABBYY FlexiCapture 构建导出连接器?
- python - Python - 如何循环一个结果集
- angular - 使用 SCSS 将 Nativescript 添加到 Angular 8 项目时出错
- azure - 从 Azure 上的 Linux 容器运行 Google Cloud Vision
- pandas - 如何从系列中删除索引 col?