首页 > 解决方案 > Keras python中的k-Fold交叉验证

问题描述

我使用 keras 和 tensorflow 对卷积神经网络进行了微调:

import argparse
import os
import sys
import time
from itertools import cycle

import cv2
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from imutils import paths
from keras.utils import np_utils
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import InverseTimeDecay
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

# Command-line interface: the dataset directory is mandatory, while the
# plot and model output paths are optional strings.
ap = argparse.ArgumentParser()
ap.add_argument(
    "-d", "--dataset",
    required=True,
    help="path to input dataset",
)
ap.add_argument(
    "-p", "--plot",
    type=str,
    help="path to output loss/accuracy plot",
)
ap.add_argument(
    "-m", "--model",
    type=str,
    help="path to output model",
)
# Expose the parsed namespace as a plain dict keyed by the long option name.
args = vars(ap.parse_args())

# Training hyperparameters: initial learning rate, number of epochs and
# batch size.
INIT_LR = 1e-3
EPOCHS = 25
BS = 8

# Load every image found under the dataset directory. The label list of an
# image is the name of its parent directory split on "_".
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))
data = []
labels = []

for imagePath in imagePaths:
    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread reports an unreadable or corrupt file by returning None
        # instead of raising; skip it rather than crash in cvtColor below.
        print("[WARNING] skipping unreadable image: {}".format(imagePath))
        continue

    # OpenCV decodes to BGR channel order; convert to RGB and resize to the
    # 224x224 input size the network is built with.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))

    data.append(image)
    labels.append(imagePath.split(os.path.sep)[-2].split("_"))

if not data:
    # Fail early with a clear message instead of an obscure error later on.
    raise SystemExit(
        "[ERROR] no readable images found in {}".format(args["dataset"]))

# Scale pixel intensities to [0, 1]; float32 halves the memory of the
# default float64 array.
data = np.array(data, dtype="float32") / 255.0
# dtype=object keeps this valid when directory names yield label lists of
# different lengths (a ragged list cannot become a regular string array).
labels = np.array(labels, dtype=object)

# Encode the per-image label lists as a binary indicator matrix with one
# column per distinct label.
lb = MultiLabelBinarizer()
labels = lb.fit_transform(labels)

# Hold out 20% of the samples for testing and train on the remaining 80%;
# the fixed random_state makes the split reproducible.
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.20,
    random_state=42,
)

# Random augmentation (rotation, shifts, shear, zoom, horizontal flip)
# applied to the training batches.
trainAug = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)


# Load VGG16 pre-trained on ImageNet without its fully connected classifier
# head (include_top=False), so that a new head can be attached.
baseModel = VGG16(weights="imagenet", include_top=False,
    input_tensor=Input(shape=(224, 224, 3)))

# Freeze the convolutional base so that only the new head is trained.
for layer in baseModel.layers:
    layer.trainable = False

# One output unit per column of the binarized label matrix, so the head
# always matches the number of classes found in the dataset.
numClasses = trainY.shape[1]

# New classification head placed on top of the base model's output.
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(4, 4))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(64, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
# Attention: softmax assumes exactly one label per image; use "sigmoid"
# when an image can carry several labels at once.
headModel = Dense(numClasses, activation="softmax")(headModel)

model = Model(inputs=baseModel.input, outputs=headModel)

print("[INFO] compiling model...")
# The legacy `decay` argument computed lr / (1 + decay * iterations).
# InverseTimeDecay with decay_steps=1 is the same schedule and, together
# with `learning_rate=` instead of the deprecated `lr=`, also works on
# Keras versions whose optimizers no longer accept `decay`.
lrSchedule = InverseTimeDecay(
    INIT_LR, decay_steps=1, decay_rate=INIT_LR / EPOCHS)
opt = Adam(learning_rate=lrSchedule)
model.compile(loss="categorical_crossentropy", optimizer=opt,
    metrics=["accuracy"])

# Train the head of the network. Model.fit accepts the augmenting iterator
# directly, replacing the deprecated fit_generator. validation_steps is
# omitted because the validation data is a pair of in-memory arrays, which
# is evaluated in full after every epoch.
print("[INFO] training head...")
H = model.fit(
    trainAug.flow(trainX, trainY, batch_size=BS),
    steps_per_epoch=len(trainX) // BS,
    validation_data=(testX, testY),
    epochs=EPOCHS)

# Predict class probabilities for the held-out test set.
print("[INFO] evaluating network...")
predIdxs = model.predict(testX, batch_size=BS)

# Reduce each probability row to the index of the most likely class, and do
# the same for the binarized ground truth so that both are 1-D class ids.
predIdxs = np.argmax(predIdxs, axis=1)
trueIdxs = testY.argmax(axis=1)

print(classification_report(trueIdxs, predIdxs))

# accuracy_score comes from sklearn.metrics (imported at the top of the file).
acc_NN = accuracy_score(trueIdxs, predIdxs)
print('Overall accuracy of Neural Network model:', acc_NN)

# Serialize the model to disk. --model is an optional argument, so only save
# when a path was actually given instead of crashing on a None path after
# the whole training run has finished.
if args["model"]:
    print("[INFO] saving model...")
    model.save(args["model"], save_format="h5")
else:
    print("[INFO] no --model path given, skipping model save")

我想执行 k-Fold 交叉验证。到目前为止,我看到的解决方案都是添加类似下面的代码(示例):

# Example fragment: split a 2-D array into input features (X, columns 0-7)
# and the target (Y, column 8).
# NOTE(review): `dataset`, `seed` and `StratifiedKFold` are not defined in
# this fragment; they must come from the surrounding code.
X = dataset[:,0:8]
Y = dataset[:,8]
# define a shuffled 10-fold stratified cross-validation splitter
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
# presumably collects one score per fold; the fragment never fills it
cvscores = []
# `train` and `test` are used as index collections into X and Y for one fold
for train, test in kfold.split(X, Y):
    # placeholder: build, fit and evaluate the model for this fold here

在 for train, test in kfold.split(X, Y): 这一行中,我不明白在我的情况下 X 和 Y 应该使用什么。谁能帮我解决这个问题?

标签: python keras deep-learning tensorflow2.0 cross-validation

解决方案


根据文档:

X 应该是数据数组,Y 应该是标签。

基于这一行代码:

# Single 80/20 hold-out split: `data` is passed as the samples and `labels`
# as the targets.
(trainX, testX, trainY, testY) = train_test_split(data, labels,
    test_size=0.20, random_state=42)

你的代码应该看起来像

# `kfold` is the StratifiedKFold splitter. Stratification needs one class id
# per sample, and StratifiedKFold rejects a 2-D binarized label matrix, so
# the matrix is reduced to class indices for splitting only.
# NOTE: this assumes one label per image; for truly multi-label data use a
# plain KFold splitter and call kfold.split(data) instead.
for train, test in kfold.split(data, labels.argmax(axis=1)):
    # `train` and `test` are index arrays selecting the samples of this fold.
    trainX, testX = data[train], data[test]
    trainY, testY = labels[train], labels[test]
    # Build, compile, fit and evaluate a fresh model here for every fold, so
    # that no weights learned on one fold leak into the next.

推荐阅读