python - Keras python中的k-Fold交叉验证
问题描述
我使用 keras 和 tensorflow 对卷积神经网络进行了微调:
import argparse
import os
import sys
import time
from itertools import cycle

import cv2
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from imutils import paths
from keras.utils import np_utils
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
# Fine-tune a classification head on top of a frozen VGG16 base using the
# images found under --dataset, then evaluate on a held-out 20% split and
# optionally save the trained model.

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="path to input dataset")
ap.add_argument("-p", "--plot", type=str, help="path to output loss/accuracy plot")
ap.add_argument("-m", "--model", type=str, help="path to output model")
args = vars(ap.parse_args())

# initial learning rate, number of epochs to train for, and batch size
INIT_LR = 1e-3
EPOCHS = 25
BS = 8

print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))
data = []
labels = []
for imagePath in imagePaths:
    # the parent directory name carries the label(s), separated by "_"
    label = imagePath.split(os.path.sep)[-2].split("_")
    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread signals an unreadable file by returning None rather
        # than raising, which would otherwise crash cvtColor below
        print("[WARN] skipping unreadable image:", imagePath)
        continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))
    data.append(image)
    labels.append(label)

if not data:
    sys.exit("[ERROR] no readable images found in " + args["dataset"])

# scale pixel intensities to [0, 1] and binarize the labels
data = np.array(data) / 255.0
labels = np.array(labels)
lb = MultiLabelBinarizer()
labels = lb.fit_transform(labels)

# partition the data into training and testing splits using 80% of
# the data for training and the remaining 20% for testing
(trainX, testX, trainY, testY) = train_test_split(
    data, labels, test_size=0.20, random_state=42)

# augmentation is applied to the training images only
trainAug = ImageDataGenerator(
    rotation_range=30, width_shift_range=0.1,
    height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
    horizontal_flip=True, fill_mode="nearest")

# load the VGG16 network, ensuring the head FC layer sets are left off
baseModel = VGG16(weights="imagenet", include_top=False,
                  input_tensor=Input(shape=(224, 224, 3)))

# build the new head; the output width follows the number of classes the
# binarizer found instead of a hard-coded 3
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(4, 4))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(64, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(len(lb.classes_), activation="softmax")(headModel)  # Attention: or "sigmoid" activation
model = Model(inputs=baseModel.input, outputs=headModel)

# freeze the base so only the new head is updated during training
for layer in baseModel.layers:
    layer.trainable = False

print("[INFO] compiling model...")
# `learning_rate` replaces the deprecated `lr` keyword
opt = Adam(learning_rate=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="categorical_crossentropy", optimizer=opt,
              metrics=["accuracy"])

# train the head of the network
print("[INFO] training head...")
# Model.fit accepts generators directly; fit_generator is deprecated
H = model.fit(
    trainAug.flow(trainX, trainY, batch_size=BS),
    steps_per_epoch=len(trainX) // BS,
    validation_data=(testX, testY),
    validation_steps=len(testX) // BS,
    epochs=EPOCHS)

# make predictions on the testing set
print("[INFO] evaluating network...")
predIdxs = model.predict(testX, batch_size=BS)
# for each test image, take the index of the highest-probability class
predIdxs = np.argmax(predIdxs, axis=1)
trueIdxs = testY.argmax(axis=1)
print(classification_report(trueIdxs, predIdxs))
acc_NN = accuracy_score(trueIdxs, predIdxs)
print('Overall accuracy of Neural Network model:', acc_NN)

# serialize the model to disk (--model is optional, so only when given)
if args["model"]:
    print("[INFO] saving model...")
    model.save(args["model"], save_format="h5")
else:
    print("[INFO] no --model path given, skipping model save")
我想执行 k-Fold 交叉验证。到目前为止,我看到的解决方案(示例)是添加如下代码:
# Illustrative fragment: `dataset`, `seed` and `StratifiedKFold` are not
# defined or imported within this snippet.
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# define 10-fold cross validation test harness
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
# collects one score per fold (presumably filled in the omitted loop body)
cvscores = []
# `train` and `test` are used as row indices into the data for each fold
for train, test in kfold.split(X, Y):
    # do: loop body omitted in the original example
在 for train, test in kfold.split(X, Y): 这一行中,
我不明白在我的情况下 X 和 Y 应该使用什么。谁能帮帮我?
解决方案
根据文档:
X 应该是数据数组,Y 应该是标签。
根据这一行代码:
# hold out 20% of the samples for testing, with a fixed seed
trainX, testX, trainY, testY = train_test_split(
    data, labels, test_size=0.20, random_state=42
)
你的代码应该类似于:
# StratifiedKFold only accepts 1-D binary/multiclass targets, so stratify on
# the class index of each binarized label row (assumes one active class per
# row, matching the argmax-based evaluation in the question's script) while
# still slicing the original label matrix for training.
for train, test in kfold.split(data, labels.argmax(axis=1)):
    # `train` / `test` are index arrays selecting the rows of this fold
    trainX, testX = data[train], data[test]
    trainY, testY = labels[train], labels[test]
推荐阅读
- python-3.x - Keras Conv1d Input Shape/ Parameters for Stock Data
- eclipse - Tomcat无法保存到临时文件夹?
- r - 如何处理消息错误:R 中的意外符号?
- c++ - 如何读取 EXE 字节,存储到 char 数组中,然后将字节写入新的 EXE 文件?
- javascript - 在javascript中第二次随机选择值失败
- javascript - axios 无法提交 Javascript 数组
- c++ - 使用 C++ 从文件中将值输入到数组中
- node.js - 502, Bad Gateway, 我已经使用 express-http-proxy 设置了 wordpress rest api
- javascript - 填写表格时如何使进度条工作
- c# - URL 重写中间件 ASP.Net Core 2.0