python - 指定的至少一个标签必须在 y_true 中
问题描述
我想根据y_test和pred_test得到一个混淆矩阵,但是提出一个问题“至少一个指定的标签必须在y_true”,我不知道为什么
metrics.confusion_matrix(np.argmax(y_test,axis=1),pred_test)
y_test = [[0. 1. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 1.]
[0. 0. 0. 0. 1. 0.]
...
[0. 0. 0. 1. 0. 0.]
[0. 0. 1. 0. 0. 0.]
[0. 0. 1. 0. 0. 0.]]
pred_test = [1 4 5 ... 3 2 2]
np.argmax(y_test,axis=1) = [1 5 4 ... 3 2 2]
File "D:\Anaconda\lib\site-packages\sklearn\metrics\classification.py", line 259, in confusion_matrix
raise ValueError("At least one label specified must be in y_true")
ValueError: At least one label specified must be in y_true
我创建了一个卷积神经网络。模型并使用交叉验证进行估计,最终生成混淆矩阵。现在在生成混淆矩阵方面存在问题。
数据集在此处输入链接描述。完整代码如下:</p>
import matplotlib
#matplotlib.use('Agg')
import timing
from keras.layers import Input,Dense,Conv2D,MaxPooling2D,UpSampling2D,Flatten
from keras.models import Model
from keras import backend as K
from keras.utils.np_utils import to_categorical
import numpy as np
import pandas as pd
import seaborn as sns
from keras.models import Sequential# 导入Sequential
from keras.utils import np_utils, generic_utils
from keras.callbacks import LearningRateScheduler
import os
from keras.layers import Dropout
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.cross_validation import KFold, StratifiedKFold
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
import time
from scipy import stats
from keras import optimizers
import matplotlib.pyplot as plt
from keras import regularizers
import keras
from keras.callbacks import TensorBoard
config = tf.ConfigProto(allow_soft_placement=True)
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
time1 = time.time()
class LossHistory(keras.callbacks.Callback):
def on_train_begin(self, logs={}):
self.losses = {'batch':[], 'epoch':[]}
self.accuracy = {'batch':[], 'epoch':[]}
self.val_loss = {'batch':[], 'epoch':[]}
self.val_acc = {'batch':[], 'epoch':[]}
def on_batch_end(self, batch, logs={}):
self.losses['batch'].append(logs.get('loss'))
self.accuracy['batch'].append(logs.get('acc'))
self.val_loss['batch'].append(logs.get('val_loss'))
self.val_acc['batch'].append(logs.get('val_acc'))
def on_epoch_end(self, batch, logs={}):
self.losses['epoch'].append(logs.get('loss'))
self.accuracy['epoch'].append(logs.get('acc'))
self.val_loss['epoch'].append(logs.get('val_loss'))
self.val_acc['epoch'].append(logs.get('val_acc'))
def loss_plot(self, loss_type):
iters = range(len(self.losses[loss_type]))
plt.figure()
# acc
plt.plot(iters, self.accuracy[loss_type], 'r', label='train acc')
# loss
plt.plot(iters, self.losses[loss_type], 'g', label='train loss')
if loss_type == 'epoch':
# val_acc
plt.plot(iters, self.val_acc[loss_type], 'b', label='val acc')
# val_loss
plt.plot(iters, self.val_loss[loss_type], 'k', label='val loss')
plt.grid(True)
plt.xlabel(loss_type)
plt.ylabel('acc-loss')
plt.legend(loc="center")
plt.show()
#plt.savefig('common.png')
#dataset
RANDOM_SEED = 42
def read_data(file_path):
column_names = ['user-id', 'activity', 'timestamp', 'x-axis', 'y-axis', 'z-axis']
m = pd.read_csv(file_path,names=column_names, header=None,sep=',')
return m
def feature_normalize(dataset):
mu = np.mean(dataset,axis=0)
sigma = np.std(dataset,axis=0)
return (dataset-mu)/sigma
dataset1 = read_data('ab.txt')
dataset = pd.DataFrame(dataset1)
dataset['x-axis'] = feature_normalize(dataset['x-axis'])
dataset['y-axis'] = feature_normalize(dataset['y-axis'])
dataset['z-axis'] = feature_normalize(dataset['z-axis'])
N_TIME_STEPS = 200
N_FEATURES = 3
step = 200
segments = []
labels = []
for i in range(0, len(dataset) - N_TIME_STEPS, step):
xs = dataset['x-axis'].values[i: i + N_TIME_STEPS]
ys = dataset['y-axis'].values[i: i + N_TIME_STEPS]
zs = dataset['z-axis'].values[i: i + N_TIME_STEPS]
label = stats.mode(dataset['activity'][i: i + N_TIME_STEPS])[0][0]
segments.append([xs, ys, zs])
labels.append(label)
print("reduced size of data", np.array(segments).shape)
reshaped_segments = np.asarray(segments,dtype=np.float32).reshape(-1,1, N_TIME_STEPS, 3)
print("Reshape the segments", np.array(reshaped_segments).shape)
#x_train1, x_val_test, y_train1, y_val_test = train_test_split(reshaped_segments, labels, test_size=0.25, random_state=RANDOM_SEED)
batch_size = 128
num_classes =6
def create_model():
input_shape = Input(shape=(1,200,3))
x = Conv2D(5, kernel_size=(1, 1), padding='valid')(input_shape)
x1 = keras.layers.concatenate([input_shape, x], axis=-1)
x = Conv2D(50, kernel_size=(1, 7),padding='valid',
kernel_initializer='glorot_uniform',
kernel_regularizer = keras.regularizers.l2(0.0015))(x1)
x = keras.layers.core.Activation('relu')(x)
x = MaxPooling2D(pool_size=(1, 2))(x)
x = Conv2D(50, kernel_size=(1, 7),padding='valid',kernel_initializer='glorot_uniform',
kernel_regularizer=keras.regularizers.l2(0.0015))(x)
x = keras.layers.core.Activation('relu')(x)
x = MaxPooling2D(pool_size=(1, 2))(x)
x = Flatten()(x)
x = Dropout(0.9)(x)
output = Dense(num_classes, activation='softmax',kernel_initializer='glorot_uniform',)(x)
model = Model(inputs=input_shape,outputs=output)
model.summary()
sgd = optimizers.SGD(lr=0.005,decay=1e-6,momentum=0.9,nesterov=True)
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=sgd,
metrics=['accuracy'])
return model
history = LossHistory()
epochs = 4000
#setting learning rate
def scheduler(epoch):
if epoch > 0.75 * epochs:
lr = 0.0005
elif epoch > 0.25 * epochs:
lr = 0.001
else:
lr = 0.005
return lr
scheduler = LearningRateScheduler(scheduler)
estimator = KerasClassifier(build_fn=create_model)
#divide dataset
scores = []
confusions = []
sign = ['DOWNSTAIRS','JOGGING','SITTING','STANDING','UPSTAIRS','WALKING']
encoder = LabelEncoder()
encoder_y = encoder.fit_transform(labels)
train_labels = to_categorical(encoder_y,num_classes=None)
#kfold = StratifiedKFold(reshaped_segments.shape[0],n_folds=10,shuffle=True,random_state=42)
kfold = StratifiedKFold(labels,n_folds=3,shuffle=True,random_state=42)
for train_index,test_index in kfold:
print(test_index)
x_train, x_test = reshaped_segments[train_index], reshaped_segments[test_index]
y_train, y_test = train_labels[train_index], train_labels[test_index]
estimator.fit(x_train,y_train,callbacks=[scheduler,history],epochs=10,batch_size=128,verbose=0)
scores.append(estimator.score(x_test,y_test))
print(y_test)
print(type(y_test))
pred_test = estimator.predict(x_test)
print(pred_test)
print(np.argmax(y_test,axis=1))
confusions.append(metrics.confusion_matrix(np.argmax(y_test,axis=1),pred_test,sign))
matrix = [[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0]]
for i in np.arange(n_folds-1):
for j in len(confusions[0]):
for k in len(confusions[0][0]):
matrix[j][k] = matrix[j][k] + confusions[i][j][k] + confusions[i+1][j][k]
model.save('model.h5')
model.save_weights('my_model_weights.h5')
print('score:',scores)
scores = np.mean(scores)
print('mean:',scores)
plt.figure(figsize=(16,14))
sns.heatmap(matrix, xticklabels=sign, yticklabels=sign, annot=True, fmt="d");
plt.title("CONFUSION MATRIX : ")
plt.ylabel('True Label')
plt.xlabel('Predicted label')
plt.savefig('cmatrix.png')
plt.show();
解决方案
错误不在您的主代码中,而是在符号的定义中。当您将符号定义为
sign = ['DOWNSTAIRS','JOGGING','SITTING','STANDING','UPSTAIRS','WALKING']
系统无法读取您的标签,因为它正在寻找标签 0、1、2、3、4、5,正如错误试图说的那样,即它在 y_pred 中找不到任何带有符号的标签。将符号更改为
sign = [1,2,3,4,5]
应该修复错误。至于你现在做什么,它相当简单,只需将你的结果映射为这个数组,然后在实际预测(部署)期间只需换出标签的数值。
推荐阅读
- flutter - 颤振刷新带有所选项目的网格视图
- reactjs - 当我尝试安装 react-chat-engine ..我看到这些错误
- javascript - 如何从 bookstrap 下拉菜单中获取数据值,以便我可以传递到数据库?
- xamarin - 根据 CollectionView 中控件数量的性能
- wordpress - 我该如何检查 Redux sample-config.php 以及 if/else 检查?
- c# - Asp.Net Core MVC - 发布于 SmarterAsp
- python - 我可以在单核 AWS Lightsail 中使用 tmux 运行多少个 Python 脚本?
- javascript - 我们如何在 fullcalender (@fullcalendar/react) 中创建与 dayGridMonth 视图相同的自定义视图
- html - 如何从 Bootstrap 5 折叠侧菜单中的链接拆分菜单 Chevron?
- heroku - Heroku 部署失败