python-3.x - 自定义神经网络在 iris 数据集上精度为 0
问题描述
我使用以下代码构建了一个用于鸢尾花(iris)数据集分类的神经网络:
from sklearn import datasets
from scipy.optimize import minimize
import numpy as np
def train_test_split(X, y, train_frac=0.2):
    """Shuffle X/y in unison and split them into train/test partitions.

    NOTE(review): the default assigns only 20% of the samples to the
    TRAINING split and 80% to testing — the opposite of the usual
    convention (and of sklearn's train_test_split). The script below
    calls this twice, leaving roughly 150 * 0.2 * 0.2 ≈ 6 samples to
    train on, which plausibly explains the reported 0.0 accuracy. The
    fraction is now a parameter so callers can pass a sensible value
    (e.g. 0.8) without changing existing call sites.

    Args:
        X: sample matrix, indexable by a 1-D integer index array.
        y: labels aligned with X (here: one-hot rows).
        train_frac: fraction of samples placed in the training split.

    Returns:
        X_train, X_test, y_train, y_test
    """
    idx = np.arange(len(X))
    train_size = int(len(X) * train_frac)
    # Shuffle an index array rather than the data, so X and y stay aligned.
    np.random.shuffle(idx)
    X = X[idx]
    y = y[idx]
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]
    return X_train, X_test, y_train, y_test
# --- Data preparation (top-level script) ---
# Load the iris dataset: iris.data is a (150, 4) float feature matrix,
# iris.target is a (150,) integer label vector with values in {0, 1, 2}.
iris = datasets.load_iris()
X = iris.data
y = iris.target
nb_classes = 3
# One-hot encode the integer labels: row k of np.eye(3) is the one-hot
# vector for class k, so fancy-indexing by the targets yields a (150, 3) Y.
targets = np.array([y]).reshape(-1)
Y = np.eye(nb_classes)[targets]
# Earlier shuffle attempt, left commented out in the question:
# randomize = np.arange(len(X))
# np.random.shuffle(randomize)
# X = X[randomize]
# Y = Y[randomize]
# NOTE(review): the custom train_test_split keeps only 20% for training,
# and the second call splits that 20% again — only about 6 of the 150
# samples end up in the final X_train, which plausibly explains the
# 0.0 accuracy reported further down.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train)
def optimize(X_train, Y_train, X_val=None, Y_val=None, epochs=10, nodes=[], lr=0.15):
    """Train the network for `epochs` passes and report accuracy every 20.

    Bug fix: the original guarded the validation print with `X_val.any()`,
    which raises AttributeError when X_val is left at its default of None
    (and is also False for an all-zero validation set). An explicit
    `is not None` check is used instead. The unused `hidden_layers`
    local was removed.

    Args:
        X_train, Y_train: training samples and one-hot targets.
        X_val, Y_val: optional validation set; skipped when None.
        epochs: number of full passes over the training data.
        nodes: layer sizes [n_in, hidden..., n_out]; read-only here, so the
            mutable default is harmless (kept for interface compatibility).
        lr: learning rate forwarded to train().

    Returns:
        The trained list of weight matrices.
    """
    weights = init_weights(nodes)
    for epoch in range(1, epochs + 1):
        weights = train(X_train, Y_train, lr, weights)
        if epoch % 20 == 0:  # periodic progress report
            print("Epoch {}".format(epoch))
            print("Training accuracy:{}".format(acc(X_train, Y_train, weights)))
            if X_val is not None:
                print("Validation Accuracy:{}".format(acc(X_val, Y_val, weights)))
    return weights
def init_weights(nodes):
    """Create one weight matrix per layer transition.

    Each matrix has shape (nodes[i], nodes[i-1] + 1): one row per output
    unit, one column per input unit plus a leading bias column. Entries
    are drawn uniformly from [-1, 1], one scalar draw at a time (same RNG
    consumption order as a nested comprehension would produce).
    """
    weights = []
    for fan_in, fan_out in zip(nodes, nodes[1:]):
        rows = []
        for _ in range(fan_out):
            rows.append([np.random.uniform(-1, 1) for _ in range(fan_in + 1)])
        weights.append(np.matrix(rows))
    return weights
def forward(x, weights, layers):
    """Forward pass through the network.

    `x` must already carry the leading bias input of 1 (the caller
    augments it). Returns the list [input, act_1, ..., act_L]; each
    intermediate activation is re-augmented with a bias term before
    being fed to the next layer.
    """
    activations = [x]
    layer_input = x
    for w in weights[:layers]:
        out = sigmoid(np.dot(layer_input, w.T))
        activations.append(out)
        layer_input = np.append(1, out)  # prepend bias for the next layer
    return activations
def back(y, activations, weights, layers, lr=0.15):
    """Backpropagate the output error and update `weights` in place.

    Bug fix: the original read `lr` from module-global scope, so the
    learning rate that `train()` receives was silently ignored. It is
    now an explicit parameter; the default of 0.15 matches the global
    value defined in this script, so existing 4-argument calls keep
    their exact behavior.

    Args:
        y: one-hot target row for the current sample.
        activations: list produced by forward() (network input first).
        weights: list of np.matrix weight layers; mutated and returned.
        layers: number of weight layers to walk back through.
        lr: learning rate applied to each weight update.

    Returns:
        The updated `weights` list (same object, for chaining).
    """
    output_final = activations[-1]
    error = np.matrix(y - output_final)  # error at the output layer
    for j in range(layers, 0, -1):
        curr_activation = activations[j]
        if j > 1:
            # Hidden layers consumed a bias-augmented previous activation.
            prev_activation = np.append(1, activations[j - 1])
        else:
            # First hidden layer: the input was already augmented by the caller.
            prev_activation = activations[0]
        delta = np.multiply(error, sigmoid_gradient(curr_activation))
        weights[j - 1] += lr * np.multiply(delta.T, prev_activation)
        # Strip the bias column before propagating the error backwards —
        # the bias has no upstream activation to receive error.
        w = np.delete(weights[j - 1], [0], axis=1)
        error = np.dot(delta, w)
    return weights
def train(X, Y, lr, weights):
    """Run one epoch of per-sample (stochastic) training over X/Y.

    NOTE: `lr` is accepted but not forwarded — `back` reads the learning
    rate from the surrounding scope, mirroring the original behavior.
    """
    layers = len(weights)
    for idx, x in enumerate(X):
        y = Y[idx]
        sample = np.matrix(np.append(1, x))  # prepend the bias input
        activations = forward(sample, weights, layers)
        weights = back(y, activations, weights, layers)
    return weights
def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + e^(-x)); applies elementwise to arrays."""
    return np.reciprocal(1.0 + np.exp(-x))
def sigmoid_gradient(x):
    """Sigmoid derivative expressed via its output: s'(z) = s(z)(1 - s(z)),
    where `x` is already s(z). np.multiply keeps the product elementwise
    even when `x` is an np.matrix (plain `*` would matrix-multiply).
    """
    one_minus = 1 - x
    return np.multiply(x, one_minus)
def predict(item, weights):
    """Classify one raw (un-augmented) feature vector.

    Runs the forward pass and returns a one-hot Python list with a 1 at
    the index of the strongest output activation.
    """
    layers = len(weights)
    augmented = np.append(1, item)  # prepend the bias input
    activations = forward(augmented, weights, layers)
    final = activations[-1].A1      # flatten the (1, k) output matrix
    winner = FindMaxActivation(final)
    prediction = [0] * len(final)
    prediction[winner] = 1
    return prediction
def FindMaxActivation(output):
    """Return the index of the largest activation in `output`.

    Replaces a hand-rolled linear scan with np.argmax, which has the
    same tie-breaking behavior (first occurrence of the maximum wins).
    """
    return int(np.argmax(output))
def acc(X, Y, weights):
    """Fraction of samples in X whose one-hot prediction matches Y.

    The float one-hot rows (e.g. [0.0, 1.0, 0.0]) compare equal to the
    integer lists predict() returns, since Python compares list elements
    numerically.

    Robustness fix: returns 0.0 for an empty X instead of raising
    ZeroDivisionError.
    """
    if len(X) == 0:
        return 0.0
    correct = sum(
        1 for i in range(len(X))
        if Y[i].tolist() == predict(X[i], weights)
    )
    return correct / len(X)
# --- Network configuration and training (top-level script) ---
f = len(X[0]) # Number of features (4 for iris)
o = len(Y[0]) # Number of outputs / classes (3)
# Layer sizes: input, two hidden layers of 5 and 10 units, output.
layers = [f, 5, 10, o] # Number of nodes in layers
# NOTE(review): `back` reads the module-global `lr`, so this assignment is
# what actually sets the learning rate — the lr= keyword below is ignored.
lr, epochs = 0.15, 100
weights = optimize(X_train, Y_train, X_val, Y_val, epochs=epochs, nodes=layers, lr=lr);
print("Testing Accuracy: {}".format(acc(X_test, Y_test, weights)))
但它给出的精度为 0,输出如下:
Epoch 20
Training accuracy:0.0
Validation Accuracy:0.0
Epoch 40
Training accuracy:0.0
Validation Accuracy:0.0
Epoch 60
Training accuracy:0.0
Validation Accuracy:0.0
Epoch 80
Training accuracy:0.0
Validation Accuracy:0.0
Epoch 100
Training accuracy:0.0
Validation Accuracy:0.0
Testing Accuracy: 0.0
但是,如果我使用下载的 csv 格式数据集(原文此处附有下载链接),并按如下方式加载和划分数据,网络就能正常训练:
# Alternative data pipeline from the question: load the same dataset from a
# CSV file and use sklearn's train_test_split instead of the custom one.
iris = pd.read_csv("./data/Iris.csv")
iris = iris.sample(frac=1).reset_index(drop=True) # Shuffle
X = iris[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
X = np.array(X)
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the string species labels into a dense (n, 3) float array.
one_hot_encoder = OneHotEncoder(sparse=False)
Y = iris.Species
Y = one_hot_encoder.fit_transform(np.array(Y).reshape(-1, 1))
# Notebook-style peek at the first few encoded rows (no effect in a script).
Y[:5]
from sklearn.model_selection import train_test_split
# 85% train / 15% test, then carve 10% of the train split off as validation —
# unlike the custom splitter above, the bulk of the data goes to training.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.1)
输出:
Epoch 20
Training accuracy:0.9385964912280702
Validation Accuracy:0.9230769230769231
Epoch 40
Training accuracy:0.9912280701754386
Validation Accuracy:0.9230769230769231
Epoch 60
Training accuracy:0.9736842105263158
Validation Accuracy:0.9230769230769231
Epoch 80
Training accuracy:0.9736842105263158
Validation Accuracy:0.9230769230769231
Epoch 100
Training accuracy:0.9824561403508771
Validation Accuracy:0.9230769230769231
Testing Accuracy: 0.9565217391304348
为什么会有这种差异?
解决方案
根本区别在于数据划分:自定义的 train_test_split 只把 20% 的样本分给训练集(其余 80% 作测试),而且脚本连续调用了它两次,最终只剩下约 150 × 0.2 × 0.2 ≈ 6 个训练样本,网络无法从中学到有效规律;csv 版本使用的 sklearn train_test_split(test_size=0.15)保留了约 85% 的样本用于训练,因此能正常收敛。把自定义函数中的 train_size 改为 int(len(X) * 0.8) 即可得到与 csv 版本相当的精度。
推荐阅读
- python - 想要帮助在词干后再次连接我的标记和短语以形成一个字符串
- sugarcrm - 跨站请求伪造 (XSRF) 攻击检测到糖 8.2
- java - 使用 Jersey REST 客户端调用 Yelp API
- performance - 我正在寻找有关加快 Boyer-Moore-Horspool 代码的建议
- c# - 当我打开解决方案时,Visual Studio 2017 冻结然后崩溃
- ssh-tunnel - 复杂的 SSH 隧道
- python - 如何将使用 GetDist 生成的三角形/等高线图插入 Matplotlib 子图中?
- javascript - 在多维 JSON 数组中选择多个数组
- node.js - 无尽循环:找不到“交叉生成”
- progress-4gl - &GLOBAL-DEFINE 和 &SCOPED-DEFINE in Progress 4gl 有什么区别?