首页 > 解决方案 > XOR 在神经网络中收敛到 0.5

问题描述

目前我正在尝试实现一个三层网络(2个隐藏层和1个输出层)并让这个网络对XOR进行分类。我遇到了一个问题,我的输出层总是会收敛到 0.5 左右 - 我无法弄清楚为什么会发生这种情况,并希望得到一些关于原因的指导。

import numpy as np
from matplotlib import pyplot as plt

# XOR
x=np.array(([0,0],[0,1],[1,0],[1,1]), dtype=float)
y=np.array(([0],[1],[1],[0]), dtype=float)

class NN:
    def __init__(self, x, y):
        self.weights1 = np.random.uniform(-0.5, 0.5, (2,3))
        self.weights2 = np.random.uniform(-0.5, 0.5, (3,4))
        self.weights3 = np.random.uniform(-0.5, 0.5, (4,1))
        self.output = np.zeros(1)

    def forward_prop(self, training_data):
        self.layer1 = logistic_function(np.dot(training_data, self.weights1))
        self.layer2 = logistic_function(np.dot(self.layer1, self.weights2))
        self.output = logistic_function(np.dot(self.layer2, self.weights3))
        return self.output

    def back_prop(self, training_data, test_data):
        self.delta = loss_function(test_data, self.output) * logistic_deriv(self.output)
        self.e_weights3 = self.delta.dot(self.weights3.T)
        self.d_weights3 = self.e_weights3 * logistic_deriv(self.layer2)

        self.e_weights2 = self.d_weights3.dot(self.weights2.T)
        self.d_weights2 = self.e_weights2 * logistic_deriv(self.layer1)

        self.e_weights1 = self.d_weights2.dot(self.weights1.T)
        self.d_weights1 = self.e_weights1 * logistic_deriv(training_data)

        self.weights1 -= 0.01 * training_data.T.dot(self.d_weights1)
        self.weights2 -= 0.01 * self.layer1.T.dot(self.d_weights2)
        self.weights3 -= 0.01 * self.layer2.T.dot(self.d_weights3)

# Activation function
def logistic_function(z):
    return 1.0 / (1.0 + np.exp(-z))

# Derivative function
def logistic_deriv(z):
    return logistic_function(z) * (1.0 - logistic_function(z))

# Squared loss function
def loss_function(target_y, output_y):
    loss = target_y - output_y
    return 0.5 * np.power(loss,2)

network = NN(x, y)
for i in range(1000):
    for j in range(0,len(x)):
        network.forward_prop(x[j])
        network.back_prop(x[j],y[j])


print(network.forward_prop(x[0]))
print(network.forward_prop(x[1]))
print(network.forward_prop(x[2]))
print(network.forward_prop(x[3]))

标签: pythonpython-3.xmachine-learningneural-networkartificial-intelligence

解决方案


推荐阅读