Neural network only learns the last pattern when given several patterns sequentially

Problem Description

This post is about the same problem, but no correct answer was given there. Since the problem seems to be common, I'll keep my code behind the scenes for now.

Following this source, I wrote a network that works fine when I give it one training example together with a target vector: using gradient descent to minimize the cost function, the network learns to produce the target vector for the corresponding input vector. But it only works with one example!
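
For a single example that procedure is easy to reproduce even without the full network. A stripped-down sketch of the working one-example case (one sigmoid neuron, toy values of my own choosing):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
w = rng.random(2)                 # weights of a single sigmoid neuron
x_in = np.array([1.0, 2.0])       # the one training example
t = 0.0                           # its target

for _ in range(5000):
    a = sigmoid(w @ x_in)
    # dC/dw = (a - t) * sigmoid'(z) * x_in  for  C = (t - a)^2 / 2
    w -= 0.1 * (a - t) * a * (1 - a) * x_in

print(sigmoid(w @ x_in))          # approaches the target 0.0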

The main goal of a neural network is to react differently to different inputs, and we should be able to train it to do so. I tried changing the network weights by the average of the delta-weights computed for each example, but it failed: the training process gets stuck with an output vector that contains the average of all the target vectors in the training set. I have no ideas left and have found no source that explains this.

How do I train a neural network with a whole set of examples, and not with just one input vector?

UPDATE For those who are wondering, I attach my code below. Try to run it, and you will see that instead of outputting 0 1 it gives 0.5 0.5, which is the result of subtracting the averaged delta-weights.
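
That 0.5 0.5 is no accident: if the network gives one constant output c for every input, the cost averaged over the targets 0 and 1, ((0 - c)^2 + (1 - c)^2) / 4, is smallest exactly at c = 0.5, the mean of the targets. A quick standalone check (plain NumPy, separate from the network code below):

import numpy as np

targets = np.array([0.0, 1.0])
c = np.linspace(0.0, 1.0, 101)    # candidate constant outputs
# averaged quadratic cost of always answering c, over both targets
cost = np.mean((targets[None, :] - c[:, None]) ** 2 / 2, axis=1)
print(c[np.argmin(cost)])         # -> 0.5, the mean of the targets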

import numpy as np
from sympy import symbols, lambdify
from sympy.functions.elementary.exponential import exp
from time import sleep

x = symbols('x')
sigmoid = exp(x) / (1 + exp(x))   # logistic function, built symbolically
sigmoid_der = sigmoid.diff(x)     # its derivative via sympy
sigmoid = lambdify(x, sigmoid)    # compile both to plain numeric functions
sigmoid_der = lambdify(x, sigmoid_der)


class Neuron:
    def __init__(self, amount_of_inputs: int, hidden = True):
        # hidden/output neurons get random incoming weights; input neurons
        # get a single fixed weight of 1
        self.inputs = np.random.rand(amount_of_inputs) if hidden else np.array([1])
        self.bias = 0.0
        self._activation = 0.0
        self._wsum = 0.0

    
    @property
    def activation(self) -> float:
        return self._activation

    @property
    def wsum(self) -> float:
        return self._wsum


    def calculate(self, indata):
        # weighted sum of the inputs plus bias, squashed by the sigmoid
        wval = self.inputs * indata + self.bias
        self._wsum = wval.sum()
        self._activation = sigmoid(self._wsum)



class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.shape = shape
        self.layers = len(self.shape)

        self.network = [None for _ in range(self.layers)]
        # layer 0 holds the input neurons (a single fixed weight of 1; they
        # still apply the sigmoid); every later layer is fully connected
        # to the previous one
        self.network[0] = tuple([Neuron(1, hidden = False) for _ in range(shape[0])])
        for L in range(1, self.layers):
            self.network[L] = tuple([Neuron(shape[L - 1]) for _ in range(shape[L])])
        self.network = tuple(self.network)
        
        # quadratic cost C = sum_i (y_i - a_i)^2 / 2 over the output layer,
        # differentiated symbolically and compiled to numeric functions
        y = [symbols(f'y[{i}]') for i in range(shape[self.layers - 1])]
        a = [symbols(f'a[{i}]') for i in range(shape[self.layers - 1])]
        self.cost_function = sum([(y[i] - a[i]) ** 2 / 2 for i in range(shape[self.layers - 1])])
        self.gradient = tuple([self.cost_function.diff(a[i]) for i in range(shape[self.layers - 1])])
        self.cost_function = lambdify((y, a), self.cost_function)
        self.gradient = lambdify((y, a), self.gradient)


    def getLayer(self, L):
        return np.array([self.network[L][i].activation for i in range(self.shape[L])])

    
    def getWeightedSum(self, L):
        return np.array([self.network[L][i].wsum for i in range(self.shape[L])])

    
    def getInputsMatrix(self, L):
        return np.array([self.network[L][i].inputs for i in range(self.shape[L])])

    
    def calculate(self, values):
        # forward pass; note that the input layer also applies the sigmoid
        # to the raw values before they reach the hidden layer
        for i in range(self.shape[0]):
            self.network[0][i].calculate(values[i])

        for L in range(1, self.layers):
            indata = self.getLayer(L - 1)
            for j in range(self.shape[L]):
                self.network[L][j].calculate(indata)

    
    def get_result(self) -> tuple:
        return tuple([self.network[self.layers - 1][i].activation for i in range(self.shape[self.layers - 1])])
    

    def teach(self, targets, examples):
        if len(targets) != len(examples):
            raise ValueError("The amounts of target and input vectors do not coincide")

        # per-example activations and per-example deltas
        activations = [None for _ in range(len(examples))]
        delta = activations.copy()

        cost_is_low_enough = False
        while not cost_is_low_enough:
            # forward pass and backpropagation for every example
            for x in range(len(examples)):
                self.calculate(examples[x])

                activations[x] = [self.getLayer(l) for l in range(self.layers)]
                delta[x] = [None for _ in range(self.layers - 1)]

                # output-layer delta: dC/da * sigmoid'(z)
                network_output = self.getLayer(self.layers - 1)
                output_weighted = self.getWeightedSum(self.layers - 1)
                gradient_vector = np.array(self.gradient(targets[x], network_output))
                delta[x][-1] = gradient_vector * sigmoid_der(output_weighted)

                # propagate the deltas back through the hidden layers
                for l in range(self.layers - 2, 0, -1):
                    weight_matrix = self.getInputsMatrix(l + 1).transpose()
                    output_weighted = self.getWeightedSum(l)
                    activation = self.getLayer(l)
                    delta[x][l - 1] = (weight_matrix @ delta[x][l]) * sigmoid_der(output_weighted) * activation

               
            # average the per-example deltas into one update per neuron
            dw = [None for _ in range(self.layers - 1)]
            for l in range(self.layers - 1):
                dw[l] = np.empty(self.shape[l + 1])
                for j in range(self.shape[l + 1]):
                    dw[l][j] = np.mean([delta[x][l][j] for x in range(len(examples))])

            # gradient-descent step with learning rate 0.1: note that the same
            # dw[l - 1][j] is subtracted from every incoming weight k
            for l in range(1, self.layers):
                for j in range(self.shape[l]):
                    for k in range(self.shape[l - 1]):
                        self.network[l][j].inputs[k] -= 0.1 * dw[l - 1][j]

            # average cost over all examples decides when to stop
            cost = 0
            for x in range(len(examples)):
                self.calculate(examples[x])
                network_output = np.array(self.get_result())
                incost = self.cost_function(targets[x], network_output)
                print(network_output, incost)
                cost += incost
                # sleep(0.05)
            cost /= len(examples)
            print()
            if cost < 0.001:
                cost_is_low_enough = True



network = NeuralNetwork((2, 4, 1))   # 2 inputs, 4 hidden neurons, 1 output

examples = np.array([
    [1, 2],
    [3, 4],
])

targets = np.array([
    [0],
    [1]
])


network.teach(targets, examples)

values_1 = np.array([5, 10])
network.calculate(values_1)
result = network.get_result()
print(result)

'''
values_2 = np.array([3, 4])
network.calculate(values_2)
result = network.get_result()
print(result)
'''

Tags: python, python-3.x, neural-network, backpropagation

Solution
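
Averaging the per-example delta-weights (batch gradient descent) is a legitimate way to train on a set of examples, so the averaging itself is not the problem; the quantity being averaged is not the full gradient. Two things stand out in the code above. First, the update self.network[l][j].inputs[k] -= 0.1 * dw[l - 1][j] subtracts the same value from every incoming weight k of a neuron, while the gradient of the cost with respect to the weight w_jk is delta_j * a_k, the neuron's delta times the activation of the k-th neuron in the previous layer. Second, the hidden-layer deltas are multiplied by the current layer's activation during backpropagation, which is not part of the usual formula delta_l = (W_{l+1}^T delta_{l+1}) * sigmoid'(z_l). A smaller side issue: the input-layer neurons also squash the raw inputs through the sigmoid, which maps 3 and 4 to nearly identical values and makes the two examples harder to tell apart.

Below is a minimal self-contained sketch of batch gradient descent with the per-weight activation factor in place. It uses plain NumPy and the same 2-4-1 shape as the question, but does not reuse the classes above, so treat it as an illustration of the technique rather than a drop-in patch:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)

# 2-4-1 network: weight matrices and bias vectors
W1 = rng.standard_normal((4, 2)); b1 = np.zeros(4)
W2 = rng.standard_normal((1, 4)); b2 = np.zeros(1)

examples = np.array([[1.0, 2.0], [3.0, 4.0]])
targets = np.array([[0.0], [1.0]])

lr = 0.5
for epoch in range(20000):
    # accumulate the full gradient over ALL examples, then average it
    gW1 = np.zeros_like(W1); gb1 = np.zeros_like(b1)
    gW2 = np.zeros_like(W2); gb2 = np.zeros_like(b2)
    cost = 0.0
    for x, t in zip(examples, targets):
        z1 = W1 @ x + b1; a1 = sigmoid(z1)    # forward pass (raw inputs)
        z2 = W2 @ a1 + b2; a2 = sigmoid(z2)
        cost += float(np.sum((t - a2) ** 2) / 2)

        d2 = (a2 - t) * a2 * (1 - a2)         # output delta: dC/da * sigmoid'(z)
        d1 = (W2.T @ d2) * a1 * (1 - a1)      # hidden delta, no extra activation factor
        gW2 += np.outer(d2, a1); gb2 += d2    # dC/dW_jk = delta_j * a_k:
        gW1 += np.outer(d1, x);  gb1 += d1    # the factor the update above was missing

    n = len(examples)
    W2 -= lr * gW2 / n; b2 -= lr * gb2 / n
    W1 -= lr * gW1 / n; b1 -= lr * gb1 / n
    if cost / n < 0.001:
        break

for x in examples:
    print(sigmoid(W2 @ sigmoid(W1 @ x + b1) + b2))   # roughly [0] and [1]

With the activation factor restored, the averaged update no longer moves all incoming weights of a neuron in lockstep, and the two outputs separate towards 0 and 1 instead of settling at 0.5.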

