python - 神经网络只在给定几个模式顺序时学习最后一个模式
问题描述
这篇文章是关于同样的问题,但没有给出正确的答案。由于这个问题似乎很普遍,我将把我的代码放在幕后。
在这个源之后,我编写了一个网络,当我给它一个带有目标向量的训练示例时效果很好。当给定相应的输入向量时,我使用梯度下降最小化成本函数以使网络提供目标向量。但这仅适用于一个示例!
神经网络的主要目标是对不同的输入做出不同的反应,我们应该能够训练它这样做。我尝试通过为每个示例计算的 delta-weights 的平均值来更改网络权重,但失败了:训练过程卡在输出向量中,输出向量包含训练集中所有目标向量的平均值。没有留下任何想法,没有找到解释的来源。
如何使用一组示例而不是仅使用一个输入向量来训练神经网络?
更新
对于那些想知道我将在下面附上我的代码的人。尝试运行它,您会看到0 1
它提供的不是输出,而是0.5 0.5
减去平均 delta-weights 的结果。
import numpy as np
from sympy import symbols, lambdify
from sympy.functions.elementary.exponential import exp
from time import sleep
x = symbols('x')
sigmoid = exp(x) / (1 + exp(x))
sigmoid_der = sigmoid.diff(x)
sigmoid = lambdify(x, sigmoid)
sigmoid_der = lambdify(x, sigmoid_der)
class Neuron:
def __init__(self, amount_of_inputs: int, hidden = True):
self.inputs = np.random.rand(amount_of_inputs) if hidden else np.array([1])
self.bias = 0.0
self._activation = 0.0
self._wsum = 0.0
@property
def activation(self) -> float:
return self._activation
@property
def wsum(self) -> float:
return self._wsum
def calculate(self, indata):
wval = self.inputs * indata + self.bias
self._wsum = wval.sum()
self._activation = sigmoid(self._wsum)
class NeuralNetwork:
def __init__(self, shape: tuple):
self.shape = shape
self.layers = len(self.shape)
self.network = [None for _ in range(self.layers)]
self.network[0] = tuple([Neuron(1, hidden = False) for _ in range(shape[0])])
for L in range(1, self.layers):
self.network[L] = tuple([Neuron(shape[L - 1]) for _ in range(shape[L])])
self.network = tuple(self.network)
y = [symbols(f'y[{i}]') for i in range(shape[self.layers - 1])]
a = [symbols(f'a[{i}]') for i in range(shape[self.layers - 1])]
self.cost_function = sum([(y[i] - a[i]) ** 2 / 2 for i in range(shape[self.layers - 1])])
self.gradient = tuple([self.cost_function.diff(a[i]) for i in range(shape[self.layers - 1])])
self.cost_function = lambdify((y, a), self.cost_function)
self.gradient = lambdify((y, a), self.gradient)
def getLayer(self, L):
return np.array([self.network[L][i].activation for i in range(self.shape[L])])
def getWeightedSum(self, L):
return np.array([self.network[L][i].wsum for i in range(self.shape[L])])
def getInputsMatrix(self, L):
return np.array([self.network[L][i].inputs for i in range(self.shape[L])])
def calculate(self, values):
for i in range(self.shape[0]):
self.network[0][i].calculate(values[i])
for L in range(1, self.layers):
indata = self.getLayer(L - 1)
for j in range(self.shape[L]):
self.network[L][j].calculate(indata)
def get_result(self) -> tuple:
return tuple([self.network[self.layers - 1][i].activation for i in range(self.shape[self.layers - 1])])
def teach(self, targets, examples):
if len(targets) != len(examples):
raise TypeError("The amounts of target and input vectors do not coincide")
activations = [None for _ in range(len(examples))]
delta = activations.copy()
cost_is_low_enough = False
while not cost_is_low_enough:
for x in range(len(examples)):
self.calculate(examples[x])
activations[x] = [self.getLayer(l) for l in range(self.layers)]
delta[x] = [None for _ in range(self.layers - 1)]
network_output = self.getLayer(self.layers - 1)
output_weighted = self.getWeightedSum(self.layers - 1)
gradient_vector = np.array(self.gradient(targets[x], network_output))
delta[x][-1] = gradient_vector * sigmoid_der(output_weighted)
for l in range(self.layers - 2, 0, -1):
weight_matrix = self.getInputsMatrix(l + 1).transpose()
output_weighted = self.getWeightedSum(l)
activation = self.getLayer(l)
for j in range(self.shape[l]):
delta[x][l - 1] = (weight_matrix @ delta[x][l]) * sigmoid_der(output_weighted) * activation
dw = [None for _ in range(self.layers - 1)]
for x in range(len(examples)):
self.calculate(examples[x])
for l in range(self.layers - 1):
dw[l] = np.empty(self.shape[l + 1])
for j in range(self.shape[l + 1]):
dw[l][j] = np.mean([delta[x][l][j] for x in range(len(examples))])
for l in range(1, self.layers):
for j in range(self.shape[l]):
for k in range(self.shape[l - 1]):
self.network[l][j].inputs[k] -= 0.1 * dw[l - 1][j]
cost = 0
for x in range(len(examples)):
self.calculate(examples[x])
network_output = np.array(self.get_result())
incost = self.cost_function(targets[x], network_output)
print(network_output, incost)
cost += incost
# sleep(0.05)
cost /= len(examples)
print()
if cost < 0.001: cost_is_low_enough = True
network = NeuralNetwork((2, 4, 1))
examples = np.array([
[1, 2],
[3, 4],
])
targets = np.array([
[0],
[1]
])
network.teach(targets, examples)
values_1 = np.array([5, 10])
network.calculate(values_1)
result = network.get_result()
print(result)
'''
values_2 = np.array([3, 4])
network.calculate(values_2)
result = network.get_result()
print(result)
'''
解决方案
推荐阅读
- android - 如何使用导航类型安全参数插件将 Parcelable 类型的对象传递给片段
- python - 如何最好地实现链接变量?
- python - 如何过滤 sys.stdout?
- php - Array Combination need as like below
- xamarin - xamarin 复数 电磁学
- node.js - 为什么在 expressjs 中的 POST 请求上清空 req.body?
- bash - 脚本在本地运行但在 Jenkins 上失败(得到 SIGTERM,退出)
- node.js - 如何在同一网络中的spring boot和nodejs应用程序之间进行通信而无需登录特定端点
- apache-spark - 独立集群中的 Spark Streaming 多次接收相同的 Kafka 消息
- jquery - 在 Excel 工作表中导出动态 Jquery 变量数据