c++ - 神经网络似乎无法正常工作
问题描述
我一直在按照 Tariq Rashid 的一本名为“Make Your Own Neural Network”的书创建自己的神经网络。在一些理论之后,本书解释了一个用 Python 编写的示例。由于我不太喜欢 Python,所以我尝试在 C++ 中实现它。是的,我确实尝试过现有的库。但现在我想实现我自己的架构,我可以随意调整并将它们与我的 Qt 和 AR 项目一起使用。
前馈信号按预期工作。我使用了与书中给出的相同示例值,并且一切似乎都运行良好。然而,反向传播未能解决 XOR 问题。输出值几乎总是相同的。当两个输入值都是 0.0 时,它会稍大一些。权重对我来说显得异常大(通常大于 7.0。不确定这是否正常)。索引所有层并转置需要的层也可以正常工作,否则我会出现超出范围的错误。该项目由一个包含所有矩阵计算的命名空间和 NeuralNetwork 类组成。
也许有人知道是什么导致了我的代码中的问题。谢谢
(编辑:如果您想自己编译和运行它,可以从我的 Git 存储库获取源代码)
AnnMaths.cpp
#include "AnnMaths.h"
vector<vector<double>> AnnMaths::transpose(vector<vector<double>>& x)
{
    // Return y with y[c][r] == x[r][c]; assumes x is rectangular and non-empty.
    const size_t rows = x.size();
    const size_t cols = x[0].size();
    vector<vector<double>> y(cols);
    for (size_t c = 0; c < cols; ++c)
    {
        y[c].reserve(rows);
        for (size_t r = 0; r < rows; ++r)
        {
            y[c].push_back(x[r][c]);
        }
    }
    return y;
}
vector<vector<double>> AnnMaths::multiply(vector<vector<double>>& x, vector<vector<double>>& y, bool useSigmoid)
{
    // Matrix product z = x * y; dimensions must agree (x cols == y rows).
    // When useSigmoid is set, each resulting entry is squashed through the
    // logistic function, which fuses the activation step into the multiply.
    const size_t rows  = x.size();
    const size_t cols  = y[0].size();
    const size_t inner = x[0].size();
    vector<vector<double>> z(rows, vector<double>(cols, 0.0));
    for (size_t r = 0; r < rows; ++r)
    {
        for (size_t c = 0; c < cols; ++c)
        {
            double sum = 0.0;
            for (size_t k = 0; k < inner; ++k)
            {
                sum += x[r][k] * y[k][c];
            }
            z[r][c] = useSigmoid ? sigmoid(sum) : sum;
        }
    }
    return z;
}
vector<vector<double>> AnnMaths::getOutputErrors(vector<vector<double>>& targets, vector<vector<double>>& output)
{
    // Column vector of raw differences (target - actual) per output neuron.
    // The squared-error variant was deliberately left disabled by the author.
    vector<vector<double>> errors(targets.size());
    for (size_t neuron = 0; neuron < targets.size(); ++neuron)
    {
        errors[neuron].push_back(targets[neuron][0] - output[neuron][0]);
    }
    return errors;
}
vector<vector<double>> AnnMaths::getHiddenErrors(vector<vector<double>>& weightsT, vector<vector<double>>& errors)
{
    // Distribute the next layer's errors backwards: transposed weights times
    // the error column vector.
    return multiply(weightsT, errors);
}
vector<vector<double>> AnnMaths::applyErrors(vector<vector<double>>& errors, vector<vector<double>>& output)
{
    // Multiply each error by the sigmoid derivative at that neuron's output:
    // sigmoid'(x) expressed via the output is out * (1 - out).
    vector<vector<double>> results(errors.size());
    for (size_t i = 0; i < errors.size(); ++i)
    {
        const double out = output[i][0];
        results[i].push_back(errors[i][0] * out * (1.0 - out));
    }
    return results;
}
vector<vector<double>> AnnMaths::applyLearnrate(vector<vector<double>>& x, double lr)
{
    // Scale every entry of the matrix by the learning rate.
    vector<vector<double>> scaled(x.size());
    for (size_t row = 0; row < x.size(); ++row)
    {
        scaled[row].reserve(x[0].size());
        for (size_t col = 0; col < x[0].size(); ++col)
        {
            scaled[row].push_back(x[row][col] * lr);
        }
    }
    return scaled;
}
vector<vector<double>> AnnMaths::add(vector<vector<double>>& x, vector<vector<double>>& y)
{
    // Element-wise matrix sum; x and y must have identical dimensions.
    vector<vector<double>> sum(x.size());
    for (size_t row = 0; row < x.size(); ++row)
    {
        sum[row].reserve(x[0].size());
        for (size_t col = 0; col < x[0].size(); ++col)
        {
            sum[row].push_back(x[row][col] + y[row][col]);
        }
    }
    return sum;
}
double AnnMaths::sigmoid(double val)
{
    // Logistic activation: 1 / (1 + e^-val), mapping R onto (0, 1).
    // Use exp() instead of pow(E, -val): exp is the dedicated (faster) routine
    // and avoids the rounding error of a hand-written truncated E constant.
    return 1.0 / (1.0 + exp(-val));
}
double AnnMaths::randomVal()
{
    // Pseudo-random initial weight, uniform in [-0.5, 0.5] (rand() granularity).
    const double unit = static_cast<double>(rand()) / RAND_MAX; // in [0, 1]
    return unit - 0.5;
}
神经网络.cpp
#include "NeuralNetwork.h"
NeuralNetwork::NeuralNetwork(vector<int>& topology, vector<vector<double>>& input, vector<vector<double>>& targets, double lr)
{
    // topology[i] is the neuron count of layer i. Layers and errors are stored
    // as column vectors (N x 1); weights[i] connects layer i to layer i+1 with
    // dimensions topology[i+1] x topology[i].
    this->topology = topology;
    this->layers.clear();
    this->weights.clear();
    this->targets = targets; // assignment replaces contents; no clear() needed
    this->layers.resize(topology.size());
    this->errors.resize(this->layers.size());
    this->weights.resize(topology.size() - 1);
    this->learnrate = lr;
    for (size_t layer = 0; layer < topology.size(); ++layer)
    {
        if (layer == 0)
        {
            // The input layer is copied verbatim from the caller. (The original
            // pushed an empty row first and then overwrote the whole vector —
            // dead code, removed.)
            this->layers[layer] = input;
        }
        else
        {
            // Hidden/output activations start at 0.0; feedForward overwrites them.
            for (int neuron = 0; neuron < topology[layer]; ++neuron)
            {
                this->layers[layer].push_back(vector<double>(1, 0.0));
            }
        }
        if (layer < topology.size() - 1)
        {
            // Random initial weights in [-0.5, 0.5].
            for (int row = 0; row < topology[layer + 1]; ++row)
            {
                this->weights[layer].push_back(vector<double>());
                for (int col = 0; col < topology[layer]; ++col)
                {
                    this->weights[layer][row].push_back(AnnMaths::randomVal());
                }
            }
        }
    }
    // Give errors the same shape as the layers so per-layer indexing lines up.
    this->errors = this->layers;
}
void NeuralNetwork::feedForward()
{
for (int layer = 0; layer < weights.size(); ++layer)
{
layers[layer + 1] = AnnMaths::multiply(weights[layer], layers[layer], true);
}
}
void NeuralNetwork::setErrors()
{
for (int layer = layers.size() - 1; layer >= 0; --layer)
{
if (layer == layers.size() - 1)
{
this->errors[layer] = AnnMaths::getOutputErrors(this->targets, layers[layer]);
}
else
{
vector<vector<double>> weightsT = AnnMaths::transpose(this->weights[layer]);
vector<vector<double>> tmpErrors = AnnMaths::multiply(weightsT, this->errors[layer+1]);
this->errors[layer] = tmpErrors;
}
}
}
void NeuralNetwork::setInput(vector<vector<double>>& input)
{
    // Replace the activations of the input layer with a new training sample.
    layers.front() = input;
}
void NeuralNetwork::setTargets(vector<vector<double>>& target)
{
    // BUG FIX: the original read `this->targets = targets;` — a member
    // self-assignment that silently discarded the `target` parameter. The
    // network therefore trained against the constructor's target vector for
    // every sample, which is why the output was nearly identical for all
    // XOR inputs.
    this->targets = target;
}
void NeuralNetwork::backPropagation()
{
setErrors(); //compute all errors
for (int layer = layers.size() - 2; layer >= 0; --layer)
{
vector<vector<double>> prevOutputT = AnnMaths::transpose(layers[layer]); //get the transposed output of the previous layer
vector<vector<double>> appliedErrors = AnnMaths::applyErrors(this->errors[layer+1], layers[layer+1]); //apply errors to output of next layer
vector<vector<double>> deltaWeights = AnnMaths::multiply(appliedErrors, prevOutputT); //compute delta of weights by multiplying the applied output with the previous output
deltaWeights = AnnMaths::applyLearnrate(deltaWeights, learnrate); //add learning rate to delta weights
weights[layer] = AnnMaths::add(deltaWeights, weights[layer]); //add delta weights to the weights
}
}
主文件
#include"AnnMaths.h"
#include<iostream>
#include<vector>
#include"NeuralNetwork.h"
int main()
{
    // XOR training set: four 2x1 input column vectors and their 1x1 targets.
    // Nested brace-initialization replaces ~35 lines of push_back chains.
    std::vector<std::vector<std::vector<double>>> input{
        {{0.0}, {0.0}},
        {{1.0}, {0.0}},
        {{0.0}, {1.0}},
        {{1.0}, {1.0}},
    };
    std::vector<std::vector<std::vector<double>>> targets{
        {{0.0}},
        {{1.0}},
        {{1.0}},
        {{0.0}},
    };
    // Topology: 2 inputs -> 3 hidden -> 1 output.
    std::vector<int> topology;
    topology.push_back(static_cast<int>(input[0].size()));
    topology.push_back(3);
    topology.push_back(static_cast<int>(targets[0].size()));
    NeuralNetwork nn(topology, input[0], targets[0], 0.3);
    for (int i = 0; i < 10000; ++i)
    {
        // The original anonymous-struct counter pair reduces to i % 4:
        // cycle through the four XOR patterns in order.
        const int sample = i % 4;
        nn.setInput(input[sample]);
        nn.setTargets(targets[sample]);
        nn.feedForward();
        nn.backPropagation();
        if (i % 51 == 0)
        {
            nn.printInput();
            nn.printOutput();
        }
    }
    return 0;
}
为了更好的可见性,我没有包括测试和打印功能。
解决方案
推荐阅读
- java - JOOQ查询的Java返回结果
- ruby-on-rails - 如何检查页面是否在范围内(kaminari)?
- mysql - MySQL:使用与源表不同的引擎从选择创建表
- python - Django表单中的自定义EditProfile,提交按钮不起作用
- direct2d - Direct2D:在绘制之前检查图像是否在可见区域之外?
- python - Pandas:将函数应用于数据框列和对象
- swift - Tabelview 行不显示(由于错误的约束)在 swift
- javascript - 在javascript中使用带有索引的id属性隐藏/显示div
- typescript - Graphql Appolo Server 不会将绑定函数解析为字段
- r - 如何仅将ggplot图例的某些元素斜体?