Neural network doesn't seem to work

Problem description

I have been building my own neural network by following Tariq Rashid's book "Make Your Own Neural Network". After some theory, the book walks through an example written in Python. Since I am not very fond of Python, I tried to implement it in C++. And yes, I did try existing libraries. But now I want my own implementation that I can tweak freely and use with my Qt and AR projects.

The feed-forward pass works as expected. I used the same example values given in the book and everything seems to run fine. However, backpropagation fails to solve the XOR problem. The output values are almost always the same; the output is slightly larger when both inputs are 0.0. The weights look unusually large to me (often greater than 7.0; I'm not sure whether that is normal). Indexing all the layers and transposing the ones that need it also works correctly, otherwise I would get out-of-range errors. The project consists of a namespace containing all the matrix computations and the NeuralNetwork class.

Maybe someone can spot what is causing the problem in my code. Thanks!

(Edit: if you want to compile and run it yourself, you can get the source code from my Git repository.)

AnnMaths.cpp

#include "AnnMaths.h"

vector<vector<double>> AnnMaths::transpose(vector<vector<double>>& x)
{
    vector<vector<double>> y;

    for (int col = 0; col < x[0].size(); ++col)
    {
        y.push_back(vector<double>());
        for (int row = 0; row < x.size(); ++row)
        {
            y[col].push_back(x[row][col]);
        }
    }

    return y;
}

vector<vector<double>> AnnMaths::multiply(vector<vector<double>>& x, vector<vector<double>>& y, bool useSigmoid)
{
    vector<vector<double>> z;

    for (int row = 0; row < x.size(); row++)
    {
        z.push_back(vector<double>());
        for (int col = 0; col < y[0].size(); col++)
        {
            z[row].push_back(0);
            for (int inner = 0; inner < x[0].size(); inner++)
            {
                z[row][col] += x[row][inner] * y[inner][col];
            }
            if (useSigmoid) { z[row][col] = sigmoid(z[row][col]); }
        }
    }

    return z;
}

vector<vector<double>> AnnMaths::getOutputErrors(vector<vector<double>>& targets, vector<vector<double>>& output)
{
    vector<vector<double>> errors;

    for (int neuron = 0; neuron < targets.size(); ++neuron)
    {
        errors.push_back(vector<double>());
        errors[neuron].push_back(/*pow(*/targets[neuron][0] - output[neuron][0]/*, 2)*/);
    }

    return errors;
}

vector<vector<double>> AnnMaths::getHiddenErrors(vector<vector<double>>& weightsT, vector<vector<double>>& errors)
{
    vector<vector<double>> results = multiply(weightsT, errors); // called without the sigmoid flag, so AnnMaths.h presumably declares useSigmoid with a default of false
    return results;
}

vector<vector<double>> AnnMaths::applyErrors(vector<vector<double>>& errors, vector<vector<double>>& output)
{
    vector<vector<double>> results;

    for (int i = 0; i < errors.size(); ++i)
    {
        results.push_back(vector<double>());
        results[i].push_back(errors[i][0] * output[i][0] * (1.0 - output[i][0]));
    }

    return results;
}

vector<vector<double>> AnnMaths::applyLearnrate(vector<vector<double>>& x, double lr)
{
    vector<vector<double>> results;

    for (int row = 0; row < x.size(); ++row)
    {
        results.push_back(vector<double>());
        for (int col = 0; col < x[0].size(); ++col)
        {
            results[row].push_back(x[row][col] * lr);
        }
    }

    return results;
}

vector<vector<double>> AnnMaths::add(vector<vector<double>>& x, vector<vector<double>>& y)
{
    vector<vector<double>> results;

    for (int row = 0; row < x.size(); ++row)
    {
        results.push_back(vector<double>());
        for (int col = 0; col < x[0].size(); ++col)
        {
            results[row].push_back(x[row][col] + y[row][col]);
        }
    }

    return results;
}

double AnnMaths::sigmoid(double val)
{
    return 1.0 / (1.0 + exp(-val)); // logistic function; equivalent to 1 / (1 + pow(E, -val))
}

double AnnMaths::randomVal()
{
    return ((double)rand() / RAND_MAX) - 0.5; // uniform in [-0.5, 0.5]
}
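
Since all of the backpropagation math goes through these helpers, it is worth checking them in isolation. Below is a minimal sanity check (a separate test scaffold, not part of the project; it assumes the declarations in AnnMaths.h match the definitions above):

#include "AnnMaths.h"
#include <cassert>
#include <iostream>

int main()
{
    std::vector<std::vector<double>> a = { {1.0, 2.0}, {3.0, 4.0} }; // 2x2 matrix
    std::vector<std::vector<double>> v = { {1.0}, {1.0} };           // 2x1 column vector

    // transpose: rows and columns swapped
    std::vector<std::vector<double>> at = AnnMaths::transpose(a);
    assert(at[0][1] == 3.0 && at[1][0] == 2.0);

    // multiply with the sigmoid disabled: a plain matrix product
    std::vector<std::vector<double>> p = AnnMaths::multiply(a, v, false);
    assert(p[0][0] == 3.0 && p[1][0] == 7.0);

    std::cout << "AnnMaths sanity checks passed\n";
    return 0;
}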

NeuralNetwork.cpp

#include "NeuralNetwork.h"

NeuralNetwork::NeuralNetwork(vector<int>& topology, vector<vector<double>>& input, vector<vector<double>>& targets, double lr)
{
    this->topology = topology;
    this->layers.clear();
    this->weights.clear();
    this->targets.clear();
    this->targets = targets;
    this->layers.resize(topology.size());
    this->errors.resize(this->layers.size());
    this->weights.resize(topology.size()-1);
    this->learnrate = lr;

    for (int layer = 0; layer < topology.size(); ++layer)
    {
        if (layer == 0)
        {
            this->layers[layer] = input; // the input layer is simply the given input column vector
        }
        else
        {
            for (int neuron = 0; neuron < topology[layer]; ++neuron)
            {
                this->layers[layer].push_back(vector<double>());
                this->layers[layer][neuron].push_back(/*AnnMaths::randomVal()*/0.0);
            }
        }

        if (layer < (topology.size() - 1))
        {
            for (int row = 0; row < topology[layer+1]; ++row)
            {
                this->weights[layer].push_back(vector<double>());
                for (int col = 0; col < topology[layer]; ++col)
                {
                    this->weights[layer][row].push_back(AnnMaths::randomVal());
                }
            }
        }
    }
    this->errors = this->layers; // give the error buffers the same shape as the layers
}

void NeuralNetwork::feedForward()
{
    for (int layer = 0; layer < weights.size(); ++layer)
    {
        layers[layer + 1] = AnnMaths::multiply(weights[layer], layers[layer], true);
    }
}

void NeuralNetwork::setErrors()
{
    for (int layer = layers.size() - 1; layer >= 0; --layer)
    {
        if (layer == layers.size() - 1)
        {
            this->errors[layer] = AnnMaths::getOutputErrors(this->targets, layers[layer]);
        }
        else
        {
            vector<vector<double>> weightsT = AnnMaths::transpose(this->weights[layer]);
            vector<vector<double>> tmpErrors = AnnMaths::multiply(weightsT, this->errors[layer+1]);
            this->errors[layer] = tmpErrors;
        }
    }
}

void NeuralNetwork::setInput(vector<vector<double>>& input)
{
    this->layers[0] = input;
}

void NeuralNetwork::setTargets(vector<vector<double>>& target)
{
    this->targets = target; // fixed: originally read "this->targets = targets;", a self-assignment that silently left the targets unchanged
}

void NeuralNetwork::backPropagation()
{
    setErrors(); //compute all errors

    for (int layer = layers.size() - 2; layer >= 0; --layer)
    {
        vector<vector<double>> prevOutputT = AnnMaths::transpose(layers[layer]); //transposed output of the previous layer
        vector<vector<double>> appliedErrors = AnnMaths::applyErrors(this->errors[layer+1], layers[layer+1]); //error terms: next layer's error times the sigmoid derivative of its output
        vector<vector<double>> deltaWeights = AnnMaths::multiply(appliedErrors, prevOutputT); //delta weights: outer product of the error terms and the previous layer's output
        deltaWeights = AnnMaths::applyLearnrate(deltaWeights, learnrate); //scale the delta weights by the learning rate
        weights[layer] = AnnMaths::add(deltaWeights, weights[layer]); //add the delta weights to the weights
    }
}
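
For reference, feedForward computes $o_{l+1} = \sigma(W_l \, o_l)$, and the weight update that backPropagation implements (the rule derived in the book) can be written in matrix form as:

e_l = W_l^{\top} e_{l+1}
\Delta W_l = \mathrm{lr} \cdot \left( e_{l+1} \odot o_{l+1} \odot (1 - o_{l+1}) \right) o_l^{\top}
W_l \leftarrow W_l + \Delta W_l

where $o_l$ is the output column vector of layer $l$, $\odot$ is the element-wise product, and $o(1-o)$ is the sigmoid derivative. getHiddenErrors corresponds to the first line, applyErrors to the element-wise factor, and multiply(appliedErrors, prevOutputT) to the outer product in the second line.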

Main file

#include"AnnMaths.h"
#include<iostream>
#include<vector>
#include"NeuralNetwork.h"

int main()
{
    // XOR inputs as 2x1 column vectors: (0,0), (1,0), (0,1), (1,1)
    std::vector<std::vector<std::vector<double>>> input = {
        { {0.0}, {0.0} },
        { {1.0}, {0.0} },
        { {0.0}, {1.0} },
        { {1.0}, {1.0} }
    };

    // XOR targets as 1x1 column vectors
    std::vector<std::vector<std::vector<double>>> targets = {
        { {0.0} },
        { {1.0} },
        { {1.0} },
        { {0.0} }
    };

    std::vector<int> topology; // 2 input, 3 hidden, 1 output neurons
    topology.push_back(input[0].size());
    topology.push_back(3);
    topology.push_back(targets[0].size());

    NeuralNetwork nn(topology, input[0], targets[0], 0.3);

    for (int i = 0; i < 10000; ++i)
    {
        int sample = i % 4; // cycle through the four XOR patterns
        nn.setInput(input[sample]);
        nn.setTargets(targets[sample]);

        nn.feedForward();
        nn.backPropagation();

        if (i % 51 == 0)
        {
            nn.printInput();
            nn.printOutput();
        }
    }
    return 0;
}
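
To see whether the net has actually learned XOR, a final evaluation pass can follow the training loop (before the return in main): each pattern runs through the trained network once, without further weight updates. A minimal sketch reusing only the functions already shown:

    // final evaluation: one forward pass per XOR pattern, no backpropagation
    for (int sample = 0; sample < 4; ++sample)
    {
        nn.setInput(input[sample]);
        nn.feedForward();
        nn.printInput();  // print helpers omitted from this post, see note below
        nn.printOutput();
    }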

For better readability, I did not include the test and print functions.

Tags: c++, machine-learning, deep-learning, neural-network
