c# – Is the backpropagation-algorithm and the Math behind my implemented correct?


I am learning machine-learning therefore I want to implement a neural network by myself from the Code point of View. So far I got an algorithm in C# that works more or less reliable. Sometimes it learns correctly, sometimes the performence is not that good.

What I want to know is: Did I implement the Math behind backpropagation correct?

My problem is, when I change my activation function from Tanh to Logistic, my network doesn’t seem to work anymore. I tryed some different things, doublechecked my activation functions etc. and as far as I can say, the Math should be implemented correctly, but I am not 100% sure about that.

I.E. I checked out the activation function explanation here and worked with the mentoned code which didn’t improve the output.

Another thing that makes me unsure is, when I changed my cost-function, I got some bad results. So I researched some cost-function implementation here and here. For example the MSE/Quadric on one site calculates the derivate as “Output – Target” while on the other site they used “Target – Output” which confuses me.

I’m also not sure if my Bias-calculation for training is correct.

My Code comes as a Class with my implementation for the neuronal network and all the necessary functions/objects, and some code to test the Class. I removed some activation- and cost-functions since I haven’t tested yet them and to keep it simple. However my test-case is an XOR, which is probbably not the very best test-case, but I also used this code with the Iris-Dataset which worked.

Network-Class:

class Review
{
    private Random _Randomizer;
    private float _LearnRate = 0.5f;

    private int() _ActivationTopology;

    public float Error = float.MaxValue;
    private float error, d_error;

    public float() ErrorPerNeuron;

    public float()() Neurons;
    public float()() Biases;
    public float()()() Weights;
    public float()() Gammas;

    public Review(IReadOnlyList<int> topology, int() activations)
    {
        _ActivationTopology = activations;

        initialise(topology);
    }

    private void initialise(IReadOnlyList<int> topology)
    {
        _Randomizer = new Random();

        Neurons = new float(topology.Count)();
        Gammas = new float(topology.Count)();
        Biases = new float(topology.Count - 1)();
        Weights = new float(topology.Count - 1)()();

        for (int i = 0; i < topology.Count; i++)
        {
            Neurons(i) = new float(topology(i));
            Gammas(i) = new float(topology(i));
        }

        ErrorPerNeuron = new float(topology(^1));

        for (int i = 0; i < topology.Count - 1; i++)
        {
            Biases(i) = new float(topology(i + 1));
            Weights(i) = new float(topology(i + 1))();

            for (int ii = 0; ii < topology(i + 1); ii++)
            {
                Weights(i)(ii) = new float(topology(i));
                //Biases(i)(ii) = (float)_Randomizer.NextDouble() - 0.5f;
                Biases(i)(ii) = (float)_Randomizer.NextDouble() * MathF.Sqrt(2f / topology(i));

                for (int iii = 0; iii < topology(i); iii++)
                {
                    //Weights(i)(ii)(iii) = (float)_Randomizer.NextDouble() - 0.5f;
                    Weights(i)(ii)(iii) = (float)_Randomizer.NextDouble() * MathF.Sqrt(2f / topology(i));
                }
            }
        }
    }

    public float() FeedForward(float() inputs)
    {
        Neurons(0) = (float())inputs.Clone();

        for (int i = 1; i < Neurons.Length; i++)
        {
            for (int ii = 0; ii < Neurons(i).Length; ii++)
            {
                Neurons(i)(ii) = Biases(i - 1)(ii);

                for (int iii = 0; iii < Neurons(i - 1).Length; iii++)
                {
                    Neurons(i)(ii) += Weights(i - 1)(ii)(iii) * Neurons(i - 1)(iii);
                }

                Neurons(i)(ii) = activate(Neurons(i)(ii), _ActivationTopology(i - 1));
            }
        }
        return Neurons(^1);
    }

    public float CalculateCost(float() target, int costFunction)
    {
        Error = 0;

        for (int i = 0; i < target.Length; i++)
        {
            Gammas(^1)(i) = DCostfunctions(Neurons(^1)(i), target(i), costFunction);
            ErrorPerNeuron(i) = Costfunctions(Neurons(^1)(i), target(i), costFunction);
            Error += ErrorPerNeuron(i);
            Gammas(^1)(i) *= activateDer(Neurons(^1)(i), _ActivationTopology(^1));
        }

        Error /= target.Length;

        return Error;
    }

    public void BackPropagate()
    {
        for (int i = Neurons.Length - 2; i >= 0; i--)
        {
            for (int ii = 0; ii < Neurons(i).Length; ii++)
            {
                Gammas(i)(ii) = 0;

                for (int iii = 0; iii < Neurons(i + 1).Length; iii++)
                {
                    Gammas(i)(ii) += Gammas(i + 1)(iii) * Weights(i)(iii)(ii);
                }

                Gammas(i)(ii) *= activateDer(Neurons(i)(ii), _ActivationTopology(i)); 
            }
        }
    }

    public void MutateWeights()
    {
        for (int i = Neurons.Length - 1; i > 0; i--)
        {
            for (int ii = 0; ii < Neurons(i).Length; ii++)
            {
                for (int iii = 0; iii < Neurons(i - 1).Length; iii++)
                {
                    Weights(i - 1)(ii)(iii) -= _LearnRate * Gammas(i)(ii) * Neurons(i - 1)(iii);
                }

                Biases(i - 1)(ii) -= Gammas(i)(ii);
            }
        }
    }

    public float Train(float() input, float() output, int costFunction)
    {
        FeedForward(input);
        CalculateCost(output, costFunction);
        BackPropagate();
        MutateWeights();

        return Error;
    }

    #region activations
    private float activate(float value, int activation)//all activation functions
    {
        switch (activation)
        {
            case 0:
                return Logistic(value);
            case 1:
                return Tanh(value);
            default:
                return Logistic(value);
        }
    }
    private float activateDer(float value, int activation)//all activation function derivatives
    {
        switch (activation)
        {
            case 0:
                return DLogistic(value);
            case 1:
                return DTanh(value);
            default:
                return DLogistic(value);
        }
    }

    public float Logistic(float x)
    {
        return 1 / (1 + MathF.Exp(-x)); // MathF.Pow(MathF.E, -x));
    }
    public float DLogistic(float x)
    {
        float rel = Logistic(x);
        return rel * (1 - rel);
    }
    public float Tanh(float x)
    {
        //return 2 / (1 + MathF.Pow(MathF.E, -(2 * x))) - 1;
        return MathF.Tanh(x);
    }
    public float DTanh(float x)
    {
        return 1 - MathF.Pow(Tanh(x), 2);
    }
    #endregion

    #region Costfunctions
    private float Costfunctions(float output, float expected, int function)
    {
        switch (function)
        {
            case 0:
                return Quadratic(expected, output);
            default:
                return Quadratic(expected, output);
        }
    }

    private float DCostfunctions(float output, float expected, int function)
    {
        switch (function)
        {
            case 0:
                return DQuadratic(expected, output);
            default:
                return DQuadratic(expected, output);
        }
    }

    public float Quadratic(float t, float o)
    {
        error = MathF.Pow((o - t), 2);
        return error;
    }
    public float DQuadratic(float t, float o)
    {
        d_error = o - t;
        return d_error;
    }
    #endregion
}

Here is my test-function:

static void testreview()
     {
        int() netztopologie = new int() { 3, 5, 5, 2 };
        int() A = new int() { 1, 1, 1 };

        float()() test, result;

        test = new float(8)();
        result = new float(8)();

        test(0) = new float() { 1, 0, 0 };
        test(1) = new float() { 0, 1, 1 };
        test(2) = new float() { 1, 0, 1 };
        test(3) = new float() { 0, 0, 1 };
        test(4) = new float() { 1, 1, 0 };
        test(5) = new float() { 0, 1, 0 };
        test(6) = new float() { 1, 1, 1 };
        test(7) = new float() { 0, 0, 0 };

        result(0) = new float() { 1, 0 };
        result(1) = new float() { 0, 1 };
        result(2) = new float() { 0, 1 };
        result(3) = new float() { 1, 0 };
        result(4) = new float() { 0, 1 };
        result(5) = new float() { 1, 0 };
        result(6) = new float() { 0, 1 };
        result(7) = new float() { 0, 1 };

        Review Network = new Review(netztopologie, A);

        for (int i = 0; i < 1000; i++)
        {
            for (int ii = 0; ii < 8; ii++)
            {
                Network.Train(test(ii), result(ii), 0);
                Console.WriteLine(Network.Error);
            }
        }

        float() testrel = Network.FeedForward(new float() { 0, 1, 0 }); //1 0
        testrel = Network.FeedForward(new float() { 0, 1, 1 }); //0 1
        testrel = Network.FeedForward(new float() { 0, 0, 0 }); //0 1
        testrel = Network.FeedForward(new float() { 1, 0, 0 }); //1 0
    }

Any feedback will be appreciated.