performance – Neural Network Written in Python is Extremely Slow

I coded a basic feedforward neural network in pure Python (with the exception of NumPy) to better understand how neural networks work. It works, but it is extremely slow, and I have no idea how to fix that. The neural network looks like this:

import numpy as np 

from digits import x_train

np.random.seed(0)

def leaky_relu(inputs):
    return np.maximum(0.1*inputs, inputs)

class Layer:
    def __init__(self, n_inputs, n_neurons):
        self.weights = 0.1*np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))
        self.updated_weights = self.weights
        self.updated_biases = self.biases
        self.dc_dz = []

    def forward(self, inputs):
        self.output = leaky_relu(np.dot(inputs, self.weights) + self.biases[0])


l1 = Layer(784, 8)
l2 = Layer(8, 128)
l3 = Layer(128, 128)
l4 = Layer(128, 64)
l5 = Layer(64, 10)

l1.forward(x_train[0].flatten())
l2.forward(l1.output)
l3.forward(l2.output)
l4.forward(l3.output)
l5.forward(l4.output)

layers = [l1, l2, l3, l4, l5]

def leaky_relu_derivative(output):
    if output > 0:
        return 1
    else:
        return 0.1

def calculate_bias(output, actual=None, dc_dcn=None):
    if dc_dcn is None:
        return leaky_relu_derivative(output) * 2 * (output - actual)
    else:
        return leaky_relu_derivative(output) * dc_dcn

def calculate_weight(output, input, actual=None, dc_dcn=None):
    if dc_dcn is None:
        return input * leaky_relu_derivative(output) * 2 * (output - actual)
    else:
        return input * leaky_relu_derivative(output) * dc_dcn

def calculate_dc_dcn(weights, dc_dz):
    # find the derivative of the cost function with respect to the current node
    return np.sum(np.multiply(weights, dc_dz))

def train(learning_rate, actual):
    prev = None
    next = None
    x = len(layers) - 1

    while x != 0:
        layer = layers[x]
        next = layers[x-1]

        if x == len(layers) - 1:
            for i in range(len(layer.output)):
                # for every node in the output layer
                new_bias = calculate_bias(layer.output[i], actual[i])
                layer.dc_dz.append(new_bias)
                layer.updated_biases[0][i] -= learning_rate * new_bias
                for j in range(len(next.output)):
                    # for every weight of the current node
                    new_weight = calculate_weight(layer.output[i], next.output[j], actual[i])
                    layer.updated_weights[j][i] -= learning_rate * new_weight
            prev = layer
        else:
            for i in range(len(layer.output)):
                # for every node in the layer
                dc_dcn = calculate_dc_dcn(prev.weights[i], prev.dc_dz[:(len(prev.output))])
                new_bias = calculate_bias(layer.output[i], dc_dcn=dc_dcn)
                layer.dc_dz.append(new_bias)
                layer.updated_biases[0][i] -= learning_rate * new_bias
                for j in range(len(next.output)):
                    # for every weight of the current node
                    new_weight = calculate_weight(layer.output[i], next.output[j], dc_dcn=dc_dcn)
                    layer.updated_weights[j][i] -= learning_rate * new_weight
            prev = layer

        # apply the accumulated updates to the live parameters
        for layer in layers:
            layer.weights = layer.updated_weights
            layer.biases = layer.updated_biases

        x -= 1

I’m assuming the code isn’t very efficient and is probably poorly written, so any constructive criticism, along with suggestions on how to make it faster, would help.
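
To show the direction I’ve been considering, here is my rough attempt at vectorizing just the output-layer update with whole-array NumPy operations in place of the per-node loops. This is only an untested sketch: train_output_layer_vectorized is a name I made up, and inputs stands for the previous layer’s output.

def train_output_layer_vectorized(layer, inputs, actual, learning_rate):
    # leaky ReLU derivative for every node at once instead of one scalar at a time
    relu_grad = np.where(layer.output > 0, 1.0, 0.1)
    # dC/dz for the whole layer: squared-error derivative times activation derivative
    dc_dz = relu_grad * 2 * (layer.output - actual)
    # all weight gradients in a single outer product, replacing the nested i/j loops
    layer.updated_weights -= learning_rate * np.outer(inputs, dc_dz)
    layer.updated_biases[0] -= learning_rate * dc_dz

Is replacing the per-node loops with array operations like this the right direction, or is the slowness coming from somewhere else?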