Simple Convolutional Neural Network

Problem Description

import numpy as np
from keras.datasets import mnist
import time

# Functions

def sigmoid(x):
    return 1.0/(1.0 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x)*(1-sigmoid(x))
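
# Note: sigmoid and sigmoid_derivative are defined here but never used by the network below.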

def relu(x):
    return np.maximum(0,x)

def relu_derivative(x):
    return np.greater(x, 0).astype(int)

def softmax(x):
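    # Subtract the max before exponentiating so np.exp cannot overflow; the result is unchanged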
    exps = np.exp(x - x.max())
    return exps / np.sum(exps)

# Import and Create Dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Setup
np.random.seed(seed=12345)
alpha = 0.05

# Initialize Network Values

# Layers
input_layer = np.zeros(shape=(28,28))
convolutional_layer = np.zeros(shape=(10,24,24))
pooling_layer = np.zeros(shape=(10,12,12))
flattened_layer = np.reshape(pooling_layer,newshape=(1440,1))
dense_layer = np.zeros(shape=(100,1))
output_layer = np.zeros(shape=(10,1))

# Filters and Weights
convolution_filters = np.random.rand(10,5,5)
weights1 = np.random.rand(100,1440)
weights2 = np.random.rand(10,100)

# Bias
dense_layer_bias = np.ones(shape=(100,1))
output_layer_bias = np.ones(shape=(10,1))
convolution_bias = np.ones(shape=(10,5,5))
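
# Shapes: 10 filters of 5x5 -> conv 10x24x24 -> pool 10x12x12 -> flatten 1440x1 -> dense 100x1 -> output 10x1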

for epoch in range(1):
    print(np.mean(weights1),np.mean(weights2),np.mean(convolution_filters))
    for sample in range(20):
        # Get Input Data ('image' avoids shadowing the Python builtin input)
        image = x_train[sample]
        # Target Data
        target = np.zeros((10,1))
        target[y_train[sample]][0] = 1
        # Feed Forward - CONVOLUTION LAYER (valid convolution: 28 - 5 + 1 = 24)
        for i in range(10):
            for j in range(24):
                for k in range(24):
                    minimatrix = image[j:j+5, k:k+5]
                    convolutional_layer[i][j][k] = np.sum(minimatrix * convolution_filters[i] + convolution_bias[i])
        # Pooling Layer (2x2 max pooling with stride 2: 24 / 2 = 12), followed by ReLU
        for i in range(10):
            for j in range(12):
                for k in range(12):
                    minimatrix = convolutional_layer[i, j*2:j*2+2, k*2:k*2+2]
                    pooling_layer[i][j][k] = relu(minimatrix.max())
        # Flattening Layer
        flattened_layer = np.reshape(pooling_layer,newshape=(1440,1))
        # Feed Forward - DENSE_LAYER
        dense_layer = relu(np.dot(weights1,flattened_layer) + dense_layer_bias)
        # Feed Forward - OUTPUT_LAYER
        output_layer = softmax(np.dot(weights2,dense_layer) + output_layer_bias)
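        # (No loss is evaluated explicitly; the (output - target) delta below is the gradient
        # of cross-entropy, loss = -np.sum(target * np.log(output_layer)), at the logits.)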
        
        # Backpropagation - OUTPUT_LAYER
        delta = output_layer - target
        weights2gradient = np.dot(delta,dense_layer.T)
        output_layer_bias_gradient = delta

        # Backpropagation - DENSE_LAYER
        delta = np.dot(weights2.T,delta) * relu_derivative(dense_layer)
        weights1gradient = np.dot(delta,flattened_layer.T)
        dense_layer_bias_gradient = delta

        # Backpropagation - POOLING_LAYER
        # The reshape below exactly inverts the earlier flatten (both use C order), so each
        # delta lands back on the pooling unit it came from and no transpose is needed
        delta = np.reshape(np.dot(weights1.T,delta),newshape=(10,12,12)) * relu_derivative(pooling_layer)

        # Gradient For Backward Pass
        pooling_backward_pass = np.zeros(shape=(10,24,24)) # matrix for passing adjusted cost

        # BACKWARD POOLING GRADIENT PASS
        # Route each delta to the argmax position inside its 2x2 window; the flat argmax
        # splits into a (row, col) offset, and both offsets are added to j*2 and k*2
        for i in range(10):
            for j in range(12):
                for k in range(12):
                    minimatrix = convolutional_layer[i, j*2:j*2+2, k*2:k*2+2]
                    maxvalindex = np.argmax(minimatrix)
                    pooling_backward_pass[i, j*2 + (maxvalindex // 2), k*2 + (maxvalindex % 2)] += delta[i,j,k]
        
        # Backpropagation - CONVOLUTION LAYER
        convolution_gradient = np.zeros(shape=(10,5,5))
        convolution_bias_gradient = np.zeros(shape=(10,5,5))
        for i in range(10):
            for j in range(24):
                for k in range(24):
                    minimatrix = image[j:j+5, k:k+5]
                    convolution_gradient[i] += pooling_backward_pass[i,j,k] * minimatrix
                    convolution_bias_gradient[i] += pooling_backward_pass[i,j,k]
        
        # Weight and Filter Adjustments
        weights2 -= weights2gradient * alpha
        weights1 -= weights1gradient * alpha
        convolution_filters -= convolution_gradient * alpha
        # Bias Adjustments
        dense_layer_bias -= dense_layer_bias_gradient * alpha
        output_layer_bias -= output_layer_bias_gradient * alpha
        convolution_bias -= convolution_bias_gradient * alpha
print(np.mean(weights1),np.mean(weights2),np.mean(convolution_filters))

I have been working on this code for a while now, and I am (almost) certain the basic functionality should work, but I am not seeing any changes to the weights of the network. I specifically want to understand neural networks without the abstractions an actual framework provides. Is there a Python scoping issue that is preventing the weight updates?
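
For the record, everything here runs at module level, so the in-place -= updates should act directly on the shared arrays with no global declaration involved. Below is a minimal standalone sketch (hypothetical names, not part of the network above) of the kind of check that confirms an update of this form is applied; comparing against a saved copy is more reliable than watching np.mean, which can barely move even when individual weights change.

import numpy as np

w = np.random.rand(3, 3)            # stand-in for one of the weight matrices
w_before = w.copy()                 # copy(), not a reference, so the comparison is meaningful
grad = np.ones_like(w)              # stand-in gradient
w -= grad * 0.05                    # same in-place update pattern as the training loop
print(np.allclose(w, w_before))    # False: the update was applied
print(np.abs(w - w_before).max())  # about 0.05: the size of one step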

Tags: python, numpy, conv-neural-network

Solution

