Python3 BP Neural Network

FightLi 2018-01-18 13:22

Reposted from 麦子学院 (Maizi Academy).

"""
network.py
~~~~~~~~~~

A module to implement the stochastic gradient descent learning
algorithm for a feedforward neural network.  Gradients are calculated
using backpropagation.  Note that I have focused on making the code
simple, easily readable, and easily modifiable.  It is not optimized,
and omits many desirable features.
"""

#### Libraries
# Standard library
import random

# Third-party libraries
import numpy as np

class Network(object):

    def __init__(self, sizes):
        """The list ``sizes`` contains the number of neurons in the
        respective layers of the network.  For example, if the list
        was [2, 3, 1] then it would be a three-layer network, with the
        first layer containing 2 neurons, the second layer 3 neurons,
        and the third layer 1 neuron.  The biases and weights for the
        network are initialized randomly, using a Gaussian
        distribution with mean 0, and variance 1.  Note that the first
        layer is assumed to be an input layer, and by convention we
        won't set any biases for those neurons, since biases are only
        ever used in computing the outputs from later layers."""
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the output of the network if ``a`` is input."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        """Train the neural network using mini-batch stochastic
        gradient descent.  The ``training_data`` is a list of tuples
        ``(x, y)`` representing the training inputs and the desired
        outputs.  The other non-optional parameters are
        self-explanatory.  If ``test_data`` is provided then the
        network will be evaluated against the test data after each
        epoch, and partial progress printed out.  This is useful for
        tracking progress, but slows things down substantially."""
        if test_data: n_test = len(test_data)
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(
                    j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))

    def update_mini_batch(self, mini_batch, eta):
        """Update the network's weights and biases by applying
        gradient descent using backpropagation to a single mini batch.
        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
        is the learning rate."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # The samples are processed one at a time, unlike the
        # vectorized mini-batch implementation in Andrew Ng's course.
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient for the cost function C_x.  ``nabla_b`` and
        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
        to ``self.biases`` and ``self.weights``."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = x
        activations = [x] # list to store all the activations, layer by layer
        zs = [] # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y) * \
            sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book.  Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on.  It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return the number of test inputs for which the neural
        network outputs the correct result. Note that the neural
        network's output is assumed to be the index of whichever
        neuron in the final layer has the highest activation."""
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations-y)

#### Miscellaneous functions
def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))
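
For readers who want to run the class above on its own, here is a minimal usage sketch; the toy XOR-style data and the hyperparameters are my own illustrative choices, not part of the original post. Note that each input and target must be a NumPy column vector, which is what feedforward and backprop expect.

import numpy as np

# Assumes the Network class above is in scope (same file or imported).
net = Network([2, 3, 1])
training_data = [
    (np.array([[0.0], [0.0]]), np.array([[0.0]])),
    (np.array([[0.0], [1.0]]), np.array([[1.0]])),
    (np.array([[1.0], [0.0]]), np.array([[1.0]])),
    (np.array([[1.0], [1.0]]), np.array([[0.0]])),
]
net.SGD(training_data, epochs=1000, mini_batch_size=4, eta=3.0)
print(net.feedforward(np.array([[1.0], [0.0]])))  # a (1, 1) array holding the network's output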

This algorithm is more accurate than the neural network I had written before, but I ran into errors while testing it, I could not make sense of the comments in several places, and they do not tie in well with the theory. I built on it and improved the algorithm's extensibility, tested the revised code myself, and the results are excellent.
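
To connect the code to the theory it implements: with net input $z^l = W^l a^{l-1} + b^l$, activation $a^l = \sigma(z^l)$, and a quadratic per-sample cost whose derivative with respect to the output activation is $a^L - y$ (this is what `cost_derivative` returns), the backward pass computes the standard backpropagation equations

\[
\delta^L = (a^L - y) \odot \sigma'(z^L), \qquad
\delta^l = \big((W^{l+1})^\top \delta^{l+1}\big) \odot \sigma'(z^l),
\]
\[
\frac{\partial C}{\partial b^l} = \delta^l, \qquad
\frac{\partial C}{\partial W^l} = \delta^l \,(a^{l-1})^\top,
\]

where $\odot$ is the elementwise product. These are exactly the quantities accumulated into `nabla_b` and `nabla_w` by `backprop`, and `update_mini_batch` (or `BP` in the version below) averages them over the mini-batch before taking a gradient-descent step of size eta.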

# -*- coding: utf-8 -*-
"""
Created on Thu Jan 18 15:27:24 2018

@author: markli
"""

import numpy as np
import random

def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1.0 - np.tanh(x)*np.tanh(x)

def logistic(x):
    return 1/(1 + np.exp(-x))

def logistic_derivative(x):
    return logistic(x)*(1-logistic(x))

def ReLU(x, a=1):
    # use elementwise numpy ops so these also work on vectors
    # (the original scalar max / ternary would fail on arrays)
    return np.maximum(0, a * x)

def ReLU_derivative(x, a=1):
    return np.where(x < 0, 0, a)

class NeuralNetwork:
    '''
    Z = W * x + b
    A = sigmoid(Z)
    Z: net input
    x: sample set, n * m (n features, m samples)
    b: bias
    W: weights
    A: output (activation)
    '''
    def __init__(self, layers, active_function=[logistic], active_function_der=[logistic_derivative], learn_rate=0.9):
        """
        Initialize the neural network.
        layers holds the number of neurons in each layer; its length is the number of layers.
        active_function gives one activation function per layer; if it has length 1,
        the same activation function is used for every layer.
        active_function_der holds the derivatives of the activation functions.
        learn_rate is the learning rate.
        """
        self.weights = [np.random.randn(x, y) for x, y in zip(layers[1:], layers[:-1])]
        self.biases = [np.random.randn(x, 1) for x in layers[1:]]
        self.size = len(layers)
        self.rate = learn_rate
        self.sigmoids = []
        self.sigmoids_der = []
        for i in range(len(layers)-1):
            if len(active_function) == self.size-1:
                self.sigmoids = active_function
            else:
                self.sigmoids.append(active_function[0])
            if len(active_function_der) == self.size-1:
                self.sigmoids_der = active_function_der
            else:
                self.sigmoids_der.append(active_function_der[0])

    def fit(self, TrainData, epochs=1000, mini_batch_size=32):
        """
        Learn the network parameters with the backpropagation algorithm.
        TrainData is a list of (x, y) pairs, one per sample:
            x holds the n input features, y holds the t class labels.
        epochs is the number of passes over the training data.
        mini_batch_size is the size of one mini-batch; set it to 1 to disable mini-batching.
        """
        n = len(TrainData)
        for i in range(epochs):
            random.shuffle(TrainData)
            mini_batches = [
                TrainData[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.BP(mini_batch, self.rate)

    def predict(self, x):
        """Feed-forward pass."""
        i = 0
        for b, w in zip(self.biases, self.weights):
            x = self.sigmoids[i](np.dot(w, x)+b)
            i = i + 1
        return x

    def BP(self, mini_batch, rate):
        """
        Backpropagation update for one mini-batch.
        """
        size = len(mini_batch)

        nabla_b = [np.zeros(b.shape) for b in self.biases]  # accumulated change of b for this mini-batch
        nabla_w = [np.zeros(w.shape) for w in self.weights]  # accumulated change of w for this mini-batch
        # process the samples one by one
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]  # add up each sample's gradient of b
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]  # add up each sample's gradient of w
        self.weights = [w-(rate/size)*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(rate/size)*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """
        x is a 1-D row vector of features.
        y is a 1-D row vector of labels.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = np.atleast_2d(x).reshape((len(x), 1))  # reshape into a column vector
        activations = [activation]  # store the activations of every layer
        zs = []  # store the z vector of every layer
        i = 0
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = self.sigmoids[i](z)
            activations.append(activation)
            i = i + 1
        # backward pass
        y = np.atleast_2d(y).reshape((len(y), 1))  # reshape y into a column vector
        # delta: partial derivative of the cost with respect to z
        delta = self.cost_der(activations[-1], y) * \
            self.sigmoids_der[-1](zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, np.transpose(activations[-2]))
        # walk backwards through the layers, starting from the second-to-last one
        for l in range(2, self.size):
            z = zs[-l]  # z of the current layer
            sp = self.sigmoids_der[-l](z)  # derivative of the activation at z
            delta = np.multiply(np.dot(np.transpose(self.weights[-l+1]), delta), sp)  # error of the current layer
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, np.transpose(activations[-l-1]))
        return (nabla_b, nabla_w)

    """
    Loss functions:
    cost_der is the derivative of the squared-error loss with respect to a.
    cost_cross_entropy_der is the derivative of the cross-entropy loss with respect to a.
    """
    def cost_der(self, a, y):
        return a - y

    def cost_cross_entropy_der(self, a, y):
        return (a-y)/(a * (1-a))
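
One benefit of the refactoring is that each layer can now use its own activation function. A hypothetical construction (layer sizes and learning rate chosen only for illustration) might look like this:

# One activation (and derivative) per weight layer, i.e. len(layers) - 1 entries;
# passing a single-element list, as in the default, reuses it for every layer.
nn = NeuralNetwork([64, 100, 10],
                   active_function=[tanh, logistic],
                   active_function_der=[tanh_derivative, logistic_derivative],
                   learn_rate=0.5)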

The above is the source code of the BP neural network algorithm. Below is a digit recognition program used to verify that the code is correct.

import numpy as np
from sklearn.datasets import load_digits
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelBinarizer
from network_mark import NeuralNetwork
# train_test_split now lives in sklearn.model_selection
# (the old sklearn.cross_validation module has been removed)
from sklearn.model_selection import train_test_split


digits = load_digits()
X = digits.data
y = digits.target
X -= X.min()  # normalize the values to bring them into the range 0-1
X /= X.max()

nn = NeuralNetwork([64, 100, 10])
X_train, X_test, y_train, y_test = train_test_split(X, y)
labels_train = LabelBinarizer().fit_transform(y_train)
labels_test = LabelBinarizer().fit_transform(y_test)


# X_train.shape (1347, 64)
# y_train.shape (1347,)
# labels_train.shape (1347, 10)
# labels_test.shape (450, 10)

print("start fitting")
Data = [(x, y) for x, y in zip(X_train, labels_train)]
#print(Data)
nn.fit(Data, epochs=500, mini_batch_size=32)
result = nn.predict(X_test.T)
predictions = [np.argmax(result[:, y]) for y in range(result.shape[1])]

print(predictions)
#for i in range(result.shape[1]):
#    y = result[:,i]
#    predictions.append(np.argmax(y))
##print(np.atleast_2d(predictions).shape)
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

Finally, the test results: the accuracy is quite respectable.
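
If a single summary number is wanted alongside the confusion matrix and classification report, a small addition of mine (not in the original script) would be:

# overall accuracy on the held-out test set
accuracy = np.mean(np.array(predictions) == y_test)
print("test accuracy: {:.3f}".format(accuracy))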
