python - 如何通过从零开始构建的神经网络提高预测的准确性?
问题描述
我对机器学习比较陌生,作为一个入门项目,我决定使用 NumPy 在 Python 中从头开始实现我自己的神经网络。因此,我手动实现了前向传播、反向传播和计算函数导数的方法。
对于我的测试数据,我编写了一个生成 sin(x) 值的函数。当我最终创建和训练我的网络时,我的输出在每次试验中波动很大,并且明显偏离真实值(尽管它们比最初的预测有了不错的改进)。
我尝试调整了很多设置,包括学习率、神经元数量、层数、训练迭代和激活函数,但最终我的输入数据的平方成本约为 0.1。
我认为我的导数函数和链式规则表达式是正确的,因为当我只使用一个输入样本时,我得到了一个近乎完美的答案。
然而,添加更多输入数据会显着降低网络的准确性。
你们对如何改善这个网络有什么建议吗,或者我现在做错了什么?
我的代码:
import numpy as np
#Generate input data for the network
def inputgen():
inputs=[]
outputs=[]
i=0.01
for x in range(10000):
inputs.append([round(i,7)])
outputs.append([np.sin(i)]) #output is sin(x)
i+=0.0001
return [inputs,outputs]
#set training input and output
inputs = np.array(inputgen()[0])
outputs = np.array(inputgen()[1])
#sigmoid activation function and derivative
def sigmoid(x):
return 1/(1+np.exp(-x))
def sigmoid_derivative(x):
return sigmoid(x)*(1-sigmoid(x))
#tanh activation function and derivative
def tanh(x):
return np.tanh(x)
def tanh_derivative(x):
return 1-((tanh(x))**2)
#Layer class
class Layer:
def __init__(self,num_neurons,num_inputs,inputs):
self.num_neurons = num_neurons #number of neurons in hidden layers
self.num_inputs = num_inputs #number of input neurons(1 in the case of testing data)
self.inputs = inputs
self.weights = np.random.rand(num_inputs,num_neurons)*np.sqrt(1/num_inputs) #weights initialized by Xavier function
self.biases = np.zeros((1,num_neurons)) #biases initialized as 0
self.z = np.dot(self.inputs,self.weights)+self.biases #Cacluate z
self.a = tanh(self.z) #Calculate activation
self.dcost_a = [] #derivative of cost with respect to activation
self.da_z = [] #derivative of activation with respect to z
self.dz_w = [] #derivative of z with respect to weight
self.dcost_w = [] #derivative of cost with respect to weight
self.dcost_b = [] #derivative of cost with respect to bias
#functions used in forwardpropagation
def compute_z(self):
self.z = np.dot(self.inputs,self.weights)+self.biases
return self.z
def activation(self):
self.a = tanh(self.compute_z())
def forward(self):
self.activation()
#Network class
class Network:
def __init__(self,num_layers,num_neurons,num_inputs,inputs,num_outputs,outputs):
self.learningrate = 0.01 #learning rate
self.num_layers=num_layers #number of hidden layers
self.num_neurons=num_neurons #number of neurons in hidden layers
self.num_inputs = num_inputs #number of input neurons
self.inputs=inputs
self.expected_outputs=outputs
self.layers=[]
for x in range(num_layers):
if x==0:
self.layers.append(Layer(num_neurons,num_inputs,inputs)) #Initial layer with given inputs
else:
#Other layers have an input which is the activation of previous layer
self.layers.append(Layer(num_neurons,len(self.layers[x-1].a[0]),self.layers[x-1].a))
self.prediction = Layer(num_outputs,num_neurons,self.layers[-1].a) #prediction
self.layers.append(self.prediction)
self.cost = (self.prediction.a-self.expected_outputs)**2 #cost
#forwardpropagation
def forwardprop(self):
for x in range(self.num_layers+1):
if(x!=0):
self.layers[x].inputs=self.layers[x-1].a
self.layers[x].forward()
self.prediction=self.layers[-1] #update prediction value
def backprop(self):
self.cost = (self.prediction.a-self.expected_outputs)**2
for x in range(len(self.layers)-1,-1,-1):
if(x==len(self.layers)-1):
dcost_a = 2*(self.prediction.a-self.expected_outputs) #derivative of cost with respect to activation for output layer
else:
#derivative of cost with respect to activation for hidden layers(chain rule)
dcost_a=np.zeros((len(self.layers[x].inputs),self.num_neurons)).T
dcost_a1=self.layers[x+1].dcost_a.T
da_z1=self.layers[x+1].da_z.T
dz_a=(self.layers[x+1].weights).T
for z in range(len(dcost_a1)):
dcost_a+=((dcost_a1[z])*da_z1)
for j in range(len(dcost_a)):
dcost_a[j]*=dz_a[z][j]
dcost_a=dcost_a.T
self.layers[x].dcost_a=dcost_a
#derivative of activation with respect to z
da_z = tanh_derivative(self.layers[x].z)
self.layers[x].da_z=da_z
#derivative of z with respect to weights
dz_w = []
if x!=0:
dz_w=self.layers[x-1].a
else:
dz_w=self.inputs
self.layers[x].dz_w=dz_w
#change weights and biases
for x in range(len(self.layers)-1,-1,-1):
#Average each of the derivatives over all training samples
self.layers[x].dcost_a=np.average(self.layers[x].dcost_a,axis=0)
self.layers[x].da_z=np.average(self.layers[x].da_z,axis=0)
self.layers[x].dz_w=(np.average(self.layers[x].dz_w,axis=0)).T
self.layers[x].dcost_w = np.zeros((self.layers[x].weights.shape))
self.layers[x].dcost_b = self.layers[x].dcost_a*self.layers[x].da_z
for v in range(len(self.layers[x].dz_w)):
self.layers[x].dcost_w[v] = (self.layers[x].dcost_a*self.layers[x].da_z)*self.layers[x].dz_w[v]
#update weights and biases
self.layers[x].weights-=(self.layers[x].dcost_w)*self.learningrate
self.layers[x].biases-=(self.layers[x].dcost_b)*self.learningrate
#train the network
def train(self):
for x in range(1000):
self.backprop()
self.forwardprop()
Network1 = Network(3,3,1,inputs,1,outputs)
Network1.train()
print(Network1.prediction.a)
样本输入:
[[0.01 ]
[0.0101]
[0.0102]
...
[1.0097]
[1.0098]
[1.0099]]
样本输出:
[[0.37656753]
[0.37658777]
[0.37660802]
...
[0.53088048]
[0.53089046]
[0.53090043]]
预期输出:
[[0.00999983]
[0.01009983]
[0.01019982]
...
[0.84667225]
[0.84672546]
[0.84677865]]
解决方案
我会跟踪 cost_history 并更新你的学习率。
如果你一直 - 越来越接近实际值,将学习率提高 5% - 越来越远,将学习率降低 50%
def update_learning_rate(self):
if(len(self.cost_history) < 2):
return
if(self.cost_history[0] > self.cost_history[1]):
self.learning_rate /= 2
else:
self.learning_rate *= 1.05
这实际上应该会产生更好的结果
通常发生的情况是,您可能会陷入局部最小值 (d) 而不是绝对最小值 (b) 之一。忽略标签,这是我在网上随便找的一张照片。
推荐阅读
- python - 逐行清理文本文件,通过python忽略几行而不使用熊猫
- file-manager - iOS 将文件复制到外部设备
- android - 在 React Native 的特定屏幕中隐藏 TabBar
- telegram - 如何在我的电报机器人中阅读/接收电报频道消息?
- c# - 为什么要注入整个 DbContext 而不是只注入必要的 DbSet?
- javascript - 为什么我可以在javascript中将对象传递给对象,这意味着对象可以作为函数调用?
- java - 每次我在循环中更改坐标时绘图都不会更新
- android - 为什么它会抛出类似“没有从 onCreateView() 返回视图或在 onCreateView() 之前调用它。”之类的错误?
- ajax - 变量存在时不存在
- excel - 无法使用 excel VBA 连接到 Oracle DB