python - NN实现-反向传播后更新尺寸改变
问题描述
作为作业的一部分,我们需要实现 NN。
我正在计算前向结果,然后我运行反向传播,然后我更新了权重(都针对同一个实例)。
当我尝试计算同一实例的前向值时,我收到尺寸错误的错误消息。
class MyNN:
def __init__(self, learning_rate, layer_sizes):
self.learning_rate = learning_rate
self.layer_sizes = layer_sizes
self.model_params = {}
self.memory = {}
self.grads = {}
# Initializing weights
for layer_index in range(len(layer_sizes) - 1):
W_input = layer_sizes[layer_index + 1]
W_output = layer_sizes[layer_index]
self.model_params['W_' + str(layer_index + 1)] = np.random.randn(W_input, W_output) * 0.1
self.model_params['b_' + str(layer_index + 1)] = np.random.randn(W_input) * 0.1
def forward_single_instance(self, x):
a_i_1 = x
self.memory['a_0'] = x
for layer_index in range(len(self.layer_sizes) - 1):
W_i = self.model_params['W_' + str(layer_index + 1)]
b_i = self.model_params['b_' + str(layer_index + 1)]
z_i = np.dot(W_i, a_i_1) + b_i
a_i = 1/(1+np.exp(-z_i))
self.memory['a_' + str(layer_index + 1)] = a_i
a_i_1 = a_i
return a_i_1
def log_loss(self, y_hat, y):
'''
Logistic loss, assuming a single value in y_hat and y.
'''
m = y_hat[0]
cost = -y[0]*np.log(y_hat[0]) - (1 - y[0])*np.log(1 - y_hat[0])
return cost
def backward_single_instance(self, y):
a_output = self.memory['a_' + str(len(self.layer_sizes) - 1)]
dz = a_output - y
for layer_index in range(len(self.layer_sizes) - 1, 0, -1):
a_l_1 = self.memory['a_' + str(layer_index - 1)]
dW = np.dot(dz.reshape(-1, 1), a_l_1.reshape(1, -1))
db = dz.transpose()
self.grads['dW_' + str(layer_index)] = dW
self.grads['db_' + str(layer_index)] = db
W_l = self.model_params['W_' + str(layer_index)]
dz = (a_l_1 * (1 - a_l_1)).reshape(-1, 1) * np.dot(W_l.T, dz.reshape(-1, 1))
def update(self):
for layer_index in range(len(self.layer_sizes) - 1):
Wi = 'W_' + str(layer_index + 1)
bi = 'b_' + str(layer_index + 1)
dWi = 'dW_' + str(layer_index + 1)
dbi = 'db_' + str(layer_index + 1)
W_i = self.model_params[Wi]
b_i = self.model_params[bi]
dW_i = self.grads[dWi]
db_i = self.grads[dbi]
self.model_params[Wi] = W_i - self.learning_rate * dW_i
self.model_params[bi] = b_i - self.learning_rate * db_i
然后为了测试我写了这段代码:
nn = MyNN(0.01, [3, 2, 1])
x = np.random.randn(3)
y = np.random.randn(1)
y_hat = nn.forward_single_instance(x)
print(y_hat)
nn.backward_single_instance(y)
nn.update()
y_hat = nn.forward_single_instance(x)
这是打印的错误:
x
[ 0.57072262 1.8578982 -1.48560691]
x
[[0.53932246 0.57051188]]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-99-d8d9152fef18> in <module>()
----> 1 y_hat = nn.forward_single_instance(x)
2 print(y_hat)
3
4
5 l = nn.log_loss(y_hat, y)
<ipython-input-89-f354993c95f9> in forward_single_instance(self, x)
24 W_i = self.model_params['W_' + str(layer_index + 1)]
25 b_i = self.model_params['b_' + str(layer_index + 1)]
---> 26 z_i = np.dot(W_i, a_i_1) + b_i
27 a_i = 1/(1+np.exp(-z_i))
28 self.memory['a_' + str(layer_index + 1)] = a_i
ValueError: shapes (1,2) and (1,2) not aligned: 2 (dim 1) != 1 (dim 0)
问题出在 b_i 维度上,我不知道为什么。我尝试了 b_i 形状(行向量,列向量)的变体,它们都抛出了相同的异常。
解决方案
推荐阅读
- r - 在 Seperate_rows 函数中将负值转换为正值
- c# - 使用 C# WPF 添加数据
- android - Android 图像视图未更新
- flutter - 如何避免动画列表Flutter内的圆形头像卡顿?
- php - 在 2 个不同的 div ID 中有 2 个同名的 HTML 下拉列表,那么如何使用 AJAX/jQuery 提交特定 div ID 的下拉值
- angular - PrimeNG p-multiSelect 未显示所选值
- git - 如何在 Visual Studio 2019 中创建 Bitbucket 拉取请求
- javascript - javascripting 哪个网站接受发布 javascript
- php - Mysql选择具有相同值列的行
- python - 我怎样才能把它放在一个循环中,在开始时尝试使用一个while循环,但我会得到错误的输出