python - Gradient is None 是 Pytorch
问题描述
对于下面的代码,我希望得到梯度get_grads()
,但输出总是None
,这是什么原因?
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
X = torch.randn(100, 1)*10 #random with 100rows*1col, *10 to make it larger (original is small variance around 0)
y = X + 3*torch.randn(100, 1) #3*torch.randn(100, 1), it's the noise
plt.plot(X.numpy(), y.numpy(), 'o')
plt.ylabel('y')
plt.xlabel('x')
class LR(nn.Module):
def __init__(self, input_size, output_size):
super().__init__()
self.linear = nn.Linear(input_size, output_size)
def forward(self, x):
pred = self.linear(x)
return pred
torch.manual_seed(1)
model = LR(1, 1)
[w, b] = model.parameters()
def get_params():
return (w[0][0].item(), b[0].item())
def get_grads():
return (w[0][0].grad, b[0].grad)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)
epochs = 30
losses = []
for i in range(epochs):
y_pred = model.forward(X)
loss = criterion(y_pred, y)
print("epoch:", i, "loss:", loss.item())
losses.append(loss)
optimizer.zero_grad() #clear gradient for parameter]
print("model=" + str(get_params())+str(get_grads()))
loss.backward() #calculate gradient for parameters
print("model2=" + str(get_params())+str(get_grads()))
optimizer.step() #update the parameters with gradient
print("model3=" + str(get_params())+str(get_grads()))
print()
输出
epoch: 0 loss: 31.3035831451416
model=(0.5152631998062134, -0.44137823581695557)(None, None)
model2=(0.5152631998062134, -0.44137823581695557)(None, None)
model3=(1.447475790977478, -0.4449453055858612)(None, None)
epoch: 1 loss: 31.142377853393555
model=(1.447475790977478, -0.4449453055858612)(None, None)
model2=(1.447475790977478, -0.4449453055858612)(None, None)
model3=(0.5188075304031372, -0.41897052526474)(None, None)
epoch: 2 loss: 30.982675552368164
model=(0.5188075304031372, -0.41897052526474)(None, None)
model2=(0.5188075304031372, -0.41897052526474)(None, None)
model3=(1.4446537494659424, -0.42287370562553406)(None, None)
epoch: 3 loss: 30.824453353881836
model=(1.4446537494659424, -0.42287370562553406)(None, None)
model2=(1.4446537494659424, -0.42287370562553406)(None, None)
model3=(0.5223162770271301, -0.39742958545684814)(None, None)
epoch: 4 loss: 30.667678833007812
model=(0.5223162770271301, -0.39742958545684814)(None, None)
model2=(0.5223162770271301, -0.39742958545684814)(None, None)
model3=(1.4418396949768066, -0.40165263414382935)(None, None)
epoch: 5 loss: 30.51233673095703
model=(1.4418396949768066, -0.40165263414382935)(None, None)
model2=(1.4418396949768066, -0.40165263414382935)(None, None)
model3=(0.5257899761199951, -0.37672188878059387)(None, None)
epoch: 6 loss: 30.358415603637695
model=(0.5257899761199951, -0.37672188878059387)(None, None)
model2=(0.5257899761199951, -0.37672188878059387)(None, None)
model3=(1.4390342235565186, -0.3812492787837982)(None, None)
epoch: 7 loss: 30.205856323242188
model=(1.4390342235565186, -0.3812492787837982)(None, None)
model2=(1.4390342235565186, -0.3812492787837982)(None, None)
model3=(0.5292295813560486, -0.35681530833244324)(None, None)
epoch: 8 loss: 30.054668426513672
model=(0.5292295813560486, -0.35681530833244324)(None, None)
model2=(0.5292295813560486, -0.35681530833244324)(None, None)
model3=(1.4362375736236572, -0.3616321086883545)(None, None)
epoch: 9 loss: 29.904821395874023
model=(1.4362375736236572, -0.3616321086883545)(None, None)
model2=(1.4362375736236572, -0.3616321086883545)(None, None)
model3=(0.532635509967804, -0.3376788794994354)(None, None)
epoch: 10 loss: 29.756284713745117
model=(0.532635509967804, -0.3376788794994354)(None, None)
model2=(0.532635509967804, -0.3376788794994354)(None, None)
model3=(1.4334499835968018, -0.3427707254886627)(None, None)
epoch: 11 loss: 29.609052658081055
model=(1.4334499835968018, -0.3427707254886627)(None, None)
model2=(1.4334499835968018, -0.3427707254886627)(None, None)
model3=(0.5360085964202881, -0.3192828297615051)(None, None)
...
解决方案
您需要直接获取渐变w.grad
和b.grad
,而不是w[0][0].grad
如下:
def get_grads():
return (w.grad, b.grad)
或者您也可以直接在训练循环中使用参数的名称来打印其梯度:
print(model.linear.weight.grad)
print(model.linear.bias.grad)
推荐阅读
- python - 来自 guild.channels 的列表为空
- php - 在 symfony 中显示问题的实体
- python - gunicorn3 无法识别新的 mysql 条目
- c# - 如何制作一个在 3 种背景颜色之间切换的按钮?
- sql-server - SQL Server 函数从小时和分钟计算分钟
- gradle - 未找到匹配的 XXX 变体。消费者被配置为查找值为“平台”和“版本目录”的属性
- java - 读取日志压缩主题的所有记录
- flutter - Flutter 以最小宽度对齐列中的特定小部件
- c# - ASP.NET Core RESTful,相同的路由但不同的操作
- p5.js - 自定义功能未实现