python - Pytorch:deepcopy 后没有训练效果
问题描述
我试图在 PyTorch 中制作一个神经网络的副本,然后对复制的网络进行训练,但在复制后训练似乎并没有改变网络中的权重。这篇文章表明 deepcopy 是一种制作神经网络副本的便捷方法,因此我尝试在我的代码中使用它。
下面的代码工作得很好,表明网络的权重和准确性在训练后与训练前不同。但是,当我改用 network_cp = deepcopy(network) 和 optimizer_cp = deepcopy(optimizer) 时,训练前后的准确性和权重完全相同。
# Experiment: train a network, take a "copy" of it and of its optimizer, train
# the copy once more, and compare the copy's accuracy and weights before and
# after that second training run.
# torch settings
torch.backends.cudnn.enabled = True
device = torch.device("cpu")
# training settings
learning_rate = 0.01
momentum = 0.5
batch_size_train = 64
batch_size_test = 1000
# get MNIST data set
train_loader, test_loader = load_mnist(batch_size_train=batch_size_train,
batch_size_test=batch_size_test)
# make a network
network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate,
momentum=momentum)
network.to(device)
# train network
train(network, optimizer, train_loader, device)
# copy network
# NOTE: plain assignment only creates a second name for the same objects.
# The commented-out deepcopy lines are the variant under investigation: with
# them enabled, the second training run reportedly changes neither the
# accuracy nor the weights of network_cp.
network_cp = network
#network_cp = deepcopy(network)
optimizer_cp = optimizer
#optimizer_cp = deepcopy(optimizer)
# get edge weights and accuracy of the copied network
acc1 = float(test(network_cp, optimizer_cp, test_loader, device))
weights1 = np.array(get_edge_weights(network_cp))
# train copied network
train(network_cp, optimizer_cp, train_loader, device)
# get edge weights and accuracy of the copied network after training
acc2 = float(test(network_cp, optimizer_cp, test_loader, device))
weights2 = np.array(get_edge_weights(network_cp))
# compare edge weights and accuracy of copied network before and after training
# (a weight difference of exactly 0 means the second training run had no effect)
print('accuracy', acc1, acc2)
print('abs diff of weights for net1 and net2', np.sum(np.abs(weights1-weights2)))
要运行上面的代码,包括这些导入和函数定义:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as tnn
import torch.nn.functional as tnf
from copy import deepcopy
import numpy as np
def load_mnist(batch_size_train=64, batch_size_test=1000):
    """Build shuffled MNIST data loaders for the training and test splits.

    Both splits use ``temp/`` as the dataset root and are requested with
    ``download=True``. Every image is converted to a tensor and normalized
    with ``Normalize((0.1307,), (0.3081,))``.

    Args:
        batch_size_train: Batch size of the training loader.
        batch_size_test: Batch size of the test loader.

    Returns:
        A ``(train_loader, test_loader)`` tuple of ``DataLoader`` objects.
    """
    def _mnist_split(is_train):
        # Each split gets its own freshly built preprocessing pipeline.
        preprocess = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize((0.1307,), (0.3081,)),
        ])
        return torchvision.datasets.MNIST(
            'temp/', train=is_train, download=True, transform=preprocess)

    train_loader = torch.utils.data.DataLoader(
        _mnist_split(True), batch_size=batch_size_train, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        _mnist_split(False), batch_size=batch_size_test, shuffle=True)
    return train_loader, test_loader
def train(network, optimizer, train_loader, device, n_epochs=5):
    """Train ``network`` for ``n_epochs`` full passes over ``train_loader``.

    Args:
        network: Module to train; it is switched to training mode first and
            its output is passed to ``nll_loss``.
        optimizer: Optimizer whose ``zero_grad`` and ``step`` are called once
            per batch.
        train_loader: Iterable yielding ``(data, target)`` batches.
        device: Device every batch is moved to before the forward pass.
        n_epochs: Number of passes over ``train_loader``.
    """
    network.train()
    for _ in range(n_epochs):
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Clear gradients left over from the previous step before the
            # new forward/backward pass.
            optimizer.zero_grad()
            batch_loss = tnf.nll_loss(network(inputs), labels)
            batch_loss.backward()
            optimizer.step()
def test(network, optimizer, test_loader, device):
    """Evaluate ``network`` on ``test_loader``, print a report, return accuracy.

    The printed report contains the average negative log-likelihood loss per
    sample and the number and percentage of correct predictions.

    Args:
        network: Module to evaluate; it is switched to eval mode. Its output
            is passed to ``nll_loss`` and the index of the maximum along
            dim 1 is taken as the predicted class.
        optimizer: Unused; accepted so existing callers keep working.
        test_loader: Iterable yielding ``(data, target)`` batches that also
            exposes a sized ``dataset`` attribute (e.g. a ``DataLoader``).
        device: Device every batch is moved to before the forward pass.

    Returns:
        The fraction of correctly classified samples as a ``float``.
    """
    network.eval()
    n_samples = len(test_loader.dataset)
    test_loss, correct = 0, 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = network(data)
            # reduction='sum' is the supported spelling of the deprecated
            # size_average=False: add up the per-sample losses of the batch.
            test_loss += tnf.nll_loss(output, target, reduction='sum').item()
            pred = output.max(1, keepdim=True)[1]
            # .item() keeps the running count a plain int, so the report
            # prints "9800" rather than "tensor(9800)".
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= n_samples
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, n_samples,
        100. * correct / n_samples))
    return float(correct) / float(n_samples)


# Evaluation helper, not a test case: keep pytest from collecting it by name
# when this module's contents are imported into a test module.
test.__test__ = False
def get_edge_weights(network):
    """Flatten the first parameter of every sub-module into one 1-D array.

    Sub-modules are visited in ``network.modules()`` order, skipping the
    first entry (the network itself). For each sub-module the first tensor
    yielded by ``parameters()`` is used; sub-modules that own no parameters
    (e.g. activation layers) are skipped.

    Args:
        network: A ``torch.nn.Module``.

    Returns:
        A 1-D float64 ``numpy.ndarray`` holding a copy of the collected
        values; empty if no sub-module has parameters.
    """
    # Seed with an empty float64 array so the result is always float64 and
    # np.concatenate never receives an empty list.
    chunks = [np.zeros(0)]
    for layer in list(network.modules())[1:]:
        first_param = next(iter(layer.parameters()), None)
        if first_param is None:
            continue
        # .cpu() makes the conversion work for parameters living on another
        # device; it is a no-op for CPU tensors.
        chunks.append(first_param.detach().cpu().numpy().flatten())
    # One concatenate at the end instead of re-copying the array per layer.
    return np.concatenate(chunks)
class Net(tnn.Module):
    """Fully connected classifier with one hidden layer.

    Each sample is flattened to 784 values, mapped to 264 hidden units and
    then to 10 outputs, with a ReLU after each linear layer.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = tnn.Linear(784, 264)
        self.fc2 = tnn.Linear(264, 10)

    def forward(self, x):
        """Return log-probabilities of shape ``(N, 10)``.

        Args:
            x: Tensor that can be viewed as ``(N, 784)``.
        """
        x = tnf.relu(self.fc1(x.view(-1, 784)))
        # NOTE(review): the ReLU on the output layer is kept as-is so that
        # the model's outputs do not change; confirm it is intended.
        x = tnf.relu(self.fc2(x))
        # Name the class dimension explicitly; relying on the implicit
        # choice is deprecated and emits a warning.
        return tnf.log_softmax(x, dim=1)
解决方案
执行 optimizer_cp = deepcopy(optimizer) 之后,optimizer_cp 仍然想优化旧模型的参数(由 optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum) 定义)。
深度复制模型后,需要告诉优化器优化这个新模型的参数:
# Build a new optimizer from network_cp.parameters(): deepcopy(optimizer) does
# not bind the copied optimizer to the copied network's parameters.
optimizer_cp = optim.SGD(network_cp.parameters(), lr=learning_rate, momentum=momentum)
推荐阅读
- arduino - 如何在 Arduino 中停止 void loop?
- linux - Azure CLI aks install cli 权限被拒绝并且 sudo 不起作用
- c++ - 创建一个向量类,在到达结束时将内存大小加倍?
- java - Java 用 '\r\n'、'\r' 或 '\n' 分割字符串,并与前面的子字符串保持一致
- c# - 如何在不破坏向后兼容性的情况下更改 DataContract 属性的类型?
- java - 连接JDBC与sqlserver 2017的问题
- reactjs - 为什么 reducer 无法更新 store?
- javascript - 从 S3 获取 JSON 数据并创建 html 列表的问题
- python - 合并的 PDF 文件在哪里?
- tornado - 如何在 Tornado 的 IOLoop 中获取待处理任务的数量?