首页 > 解决方案 > Pytorch:deepcopy 后没有训练效果

问题描述

我试图在 PyTorch 中复制一个神经网络,然后对复制出来的网络继续训练,但复制之后的训练似乎并没有改变网络中的权重。这篇文章指出 deepcopy 是复制神经网络的一种便捷方法,因此我尝试在自己的代码中使用它。

下面的代码运行正常:训练之后,网络的权重和准确率都与训练之前不同。但是,当我改用 network_cp = deepcopy(network) 和 optimizer_cp = deepcopy(optimizer) 时,训练前后的准确率和权重完全相同。

# Reproduction script: train a network, take a "copy" of it, train the copy
# again, and compare the copy's accuracy and weights before and after that
# second round of training.

# torch settings
torch.backends.cudnn.enabled = True 
device = torch.device("cpu")

# training settings (SGD hyper-parameters and loader batch sizes)
learning_rate = 0.01
momentum = 0.5
batch_size_train = 64
batch_size_test = 1000

# get MNIST data set
train_loader, test_loader = load_mnist(batch_size_train=batch_size_train,
    batch_size_test=batch_size_test)

# make a network; the optimizer is bound to this network's parameters
network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate,
    momentum=momentum)
network.to(device)

# train network
train(network, optimizer, train_loader, device)

# copy network
# As written, these are plain aliases: network_cp is the same object as
# network and optimizer_cp is the same object as optimizer, so the second
# training round below does update the weights.
# The commented-out deepcopy variants are the failing case: a deep-copied
# optimizer does not reference network_cp's parameters, so stepping it leaves
# network_cp unchanged. An optimizer for the copy has to be constructed from
# network_cp.parameters().
network_cp = network
#network_cp = deepcopy(network)
optimizer_cp = optimizer
#optimizer_cp = deepcopy(optimizer)

# get edge weights and accuracy of the copied network
# (np.array(...) stores the weights as a separate array for later comparison)
acc1 = float(test(network_cp, optimizer_cp, test_loader, device))
weights1 = np.array(get_edge_weights(network_cp))

# train copied network
train(network_cp, optimizer_cp, train_loader, device)

# get edge weights and accuracy of the copied network after training
acc2 = float(test(network_cp, optimizer_cp, test_loader, device))
weights2 = np.array(get_edge_weights(network_cp))

# compare edge weights and accuracy of copied network before and after training
# (a summed absolute difference of 0 means the second training round changed nothing)
print('accuracy', acc1, acc2)
print('abs diff of weights for net1 and net2', np.sum(np.abs(weights1-weights2)))

要运行上面的代码,包括这些导入和函数定义:

import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as tnn
import torch.nn.functional as tnf
from copy import deepcopy
import numpy as np

def load_mnist(batch_size_train = 64, batch_size_test = 1000):
    """Build shuffled MNIST data loaders for training and testing.

    The dataset is stored under ``temp/`` and downloaded when missing.
    Every image is converted to a tensor and normalized with
    ``Normalize((0.1307,), (0.3081,))``.

    Args:
        batch_size_train: Batch size of the training loader.
        batch_size_test: Batch size of the test loader.

    Returns:
        A ``(train_loader, test_loader)`` tuple.
    """
    def _make_loader(is_train, batch_size):
        # Each loader gets its own preprocessing pipeline and dataset object.
        preprocessing = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize((0.1307,), (0.3081,)),
        ])
        dataset = torchvision.datasets.MNIST(
            'temp/', train=is_train, download=True, transform=preprocessing)
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=True)

    train_loader = _make_loader(True, batch_size_train)
    test_loader = _make_loader(False, batch_size_test)
    return train_loader, test_loader

def train(network, optimizer, train_loader, device, n_epochs=5):
    """Train ``network`` in place with the negative log-likelihood loss.

    Args:
        network: Module whose forward pass output is fed to ``nll_loss``.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        train_loader: Iterable yielding ``(data, target)`` batches.
        device: Device each batch is moved to before the forward pass.
        n_epochs: Number of full passes over ``train_loader``.

    Returns:
        None. The network and optimizer are updated in place.
    """
    network.train()
    for _ in range(n_epochs):
        for data, target in train_loader:
            inputs = data.to(device)
            labels = target.to(device)
            optimizer.zero_grad()
            batch_loss = tnf.nll_loss(network(inputs), labels)
            batch_loss.backward()
            optimizer.step()

def test(network, optimizer, test_loader, device):
    network.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = network(data)
            test_loss += tnf.nll_loss(output, target, size_average=False).item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    return(float(correct)/float(len(test_loader.dataset)))

def get_edge_weights(network):
    """Collect the first parameter of every submodule into one flat array.

    Walks ``network.modules()`` (skipping the root module itself), takes the
    first parameter each submodule exposes, flattens it, and concatenates
    the pieces in traversal order.

    Submodules without parameters (for example activation layers) are
    skipped instead of raising ``IndexError``, and parameters are moved to
    the CPU before conversion so non-CPU networks work as well.

    NOTE(review): a container submodule contributes the first parameter
    found anywhere beneath it, so nested containers may repeat weights.

    Args:
        network: The ``torch.nn.Module`` to inspect.

    Returns:
        A 1-D ``float64`` NumPy array that does not share memory with the
        network; empty when no submodule has parameters.
    """
    chunks = []
    for layer in list(network.modules())[1:]:
        first_param = next(layer.parameters(), None)
        if first_param is None:
            # Parameterless layer (e.g. ReLU): nothing to collect.
            continue
        chunks.append(first_param.detach().cpu().numpy().ravel())
    if not chunks:
        return np.zeros(0)
    # One concatenation instead of growing the array inside the loop; the
    # float64 cast matches the dtype callers have always received and makes
    # the result an independent copy.
    return np.concatenate(chunks).astype(np.float64)

class Net(tnn.Module):
    """Two-layer fully connected classifier for flattened 28x28 inputs.

    The input is reshaped to rows of 784 features, passed through a
    264-unit hidden layer and a 10-unit output layer (each followed by
    ReLU), and returned as log-probabilities over the 10 classes.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = tnn.Linear(784, 264)
        self.fc2 = tnn.Linear(264, 10)

    def forward(self, x):
        """Return log-probabilities of shape ``(N, 10)``.

        ``x`` may have any shape whose element count is a multiple of 784;
        it is viewed as ``(N, 784)`` first.
        """
        x = tnf.relu(self.fc1(x.view(-1, 784)))
        # NOTE(review): the ReLU on the output layer is kept as in the
        # original model definition; removing it would change the outputs.
        x = tnf.relu(self.fc2(x))
        # Explicit dim: the activations are always 2-D here, and relying on
        # the implicit dimension is deprecated and warns on every call.
        return tnf.log_softmax(x, dim=1)

标签: python tensorflow pytorch

解决方案


执行 optimizer_cp = deepcopy(optimizer) 之后,optimizer_cp 仍然想优化旧模型的参数(即由 optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum) 定义的那些参数)。

深度复制模型后,需要告诉优化器优化这个新模型的参数:

# Build a new SGD optimizer over the copied network's own parameters
# (instead of deep-copying the original optimizer), so that stepping it
# updates network_cp.
optimizer_cp = optim.SGD(network_cp.parameters(), lr=learning_rate, momentum=momentum)


推荐阅读