首页 > 解决方案 > 为什么我的 CNN 回归器不起作用(Pytorch)

问题描述

我正在尝试将我的 tensorflow 代码转换为 pytorch。

简单地说,它使用 CNN 从图像中估计 7 个值(数字)。(回归器)

主干网络是具有预训练权重的 vgg16,我想将最后一个 fcl(实际上由于 ImageNet 数据集,最后一个 fcl 输出为 1000 个类)转换为(4096 x 4096),并添加更多 fcl。

前 :

vgg 最后整箱 (4096 x 1000)

后:

vgg 最后整箱(更改为 4096 x 4096)

----添加 fcl1 (4096 x 4096)

----添加 fcl2 (4096 x 2048)

└ 添加 fclx (2048 x 3)

└ 添加 fclq (2048 x 4)

: fcl2 连接到两个不同的张量,大小分别为 3 和 4

在这里,我尝试仅使用一张图像(仅用于调试)和带有 L2 损失的 GT 值(7 个值)来实现。如果我使用 Tensorflow 执行此操作,则损失会大大减少,并且当我推断图像时,它给出的值与 GT 几乎相似。

但是,如果我尝试使用 Pytorch 来做,看起来训练效果不佳。

我猜损失应该在训练时急剧减少(几乎每次迭代)

有什么问题?

from torchvision import models
import torch.nn as nn
import torch
from torch.autograd import Variable
import torch.optim as optim
import os
import os.path
import torch.utils.data as data
from torchvision import transforms as T
from PIL import Image

class DataSource(data.Dataset):
    def __init__(self, root, train=True, transforms=None, txtName='dataset_train'):
        self.root = os.path.expanduser(root)
        self.transforms = transforms
        self.train = train
        self.imageFormat = '.jpg'
        self.image_poses = []
        self.image_paths = []
        self.txtName = txtName
        self._get_data()

        if transforms is None:
            normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])
            if not train:
                self.transforms = T.Compose(
                    [T.Resize(256),
                     T.CenterCrop(224),
                     T.ToTensor(),
                     normalize]
                )
            else:
                self.transforms = T.Compose(
                    [T.Resize(256),
                     T.CenterCrop(224),
                     # T.RandomCrop(224),
                     T.ToTensor(),
                     normalize]
                )

    def _get_data(self):
        txt_file = self.root + '/' + self.txtName + '.txt'
        count = 0
        with open(txt_file, 'r') as f:
            for line in f:
                if len(line.split()) != 8:
                    next(f)
                fname, p0, p1, p2, p3, p4, p5, p6 = line.split()
                p0 = float(p0); p1 = float(p1); p2 = float(p2);
                p3 = float(p3); p4 = float(p4); p5 = float(p5); p6 = float(p6)
                ImageFullName = self.root + '/' +  fname
                if count == 0:
                    if os.path.isfile(ImageFullName) == False:
                        self.imageFormat = '.png'

                if self.imageFormat != '.jpg':
                    ImageFullName = ImageFullName.replace('.jpg', self.imageFormat)

                self.image_poses.append([p0, p1, p2, p3, p4, p5, p6])
                self.image_paths.append(ImageFullName)
                count += 1
        print('Total : ', len(self.image_paths), ' images')

    def __getitem__(self, index):
        img_path = self.image_paths[index]
        img_pose = self.image_poses[index]
        data = Image.open(img_path)
        data = self.transforms(data)
        return data, torch.tensor(img_pose)

    def __len__(self):
        return len(self.image_paths)

class PoseLoss(nn.Module):
    def __init__(self, beta, device = 'cuda'):
        super(PoseLoss, self).__init__()
        self.beta = beta
        self.device = device
        self.t_loss_fn = nn.MSELoss()

    def forward(self, x, q, poseGT):
        GT_x = poseGT[:, 0:3]
        GT_q = poseGT[:, 3:]

        xx = Variable(x, requires_grad=True).to(self.device)
        qq = Variable(q, requires_grad=True).to(self.device)
        print('GT', GT_x, GT_q)
        print('Estim', xx, qq)

        loss = torch.sqrt(self.t_loss_fn(GT_x[:, :3].cpu(), xx[:, :3].cpu())) + self.beta*torch.sqrt(self.t_loss_fn(GT_q[:, 3:].cpu(), qq[:, 3:].cpu()))
        return loss

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.backbone =  models.vgg16(pretrained=True)
        self.backbone._modules['classifier'][6] = nn.ReLU(nn.Linear(4096, 4096))
        self.fcl = nn.Sequential(nn.Linear(4096, 4096), nn.ReLU(), nn.Linear(4096, 2048), nn.ReLU())
        self.xyz = nn.Linear(2048, 3)
        self.q = nn.Linear(2048, 4)

    def forward(self, x):
        x1 = self.backbone(x)
        x2 = self.fcl(x1)
        xyz = self.xyz(x2)
        q = self.q(x2)
        return xyz, q

batch_size = 1
learning_rate = 10e-5
training_epochs = 100


if __name__ == "__main__":
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    data = DataSource(DatasetDirectory + DatasetFolder, train=True, transforms=None, txtName=TrainDatasetList)
    data_loader = torch.utils.data.DataLoader(dataset=data, batch_size=batch_size, shuffle=False, num_workers=4)

    model = Net().to(device)

    model.train()

    criterion = PoseLoss(beta = 100, device = device)

    optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas = (0.9, 0.999), eps =0.00000001)

    iteration = 0
    minloss = 10e8
    minlossindex = -1
    for epoch in range(1, training_epochs):
        dataiter = iter(data_loader)
        for Images, Poses in dataiter:
            optimizer.zero_grad()
            Images = Images.to(device).float()
            x, q = model(Images)
            loss = criterion(x, q, Poses)
            loss.backward()
            loss = loss.item()/ batch_size
            optimizer.step()
            print(epoch, ' : ', iteration , ' -> ' , loss, ' minloss ', minloss, ' at ', minlossindex)
            if loss < minloss:
                minloss = loss
                minlossindex = iteration
                if epoch < (int)(training_epochs*0.8):
                    torch.save(model.state_dict(), 'Min.pth')
            iteration = iteration + 1
    torch.save(model.state_dict(), 'Fin.pth')

对于所有 7 个值,估计结果往往为零,我想不出它为什么给出这样的值。

在此处输入图像描述

另外,正如我上面提到的,损失值在训练时并没有显着降低(我预计每次迭代都应该显着降低,直到收敛,因为我只使用了一张图像进行训练)

在此处输入图像描述

标签: pythontensorflowdeep-learningpytorch

解决方案


在我的测试下.cpu()不影响血压


我注意到您.cpu()在最终损失中添加了 a,PyTorch 无法将梯度从 CPU 传递到 GPU(我想创建了一个新的计算图)。只需删除.cpu()inPoseLoss并保留 GPU 上的所有张量。由于 PyTorch 支持自动标记计算图的叶节点,因此变量 API 也不再需要。


推荐阅读