首页 > 解决方案 > Pytorch 错误:优化器在 Linux 服务器上得到了一个空参数列表

问题描述

我正在运行 FasterRCNN 代码,并且正在试验各种主干网络(resnet18...101)。我编写了一个 FasterRCNN 类,并在其 __init__ 函数中构建模型。在我的 Linux 服务器上进行训练时,我遇到了“空参数列表”错误(见附图 1:服务器上的空参数错误)。

而相同的代码在我的本地机器上运行时却没有任何错误。我使用的是 Anaconda 发行版 4.0.1、Python 3.7 和 PyTorch 1.2。下面是我的代码(模型和训练),供参考:

class Faster_RCNN(nn.Module):
    """Faster R-CNN detector built on a selectable torchvision backbone.

    Args:
        num_classes: Number of output classes of the detector, background
            included (the default 2 means one object class + background).
        backbone: Name of the feature extractor; must be one of the keys of
            ``_BACKBONE_CHANNELS``.
        test: When true, ``forward`` calls the detector without targets.

    Raises:
        ValueError: If ``backbone`` is not a supported name. Without this
            check a misspelled name would leave the module without any
            sub-model, and an optimizer built from ``parameters()`` would
            fail with "optimizer got an empty parameter list".
    """

    # Supported backbone name -> number of channels of its last feature map.
    # Each name is also the name of the constructor in ``torchvision.models``.
    _BACKBONE_CHANNELS = {
        'mobilenet_v2': 1280,
        'resnet18': 512,
        'resnet34': 512,
        'resnet50': 2048,
        'resnet101': 2048,
    }

    def __init__(self, num_classes=2, backbone = 'mobilenet_v2', test= False):
        super(Faster_RCNN, self).__init__()
        if backbone not in self._BACKBONE_CHANNELS:
            supported = ', '.join(sorted(self._BACKBONE_CHANNELS))
            raise ValueError(
                f'Unknown backbone {backbone!r}; expected one of: {supported}')

        self.classes = num_classes
        self.test = test
        self.backbone = backbone
        print('....Initializing Model....\n')
        print(f'....This model uses {self.backbone} as backbone....\n')

        # Look the constructor up by name so that every backbone (including
        # resnet101, which previously instantiated resnet50) gets its own net.
        network = getattr(torchvision.models, backbone)(pretrained=True)
        if backbone == 'mobilenet_v2':
            features = network.features
        else:
            # Drop the last two child modules of the ResNet (presumably the
            # global pooling and the classification layer) to keep a
            # spatial feature map.
            features = nn.Sequential(*list(network.children())[:-2])
        # FasterRCNN reads the channel count from this attribute.
        features.out_channels = self._BACKBONE_CHANNELS[backbone]

        # One feature map -> one tuple of sizes and one tuple of ratios.
        anchor_generator = AnchorGenerator(sizes=((32, 64, 128),),
                                           aspect_ratios=((1.0,),))
        # NOTE(review): newer torchvision releases may expect the string
        # name '0' here instead of the integer 0 - confirm against the
        # installed version.
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                        output_size=7,
                                                        sampling_ratio=2)
        self.model = FasterRCNN(features,
                                num_classes=self.classes,
                                rpn_anchor_generator=anchor_generator,
                                box_roi_pool=roi_pooler)

    def forward(self, x, y=None):
        """Run the wrapped detector.

        Args:
            x: Images, passed to the detector unchanged.
            y: Targets, passed to the detector unless ``self.test`` is set;
                may be omitted in test mode.

        Returns:
            Whatever the wrapped detector returns for the given call.
        """
        if self.test:
            return self.model(x)
        return self.model(x, y)

这是训练代码:

import dataloader
import model as md
import numpy as np
#import matplotlib.pyplot as plt
import torch
from torch.optim import Adam
from torch.utils.tensorboard import SummaryWriter
import argparse
# Backbones trained in turn when no --backbone is given on the command line.
# The names must match the ones md.Faster_RCNN recognises; a misspelled name
# selects no model branch and leaves the optimizer without parameters.
bkbone = ['resnet18', 'resnet34', 'resnet50', 'resnet101']


def parse_args():
    """Parse the command line once and return the options as a dict."""
    ap = argparse.ArgumentParser()
    ap.add_argument("-b", "--backbone", required = False, default = None,
                    choices = bkbone + ['mobilenet_v2'],
                    help = "resnet18, resnet34, resnet50, resnet101 and mobilenet_v2 can be given; "
                           "when omitted, every resnet backbone is trained in turn")
    # Default is the original Windows location; on the Linux server pass e.g.
    # --data-root /media/TBDataStudent/pankaj/beantech/object_detection
    ap.add_argument("-d", "--data-root", required = False,
                    default = 'D:\\beantech_Data\\objtect_detection',
                    help = "root directory of the dataset")
    return vars(ap.parse_args())


def train(backbone, data_root, epochs = 1000, batch_size = 10, lr = 0.005):
    """Train one Faster R-CNN model and save its weights.

    Args:
        backbone: Backbone name forwarded to ``md.Faster_RCNN``.
        data_root: Dataset root forwarded to ``dataloader.Bar``.
        epochs: Number of passes over ``data.train_loader``.
        batch_size: Batch size forwarded to ``dataloader.Bar``.
        lr: Learning rate of the Adam optimizer.

    Raises:
        RuntimeError: If the training loader yields no batch in an epoch.
    """
    data = dataloader.Bar(root=data_root, batch_size=batch_size)

    model = md.Faster_RCNN(backbone=backbone).cuda()
    model.train()

    optimizer = Adam(model.parameters(), lr = lr, weight_decay=0.0005)

    # NOTE(review): the scheduler is created but never stepped, so the
    # learning rate stays constant. Stepping it every epoch with
    # step_size=3/gamma=0.1 would shrink the rate very quickly over 1000
    # epochs - decide on the intended schedule before enabling it.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=3,gamma=0.1)

    # One writer per backbone so that runs do not overwrite each other's
    # curves (same tags, same epoch numbers).
    writer = SummaryWriter(comment=f'_{backbone}')
    try:
        ### Train ###
        for epoch in range(epochs):
            per_loss = {}   # loss name -> list of per-batch values
            Tloss = []

            for img, label in data.train_loader:
                images = [image.cuda() for image in img]
                for target in label:
                    target['boxes'] = target['boxes'].cuda()
                    target['labels'] = target['labels'].cuda()
                optimizer.zero_grad()
                output = model(images, label)

                # Record every loss under its own name instead of relying on
                # the order of the keys in the returned dict.
                for name, value in output.items():
                    per_loss.setdefault(name, []).append(value.item())

                loss = sum(output.values())
                loss.backward()
                optimizer.step()
                Tloss.append(loss.item())

            if not Tloss:
                raise RuntimeError('train_loader produced no batches')

            for name, values in per_loss.items():
                writer.add_scalar(name, np.mean(values), epoch)
            writer.add_scalar('Total Loss', np.mean(Tloss), epoch)

            print('\n\n' + '\n'.join(
                f' --{name}: {np.mean(values)}' for name, values in per_loss.items()))
            print(f'Total loss of epoch {epoch}is: {np.mean(Tloss)}')
    finally:
        # Close once, after the last epoch, not after every epoch.
        writer.close()

    torch.save(model.state_dict(), f'FasterRCNN_{backbone}'+'.pt')


def main():
    """Train the requested backbone, or every backbone in ``bkbone``."""
    args = parse_args()
    selected = [args['backbone']] if args['backbone'] else bkbone
    for bbone in selected:
        train(bbone, data_root=args['data_root'])


if __name__ == '__main__':
    main()

标签: python pytorch

解决方案


您需要更新服务器上的代码,因为根据错误,您肯定没有运行您向我们展示的训练代码。在您的代码中,您传递model.parameters()给优化器:

# Statement from the posted training script: the optimizer receives model.parameters().
optimizer = Adam(model.parameters(), lr = lr, weight_decay=0.0005)

但是在错误消息中(您将其作为图像发布,强烈建议不要这样做),它说第 36 行:

# Statement quoted from the error message: it passes `param`, a name that does not appear in the posted code.
optimizer = Adam(param, lr = lr, weight_decay=0.0005)

param 是什么?


推荐阅读