python - Pytorch“upsample_bilinear2d_out_frame”未为“字节”实现
问题描述
我已经使用此链接中描述的步骤训练了一个自定义对象检测模型。我能够训练我的模型,但是当我尝试在一个时期结束时对其进行评估时,我收到以下错误
Epoch: [0] Total time: 0:00:06 (0.2223 s / it)
creating index...
index created!
Traceback (most recent call last):
File "train.py", line 106, in <module>
evaluate(model, data_loader_test, device=device)
File "/home/sarvani/anaconda3/envs/flir_env/lib/python3.7/site-packages/torch/autograd/grad_mode.py", line 49, in decorate_no_grad
return func(*args, **kwargs)
File "/home/sarvani/Desktop/flir/test_frcnn/custom/engine.py", line 107, in evaluate
outputs = model(image)
File "/home/sarvani/anaconda3/envs/flir_env/lib/python3.7/site-packages/torch/nn/modules/module.py", line 547, in __call__
result = self.forward(*input, **kwargs)
File "/home/sarvani/anaconda3/envs/flir_env/lib/python3.7/site-packages/torchvision/models/detection/generalized_rcnn.py", line 47, in forward
images, targets = self.transform(images, targets)
File "/home/sarvani/anaconda3/envs/flir_env/lib/python3.7/site-packages/torch/nn/modules/module.py", line 547, in __call__
result = self.forward(*input, **kwargs)
File "/home/sarvani/anaconda3/envs/flir_env/lib/python3.7/site-packages/torchvision/models/detection/transform.py", line 41, in forward
image, target = self.resize(image, target)
File "/home/sarvani/anaconda3/envs/flir_env/lib/python3.7/site-packages/torchvision/models/detection/transform.py", line 70, in resize
image[None], scale_factor=scale_factor, mode='bilinear', align_corners=False)[0]
File "/home/sarvani/anaconda3/envs/flir_env/lib/python3.7/site-packages/torch/nn/functional.py", line 2503, in interpolate
return torch._C._nn.upsample_bilinear2d(input, _output_size(2), align_corners)
RuntimeError: "upsample_bilinear2d_out_frame" not implemented for 'Byte'
我加载数据的代码如下
class CustomDataset(torch.utils.data.Dataset):
def __init__(self, root_dir,transform=None):
self.root = root_dir
self.rgb_imgs = list(sorted(os.listdir(os.path.join(root_dir, "rgb/"))))
self.annotations = list(sorted(os.listdir(os.path.join(root_dir, "annotations/"))))
self._classes = ('__background__', # always index 0
'car','person','bicycle','dog','other')
self._class_to_ind = {'car':'1', 'person':'2', 'bicycle':'3', 'dog':'4','other':'5'}
def __len__(self):
return len(self.rgb_imgs)
def __getitem__(self, idx):
self.num_classes = 6
img_rgb_path = os.path.join(self.root, "rgb/", self.rgb_imgs[idx])
img = Image.open(img_rgb_path)
img = np.array(img)
img = img.transpose((2, 0, 1))
img = torch.from_numpy(img)
filename = os.path.join(self.root,'annotations',self.annotations[idx])
tree = ET.parse(filename)
objs = tree.findall('object')
num_objs = len(objs)
labels = np.zeros((num_objs), dtype=np.float32)
seg_areas = np.zeros((num_objs), dtype=np.float32)
boxes = []
for ix, obj in enumerate(objs):
bbox = obj.find('bndbox')
x1 = float(bbox.find('xmin').text)
y1 = float(bbox.find('ymin').text)
x2 = float(bbox.find('xmax').text)
y2 = float(bbox.find('ymax').text)
cls = self._class_to_ind[obj.find('name').text.lower().strip()]
boxes.append([x1, y1, x2, y2])
labels[ix] = cls
boxes = torch.as_tensor(boxes, dtype=torch.float32)
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
image_id = torch.tensor([idx])
iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
labels = torch.as_tensor(labels, dtype=torch.float32)
target = {'boxes': boxes,
'labels': labels,
'area': area,
"image_id":image_id
}
target["iscrowd"] = iscrowd
return img,target
我的 train.py 如下
num_classes = 6
model = fasterrcnn_resnet50_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
device = torch.device('cuda')
model = model.cuda()
dataset_train = CustomDataset('FLIR/images/train')
dataset_val = CustomDataset('FLIR/images/val')
data_loader_train = torch.utils.data.DataLoader(
dataset_train, batch_size=4, shuffle=True,collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
dataset_val, batch_size=4 shuffle=False,collate_fn=utils.collate_fn)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params,lr=0.05,weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.1)
num_epochs = 30
for epoch in range(num_epochs):
train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=1)
lr_scheduler.step()
evaluate(model, data_loader_test, device=device)
带有所需文件的评估函数位于此链接。
有人可以帮帮我吗。
解决方案
我也遇到了同样的错误,似乎需要在输入模型之前对数据集进行规范化。我使用albumentations进行转换和规范化。
以下是代码片段:
def get_transform(train):
if train:
train_transform = A.Compose(
[
A.MedianBlur(blur_limit=7, p=0.5),
A.RandomGamma(gamma_limit=(90, 110), p=0.5),
A.RandomBrightnessContrast(p=0.5),
A.InvertImg(p=0.3),
A.HueSaturationValue(p=0.2),
A.GaussNoise(p=0.5),
A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
ToTensorV2(),
])
return train_transform
else:
val_transform = A.Compose(
[
A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
ToTensorV2(),
])
return val_transform
剩下的你可以关注 pytorch 教程和albumentations 文档albumentation链接
推荐阅读
- thrift - 在 Thrift IDL 中重命名字段是否安全?
- spring-boot - Spring Boot Zuul:如何在没有应用前缀的发现服务中注册多个 MS
- javascript - 具有 Razor 语法的 Javascript 命名空间?
- python - 在特定字符之间插入空格,但如果后面跟着特定字符 regex
- azure - 如何授予用户在 AzureDevOps 中创建 sprint 和 backlog 的权限?
- c - 从结构输入字符串后出现分段错误
- dialogflow-es - 有没有办法创建一个消除对话流中所有上下文的意图?
- javascript - 从单独的输入中查找和匹配属性
- python - 如何将超链接添加到 tkinter 按钮
- ruby-on-rails - Rails:处理不同版本的宝石