arrays - Pytorch U-net 分割模型的“ValueError:轴与数组错误不匹配”可能是什么原因?
问题描述
我正在尝试为名为“Carvana Image Masking Challange”的 kaggle 数据集实现分割模型(我之前成功地用于另一个数据集)。
我搜索了很多,但仍然无法弄清楚我收到此错误的原因是什么。有一些建议可以检查图像尺寸,可能是灰度格式,但似乎我有 3 个通道用于原始图像和蒙版图像。我感谢您的所有支持
我的代码如下:
图书馆
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch.utils.data import Dataset as BaseDataset
import albumentations as albu
import torch
import numpy as np
import segmentation_models_pytorch as smp
数据路径
DATA_DIR = 'D:/Users/eugur/Belgeler/Jupyter/Segmentation_Kaggle'
x_train_dir = os.path.join(DATA_DIR, 'train')
y_train_dir = os.path.join(DATA_DIR, 'train_masks')
x_valid_dir = os.path.join(DATA_DIR, 'valid')
y_valid_dir = os.path.join(DATA_DIR, 'valid_masks')
x_test_dir = os.path.join(DATA_DIR, 'test')
数据可视化的辅助函数
def visualize(**images):
"""PLot images in one row."""
n = len(images)
plt.figure(figsize=(16, 5))
for i, (name, image) in enumerate(images.items()):
plt.subplot(1, n, i + 1)
plt.xticks([])
plt.yticks([])
plt.title(' '.join(name.split('_')).title())
plt.imshow(image)
plt.show()
数据集类
class Dataset(BaseDataset):
"""
Args:
images_dir (str): path to images folder
masks_dir (str): path to segmentation masks folder
class_values (list): values of classes to extract from segmentation mask
augmentation (albumentations.Compose): data transfromation pipeline
(e.g. flip, scale, etc.)
preprocessing (albumentations.Compose): data preprocessing
(e.g. noralization, shape manipulation, etc.)
"""
CLASSES = ['car']
def __init__(
self,
images_dir,
masks_dir,
classes=None,
augmentation=None,
preprocessing=None,
):
self.ids = os.listdir(images_dir)
self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.ids]
self.masks_fps = [os.path.join(masks_dir, image_id.split('.')[0]+'_mask.gif') for image_id in self.ids]
# convert str names to class values on masks
self.class_values = [self.CLASSES.index(cls.lower()) for cls in classes]
self.augmentation = augmentation
self.preprocessing = preprocessing
def __getitem__(self, i):
# read data
image = cv2.imread(self.images_fps[i])
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# mask = cv2.imread(self.masks_fps[i], 0)
mask = cv2.VideoCapture(self.masks_fps[i],0)
ret,mask = mask.read()
mask = mask/255
# extract certain classes from mask (e.g. cars)
masks = [(mask == v) for v in self.class_values]
mask = np.stack(masks, axis=-1).astype('float')
# apply augmentations
if self.augmentation:
sample = self.augmentation(image=image, mask=mask)
image, mask = sample['image'], sample['mask']
# apply preprocessing
if self.preprocessing:
sample = self.preprocessing(image=image, mask=mask)
image, mask = sample['image'], sample['mask']
return image, np.squeeze(mask,axis=3)
def __len__(self):
return len(self.ids)
预处理和增强
def get_training_augmentation():
train_transform = [
albu.HorizontalFlip(p=0.5),
albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0),
albu.PadIfNeeded(min_height=320, min_width=320, always_apply=True, border_mode=0),
albu.RandomCrop(height=320, width=320, always_apply=True),
albu.IAAAdditiveGaussianNoise(p=0.2),
albu.IAAPerspective(p=0.5),
albu.OneOf(
[
albu.CLAHE(p=1),
albu.RandomBrightness(p=1),
albu.RandomGamma(p=1),
],
p=0.9,
),
albu.OneOf(
[
albu.IAASharpen(p=1),
albu.Blur(blur_limit=3, p=1),
albu.MotionBlur(blur_limit=3, p=1),
],
p=0.9,
),
albu.OneOf(
[
albu.RandomContrast(p=1),
albu.HueSaturationValue(p=1),
],
p=0.9,
),
]
return albu.Compose(train_transform)
def get_validation_augmentation():
"""Add paddings to make image shape divisible by 32"""
test_transform = [
albu.PadIfNeeded(384, 480)
]
return albu.Compose(test_transform)
def to_tensor(x, **kwargs):
return x.transpose(0,2,1).astype('float32')
def get_preprocessing(preprocessing_fn):
"""Construct preprocessing transform
Args:
preprocessing_fn (callbale): data normalization function
(can be specific for each pretrained neural network)
Return:
transform: albumentations.Compose
"""
_transform = [
albu.Lambda(image=preprocessing_fn),
albu.Lambda(image=to_tensor, mask=to_tensor),
]
return albu.Compose(_transform)
模型定义
ENCODER = 'se_resnext50_32x4d'
ENCODER_WEIGHTS = 'imagenet'
CLASSES = ['car']
ACTIVATION = 'sigmoid' # could be None for logits or 'softmax2d' for multicalss segmentation
DEVICE = 'cuda'
# create segmentation model with pretrained encoder
model = smp.FPN(
encoder_name=ENCODER,
encoder_weights=ENCODER_WEIGHTS,
classes=len(CLASSES),
in_channels=3,
activation=ACTIVATION,
)
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)
数据加载器
train_dataset = Dataset(
x_train_dir,
y_train_dir,
preprocessing=get_preprocessing(preprocessing_fn),
classes=CLASSES,
)
valid_dataset = Dataset(
x_valid_dir,
y_valid_dir,
preprocessing=get_preprocessing(preprocessing_fn),
classes=CLASSES,
)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=0)
优化定义
loss = smp.utils.losses.DiceLoss()
metrics = [
smp.utils.metrics.IoU(threshold=0.5),
]
optimizer = torch.optim.Adam([
dict(params=model.parameters(), lr=0.0001),
])
训练
train_epoch = smp.utils.train.TrainEpoch(
model,
loss=loss,
metrics=metrics,
optimizer=optimizer,
device=DEVICE,
verbose=True,
)
valid_epoch = smp.utils.train.ValidEpoch(
model,
loss=loss,
metrics=metrics,
device=DEVICE,
verbose=True,
)
max_score = 0
for i in range(0, 20):
print('\nEpoch: {}'.format(i))
train_logs = train_epoch.run(train_loader)
valid_logs = valid_epoch.run(valid_loader)
# do something (save model, change lr, etc.)
if max_score < valid_logs['iou_score']:
max_score = valid_logs['iou_score']
torch.save(model, './best_model.pth')
print('Model saved!')
if i == 25:
optimizer.param_groups[0]['lr'] = 1e-5
print('Decrease decoder learning rate to 1e-5!')
错误
> Epoch: 0 train: 0%| | 0/510 [00:00<?, ?it/s]
>
> --------------------------------------------------------------------------- ValueError Traceback (most recent call
> last) <ipython-input-208-d2306c5ca0ea> in <module>
> 6
> 7 print('\nEpoch: {}'.format(i))
> ----> 8 train_logs = train_epoch.run(train_loader)
> 9 valid_logs = valid_epoch.run(valid_loader)
> 10
>
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\segmentation_models_pytorch\utils\train.py
> in run(self, dataloader)
> 43
> 44 with tqdm(dataloader, desc=self.stage_name, file=sys.stdout, disable=not (self.verbose)) as iterator:
> ---> 45 for x, y in iterator:
> 46 x, y = x.to(self.device), y.to(self.device)
> 47 loss, y_pred = self.batch_update(x, y)
>
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\tqdm\std.py
> in __iter__(self) 1169 1170 try:
> -> 1171 for obj in iterable: 1172 yield obj 1173 # Update and possibly print the
> progressbar.
>
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\torch\utils\data\dataloader.py
> in __next__(self)
> 433 if self._sampler_iter is None:
> 434 self._reset()
> --> 435 data = self._next_data()
> 436 self._num_yielded += 1
> 437 if self._dataset_kind == _DatasetKind.Iterable and \
>
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\torch\utils\data\dataloader.py
> in _next_data(self)
> 473 def _next_data(self):
> 474 index = self._next_index() # may raise StopIteration
> --> 475 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
> 476 if self._pin_memory:
> 477 data = _utils.pin_memory.pin_memory(data)
>
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\torch\utils\data\_utils\fetch.py
> in fetch(self, possibly_batched_index)
> 42 def fetch(self, possibly_batched_index):
> 43 if self.auto_collation:
> ---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
> 45 else:
> 46 data = self.dataset[possibly_batched_index]
>
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\torch\utils\data\_utils\fetch.py
> in <listcomp>(.0)
> 42 def fetch(self, possibly_batched_index):
> 43 if self.auto_collation:
> ---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
> 45 else:
> 46 data = self.dataset[possibly_batched_index]
>
> <ipython-input-146-65256f8f536d> in __getitem__(self, i)
> 54 # apply preprocessing
> 55 if self.preprocessing:
> ---> 56 sample = self.preprocessing(image=image, mask=mask)
> 57 image, mask = sample['image'], sample['mask']
> 58
>
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\albumentations\core\composition.py
> in __call__(self, force_apply, *args, **data)
> 180 p.preprocess(data)
> 181
> --> 182 data = t(force_apply=force_apply, **data)
> 183
> 184 if dual_start_end is not None and idx == dual_start_end[1]:
>
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\albumentations\core\transforms_interface.py
> in __call__(self, force_apply, *args, **kwargs)
> 87 )
> 88 kwargs[self.save_key][id(self)] = deepcopy(params)
> ---> 89 return self.apply_with_params(params, **kwargs)
> 90
> 91 return kwargs
>
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\albumentations\core\transforms_interface.py
> in apply_with_params(self, params, force_apply, **kwargs)
> 100 target_function = self._get_target_function(key)
> 101 target_dependencies = {k: kwargs[k] for k in self.target_dependence.get(key, [])}
> --> 102 res[key] = target_function(arg, **dict(params, **target_dependencies))
> 103 else:
> 104 res[key] = None
>
> C:\ProgramData\Anaconda3\envs\segmentation\lib\site-packages\albumentations\augmentations\transforms.py
> in apply_to_mask(self, mask, **params) 3068 def
> apply_to_mask(self, mask, **params): 3069 fn =
> self.custom_apply_fns["mask"]
> -> 3070 return fn(mask, **params) 3071 3072 def apply_to_bbox(self, bbox, **params):
>
> <ipython-input-186-4f194a842931> in to_tensor(x, **kwargs)
> 52
> 53
> ---> 54 return x.transpose(0,2,1).astype('float32')
> 55
> 56
>
> ValueError: axes don't match array
解决方案
上面的代码有两个问题;
蒙版图像大小错误,应为 (x,y,1) 但它是 (x,y,3)
模型期望行和列的大小相等。
上述更改后,代码可以正常工作。
推荐阅读
- r - 工作一小时后闪亮的“与服务器断开连接”就好了
- hyperledger-composer - Composer 运行时 (0.19.13) 与客户端 (0.20.0) 不兼容
- gun - 如何在没有订阅功能的情况下获取地图?
- c++ - 如何像在 javascript 中一样“推送”c++ 映射
- android - 更换自身后自动重启安卓应用
- c# - 如何将 UWP 中的树视图绑定到 ViewModel?
- python - K-means 仅使用带有 scikit-learn 的特定数据框列
- machine-learning - 具有文本特征的数据集的逻辑回归
- r - r 不允许在我的 while() 循环中进行 100 次迭代
- c# - 使用没有列表的 Parallel.ForEach