python - 如何在 Pytorch 中为图像及其蒙版制作自定义数据集?
问题描述
我有两个包含 tif 图像的数据集文件夹:一个是名为 BMMCdata 的图像文件夹,另一个是存放 BMMCdata 图像对应掩码的文件夹,称为 BMMCmasks(图像与掩码按文件名一一对应)。我正在尝试制作自定义数据集,并随机拆分数据以进行训练和测试。目前我收到一个错误
self.filenames.append(fn)
AttributeError: 'CustomDataset' object has no attribute 'filenames'
任何评论将不胜感激。
import torch
from torch.utils.data.dataset import Dataset # For custom data-sets
from torchvision import transforms
from PIL import Image
import os.path as osp
import glob
folder_data = "/Users/parto/PycharmProjects/U-net/BMMCdata/data"


class CustomDataset(Dataset):
    """Dataset over the ``.tif`` images found directly under *root*.

    Each item is a single image loaded with PIL and converted to a
    CHW float tensor via ``transforms.ToTensor()``.
    """

    def __init__(self, root):
        self.root = root
        self.to_tensor = transforms.ToTensor()
        # Bug fix: the list must exist before it is filled — the original
        # appended to self.filenames without ever creating it, which raised
        # "AttributeError: 'CustomDataset' object has no attribute 'filenames'".
        # Also glob over *root* instead of the hard-coded folder_data so the
        # constructor argument actually matters; sorted() makes the order
        # deterministic across platforms.
        self.filenames = sorted(glob.glob(osp.join(root, '*.tif')))
        self.len = len(self.filenames)

    def __getitem__(self, index):
        image = Image.open(self.filenames[index])
        # Bug fix: the original called self.transform, which was never
        # defined; the transform stored in __init__ is self.to_tensor.
        return self.to_tensor(image)

    def __len__(self):
        # Number of .tif files discovered in __init__.
        return self.len
# Build the dataset, then split it 60/40 into train and test subsets.
custom_img = CustomDataset(folder_data)
# Total images in the set.
print(custom_img.len)
train_len = int(0.6 * custom_img.len)
test_len = custom_img.len - train_len
# Bug fix: random_split lives in torch.utils.data, not on the Dataset
# subclass — CustomDataset.random_split raised AttributeError.
train_set, test_set = torch.utils.data.random_split(custom_img, lengths=[train_len, test_len])
# Check the lengths of the two subsets.
print(len(train_set), len(test_set))
# Bug fixes relative to the original script:
#  - do NOT rebuild train_set with CustomDataset(folder_data); that threw
#    away the split computed above,
#  - TensorDataset takes tensors, not a Dataset, and has no train=/batch_size=
#    keyword arguments — the wrapper is simply dropped,
#  - the DataLoaders must wrap the split Subset instances, not the Dataset
#    class itself, and the undefined train_sampler is removed.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=4, shuffle=True, num_workers=1)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=4, shuffle=False, num_workers=1)
print(train_loader)
print(test_loader)
解决方案
pytorch 社区中@ptrblck 给出的答案。谢谢你
# Gather every image and mask path; the filenames correspond one-to-one,
# so slicing both lists at the same cut keeps each image with its mask.
folder_data = glob.glob("D:\\Neda\\Pytorch\\U-net\\BMMCdata\\data\\*.tif")
folder_mask = glob.glob("D:\\Neda\\Pytorch\\U-net\\BMMCmasks\\masks\\*.tif")

len_data = len(folder_data)
print(len_data)

# 60% of the paths go to training, the rest to testing.
train_size = 0.6
split_index = int(len_data * train_size)

train_image_paths = folder_data[:split_index]
test_image_paths = folder_data[split_index:]
train_mask_paths = folder_mask[:split_index]
test_mask_paths = folder_mask[split_index:]
class CustomDataset(Dataset):
    """Paired image/mask dataset built from two parallel path lists.

    ``image_paths[i]`` and ``target_paths[i]`` must refer to the same
    sample. The *train* flag is accepted for API symmetry but is not
    used by this implementation.
    """

    # Bug fix: in the original the inline comment was broken across two
    # lines, leaving the bare words "happens like transform" as a
    # statement inside the file — a syntax error.
    def __init__(self, image_paths, target_paths, train=True):
        self.image_paths = image_paths
        self.target_paths = target_paths
        # Images are converted to tensors on access.
        self.transforms = transforms.ToTensor()

    def __getitem__(self, index):
        image = Image.open(self.image_paths[index])
        mask = Image.open(self.target_paths[index])
        t_image = self.transforms(image)
        # NOTE: the mask is intentionally returned as a PIL image (not a
        # tensor) — this matches the original answer's behavior; callers
        # that need a tensor mask must convert it themselves.
        return t_image, mask

    def __len__(self):
        # Count of samples (one per image path).
        return len(self.image_paths)
# Wrap the train/test path splits in datasets, then in DataLoaders
# (batches of 4; only the training loader shuffles).
train_dataset = CustomDataset(train_image_paths, train_mask_paths, train=True)
test_dataset = CustomDataset(test_image_paths, test_mask_paths, train=False)

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=4, shuffle=True, num_workers=1)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=4, shuffle=False, num_workers=1)
推荐阅读
- azure - 如何从我的 Azure 订阅中获取所有 VM 的操作系统信息
- powershell - Export-CSV in Powershell just outputs numbers/length
- c# - 嵌套列表日期过滤器
- .net - Nancy FX 中 http 请求的默认超时是多少?
- php - 移动和替换超过一定分辨率的所有子目录中的每个图像
- c++ - 无法在 C++ Visual Studio 中获取字符串的大小
- android - 无法使用侦听器进行 Realm 测试以运行
- php - 如何检查PHP中的数组中是否存在一个字符串或多个字符串
- python - 耦合非线性方程python
- uwp - 应用清单发布者名称 (CN={digits}) 必须与签名证书的主题名称 (CN=anton) 匹配