PyTorch CNN loss not changing

Problem description

I am building a CNN in PyTorch to classify images of people wearing face masks versus people not wearing them. The images are converted to 256x256 with 3 channels.

When I run the code, the loss stays at whatever its initial value happens to be and never changes. I have tried different learning rates, with no effect. I also tried a simple model with a single linear layer to rule out an incorrect architecture. My guess is that the training loop is wrong, but compared with other example training loops my code looks correct.
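
For reference, the single-linear-layer model mentioned above was along the following lines (a rough reconstruction: the flattened 3x256x256 input and 3 output classes match the CNN below, everything else is illustrative):

import torch.nn as nn

# Minimal single-linear-layer baseline used as a sanity check.
# Input is a flattened 3x256x256 image, output is one score per class.
class NetLinear(nn.Module):
    def __init__(self):
        super(NetLinear, self).__init__()
        self.fc = nn.Linear(3 * 256 * 256, 3)

    def forward(self, x):
        x = x.reshape(-1, 3 * 256 * 256)
        return self.fc(x)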

Model

import torch.nn as nn
import torch.nn.functional as F

class NetConv(nn.Module):
    def __init__(self):
        super(NetConv, self).__init__()
        # self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
        # self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        # self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)

        self.fc1 = nn.Linear(30*30*32, 1000)
        self.fc2 = nn.Linear(1000, 200)
        self.fc3 = nn.Linear(200, 3)

    def forward(self, x):
        # print(x.shape)
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2)

        x = x.reshape(-1, 30*30*32)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = F.softmax(x, dim=1)

        # print(x.shape)
 
        return x
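
The 30*30*32 flatten size is consistent with a 256x256 input: each unpadded 3x3 convolution removes 2 pixels and each 2x2 max-pool halves the spatial size (256 → 254 → 127 → 125 → 62 → 60 → 30). A quick check with a dummy batch (just a sketch for verification, not part of the training code):

import torch

# Push two random 3x256x256 images through the network and check the output shape.
model = NetConv()
dummy = torch.randn(2, 3, 256, 256)
print(model(dummy).shape)  # torch.Size([2, 3])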

Data loader

import os

import cv2
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from torch.utils.data import Dataset

class faceMaskDataset(Dataset):
    def __init__(self, img_folder, annot_folder, transform=None):
        # Extracting image name and class from xml file
        desc = []
        for dirname, _, filenames in os.walk(annot_folder):
            for filename in filenames:
                desc.append(os.path.join(dirname, filename))

        img_name,label = [],[]

        for d in desc:
            content = []
            n = []

            with open(d, "r") as file:
                content = file.readlines()
            content = "".join(content)
            soup = BeautifulSoup(content,"html.parser")
            file_name = soup.filename.string
            name_tags = soup.find_all("name")
            
            for t in name_tags:
                n.append(t.get_text())
                
            # selecting tag with maximum occurence in an image (If it has multiple tags)
            name = max(set(n), key = n.count)
        
            img_name.append(file_name)
            label.append(name)

        labels = pd.get_dummies(label)
        print(labels.head())

        # Our target classes
        classes = list(labels.columns)
        print(classes)

        data, target = [],[]
        img_h, img_w = 256, 256

        # Loading images and converting them to pixel array
        for i in range(len(img_name)):
            name = os.path.join("./images/1", img_name[i])
            image = cv2.imread(name)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, (img_w, img_h), interpolation=cv2.INTER_AREA)
            
            data.append(image)
            target.append(tuple(labels.iloc[i,:]))

        print(type(data))
        # data = np.array(data)
        data = np.array(data) / 255 # Normalise pixel data to between 0 and 1
        target = np.array(target)
        
        data = np.swapaxes(data, 1, 3)
        data = np.swapaxes(data, 2, 3)

        self.data = data
        
        self.target = target
        self.transform = transform

    def __len__(self):
        return len(self.target)

    def __getitem__(self, index):
        image = self.data[index,:]
        label = (self.target[index])
        return (image, label)
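
The dataset is then wrapped in a DataLoader along these lines (the annotation folder path and batch size are placeholders rather than the exact values used; the image folder matches the path hard-coded above):

from torch.utils.data import DataLoader

# Placeholder construction of the training DataLoader.
dataset = faceMaskDataset(img_folder="./images/1", annot_folder="./annotations")
train_dl = DataLoader(dataset, batch_size=32, shuffle=True)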

Training loop

import torch

def train(model, optimizer, loss_fn, train_dl, val_dl, epochs=20, device='cuda'):

    for epoch in range(epochs):

        model.train()
        train_loss         = 0.0
        num_train_correct  = 0
        num_train_examples = 0

        for i, (image, target) in enumerate(train_dl):
            image = image.to(device).float()
            target = target.to(device)

            optimizer.zero_grad()

            output = model(image)

            loss = loss_fn(output, torch.max(target, 1)[1]) 

            loss.backward()
            optimizer.step()

            train_loss         += loss.item() * image.size(0)
            num_train_correct  += (torch.max(output, 1)[1] == torch.max(target, 1)[1]).sum().item()

            num_train_examples += image.shape[0]

        train_acc   = num_train_correct / num_train_examples
        train_loss  = train_loss / len(train_dl.dataset)
        print("Epoch: %2d, Loss: %.3f, Acc: %.3f" % (epoch, train_loss, train_acc))

Tags: python, machine-learning, pytorch, conv-neural-network, loss-function

Solution
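
Assuming the loss function is nn.CrossEntropyLoss, which is what the torch.max(target, 1)[1] class-index targets in the training loop suggest, the most likely culprit is the F.softmax call at the end of forward. CrossEntropyLoss already applies log-softmax internally, so feeding it outputs that have been softmaxed once compresses the values into a narrow range, the gradients become tiny, and the loss appears frozen near its initial value. The usual fix is to return raw logits from the model and let the loss handle the normalisation, for example (a sketch of the changed forward, assuming the rest of NetConv stays as above):

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2)

        x = x.reshape(-1, 30*30*32)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # return raw logits; CrossEntropyLoss applies log-softmax itself
        return x

The accuracy computation is unaffected, because torch.max(output, 1)[1] picks the same class for logits as for their softmax (softmax is monotonic). If probabilities are needed at inference time, apply F.softmax outside the model.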

