首页 > 解决方案 > 如何在 CNN 中进行归一化?

问题描述

我是 CNN 的新手,我正在学习食品分类。这是我的代码。在DATASET部分,我将训练数据集和验证数据集从 numpy 更改为 tensor。此时,张量的形状为 ( [9866, 128, 128, 3])。由于通道 3 需要在第一个索引中,所以我使用“转置”方法来更改索引。然后,我使用“Data.TensorDataset”将训练数据和训练标签放在一起,使用“Data.DataLoader”的原因是我需要批量大小来加快速度。

import os
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
from torch.nn import functional as F
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch import optim
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import torch.utils.data as Data



'''Initialize Params'''
epochs = 3
learning_rate = 0.0001
momentum = 0.5
batch_size = 128


'''Load Data'''
def readFile(path,label):
    image_dir = sorted(os.listdir(path))
    # x stores photos
    x = np.zeros((len(image_dir),128,128,3),dtype=np.uint8)
    # y stores labels
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        x[i, :, :] = cv2.resize(img,(128, 128))
        if label:
            y[i] = int(file.split("_")[0])
    if label:
        return x,y
    else:
        return x

train_x, train_y = readFile('./food/training',True)
val_x, val_y = readFile('./food/validation',True)
test_x = readFile('./food/testing',False)
# print("Reading data: ")
# print("Size of training data = {}".format(len(train_x)))
# print("Size of validation data = {}".format(len(val_x)))
# print("Size of Testing data = {}".format(len(test_x)))


'''DataSet'''
train_x = torch.tensor(train_x)
# print(train_x.shape)
train_x = train_x.transpose(1,3).float()
train_y = torch.tensor(train_y)
val_x = torch.tensor(val_x)
val_x = val_x.transpose(1, 3).float()
val_y = torch.tensor(val_y)


train_dataset = Data.TensorDataset(train_x,train_y)
val_dataset = Data.TensorDataset(val_x,val_y)

train_loader = Data.DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)
val_loader = Data.DataLoader(dataset=val_dataset,batch_size=batch_size,shuffle=True) 

我得到了 68% 的训练集准确率,我想改进它。我在网上搜索,发现也许我应该添加规范化。但我只找到了这样的方式

transform = transforms.Compose([
    transforms.ToTensor(), # range [0, 255] -> [0.0,1.0]
    ]
)

我很困惑如何将它与“Data.DataLoader”放在一起。而且我知道还有另一种方法可以像这样将训练数据从 numpy 更改为 dataloader,这是链接

train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(), 
    transforms.RandomRotation(15), 
    transforms.ToTensor(), 
])

test_transform = transforms.Compose([
    transforms.ToPILImage(),                                    
    transforms.ToTensor(),
])


class ImgDataset(Dataset):
    def __init__(self, x, y=None, transform=None):
        self.x = x
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X

train_set = ImgDataset(train_x, train_y, train_transform)
val_set = ImgDataset(val_x, val_y, test_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

我的方式可能很愚蠢,但我想尝试一下,任何帮助将不胜感激。我希望我能解释清楚,并提前感谢。

如果需要,这是完整的代码:

import os
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
from torch.nn import functional as F
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch import optim
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import torch.utils.data as Data



'''Initialize Params'''
epochs = 3
learning_rate = 0.0001
momentum = 0.5
batch_size = 128


transform = transforms.Compose([
    transforms.ToTensor(), # range [0, 255] -> [0.0,1.0]
    ]
)



'''Load Data'''
def readFile(path,label):
    image_dir = sorted(os.listdir(path))
    # x stores photos
    x = np.zeros((len(image_dir),128,128,3),dtype=np.uint8)
    # y stores labels
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        x[i, :, :] = cv2.resize(img,(128, 128))
        if label:
            y[i] = int(file.split("_")[0])
    if label:
        return x,y
    else:
        return x

train_x, train_y = readFile('./food/training',True)
val_x, val_y = readFile('./food/validation',True)
test_x = readFile('./food/testing',False)
# print("Reading data: ")
# print("Size of training data = {}".format(len(train_x)))
# print("Size of validation data = {}".format(len(val_x)))
# print("Size of Testing data = {}".format(len(test_x)))


'''DataSet'''
train_x = torch.tensor(train_x)
# print(train_x.shape)
train_x = train_x.transpose(1,3).float()
train_y = torch.tensor(train_y)
val_x = torch.tensor(val_x)
val_x = val_x.transpose(1, 3).float()
val_y = torch.tensor(val_y)


train_dataset = Data.TensorDataset(train_x,train_y)
val_dataset = Data.TensorDataset(val_x,val_y)

train_loader = Data.DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)
val_loader = Data.DataLoader(dataset=val_dataset,batch_size=batch_size,shuffle=True) 


'''Create Model'''
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # nn.Conv2d(input_channel, output_channel, kernel, stride)
        self.conv1 = nn.Conv2d(3,64,5,1,1) 
        nn.BatchNorm2d(64) 
        self.conv2 = nn.Conv2d(64,128,5,1,1)
        nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128,256,5,1,1)
        nn.BatchNorm2d(256)
        self.conv4 = nn.Conv2d(256,256,5,1,1)
        nn.BatchNorm2d(256)
        self.conv4_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(6*6*256, 1024)  
        # self.fc1 = nn.Linear(512*4*4, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 11)


    def forward(self, x):
        # maxpooling 1
        x = self.conv1(x)
        x = F.relu(x) # 124*124*64
        x = F.max_pool2d(x, 2) # 62*62*20

        # maxpooling 2
        x = self.conv2(x)
        x = F.relu(x) # 58*58*128
        x = F.max_pool2d(x, 2) # 29*29*40

        # maxpooling 3
        x = self.conv3(x)
        x = F.relu(x) # 25*25*256
        x = F.max_pool2d(x, 2) # 12*12*100

        # maxpooling 4
        x = self.conv4(x)
        x = F.relu(x) # 8*8*256
        x = F.max_pool2d(x, 2) # 4*4*256

        x = x.view(-1,6*6*256)
        # print(x.shape)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        # return F.log_softmax(x)
        return F.log_softmax(x)


'''Initialize the network'''
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)


'''Train Model'''
if __name__ == '__main__':
    # print(train_x.shape)  torch.Size([9866, 128, 128, 3])
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)
    for epoch in range(epochs):
        for i,data in enumerate(train_loader):
            x,y = data
            # print(x)
            # print('--------------------')
            sum_loss = 0.0
            optimizer.zero_grad()  
            x,y = Variable(x).cuda(), Variable(y).cuda()
            outputs = net(x)  
            loss = criterion(outputs, y.long())  
            loss.backward()
            optimizer.step()  
            # print(loss.item())
            # sum_loss += loss.item()
            # if i % 500 == 99:
            #     print('[%d,%d] loss:%.03f' %
            #             (epoch + 1, i + 1, sum_loss / 500))
            #     sum_loss = 0.0


        '''Test Model with Validation'''
        net.eval() 
        correct = 0
        total = 0
        for val_data in val_loader:
            x,y = val_data
            x, y = Variable(x).cuda(), Variable(y).cuda()
            output_test = net(x)
            _, predicted = torch.max(output_test, 1)
            total += y.size(0)
            correct += (predicted == y).sum()
        print("correct1: ", correct)
        print("Test acc: {0}".format(correct.item() /len(val_loader)))

标签: machine-learningpytorchconv-neural-network

解决方案


如果你想规范化图像,你可以在 train_transform 和 test_transform 中添加 transforms.Normalize (在你的第二个代码片段中)。像这样的东西:

train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(), 
    transforms.RandomRotation(15), 
    transforms.ToTensor(), 
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

你可以对 test_transform 做同样的事情。


推荐阅读