machine-learning - 如何在 CNN 中进行归一化?
问题描述
我是 CNN 的新手,我正在学习食品分类。这是我的代码。在DATASET部分,我将训练数据集和验证数据集从 numpy 更改为 tensor。此时,张量的形状为 [9866, 128, 128, 3]。由于通道 3 需要在第一个索引中,所以我使用"转置"方法来更改索引。然后,我使用"Data.TensorDataset"将训练数据和训练标签放在一起,使用"Data.DataLoader"的原因是我需要批量大小来加快速度。
import os
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
from torch.nn import functional as F
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch import optim
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import torch.utils.data as Data
'''Initialize Params'''
# Training hyperparameters.
epochs = 3              # number of passes over the training set
learning_rate = 0.0001  # step size for the optimizer
momentum = 0.5          # NOTE(review): not referenced in the visible code — confirm it is used
batch_size = 128        # samples per mini-batch
'''Load Data'''
def readFile(path, label):
    """Read every file in *path* as an image resized to 128x128.

    Args:
        path: directory containing the image files.
        label: when True, parse the class id from each file name (the
            integer before the first underscore) and return labels too.

    Returns:
        x: uint8 array of shape (N, 128, 128, 3) (OpenCV channel order).
        (x, y) when label is True, where y is a (N,) uint8 label array.

    Raises:
        ValueError: if a file cannot be decoded as an image.
    """
    image_dir = sorted(os.listdir(path))
    # x stores photos
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    # y stores labels
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        # BUG FIX: cv2.imread returns None for unreadable/non-image files;
        # fail with a clear message instead of a cryptic cv2.resize error.
        if img is None:
            raise ValueError("Cannot read image file: " + os.path.join(path, file))
        x[i, :, :] = cv2.resize(img, (128, 128))
        if label:
            # class id is the integer before the first "_" in the file name
            y[i] = int(file.split("_")[0])
    if label:
        return x, y
    else:
        return x
# Load the three splits: training/validation are labelled, testing is not.
train_x, train_y = readFile('./food/training',True)
val_x, val_y = readFile('./food/validation',True)
test_x = readFile('./food/testing',False)
# print("Reading data: ")
# print("Size of training data = {}".format(len(train_x)))
# print("Size of validation data = {}".format(len(val_x)))
# print("Size of Testing data = {}".format(len(test_x)))
'''DataSet'''
# Convert the (N, H, W, C) uint8 arrays to float tensors in the (N, C, H, W)
# layout that nn.Conv2d expects.
# BUG FIX: transpose(1, 3) swaps only dims 1 and 3, producing (N, C, W, H) —
# every image ends up spatially transposed, and it only "ran" because
# H == W == 128.  permute(0, 3, 1, 2) is the correct NHWC -> NCHW conversion.
train_x = torch.tensor(train_x)
train_x = train_x.permute(0, 3, 1, 2).float()
train_y = torch.tensor(train_y)
val_x = torch.tensor(val_x)
val_x = val_x.permute(0, 3, 1, 2).float()
val_y = torch.tensor(val_y)
# TensorDataset pairs each image with its label; DataLoader handles batching.
train_dataset = Data.TensorDataset(train_x, train_y)
val_dataset = Data.TensorDataset(val_x, val_y)
train_loader = Data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = Data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True)
我得到了 68% 的训练集准确率,我想改进它。我在网上搜索,发现也许我应该添加规范化。但我只找到了这样的方式
# ToTensor() converts an HWC uint8 image into a CHW float tensor scaled to
# [0.0, 1.0]; a transforms.Normalize(mean, std) step would be appended here.
transform = transforms.Compose([
    transforms.ToTensor(),  # range [0, 255] -> [0.0,1.0]
])
我很困惑如何将它与“Data.DataLoader”放在一起。而且我知道还有另一种方法可以像这样将训练数据从 numpy 更改为 dataloader,这是链接
# Training pipeline: light augmentation (random flip + up-to-15-degree
# rotation) followed by conversion to a [0, 1] CHW float tensor.
# NOTE(review): ToPILImage() presumably converts the numpy array so the
# following transforms can operate on it — verify for your torchvision version.
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
])
# Evaluation pipeline: no augmentation, only the tensor conversion.
test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])
class ImgDataset(Dataset):
    """Dataset over images with optional labels and an optional transform.

    Args:
        x: indexable collection of images/samples.
        y: optional labels; converted to a LongTensor when given.
        transform: optional callable applied to each sample on access.
    """

    def __init__(self, x, y=None, transform=None):
        self.x = x
        # Labels are stored as LongTensor when present (the integer dtype
        # expected by classification loss functions).
        self.y = None if y is None else torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        sample = self.x[index]
        if self.transform is not None:
            sample = self.transform(sample)
        # Unlabelled datasets (e.g. the test split) yield the sample alone.
        if self.y is None:
            return sample
        return sample, self.y[index]
# Wrap the arrays in datasets/loaders; only the training split is shuffled,
# so validation runs over a deterministic order.
train_set = ImgDataset(train_x, train_y, train_transform)
val_set = ImgDataset(val_x, val_y, test_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
我的方式可能很愚蠢,但我想尝试一下,任何帮助将不胜感激。我希望我能解释清楚,并提前感谢。
如果需要,这是完整的代码:
import os
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
from torch.nn import functional as F
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch import optim
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import torch.utils.data as Data
'''Initialize Params'''
# Training hyperparameters.
epochs = 3              # passes over the training set
learning_rate = 0.0001  # Adam step size
momentum = 0.5          # NOTE(review): defined but never used below
batch_size = 128        # samples per mini-batch
# ToTensor() maps an HWC uint8 image to a CHW float tensor in [0.0, 1.0].
# NOTE(review): this `transform` is never applied anywhere below.
transform = transforms.Compose([
    transforms.ToTensor(),  # range [0, 255] -> [0.0,1.0]
])
'''Load Data'''
def readFile(path,label):
    """Load every file under *path* as a (N, 128, 128, 3) uint8 image array.

    When *label* is True, the integer prefix of each file name (before the
    first underscore) is also returned as a (N,) uint8 label array.
    """
    files = sorted(os.listdir(path))
    count = len(files)
    # Pre-allocated output buffers: images and (possibly unused) labels.
    x = np.zeros((count, 128, 128, 3), dtype=np.uint8)
    y = np.zeros(count, dtype=np.uint8)
    for idx, name in enumerate(files):
        image = cv2.imread(os.path.join(path, name))
        x[idx, :, :] = cv2.resize(image, (128, 128))
        if label:
            y[idx] = int(name.split("_")[0])
    return (x, y) if label else x
# Load the three splits: training/validation are labelled, testing is not.
train_x, train_y = readFile('./food/training',True)
val_x, val_y = readFile('./food/validation',True)
test_x = readFile('./food/testing',False)
# print("Reading data: ")
# print("Size of training data = {}".format(len(train_x)))
# print("Size of validation data = {}".format(len(val_x)))
# print("Size of Testing data = {}".format(len(test_x)))
'''DataSet'''
# Convert the (N, H, W, C) uint8 arrays to float tensors in the (N, C, H, W)
# layout that nn.Conv2d expects.
# BUG FIX: transpose(1, 3) swaps only dims 1 and 3, producing (N, C, W, H) —
# every image ends up spatially transposed, and it only "ran" because
# H == W == 128.  permute(0, 3, 1, 2) is the correct NHWC -> NCHW conversion.
train_x = torch.tensor(train_x)
train_x = train_x.permute(0, 3, 1, 2).float()
train_y = torch.tensor(train_y)
val_x = torch.tensor(val_x)
val_x = val_x.permute(0, 3, 1, 2).float()
val_y = torch.tensor(val_y)
# TensorDataset pairs each image with its label; DataLoader handles batching.
train_dataset = Data.TensorDataset(train_x, train_y)
val_dataset = Data.TensorDataset(val_x, val_y)
train_loader = Data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = Data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True)
'''Create Model'''
class Net(nn.Module):
    """4-block CNN classifier for 3x128x128 images over 11 classes.

    Each block is Conv2d(5x5, stride 1, padding 1) -> BatchNorm -> ReLU ->
    MaxPool(2), followed by four fully connected layers.  forward() returns
    per-class log-probabilities (log_softmax), so pair this model with
    nn.NLLLoss rather than nn.CrossEntropyLoss.
    """

    def __init__(self):
        super(Net, self).__init__()
        # nn.Conv2d(input_channels, output_channels, kernel, stride, padding)
        self.conv1 = nn.Conv2d(3, 64, 5, 1, 1)
        # BUG FIX: the BatchNorm2d modules were constructed but never
        # assigned to attributes, so they were discarded and never applied.
        # Register them so they actually participate in the forward pass.
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 128, 5, 1, 1)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, 5, 1, 1)
        self.bn3 = nn.BatchNorm2d(256)
        self.conv4 = nn.Conv2d(256, 256, 5, 1, 1)
        self.bn4 = nn.BatchNorm2d(256)
        self.conv4_drop = nn.Dropout2d()  # NOTE(review): defined but not used in forward
        # Spatial size after the four conv+pool stages (with padding=1 each
        # conv shrinks the side by 2): 128->126->63->61->30->28->14->12->6.
        self.fc1 = nn.Linear(6 * 6 * 256, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 11)

    def forward(self, x):
        # Block 1: (N, 3, 128, 128) -> (N, 64, 63, 63)
        x = F.max_pool2d(F.relu(self.bn1(self.conv1(x))), 2)
        # Block 2: -> (N, 128, 30, 30)
        x = F.max_pool2d(F.relu(self.bn2(self.conv2(x))), 2)
        # Block 3: -> (N, 256, 14, 14)
        x = F.max_pool2d(F.relu(self.bn3(self.conv3(x))), 2)
        # Block 4: -> (N, 256, 6, 6)
        x = F.max_pool2d(F.relu(self.bn4(self.conv4(x))), 2)
        x = x.view(-1, 6 * 6 * 256)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        # Explicit dim=1 (the class dimension) avoids the deprecated
        # implicit-dimension behaviour of log_softmax.
        return F.log_softmax(x, dim=1)
'''Initialize the network'''
net = Net()
# BUG FIX: Net.forward already ends with log_softmax, and
# nn.CrossEntropyLoss applies log_softmax internally, so the original
# setup computed a double log-softmax.  NLLLoss is the correct companion
# for log-probability outputs (NLLLoss(log_softmax(x)) == CrossEntropy(x)).
criterion = nn.NLLLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
'''Train Model'''
if __name__ == '__main__':
    # train_x shape before conversion: torch.Size([9866, 128, 128, 3])
    # BUG FIX: the original selected a torch.device but then hard-coded
    # .cuda() calls, crashing on CPU-only machines; route everything
    # through .to(device).  (Variable wrappers are unnecessary since
    # PyTorch 0.4 — tensors track gradients directly.)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)
    net.train()  # ensure dropout/batch-norm layers are in training mode
    for epoch in range(epochs):
        # BUG FIX: sum_loss was reset inside the batch loop, so it never
        # accumulated anything; reset it once per epoch instead.
        sum_loss = 0.0
        for i, data in enumerate(train_loader):
            x, y = data
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = net(x)
            loss = criterion(outputs, y.long())
            loss.backward()
            optimizer.step()
            sum_loss += loss.item()
        print('[epoch %d] avg loss: %.03f' % (epoch + 1, sum_loss / len(train_loader)))
    '''Test Model with Validation'''
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for val_data in val_loader:
            x, y = val_data
            x, y = x.to(device), y.to(device)
            output_test = net(x)
            _, predicted = torch.max(output_test, 1)
            total += y.size(0)
            correct += (predicted == y).sum()
    print("correct1: ", correct)
    # BUG FIX: accuracy must divide by the number of samples (total), not
    # by the number of batches (len(val_loader)).
    print("Test acc: {0}".format(correct.item() / total))
解决方案
如果你想规范化图像,你可以在 train_transform 和 test_transform 中添加 transforms.Normalize (在你的第二个代码片段中)。注意 Normalize 必须放在 ToTensor() 之后,因为它作用于取值范围为 [0, 1] 的 CHW 浮点张量;使用均值和标准差 (0.5, 0.5, 0.5) 会把每个通道映射到 [-1, 1]。像这样的东西:
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    # Normalize must come after ToTensor: it expects a CHW float tensor and
    # maps each channel x -> (x - mean) / std, here [0, 1] -> [-1, 1].
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
你可以对 test_transform 做同样的事情。
推荐阅读
- javascript - 如何在 Dynamics CRM 中使用 openForm 设置多选字段值?
- react-native - 如何仅在图像太大时使用 react-native-image-picker 压缩图像
- c# - 缩短一串数字
- matlab - 隔离信号中的“活动”部分
- javascript - 无法读取“...”的属性未定义
- zarr - 如何减少/删除 zarr 数组
- php - 在 PHP 中从 curl 重新计算 div
- python - 命名变量或附加字典
- docker - 如何仅下载和安装 boost headers?
- listview - Flutter中ListView中显示多个文档数据