python - Pytorch CNN 损失没有改变
问题描述
我正在使用 Pytorch 制作一个 CNN,用于解决戴口罩和不戴口罩的人之间的图像分类问题。图像被转换为具有 3 个通道的 256x256。
运行代码时,无论初始损失值是多少,都将保持不变。我试过改变学习率,但没有改变。我还尝试使用一个简单的单一线性层来检查它是否是不正确的模型。我的猜测是训练循环不正确,但与其他示例训练循环相比,我的代码看起来是正确的。
模型
class NetConv(nn.Module):
def __init__(self):
super(NetConv, self).__init__()
# self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
# self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
# self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
self.conv1 = nn.Conv2d(in_channels= 3, out_channels = 8, kernel_size = 3)
self.conv2 = nn.Conv2d(in_channels= 8, out_channels = 16, kernel_size = 3)
self.conv3 = nn.Conv2d(in_channels= 16, out_channels = 32, kernel_size = 3)
self.fc1 = nn.Linear(30*30*32, 1000)
self.fc2 = nn.Linear(1000, 200)
self.fc3 = nn.Linear(200, 3)
def forward(self, x):
# print(x.shape)
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2)
x = F.relu(self.conv3(x))
x = F.max_pool2d(x, 2)
x = x.reshape(-1, 30*30*32)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
x = F.softmax(x, dim=1)
# print(x.shape)
return x
数据加载器
class faceMaskDataset(Dataset):
def __init__(self, img_folder, annot_folder, transform=None):
# Extracting image name and class from xml file
desc = []
for dirname, _, filenames in os.walk(annot_folder):
for filename in filenames:
desc.append(os.path.join(dirname, filename))
img_name,label = [],[]
for d in desc:
content = []
n = []
with open(d, "r") as file:
content = file.readlines()
content = "".join(content)
soup = BeautifulSoup(content,"html.parser")
file_name = soup.filename.string
name_tags = soup.find_all("name")
for t in name_tags:
n.append(t.get_text())
# selecting tag with maximum occurence in an image (If it has multiple tags)
name = max(set(n), key = n.count)
img_name.append(file_name)
label.append(name)
labels = pd.get_dummies(label)
print(labels.head())
# Our target classes
classes = list(labels.columns)
print(classes)
data, target = [],[]
img_h, img_w = 256, 256
# Loading images and converting them to pixel array
for i in range(len(img_name)):
name = os.path.join("./images/1", img_name[i])
image = cv2.imread(name)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (img_w, img_h), cv2.INTER_AREA)
data.append(image)
target.append(tuple(labels.iloc[i,:]))
print(type(data))
# data = np.array(data)
data = np.array(data) / 255 # Normalise pixel data to between 0 and 1
target = np.array(target)
data = np.swapaxes(data, 1, 3)
data = np.swapaxes(data, 2, 3)
self.data = data
self.target = target
self.transform = transform
def __len__(self):
return len(self.target)
def __getitem__(self, index):
image = self.data[index,:]
label = (self.target[index])
return (image, label)
训练循环
def train(model, optimizer, loss_fn, train_dl, val_dl, epochs=20, device='cuda'):
for epoch in range(epochs):
model.train()
train_loss = 0.0
num_train_correct = 0
num_train_examples = 0
for i, (image, target) in enumerate(train_dl):
image = image.to(device).float()
target = target.to(device)
optimizer.zero_grad()
output = model(image)
loss = loss_fn(output, torch.max(target, 1)[1])
loss.backward()
optimizer.step()
train_loss += loss.data.item() * image.size(0)
num_train_correct += (torch.max(output, 1)[1] == torch.max(target, 1)[1]).sum().item()
num_train_examples += image.shape[0]
train_acc = num_train_correct / num_train_examples
train_loss = train_loss / len(train_dl.dataset)
print("Epoch: %2d, Loss: %.3f, Acc: %.3f" % (epoch, train_loss, train_acc))
解决方案
推荐阅读
- java - 如何使用k8s的client-java-api创建路由
- google-apps-script - 谷歌工作表 appscript 将标签复制到新工作表
- reactjs - 如何在 for 循环中渲染反应组件?
- xslt - 如何根据条件使用 for-each - XSLT?
- java - 如何设置元素的高度以匹配另一个元素的高度?在安卓中
- android - 使用 ConstraintLayout 在 ImageView 上放置 TextView 时出现问题
- ios - 如果iOS应用程序进入后台并返回“nw_read_request_report [C3]接收失败并出现错误”软件导致连接中止“
- r - r中从列表到矩阵
- android - 我的 Button-Android 工作室上未显示文本
- php - 如何在 Laravel 中创建广告展示系统