Implementing a Network in Network (NiN) CNN model with pytorch-lightning

Problem description

I am trying to implement the NiN model, essentially reproducing the code from d2l. Here is my code.

import pandas as pd
import torch
from torch import nn
import torchmetrics
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
from torchvision.datasets import FashionMNIST
import wandb
from pytorch_lightning.loggers import WandbLogger
wandb.login()

## class definition
class Lightning_nin(pl.LightningModule):
  def __init__(self):
    super().__init__()
    self.accuracy = torchmetrics.Accuracy(top_k=1)
    self.model = nn.Sequential(
                self.nin_block(1, 96, kernel_size=11, strides=4, padding=0),
                nn.MaxPool2d(3, stride=2),
                self.nin_block(96, 256, kernel_size=5, strides=1, padding=2),
                nn.MaxPool2d(3, stride=2),
                self.nin_block(256, 384, kernel_size=3, strides=1, padding=1),
                nn.MaxPool2d(3, stride=2), nn.Dropout(0.5),
                # There are 10 label classes
                self.nin_block(384, 10, kernel_size=3, strides=1, padding=1),
                nn.AdaptiveAvgPool2d((1, 1)),
                # Transform the four-dimensional output into two-dimensional output with a
                # shape of (batch size, 10)
                nn.Flatten())
    # Xavier-initialize any top-level Linear/Conv2d layers
    for layer in self.model:
      if type(layer) == nn.Linear or type(layer) == nn.Conv2d:
        nn.init.xavier_uniform_(layer.weight)
  def nin_block(self,in_channels, out_channels, kernel_size, strides, padding):
      return nn.Sequential(
          nn.Conv2d(in_channels, out_channels, kernel_size, strides, padding),
          nn.ReLU(), nn.Conv2d(out_channels, out_channels, kernel_size=1),
          nn.ReLU(), nn.Conv2d(out_channels, out_channels, kernel_size=1),
          nn.ReLU())
      
  def forward(self, x):
      x = self.model(x)
      return x
 
  def loss_fn(self,logits,y):
    loss = nn.CrossEntropyLoss()
    return loss(logits,y)
    
  def training_step(self,train_batch,batch_idx):
    X, y = train_batch
    logits = self.forward(X)
    loss = self.loss_fn(logits,y)
    self.log('train_loss',loss)
    m = nn.Softmax(dim=1)
    output = m(logits)
    self.log('train_acc',self.accuracy(output,y))
    return loss
  
  def validation_step(self,val_batch,batch_idx):
    X,y = val_batch
    logits = self.forward(X)
    loss = self.loss_fn(logits,y)
    self.log('test_loss',loss)
    m = nn.Softmax(dim=1)
    output = m(logits)
    self.log('test_acc',self.accuracy(output,y))
  
  def configure_optimizers(self):
    optimizer = torch.optim.SGD(self.model.parameters(),lr= 0.1)
    return optimizer
  
class Light_DataModule(pl.LightningDataModule):
  def __init__(self, resize=None):
    super().__init__()
    # store resize unconditionally so setup() can always read self.resize
    self.resize = resize
 
  def setup(self, stage):
    # transforms for images
    trans = [transforms.ToTensor()]
    if self.resize:
      trans.insert(0, transforms.Resize(self.resize))
    trans = transforms.Compose(trans)
    # prepare transforms standard to MNIST
    self.mnist_train = FashionMNIST(root="../data", train=True, download=True, transform=trans)
    self.mnist_test = FashionMNIST(root="../data", train=False, download=True, transform=trans)
 
  def train_dataloader(self):
    return DataLoader(self.mnist_train, batch_size=128,shuffle=True,num_workers=4)
 
  def val_dataloader(self):
    return DataLoader(self.mnist_test, batch_size=128,num_workers=4)

## Train model
data_module = Light_DataModule(resize=224)
wandb_logger = WandbLogger(project="d2l", name='NIN')
model = Lightning_nin()
trainer = pl.Trainer(logger=wandb_logger, max_epochs=4, gpus=1, progress_bar_refresh_rate=1)
trainer.fit(model, data_module)
wandb.finish()

After running the code, the accuracy stays at only 0.1 (i.e., chance level for 10 classes), and I can't figure out where I went wrong. I have been able to implement other CNNs (such as VGG) with the same template. After 10 epochs the accuracy should be close to 0.9.

Tags: deep-learning, pytorch, pytorch-lightning

Solution


The kernel_size & strides are very large for an image size of 224. They greatly reduce the information passed on to the subsequent layers. Try reducing them. Also, VGG is a very carefully designed architecture.
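To see how much spatial information gets thrown away, you can push a dummy input through the network and print each stage's output shape. A minimal sketch, reusing the Lightning_nin class defined in the question (with kernel_size=11 and strides=4, the 224x224 input already shrinks to 54x54 after the very first block):

import torch

model = Lightning_nin()            # the class defined in the question
x = torch.randn(1, 1, 224, 224)    # dummy batch: one 224x224 grayscale image
for layer in model.model:
    x = layer(x)                   # run each top-level stage in turn
    print(f'{layer.__class__.__name__:18s} -> {tuple(x.shape)}')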

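One way to act on this is to shrink the kernels and strides, especially in the first block. Below is a sketch of a gentler variant; the kernel_size=5 / strides=2 front end and the 3x3 blocks after it are illustrative guesses, not tuned values from d2l:

import torch
from torch import nn

def nin_block(in_channels, out_channels, kernel_size, strides, padding):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size, strides, padding),
        nn.ReLU(), nn.Conv2d(out_channels, out_channels, kernel_size=1),
        nn.ReLU(), nn.Conv2d(out_channels, out_channels, kernel_size=1),
        nn.ReLU())

# Hypothetical gentler front end: kernel 5 / stride 2 instead of 11 / 4,
# and 3x3 blocks afterwards. Only the conv hyperparameters change.
model = nn.Sequential(
    nin_block(1, 96, kernel_size=5, strides=2, padding=2),
    nn.MaxPool2d(3, stride=2),
    nin_block(96, 256, kernel_size=3, strides=1, padding=1),
    nn.MaxPool2d(3, stride=2),
    nin_block(256, 384, kernel_size=3, strides=1, padding=1),
    nn.MaxPool2d(3, stride=2), nn.Dropout(0.5),
    nin_block(384, 10, kernel_size=3, strides=1, padding=1),
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten())

print(model(torch.randn(1, 1, 224, 224)).shape)  # torch.Size([1, 10])

Dropping a Sequential like this into Lightning_nin.__init__ leaves the rest of the training code untouched.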
