RuntimeError: mat1 dim 1 must match mat2 dim 0 after adding data augmentation

Problem description

I am getting this error in my training loop. The model trained fine before I added the augmentation to my datasets.py file, and I do not understand what the error means. My model trains on images. Please see the code below.

Error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-9-efc5be094f2a> in <module>()
    102 train.train_model(model, train_loader, val_loader, optimizer, criterion,
    103                   IMG_CLASS_NAMES, NUM_EPOCHS, project_name = "CSE5DL Assignment Task 1",
--> 104                   ident_str= "Didn't used anything sp")

8 frames
/content/drive/My Drive/DL Assignment/train.py in train_model(model, train_loader, val_loader, optimizer, criterion, class_names, n_epochs, project_name, ident_str)
    163           for epoch in tq.tqdm(range(n_epochs), total=n_epochs, desc='Epochs'):
    164               _, _, train_metrics_dict = \
--> 165                   train_epoch(epoch, model, optimizer, criterion, train_loader)
    166               val_lbls, val_outs, val_metrics_dict = \
    167                   val_epoch(epoch, model, criterion, val_loader)

/content/drive/My Drive/DL Assignment/train.py in train_epoch(epoch, model, optimizer, criterion, loader)
     61           optimizer.zero_grad()
     62 
---> 63           outputs = model(inputs)
     64 
     65           loss = criterion(outputs, lbls)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

/content/drive/My Drive/DL Assignment/models.py in forward(self, x)
     40 
     41     def forward(self, x):
---> 42       x = self.seq(x)
     43      # print(x.shape)
     44       return x

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/container.py in forward(self, input)
    117     def forward(self, input):
    118         for module in self:
--> 119             input = module(input)
    120         return input
    121 

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
     92 
     93     def forward(self, input: Tensor) -> Tensor:
---> 94         return F.linear(input, self.weight, self.bias)
     95 
     96     def extra_repr(self) -> str:

/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
   1751     if has_torch_function_variadic(input, weight):
   1752         return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
-> 1753     return torch._C._nn.linear(input, weight, bias)
   1754 
   1755 

RuntimeError: mat1 dim 1 must match mat2 dim 0

datasets.py code:

import collections
import csv
from pathlib import Path
import os
import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
import pandas as pd
from torch.utils.data import Dataset
from torchvision.datasets import ImageFolder

to_tensor_transform = transforms.ToTensor()


train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(224),
    transforms.RandomVerticalFlip(0.5),
    transforms.ToTensor(),
])
    
class LesionDataset(Dataset):
    def __init__(self, img_dir, labels_fname, augment=False):
        self.img_dir = img_dir
        self.augment = augment
        # DataFrame with an 'image' id column followed by one-hot label columns.
        self.labels_fname = pd.read_csv(labels_fname)
        # Precompute each sample's class index once, instead of re-deriving
        # it from the whole DataFrame on every __getitem__ call.
        self.labels = np.argmax(
            self.labels_fname.drop(['image'], axis=1).to_numpy(), axis=1)

    def __len__(self):
        return len(self.labels_fname)

    def __getitem__(self, idx):
        image_id = self.labels_fname.iloc[idx, 0]
        image = Image.open(
            os.path.join(self.img_dir, image_id + '.jpg')).convert("RGB")
        label = self.labels[idx]
        if self.augment:
            image = train_transforms(image)
        else:
            image = to_tensor_transform(image)
        return image, label

train.py code:

from datetime import datetime
import numpy as np
import torch
import torch.nn as nn
import tqdm.notebook as tq
import sklearn.metrics
import wandb
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix

import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt

device = torch.device("cpu")
if torch.cuda.is_available():
      device = torch.device("cuda:0")
      torch.cuda.set_device(device)

def plot_confusion_matrix(all_lbls, all_outputs, class_names, normalize = True):
      cm = confusion_matrix(all_lbls, all_outputs)

      # Normalize rows to per-class proportions when requested.
      if normalize:
          cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

      df_cm = pd.DataFrame(cm, class_names, class_names)

      ax = sn.heatmap(df_cm, annot=True, cmap='flare')

      # TODO Task 1c - Set axis labels and show plot
      plt.ylabel('True label')
      plt.xlabel('Predicted label')
      plt.show()

def count_classes(preds):
      '''
      Counts the number of predictions per class given preds, a tensor
      shaped [batch, n_classes], where the maximum per preds[i]
      is considered the "predicted class" for batch element i.
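      For example, count_classes(torch.tensor([[0.9, 0.1], [0.2, 0.8]]))
      returns [1, 1].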
      '''
      pred_classes = preds.argmax(dim=1)
      n_classes = preds.shape[1]
      return [(pred_classes == c).sum().item() for c in range(n_classes)]

def train_epoch(epoch, model, optimizer, criterion, loader):
      epoch_loss = 0

      model.train()
      # At the end all_outputs should store the output for each sample in the training data.
      all_outputs = []
      # At the end all_lbls should store the ground truth label for each sample in the training data.
      all_lbls = []
      for i, (inputs, lbls) in enumerate(loader):
          inputs, lbls = inputs.to(device), lbls.to(device)

          # Update model weights
          # TODO: Task 1b - Perform a forward pass, backward pass and 
          # update the weights of your model with the batch of data.
          optimizer.zero_grad()

          outputs = model(inputs)

          loss = criterion(outputs, lbls)

          # Compute gradients and update model weights
          loss.backward()
          optimizer.step()
          
          # TODO: Task 2d - Temporarily uncomment these lines
          # print(count_classes(outputs))
          # if i > 9:
          #     assert False

          # Collect metrics
          epoch_loss += loss.item()
          all_outputs.extend(outputs.tolist())
          all_lbls.extend(lbls.tolist())
      
      all_outputs = np.array(all_outputs)
      all_lbls = np.array(all_lbls)
      all_outputs =  np.argmax(all_outputs, axis = 1)

      # Calculate epoch metrics
      # TODO Task 1b - Use all_outputs and all_lbls with
      # sklearn.metrics.accuracy_score and sklearn.metrics.recall_score
      # to calculate the accuracy and unweighted average recall.
      # Note sklearn.metrics.accuracy_score and sklearn.metrics.recall_score
      # only take numpy arrays and also you need to convert all_outputs
      # to the actual predicted class for each sample.
      
      acc = sklearn.metrics.accuracy_score(all_lbls, all_outputs)
      uar = sklearn.metrics.recall_score(all_lbls, all_outputs, average='macro')

      metrics_dict = {
          'Loss/train': (epoch_loss/len(loader)),
          'Accuracy/train': acc,
          'UAR/train': uar,
      }

      return all_lbls, all_outputs, metrics_dict

def val_epoch(epoch, model, criterion, loader):
      epoch_loss = 0

      model.eval()
      all_outputs = []
      all_lbls = []
      for inputs, lbls in loader:
          inputs, lbls = inputs.to(device), lbls.to(device)

          # TODO Task 1b - Perform a forward pass through your model and 
          # obtain the validation loss (use torch.no_grad())
          with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, lbls)

          # Collect metrics
          epoch_loss += loss.item()
          all_outputs.extend(outputs.tolist())
          all_lbls.extend(lbls.tolist())
      
      all_outputs = np.array(all_outputs)
      all_lbls = np.array(all_lbls)
      all_outputs =  np.argmax(all_outputs, axis = 1)

      # Calculate epoch metrics
      # TODO Task 1b - Use all_outputs and all_lbls with
      # sklearn.metrics.accuracy_score and sklearn.metrics.recall_score
      # to calculate the accuracy and unweighted average recall.
      # Note sklearn.metrics.accuracy_score and sklearn.metrics.recall_score
      # only take numpy arrays and also you need to convert all_outputs
      # to the actual predicted class for each sample.
  
      acc = sklearn.metrics.accuracy_score(all_lbls, all_outputs)
      uar = sklearn.metrics.recall_score(all_lbls, all_outputs, average='macro')

      metrics_dict = {
          'Loss/val': (epoch_loss/len(loader)),
          'Accuracy/val': acc,
          'UAR/val': uar,
      }

      return all_lbls, all_outputs, metrics_dict


def train_model(model, train_loader, val_loader, optimizer, criterion,
                  class_names, n_epochs, project_name, ident_str=None):
      model.to(device)

      # Initialise Weights and Biases project
      if ident_str is None:
        ident_str = datetime.now().strftime("%Y%m%d_%H%M%S")
      exp_name = f"{model.__class__.__name__}_{ident_str}"
      run = wandb.init(project = project_name, name=exp_name)

      try:
          # Train by iterating over epochs
          for epoch in tq.tqdm(range(n_epochs), total=n_epochs, desc='Epochs'):
              _, _, train_metrics_dict = \
                  train_epoch(epoch, model, optimizer, criterion, train_loader)
              val_lbls, val_outs, val_metrics_dict = \
                  val_epoch(epoch, model, criterion, val_loader)
              wandb.log({**train_metrics_dict, **val_metrics_dict})
      finally:
          run.finish()

      plot_confusion_matrix(val_lbls, val_outs, class_names, normalize = True)

Model training cell code:

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

import datasets
import models
import train
from train import device
from train import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

torch.manual_seed(42)

# The seven lesion classes, in the order of the one-hot label columns in the CSVs.
IMG_CLASS_NAMES = ["MEL", "NV", "BCC", "AKIEC", "BKL", "DF", "VASC"]

NUM_EPOCHS = 5
BATCH_SIZE = 64

model = models.SimpleBNConv()

model.to(device)

# Create datasets/loaders
# TODO Task 1b - Create the data loaders from LesionDatasets
# TODO Task 1d - Account for data issues, if applicable

train_dataset = datasets.LesionDataset('/content/data/img',
                            '/content/data/img/train.csv', augment = True)
val_dataset = datasets.LesionDataset('/content/data/img',
                            '/content/data/img/val.csv', augment = False)

# TODO Task 1d - Account for data issues, if applicable
# Proportion of training samples belonging to each class
# (fraction of non-zero entries in each one-hot label column).
label_df = train_dataset.labels_fname.drop(['image'], axis=1)
train_proportions = (label_df != 0).mean()

print("Train data class proportions:")
for name, prop in train_proportions.items():
    print(f"  {name}: {prop:.3f}")

# We can see that the train dataset is skewed: around 70% of the labels are of class NV.

# Compute a sampling weight for each of the 7 classes in the training set:
# the rarer the class, the larger its weight.
class_weights = (1 - train_proportions).to_numpy()

# Assign each training sample the weight of its class (the argmax of its
# one-hot label row), so rare classes are drawn more often by the sampler.
train_labels = np.argmax(label_df.to_numpy(), axis=1)
weights = [class_weights[lbl] for lbl in train_labels]


sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(train_dataset))
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=1, sampler=sampler)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=1)

# defining the Optimizer 
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train model
# TODO Task 1c: Set to ident_str to a string that identifies this particular
#               training run. Note this line in the training code
#                     exp_name = f"{model.__class__.__name__}_{ident_str}"
#               So it means that the model class name is already included in the
#               exp_name string. You can consider adding other information particular
#               to this training run, e.g. learning rate (lr) used, 
#               augmentation (aug) used or not, etc.
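#               For example, with the model used here, ident_str = "lr0.001_aug"
#               would give exp_name = "SimpleBNConv_lr0.001_aug".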


train.train_model(model, train_loader, val_loader, optimizer, criterion,
                  IMG_CLASS_NAMES, NUM_EPOCHS, project_name = "CSE5DL Assignment Task 1",
                  ident_str= "Didn't used anything sp")

I expected the augmentation to improve the model's accuracy. Please suggest what else I can try to improve the accuracy. I am using the following layers:

10 nn.Conv2d layers with 8, 8, 16, 16, 32, 32, 64, 64, 128 and 128 output channels respectively, 5 nn.MaxPool2d layers interspersed before each change in the number of output channels, and nn.BatchNorm2d with nn.ReLU() as the activation function (see the sketch below).
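
For reference, here is a minimal sketch of a model matching that description. This is not the actual models.py: the 3x3 kernels, the padding, the placement of the fifth pooling layer, and the final Flatten/Linear classifier head are all assumptions; only the channel counts, pool count, and BatchNorm/ReLU pattern come from the description above.

import torch.nn as nn

class SimpleBNConv(nn.Module):
    def __init__(self, n_classes=7):
        super().__init__()
        blocks = []
        in_ch = 3
        # 10 conv layers; a MaxPool2d before each increase in channel count,
        # plus one final pool, giving 5 pools in total (an assumption).
        for out_ch in [8, 8, 16, 16, 32, 32, 64, 64, 128, 128]:
            if blocks and out_ch != in_ch:
                blocks.append(nn.MaxPool2d(2))
            blocks += [nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                       nn.BatchNorm2d(out_ch),
                       nn.ReLU()]
            in_ch = out_ch
        blocks.append(nn.MaxPool2d(2))
        # Assumes 224x224 inputs: 224 / 2**5 = 7, giving 128 * 7 * 7 features.
        blocks += [nn.Flatten(), nn.Linear(128 * 7 * 7, n_classes)]
        self.seq = nn.Sequential(*blocks)

    def forward(self, x):
        return self.seq(x)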

Thank you.

Tags: python, tensorflow, deep-learning, training-data, data-augmentation

Solution


Update: I fixed the error by resizing the image in the datasets.py code.
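
The mismatch is an input-size problem: RandomCrop(224) makes every augmented training image 224x224, while the model's first nn.Linear layer was presumably sized for the flattened features of the original, larger images, so F.linear receives a mat1 whose dim 1 no longer matches the weight matrix's dim 0. The fix is to resize every image, augmented or not, to one fixed size the model is built for. Below is a minimal sketch of what that could look like in datasets.py, assuming a 224x224 target (the crop size already used above); the val_transforms name is hypothetical.

import torchvision.transforms as transforms

IMG_SIZE = 224  # assumed target size; the model's first Linear layer must match it

# Resize above the crop size so RandomCrop(224) always fits, and so every
# training sample comes out as a 224x224 tensor.
train_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(IMG_SIZE),
    transforms.RandomVerticalFlip(0.5),
    transforms.ToTensor(),
])

# Validation/non-augmented samples are resized to the same shape.
val_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
])

The non-augmented branch of __getitem__ would then apply val_transforms instead of the bare to_tensor_transform, so training and validation batches have identical shapes.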

