首页 > 解决方案 > 报错:Expected hidden[0] size (2, 8, 256), got [8, 256]

问题描述

我的初始隐藏状态 h0 和输入 x 的形状看起来都是正确的,如下所示。

print(h0.shape)
print(x.shape)

torch.Size([2, 8, 256])
torch.Size([8, 300, 300])

但我仍然遇到如下错误:Expected hidden[0] size (2, 8, 256), got [8, 256]

有什么问题?

整个代码如下。

import torch
import torch.nn as nn
import torchvision
import matplotlib.pyplot as plt
import torchvision.transforms as tt
from torchvision.datasets import ImageFolder
from PIL import Image
import numpy as np
from torch.autograd import Variable

# Training hyperparameters.
learning_rate = 0.001
batch_size = 8
num_epochs = 20
num_classes = 5

# Model dimensions.
hidden_size = 256  # width of each LSTM layer's hidden state
num_layers = 2  # two LSTM layers are stacked
# The sequence length equals the per-step feature size (images are reshaped
# to (batch, seq_len, input_size) before being fed to the model).
# NOTE(review): input_size is not defined in this excerpt - confirm it is set earlier.
seq_len = input_size
class LSTM(nn.Module):
    """Stacked LSTM classifier that predicts a class from the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the hidden state of every LSTM layer.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output classes (size of the final logits).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # batch_first=True: inputs and outputs are (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # The classifier head consumes the last layer's hidden state.
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: input tensor of shape (batch, seq_len, input_size).
        """
        # An LSTM carries two states: the hidden state h and the cell state c.
        # Both must be passed together as a tuple (h_0, c_0); passing a lone
        # tensor makes nn.LSTM index into it and fail with
        # "Expected hidden[0] size (L, N, H), got [N, H]".
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        # Allocate the states on the input's device/dtype so the module does
        # not depend on a global `device` variable.
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        # out: (batch, seq_len, hidden_size) - last layer's output at every step.
        out, _ = self.lstm(x, (h0, c0))

        # Only the final time step is decoded: (batch, hidden_size).
        out = out[:, -1, :]
        return self.fc(out)
        
        

stacked_lstm_model = LSTM(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer. CrossEntropyLoss expects raw logits (it applies
# log-softmax internally), so the model has no softmax layer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(stacked_lstm_model.parameters(), lr=learning_rate)

# Train the model, running a validation pass after every epoch.
n_total_steps = len(train_dl)
for epoch in range(num_epochs):
    stacked_lstm_model.train()
    t_losses = []
    for i, (images, labels) in enumerate(train_dl):
        # Flatten each batch to (batch, seq_len, input_size),
        # e.g. [8, 1, 300, 300] -> [8, 300, 300].
        images = images.reshape(-1, seq_len, input_size).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = stacked_lstm_model(images)
        loss = criterion(outputs, labels)
        # Store a plain float: keeping the loss tensor would keep every
        # step's autograd graph alive until the end of the epoch.
        t_losses.append(loss.item())

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
    avgd_trainloss = sum(t_losses) / len(t_losses)

    # Validation: no gradients needed, model in eval mode.
    stacked_lstm_model.eval()
    v_losses = []
    n_correct = 0
    n_samples = 0
    with torch.no_grad():
        for v_images, v_labels in valid_dl:
            v_images = v_images.reshape(-1, seq_len, input_size).to(device)
            v_labels = v_labels.to(device)
            v_outputs = stacked_lstm_model(v_images)
            v_losses.append(criterion(v_outputs, v_labels).item())
            # torch.max over dim 1 returns (values, indices); the indices
            # are the predicted class ids.
            _, v_predicted = torch.max(v_outputs, 1)
            n_samples += v_labels.size(0)
            n_correct += (v_predicted == v_labels).sum().item()

    # acc and the averaged losses are plain Python floats, so they are
    # formatted directly (calling .item() on a float raises AttributeError).
    acc = 100.0 * n_correct / n_samples
    avgd_validloss = sum(v_losses) / len(v_losses)
    print (f'Epoch [{epoch+1}/{num_epochs}], Train loss: {avgd_trainloss:.4f}, Valid loss: {avgd_validloss:.4f}, Valid accu: {acc:.2f}')

# Final evaluation on the test set.
# Inference does not need gradients, so autograd tracking is switched off
# to save memory.
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_dl:
        labels = labels.to(device)
        images = images.reshape(-1, seq_len, input_size).to(device)
        outputs = stacked_lstm_model(images)
        # torch.max over dim 1 yields (values, indices); element [1] holds
        # the predicted class index for each sample.
        predicted = torch.max(outputs.data, 1)[1]
        hits = (predicted == labels).sum().item()
        n_correct += hits
        n_samples += labels.size(0)

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on test images: {acc} %')

标签: python pytorch lstm recurrent-neural-network

解决方案


LSTM 需要两个初始状态(隐藏状态 h_0 和细胞状态 c_0),而不是一个。如果只传入一个张量,PyTorch 会把它当作元组去索引,取到的 hidden[0] 形状就变成了 [8, 256],于是报出上面的错误。所以不要写成

h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

而应改为

h0 = (torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device), torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device))

也就是说,你需要以元组 (h_0, c_0) 的形式同时传入隐藏状态和细胞状态,两者的形状都是 (num_layers, batch_size, hidden_size)。


推荐阅读