python - 预期隐藏 [0] 大小 (2, 8, 256),得到 [8, 256]
问题描述
我有正确的隐藏层形状,如下所示。
print(h0.shape)
print(x.shape)
torch.Size([2, 8, 256])
torch.Size([8, 300, 300])
但我仍然有错误 Expected hidden[0] size (2, 8, 256), got [8, 256]
有什么问题?
整个代码如下。
import torch
import torch.nn as nn
import torchvision
import matplotlib.pyplot as plt
import torchvision.transforms as tt
from torchvision.datasets import ImageFolder
from PIL import Image
import numpy as np
from torch.autograd import Variable
seq_len = input_size
hidden_size = 256 #size of hidden layers
num_classes = 5
num_epochs = 20
batch_size = 8
learning_rate = 0.001
# Fully connected neural network with one hidden layer
num_layers = 2 # 2 RNN layers are stacked
class LSTM(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, num_classes):
super(LSTM, self).__init__()
self.num_layers = num_layers
self.hidden_size = hidden_size
self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)#batch must have first dimension
#our inpyt needs to have shape
#x -> (batch_size, seq, input_size)
self.fc = nn.Linear(hidden_size, num_classes)#this fc is after RNN. So needs the last hidden size of RNN
def forward(self, x):
#according to ducumentation of RNN in pytorch
#rnn needs input, h_0 for inputs at RNN (h_0 is initial hidden state)
#the following one is initial hidden layer
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)#first one is number of layers and second one is batch size
#output has two outputs. The first tensor contains the output features of the hidden last layer for all time steps
#the second one is hidden state f
print(h0.shape)
print(x.shape)
out, _ = self.lstm(x, h0)
print(out.shape)
#output has batch_size, seq_len, hidden size
#we need to decode hidden state only the last time step
#out (N, 30, 128)
#Since we need only the last time step
#Out (N, 128)
out = out[:, -1, :] #-1 for last time step, take all for N and 128
out = self.fc(out)
return out
stacked_lstm_model = LSTM(input_size, hidden_size, num_layers, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()#cross entropy has softmax at output
optimizer = torch.optim.Adam(stacked_lstm_model.parameters(), lr=learning_rate) #optimizer used gradient optimization using Adam
# Train the model
n_total_steps = len(train_dl)
for epoch in range(num_epochs):
t_losses=[]
for i, (images, labels) in enumerate(train_dl):
# origin shape: [8, 1, 300, 300]
# resized: [8, 300, 300]
images = images.reshape(-1, seq_len, input_size).to(device)
labels = labels.to(device)
# Forward pass
outputs = stacked_lstm_model(images)
loss = criterion(outputs, labels)
t_losses.append(loss)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (i+1) % 100 == 0:
print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
avgd_trainloss = sum(t_losses)/len(t_losses)
acc=0
v_losses=[]
with torch.no_grad():
n_correct = 0
n_samples = 0
for v_images, v_labels in valid_dl:
v_images = v_images.reshape(-1, seq_len, input_size).to(device)
v_labels = v_labels.to(device)
v_outputs = stacked_lstm_model(v_images)
v_loss = criterion(v_outputs, v_labels)
v_losses.append(v_loss)
# max returns (value ,index)
_, v_predicted = torch.max(v_outputs.data, 1)
n_samples += v_labels.size(0)
n_correct += (v_predicted == v_labels).sum().item()
acc = 100.0 * n_correct / n_samples
avgd_validloss = sum(v_losses)/len(v_losses)
print (f'Epoch [{epoch+1}/{num_epochs}], Train loss: {avgd_trainloss.item():.4f}, Valid loss: {avgd_validloss.item():.4f}, Valid accu: {acc.item():.2f}')
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
n_correct = 0
n_samples = 0
for images, labels in test_dl:
images = images.reshape(-1, seq_len, input_size).to(device)
labels = labels.to(device)
outputs = stacked_lstm_model(images)
# max returns (value ,index)
_, predicted = torch.max(outputs.data, 1)
n_samples += labels.size(0)
n_correct += (predicted == labels).sum().item()
acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the network on test images: {acc} %')
解决方案
LSTM 需要两种隐藏状态,而不是一种。所以而不是
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
采用
h0 = (torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device), torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device))
所以你需要一个元组中的两个隐藏状态。
推荐阅读
- jakarta-ee - 从 JavaEE 发送的带有容器管理事务的 JMS 消息在容器提交事务之前不会发送
- python - 将 DataFrame 中凌乱的日期字符串转换为 python 和 pandas 中的“datetime”
- python - 使用 pygsheets 如何检测电子表格是否已经存在?
- python - 为什么我的编辑器没有给我语法错误,但是编译却给了我语法错误
- python - 相同功能的不同输出
- c# - 将异步任务从 c# 转换为 vb.net
- web-audio-api - Webaudio 增益在 0 时不静音
- java - Android Java 应用程序 - 发出通知时,发送者会收到通知,但运行该应用程序的其他设备不会
- docker - 如何为每个 docker 容器获取不同的 IP 地址?
- javascript - 元素 cloneNode(deep) 失败,控制台中仅给出未定义的错误