machine-learning - 如何使用带注意力的 Seq2Seq 编码器-解码器模型进行多步预测
问题描述
我正在尝试使用带注意力机制(Attention)的 Seq2Seq 编码器-解码器模型来预测工厂生产数据。我遇到了困难:模型的输出似乎是一个常数,而且输出序列的长度与输入序列相同,而我实际上希望能够指定要预测未来 3/5/9 个月。你能看看我的代码,指出我哪里出错了吗?
我在 PyTorch Seq2Seq 教程和以下 GitHub 仓库上花费了大量时间,它们都面向 NLP 和机器翻译,但我没能将它们改造成适用于经济数据的模型。
目标 据我了解,我是在预测这家工厂给定材料的未来产量。所以它的维数是1,当然是整数。
编码器 编码器将长度为 168 的序列作为输入,每个输入是前 20 天的数据,以及 37 个工厂级特征,例如工人数量等。
解码器 据我了解,解码器应该以上一个时间步的产量(即维度为 1)以及上一步的隐藏状态和单元状态作为输入。问题在于解码器似乎每次都输出一个(几乎)恒定的值,看起来并没有学到数据的规律(预测值总是正的)。
代码
class EncoderRNN(nn.Module):
    """Bidirectional LSTM encoder that also builds the decoder's initial state.

    The LSTM is created without ``batch_first``, so ``forward`` expects a
    batched, sequence-first input of shape ``(seq_len, batch, input_size)``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        p: Dropout probability between stacked LSTM layers. Only passed to
            the LSTM when ``num_layers > 1``.
    """

    def __init__(self, input_size, hidden_size, num_layers, p):
        super(EncoderRNN, self).__init__()
        # nn.LSTM applies dropout only *between* stacked layers. With a single
        # layer a non-zero value has no effect and just emits a UserWarning,
        # so it is only passed through when it can actually be used.
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            dropout=p if num_layers > 1 else 0.0,
            bidirectional=True,
        )
        # Project the concatenated forward/backward final states (2 * hidden)
        # down to the size of the unidirectional decoder state.
        self.fc_hidden = nn.Linear(hidden_size * 2, hidden_size)
        self.fc_cell = nn.Linear(hidden_size * 2, hidden_size)

    def forward(self, input):
        """Encode a sequence.

        Args:
            input: Tensor of shape ``(seq_len, batch, input_size)``.

        Returns:
            A tuple ``(encoder_states, hidden, cell)`` where
            ``encoder_states`` has shape ``(seq_len, batch, 2 * hidden_size)``
            and ``hidden`` / ``cell`` have shape ``(1, batch, hidden_size)``.
        """
        encoder_states, (hidden, cell_state) = self.lstm(input)

        # The final states are laid out as (num_layers * 2, batch, hidden) with
        # the last two entries being the top layer's forward and backward
        # directions. Use those so the summary comes from the same layer that
        # produced `encoder_states`; for num_layers == 1 this is the same pair
        # as indices 0 and 1.
        hidden = self.fc_hidden(
            torch.cat((hidden[-2:-1], hidden[-1:]), dim=2)
        )
        cell = self.fc_cell(
            torch.cat((cell_state[-2:-1], cell_state[-1:]), dim=2)
        )
        return encoder_states, hidden, cell
class Decoder_LSTMwAttention(nn.Module):
    """Single-step LSTM decoder that attends over the encoder states.

    Each call to ``forward`` consumes one previous value, computes attention
    weights over all encoder time steps, and advances the LSTM by one step.

    Args:
        input_size: Number of features in the decoder input (the previous
            value fed back in).
        hidden_size: Hidden size of the decoder LSTM. The encoder states are
            expected to have ``2 * hidden_size`` features.
        num_layers: Number of stacked LSTM layers.
            NOTE(review): the attention step concatenates the repeated
            ``hidden`` with ``encoder_states`` along the feature axis, which
            only lines up when ``hidden`` has a leading dimension of 1, i.e.
            ``num_layers == 1``.
        output_size: Number of values predicted per step.
        p: Dropout probability used to build ``self.dropout``.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, p):
        super(Decoder_LSTMwAttention, self).__init__()
        # The LSTM sees the attention context (2 * hidden) plus the raw input.
        self.rnn = nn.LSTM(hidden_size * 2 + input_size, hidden_size,
                           num_layers)
        # Scores one (decoder hidden, encoder state) pair: hidden + 2 * hidden.
        self.energy = nn.Linear(hidden_size * 3, 1)
        # Energies have shape (seq_len, batch, 1). The weights must sum to one
        # over the *sequence* axis (dim 0); normalising over dim 1 would mix
        # unrelated batch elements and leave the weights un-normalised over
        # time.
        self.softmax = nn.Softmax(dim=0)
        # `dropout` and `attention_combine` are not used in `forward`; they are
        # kept so the module's attributes and state_dict keys stay unchanged.
        self.dropout = nn.Dropout(p)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.fc = nn.Linear(hidden_size, output_size)
        self.attention_combine = nn.Linear(hidden_size, hidden_size)

    def forward(self, input, encoder_states, hidden, cell):
        """Run one decoding step.

        Args:
            input: Previous value, either a 0-d tensor or a 1-d tensor of
                shape ``(input_size,)``. It is broadcast to every batch
                element.
            encoder_states: Tensor of shape
                ``(seq_len, batch, 2 * hidden_size)``.
            hidden: Decoder hidden state, shape ``(1, batch, hidden_size)``.
            cell: Decoder cell state, same shape as ``hidden``.

        Returns:
            A tuple ``(output, hidden, cell)``. ``output`` has shape
            ``(output_size,)``; ``hidden`` and ``cell`` are the updated LSTM
            states.
        """
        sequence_length = encoder_states.shape[0]
        batch_size = encoder_states.shape[1]

        # Shape the previous value as (1, batch, input_size). The value is fed
        # to the LSTM as-is: it is a real-valued measurement rather than an
        # embedding, so passing it through ReLU would clip negative inputs to
        # zero and dropout would randomly erase the only input feature.
        input = input.unsqueeze(0).unsqueeze(0).repeat(1, batch_size, 1)

        # Pair the current hidden state with every encoder time step.
        h_reshaped = hidden.repeat(sequence_length, 1, 1)
        concatenated = torch.cat((h_reshaped, encoder_states), dim=2)
        energy = self.relu(self.energy(concatenated))
        attention = self.softmax(energy)  # (seq_len, batch, 1)

        # Attention-weighted sum over the sequence axis:
        # (seq_len, batch, 1) x (seq_len, batch, 2H) -> (1, batch, 2H).
        context_vector = torch.einsum(
            "snk,snl->knl", attention, encoder_states
        )

        rnn_input = torch.cat((context_vector, input), dim=2)
        output, (hidden, cell) = self.rnn(rnn_input, (hidden, cell))

        # `output` is (1, batch, hidden); index -1 on axis 1 selects the last
        # batch element, so a single prediction is returned per step.
        # NOTE(review): this keeps the existing return shape; confirm that a
        # per-batch prediction is not what was intended.
        # NOTE(review): tanh bounds predictions to (-1, 1), so targets are
        # presumably scaled into that range — verify against the data pipeline.
        output = self.tanh(self.fc(output[:, -1, :]).squeeze(0))
        return output, hidden, cell
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes one value per target step.

    Args:
        encoder: Callable returning ``(encoder_states, hidden, cell)`` for a
            source tensor.
        decoder: Callable taking ``(x, encoder_states, hidden, cell)`` and
            returning ``(output, hidden, cell)``.
    """

    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.1):
        """Decode ``target.shape[0] - 1`` steps after encoding ``source``.

        The number of predicted steps is set by the length of ``target``, not
        by the length of ``source``. ``target[0]`` is the first decoder input.
        With ``teacher_force_ratio=0`` no later element of ``target`` is read,
        so a forecast of ``k`` steps can be obtained by passing a ``target``
        of length ``k + 1`` whose first element is the last known value.

        Args:
            source: Encoder input tensor.
            target: Tensor whose first axis indexes decoding steps.
            teacher_force_ratio: Probability, per step, of feeding the
                ground-truth ``target[t]`` to the next step instead of the
                decoder's own prediction.

        Returns:
            A 1-d tensor of length ``target.shape[0]`` on ``source``'s device.
            Index 0 is never written and stays 0; predictions occupy indices
            ``1 .. target_len - 1``.
        """
        target_len = target.shape[0]
        # Allocate next to the input data rather than relying on a
        # module-level `device` global being defined and in sync.
        outputs = torch.zeros(target_len, device=source.device)

        encoder_states, hidden, cell = self.encoder(source)

        # The first decoder input is the first target element.
        x = target[0]
        for t in range(1, target_len):
            # Every step attends over the same encoder states while the
            # decoder's hidden and cell states are carried forward.
            output, hidden, cell = self.decoder(
                x, encoder_states, hidden, cell
            )
            outputs[t] = output

            # Teacher forcing: sometimes feed the ground truth so training
            # inputs resemble inference inputs without the model relying on
            # them at every step.
            use_truth = random.random() < teacher_force_ratio
            x = target[t] if use_truth else output
        return outputs
这是模型预测的输出图:
解决方案
推荐阅读
- javascript - 脚注行不显示
- php - 带数字的 PHP 数组格式
- google-sheets - 使用键和标头查找值(vlookup,索引号搜索)
- mysql - 尝试将新行插入到 MySQL 表中,Node.js 出现神秘错误。无法工作
- node.js - Jasmine 模拟:错误:方法不存在
- django - IntegrityError:唯一约束失败:users_customuser.email
- c++ - 错误:无法将参数 '3' 转换为 'float (*)[b]' 到 'float**' 到 'void
- reactjs - 如何使用反应条件渲染?
- php - 是否可以仅使用 PHP 或 Web 向不和谐用户添加角色?
- r - 为什么 mutate 会在函数变量上窒息?