如何使用带注意力的 Seq2Seq 编码器-解码器模型进行多步预测


我正在尝试使用带注意力机制(Attention)的 Seq2Seq 编码器-解码器模型来预测工厂的生产数据。我卡住了:模型的输出似乎是一个常数,而且输出序列的长度与输入序列相同,而我实际上希望能够指定预测未来 3/5/9 个月。你能看看我的代码,指出我可能哪里出错了吗?

我在 PyTorch Seq2Seq 教程和下面这个 GitHub 代码上花费了大量时间,它们都是面向 NLP 和机器翻译的,但我没能把它们改造成适用于经济数据的版本。

GitHub:https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/more_advanced/Seq2Seq_attention/seq2seq_attention.py

目标:据我理解,我要预测的是这家工厂某种给定材料的未来产量,因此目标的维度是 1,并且取值是整数。

编码器:编码器以长度为 168 的序列作为输入,序列中的每个输入包含前 20 天的数据以及 37 个工厂级特征(例如工人数量等)。

解码器:据我理解,解码器应以上一个时间步的产量(即维度为 1)以及上一步的隐藏状态和单元状态作为输入。问题在于解码器每次都输出一个(几乎)恒定的值,似乎并没有学到数据的规律(预测值总是正数)。


class EncoderRNN(nn.Module):
    """Bidirectional LSTM encoder that bridges its final states to a decoder.

    The LSTM is sequence-first (``batch_first`` is not set), so ``forward``
    expects ``input`` of shape ``(seq_len, batch, input_size)``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        p: Dropout probability applied between stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers, p):
        super(EncoderRNN, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size,
                            num_layers, dropout=p,
                            bidirectional=True)

        # Project the concatenated forward/backward states (2 * hidden_size)
        # back down to hidden_size so they can seed a unidirectional LSTM.
        self.fc_hidden = nn.Linear(hidden_size * 2, hidden_size)
        self.fc_cell = nn.Linear(hidden_size * 2, hidden_size)

    def forward(self, input):
        """Encode ``input`` and return per-step states plus bridged final states.

        Returns:
            A tuple ``(encoder_states, hidden, cell)`` where
            ``encoder_states`` is ``(seq_len, batch, 2 * hidden_size)`` and
            ``hidden`` / ``cell`` are ``(num_layers, batch, hidden_size)``.
        """
        encoder_states, (hidden, cell_state) = self.lstm(input)

        # Bridge every layer, not only layer 0: a decoder LSTM built with the
        # same num_layers needs one initial state per layer. For
        # num_layers == 1 this is identical to concatenating
        # hidden[0:1] and hidden[1:2].
        hidden = self.fc_hidden(self._merge_directions(hidden))
        cell = self.fc_cell(self._merge_directions(cell_state))
        return encoder_states, hidden, cell

    def _merge_directions(self, state):
        """Concatenate each layer's forward and backward state on the feature axis.

        ``state`` is laid out as ``(num_layers * 2, batch, hidden_size)`` with
        the two directions of a layer adjacent; the result is
        ``(num_layers, batch, 2 * hidden_size)``.
        """
        _, batch_size, hidden_size = state.shape
        per_layer = state.reshape(self.lstm.num_layers, 2, batch_size, hidden_size)
        return torch.cat((per_layer[:, 0], per_layer[:, 1]), dim=2)

class Decoder_LSTMwAttention(nn.Module):
    """Single-step LSTM decoder with attention over the encoder states.

    Each call to ``forward`` consumes one previous value, attends over all
    encoder time steps, advances the LSTM by one step and emits one
    prediction.

    Args:
        input_size: Size of the value fed in at each decoding step.
        hidden_size: Hidden size of the decoder LSTM. Encoder states are
            expected to have ``2 * hidden_size`` features.
        num_layers: Number of stacked LSTM layers.
        output_size: Size of the prediction emitted at each step.
        p: Dropout probability applied to the step input.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, p):
        super(Decoder_LSTMwAttention, self).__init__()
        # Sequence-first LSTM fed with [context vector ; step input].
        self.rnn = nn.LSTM(hidden_size * 2 + input_size, hidden_size,
                           num_layers)

        self.energy = nn.Linear(hidden_size * 3, 1)
        # Attention weights must sum to 1 over the encoder time steps, which
        # live on dim 0 of the (seq_len, batch, 1) energy tensor. dim=1 would
        # normalize across the batch instead.
        self.softmax = nn.Softmax(dim=0)
        self.dropout = nn.Dropout(p)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.fc = nn.Linear(hidden_size, output_size)
        # Not used in forward(); kept so existing state dicts still load.
        self.attention_combine = nn.Linear(hidden_size, hidden_size)

    def forward(self, input, encoder_states, hidden, cell):
        """Run one decoding step.

        Args:
            input: Previous value, either a 0-D scalar or a 1-D tensor of
                ``input_size`` elements; it is shared by every batch element.
            encoder_states: ``(seq_len, batch, 2 * hidden_size)`` encoder outputs.
            hidden: ``(num_layers, batch, hidden_size)`` LSTM hidden state.
            cell: ``(num_layers, batch, hidden_size)`` LSTM cell state.

        Returns:
            A tuple ``(output, hidden, cell)``. ``output`` has shape
            ``(output_size,)`` with values in ``[-1, 1]`` (tanh); ``hidden``
            and ``cell`` are the updated LSTM states.
        """
        sequence_length = encoder_states.shape[0]
        batch_size = encoder_states.shape[1]

        # Broadcast the step input to (1, batch, input_size).
        input = input.unsqueeze(0)
        input = input.unsqueeze(0)
        input = input.repeat(1, batch_size, 1)

        # The input is a real-valued observation, not an embedding: a ReLU
        # here would silently turn every negative previous value into 0.
        input = self.dropout(input)

        # Use the top layer's hidden state as the attention query. For a
        # single-layer decoder this is the whole hidden state.
        query = hidden[-1:].expand(sequence_length, -1, -1)
        concatenated = torch.cat((query, encoder_states), dim=2)

        energy = self.relu(self.energy(concatenated))
        attention = self.softmax(energy)  # (seq_len, batch, 1)

        # Weighted sum of encoder states over the sequence axis:
        # (seq_len, batch, 1) x (seq_len, batch, 2H) -> (1, batch, 2H).
        context_vector = torch.einsum("snk,snl->knl", attention, encoder_states)

        rnn_input = torch.cat((context_vector, input), dim=2)
        output, (hidden, cell) = self.rnn(rnn_input, (hidden, cell))

        # NOTE(review): `output` is (1, batch, hidden_size), so [:, -1, :]
        # picks the LAST BATCH ELEMENT, not the last time step. This is kept
        # so the return shape stays (output_size,) for existing callers;
        # confirm whether per-sample predictions are actually wanted.
        # NOTE(review): tanh bounds predictions to [-1, 1]; this presumably
        # relies on targets being scaled into that range - confirm.
        output = self.tanh(self.fc(output[:, -1, :]).squeeze(0))

        return output, hidden, cell

class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that unrolls the decoder one step at a time.

    Args:
        encoder: Callable mapping ``source`` to
            ``(encoder_states, hidden, cell)``.
        decoder: Callable mapping ``(x, encoder_states, hidden, cell)`` to
            ``(output, hidden, cell)`` for a single decoding step.
    """

    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.1):
        """Encode ``source`` and predict ``target.shape[0] - 1`` steps.

        Args:
            source: Encoder input; passed to the encoder unchanged.
            target: Target sequence indexed by step along dim 0.
                ``target[0]`` seeds the decoder and is not predicted.
            teacher_force_ratio: Probability, drawn independently at each
                step, of feeding the true ``target[t]`` as the next decoder
                input instead of the model's own prediction.

        Returns:
            A 1-D tensor of length ``target.shape[0]`` on ``source``'s
            device. Index 0 is never written and stays 0; index ``t >= 1``
            holds the prediction made at step ``t``.
        """
        target_len = target.shape[0]

        # Allocate next to the input data instead of relying on a
        # module-level `device` global being defined and in sync.
        outputs = torch.zeros(target_len, device=source.device)
        encoder_states, hidden, cell = self.encoder(source)

        # Seed the decoder with the first known target value.
        x = target[0]

        for t in range(1, target_len):
            # Every step attends over the same encoder states and carries
            # the LSTM state forward.
            output, hidden, cell = self.decoder(x, encoder_states, hidden, cell)
            outputs[t] = output

            # Teacher forcing: sometimes feed the ground truth so training
            # inputs stay realistic, otherwise feed back the prediction so
            # the model also sees its own outputs as it will at inference.
            use_ground_truth = random.random() < teacher_force_ratio
            x = target[t] if use_ground_truth else output

        return outputs



