PyTorch RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_index_select

Problem description

I am training a model that takes a tokenized string and passes it through an embedding layer and an LSTM. However, there seems to be a problem with the input, because it fails to get through the embedding layer.

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence


class DrugModel(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim, drug_embed_dim,
            lstm_layer, lstm_dropout, bi_lstm, linear_dropout, char_vocab_size,
            char_embed_dim, char_dropout, dist_fn, learning_rate,
            binary, is_mlp, weight_decay, is_graph, g_layer,
            g_hidden_dim, g_out_dim, g_dropout):

        super(DrugModel, self).__init__()

        # Save model configs
        self.drug_embed_dim = drug_embed_dim
        self.lstm_layer = lstm_layer
        self.char_dropout = char_dropout
        self.dist_fn = dist_fn
        self.binary = binary
        self.is_mlp = is_mlp
        self.is_graph = is_graph
        self.g_layer = g_layer
        self.g_dropout = g_dropout
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # For one-hot encoded SMILES
        if not is_mlp:
            self.char_embed = nn.Embedding(char_vocab_size, char_embed_dim,
                                           padding_idx=0)
            self.lstm = nn.LSTM(char_embed_dim, drug_embed_dim, lstm_layer,
                                bidirectional=False,
                                batch_first=True, dropout=lstm_dropout)
        # Distance function
        self.dist_fc = nn.Linear(drug_embed_dim, 1)

        if binary:
            # Binary Cross Entropy
            self.criterion = lambda x, y: y*torch.log(x) + (1-y)*torch.log(1-x)

    def init_lstm_h(self, batch_size):
        return (Variable(torch.zeros(
                self.lstm_layer*1, batch_size, self.drug_embed_dim)).cuda(),
                Variable(torch.zeros(
                self.lstm_layer*1, batch_size, self.drug_embed_dim)).cuda())

    # Set Siamese network as basic LSTM
    def siamese_sequence(self, inputs, length):
        # Character embedding
        inputs = inputs.long()
        inputs = inputs.cuda()

        self.char_embed = self.char_embed(inputs.to(self.device))
        c_embed = self.char_embed(inputs)
        # c_embed = F.dropout(c_embed, self.char_dropout)
        maxlen = inputs.size(1)

        if not self.training:
            # Sort c_embed
            _, sort_idx = torch.sort(length, dim=0, descending=True)
            _, unsort_idx = torch.sort(sort_idx, dim=0)
            maxlen = torch.max(length)

            # Pack padded sequence
            c_embed = c_embed.index_select(0, Variable(sort_idx).cuda())
            sorted_len = length.index_select(0, sort_idx).tolist()
            c_packed = pack_padded_sequence(c_embed, sorted_len, batch_first=True)

        else:
            c_packed = c_embed

        # Run LSTM
        init_lstm_h = self.init_lstm_h(inputs.size(0))
        lstm_out, states = self.lstm(c_packed, init_lstm_h)

        hidden = torch.transpose(states[0], 0, 1).contiguous().view(
                                 -1, 1 * self.drug_embed_dim)
        if not self.training:
            # Unsort hidden states
            outputs = hidden.index_select(0, Variable(unsort_idx).cuda())
        else:
            outputs = hidden

        return outputs

    def forward(self, key1, key2, targets, key1_len, key2_len, status, predict = False):
        if not self.is_mlp:
            output1 = self.siamese_sequence(key1, key1_len)
            output2 = self.siamese_sequence(key2, key2_len)

After instantiating the class, I get the following error when the input is passed through the embedding layer:

<ipython-input-128-432fcc7a1e39> in forward(self, key1, key2, targets, key1_len, key2_len, status, predict)
    129     def forward(self, key1, key2, targets, key1_len, key2_len, status, predict = False):
    130         if not self.is_mlp:
--> 131             output1 = self.siamese_sequence(key1, key1_len)
    132             output2 = self.siamese_sequence(key2, key2_len)
    133             set_trace()

<ipython-input-128-432fcc7a1e39> in siamese_sequence(self, inputs, length)
     74         inputs = inputs.cuda()
     75 
---> 76         self.char_embed = self.char_embed(inputs.to(self.device))
     77         set_trace()
     78         c_embed = self.char_embed(inputs)

~/miniconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

~/miniconda3/lib/python3.7/site-packages/torch/nn/modules/sparse.py in forward(self, input)
    112         return F.embedding(
    113             input, self.weight, self.padding_idx, self.max_norm,
--> 114             self.norm_type, self.scale_grad_by_freq, self.sparse)
    115 
    116     def extra_repr(self):

~/miniconda3/lib/python3.7/site-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   1482         # remove once script supports set_grad_enabled
   1483         _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1484     return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
   1485 
   1486 

RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_index_select

even though the input (e.g. key1) has already been moved to cuda and cast to long:

tensor([[25, 33, 30,  ...,  0,  0,  0],
        [25,  7,  7,  ...,  0,  0,  0],
        [25,  7, 30,  ...,  0,  0,  0],
        ...,
        [25,  7, 33,  ...,  0,  0,  0],
        [25, 33, 41,  ...,  0,  0,  0],
        [25, 33, 41,  ...,  0,  0,  0]], device='cuda:0')

Tags: runtime-error, gpu, pytorch, embedding

Solution


Setting model.device to cuda does not change the device of your inner modules, so self.lstm, self.char_embed and self.dist_fc still live on the cpu. The correct way to do this is DrugModel().to(device), which moves every registered parameter and buffer of the module onto device.
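To illustrate the difference, here is a minimal sketch with a hypothetical Toy module (not part of the question): storing a device in an attribute leaves the parameters where they are, while calling .to(device) on the module moves every registered submodule.

    import torch
    import torch.nn as nn

    class Toy(nn.Module):
        def __init__(self):
            super().__init__()
            self.embed = nn.Embedding(10, 4)  # parameters are created on the cpu
            # Assigning a device attribute does NOT move anything by itself
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    toy = Toy()
    print(toy.embed.weight.device)   # cpu -- the attribute assignment had no effect

    toy = toy.to(toy.device)         # .to() moves all registered parameters/buffers
    print(toy.embed.weight.device)   # cuda:0 when a GPU is available, otherwise cpu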

In general, it is better not to pass a device into your model at all and to write it in a device-agnostic way. To make your init_lstm_h function device-agnostic, you can use something like this:
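A minimal sketch of such a device-agnostic initializer, assuming the same attribute names as in the question (self.lstm_layer, self.drug_embed_dim): it allocates the initial states on whatever device the module's parameters already live on, so no .cuda() call is hard-coded.

    def init_lstm_h(self, batch_size):
        # Take any parameter of the module and create the initial hidden and
        # cell states on the same device (and with the same dtype) as that
        # parameter. After model.to(device), these tensors follow automatically.
        weight = next(self.parameters())
        h0 = weight.new_zeros(self.lstm_layer * 1, batch_size, self.drug_embed_dim)
        c0 = weight.new_zeros(self.lstm_layer * 1, batch_size, self.drug_embed_dim)
        return (h0, c0)

With this pattern, the explicit inputs.cuda() and Variable(...).cuda() calls inside siamese_sequence are no longer needed: once the model has been moved with .to(device), the embedding, LSTM, and index_select calls all operate on tensors that are already on the same device.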

