python - PyTorch 嵌入层引发“预期...cuda...但得到...cpu”错误
问题描述
我正在将 PyTorch 模型从 CPU(它可以工作的地方)转换为 GPU(到目前为止还没有)。错误消息(剪辑到重要位)如下:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-12-a7bb230c924c> in <module>
1 model = FeedforwardTabularModel()
2 model.cuda()
----> 3 model.fit(X_train_sample.values, y_train_sample.values)
<ipython-input-11-40b1edae7417> in fit(self, X, y)
100 for epoch in range(self.n_epochs):
101 for i, (X_batch, y_batch) in enumerate(batches):
--> 102 y_pred = model(X_batch).squeeze()
103 # scheduler.batch_step() # Disabled due to a bug, see above.
104 loss = self.loss_fn(y_pred, y_batch)
[...]
/opt/conda/lib/python3.6/site-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
1482 # remove once script supports set_grad_enabled
1483 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1484 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
1485
1486
RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_index_select
这是完整的模型定义:
import torch
from torch import nn
import torch.utils.data
# ^ https://discuss.pytorch.org/t/attributeerror-module-torch-utils-has-no-attribute-data/1666
class FeedforwardTabularModel(nn.Module):
def __init__(self):
super().__init__()
self.batch_size = 512
self.base_lr, self.max_lr = 0.001, 0.003
self.n_epochs = 5
self.cat_vars_embedding_vector_lengths = [
(1115, 80), (7, 4), (3, 3), (12, 6), (31, 10), (2, 2), (25, 10), (26, 10), (4, 3),
(3, 3), (4, 3), (23, 9), (8, 4), (12, 6), (52, 15), (22, 9), (6, 4), (6, 4), (3, 3),
(3, 3), (8, 4), (8, 4)
]
self.loss_fn = torch.nn.MSELoss()
self.score_fn = torch.nn.MSELoss()
# Layer 1: embeddings.
self.embeddings = []
for (in_size, out_size) in self.cat_vars_embedding_vector_lengths:
emb = nn.Embedding(in_size, out_size)
self.embeddings.append(emb)
# Layer 1: dropout.
self.embedding_dropout = nn.Dropout(0.04)
# Layer 1: batch normalization (of the continuous variables).
self.cont_batch_norm = nn.BatchNorm1d(16, eps=1e-05, momentum=0.1)
# Layers 2 through 9: sequential feedforward model.
self.seq_model = nn.Sequential(*[
nn.Linear(in_features=215, out_features=1000, bias=True),
nn.ReLU(),
nn.BatchNorm1d(1000, eps=1e-05, momentum=0.1),
nn.Dropout(p=0.001),
nn.Linear(in_features=1000, out_features=500, bias=True),
nn.ReLU(),
nn.BatchNorm1d(500, eps=1e-05, momentum=0.1),
nn.Dropout(p=0.01),
nn.Linear(in_features=500, out_features=1, bias=True)
])
def forward(self, x):
# Layer 1: embeddings.
inp_offset = 0
embedding_subvectors = []
for emb in self.embeddings:
index = torch.tensor(inp_offset, dtype=torch.int64).cuda()
inp = torch.index_select(x, dim=1, index=index).long().cuda()
out = emb(inp)
out = out.view(out.shape[2], out.shape[0], 1).squeeze()
embedding_subvectors.append(out)
inp_offset += 1
out_cat = torch.cat(embedding_subvectors)
out_cat = out_cat.view(out_cat.shape[::-1])
# Layer 1: dropout.
out_cat = self.embedding_dropout(out_cat)
# Layer 1: batch normalization (of the continuous variables).
out_cont = self.cont_batch_norm(x[:, inp_offset:])
out = torch.cat((out_cat, out_cont), dim=1)
# Layers 2 through 9: sequential feedforward model.
out = self.seq_model(out)
return out
def fit(self, X, y):
self.train()
# TODO: set a random seed to invoke determinism.
# cf. https://github.com/pytorch/pytorch/issues/11278
X = torch.tensor(X, dtype=torch.float32).cuda()
y = torch.tensor(y, dtype=torch.float32).cuda()
# The build of PyTorch on Kaggle has a blog that prevents us from using
# CyclicLR with ADAM. Cf. GH#19003.
# optimizer = torch.optim.Adam(model.parameters(), lr=max_lr)
# scheduler = torch.optim.lr_scheduler.CyclicLR(
# optimizer, base_lr=base_lr, max_lr=max_lr,
# step_size_up=300, step_size_down=300,
# mode='exp_range', gamma=0.99994
# )
optimizer = torch.optim.Adam(model.parameters(), lr=(self.base_lr + self.max_lr) / 2)
batches = torch.utils.data.DataLoader(
torch.utils.data.TensorDataset(X, y),
batch_size=self.batch_size, shuffle=True
)
for epoch in range(self.n_epochs):
for i, (X_batch, y_batch) in enumerate(batches):
y_pred = model(X_batch).squeeze()
# scheduler.batch_step() # Disabled due to a bug, see above.
loss = self.loss_fn(y_pred, y_batch)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(
f"Epoch {epoch + 1}/{self.n_epochs}, Loss {loss.detach().numpy()}"
)
def predict(self, X):
self.eval()
with torch.no_grad():
y_pred = model(torch.tensor(X, dtype=torch.float32).cuda())
return y_pred.squeeze()
def score(self, X, y):
y_pred = self.predict(X)
y = torch.tensor(y, dtype=torch.float32).cuda()
return self.score_fn(y, y_pred)
model = FeedforwardTabularModel()
model.cuda()
model.fit(X_train_sample.values, y_train_sample.values)
当模型中有一个张量应该在 GPU 上但在 CPU 上时,通常会发生这种类型的错误。但据我所知,我已经.cuda()
在所有必要的地方进行了调用:每次 atorch.tensor
被声明,并且model.cuda()
在model.fit
.
是什么导致了这个错误?
解决方案
另一个论坛上的某个人提供了解决方案:
Pytorch 需要您做
self.module_name = module
一些事情才能正常工作。可以将它们保存在列表中。只需setattr(self, 'emb_{}'.format(i), emb)
为该循环中的每个步骤执行类似操作即可。
因为我在一个列表中管理我的嵌入层,而 PyTorch 要求所有层都注册为模型对象上的一个属性,所以在调用它们时它们不会自动移动到 GPU 内存model.cuda()
中。棘手!
推荐阅读
- php - 如何使用php生成mega.nz直接下载链接
- c++ - 不能遍历链表
- python - sys.argv[1][-4:] 脚本含义
- c# - C# Jwt Token 在一个 api 中创建,在另一个 api 中使用时返回 invalid_token
- ansible - 如何将不同的配置应用于辅助主机集
- r - 在 Caret / R 中返回 train 函数后,如何找出选择了哪个模型?如何交叉检查?
- html - SVG 未在实时 Chrome 页面上呈现
- python - 通过对元素求和来调整矩阵的大小
- python - 数据框对象转换为 JSON 的问题
- html - 如何在不破坏功能的情况下替换锚标记上的类?