首页 > 解决方案 > 保存/加载包装在 sklearn 兼容估计器中的 PyTorch 模型

问题描述

我已经使用 PyTorch 编写了一个自动编码器,并将它滚动到一个自定义的 sklearn BaseEstimator 中。我通常在带有 GPU 的机器上训练估计器,然后使用 pickle 保存它们以供以后评估。如果我尝试在模型存储在 GPU 上的机器上加载估计器,我会收到以下错误:

RuntimeError:尝试反序列化 CUDA 设备上的对象,但 torch.cuda.is_available() 为 False。如果您在仅 CPU 的机器上运行,请使用带有 map_location='cpu' 的 torch.load 将您的存储映射到 CPU。

以下是我的 PyTorch 模型和 sklearn 可兼容估计器的示例,以及我如何尝试保存和加载模型的示例。

PyTorch 模型

import torch.nn as nn
class _AutoEncoder(nn.Module):
    def __init__(self, input_dim, output_dim, encoder_dim=4):
        super(_AutoEncoder, self).__init__()
        hidden_dim = int( (input_dim+encoder_dim)/2 )
        layers = []
        layers.append( nn.Linear(input_dim, hidden_dim) )
        layers.append( nn.Linear(hidden_dim, encoder_dim) )
        self.encoder = nn.Sequential(*layers)
        layers = []
        layers.append( nn.Linear(encoder_dim, hidden_dim) )
        layers.append( nn.Linear(hidden_dim, output_dim) )
        self.decoder = nn.Sequential(*layers)

    def forward(self, X):
        return self.decoder( self.encoder( X ) )

sklearn Comptable Estimator

import warnings
import inspect
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn import utils as sk_utils

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data

class AutoEncoder(BaseEstimator, TransformerMixin):
    def __init__(
        self,
        encoder_dim=4,
        n_epochs=200,
        batch_size=None,
        shuffle=True,
        use_cuda=False
    ):
        super(AutoEncoder, self).__init__()
        args, _, _, values = inspect.getargvalues(inspect.currentframe())
        values.pop("self")
        for arg, val in values.items():
            setattr(self, arg, val)

        if use_cuda:
            if torch.cuda.is_available():
                self.device = torch.device("cuda")
            else:
                self.device = torch.device("cpu")
                warnings.warn("cuda not avaliable", UserWarning)
        else:
            self.device = torch.device("cpu")

    def fit(self, X, y=None):
        # X, y = sk_utils.check_X_y(X, y, ensure_2d=False, allow_nd=True)
        self._model = self._train_classifier(X, y)
        return self

    def transform(self, X):
        sk_utils.validation.check_is_fitted(self, ['_model'])
        X = sk_utils.check_array(X)
        X = torch.from_numpy(X.astype(np.float32)).to(self.device)
        with torch.no_grad():
            self._model.eval()
            output = self._model.forward( X )
            return output.cpu().numpy()

    def encode(self, X):
        sk_utils.validation.check_is_fitted(self, ['_model'])
        X = sk_utils.check_array(X)
        X = torch.from_numpy(X.astype(np.float32)).to(self.device)
        with torch.no_grad():
            self._model.eval()
            output = self._model.encoder( X )
            return output.cpu().numpy()

    def decode(self, X):
        sk_utils.validation.check_is_fitted(self, ['_model'])
        X = sk_utils.check_array(X)
        X = torch.from_numpy(X.astype(np.float32)).to(self.device)
        with torch.no_grad():
            self._model.eval()
            output = self._model.decoder( X )
            return output.cpu().numpy()

    def _train_classifier(self, x_train, y_train):
        x_train = torch.from_numpy(x_train.astype(np.float32)).to(self.device)
        y_train = torch.from_numpy(y_train.astype(np.float32)).to(self.device)
        input_dim = x_train.shape[-1]
        output_dim = y_train.shape[-1]
        model = _AutoEncoder(input_dim, output_dim, encoder_dim=self.encoder_dim).to(self.device)
        loss_function = nn.MSELoss()
        optimizer = optim.Adam(model.parameters())
        print model
        if self.batch_size is None:
            return self._batch_train_simple_classifier(x_train, y_train, model, loss_function, optimizer)
        else:
            return self._minibatch_train_simple_classifier(x_train, y_train, model, loss_function, optimizer)

    def _batch_train_simple_classifier(self, x_train, y_train, model, loss_function, optimizer):
        for epoch in range(1, self.n_epochs+1):
            model.train()
            optimizer.zero_grad()
            outputs = model.forward(x_train)
            loss = loss_function(outputs, y_train)
            loss.backward()
            optimizer.step()
            if epoch % 10 == 0 or epoch == self.n_epochs:
                message = "Train Epoch: {:5d},    Loss: {:15.6f}".format(
                    epoch,
                    loss.item()
                )
                print message
        return model

    def _minibatch_train_simple_classifier(self, x_train, y_train, model, loss_function, optimizer):
        train_data = torch.utils.data.TensorDataset(x_train, y_train)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=self.batch_size, shuffle=self.shuffle)
        for epoch in range(1, self.n_epochs+1):
            for data, target in train_loader:
                model.train()
                optimizer.zero_grad()
                outputs = model.forward(data)
                loss = loss_function(outputs, target)
                loss.backward()
                optimizer.step()
            if epoch % 10 == 0 or epoch == self.n_epochs:
                model.eval()
                outputs = model.forward(x_train)
                loss = loss_function(outputs, y_train)
                message = "Train Epoch: {:5d},    Loss: {:15.6f}".format(
                    epoch,
                    loss.item()
                )
                print message
        return model

训练

这通常在带有 GPU 的机器上完成。

from sklearn import datasets as sk_datasets
digits = sk_datasets.load_digits(n_class=10, return_X_y=False)
data = digits.data

ae = AutoEncoder(
    encoder_dim=2,
    n_epochs=100,
    batch_size=128,
    shuffle=True,
    use_cuda=True
)

data_fitted = ae.fit_transform(data, data)

保存估算器

我想找到一种方法,在无需显式调用的情况下将 PyTorch 模型保存到 CPU 中。也许作为 AutoEncoder 类的一部分的函数在被腌制时被调用?

with open("autoencoder.pkl", "wb") as fp:
    # ae._model needs to be moved to the CPU here.
    # I don't want to have to call ae._model.cpu() explicitly
    pickle.dump(ae, fp)

正在加载

如果在 PyTorch 模型仍在 GPU 上时保存估计器,我无法弄清楚如何在没有 GPU 的机器上加载估计器。

# This gives an error if the model was saved while on the GPU,
# and a GPU is not avaiable when loading.
with open("autoencoder.pkl", "rb") as fp:
    model = pickle.load(fp)

# This also a similar error. I also would not expect this to
# work since the pickle file contains an sklearn estimator
# wrapping a PyTorch model.
with open("autoencoder.pkl", "rb") as fp:
    touch.load(fp, map_location="cpu")

标签: pythonscikit-learnpicklepytorch

解决方案


推荐阅读