首页 > 解决方案 > Pybind11 内存似乎在调用之间发生了变化

问题描述

我正在使用 pybind11 将 scipy 的 csr_matrix 转换为 C++ 对象。为此,我定义了一个 Vector 类,它由 numpy 的 data、indices 和 indptr 字段转换而来。

/// Non-owning view of a one-dimensional buffer of `T`.
///
/// Stores the raw element pointer together with its one-element shape. The
/// class itself never allocates, copies, or releases the pointed-to storage.
template<typename T>
class Vector {
 public:
  /// Pointer to the first element of the viewed buffer (not owned).
  T *data;
  /// Extent of the buffer, kept as a one-element shape array.
  const std::array<ssize_t, 1> shape;

  /// A view is always bound to a buffer; there is no empty state.
  Vector() = delete;

  /// Binds the view to `buffer`, which is described by `extent`.
  Vector(T *buffer, const std::array<ssize_t, 1> extent)
      : data(buffer), shape(extent) {}
};

随后,我用一个模板函数注册了该类的几个实例化版本,如下所示。

// Expose Vector<T> to Python as a type that supports the buffer protocol.
py::class_<Vector<T>>(m, "...Vector", py::buffer_protocol())
    // Constructor: view a one-dimensional array as a Vector<T>.
    // NOTE(review): only the raw pointer obtained from request() is stored and
    // nothing here keeps `data` alive. With forcecast, pybind11 presumably hands
    // over a converted temporary when the dtype differs -- confirm that the
    // storage outlives the Vector.
    .def("__init__", [](Vector<T> &self, py::array_t<T, py::array::c_style | py::array::forcecast> data) {
      py::buffer_info buf = data.request();
      if (buf.ndim != 1) {
        throw std::invalid_argument("must be a 1d array!");
      }
      T *const first = static_cast<T *>(buf.ptr);
      const std::array<ssize_t, 1> extent = {buf.shape[0]};
      // Old-style pybind11 constructor: build the object in place inside `self`.
      new(&self) Vector<T>(first, extent);
    })
    // Buffer protocol: describe the viewed memory as a 1-D array of T.
    .def_buffer([](Vector<T> &self) -> py::buffer_info {
      return py::buffer_info(
          self.data,                            // pointer to the first element
          sizeof(T),                            // size of one element in bytes
          py::format_descriptor<T>::format(),   // format string for T
          1,                                    // number of dimensions
          self.shape,                           // extent of each dimension
          {sizeof(T)}                           // stride in bytes per dimension
      );
    });

Vector 类工作正常。但是,随后我定义了一个像这样的 csr_matrix 类:

template<typename T>
class csr_matrix {
 public:
  Vector<T> data;
  Vector<ssize_t> indices;
  Vector<ssize_t> indptr;
  const std::array<ssize_t, 2> shape;

  csr_matrix() = delete;
  csr_matrix(Vector<T>& data, Vector<ssize_t>& indices, Vector<ssize_t>& indptr, const std::array<ssize_t, 2>& shape)
      : data(data), indices(indices), indptr(indptr), shape(shape) {}
};

然后以与 Vector 相同的方式使用模板注册它,这样我就可以为 float、double 和 int 注册 csr_matrix。

// Expose csr_matrix<T> to Python with a constructor taking the data, indices,
// indptr arrays and the shape, plus read-only access to the three vectors.
py::class_<csr_matrix<T>>(m, "...csr_matrix")
    .def("__init__", [](
        csr_matrix<T> &matrix,
        py::array_t<T, py::array::c_style> data,
        py::array_t<ssize_t, py::array::c_style> indices,
        py::array_t<ssize_t, py::array::c_style> indptr,
        py::array_t<ssize_t, py::array::c_style | py::array::forcecast> shape
    ) {
      py::buffer_info data_info = data.request();
      py::buffer_info indices_info = indices.request();
      py::buffer_info indptr_info = indptr.request();

      // ... some validity checks

      // NOTE(review): the Vectors below keep only raw pointers into the array
      // arguments. If a caller passes an array whose dtype is not already
      // T / ssize_t (scipy index arrays are commonly int32), pybind11 presumably
      // converts it into a temporary array that is released when this lambda
      // returns, leaving the stored pointer dangling -- confirm, then either
      // keep the arrays alive for the matrix's lifetime or copy the data.

      // Plain stack objects are enough here: csr_matrix copies each Vector into
      // its own member, so heap-allocating them with `new` only leaked memory.
      Vector<T> vec_data(static_cast<T *>(data_info.ptr), {data_info.shape[0]});
      Vector<ssize_t> vec_indices(static_cast<ssize_t *>(indices_info.ptr), {indices_info.shape[0]});
      Vector<ssize_t> vec_indptr(static_cast<ssize_t *>(indptr_info.ptr), {indptr_info.shape[0]});
      std::array<ssize_t, 2> shape_ = {*shape.data(0), *shape.data(1)};
      // Old-style pybind11 constructor: build the object in place inside `matrix`.
      new(&matrix) csr_matrix<T>(vec_data, vec_indices, vec_indptr, shape_);
    })
    .def_readonly("data", &csr_matrix<T>::data)
    .def_readonly("indices", &csr_matrix<T>::indices)
    .def_readonly("indptr", &csr_matrix<T>::indptr);

现在,我用 Python 编写了一个简单的单元测试,以确保一切正常,并且我得到了最令人困惑的错误

# Build a 3x4 float32 CSR matrix whose middle row is all zeros.
# (`sp` is presumably scipy.sparse -- the import is not shown.)
x = sp.csr_matrix([
    [0, 0, 1, 2],
    [0, 0, 0, 0],
    [3, 0, 4, 0],
], dtype=np.float32)
# Wrap the same data/indices/indptr arrays and shape in the bound matrix type
# (`matrix` is presumably the compiled pybind11 module).
cx = matrix.Float32CSRMatrix(x.data, x.indices, x.indptr, x.shape)
# Read each bound field back through np.asarray and compare it with the
# original array. The order of these three checks matters for the failure
# being reported.
np.testing.assert_equal(x.data, np.asarray(cx.data, dtype=np.float32))
np.testing.assert_equal(x.indices, np.asarray(cx.indices, dtype=np.uint64))
np.testing.assert_equal(x.indptr, np.asarray(cx.indptr, dtype=np.uint64))  # fails here!

我在每一行之后都加了几条 print 语句,输出如下:

# Debug print: a step label followed by the three bound fields, each read back
# through np.asarray.
print(1, np.asarray(cx.data, dtype=np.float32), np.asarray(cx.indices, dtype=np.uint64), np.asarray(cx.indptr, dtype=np.uint64))
#  data         indices   indptr
- [1. 2. 3. 4.] [2 3 0 2] [0 2 2 4]  # original, python object
0 [1. 2. 3. 4.] [2 3 0 2] [0 2 2 4]  # after matrix.Float32CSRMatrix(...)
1 [1. 2. 3. 4.] [2 3 0 2] [0 2 2 4]  # after assert on data
2 [1. 2. 3. 4.] [2 3 0 2] [2 3 0 2]  # after assert on indices
# Fails at assert on indptr!

所以在某个地方,有什么东西把 indptr 的值改成了 indices 的值,我不知道是什么,也不知道在哪里。更令人困惑的是,如果我调换断言的顺序,在检查 indices 之前先检查 indptr,就像这样:

# Same three checks as before, but with indptr compared before indices.
np.testing.assert_equal(x.data, np.asarray(cx.data, dtype=np.float32))
np.testing.assert_equal(x.indptr, np.asarray(cx.indptr, dtype=np.uint64))
np.testing.assert_equal(x.indices, np.asarray(cx.indices, dtype=np.uint64))  # fails here!

那么这是输出

- [1. 2. 3. 4.] [2 3 0 2] [0 2 2 4]
0 [1. 2. 3. 4.] [2 3 0 2] [0 2 2 4]
1 [1. 2. 3. 4.] [2 3 0 2] [0 2 2 4]
2 [1. 2. 3. 4.] [0 2 2 4] [0 2 2 4]
# Now fails at assert indices; we do the assert on indptr before, and it passes

所以现在是 indices 被 indptr 覆盖了,而不是相反。我已经为此绞尽脑汁一天多了,仍然不知道发生了什么。对象生命周期不是问题:Vector 在开始时构建,并在 csr_matrix 消失时销毁。我已经在这里粘贴了所有相关代码,我没有做任何可能导致这种情况的事情。

任何和所有的帮助将不胜感激。

标签: c++ pybind11

解决方案


推荐阅读