首页 > 解决方案 > 通过 move_pages() 查询失败

问题描述

#include <cstdint>
#include <iostream>
#include <numaif.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <limits>

// Maps a 16 MiB POSIX shared-memory object ("test_shm") and asks move_pages()
// which NUMA node each of its 4 MiB slices lives on, printing one line per slice.
// argc/argv are unused.
int main(int argc, char** argv) {
    const constexpr uint64_t size = 16lu * 1024 * 1024;
    // 16 MiB split into 4 MiB units -> 4 entries.
    const constexpr uint32_t nPages = size / (4lu * 1024 * 1024);
    int32_t status[nPages];
    // Pre-fill with INT32_MIN, presumably so entries the kernel never writes stand out.
    std::fill_n(status, nPages, std::numeric_limits<int32_t>::min());
    void* pages[nPages];

    // NOTE(review): the return value of shm_open() is not checked before it is
    // handed to mmap(), and the object is never sized (no ftruncate() in this block).
    auto fd = shm_open("test_shm", O_RDWR|O_CREAT, 0666);
    void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

    if (ptr == MAP_FAILED) {
        // NOTE(review): 0 is also a valid descriptor; `fd > 0` would skip closing it.
        if (fd > 0) close(fd);
        // Throws a string literal; nothing in this block catches it.
        throw "failed to map hugepages";
    }

    for (uint32_t i = 0; i < nPages; i++) {
        // NOTE(review): the right-hand side does not depend on i, so every entry
        // receives the same address (ptr + 4 MiB) instead of one address per slice.
        pages[i] = (char*)ptr + 4 * 1024 * 1024;
    }

    // nodes == nullptr: query only, nothing is moved; results are written to status[].
    // NOTE(review): the mapping is never written to before this call. move_pages(2)
    // documents a per-page status of -ENOENT for pages that are not present -
    // confirm whether that explains the negative values printed below.
    if (0 != move_pages(0, nPages, pages, nullptr, status, 0)) {
        std::cout << "failed to inquiry pages because " << strerror(errno) << std::endl;
    }
    else {
            // The call itself succeeded; each status[i] is printed as-is, so a
            // negative per-page status shows up here rather than in the branch above.
            for (uint32_t i = 0; i < nPages; i++) {
            std::cout << "page # " << i << " locates at numa node " << status[i] << std::endl;
        }
    }
    munmap(ptr, size);
    close(fd);
}

它打印:

page # 0 locates at numa node -2
page # 1 locates at numa node -2
page # 2 locates at numa node -2
page # 3 locates at numa node -2

根据手册页,它指出:

nodes is an array of integers that specify the desired location for each page.
Each element in the array is a node number. nodes can also be NULL, in which 
case move_pages() does not move any pages but instead will return the node where 
each page currently resides, in the status array. Obtaining the status of each 
page may be necessary to determine pages that need to be moved.

为什么查询返回成功,却打印出负值?我的机器只有 2 个 NUMA 节点——0 和 1。

内核版本:3.10.0-862.2.3.el7.x86_64

这是大页面的版本:

#include <cstdint>
#include <iostream>
#include <numaif.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <limits>

// Hugepage variant: maps 4 MiB of anonymous hugepage-backed memory, reports the
// NUMA node of each 2 MiB page, moves every page to the node given on the
// command line, then queries again at 4 KiB granularity.
//
// Usage: <program> <dst_node>
// Returns 0 on success, 1 on a usage or mapping error; exits with -1 if the
// move itself fails.
int main(int argc, char** argv) {
        // argv[1] is mandatory; without this guard strtoul() would be handed a null pointer.
        if (argc < 2) {
                std::cerr << "usage: " << (argc > 0 ? argv[0] : "move_pages") << " <dst_node>" << std::endl;
                return 1;
        }
        const int32_t dst_node = strtoul(argv[1], nullptr, 10);
        const constexpr uint64_t size = 4lu * 1024 * 1024;
        const constexpr uint64_t pageSize = 2lu * 1024 * 1024;
        const constexpr uint32_t nPages = size / pageSize;
        int32_t status[nPages];
        // Sentinel value so that entries the kernel did not fill in are recognizable.
        std::fill_n(status, nPages, std::numeric_limits<int32_t>::min());
        void* pages[nPages];
        int32_t dst_nodes[nPages];
        void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_HUGETLB, -1, 0);

        if (ptr == MAP_FAILED) {
                // Report the reason and exit instead of throwing a string literal that nothing catches.
                std::cerr << "failed to map hugepages because " << strerror(errno) << std::endl;
                return 1;
        }
        // Write to the whole mapping so the pages are populated before they are queried.
        memset(ptr, 0x41, nPages*pageSize);
        for (uint32_t i = 0; i < nPages; i++) {
                pages[i] = &((char*)ptr)[i*pageSize];
                dst_nodes[i] = dst_node;
        }

        std::cout << "Before moving" << std::endl;

        // nodes == nullptr: query only, results are written to status[].
        if (0 != move_pages(0, nPages, pages, nullptr, status, 0)) {
            std::cout << "failed to inquiry pages because " << strerror(errno) << std::endl;
        }
        else {
                for (uint32_t i = 0; i < nPages; i++) {
                        std::cout << "page # " << i << " locates at numa node " << status[i] << std::endl;
                }
        }

        // real move
        if (0 != move_pages(0, nPages, pages, dst_nodes, status, MPOL_MF_MOVE_ALL)) {
                std::cout << "failed to move pages because " << strerror(errno) << std::endl;
                munmap(ptr, size);
                exit(-1);
        }

        // Query the same range again, this time addressed in 4 KiB steps.
        const constexpr uint64_t smallPageSize = 4lu * 1024;
        const constexpr uint32_t nSmallPages = size / smallPageSize;
        void* smallPages[nSmallPages];
        int32_t smallStatus[nSmallPages];
        // Fill EVERY entry with the sentinel. An aggregate initializer `= {min}` would set
        // only element 0 and zero the rest, which is indistinguishable from "node 0".
        std::fill_n(smallStatus, nSmallPages, std::numeric_limits<int32_t>::min());
        for (uint32_t i = 0; i < nSmallPages; i++) {
                smallPages[i] = &((char*)ptr)[i*smallPageSize];
        }


        std::cout << "after moving" << std::endl;
        if (0 != move_pages(0, nSmallPages, smallPages, nullptr, smallStatus, 0)) {
            std::cout << "failed to inquiry pages because " << strerror(errno) << std::endl;
        }
        else {
                for (uint32_t i = 0; i < nSmallPages; i++) {
                        std::cout << "page # " << i << " locates at numa node " << smallStatus[i] << std::endl;
                }
        }

        // Release the mapping; the original left it to process teardown.
        munmap(ptr, size);
        return 0;
}

有趣的是,move_pages() 似乎能够理解大页面:在移动大页面之后,我按小页面大小进行查询,返回的状态数组中填充的正是预期的 NUMA 节点 ID。

标签: c++ linux numa

解决方案


您对 shm_open 和 mmap 的使用可能不会获得您想要的大页面。

move_pages系统调用(和 libnuma 包装器)适用于 x86_64 的 4096 字节标准页面。

并且您调用 move_pages 时,第三个参数 pages 的用法不正确。它不应该是指向内存本身的指针,而应该是指向一个数组的指针,该数组包含 nPages 个页面指针:

http://man7.org/linux/man-pages/man2/move_pages.2.html

  long move_pages(int pid, unsigned long count, void **pages,
                   const int *nodes, int *status, int flags);

   pages is an array of pointers to the pages that should be moved.
   These are pointers that should be aligned to page boundaries.
   Addresses are specified as seen by the process specified by pid.

如果 pages 中没有正确的指针,您将得到 -14;根据 errno 14 命令(来自 moreutils 包)的输出,这对应 EFAULT。

//https://stackoverflow.com/questions/54546367/fail-to-query-via-move-pages
//g++ 54546367.move_pages.cc -o 54546367.move_pages -lnuma -lrt
#include <cstdint>
#include <iostream>
#include <numaif.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <limits>

// Maps 256 KiB of private anonymous memory, touches every 4 KiB page, then asks
// move_pages() (query mode, nodes == nullptr) which NUMA node each page is on
// and prints one line per page.
//
// The question's shm_open()/MAP_SHARED mapping is replaced here by an anonymous
// mapping, so no file descriptor has to be opened or closed.
// Returns 0 on success and 1 if the mapping cannot be created. argc/argv are unused.
int main(int argc, char** argv) {
    // Standard page size assumed by this example; used for the count, the stride and the touch.
    const constexpr uint64_t pageSize = 4lu * 1024;
    const constexpr uint64_t size = 256lu * 1024;// * 1024;
    const constexpr uint32_t nPages = size / pageSize;
    void * pages[nPages];
    int32_t status[nPages];
    // Sentinel value so that entries the kernel did not fill in are recognizable.
    std::fill_n(status, nPages, std::numeric_limits<int32_t>::min());

    // With MAP_ANONYMOUS the fd argument should be -1: mmap(2) notes that some
    // implementations require it and that portable programs should ensure it.
    void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED) {
        // Report the reason and exit instead of throwing a string literal that nothing
        // catches; the message no longer mentions hugepages, which are not requested here.
        std::cerr << "failed to map memory because " << strerror(errno) << std::endl;
        return 1;
    }
    // Printed only after the check so MAP_FAILED is never shown as an address.
    std::cout << "Ptr is " << ptr << std::endl;

    // Write to the whole mapping so every page is populated before it is queried.
    memset(ptr, 0x41, size);
    for(uint32_t i = 0; i<nPages; i++) {
        pages[i] = static_cast<char*>(ptr) + i*pageSize;
    }

    if (0 != move_pages(0, nPages, pages, nullptr, status, 0)) {
        std::cout << "failed to inquiry pages because " << strerror(errno) << std::endl;
    }
    else {
        for (uint32_t i = 0; i < nPages; i++) {
            std::cout << "page # " << i << " locates at numa node " << status[i] << std::endl;
        }
    }
    munmap(ptr, size);
    return 0;
}

在 NUMA 机器上,以 taskset -c 7 ./54546367.move_pages 启动时,它为所有页面输出相同的节点;以 numactl -i all ./54546367.move_pages 启动时,输出的节点交错排列 (0 1 0 1)。


推荐阅读