首页 > 解决方案 > 在将 XGETBV 的结果用于 XSETBV 之前,我应该还是不应该对其进行屏蔽(mask)?

问题描述

我正在尝试执行一些 UEFI 应用程序。

我发现此代码在 VirtualBox 上会崩溃(打印了 test start,但没有打印 test success):

#include <stdint.h>

/* Console-output object passed as the first argument of OutputString; entry() loads it from table[8]. */
void* ConOut;
/* Output routine called by printChar() as OutputString(ConOut, buffer); entry() loads it from slot 1 of ConOut. */
uint64_t (*OutputString)(void* protocol, void* string);

/*
 * Emit one character through OutputString.
 * The character is placed in the first byte of a 4-byte buffer whose other
 * bytes are zero. A '\n' is preceded by a '\r'.
 */
void printChar(int c) {
    unsigned char buf[4] = { 0, 0, 0, 0 };

    if (c == '\n') {
        printChar('\r');
    }
    buf[0] = (unsigned char)c;
    OutputString(ConOut, buf);
}

/* Print a NUL-terminated string one character at a time via printChar(). */
void printString(const char* str) {
    const char* cursor;

    for (cursor = str; *cursor != '\0'; cursor++) {
        printChar((unsigned char)*cursor);
    }
}

/*
 * Minimal reproduction: set CR4 bit 18, read XCR[0] with xgetbv and write the
 * very same EDX:EAX value back with xsetbv, printing a message before and
 * after so that a fault in between is visible.
 *
 * unused: ignored.
 * table:  indexed as an array of 64-bit slots; slot 8 is taken as the
 *         console-output object (presumably the UEFI system table's ConOut
 *         field -- confirm against the UEFI specification).
 *
 * Never returns: ends in an endless cli/hlt loop.
 */
void entry(void* unused, uint64_t* table) {
    (void)unused;

    /* Slot 8 of the table is the console object; slot 1 of that object is its output function. */
    ConOut = (void*)table[8];
    OutputString = (uint64_t (*)(void*, void*))((uint64_t*)ConOut)[1];

    printString("waiting for breakpoint set...\n");
    {
        /* Busy-wait delay; the counter is volatile so every iteration is actually performed. */
        volatile int j;
        for (j = 0; j < 1000000000; j++);
    }

    printString("test start\n");

    __asm__ __volatile__ (
        /* marker for setting breakpoint */
        "cmp $0xdeadbeef, %%eax\n\t"
        /* turn on OSXSAVE */
        /* 0x40000 is bit 18 of CR4 */
        "mov %%cr4, %%rax\n\t"
        "or $0x40000, %%rax\n\t"
        "mov %%rax, %%cr4\n\t"
        /* read XCR[0] */
        /* ECX = 0 selects the register; the result is returned in EDX:EAX */
        "xor %%eax, %%eax\n\t"
        "xor %%edx, %%edx\n\t"
        "xor %%ecx, %%ecx\n\t"
        "xgetbv\n\t"
        /* write XCR[0] */
        /* EDX:EAX and ECX are left exactly as xgetbv returned them */
        "xsetbv\n\t"
    : : : "%eax", "%ecx", "%edx");
    
    printString("test success\n");

    /* Disable interrupts and halt forever. */
    for (;;) __asm__ __volatile__ ("cli\n\thlt\n\t");
}

编译命令:

C:\MyInstalledApps\TDM-GCC-64\bin\gcc -Wall -Wextra -nostdlib -e entry -m64 -Wl,--subsystem=10 minimum_test.c -o minimum_test.efi

经过检查,我发现 xgetbv 指令把 EDX:EAX 设置为 00000000:0000001f,而 xsetbv 在遇到该值时触发了 #GP(中断向量 13)异常。

奇怪的是,当我在 VirtualBox 上单步执行 xgetbv 指令时,它把 EDX:EAX 设置为 00000000:00000001,因此没有发生异常,test success 也被打印出来了。

参考Intel® 64 and IA-32 Architectures Software Developer Manuals,我发现它是这样描述 XGETBV 的:

如果在读取的 XCR 中实现的位少于 64 位,则返回到 EDX:EAX 的未实现位位置的值是未定义的。

然后,关于 XSETBV:

保护模式异常
#GP(0)
如果当前权限级别不为 0。
如果在 ECX 中指定了无效的 XCR。
如果 EDX:EAX 中的值设置了 ECX 指定的 XCR 中保留的位。
如果尝试清除 XCR0 的位 0。
如果尝试将 XCR0[2:1] 设置为 10b。

我遇到的情况属于 EDX:EAX 中的值设置了保留位。由于从 XGETBV 返回的未实现位的值是未定义的,因此在将 XGETBV 的结果传递给 XSETBV 之前屏蔽它们似乎是合理的。用于掩码的值可以通过 EAX=0x0D, ECX=0 的 CPUID 获得。添加一些代码以应用屏蔽后,XSETBV 在 VirtualBox 上运行良好。

另一方面,英特尔手册也这样说 XSETBV:

XCR 中的未定义或保留位应设置为先前读取的值。

这看起来是说应该将保留位设置为通过 XGETBV 获得的值,也就是说我不应该使用掩码把这些位强制清零。

总之,在将 XGETBV 的结果传递给 XSETBV 之前,我到底应不应该用通过 CPUID 获得的有效位来屏蔽 XGETBV 的结果?


我发现相关但不重复的问题:


主机环境:

来宾(VM)环境:


完整的测试代码:

#include <stdint.h>

/* Console-output object handed to OutputString as its first argument; set by entry() from table[8]. */
void* ConOut;
/* Output routine used by printChar(); set by entry() from slot 1 of the object ConOut points to. */
uint64_t (*OutputString)(void* protocol, void* string);

/*
 * Write a single character to the console through OutputString.
 * Only the first byte of the 4-byte buffer carries the character; the rest
 * stay zero. A line feed is expanded to carriage return + line feed.
 */
void printChar(int c) {
    unsigned char cell[4] = { 0 };

    cell[0] = (unsigned char)c;
    if (c == '\n') {
        printChar('\r');
    }
    OutputString(ConOut, cell);
}

/* Send every character of a NUL-terminated string to printChar(), in order. */
void printString(const char* str) {
    for (; *str != '\0'; ++str) {
        printChar((unsigned char)*str);
    }
}

/*
 * Print an unsigned value in the given radix using upper-case digits,
 * left-padded with '0' to at least minDigits digits.
 *
 * value:     number to print.
 * radix:     base, 2..16. Any other radix is rejected and nothing is printed,
 *            because it would either index past the 16-entry digit table or
 *            divide by zero / never terminate.
 * minDigits: minimum number of digits; values <= 1 mean "no padding".
 *            Clamped to the space available in the local buffer.
 */
void printInt(uint64_t value, int radix, int minDigits) {
    static const char digitChars[] = "0123456789ABCDEF";
    /* Digits are written backwards starting at this index of vStr. */
    enum { LAST_DIGIT_INDEX = 120 };
    char vStr[128] = "";
    char* pStr = vStr + LAST_DIGIT_INDEX;
    uint64_t base;
    int digits = 0;

    if (radix < 2 || radix > 16) return;
    /*
     * A 64-bit value needs at most 64 digits (radix 2), so only the padding
     * request can run past the start of the buffer; cap it so that pStr
     * never moves below vStr.
     */
    if (minDigits > LAST_DIGIT_INDEX) minDigits = LAST_DIGIT_INDEX;

    base = (uint64_t)radix;
    do {
        *(pStr--) = digitChars[value % base];
        value /= base;
        digits++;
    } while (value > 0 || digits < minDigits);
    /* vStr was zero-initialised, so the byte after the last digit is the terminator. */
    printString(pStr + 1);
}

/*
 * Stop execution: disable maskable interrupts once, then execute hlt in an
 * endless loop. Control never leaves the asm block, so this function does
 * not return.
 */
void stop(void) {
    __asm__ __volatile__(
        "cli\n\t"
        /* loop target */
        "1:\n\t"
        "hlt\n\t"
        /* if hlt ever resumes, go straight back to it */
        "jmp 1b\n\t"
    );
}

/*
 * Full test program.
 *
 * Steps, each reported on the console:
 *   1. Dump CPUID leaves 0 and 1 and, when available, leaf 0DH sub-leaf 0.
 *      Stops if leaf 1 is missing or CPUID.01H:ECX[26] (xsave) is clear.
 *   2. Read CS and CR0; stop unless CR0 bit 0 is set and CS[1:0] is 0.
 *   3. Busy-wait so that a breakpoint can be set.
 *   4. Set CR4 bit 18 (OSXSAVE), read XCR[0] with xgetbv, OR in bits 1 and 2,
 *      optionally mask with the CPUID.0DH values (disabled by "#if 0"),
 *      and write the result back with xsetbv.
 *   5. Copy 32 bytes through ymm0 with vmovups and print them.
 *
 * unused: ignored.
 * table:  indexed as an array of 64-bit slots; slot 8 is taken as the
 *         console-output object (presumably the UEFI system table's ConOut
 *         field -- confirm against the UEFI specification).
 *
 * Does not return: every path ends in stop().
 */
void entry(void* unused, uint64_t* table) {
    uint32_t eax, ebx, ecx, edx, cs, cr0, xcr0_low, xcr0_high;
    uint32_t cpuid_max, eax_mask, edx_mask;
    unsigned char src_test[32], dst_test[32] = {0};
    int i;
    (void)unused;

    /* Slot 8 of the table is the console object; slot 1 of that object is its output function. */
    ConOut = (void*)table[8];
    OutputString = (uint64_t (*)(void*, void*))((uint64_t*)ConOut)[1];

    /* CPUID leaf 0: EAX returns the highest supported basic leaf. */
    __asm__ __volatile__ (
        "xor %%eax, %%eax\n\t"
        "cpuid\n\t"
    : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
    printString("CPUID.00H: EAX=0x"); printInt(eax, 16, 8);
    printString(", EBX=0x"); printInt(ebx, 16, 8);
    printString(", ECX=0x"); printInt(ecx, 16, 8);
    printString(", EDX=0x"); printInt(edx, 16, 8);
    printChar('\n');
    if (eax < 1) {
        printString("CPUID.01H not supported!\n");
        stop();
    }
    cpuid_max = eax;

    /* CPUID leaf 1: ECX bit 26 is checked as the xsave feature flag. */
    __asm__ __volatile__ (
        "mov $1, %%eax\n\t"
        "cpuid\n\t"
    : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
    printString("CPUID.01H: EAX=0x"); printInt(eax, 16, 8);
    printString(", EBX=0x"); printInt(ebx, 16, 8);
    printString(", ECX=0x"); printInt(ecx, 16, 8);
    printString(", EDX=0x"); printInt(edx, 16, 8);
    printChar('\n');
    if (!((ecx >> 26) & 1)) {
        printString("xsave (ECX[26]) not supported!\n");
        stop();
    }

    /* CPUID leaf 0DH, sub-leaf 0: EDX:EAX is kept as the mask of valid XCR[0] bits. */
    if (cpuid_max >= 0x0D) {
        __asm__ __volatile__ (
            "mov $0xd, %%eax\n\t"
            "xor %%ecx, %%ecx\n\t"
            "cpuid\n\t"
        : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
        printString("CPUID.0DH: EAX=0x"); printInt(eax, 16, 8);
        printString(", EBX=0x"); printInt(ebx, 16, 8);
        printString(", ECX=0x"); printInt(ecx, 16, 8);
        printString(", EDX=0x"); printInt(edx, 16, 8);
        printChar('\n');
        eax_mask = eax;
        edx_mask = edx;
    } else {
        /* No leaf 0DH: use an all-ones mask, i.e. masking would change nothing. */
        printString("CPUID.0DH not supported\n");
        eax_mask = UINT32_C(0xffffffff);
        edx_mask = UINT32_C(0xffffffff);
    }

    /*
     * Read CS (zero-extended) and the low 32 bits of CR0.
     * The CS output must be a register ("=r"): movzwl cannot take a memory
     * destination, which a "=g" constraint would have allowed.
     */
    __asm__ __volatile__ (
        "mov %%cs, %%ax\n\t"
        "movzwl %%ax, %0\n\t"
        "mov %%cr0, %%rax\n\t"
    : "=r"(cs), "=a"(cr0));
    printString("CPL check: CS=0x"); printInt(cs, 16, 4);
    printString(", CR0=0x"); printInt(cr0, 16, 8);
    printChar('\n');
    /* Test CR0 bit 0; parenthesised so that '!' applies to the masked bit, not to cr0 itself. */
    if (!(cr0 & 1)) {
        printString("not in protected mode!\n");
        stop();
    }
    /* The low two bits of CS give the current privilege level. */
    if ((cs & 3) != 0) {
        printString("CPL is not zero!\n");
        stop();
    }

    printString("waiting for breakpoint set...\n");
    {
        /* Busy-wait delay; the counter is volatile so every iteration is actually performed. */
        volatile int j;
        for (j = 0; j < 1000000000; j++);
    }

    printString("turning on OSXSAVE\n");
    __asm__ __volatile__ (
        /* turn on OSXSAVE */
        "mov %%cr4, %%rax\n\t"
        "or $0x40000, %%rax\n\t"
        "mov %%rax, %%cr4\n\t"
    : : : "%eax");

    __asm__ __volatile__ (
        /* marker for setting breakpoint */
        "cmp $0xdeadbeef, %%eax\n\t"
        /* read XCR[0] */
        "xor %%eax, %%eax\n\t"
        "xor %%edx, %%edx\n\t"
        "xor %%ecx, %%ecx\n\t"
        "xgetbv\n\t"
    : "=a"(xcr0_low), "=d"(xcr0_high) : : "%ecx", "cc");
    printString("XCR[0] = ");
    printInt(xcr0_high, 16, 8); printChar(':');
    printInt(xcr0_low, 16, 8); printChar('\n');

    /* Request bits 1 and 2 in addition to whatever was read. */
    xcr0_low |= 6;

    /* Flip to "#if 1" to clear the bits that CPUID.0DH does not report as valid. */
#if 0
    printString("applying mask\n");
    xcr0_low &= eax_mask;
    xcr0_high &= edx_mask;
#else
    (void)eax_mask; (void)edx_mask;
#endif

    printString("new XCR[0] will be: ");
    printInt(xcr0_high, 16, 8); printChar(':');
    printInt(xcr0_low, 16, 8); printChar('\n');

    printString("turning on AVX\n");
    __asm__ __volatile__ (
        /* marker for setting breakpoint */
        "cmp $0xdeadbeef, %%ecx\n\t"
        /* turn on AVX */
        "xor %%ecx, %%ecx\n\t"
        "xsetbv\n\t"
    : : "a"(xcr0_low), "d"(xcr0_high) : "%ecx", "cc");

    for (i = 0; i < 32; i++) src_test[i] = 123 * (i + 1);
    printString("testing AVX instruction\n");
    printString("src:\n");
    for (i = 0; i < 32; i++) {
        printInt(src_test[i], 16, 2);
        printChar((i + 1) % 16 == 0 ? '\n' : ' ');
    }
    printString("dest before:\n");
    for (i = 0; i < 32; i++) {
        printInt(dst_test[i], 16, 2);
        printChar((i + 1) % 16 == 0 ? '\n' : ' ');
    }
    /*
     * 32-byte copy through ymm0. The asm reads src_test and writes dst_test
     * behind the compiler's back, so it declares a "memory" clobber (forces
     * src_test to be stored beforehand and dst_test to be reloaded afterwards)
     * and clobbers xmm0, the low half of ymm0.
     */
    __asm__ __volatile__ (
        "vmovups (%0), %%ymm0\n\t"
        "vmovups %%ymm0, (%1)\n\t"
    : : "r"(src_test), "r"(dst_test) : "xmm0", "memory");
    printString("dest after:\n");
    for (i = 0; i < 32; i++) {
        printInt(dst_test[i], 16, 2);
        printChar((i + 1) % 16 == 0 ? '\n' : ' ');
    }

    printString("test done.\n");
    stop();
}

VirtualBox 的输出:

CPUID.00H: EAX=0x00000016, EBX=0x756E6547, ECX=0x6C65746E, EDX=0x49656E69
CPUID.01H: EAX=0x000906ED, EBX=0x00010800, ECX=0x56DA220B, EDX=0x178BFBFF
CPUID.0DH: EAX=0x00000007, EBX=0x00000340, ECX=0x00000340, EDX=0x00000000
CPL check: CS=0x0038, CR0=0xC0010033
waiting for breakpoint set...
turning on OSXSAVE
XCR[0] = 00000000:0000001F
new XCR[0] will be: 00000000:0000001F
turning on AVX

直接在 PC 上执行程序时的输出:

CPUID.00H: EAX=0x00000016, EBX=0x756E6547, ECX=0x6C65746E, EDX=0x49656E69
CPUID.01H: EAX=0x000906ED, EBX=0x00100800, ECX=0x77FAFBBF, EDX=0xBFEBFBFF
CPUID.0DH: EAX=0x0000001F, EBX=0x00000240, ECX=0x00000440, EDX=0x00000000
CPL check: CS=0x0038, CR0=0x80000013
waiting for breakpoint set...
turning on OSXSAVE
XCR[0] = 00000000:00000001
new XCR[0] will be: 00000000:00000007
turning on AVX
testing AVX instruction
src:
7B F6 71 EC 67 E2 5D D8 53 CE 49 C4 3F BA 35 B0
2B A6 21 9C 17 92 0D 88 03 7E F9 74 EF 6A E5 60
dest before:
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
dest after:
7B F6 71 EC 67 E2 5D D8 53 CE 49 C4 3F BA 35 B0
2B A6 21 9C 17 92 0D 88 03 7E F9 74 EF 6A E5 60
test done.

标签: x86x86-64avxbare-metal

解决方案


推荐阅读