首页 > 解决方案 > 有没有一个技巧可以让 GCC 优化掉多余的指令?

问题描述

使用 gcc -mcpu=cortex-m0 -mthumb -Os 编译时,会生成冗余指令,如下面这个说明性示例所示:

/*
 * Illustrative example: writes int-sized zeros into the buffer at p while
 * walking the byte offset n downward in steps of 4.
 *
 * p: base address; each store goes through an int* cast of (char*)p + n.
 * n: byte count on entry; the first store lands at byte offset n - 4.
 *
 * NOTE(review): the exit test is n > 0, so a store at offset 0 only happens
 * when it is the very first one (n == 4 on entry); with n == 8 only offset 4
 * is written. Kept as-is because the compiler output below is for this source.
 */
void memzero(void* p, int n)
{
    n -= 4;                             /* move to the offset of the last word */
    do
    {
        *(int*)((char*)p + n) = 0;      /* zero the word at byte offset n */
        n -= 4;
    }
    while(n > 0);                       /* signed test on the decremented offset */
}

结果是:

; Compiler output for the C memzero above.
; Register roles as used below: r0 = p, r1 = n (byte offset), r3 = zero to store.
memzero:
  movs r3, #0               ; r3 = 0, the value every store writes
  subs r1, r1, #4           ; n -= 4 before entering the loop
.L2:
  str r3, [r0, r1]          ; *(int*)((char*)p + n) = 0
  subs r1, r1, #4           ; n -= 4; subs also updates the condition flags
  cmp r1, #0                ; the explicit compare the question asks about
  bgt .L2                   ; loop while n > 0
  bx lr

显然,显式比较本质上是一个 nop。有没有办法打开更多优化来解决这个问题?

标签: c gcc arm

解决方案


删除比较会改变函数的行为。

BGT 指令在 Z == 0 且 N == V 时跳转。这一点在 n 发生溢出时很重要。

考虑使用n = -2147483644(如果int是 32 位)调用该函数:

; Trace of memzero with the cmp removed, entered with n = -2147483644 (32-bit int).
; Shows that the flags left by subs differ from the flags cmp would produce.
memzero:
        movs    r3, #0
        subs    r1, r1, #4    ; n = -2147483648
.L2:
        str     r3, [r0, r1]
        subs    r1, r1, #4    ; wraps around: n = 2147483644, Z = 0, N = 0, V = 1
        ;cmp     r1, #0       ; (would set Z = 0, N = 0, V = 0)
        bgt     .L2           ; N != V, so no jump even though n is positive
        bx      lr

如果我们改为测试 n >= 0,这个优化就会起作用,因为存在一条在 N == 0 时跳转的指令(BPL):

; Variant for a loop condition of n >= 0: no cmp is emitted because the
; branch only needs the N flag that subs already produced.
memzero:
        movs    r3, #0
        subs    r1, r1, #4
.L2:
        str     r3, [r0, r1]
        subs    r1, r1, #4    ; n -= 4; leaves N describing the sign of the result
        bpl     .L2           ; loop while N == 0
        bx      lr

测试程序

#include <stdio.h>
#include <limits.h>

/*
 * Reference loop that keeps the explicit compare: subtracts 4 from n until
 * the result is no longer greater than zero, judged by the flags that
 * `cmp n, #0` sets.
 *
 * n: starting value.
 * Returns the value of n at which the loop stopped.
 *
 * ARM-only inline assembly; noinline keeps a single copy of the loop so the
 * two variants can be compared side by side.
 */
__attribute__((noinline)) int with_cmp(int n) {
    /*
     * "1:" / "1b" is an assembler-local numeric label, so the template can be
     * emitted more than once without a duplicate-symbol error.
     * The "cc" clobber tells the compiler that subs/cmp overwrite the
     * condition flags; on ARM that is not implied automatically.
     */
    asm("1:\n\t"
        "subs    %[n], #4\n\t"
        "cmp     %[n], #0\n\t"
        "bgt     1b"
        : [n] "+r" (n)
        :
        : "cc");
    return n;
}

/*
 * Same loop with the compare removed: the branch consumes the flags left by
 * `subs` directly, so a subtraction that overflows (V set) ends the loop even
 * though the wrapped result is positive.
 *
 * n: starting value.
 * Returns the value of n at which the loop stopped.
 *
 * ARM-only inline assembly; noinline keeps a single copy of the loop so the
 * two variants can be compared side by side.
 */
__attribute__((noinline)) int without_cmp(int n) {
    /*
     * "1:" / "1b" is an assembler-local numeric label, so the template can be
     * emitted more than once without a duplicate-symbol error.
     * The "cc" clobber tells the compiler that subs overwrites the condition
     * flags; on ARM that is not implied automatically.
     */
    asm("1:\n\t"
        "subs    %[n], #4\n\t"
        "bgt     1b"
        : [n] "+r" (n)
        :
        : "cc");
    return n;
}

/* Runs both loop variants from INT_MIN and prints where each one stopped. */
int main() {
    const int kept_cmp = with_cmp(INT_MIN);        /* loop with the explicit cmp */
    const int dropped_cmp = without_cmp(INT_MIN);  /* loop relying on subs flags */

    printf("with cmp: %d\nwithout cmp: %d\n", kept_cmp, dropped_cmp);
}

输出:

with cmp: 0              // loops as long as n > 0
without cmp: 2147483644  // immediately returns with positive n

推荐阅读