首页 > 解决方案 > macOS 上使用 mach_absolute_time 的 NASM x64 - 无法获得纳秒(工作代码审查)

问题描述

我刚开始学习 NASM,如果我犯了一些明显的错误,请见谅,但我实在不明白自己哪里做错了。

请查看下面的代码,并告诉我哪里不正确。它可以正常编译和运行,但打印出的结果是垃圾值。我知道 mach_absolute_time 返回的数值取决于硬件,因此需要使用 mach_timebase_info 结构体中的信息进行换算。

我编写了以下测试程序,人为地让它执行 1 秒。它会打印开始、结束以及经过的 Mach 绝对时间(奇怪的是,在我的机器上这个差值本身就是正确的纳秒数)。但是计算得到的纳秒数却是垃圾值——可能与我在运算、使用 xmm 寄存器或数据大小方面犯的某些错误有关,但我无论如何也找不出原因。谢谢您的帮助!

示例运行: 在此处输入图像描述

; ----------------------------------------------------------------------------------------
; Testing mach_absolute_time
; nasm -fmacho64 mach.asm && gcc -o mach mach.o
; ----------------------------------------------------------------------------------------

    global      _main
    extern      _printf
    extern      _mach_absolute_time
    extern      _mach_timebase_info
    extern      _nanosleep
    default     rel

section .text

;-----------------------------------------------------------------------
; int main(void)  -- test driver for mach_absolute_time
; ABI:   System V AMD64 (macOS / Mach-O, C symbols carry a leading '_')
; Flow:  read the tick counter, sleep via nanosleep, read the counter
;        again, print both readings and their difference, then try to
;        scale the difference by numer/denom from mach_timebase_info.
; NOTE(review): this is the listing that prints garbage; the NOTE(review)
;        comments below mark the instructions that look responsible.
;-----------------------------------------------------------------------
_main: 
    push        rbx                     ; entry rsp % 16 == 8; one push makes rsp 16-byte aligned for the calls below

    ; start measurement
    call        _mach_absolute_time     ; rax = current tick count (units are hardware dependent)
    mov         [start], rax            ; keep the full 64-bit reading in start
    ; print start
    ; NOTE(review): printf is variadic; the SysV ABI expects al = number of
    ; vector registers used. Here al still holds the low byte of the tick count.
    lea         rdi, [time_absolute]    ; arg 1: format string
    mov         rsi, rax                ; arg 2: tick count for %ld
    call        _printf

    ; do some time intensive stuff - This simulates 1 sec work
    ; NOTE(review): POSIX nanosleep takes a second pointer argument (rsi);
    ; it is not set here, so it is whatever printf left in rsi.
    lea         rdi, [timeval]          ; arg 1: address of the {1, 0} request
    call        _nanosleep

    ; end measurement
    call        _mach_absolute_time     ; rax = tick count after the sleep
    mov         [end], rax              ; keep the full 64-bit reading in end
    ; print end (same al caveat as the first printf)
    lea         rdi, [time_absolute]
    mov         rsi, rax
    call        _printf

    ; calc elapsed
    ; NOTE(review): r10d/r11d are 32-bit registers, so only the low 32 bits
    ; of the two 64-bit readings take part in the subtraction.
    mov         r10d, [end]             ; low dword of end
    mov         r11d, [start]           ; low dword of start
    sub         r10d, r11d              ; r10d = end - start (modulo 2^32)
    mov         [diff], r10d            ; writes only the low 4 bytes of diff; upper 4 keep their initial 0
    mov         rax, [diff]             ; reads all 8 bytes of diff back into rax
    cvtsi2ss    xmm2, r10d              ; xmm2 = (float) signed 32-bit r10d -- single precision
    ; print elapsed
    ; NOTE(review): xmm2 is caller-saved under SysV; nothing guarantees it
    ; survives the printf and mach_timebase_info calls that follow.
    lea         rdi, [diff_absolute]
    mov         rsi, rax                ; arg 2: elapsed ticks for %ld (al = low byte of diff here)
    call        _printf

    ; get conversion factor to get nanoseconds and store numerator and denominator
    ; in xmm0 and xmm1
    lea         rdi, [timebase_info]    ; arg 1: address of the numer/denom storage
    call        _mach_timebase_info     ; fills in the conversion factor to nanoseconds
    ; NOTE(review): movss copies 4 raw bytes as a single-precision bit pattern;
    ; it does not convert an integer to float. If numer/denom are integers
    ; (TODO confirm against <mach/mach_time.h>), cvtsi2sd would be needed.
    movss       xmm0, [numer]           ; 4 bytes starting at numer
    movss       xmm1, [denom]           ; 4 bytes starting at denom
    ; print numerator & denominator as float to ensure I am getting the info into xmm regs
    ; NOTE(review): %g consumes doubles, but xmm0/xmm1 were loaded with
    ; movss (single-precision scalars).
    lea         rdi, [time_base]
    mov         rax, 2                  ; al = 2 vector-register arguments
    call        _printf

    ; calc nanoseconds - xmm0 ends with nanoseconds
    ; NOTE(review): xmm0, xmm1 and xmm2 are all caller-saved, so the printf
    ; above may have overwritten every operand used here.
    mulss       xmm0, xmm2              ; multiply elapsed * numerator
    divss       xmm0, xmm1              ; divide by the denominator
    ; print nanoseconds as float
    ; NOTE(review): the active nanosecs_calc format uses %ld, which reads rsi;
    ; rsi is not set here, and the value in xmm0 is never consumed.
    lea         rdi, [nanosecs_calc]
    mov         rax, 1                  ; 1 non-int argument
    call        _printf

    pop         rbx                     ; undoes the stack alignment push
    ret                                 ; rax (exit status) is whatever the last printf returned

section .data

; Storage whose address is passed to _mach_timebase_info in _main.
; NOTE(review): `db 8` emits ONE byte holding the value 8; it does not
; reserve 8 bytes. numer is therefore at offset 0, denom at offset 1, and
; tv_sec below starts at offset 2. <mach/mach_time.h> declares numer and
; denom as 32-bit unsigned fields (TODO confirm); if so, each needs `dd 0`,
; and as written the call's 8-byte store overruns into tv_sec.
timebase_info:
    numer      db 8
    denom      db 8

; Sleep request passed to _nanosleep: two consecutive 64-bit fields,
; 1 in the first (seconds) and 0 in the second.
; NOTE(review): nanosleep takes a struct timespec, whose second field is
; tv_nsec; the timeval/tv_usec names look like a misnomer -- confirm.
timeval:
    tv_sec      dq 1
    tv_usec     dq 0

; printf format strings (10 = newline, 0 = NUL terminator)
time_absolute:  db "mach_absoute_time: %ld", 10, 0
diff_absolute:  db "absoute_time diff: %ld", 10, 0
time_base:      db "numerator: %g, denominator: %g", 10, 0
nanosecs_calc:  db "calc nanoseconds:  %ld", 10, 0
; using %g format also prints garbage
; nanosecs_calc:  db "calc nanoseconds:  %g", 10, 0

; 64-bit scratch variables written by _main
; should use registers but for clarity
start:          dq 0                    ; first mach_absolute_time reading
end:            dq 0                    ; second mach_absolute_time reading
diff:           dq 0                    ; end - start as stored by _main

标签: macostimenasm

解决方案


编辑:我找到问题所在了。xmm 寄存器属于调用者保存(caller-saved)的寄存器,调用 C 函数之后其内容可能被破坏,这就是乘法结果出错的原因。无论如何,下面通过 C 函数获取时基比率的变通方法可以正常工作,随后附上完整的测试代码。

解决方法是:在一个简短的 C 函数中通过 mach_timebase_info 获取比率,再把它与 mach_absolute_time 的差值相乘,从而得到纳秒数。

正如我所猜测的,在我的实际硬件(2013 年末款 MacBook Pro,2.3 GHz i7)上,mach_absolute_time 返回的已经是纳秒,因此 C 函数打印出的比率是 1.000(时基分子 = 1,时基分母 = 1)。

#include <stdio.h>
#include <mach/mach_time.h>

/*
 * timebase - factor that converts mach_absolute_time() ticks to nanoseconds.
 *
 * Queries mach_timebase_info() and returns numer / denom as a double.
 * The raw numerator/denominator and the resulting ratio are printed to
 * stdout as a diagnostic.
 *
 * Returns: the ratio (nanoseconds per tick), or 0.0 if the timebase
 *          could not be obtained or its denominator is zero.
 */
double timebase() {
    mach_timebase_info_data_t tb;
    double  ratio;

    /* Do not read tb unless the call succeeded; also guard the division. */
    if (mach_timebase_info(&tb) != KERN_SUCCESS || tb.denom == 0) {
        fprintf(stderr, "mach_timebase_info failed\n");
        return 0.0;
    }

    /*
     * numer and denom are unsigned integers; convert before dividing so the
     * fractional part is kept (integer division would turn 125/3 into 41).
     */
    ratio = (double)tb.numer / (double)tb.denom;
    printf("num: %u, den: %u\n", tb.numer, tb.denom);
    printf("ratio from C: %.3f\n", ratio);

    return ratio;
}

NASM:

    global      _main
    extern      _printf
    extern      _mach_absolute_time
    extern      _timebase
    extern      _nanosleep
    default     rel

section .text

;-----------------------------------------------------------------------
; int main(void)  -- measure a 1 s nanosleep with mach_absolute_time
; ABI:    System V AMD64 (macOS / Mach-O, C symbols carry a leading '_')
; Flow:   start = mach_absolute_time(); nanosleep(&timeval, NULL);
;         end = mach_absolute_time(); diff = end - start;
;         result = (long)(timebase() * (double)diff); each value is printed.
; Out:    eax = 0 (process exit status)
; Stack:  one push on entry keeps rsp 16-byte aligned at every call.
; Notes:  every register used for data here is caller-saved, so values
;         that must survive a call are kept in memory (start/end/diff).
;-----------------------------------------------------------------------
_main:
    push        rbx                     ; entry rsp % 16 == 8; the push realigns rsp to 16

    ; start measurement
    call        _mach_absolute_time     ; rax = tick count (hardware-dependent units)
    mov         [start], rax
    ; print start
    lea         rdi, [time_absolute]
    mov         rsi, rax
    xor         eax, eax                ; variadic call: al = 0 vector-register arguments
    call        _printf

    ; do some time intensive stuff - this simulates 1 s of work
    lea         rdi, [timeval]          ; arg 1: requested interval
    xor         esi, esi                ; arg 2: NULL, remaining time is not wanted
    call        _nanosleep

    ; end measurement
    call        _mach_absolute_time
    mov         [end], rax
    ; print end
    lea         rdi, [time_absolute]
    mov         rsi, rax
    xor         eax, eax                ; al = 0 vector-register arguments
    call        _printf

    ; calc elapsed with full 64-bit arithmetic; 32-bit registers would wrap
    ; once the interval exceeds 2^32 ticks
    mov         rsi, [end]
    sub         rsi, [start]            ; rsi = end - start
    mov         [diff], rsi             ; keep in memory: rsi does not survive calls

    ; print elapsed (rsi already holds the value)
    lea         rdi, [diff_absolute]
    xor         eax, eax                ; al = 0 vector-register arguments
    call        _printf

    ; get conversion ratio from the C helper
    call        _timebase               ; xmm0 = ticks -> nanoseconds ratio (double)
    ; Convert diff only after the call: xmm registers are caller-saved, so a
    ; value loaded earlier could be clobbered. The explicit qword makes the
    ; conversion read all 64 bits of diff instead of a dword.
    cvtsi2sd    xmm1, qword [diff]

    ; calc nanoseconds - xmm0 ends with nanoseconds
    mulsd       xmm0, xmm1              ; xmm0 = ratio * diff
    cvtsd2si    rax, xmm0               ; round to a 64-bit integer
    mov         [result], rax           ; save to result

    ; print nanoseconds as int
    lea         rdi, [nanosecs_calc]
    mov         rsi, rax
    xor         eax, eax                ; al = 0 vector-register arguments
    call        _printf

    xor         eax, eax                ; return 0 rather than printf's character count
    pop         rbx                     ; undoes the stack alignment push
    ret

section .data

; Sleep request passed to _nanosleep as its first argument: two
; consecutive 64-bit fields, 1 in the first (seconds) and 0 in the second.
; NOTE(review): nanosleep takes a struct timespec, whose second field is
; tv_nsec; the timeval/tv_usec names look like a misnomer -- confirm.
timeval:
    tv_sec      dq 1
    tv_usec     dq 0

; printf format strings (10 = newline, 0 = NUL terminator); each takes one
; 64-bit integer argument via %ld
time_absolute:  db "mach_absoute_time: %ld", 10, 0
diff_absolute:  db "absoute_time diff: %ld", 10, 0
nanosecs_calc:  db "nanoseconds:       %ld", 10, 0

; 64-bit scratch variables written by _main
; should use registers but for clarity
start:          dq 0                    ; first mach_absolute_time reading
end:            dq 0                    ; second mach_absolute_time reading
diff:           dq 0                    ; end - start, in ticks
result:         dq 0                    ; diff scaled by the ratio returned from _timebase

推荐阅读