macos - macOS 上使用 mach_absolute_time 的 NASM x64 - 无法获得纳秒(工作代码审查)
问题描述
我只是在学习 NASM,如果我犯了一些明显的错误,我很抱歉,但我不明白我做错了什么。
请查看下面的代码,让我知道什么是不正确的。它编译并运行正常,但结果打印垃圾。我知道来自的信息mach_absolute_time
取决于硬件,因此需要使用来自 struct 的信息进行调整mach_timebase_info
。
我创建了以下测试程序,人为地执行需要 1 秒。它打印开始、结束和经过的绝对马赫时间信息(奇怪的是,在我的机器上显示正确的纳秒量)。但是计算出的纳秒是垃圾 - 可能与我在数学/使用 xmm 寄存器和数据大小时犯的一些错误有关,但为了我的爱,我无法弄清楚。谢谢您的帮助!
; ----------------------------------------------------------------------------------------
; Testing mach_absolute_time
; nasm -fmacho64 mach.asm && gcc -o mach mach.o
; ----------------------------------------------------------------------------------------
global _main
extern _printf
extern _mach_absolute_time
extern _mach_timebase_info
extern _nanosleep
default rel
section .text
_main:
push rbx ; aligns the stack x C calls
; start measurement
call _mach_absolute_time ; get the absolute time hardware dependant
mov [start], rax ; save start in start
; print start
lea rdi, [time_absolute]
mov rsi, rax
call _printf
; do some time intensive stuff - This simulates 1 sec work
lea rdi, [timeval]
call _nanosleep
; end measurement
call _mach_absolute_time
mov [end], rax
; print end
lea rdi, [time_absolute]
mov rsi, rax
call _printf
; calc elapsed
mov r10d, [end]
mov r11d, [start]
sub r10d, r11d ; r10d = end - start
mov [diff], r10d ; copy to diff
mov rax, [diff] ; diff to rax to print as int
cvtsi2ss xmm2, r10d ; diff to xmm2 to calc nanoseconds
; print elapsed
lea rdi, [diff_absolute]
mov rsi, rax
call _printf
; get conversion factor to get nanoseconds and store numerator and denominator
; in xmm0 and xmm1
lea rdi, [timebase_info]
call _mach_timebase_info ; get conversion factor to nanoseconds
movss xmm0, [numer]
movss xmm1, [denom]
; print numerator & denominator as float to ensure I am getting the info into xmm regs
lea rdi, [time_base]
mov rax, 2
call _printf
; calc nanoseconds - xmm0 ends with nanoseconds
mulss xmm0, xmm2 ; multiply elapsed * numerator
divss xmm0, xmm1 ; divide by the denominator
; print nanoseconds as float
lea rdi, [nanosecs_calc]
mov rax, 1 ; 1 non-int argument
call _printf
pop rbx ; undoes the stack alignment push
ret
section .data
; _mach_timebase_info call struct
timebase_info:
numer db 8
denom db 8
; lazy way to set up 1 sec wait
timeval:
tv_sec dq 1
tv_usec dq 0
time_absolute: db "mach_absoute_time: %ld", 10, 0
diff_absolute: db "absoute_time diff: %ld", 10, 0
time_base: db "numerator: %g, denominator: %g", 10, 0
nanosecs_calc: db "calc nanoseconds: %ld", 10, 0
; using %g format also prints garbage
; nanosecs_calc: db "calc nanoseconds: %g", 10, 0
; should use registers but for clarity
start: dq 0
end: dq 0
diff: dq 0
解决方案
编辑:我知道出了什么问题。xmm reges 在 c 调用后被清除,这就是乘法和结果失败的原因。无论如何,下面获得时基比率的 C 解决方法可以正常工作,下面是测试的完整代码。
解决方法是从一个简短的 C 函数中获取比率mach_timebase_info
,并将其与结果相乘mach_absolute_time
以获得纳秒。
正如在我的实际硬件(2013 年末 MBP 2.3 i7)中所怀疑的那样,mach_absolute_time
已经返回纳秒,因此 C 打印的因子是 1.000。(时基分子 = 1,时基分母 = 1)
#include <stdio.h>
#include <mach/mach_time.h>
double timebase() {
double ratio;
mach_timebase_info_data_t tb;
mach_timebase_info(&tb);
ratio = tb.numer / tb.denom;
printf("num: %u, den: %u\n", tb.numer, tb.denom);
printf("ratio from C: %.3f\n", ratio);
return ratio;
}
NASM:
global _main
extern _printf
extern _mach_absolute_time
extern _timebase
extern _nanosleep
default rel
section .text
_main:
push rbx ; aligns the stack x C calls
; start measurement
call _mach_absolute_time ; get the absolute time hardware dependant
mov [start], rax ; save start in start
; print start
lea rdi, [time_absolute]
mov rsi, rax
call _printf
; do some time intensive stuff - This simulates 1 sec work
lea rdi, [timeval]
call _nanosleep
; end measurement
call _mach_absolute_time
mov [end], rax
; print end
lea rdi, [time_absolute]
mov rsi, rax
call _printf
; calc elapsed
mov r10d, [end]
mov r11d, [start]
sub r10d, r11d ; r10d = end - start
mov [diff], r10d ; copy to diff
mov rax, [diff] ; diff to rax to print as int
; print elapsed
lea rdi, [diff_absolute]
mov rsi, [diff]
call _printf
; get conversion ratio from C function
call _timebase ; get conversion ratio to nanoseconds into xmm0
cvtsi2sd xmm1, [diff] ; load diff from mach_absolute time in [diff]
; if you do it before register gets cleared
; calc nanoseconds - xmm0 ends with nanoseconds
; in my hardware ratio is 1.0 so mach_absolute_time = nanoseconds
mulsd xmm0, xmm1
cvtsd2si rax, xmm0
mov [result], rax ; save to result
; print nanoseconds as int
lea rdi, [nanosecs_calc]
mov rsi, [result]
call _printf
pop rbx ; undoes the stack alignment push
ret
section .data
; lazy way to set up 1 sec wait
timeval:
tv_sec dq 1
tv_usec dq 0
time_absolute: db "mach_absoute_time: %ld", 10, 0
diff_absolute: db "absoute_time diff: %ld", 10, 0
nanosecs_calc: db "nanoseconds: %ld", 10, 0
; should use registers but for clarity
start: dq 0
end: dq 0
diff: dq 0
result: dq 0
推荐阅读
- jquery - 无法下载与 href url 操作一起使用的文件形式的 JavaScript
- python - 在 tensorflow 中写入数据集并使用 decode_raw 读取
- c# - 使用 dotnet core HttpClient 上传原始字节数据
- node.js - 如何在 Mongo-DB 中删除多级引用模式
- python-3.x - Pytest模拟文件写入任意文件
- java - Unity 不喜欢它安装的 JDK
- mysql - 关于使用多个 SQL GROUP BY 语句的问题
- c - 我需要做什么才能获得正确的 PI 值?
- java - repository.update(parentEntity) 不会在事务上下文中立即生成子 ID
- google-app-engine - 有没有办法动态更新 App Engine 调度规则?