您的位置:首页 > 其它

单指令的时间测试

2011-04-05 18:50 344 查看
测试小指令片段的执行时间,比如一条指令执行100次所需的时钟周期数。
comment %
TSCTestB32.asm    (c) 2005-07-16 Agner Fog
Updated           (c) 2011-04-05 G-Spider

Test program to measure how many clock cycles a little piece of code takes.

The program will execute the code to test NUMTESTS times and then print
out a list of clock counts.

Instructions:

Insert the code to test at the place marked "### Test code here ###"
in TSCTestB32.asm

Compile and link for Windows, console mode, 32  bits.

(c) 2004 GNU General Public License www.gnu.org/copyleft/gpl.html

ml  /c /coff  TSCTestB32.asm
link /subsystem:console TSCTestB32.obj

%

.686p
.model flat,stdcall
option casemap:none

include windows.inc
include user32.inc
include kernel32.inc
include msvcrt.inc

includelib user32.lib
includelib kernel32.lib
includelib msvcrt.lib

; ###################### DEFINE CONSTANTS ######################

; set number of times to run test
NUMTESTS = 20

; ###################### END DEFINE CONSTANTS ###################
; Data segment. overhead and clocks are written by ClockTest; fmt and
; szPause are read-only strings used by the entry code.
.data
overhead 	qword   ?               ; timing overhead (clock count of the empty measuring loop, stored by ClockTest)
clocks   	dword   ?               ; pointer to results array (ClockTest stores its lpSrc argument here)
fmt		db	'%10I64i',0dh,0ah,0	; printf format: one 64-bit integer, field width 10, followed by CR LF
szPause		db	'Pause',0	; command string handed to crt_system before the program exits

; Uninitialised data segment.
.data?
ClocksArr	qword	NUMTESTS dup (?)	; results buffer: one qword clock count per test run

.code

;=======================================================
;-----------------------------------------------------------------------
; ClockTest - measure the clock cycles taken by the code placed at
;             "### Test code here ###", NUMTESTS times in a row.
;
; ABI:   32-bit stdcall. MASM generates the ebp frame, the register
;        saves/restores listed in USES, and the "ret 4" epilogue.
; In:    lpSrc = address of an array of NUMTESTS qwords.
; Out:   eax      = NUMTESTS
;        [lpSrc]  = for each run, the time stamp counter difference
;                   minus the measured overhead
;        overhead = smallest difference seen in the empty measuring
;                   loop (the first run is excluded if NUMTESTS > 1)
;        clocks   = lpSrc
; Clobb: ecx, edx, flags.  ebx, esi, edi and ebp are preserved.
;
; ebp is used as the byte offset into the results array inside the
; loops, so lpSrc must not be referenced between the explicit
; "push ebp" and "pop ebp" below.
;-----------------------------------------------------------------------
ClockTest	proc	uses ebx esi edi, lpSrc:DWORD
        push    ebp                     ; save frame pointer; ebp becomes the loop index
        mov     esi, lpSrc
        mov     clocks, esi

        ; read the whole list once to get it into the cache
        cld
        mov     ecx, NUMTESTS * 2       ; two dwords per qword entry
        rep     lodsd

        ; ---- dummy loop without test code: measures the overhead ----
        ; The instruction sequence must stay identical to CLOOP2 (minus
        ; the test code), otherwise the overhead would not match.
        xor     ebp, ebp
        mov     edi, clocks
CLOOP1:
        sub     eax, eax
        cpuid                           ; serialize
        rdtsc                           ; read time stamp counter into edx:eax
        mov     [edi+ebp], eax          ; save start count, low dword
        mov     [edi+ebp+4], edx        ; save start count, high dword
        sub     eax, eax
        cpuid                           ; serialize again

        ; void to measure overhead only

        sub     eax, eax
        cpuid                           ; serialize
        rdtsc                           ; read time stamp counter into edx:eax
        sub     eax, [edi+ebp]          ; edx:eax = end count - start count
        sbb     edx, [edi+ebp+4]
        mov     [edi+ebp], eax          ; save difference in list
        mov     [edi+ebp+4], edx
        sub     eax, eax
        cpuid                           ; serialize again

        add     ebp, 8
        cmp     ebp, NUMTESTS*8
        jb      CLOOP1                  ; dummy loop end

        ; ---- find the smallest overhead count (unsigned 64-bit) ----
        mov     edi, clocks
        mov     eax, -1
        cdq                             ; edx:eax = 0FFFFFFFFFFFFFFFFh, the initial minimum
        xor     ebx, ebx                ; bl receives the low-dword compare result
        xor     ecx, ecx                ; cl receives the final "entry < minimum" result
IF NUMTESTS GT 1
        mov     [edi], eax              ; exclude first count from minimum
        mov     [edi+4], edx
ENDIF
        xor     ebp, ebp
MINLOOP:
        ; (If cpu doesn't support conditional moves then replace all cmov by conditional jumps)
        cmp     [edi+ebp], eax          ; bl = 1 if entry.low < min.low
        setb    bl
        cmp     [edi+ebp+4], edx        ; cl = 1 if entry.high < min.high
        setb    cl
        cmove   ecx, ebx                ; high dwords equal: the low-dword result decides.
                                        ; The source register must be ebx (bl), not the
                                        ; loop offset in ebp, or every entry with an equal
                                        ; high dword would replace the minimum.
        test    cl, cl                  ; setb/cmov leave flags alone; re-test the decision
        cmovnz  eax, [edi+ebp]          ; entry is smaller: it becomes the new minimum
        cmovnz  edx, [edi+ebp+4]
        add     ebp, 8
        cmp     ebp, NUMTESTS*8
        jb      MINLOOP
        mov     dword ptr overhead, eax ; save minimum overhead
        mov     dword ptr overhead+4, edx

        ; ---- loop to measure clock cycles of test code ----
        xor     ebp, ebp
        mov     edi, clocks
CLOOP2:
        sub     eax, eax
        cpuid                           ; serialize
        rdtsc                           ; read time stamp counter into edx:eax
        mov     [edi+ebp], eax          ; save start count, low dword
        mov     [edi+ebp+4], edx        ; save start count, high dword
        sub     eax, eax
        cpuid                           ; serialize again

; ###################### Test code here ######################

; put your test code here. It must leave edi, ebp and esp intact,
; because the measuring code relies on them. Example:

REPT 100          ; repeat code 100 times
;shr eax, 5
nop
;adc eax,5
;add eax,5
ENDM

; ###################### Test code end  ######################
        sub     eax, eax
        cpuid                           ; serialize
        rdtsc                           ; read time stamp counter into edx:eax
        sub     eax, [edi+ebp]          ; edx:eax = end count - start count
        sbb     edx, [edi+ebp+4]
        mov     [edi+ebp], eax          ; save difference in list
        mov     [edi+ebp+4], edx
        sub     eax, eax
        cpuid                           ; serialize again

        add     ebp, 8
        cmp     ebp, NUMTESTS*8
        jb      CLOOP2

        ; ---- subtract overhead from clock counts ----
        xor     ebp, ebp
        mov     edi, clocks
        mov     eax, dword ptr overhead
        mov     edx, dword ptr overhead + 4
OVHLOOP:
        sub     [edi+ebp], eax          ; 64-bit subtract: low dword, then high with borrow
        sbb     [edi+ebp+4], edx
        add     ebp, 8
        cmp     ebp, NUMTESTS*8
        jb      OVHLOOP

        mov     eax, NUMTESTS           ; return the number of results written
        pop     ebp                     ; restore frame pointer for the generated epilogue
        ret
ClockTest	endp

;-----------------------------------------------------------------------
; Program entry point.
; Raises the process priority class, runs ClockTest on ClocksArr,
; restores the normal priority class, prints the NUMTESTS results one
; per line with printf, runs the "Pause" command and exits.
;-----------------------------------------------------------------------
start:
        ; Raise the priority for the duration of the measurement. The
        ; process handle is parked on the stack across the ClockTest call.
        invoke  GetCurrentProcess
        push    eax
        invoke  SetPriorityClass, eax, REALTIME_PRIORITY_CLASS

        ; run the tests
        invoke  ClockTest, offset ClocksArr

        ; set priority back to normal
        pop     eax
        invoke  SetPriorityClass, eax, NORMAL_PRIORITY_CLASS

        ; Print every 64-bit result. esi walks the array and edi counts
        ; down; both are kept in registers that the C runtime preserves
        ; across the printf call.
        mov     esi, offset ClocksArr
        mov     edi, NUMTESTS
PRINTLOOP:
        invoke  crt_printf, offset fmt, qword ptr [esi]
        add     esi, 8                  ; advance to the next qword result
        sub     edi, 1
        jnz     PRINTLOOP

        ; keep the console window open until a key is pressed, then exit
        invoke  crt_system, offset szPause
        invoke  ExitProcess, 0
end 	start
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: