From: Branimir Maksimovic on
On Mon, 31 May 2010 13:34:10 +0200
Branimir Maksimovic <bmaxa(a)hotmail.com> wrote:

> > On Sun, 30 May 2010 05:16:03 -0400
> > Frank Kotler <fbkotler(a)myfairpoint.net> wrote:
> >
> > >
> > > My output from this is "21C" (with a bunch of zeros in front).
> > > With the "five byte padding" uncommented, it goes to "220". All
> > > we're "timing" is push edx/push eax/cpuid... is cpuid sensitive to
> > > alignment??? I would expect that if five bytes changes it, one
> > > byte would, too - but it doesn't (your mileage may vary)...
> >
>
> bmaxa(a)maxa:~/fasm/test$ fasm ttest.asm
> flat assembler version 1.68 (16384 kilobytes memory)
> 2 passes, 230 bytes.
> bmaxa(a)maxa:~/fasm/test$ ./ttest
> 000000000000019E
> 00000000000001A7
> 000000000000019E
> 00000000000001A7
> 00000000000001A7
> 00000000000001A7
> 00000000000001A7
> 00000000000001A7
> 00000000000001A7
> 00000000000001A7
> 000000000000019E
> 00000000000001A7
> 00000000000001A7
> 00000000000001A7
> 00000000000001A7
> 00000000000001A7
> bmaxa(a)maxa:~/fasm/test$ cat ttest.asm
> ; fasm myprog.asm
> ;
> ; from Branimir Maksimovic
> ; bugfixes from Nathan Baker
> ; cruft from fbk :)
>
> format ELF executable
>
> segment writeable executable
>
> entry $
>
> mov ebx, xtbl
>
> ;nop
> ;nop
> ;nop
> ;nop
> ;nop
> ;nop
>
> mov ecx,16
> l1:
> push ecx
>
> cpuid
> rdtsc
> push edx
> push eax
>
> das
> push eax
> pop eax
> push eax
> pop eax
>
> cpuid
> rdtsc
>
> pop ebx
> sub eax, ebx
> pop ecx
> sub edx, ecx
>
> mov edi, ascbuf
> call u64toha
>
> mov ecx, ascbuf
> mov edx, 17
> mov ebx, 1
> mov eax, 4
> int 80h
>
> pop ecx
> loop l1
>
> exit:
> mov eax, 1
> mov ebx,0
> int 80h
>
> xtbl db 30h,31h,32h,33h,34h,35h,36h,37h,38h,39h,41h,42h, \
> 43h,44h,45h,46h
>
> u64toha:
> add edi, 15
> mov ebx,xtbl
> mov cl, 16
> std
> l2:
> mov ch,al
> and al,0xf
> xlatb
> stosb
> mov al,ch
> ; shrd edx,eax,4
> shrd eax,edx,4
> shr edx, 4
> dec cl
> jz e1
> ; mov byte[edi], ','
> ; inc edi
> jmp l2
>
> e1:
> cld
> ret
>
> ascbuf db 17 dup (0xa)
> bmaxa(a)maxa:~/fasm/test$
>
> Greets!
>

bmaxa(a)maxa:~/fasm/test$ cat ttest.asm
; fasm myprog.asm
;
; from Branimir Maksimovic
; bugfixes from Nathan Baker
; cruft from fbk :)

format ELF executable

segment writeable executable

entry $

;-----------------------------------------------------------------------
; Timing harness (32-bit Linux, int 80h system calls).
; Runs the snippet under test 16 times; after each run it prints the
; elapsed time-stamp-counter delta as 16 hex digits plus a newline.
;
; Stack use per iteration: run counter, then start TSC (edx, then eax).
; cpuid overwrites eax/ebx/ecx/edx, which is why the run counter is kept
; on the stack rather than left in ecx.
; NOTE: eax is never loaded before cpuid, so the leaf it executes is
; whatever eax happens to hold; cpuid is used here only to serialize.
; NOTE: the second cpuid sits inside the measured interval.
;-----------------------------------------------------------------------

        mov     ebx, xtbl               ; dead store (cpuid and "pop ebx" overwrite ebx,
                                        ; u64toha reloads it); kept as five bytes of padding

; optional one-byte padding for alignment experiments (disabled here)
;nop
;nop
;nop
;nop
;nop
;nop

        mov     ecx, 16                 ; number of timed runs
l1:
        push    ecx                     ; save run counter across cpuid

        ; serialize, then take the start time stamp
        cpuid
        rdtsc
        push    edx                     ; start TSC, high dword
        push    eax                     ; start TSC, low dword

        ; ---- code under test ----
        das
        push    eax
        pop     eax
        push    eax
        pop     eax
        ; -------------------------

        ; serialize again, then take the end time stamp
        cpuid
        rdtsc

        ; edx:eax = end - start as one 64-bit subtraction
        pop     ebx                     ; start TSC, low dword
        sub     eax, ebx                ; low dwords; CF = borrow
        pop     ecx                     ; start TSC, high dword (pop leaves CF intact)
        sbb     edx, ecx                ; high dwords minus the borrow (a plain sub lost it)

        mov     edi, ascbuf
        call    u64toha                 ; format edx:eax as hex digits into ascbuf

        ; write(1, ascbuf, 17)
        mov     ecx, ascbuf
        mov     edx, 17                 ; 16 digits + trailing newline
        mov     ebx, 1                  ; fd 1 = stdout
        mov     eax, 4                  ; sys_write
        int     80h

        pop     ecx                     ; restore run counter
        dec     ecx
        jnz     l1

exit:
        mov     eax, 1                  ; sys_exit
        mov     ebx, 0                  ; exit status 0
        int     80h

; hex digit lookup table used by xlatb in u64toha: "0123456789ABCDEF"
xtbl db 30h,31h,32h,33h,34h,35h,36h,37h,38h,39h,41h,42h, \
43h,44h,45h,46h

;-----------------------------------------------------------------------
; u64toha - convert a 64-bit value to 16 uppercase hex ASCII digits.
; In:    edx:eax = value (edx = high dword, eax = low dword)
;        edi     = destination buffer, at least 16 bytes
; Out:   [edi .. edi+15] = digits, most significant digit first
; Clobb: eax, ebx, ecx, edx, edi, flags; DF is cleared on return
; Digits are produced from the least significant nibble upward and
; stored backwards (DF set), starting at the last byte of the field.
;-----------------------------------------------------------------------
u64toha:
add edi, 15 ; point at the last digit position
mov ebx,xtbl ; xlatb translates through [ebx + al]
mov cl, 16 ; 16 nibbles in a 64-bit value
std ; stosb now decrements edi
l2:
mov ch,al ; keep the full low byte; xlatb overwrites al
and al,0xf ; isolate the current low nibble
xlatb ; al = ASCII digit from xtbl
stosb ; store digit, step edi one byte down
mov al,ch ; restore the low byte before shifting
; shrd edx,eax,4
shrd eax,edx,4 ; shift edx:eax right by one nibble (low half)
shr edx, 4 ; ... and the high half
dec cl
jz e1 ; all 16 digits written
; mov byte[edi], ','
; inc edi
jmp l2

e1:
cld ; restore the default string direction
ret

; Output buffer: 17 bytes preset to 0x0A (newline). u64toha overwrites the
; first 16 with digits, so the 17th remains as the line terminator.
ascbuf db 17 dup (0xa)
bmaxa(a)maxa:~/fasm/test$

Greets!
--
http://maxa.homedns.org/

Sometimes online sometimes not

Svima je "dozvoljeno" biti idiot i
> mrak, ali samo neki to odaberu,


From: Branimir Maksimovic on
On Mon, 31 May 2010 14:35:01 +0200
Branimir Maksimovic <bmaxa(a)hotmail.com> wrote:

>
> format ELF executable
>
> segment writeable executable
>
> entry $
>
> mov ebx, xtbl
>
> ;nop
> ;nop
> ;nop
> ;nop
> ;nop
> ;nop
>
> mov ecx,16
> l1:
> push ecx
>
> cpuid
> rdtsc
> push edx
> push eax
>
> das
> push eax
> pop eax
> push eax
> pop eax
>
> cpuid
> rdtsc
>
> pop ebx
> sub eax, ebx
> pop ecx
> sub edx, ecx
>
> mov edi, ascbuf
> call u64toha
>
> mov ecx, ascbuf
> mov edx, 17
> mov ebx, 1
> mov eax, 4
> int 80h
>
> pop ecx
> dec ecx
> jnz l1
>
> exit:
> mov eax, 1
> mov ebx,0
> int 80h
>
> xtbl db 30h,31h,32h,33h,34h,35h,36h,37h,38h,39h,41h,42h, \
> 43h,44h,45h,46h
>
> u64toha:
> add edi, 15
> mov ebx,xtbl
> mov cl, 16
> std
> l2:
> mov ch,al
> and al,0xf
> xlatb
> stosb
> mov al,ch
> ; shrd edx,eax,4
> shrd eax,edx,4
> shr edx, 4
> dec cl
> jz e1
> ; mov byte[edi], ','
> ; inc edi
> jmp l2
>
> e1:
> cld
> ret
>
> ascbuf db 17 dup (0xa)
> bmaxa(a)maxa:~/fasm/test$
>
> Greets!

bmaxa(a)maxa:~/fasm/test$ fasm ttest.asm
flat assembler version 1.68 (16384 kilobytes memory)
2 passes, 231 bytes.
bmaxa(a)maxa:~/fasm/test$ ./ttest
0000000000000183
0000000000000183
0000000000000183
000000000000017A
000000000000017A
000000000000018C
000000000000017A
000000000000017A
000000000000017A
0000000000000183
0000000000000183
000000000000017A
000000000000017A
0000000000000183
0000000000000183
0000000000000183

bmaxa(a)maxa:~/fasm/test$ fasm ttest.asm
flat assembler version 1.68 (16384 kilobytes memory)
2 passes, 237 bytes.
bmaxa(a)maxa:~/fasm/test$ ./ttest
00000000000001B9
0000000000000183
000000000000017A
000000000000017A
0000000000000183
0000000000000183
000000000000017A
0000000000000183
000000000000017A
0000000000000183
000000000000017A
000000000000017A
000000000000017A
000000000000017A
0000000000000183
000000000000017A
bmaxa(a)maxa:~/fasm/test$ cat ttest.asm
; fasm myprog.asm
;
; from Branimir Maksimovic
; bugfixes from Nathan Baker
; cruft from fbk :)

format ELF executable

segment writeable executable

entry $

;-----------------------------------------------------------------------
; Timing harness (32-bit Linux, int 80h system calls), variant with six
; bytes of nop padding in front of the loop.
; Runs the snippet under test 16 times; after each run it prints the
; elapsed time-stamp-counter delta as 16 hex digits plus a newline.
;
; Stack use per iteration: run counter, then start TSC (edx, then eax).
; cpuid overwrites eax/ebx/ecx/edx, which is why the run counter is kept
; on the stack rather than left in ecx.
; NOTE: eax is never loaded before cpuid, so the leaf it executes is
; whatever eax happens to hold; cpuid is used here only to serialize.
; NOTE: the second cpuid sits inside the measured interval.
;-----------------------------------------------------------------------

        mov     ebx, xtbl               ; dead store (cpuid and "pop ebx" overwrite ebx,
                                        ; u64toha reloads it); kept as five bytes of padding

        ; six one-byte nops: padding that shifts the address of the loop
        nop
        nop
        nop
        nop
        nop
        nop

        mov     ecx, 16                 ; number of timed runs
l1:
        push    ecx                     ; save run counter across cpuid

        ; serialize, then take the start time stamp
        cpuid
        rdtsc
        push    edx                     ; start TSC, high dword
        push    eax                     ; start TSC, low dword

        ; ---- code under test ----
        das
        push    eax
        pop     eax
        push    eax
        pop     eax
        ; -------------------------

        ; serialize again, then take the end time stamp
        cpuid
        rdtsc

        ; edx:eax = end - start as one 64-bit subtraction
        pop     ebx                     ; start TSC, low dword
        sub     eax, ebx                ; low dwords; CF = borrow
        pop     ecx                     ; start TSC, high dword (pop leaves CF intact)
        sbb     edx, ecx                ; high dwords minus the borrow (a plain sub lost it)

        mov     edi, ascbuf
        call    u64toha                 ; format edx:eax as hex digits into ascbuf

        ; write(1, ascbuf, 17)
        mov     ecx, ascbuf
        mov     edx, 17                 ; 16 digits + trailing newline
        mov     ebx, 1                  ; fd 1 = stdout
        mov     eax, 4                  ; sys_write
        int     80h

        pop     ecx                     ; restore run counter
        dec     ecx
        jnz     l1

exit:
        mov     eax, 1                  ; sys_exit
        mov     ebx, 0                  ; exit status 0
        int     80h

; hex digit lookup table used by xlatb in u64toha: "0123456789ABCDEF"
xtbl db 30h,31h,32h,33h,34h,35h,36h,37h,38h,39h,41h,42h, \
43h,44h,45h,46h

;-----------------------------------------------------------------------
; u64toha - convert a 64-bit value to 16 uppercase hex ASCII digits.
; In:    edx:eax = value (edx = high dword, eax = low dword)
;        edi     = destination buffer, at least 16 bytes
; Out:   [edi .. edi+15] = digits, most significant digit first
; Clobb: eax, ebx, ecx, edx, edi, flags; DF is cleared on return
; Digits are produced from the least significant nibble upward and
; stored backwards (DF set), starting at the last byte of the field.
;-----------------------------------------------------------------------
u64toha:
add edi, 15 ; point at the last digit position
mov ebx,xtbl ; xlatb translates through [ebx + al]
mov cl, 16 ; 16 nibbles in a 64-bit value
std ; stosb now decrements edi
l2:
mov ch,al ; keep the full low byte; xlatb overwrites al
and al,0xf ; isolate the current low nibble
xlatb ; al = ASCII digit from xtbl
stosb ; store digit, step edi one byte down
mov al,ch ; restore the low byte before shifting
; shrd edx,eax,4
shrd eax,edx,4 ; shift edx:eax right by one nibble (low half)
shr edx, 4 ; ... and the high half
dec cl
jz e1 ; all 16 digits written
; mov byte[edi], ','
; inc edi
jmp l2

e1:
cld ; restore the default string direction
ret

; Output buffer: 17 bytes preset to 0x0A (newline). u64toha overwrites the
; first 16 with digits, so the 17th remains as the line terminator.
ascbuf db 17 dup (0xa)
bmaxa(a)maxa:~/fasm/test$

SO CATCH WAS IN LOOP INSTRUCTION ;)


Greets!
--
http://maxa.homedns.org/

Sometimes online sometimes not

Svima je "dozvoljeno" biti idiot i
> mrak, ali samo neki to odaberu,


From: wolfgang kern on

Frank Kotler wrote:
....
> ; five bytes here changes the timing
> ;mov ebx, xtbl
>
> ;nop
> ;nop
> ;nop
> ;nop
> ;nop
> ;nop ; six bytes changes it back
>
> mov ecx,16
> l1:
> push ecx
>
> ; serialize CPU and get start time
> cpuid
> rdtsc
> push edx
> push eax
>
> ; code to be timed
> ;--------------
> ;das
> ;push eax
> ;pop eax
> ;push eax
> ;pop eax
> ;--------------
****
> ; serialize cpu and get end time
> cpuid
this 'second serializing' may be the main amount of cycles you get.

> rdtsc
>
> ; calculate difference
> pop ebx
> sub eax, ebx
> pop ecx
> sub edx, ecx

....

> My output from this is "21C" (with a bunch of zeros in front). With the
> "five byte padding" uncommented, it goes to "220".
> All we're "timing" is push edx/push eax/cpuid... is cpuid sensitive to
> alignment???

No, but it serializes (timing depends on previous pipe/cache/prefetch...)

> I would expect that if five bytes changes it, one byte would, too
> - but it doesn't (your mileage may vary)...

these added NOPs may just alter the code prefetch or cache status?

> That first output you posted - varying between "1" and "A" - was that for
> an empty loop, or was that with "das" in there? I'm getting consistent
> results (if sometimes puzzling) for all 16 iterations... with anything but
> "das"...

An empty test-loop would contain 2 times MOV r,r (or PUSH r) and
one RDTSC. RDTSC timing is machine specific (5...else cycles).
I think I have posted this several times already:


; Cycle-count measurement sketch: runs the code under test twice with
; interrupts disabled and stores one 64-bit RDTSC delta per run at
; result_buf (2 runs x 8 bytes). "loop" here is a byte variable used as
; the run counter, not the LOOP instruction.
; NOTE(review): CLI/STI presumably require a privileged (ring 0 / DOS-like)
; environment - confirm before using from a user-mode program.
MOV esi,result_buf ;16 bytes needed yet here
MOV byte[loop],01 ;counter 1 -> 0 -> -1, i.e. two passes
CLI ;no IRQs between here and STI
L1:
;PUSH esi ;if desired/required
CPUID ;serialize before reading the start time
RDTSC
MOV ebx,eax ;or: PUSH eax
MOV ecx,edx ;or: PUSH edx
... ;code under test (must preserve what's needed)
RDTSC ;without serialising yet !!!
SUB eax,ebx ;or: SUB eax,[esp+4]
SBB edx,ecx ;or: SBB edx,[esp]
;or: ADD esp,8
;POP esi
MOV [esi],eax ;low dword of the cycle count
MOV [esi+4],edx ;store 64-bit cycle-count
ADD esi,8 ;advance to the next result slot
DEC byte [loop]
JNS L1 ;loop the above just one more time
STI

A first run (I intentionally avoid the term 'iterate' here)
would show a cycle count which implies cache-burst-reads.
The second test run follows immediately, still with IRQs disabled.

One million iterations may just measure OS-noise from IRQ-
taskswitching or whatsoever an OS may do behind your back.

I measure 6..7 cycles (almost just the RDTSC itself) for an
empty (single NOP) test on my current machine with the MOV
version, while the PUSH variant takes 1..2 cycles more.

Usually I ignore the first 64-bit result (caching time)
for code variants compare, but the info there is very
useful for proper code alignment.

Note:
this method shouldn't be used on huge code parts,
too long disabled IRQs can result in stuck hardware.

My timing check on DAS seems to confirm the AMD books: it
takes 8 cycles regardless of whether eax was altered beforehand,
but timing becomes worse if several of these 'vectored'
instructions are within one code-fetch range.
__
wolfgang


From: Mint on
On May 28, 5:42 am, Frank Kotler <fbkot...(a)myfairpoint.net> wrote:
> Branimir Maksimovic wrote:
> > On Thu, 27 May 2010 22:50:22 -0400
> > Frank Kotler <fbkot...(a)myfairpoint.net> wrote:

> Andy, are you still getting 340 cycles for any block of code?

> Best,
> Frank

Sorry for the late response.
This is my last code.

The results are "relatively" close to what they should be.
Even running from a boot disk, I get 55 cycles.

Here's the .exe.

Andy

begin 0666 timeit.exe
M35KT`0T``0`@`$$`__^``0`$```1`0``'@````$`$@$`````````````````
M````````````````````````````````````````````````````````````
M````````````````````````````````````````````````````````````
M````````````````````````````````````````````````````````````
M````````````````````````````````````````````````````````````
M````````````````````````````````````````````````````````````
M````````````````````````````````````````````````````````````
M````````````````````````````````````````````````````````````
M````````````````````````````````````````````````````````````
M````````````````````````````````````````````````````````````
M````````````````````````````````````````````````````````````
M``````````````````````!5B^Q3BUX$@#\`=`-#Z_B+PRM&!%O)PU6+[%!3
M45+_=(a)3HW/^#Q`*+R+L!`(M6!+1`S2%:65M8R<.T`<T6=`:T`,T6Z_2T`<T6
M=/JT`,T64+0!S19T!K0`S1;K]%C#58OL4%-14E=FBT8$BWX(9H7`>0=F]]C&
M!2U'9KL*````,\EF,])F]_-2068+P'7S6`0PB`5'XOC&!0!?6EE;6,G#!E.X
M0`".P+ML`&8FBP=;!\-5B^SHZ?]FB]!F`U8$Z-__9CO"<OC)PY"0D)"0D)"0
MD)"0D)"0D)!5B^Q34HM6!#/`B]C1ZA/#(])U^%I;R<.0D)"0D)"0D)"0D)"0
MD)"04S/`4HO8$\/1ZG7Z6A/#6\.X:P&.V(S3*]C!XP2.T`/C9F@R````Z(?_
M@\0$^F;'!B0`_____V;'!B@`_____V;'!BP`_____V;'!C``_____V;'!C0`
M9````&;'!C@`9````&939C/`#Z)F,\`/HF8SP`^B9EN0D)"0D)"0D)"0D)"0
MD)"0D)"0D)"0D)"09E-F,\`/H@\Q9EMF4F909E-F,\`/HF9;9E-F,\`/H@\Q
M9EMF668KP6999AO19CL6,`!U!68[!BP`<PEFHRP`9HD6,`!F(a)RXX``%UM&;'
M!C@`9````)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D&939C/`#Z(/,69;
M9E)F4&939C/`#Z)F6V939C/`#Z(/,69;9EEF*\%F668;T68[%B@`=05F.P8D
M`',)9J,D`&:)%B@`9H,N.``!=;1FH20`9BL&+`!FBQ8H`&8;%C``^V@&`&90
MZ/;]@\0&:`8`Z*;]@\0":#P`Z)W]@\0"^F;'!B0`_____V;'!B@`_____V;'
M!BP`_____V;'!C``_____V;'!C0`9````&;'!C@`9````&939C/`#Z)F,\`/
MHF8SP`^B9EN0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"09E-F,\`/H@\Q9EMF
M4F909E-F,\`/HF9;9E-F,\`/H@\Q9EMF668KP6999AO19CL6,`!U!68[!BP`
M<PEFHRP`9HD6,`!F(a)RXX``%UM&;'!C@`9````)"0D)"0D)"0D)"0D)"0D)"0
MD)"0D)"0D)"0D&939C/`#Z(/,69;9E)F4&939C/`#Z)F6V:XT/L!`&:YH/<#
M`&:99O?Y9E-F,\`/H@\Q9EMF668KP6999AO19CL6*`!U!68[!B0`<PEFHR0`
M9HD6*`!F(a)RXX``%UHV:A)`!F*P8L`&:+%B@`9AL6,`#[:`8`9E#HE?R#Q`9H
M!@#H1?R#Q`)H30#H//R#Q`+Z9L<&)`#_____9L<&*`#_____9L<&+`#_____
M9L<&,`#_____9L<&-`!D````9L<&.`!D````9E-F,\`/HF8SP`^B9C/`#Z)F
M6Y"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D&939C/`#Z(/,69;9E)F4&939C/`
M#Z)F6V939C/`#Z(/,69;9EEF*\%F668;T68[%C``=05F.P8L`',)9J,L`&:)
M%C``9H,N.``!=;1FQP8X`&0```"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0
MD)!F4V8SP`^B#S%F6V929E!F4V8SP`^B9EMF4V8SP`^B#S%F6V999BO!9EEF
M&]%F.Q8H`'4%9CL&)`!S"6:C)`!FB18H`&:#+C@``76T9J$D`&8K!BP`9HL6
M*`!F&Q8P`/MH!@!F4.A&^X/$!F@&`.CV^H/$`FA=`.CM^H/$`OIFQP8D`/__
M__]FQP8H`/____]FQP8L`/____]FQP8P`/____]FQP8T`&0```!FQP8X`&0`
M``!F4V8SP`^B9C/`#Z)F,\`/HF9;D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0
MD&939C/`#Z(/,69;9E)F4&939C/`#Z)F6V939C/`#Z(/,69;9EEF*\%F668;
MT68[%C``=05F.P8L`',)9J,L`&:)%C``9H,N.``!=;1FQP8X`&0```"0D)"0
MD)"0D)"0D)"0D)"0D)"0D)"0D)"0D)!F4V8SP`^B#S%F6V929E!F4V8SP`^B
M9EMFN-#[`0!FN:#W`P!FF6;W^6939C/`#Z(/,69;9EEF*\%F668;T68[%B@`
M=05F.P8D`',)9J,D`&:)%B@`9H,N.``!=:-FH20`9BL&+`!FBQ8H`&8;%C``
M^V@&`&90Z.7Y@\0&:`8`Z)7Y@\0":&X`Z(SY@\0"^F;'!B0`_____V;'!B@`
M_____V;'!BP`_____V;'!C``_____V;'!C0`9````&;'!C@`9````&939C/`
M#Z)F,\`/HF8SP`^B9EN0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)!F4V8SP`^B
M#S%F6V929E!F4V8SP`^B9EMF4V8SP`^B#S%F6V999BO!9EEF&]%F.Q8P`'4%
M9CL&+`!S"6:C+`!FB18P`&:#+C@``76T9L<&.`!D````D)"0D)"0D)"0D)"0
MD)"0D)"0D)"0D)"0D)"09E-F,\`/H@\Q9EMF4F909E-F,\`/HF9;9E-F,\`/
MH@\Q9EMF668KP6999AO19CL6*`!U!68[!B0`<PEFHR0`9HD6*`!F(a)RXX``%U
MM&:A)`!F*P8L`&:+%B@`9AL6,`#[:`8`9E#HEOB#Q`9H!@#H1OB#Q`)H?@#H
M/?B#Q`+Z9L<&)`#_____9L<&*`#_____9L<&+`#_____9L<&,`#_____9L<&
M-`!D````9L<&.`!D````9E-F,\`/HF8SP`^B9C/`#Z)F6Y"0D)"0D)"0D)"0
MD)"0D)"0D)"0D)"0D)!F4V8SP`^B#S%F6V929E!F4V8SP`^B9EMF4V8SP`^B
M#S%F6V999BO!9EEF&]%F.Q8P`'4%9CL&+`!S"6:C+`!FB18P`&:#+C@``76T
M9L<&.`!D````D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"09E-F,\`/H@\Q
M9EMF4F909E-F,\`/HF9;9KC0^P$`9KF@]P,`9IEF]_EF4V8SP`^B#S%F6V99
M9BO!9EEF&]%F.Q8H`'4%9CL&)`!S"6:C)`!FB18H`&:#+C@``76C9J$D`&8K
M!BP`9HL6*`!F&Q8P`/MH!@!F4.@U]X/$!F@&`.CE]H/$`FB/`.C<]H/$`OIF
MQP8D`/____]FQP8H`/____]FQP8L`/____]FQP8P`/____]FQP8T`&0```!F
MQP8X`&0```!F4V8SP`^B9C/`#Z)F,\`/HF9;D)"0D)"0D)"0D)"0D)"0D)"0
MD)"0D)"09E-F,\`/H@\Q9EMF4F909E-F,\`/HF9;9E-F,\`/H@\Q9EMF668K
MP6999AO19CL6,`!U!68[!BP`<PEFHRP`9HD6,`!F(a)RXX``%UM&;'!C@`9```
M`)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D&939C/`#Z(/,69;9E)F4&93
M9C/`#Z)F6V939C/`#Z(/,69;9EEF*\%F668;T68[%B@`=05F.P8D`',)9J,D
M`&:)%B@`9H,N.``!=;1FH20`9BL&+`!FBQ8H`&8;%C``^V@&`&90Z.;U@\0&
M:`8`Z);U@\0":)\`Z(WU@\0"^F;'!B0`_____V;'!B@`_____V;'!BP`____
M_V;'!C``_____V;'!C0`9````&;'!C@`9````&939C/`#Z)F,\`/HF8SP`^B
M9EN0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"09E-F,\`/H@\Q9EMF4F909E-F
M,\`/HF9;9E-F,\`/H@\Q9EMF668KP6999AO19CL6,`!U!68[!BP`<PEFHRP`
M9HD6,`!F(a)RXX``%UM&;'!C@`9````)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0
MD)"0D&939C/`#Z(/,69;9E)F4&939C/`#Z)F6V:XT/L!`&:YH/<#`&:99O?Y
M9E-F,\`/H@\Q9EMF668KP6999AO19CL6*`!U!68[!B0`<PEFHR0`9HD6*`!F
M(a)RXX``%UHV:A)`!F*P8L`&:+%B@`9AL6,`#[:`8`9E#HA?2#Q`9H!@#H-?2#
MQ`)HL`#H+/2#Q`+Z9L<&)`#_____9L<&*`#_____9L<&+`#_____9L<&,`#_
M____9L<&-`!D````9L<&.`!D````9E-F,\`/HF8SP`^B9C/`#Z)F6Y"0D)"0
MD)"0D)"0D)"0D)"0D)"0D)"0D&939C/`#Z(/,69;9E)F4&939C/`#Z)F6V93
M9C/`#Z(/,69;9EEF*\%F668;T68[%C``=05F.P8L`',)9J,L`&:)%C``9H,N
M.``!=;1FQP8X`&0```"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)!F4V8S
MP`^B#S%F6V929E!F4V8SP`^B9EMF4V8SP`^B#S%F6V999BO!9EEF&]%F.Q8H
M`'4%9CL&)`!S"6:C)`!FB18H`&:#+C@``76T9J$D`&8K!BP`9HL6*`!F&Q8P
M`/MH!@!F4.@V\X/$!F@&`.CF\H/$`FC``.C=\H/$`OIFQP8D`/____]FQP8H
M`/____]FQP8L`/____]FQP8P`/____]FQP8T`&0```!FQP8X`&0```!F4V8S
MP`^B9C/`#Z)F,\`/HF9;D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D&939C/`
M#Z(/,69;9E)F4&939C/`#Z)F6V939C/`#Z(/,69;9EEF*\%F668;T68[%C``
M=05F.P8L`',)9J,L`&:)%C``9H,N.``!=;1FQP8X`&0```"0D)"0D)"0D)"0
MD)"0D)"0D)"0D)"0D)"0D)!F4V8SP`^B#S%F6V929E!F4V8SP`^B9EMFN-#[
M`0!FN:#W`P!FF6;W^6939C/`#Z(/,69;9EEF*\%F668;T68[%B@`=05F.P8D
M`',)9J,D`&:)%B@`9H,N.``!=:-FH20`9BL&+`!FBQ8H`&8;%C``^V@&`&90
MZ-7Q@\0&:`8`Z(7Q@\0":-$`Z'SQ@\0"^F;'!B0`_____V;'!B@`_____V;'
M!BP`_____V;'!C``_____V;'!C0`9````&;'!C@`9````&939C/`#Z)F,\`/
MHF8SP`^B9EN0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)!F4V8SP`^B#S%F6V92
M9E!F4V8SP`^B9EMF4V8SP`^B#S%F6V999BO!9EEF&]%F.Q8P`'4%9CL&+`!S
M"6:C+`!FB18P`&:#+C@``76T9L<&.`!D````D)"0D)"0D)"0D)"0D)"0D)"0
MD)"0D)"0D)"09E-F,\`/H@\Q9EMF4F909E-F,\`/HF9;9E-F,\`/H@\Q9EMF
M668KP6999AO19CL6*`!U!68[!B0`<PEFHR0`9HD6*`!F(a)RXX``%UM&:A)`!F
M*P8L`&:+%B@`9AL6,`#[:`8`9E#HAO"#Q`9H!@#H-O"#Q`)HX0#H+?"#Q`+Z
M9L<&)`#_____9L<&*`#_____9L<&+`#_____9L<&,`#_____9L<&-`!D````
M9L<&.`!D````9E-F,\`/HF8SP`^B9C/`#Z)F6Y"0D)"0D)"0D)"0D)"0D)"0
MD)"0D)"0D)!F4V8SP`^B#S%F6V929E!F4V8SP`^B9EMF4V8SP`^B#S%F6V99
M9BO!9EEF&]%F.Q8P`'4%9CL&+`!S"6:C+`!FB18P`&:#+C@``76T9L<&.`!D
M````D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"09E-F,\`/H@\Q9EMF4F90
M9E-F,\`/HF9;9KC0^P$`9KF@]P,`9IEF]_EF4V8SP`^B#S%F6V999BO!9EEF
M&]%F.Q8H`'4%9CL&)`!S"6:C)`!FB18H`&:#+C@``76C9J$D`&8K!BP`9HL6
M*`!F&Q8P`/MH!@!F4.@E[X/$!F@&`.C5[H/$`FCR`.C,[H/$`OIFQP8D`/__
M__]FQP8H`/____]FQP8L`/____]FQP8P`/____]FQP8T`&0```!FQP8X`&0`
M``!F4V8SP`^B9C/`#Z)F,\`/HF9;D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0
M9E-F,\`/H@\Q9EMF4F909E-F,\`/HF9;9E-F,\`/H@\Q9EMF668KP6999AO1
M9CL6,`!U!68[!BP`<PEFHRP`9HD6,`!F(a)RXX``%UM&;'!C@`9````)"0D)"0
MD)"0D)"0D)"0D)"0D)"0D)"0D)"0D&939C/`#Z(/,69;9E)F4&939C/`#Z)F
M6V939C/`#Z(/,69;9EEF*\%F668;T68[%B@`=05F.P8D`',)9J,D`&:)%B@`
M9H,N.``!=;1FH20`9BL&+`!FBQ8H`&8;%C``^V@&`&90Z-;M@\0&:`8`Z(;M
M@\0":`(!Z'WM@\0"^F;'!B0`_____V;'!B@`_____V;'!BP`_____V;'!C``
M_____V;'!C0`9````&;'!C@`9````&939C/`#Z)F,\`/HF8SP`^B9EN0D)"0
MD)"0D)"0D)"0D)"0D)"0D)"0D)"09E-F,\`/H@\Q9EMF4F909E-F,\`/HF9;
M9E-F,\`/H@\Q9EMF668KP6999AO19CL6,`!U!68[!BP`<PEFHRP`9HD6,`!F
M(a)RXX``%UM&;'!C@`9````)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D&93
M9C/`#Z(/,69;9E)F4&939C/`#Z)F6V:XT/L!`&:YH/<#`&:99O?Y9E-F,\`/
MH@\Q9EMF668KP6999AO19CL6*`!U!68[!B0`<PEFHR0`9HD6*`!F(a)RXX``%U
MHV:A)`!F*P8L`&:+%B@`9AL6,`#[:`8`9E#H=>R#Q`9H!@#H)>R#Q`)H$P'H
M'.R#Q`+Z9L<&)`#_____9L<&*`#_____9L<&+`#_____9L<&,`#_____9L<&
M-`!D````9L<&.`!D````9E-F,\`/HF8SP`^B9C/`#Z)F6Y"0D)"0D)"0D)"0
MD)"0D)"0D)"0D)"0D&939C/`#Z(/,69;9E)F4&939C/`#Z)F6V939C/`#Z(/
M,69;9EEF*\%F668;T68[%C``=05F.P8L`',)9J,L`&:)%C``9H,N.``!=;1F
MQP8X`&0```"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)!F4V8SP`^B#S%F
M6V929E!F4V8SP`^B9EMF4V8SP`^B#S%F6V999BO!9EEF&]%F.Q8H`'4%9CL&
M)`!S"6:C)`!FB18H`&:#+C@``76T9J$D`&8K!BP`9HL6*`!F&Q8P`/MH!@!F
M4.(a)FZX/$!F@&`.C6ZH/$`F@C`>C-ZH/$`OIFQP8D`/____]FQP8H`/____]F
MQP8L`/____]FQP8P`/____]FQP8T`&0```!FQP8X`&0```!F4V8SP`^B9C/`
M#Z)F,\`/HF9;D)"0D)"0D)"0D)"0D)"0D)"0D)"0D)"0D&939C/`#Z(/,69;
M9E)F4&939C/`#Z)F6V939C/`#Z(/,69;9EEF*\%F668;T68[%C``=05F.P8L
M`',)9J,L`&:)%C``9H,N.``!=;1FQP8X`&0```"0D)"0D)"0D)"0D)"0D)"0
MD)"0D)"0D)"0D)!F4V8SP`^B#S%F6V929E!F4V8SP`^B9EMFN-#[`0!FN:#W
M`P!FF6;W^6939C/`#Z(/,69;9EEF*\%F668;T68[%B@`=05F.P8D`',)9J,D
M`&:)%B@`9H,N.``!=:-FH20`9BL&+`!FBQ8H`&8;%C``^V@&`&90Z,7I@\0&
M:`8`Z'7I@\0":#0!Z&SI@\0"Z(CIM$S-(0``````````````````````````
M```````````````````````````````````````````````@8WEC;&5S+"!E
M;7!T>0T*`"!C>6-L97,L('1E<W0-"@`@8WEC;&5S+"!E;7!T>0T*`"!C>6-L
M97,L('1E<W0-"@`@8WEC;&5S+"!E;7!T>0T*`"!C>6-L97,L('1E<W0-"@`@
M8WEC;&5S+"!E;7!T>0T*`"!C>6-L97,L('1E<W0-"@`@8WEC;&5S+"!E;7!T
M>0T*`"!C>6-L97,L('1E<W0-"@`@8WEC;&5S+"!E;7!T>0T*`"!C>6-L97,L
M('1E<W0-"@`@8WEC;&5S+"!E;7!T>0T*`"!C>6-L97,L('1E<W0-"@`@8WEC
=;&5S+"!E;7!T>0T*`"!C>6-L97,L('1E<W0-"@!L
`
end


; timeit.asm Masm code 55 cycles under cmd
;
; MASM program setup. The doubled leading dots in the posted text were
; news-transport dot-stuffing; MASM directives take a single dot.
.model small, c                 ; small memory model, C naming/calling convention
.686                            ; allow the 686 instruction set (cpuid, rdtsc, 32-bit regs)
.stack                          ; default-sized stack segment
include dosctr.asm              ; presumably supplies the macros/procs used below
                                ; (codealign, ctr_begin, ctr_end, print, dword$,
                                ; sleep, waitkey) - none are defined in this file

.data

.code

codealign 8
;-----------------------------------------------------------------------
; CtBits - count the set bits in a word argument.
; In:    arg = value passed on the stack
; Out:   ax  = number of 1 bits in arg
; Saves: bx, dx (via USES)
; Each pass shifts the lowest bit of dx into CF and adds it to ax.
;-----------------------------------------------------------------------
CtBits proc uses bx dx arg

        xor     bx, bx                  ; bx = 0, constant addend for adc
        mov     ax, bx                  ; running count = 0
        mov     dx, arg                 ; working copy of the value

@@:     shr     dx, 1                   ; CF = bit shifted out
        adc     ax, bx                  ; count += CF
        test    dx, dx                  ; any bits left?
        jnz     @B

        ret

CtBits endp

codealign 8
;-----------------------------------------------------------------------
; CBits - count the set bits in DX.
; In:    dx = value
; Out:   ax = number of 1 bits in dx
; Saves: bx, dx
; The adc at the top of the loop adds the carry produced by the previous
; shift; one more adc after the loop collects the final shifted-out bit.
;-----------------------------------------------------------------------
CBits PROC

        push    bx
        push    dx
        xor     bx, bx                  ; bx = 0 and CF = 0 for the first adc
        mov     ax, bx                  ; running count = 0 (mov keeps CF clear)

next_bit:
        adc     ax, bx                  ; count += bit shifted out last time
        shr     dx, 1                   ; CF = next bit, ZF = 1 once dx is empty
        jnz     next_bit                ; jnz tests ZF only; CF survives

        adc     ax, bx                  ; add the last carry bit
        pop     dx                      ; pops do not touch the flags
        pop     bx

        ret

CBits ENDP

; Program entry: eight times over, time an empty counter block and then a
; block containing a 32-bit idiv, printing the cycle count left in eax by
; ctr_end each time. (Leading dots restored: the posted "..startup" and
; "..exit" were dot-stuffed by the news transport.)
.startup

invoke sleep, 50

REPEAT 8

; baseline: nothing between ctr_begin and ctr_end
ctr_begin 100
ctr_end
print dword$(eax)," cycles, empty",13,10

ctr_begin 100 ; 100 is number of reps

; something slow (clock figures are the original author's estimates)
mov eax,130000d ; dividend, low dword - 1 clock
mov ecx,260000d ; divisor - 1 clock
cdq ; sign-extend eax into edx:eax - 1 clock
idiv ecx ; divide edx:eax by ecx - 43 clocks

ctr_end
print dword$(eax)," cycles, test",13,10

ENDM

call waitkey

.exit
end