|
Prev: Windows Assembly
Next: resource files and assembler
From: ?a/b on 14 Sep 2005 13:48 I take some time for doing a windows95 .dll but with no success so i thank hutch for let me pass some good time thinking on qsort and so i have some question: If the array 'b' in qsortu_m(char* b, int n, int (*cmp)(U, U)); is not dword aligned the function qsortu_m() is good the same? I have tested qsort_m() for array of element-size 4(a dword) in your opinion it goes ok for any array of some other element-size? Do you see any error? if your assembly is better than mine your qsort will be better than mine. But is your qsort better than mine? :) Thanks ; b=cmp, _qsortr_m(U left, U right) ; 0Ra, 4(a)left, 8(a)right _qsortr_m: %define @left esp+4 %define @right esp+8 mov esi, [@left] mov edi, [@right] cmp esi, edi jae .ck lea eax, [esi+edi] lea ebp, [esi+4] shr eax, 1 mov ecx, esi and eax, 0xFFFFFFFC and ecx, 0x3 or eax, ecx ; array=..4, ..8, ..12 ecc ; array=..1, ..5, ..9 ; array=..2, ..6, ..10 mov edx, [esi] mov ecx, [eax] mov [eax], edx mov [esi], ecx push ecx jmp short .c2 ..ck: jmp short .cf ..c0: push dword[ebp] call ebx add esp, 4 cmp eax, 0 jge .c1 add esi, 4 mov edx, [esi] mov ecx, [ebp] mov [esi], ecx mov [ebp], edx ..c1: add ebp, 4 ..c2: cmp ebp, edi jbe .c0 pop eax mov edx, [@left] mov ecx, [esi] mov [edx], ecx mov [esi], eax ; r=(a)left, j=(a)right, i=last lea eax, [esi-4] push eax push edx call _qsortr_m add esp, 4 pop eax mov edi, [@right] add eax, 8 push edi push eax call _qsortr_m add esp, 8 ..cf: %undef @left %undef @right ret ; int _qsortu_m<(unsigned* ris, int n, int (*cmp)(U , U)) ; serve per ordinare array di ogg di dimensione 4char ; per tali oggetti utilizzare questa funzione e non qsort_m ; s= 0k, 4j, 8i, 12b, 16Ra, 20(a)ris, 24@n, 28(a)cmp _qsortu_m: push ebx push esi push edi push ebp %define @ris esp+20 %define @n esp+24 %define @cmp esp+28 mov eax, [@ris] mov edx, [@n] mov ebx, [@cmp] cmp edx, 2 jl .c1 cmp eax, 0 je .ce cmp ebx, 0 je .ce dec edx lea esi, [eax+4*edx] push esi push eax call _qsortr_m add esp, 8 ..c1: mov eax, 1 jmp short .cf ..ce: xor eax, eax ..cf: %undef @ris %undef @n %undef @cmp pop ebp pop edi pop esi pop ebx ret ; int _qsortu1_m<(unsigned* ris, int n, int (*cmp)(U , U)) ; utilizzato da qsort_m ; s= 0Ra, 4(a)ris, 8@n, 12(a)cmp _qsortu1_m: %define @ris esp+4 %define @n esp+8 %define @cmp esp+12 mov eax, [@ris] mov edx, [@n] mov ebx, [@cmp] dec edx lea esi, [eax+4*edx] push esi push eax call _qsortr_m add esp, 8 %undef @ris %undef @n %undef @cmp ret ; int _qsort_m<(char* res, char** ppres, char* orig, ; int n, int size, int (*cmp)(char*, char*) ) ; s = 0k, 4j, 8i, 12b, 16Ra, ; 20(a)res, 24(a)ppres, 28(a)orig, 32@n, 36(a)size, 40(a)cmp _qsort_m: push ebx push esi push edi push ebp %define @res esp+20 %define @ppres esp+24 %define @orig esp+28 %define @n esp+32 %define @size esp+36 %define @cmp esp+40 mov ecx, [@size] mov edi, [@ppres] mov eax, [@n] mov edx, [@orig] cmp ecx, 0 jle .ce cmp eax, 0 jle .ce cmp eax, 2 jle .re cmp edi, 0 je .ce cmp edx, 0 je .ce cmp dword[@cmp], 0 je .ce jmp short .c1 ..ce: xor eax, eax jmp .cf ..re: jmp .cz ..c1: mov [edi], edx add edi, 4 add edx, ecx dec eax jnz .c1 mov edi, [@ppres] mov eax, [@n] mov edx, [@cmp] push edx push eax push edi call _qsortu1_m add esp, 12 mov esi, [@ppres] mov edi, [@res] mov ecx, [@n] mov ebp, [@size] test ebp, 0x2 jnz .u2 test ebp, 0x1 jnz .u1 shr ebp, 2 ..c2: mov eax, [esi] mov ebx, ebp ..c3: mov edx, [eax] mov [edi], edx add eax, 4 add edi, 4 dec ebx jnz .c3 add esi, 4 dec ecx jnz .c2 jmp short .cz ..u2: shr ebp, 1 ..c4: mov eax, [esi] mov ebx, ebp ..c5: mov dx, [eax] mov [edi], dx add eax, 2 add edi, 2 dec ebx jnz .c5 add esi, 4 dec ecx jnz .c4 jmp short .cz ..u1: mov eax, [esi] mov ebx, ebp ..c6: mov dl, [eax] mov [edi], dl inc eax inc edi dec ebx jnz .c6 add esi, 4 dec ecx jnz .u1 ..cz: mov eax, 1 ..cf: %undef @res %undef @ppres %undef @orig %undef @n %undef @size %undef @cmp pop ebp pop edi pop esi pop ebx ret #include <stdio.h> #include <stdlib.h> #include <time.h> #define R return #define F for #define P printf #define U unsigned #define W while void qsortu_m(char* b, int n, int (*cmp)(U, U)); int qsort_m(char* res, char* ppres, char* orig, int n, int size, int (*cmp)(char*, char*) ); void qsortu_c(char* b, int l, int r, int (*cmp)(U, U)) {int i, last; U *v, tmp; /////////////// if(l>=r) R; v=( U *) b; tmp=v[l]; i=(l+r)/2; v[l]=v[i]; v[i]=tmp; last=l; F( i=l+1; i<=r; ++i ) if((*cmp)(v[i], v[l]) < 0) { ++last; tmp=v[last]; v[last]=v[i]; v[i]=tmp;} tmp=v[l]; v[l]=v[last]; v[last]=tmp; qsortu_c(b, l, last-1, cmp); qsortu_c(b, last+1, r, cmp); } void print_array(U* b, int len) {int i; /////////////////// if(len<=0||b==0) R; F(i=0; i<len; ++i) P("%u ", b[i]); P("\n"); } void random_array(U* b, int len) {static int led=0; int i; //////////////////// if(len<=0||b==0) R; if(led==0){ led=1; srand((U)time(0));} F(i=0; i<len; ++i) b[i]=rand(); } int cmpu(U a, U b) { if(a<b) R -1; else if(a>b) R 1; R 0; } int cmpx(char* aa, char* bb) {U *a, *b; a=aa; b=bb; if(*a<*b) R -1; else if(*a>*b) R 1; R 0; } int main(void) {unsigned a[800], b[800], *c[800], i, h, j; time_t ti, tf; //////////////////////////////////// /* random_array(a, 500); print_array(a, 51); //qsortu_c(a, 0, 10, cmpu); P("a=%p b=%p c=%p\n", (void*)a, (void*)b, (void*)c); qsortu_m(a, 50, cmpu); print_array(a, 51); */ /* random_array(a, 50); print_array(a, 51); qsort_m( b, &c, a, 50, 4, cmpx ); print_array(b, 51); random_array(a, 50); print_array(a, 51); qsort( &a, 50, 4, (int (*)(const void*, const void*))cmpx ); print_array(b, 51); */ /* F(j=0; j<10000; ++j) {h=rand()%500; ++h; random_array(a, h); F(i=0; i<h; ++i) b[i]=a[i]; qsortu_c(a, 0, h-1, cmpu); qsortu_m(b, h, cmpu); F(i=0; i<h; ++i) if(b[i]!=a[i]) {P("ERRORE\n"); goto fine;} } fine:; P("j==%u\n", j); */ random_array(a, 800); h=rand(); h<<=16; h|=rand(); ti=time(0); F(j=0; j<30000; ++j) {F( i=0; i<500; ++i) a[i]^=h; qsortu_c(a, 0, 499, cmpu); } tf=time(0); P("K&R qsort == %g\n", difftime(tf, ti) ); random_array(a, 800); h=rand(); h<<=16; h|=rand(); ti=time(0); F(j=0; j<30000; ++j) {F( i=0; i<500; ++i) a[i]^=h; qsortu_m(a, 500, cmpu); } tf=time(0); P("my qsort == %g\n", difftime(tf, ti) ); random_array(a, 800); h=rand(); h<<=16; h|=rand(); ti=time(0); F(j=0; j<30000; ++j) {F( i=0; i<500; ++i) a[i]^=h; qsort( &a, 500, 4, (int (*)(const void*, const void*))cmpx ); } tf=time(0); P("my qsys == %g\n", difftime(tf, ti) ); random_array(a, 800); h=rand(); h<<=16; h|=rand(); ti=time(0); F(j=0; j<30000; ++j) {F( i=0; i<500; ++i) a[i]^=h; qsort_m( b, &c, a, 500, 4, cmpx ); } tf=time(0); P("my msys == %g\n", difftime(tf, ti) ); R 0; } /* b=cmp, _qsortr_m(U left, U right) /* 0Ra, 4(a)left, 8(a)right _qsortr_m: << @left=s+4, @right=s+8 i=*@left; j=*@right; i>=j#.ck; a=&[i+j]; k=&[i+4]; a>>=1; c=i; a&=0xFFFFFFFC; c&=0x3; a|=c; /* array=..4, ..8, ..12 ecc /* array=..1, ..5, ..9 /* array=..2, ..6, ..10 r=*i; c=*a; *a=r; *i=c; < c; #.c2; .ck: #.cf; .c0: b<(D[k]); a<0?!#.c1; {i+=4; r=*i; c=*k; *i=c; *k=r;} .c1: k+=4; .c2: k<=j#.c0; > a r=*@left; c=*i; *r=c; *i=a; /* r=(a)left, j=(a)right, i=last a=&[i-4]; < a _qsortr_m<(r); > a j=*@right; a+=8; _qsortr_m<(a, j); ..cf: >> @left, @right ret /* int _qsortu_m<(unsigned* ris, int n, int (*cmp)(U , U)) /* serve per ordinare array di ogg di dimensione 4char /* per tali oggetti utilizzare questa funzione e non qsort_m /* s= 0k, 4j, 8i, 12b, 16Ra, 20(a)ris, 24@n, 28(a)cmp _qsortu_m: < b, i, j, k << @ris=s+20, @n=s+24, @cmp=s+28 a=*@ris; r=*@n; b=*@cmp; r<2?#.c1; a==0 #.ce; b==0#.ce; --r; i=&[a+4*r]; _qsortr_m<(a, i); ..c1: a=1; #.cf; ..ce: a^=a; ..cf: >> @ris, @n, @cmp > b, i, j, k ret /* int _qsortu1_m<(unsigned* ris, int n, int (*cmp)(U , U)) /* utilizzato da qsort_m /* s= 0Ra, 4(a)ris, 8@n, 12(a)cmp _qsortu1_m: << @ris=s+4, @n=s+8, @cmp=s+12 a=*@ris; r=*@n; b=*@cmp; --r; i=&[a+4*r]; _qsortr_m<(a, i); >> @ris, @n, @cmp ret /* int _qsort_m<(char* res, char** ppres, char* orig, /* int n, int size, int (*cmp)(char*, char*) ) /* s = 0k, 4j, 8i, 12b, 16Ra, /* 20(a)res, 24(a)ppres, 28(a)orig, 32@n, 36(a)size, 40(a)cmp _qsort_m: < b, i, j, k << @res=s+20, @ppres=s+24, @orig=s+28, @n=s+32, @size=s+36, @cmp=s+40 c=*@size; j=*@ppres; a=*@n; r=*@orig; c<=0?#.ce; a<=0?#.ce; a<=2 ?#.re; j==0 #.ce; r==0 #.ce; D*@cmp==0#.ce; #.c1; ..ce: a^=a; ##.cf; ..re: ##.cz; ..c1: {*j=r; j+=4; r+=c; --a#.c1;} j=*@ppres; a=*@n; r=*@cmp; _qsortu1_m<(j, a, r); i=*@ppres; j=*@res; c=*@n; k=*@size; k&0x2#.u2; k&0x1#.u1; k>>=2; .c2: {a=*i; b=k; .c3: {r=*a; *j=r; a+=4; j+=4; --b#.c3} i+=4; --c#.c2; } #.cz; ..u2: k>>=1; .c4: {a=*i; b=k; .c5: {rx=*a; *j=rx; a+=2; j+=2; --b#.c5} i+=4; --c#.c4 } #.cz ..u1: {a=*i; b=k; ..c6: { rl=*a; *j=rl; ++a; ++j; --b#.c6} i+=4; --c#.u1; } ..cz: a=1; ..cf: >> @res, @ppres, @orig, @n, @size, @cmp > b, i, j, k ret
From: ?a/b on 15 Sep 2005 04:19 On Wed, 14 Sep 2005 17:48:07 GMT, "ýa\\/b" <al(a)f.g> wrote: >/* int _qsortu1_m<(unsigned* ris, int n, int (*cmp)(U , U)) >/* utilizzato da qsort_m >/* s= 0Ra, 4(a)ris, 8@n, 12(a)cmp >_qsortu1_m: ><< @ris=s+4, @n=s+8, @cmp=s+12 > a=*@ris; r=*@n; b=*@cmp; > --r; i=&[a+4*r]; > _qsortr_m<(a, i); >>> @ris, @n, @cmp >ret > > >/* int _qsort_m<(char* res, char** ppres, char* orig, >/* int n, int size, int (*cmp)(char*, char*) ) >/* s = 0k, 4j, 8i, 12b, 16Ra, >/* 20(a)res, 24(a)ppres, 28(a)orig, 32@n, 36(a)size, 40(a)cmp >_qsort_m: >< b, i, j, k ><< @res=s+20, @ppres=s+24, @orig=s+28, @n=s+32, @size=s+36, @cmp=s+40 > c=*@size; j=*@ppres; a=*@n; r=*@orig; > c<=0?#.ce; a<=0?#.ce; a<=2 ?#.re; this is an error ^^^^^^^^^^^^^ > j==0 #.ce; r==0 #.ce; D*@cmp==0#.ce; > #.c1; >.ce: a^=a; ##.cf; >.re: ##.cz; >.c1: {*j=r; j+=4; r+=c; --a#.c1;} > j=*@ppres; a=*@n; r=*@cmp; _qsortu1_m<(j, a, r); ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ better 'inline' this qsort_m() seems good for array of chars too > i=*@ppres; j=*@res; c=*@n; k=*@size; > k&0x2#.u2; > k&0x1#.u1; > k>>=2; > .c2: {a=*i; b=k; > .c3: {r=*a; *j=r; a+=4; j+=4; --b#.c3} > i+=4; --c#.c2; > } > #.cz; >.u2: k>>=1; > .c4: {a=*i; b=k; > .c5: {rx=*a; *j=rx; a+=2; j+=2; --b#.c5} > i+=4; --c#.c4 > } > #.cz >.u1: {a=*i; b=k; >.c6: { rl=*a; *j=rl; ++a; ++j; --b#.c6} > i+=4; --c#.u1; > } >.cz: a=1; >.cf: >>> @res, @ppres, @orig, @n, @size, @cmp >> b, i, j, k >ret
From: ?a/b on 15 Sep 2005 05:40 On Thu, 15 Sep 2005 08:19:31 GMT, "ýa\\/b" <al(a)f.g> wrote: >On Wed, 14 Sep 2005 17:48:07 GMT, "ýa\\/b" <al(a)f.g> wrote: >>/* int _qsortu1_m<(unsigned* ris, int n, int (*cmp)(U , U)) >>/* utilizzato da qsort_m >>/* s= 0Ra, 4(a)ris, 8@n, 12(a)cmp >>_qsortu1_m: >><< @ris=s+4, @n=s+8, @cmp=s+12 >> a=*@ris; r=*@n; b=*@cmp; >> --r; i=&[a+4*r]; >> _qsortr_m<(a, i); >>>> @ris, @n, @cmp >>ret >> >> >>/* int _qsort_m<(char* res, char** ppres, char* orig, >>/* int n, int size, int (*cmp)(char*, char*) ) >>/* s = 0k, 4j, 8i, 12b, 16Ra, >>/* 20(a)res, 24(a)ppres, 28(a)orig, 32@n, 36(a)size, 40(a)cmp >>_qsort_m: >>< b, i, j, k >><< @res=s+20, @ppres=s+24, @orig=s+28, @n=s+32, @size=s+36, @cmp=s+40 >> c=*@size; j=*@ppres; a=*@n; r=*@orig; >> c<=0?#.ce; a<=0?#.ce; a<=2 ?#.re; >this is an error ^^^^^^^^^^^^^ > >> j==0 #.ce; r==0 #.ce; D*@cmp==0#.ce; >> #.c1; >>.ce: a^=a; ##.cf; >>.re: ##.cz; >>.c1: {*j=r; j+=4; r+=c; --a#.c1;} >> j=*@ppres; a=*@n; r=*@cmp; _qsortu1_m<(j, a, r); > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > >better 'inline' this >qsort_m() seems good for array of chars too > >> i=*@ppres; j=*@res; c=*@n; k=*@size; >> k&0x2#.u2; >> k&0x1#.u1; then should be better k&0x1#.u1; k&0x2#.u2; >> k>>=2; >> .c2: {a=*i; b=k; >> .c3: {r=*a; *j=r; a+=4; j+=4; --b#.c3} >> i+=4; --c#.c2; >> } >> #.cz; >>.u2: k>>=1; >> .c4: {a=*i; b=k; >> .c5: {rx=*a; *j=rx; a+=2; j+=2; --b#.c5} >> i+=4; --c#.c4 >> } >> #.cz >>.u1: {a=*i; b=k; >>.c6: { rl=*a; *j=rl; ++a; ++j; --b#.c6} >> i+=4; --c#.u1; >> } >>.cz: a=1; >>.cf: >>>> @res, @ppres, @orig, @n, @size, @cmp >>> b, i, j, k >>ret
From: ?a/b on 15 Sep 2005 05:40 On Wed, 14 Sep 2005 17:48:07 GMT, "ýa\\/b" <al(a)f.g> wrote: >int main(void) >{unsigned a[800], b[800], *c[800], i, h, j; > time_t ti, tf; > //////////////////////////////////// >/* >F(j=0; j<10000; ++j) > {h=rand()%500; ++h; this should be h=rand()%499; ++h; > random_array(a, h); > F(i=0; i<h; ++i) b[i]=a[i]; > qsortu_c(a, 0, h-1, cmpu); > qsortu_m(b, h, cmpu); > F(i=0; i<h; ++i) > if(b[i]!=a[i]) {P("ERRORE\n"); goto fine;} > } > fine:; > P("j==%u\n", j); >*/
From: T933191278 on 15 Sep 2005 06:33
But is your qsort better than mine? :) Probably :) i have not tested your routines but here are two versions of my qsort tested with an array of 1000 dwords with ramdom values the first takes about 100000 cpu clocks or 150000 with a unaligned array, the second is a generic version with suport for elements of any size, it takes 600000 cpu clocks, one difference respect your code is that i use iteration instead of recursion. LANG OCTASM,0.1 ;sorts a array of 32bits unsigned ints from lower to higher #qsort_32 { ;(ptr array,size/4) ;ordena un array con punteros de 32 bits ;de menor a mayor ;puede ocurrir un stack overflow pusha esi=[esp+arg1] edi=[esp+arg1+4] edi=esi+edi*4-4 l0() popa retp 8 # jne >1 eax=[esi] edx=[edi] cmp edx,eax jnc >1 [esi]=edx [edi]=eax # ret #l3 pop eax sub edi,4 [esi]=edx add esi,4 push edi edi=eax l0() pop edi,esi #l0 ecx=esi+4 cmp ecx,edi jnc <2 push edi,esi eax=[edi] ;por si el array esta ordenado add ecx,edi ;se compara con el del medio rcr ecx,1 and ecx,-4 edx=[ecx] [ecx]=eax jmp l1c #l1 [esi]=eax #l1b add esi,4 cmp esi,edi je l3 #l1c eax=[esi] cmp eax,edx jc l1b #l2 [edi]=eax #l2b sub edi,4 cmp esi,edi je l3 eax=[edi] cmp edx,eax jc l2b jmp l1 } ;generic version #qsort { ;(ptr array,num,size,ptr cmp funcion) ;ejemplo de funcion: push eax eax=[esi] cmp eax,[edi] pop eax ret ;size=arg1+8 size=32+4+8 bit1=-4 bufer_aux=-12 size1=-8 pushad ebp=esp eax=[ebp+arg1] edx=[ebp+arg1+4] ecx=[ebp+size] dec edx ebx=[esp+arg1+12] imul edx,ecx bsf esi,ecx add edx,eax push esi esi=ecx+7 shr ecx,1 push ecx and esi,-4 sub esp,esi [ebp+bufer_aux]=esp l0() esp=ebp popad retp 16 # esi=eax edi=edx call ebx jna >1 xchgblocks(eax,edx,d[ebp+size]) # ret #l3 pop esi sub edx,[ebp+size] movblock(eax,edi,d[ebp+size]) add eax,[ebp+size] push edx edx=esi l0() pop edx,eax #l0 ecx=edx sub ecx,eax jna <1 cmp ecx,[ebp+size] je <2 push edx,eax ;por si el array esta ordenado ;se compara con el del medio esi=[ebp+size1] shr ecx,1 jc >1 bsf edi,ecx cmp edi,[ebp+bit1] jc >1 xor esi,esi # sub ecx,esi add ecx,eax edi=[ebp+bufer_aux] movblock(edi,ecx,d[ebp+size]) movblock(ecx,edx,d[ebp+size]) jmp l1c #l1 movblock(eax,esi,d[ebp+size]) #l1b add eax,d[ebp+size] cmp eax,edx je l3 #l1c esi=eax call ebx jc l1b #l2 movblock(edx,esi,d[ebp+size]) #l2b sub edx,[ebp+size] cmp eax,edx je l3 esi=edx call ebx ja l2b jmp l1 #xchgblocks pushad edi=[esp+arg1] esi=[esp+arg1+4] ecx=[esp+arg1+8] xor edx,edx sub edx,edi and edx,3 xchg ecx,edx sub edx,ecx jle >4 jecxz >2 # al=[edi] xchg al,[esi] inc esi stosb loop <1 # ecx=edx shr ecx,2 jz >2 # eax=[edi] xchg eax,[esi] add esi,4 stosd loop <1 and edx,3 # add ecx,edx jz >2 # al=[edi] xchg al,[esi] inc esi stosb loop <1 # popad retp 12 #movblock pushad edi=[esp+arg1] esi=[esp+arg1+4] ecx=[esp+arg1+8] movedata() popad retp 12 } |