From: ?a/b on
I take some time for doing a windows95 .dll but with no success
so i thank hutch for let me pass some good time thinking on qsort
and so i have some question:

If the array 'b' in qsortu_m(char* b, int n, int (*cmp)(U, U));
is not dword aligned the function qsortu_m() is good the same?

I have tested qsort_m() for array of element-size 4(a dword) in your
opinion it goes ok for any array of some other element-size?

Do you see any error?

if your assembly is better than mine
your qsort will be better than mine. But is your qsort better than
mine? :)

Thanks

; b=cmp, _qsortr_m(U left, U right)
; 0Ra, 4(a)left, 8(a)right
_qsortr_m:
%define @left esp+4
%define @right esp+8
mov esi, [@left]
mov edi, [@right]
cmp esi, edi
jae .ck
lea eax, [esi+edi]
lea ebp, [esi+4]
shr eax, 1
mov ecx, esi
and eax, 0xFFFFFFFC
and ecx, 0x3
or eax, ecx
; array=..4, ..8, ..12 ecc
; array=..1, ..5, ..9
; array=..2, ..6, ..10
mov edx, [esi]
mov ecx, [eax]
mov [eax], edx
mov [esi], ecx
push ecx
jmp short .c2
..ck:
jmp short .cf
..c0:
push dword[ebp]
call ebx
add esp, 4
cmp eax, 0
jge .c1
add esi, 4
mov edx, [esi]
mov ecx, [ebp]
mov [esi], ecx
mov [ebp], edx
..c1:
add ebp, 4
..c2:
cmp ebp, edi
jbe .c0
pop eax
mov edx, [@left]
mov ecx, [esi]
mov [edx], ecx
mov [esi], eax
; r=(a)left, j=(a)right, i=last
lea eax, [esi-4]
push eax
push edx
call _qsortr_m
add esp, 4
pop eax
mov edi, [@right]
add eax, 8
push edi
push eax
call _qsortr_m
add esp, 8
..cf:
%undef @left
%undef @right
ret


; int _qsortu_m<(unsigned* ris, int n, int (*cmp)(U , U))
; serve per ordinare array di ogg di dimensione 4char
; per tali oggetti utilizzare questa funzione e non qsort_m
; s= 0k, 4j, 8i, 12b, 16Ra, 20(a)ris, 24@n, 28(a)cmp
_qsortu_m:
push ebx
push esi
push edi
push ebp
%define @ris esp+20
%define @n esp+24
%define @cmp esp+28
mov eax, [@ris]
mov edx, [@n]
mov ebx, [@cmp]
cmp edx, 2
jl .c1
cmp eax, 0
je .ce
cmp ebx, 0
je .ce
dec edx
lea esi, [eax+4*edx]
push esi
push eax
call _qsortr_m
add esp, 8
..c1:
mov eax, 1
jmp short .cf
..ce:
xor eax, eax
..cf:
%undef @ris
%undef @n
%undef @cmp
pop ebp
pop edi
pop esi
pop ebx
ret

; int _qsortu1_m<(unsigned* ris, int n, int (*cmp)(U , U))
; utilizzato da qsort_m
; s= 0Ra, 4(a)ris, 8@n, 12(a)cmp
_qsortu1_m:
%define @ris esp+4
%define @n esp+8
%define @cmp esp+12
mov eax, [@ris]
mov edx, [@n]
mov ebx, [@cmp]
dec edx
lea esi, [eax+4*edx]
push esi
push eax
call _qsortr_m
add esp, 8
%undef @ris
%undef @n
%undef @cmp
ret


; int _qsort_m<(char* res, char** ppres, char* orig,
; int n, int size, int (*cmp)(char*, char*) )
; s = 0k, 4j, 8i, 12b, 16Ra,
; 20(a)res, 24(a)ppres, 28(a)orig, 32@n, 36(a)size, 40(a)cmp
_qsort_m:
push ebx
push esi
push edi
push ebp
%define @res esp+20
%define @ppres esp+24
%define @orig esp+28
%define @n esp+32
%define @size esp+36
%define @cmp esp+40
mov ecx, [@size]
mov edi, [@ppres]
mov eax, [@n]
mov edx, [@orig]
cmp ecx, 0
jle .ce
cmp eax, 0
jle .ce
cmp eax, 2
jle .re
cmp edi, 0
je .ce
cmp edx, 0
je .ce
cmp dword[@cmp], 0
je .ce
jmp short .c1
..ce:
xor eax, eax
jmp .cf
..re:
jmp .cz
..c1:
mov [edi], edx
add edi, 4
add edx, ecx
dec eax
jnz .c1
mov edi, [@ppres]
mov eax, [@n]
mov edx, [@cmp]
push edx
push eax
push edi
call _qsortu1_m
add esp, 12
mov esi, [@ppres]
mov edi, [@res]
mov ecx, [@n]
mov ebp, [@size]
test ebp, 0x2
jnz .u2
test ebp, 0x1
jnz .u1
shr ebp, 2
..c2:
mov eax, [esi]
mov ebx, ebp
..c3:
mov edx, [eax]
mov [edi], edx
add eax, 4
add edi, 4
dec ebx
jnz .c3
add esi, 4
dec ecx
jnz .c2

jmp short .cz
..u2:
shr ebp, 1
..c4:
mov eax, [esi]
mov ebx, ebp
..c5:
mov dx, [eax]
mov [edi], dx
add eax, 2
add edi, 2
dec ebx
jnz .c5
add esi, 4
dec ecx
jnz .c4

jmp short .cz
..u1:
mov eax, [esi]
mov ebx, ebp
..c6:
mov dl, [eax]
mov [edi], dl
inc eax
inc edi
dec ebx
jnz .c6
add esi, 4
dec ecx
jnz .u1
..cz:
mov eax, 1
..cf:
%undef @res
%undef @ppres
%undef @orig
%undef @n
%undef @size
%undef @cmp
pop ebp
pop edi
pop esi
pop ebx
ret


#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define R return
#define F for
#define P printf
#define U unsigned
#define W while

void qsortu_m(char* b, int n, int (*cmp)(U, U));
int qsort_m(char* res, char* ppres, char* orig,
int n, int size, int (*cmp)(char*, char*) );

void
qsortu_c(char* b, int l, int r, int (*cmp)(U, U))
{int i, last;
U *v, tmp;
///////////////
if(l>=r) R;
v=( U *) b;
tmp=v[l]; i=(l+r)/2; v[l]=v[i]; v[i]=tmp;
last=l;
F( i=l+1; i<=r; ++i )
if((*cmp)(v[i], v[l]) < 0)
{ ++last; tmp=v[last]; v[last]=v[i]; v[i]=tmp;}
tmp=v[l]; v[l]=v[last]; v[last]=tmp;
qsortu_c(b, l, last-1, cmp);
qsortu_c(b, last+1, r, cmp);
}

void print_array(U* b, int len)
{int i;
///////////////////
if(len<=0||b==0) R;
F(i=0; i<len; ++i)
P("%u ", b[i]);
P("\n");
}

void random_array(U* b, int len)
{static int led=0;
int i;
////////////////////
if(len<=0||b==0) R;
if(led==0){ led=1; srand((U)time(0));}
F(i=0; i<len; ++i) b[i]=rand();
}

int cmpu(U a, U b)
{ if(a<b) R -1;
else if(a>b) R 1;
R 0;
}

int cmpx(char* aa, char* bb)
{U *a, *b;
a=aa; b=bb;
if(*a<*b) R -1;
else if(*a>*b) R 1;
R 0;
}



int main(void)
{unsigned a[800], b[800], *c[800], i, h, j;
time_t ti, tf;
////////////////////////////////////

/*
random_array(a, 500); print_array(a, 51);
//qsortu_c(a, 0, 10, cmpu);
P("a=%p b=%p c=%p\n", (void*)a, (void*)b, (void*)c);
qsortu_m(a, 50, cmpu);
print_array(a, 51);
*/

/*
random_array(a, 50); print_array(a, 51);
qsort_m( b, &c, a, 50, 4, cmpx );
print_array(b, 51);

random_array(a, 50); print_array(a, 51);
qsort( &a, 50, 4, (int (*)(const void*, const void*))cmpx );
print_array(b, 51);
*/

/*
F(j=0; j<10000; ++j)
{h=rand()%500; ++h;
random_array(a, h);
F(i=0; i<h; ++i) b[i]=a[i];
qsortu_c(a, 0, h-1, cmpu);
qsortu_m(b, h, cmpu);
F(i=0; i<h; ++i)
if(b[i]!=a[i]) {P("ERRORE\n"); goto fine;}
}
fine:;
P("j==%u\n", j);
*/


random_array(a, 800);
h=rand(); h<<=16; h|=rand();
ti=time(0);
F(j=0; j<30000; ++j)
{F( i=0; i<500; ++i) a[i]^=h;
qsortu_c(a, 0, 499, cmpu);
}
tf=time(0);
P("K&R qsort == %g\n", difftime(tf, ti) );

random_array(a, 800);
h=rand(); h<<=16; h|=rand();
ti=time(0);
F(j=0; j<30000; ++j)
{F( i=0; i<500; ++i) a[i]^=h;
qsortu_m(a, 500, cmpu);
}
tf=time(0);
P("my qsort == %g\n", difftime(tf, ti) );

random_array(a, 800);
h=rand(); h<<=16; h|=rand();
ti=time(0);
F(j=0; j<30000; ++j)
{F( i=0; i<500; ++i) a[i]^=h;
qsort( &a, 500, 4, (int (*)(const void*, const void*))cmpx );
}
tf=time(0);
P("my qsys == %g\n", difftime(tf, ti) );

random_array(a, 800);
h=rand(); h<<=16; h|=rand();
ti=time(0);
F(j=0; j<30000; ++j)
{F( i=0; i<500; ++i) a[i]^=h;
qsort_m( b, &c, a, 500, 4, cmpx );
}
tf=time(0);
P("my msys == %g\n", difftime(tf, ti) );


R 0;
}

/* b=cmp, _qsortr_m(U left, U right)
/* 0Ra, 4(a)left, 8(a)right
_qsortr_m:
<< @left=s+4, @right=s+8
i=*@left; j=*@right;
i>=j#.ck;
a=&[i+j]; k=&[i+4]; a>>=1; c=i;
a&=0xFFFFFFFC; c&=0x3; a|=c;
/* array=..4, ..8, ..12 ecc
/* array=..1, ..5, ..9
/* array=..2, ..6, ..10
r=*i; c=*a; *a=r; *i=c;
< c; #.c2;
.ck: #.cf;
.c0: b<(D[k]); a<0?!#.c1;
{i+=4; r=*i; c=*k; *i=c; *k=r;}
.c1: k+=4;
.c2: k<=j#.c0;
> a
r=*@left; c=*i; *r=c; *i=a;
/* r=(a)left, j=(a)right, i=last
a=&[i-4]; < a
_qsortr_m<(r);
> a
j=*@right; a+=8; _qsortr_m<(a, j);
..cf:
>> @left, @right
ret


/* int _qsortu_m<(unsigned* ris, int n, int (*cmp)(U , U))
/* serve per ordinare array di ogg di dimensione 4char
/* per tali oggetti utilizzare questa funzione e non qsort_m
/* s= 0k, 4j, 8i, 12b, 16Ra, 20(a)ris, 24@n, 28(a)cmp
_qsortu_m:
< b, i, j, k
<< @ris=s+20, @n=s+24, @cmp=s+28
a=*@ris; r=*@n;
b=*@cmp; r<2?#.c1;
a==0 #.ce; b==0#.ce;
--r; i=&[a+4*r];
_qsortr_m<(a, i);
..c1: a=1; #.cf;
..ce: a^=a;
..cf:
>> @ris, @n, @cmp
> b, i, j, k
ret

/* int _qsortu1_m<(unsigned* ris, int n, int (*cmp)(U , U))
/* utilizzato da qsort_m
/* s= 0Ra, 4(a)ris, 8@n, 12(a)cmp
_qsortu1_m:
<< @ris=s+4, @n=s+8, @cmp=s+12
a=*@ris; r=*@n; b=*@cmp;
--r; i=&[a+4*r];
_qsortr_m<(a, i);
>> @ris, @n, @cmp
ret


/* int _qsort_m<(char* res, char** ppres, char* orig,
/* int n, int size, int (*cmp)(char*, char*) )
/* s = 0k, 4j, 8i, 12b, 16Ra,
/* 20(a)res, 24(a)ppres, 28(a)orig, 32@n, 36(a)size, 40(a)cmp
_qsort_m:
< b, i, j, k
<< @res=s+20, @ppres=s+24, @orig=s+28, @n=s+32, @size=s+36, @cmp=s+40
c=*@size; j=*@ppres; a=*@n; r=*@orig;
c<=0?#.ce; a<=0?#.ce; a<=2 ?#.re;
j==0 #.ce; r==0 #.ce; D*@cmp==0#.ce;
#.c1;
..ce: a^=a; ##.cf;
..re: ##.cz;
..c1: {*j=r; j+=4; r+=c; --a#.c1;}
j=*@ppres; a=*@n; r=*@cmp; _qsortu1_m<(j, a, r);
i=*@ppres; j=*@res; c=*@n; k=*@size;
k&0x2#.u2;
k&0x1#.u1;
k>>=2;
.c2: {a=*i; b=k;
.c3: {r=*a; *j=r; a+=4; j+=4; --b#.c3}
i+=4; --c#.c2;
}
#.cz;
..u2: k>>=1;
.c4: {a=*i; b=k;
.c5: {rx=*a; *j=rx; a+=2; j+=2; --b#.c5}
i+=4; --c#.c4
}
#.cz
..u1: {a=*i; b=k;
..c6: { rl=*a; *j=rl; ++a; ++j; --b#.c6}
i+=4; --c#.u1;
}
..cz: a=1;
..cf:
>> @res, @ppres, @orig, @n, @size, @cmp
> b, i, j, k
ret

From: ?a/b on
On Wed, 14 Sep 2005 17:48:07 GMT, "ýa\\/b" <al(a)f.g> wrote:
>/* int _qsortu1_m<(unsigned* ris, int n, int (*cmp)(U , U))
>/* utilizzato da qsort_m
>/* s= 0Ra, 4(a)ris, 8@n, 12(a)cmp
>_qsortu1_m:
><< @ris=s+4, @n=s+8, @cmp=s+12
> a=*@ris; r=*@n; b=*@cmp;
> --r; i=&[a+4*r];
> _qsortr_m<(a, i);
>>> @ris, @n, @cmp
>ret
>
>
>/* int _qsort_m<(char* res, char** ppres, char* orig,
>/* int n, int size, int (*cmp)(char*, char*) )
>/* s = 0k, 4j, 8i, 12b, 16Ra,
>/* 20(a)res, 24(a)ppres, 28(a)orig, 32@n, 36(a)size, 40(a)cmp
>_qsort_m:
>< b, i, j, k
><< @res=s+20, @ppres=s+24, @orig=s+28, @n=s+32, @size=s+36, @cmp=s+40
> c=*@size; j=*@ppres; a=*@n; r=*@orig;
> c<=0?#.ce; a<=0?#.ce; a<=2 ?#.re;
this is an error ^^^^^^^^^^^^^

> j==0 #.ce; r==0 #.ce; D*@cmp==0#.ce;
> #.c1;
>.ce: a^=a; ##.cf;
>.re: ##.cz;
>.c1: {*j=r; j+=4; r+=c; --a#.c1;}
> j=*@ppres; a=*@n; r=*@cmp; _qsortu1_m<(j, a, r);
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

better 'inline' this
qsort_m() seems good for array of chars too

> i=*@ppres; j=*@res; c=*@n; k=*@size;
> k&0x2#.u2;
> k&0x1#.u1;
> k>>=2;
> .c2: {a=*i; b=k;
> .c3: {r=*a; *j=r; a+=4; j+=4; --b#.c3}
> i+=4; --c#.c2;
> }
> #.cz;
>.u2: k>>=1;
> .c4: {a=*i; b=k;
> .c5: {rx=*a; *j=rx; a+=2; j+=2; --b#.c5}
> i+=4; --c#.c4
> }
> #.cz
>.u1: {a=*i; b=k;
>.c6: { rl=*a; *j=rl; ++a; ++j; --b#.c6}
> i+=4; --c#.u1;
> }
>.cz: a=1;
>.cf:
>>> @res, @ppres, @orig, @n, @size, @cmp
>> b, i, j, k
>ret
From: ?a/b on
On Thu, 15 Sep 2005 08:19:31 GMT, "ýa\\/b" <al(a)f.g> wrote:
>On Wed, 14 Sep 2005 17:48:07 GMT, "ýa\\/b" <al(a)f.g> wrote:
>>/* int _qsortu1_m<(unsigned* ris, int n, int (*cmp)(U , U))
>>/* utilizzato da qsort_m
>>/* s= 0Ra, 4(a)ris, 8@n, 12(a)cmp
>>_qsortu1_m:
>><< @ris=s+4, @n=s+8, @cmp=s+12
>> a=*@ris; r=*@n; b=*@cmp;
>> --r; i=&[a+4*r];
>> _qsortr_m<(a, i);
>>>> @ris, @n, @cmp
>>ret
>>
>>
>>/* int _qsort_m<(char* res, char** ppres, char* orig,
>>/* int n, int size, int (*cmp)(char*, char*) )
>>/* s = 0k, 4j, 8i, 12b, 16Ra,
>>/* 20(a)res, 24(a)ppres, 28(a)orig, 32@n, 36(a)size, 40(a)cmp
>>_qsort_m:
>>< b, i, j, k
>><< @res=s+20, @ppres=s+24, @orig=s+28, @n=s+32, @size=s+36, @cmp=s+40
>> c=*@size; j=*@ppres; a=*@n; r=*@orig;
>> c<=0?#.ce; a<=0?#.ce; a<=2 ?#.re;
>this is an error ^^^^^^^^^^^^^
>
>> j==0 #.ce; r==0 #.ce; D*@cmp==0#.ce;
>> #.c1;
>>.ce: a^=a; ##.cf;
>>.re: ##.cz;
>>.c1: {*j=r; j+=4; r+=c; --a#.c1;}
>> j=*@ppres; a=*@n; r=*@cmp; _qsortu1_m<(j, a, r);
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>
>better 'inline' this
>qsort_m() seems good for array of chars too
>
>> i=*@ppres; j=*@res; c=*@n; k=*@size;
>> k&0x2#.u2;
>> k&0x1#.u1;

then should be better
k&0x1#.u1;
k&0x2#.u2;

>> k>>=2;
>> .c2: {a=*i; b=k;
>> .c3: {r=*a; *j=r; a+=4; j+=4; --b#.c3}
>> i+=4; --c#.c2;
>> }
>> #.cz;
>>.u2: k>>=1;
>> .c4: {a=*i; b=k;
>> .c5: {rx=*a; *j=rx; a+=2; j+=2; --b#.c5}
>> i+=4; --c#.c4
>> }
>> #.cz
>>.u1: {a=*i; b=k;
>>.c6: { rl=*a; *j=rl; ++a; ++j; --b#.c6}
>> i+=4; --c#.u1;
>> }
>>.cz: a=1;
>>.cf:
>>>> @res, @ppres, @orig, @n, @size, @cmp
>>> b, i, j, k
>>ret

From: ?a/b on
On Wed, 14 Sep 2005 17:48:07 GMT, "ýa\\/b" <al(a)f.g> wrote:
>int main(void)
>{unsigned a[800], b[800], *c[800], i, h, j;
> time_t ti, tf;
> ////////////////////////////////////

>/*
>F(j=0; j<10000; ++j)
> {h=rand()%500; ++h;

this should be h=rand()%499; ++h;

> random_array(a, h);
> F(i=0; i<h; ++i) b[i]=a[i];
> qsortu_c(a, 0, h-1, cmpu);
> qsortu_m(b, h, cmpu);
> F(i=0; i<h; ++i)
> if(b[i]!=a[i]) {P("ERRORE\n"); goto fine;}
> }
> fine:;
> P("j==%u\n", j);
>*/

From: T933191278 on
But is your qsort better than
mine? :)

Probably :)
i have not tested your routines but here are two versions of my qsort
tested with an array of 1000 dwords with ramdom values the first takes
about 100000 cpu clocks or 150000 with a unaligned array, the second
is a generic version with suport for elements of any size, it takes
600000 cpu clocks, one difference respect your code is that i use
iteration
instead of recursion.

LANG OCTASM,0.1
;sorts a array of 32bits unsigned ints from lower to higher
#qsort_32 { ;(ptr array,size/4)
;ordena un array con punteros de 32 bits
;de menor a mayor
;puede ocurrir un stack overflow
pusha
esi=[esp+arg1] edi=[esp+arg1+4] edi=esi+edi*4-4
l0() popa retp 8
# jne >1
eax=[esi] edx=[edi] cmp edx,eax jnc >1
[esi]=edx [edi]=eax
# ret
#l3 pop eax
sub edi,4 [esi]=edx add esi,4
push edi edi=eax
l0()
pop edi,esi
#l0 ecx=esi+4 cmp ecx,edi jnc <2
push edi,esi
eax=[edi] ;por si el array esta ordenado
add ecx,edi ;se compara con el del medio
rcr ecx,1 and ecx,-4
edx=[ecx] [ecx]=eax
jmp l1c
#l1 [esi]=eax
#l1b add esi,4 cmp esi,edi je l3
#l1c eax=[esi] cmp eax,edx jc l1b
#l2 [edi]=eax
#l2b sub edi,4 cmp esi,edi je l3
eax=[edi] cmp edx,eax jc l2b jmp l1
}

;generic version
#qsort { ;(ptr array,num,size,ptr cmp funcion)
;ejemplo de funcion: push eax eax=[esi] cmp eax,[edi] pop eax ret
;size=arg1+8
size=32+4+8
bit1=-4 bufer_aux=-12
size1=-8
pushad ebp=esp
eax=[ebp+arg1] edx=[ebp+arg1+4]
ecx=[ebp+size] dec edx ebx=[esp+arg1+12]
imul edx,ecx bsf esi,ecx add edx,eax
push esi esi=ecx+7 shr ecx,1
push ecx and esi,-4 sub esp,esi
[ebp+bufer_aux]=esp
l0() esp=ebp popad retp 16
# esi=eax edi=edx call ebx jna >1
xchgblocks(eax,edx,d[ebp+size])
# ret
#l3 pop esi
sub edx,[ebp+size] movblock(eax,edi,d[ebp+size]) add eax,[ebp+size]
push edx edx=esi
l0()
pop edx,eax
#l0 ecx=edx sub ecx,eax jna <1 cmp ecx,[ebp+size] je <2
push edx,eax
;por si el array esta ordenado
;se compara con el del medio
esi=[ebp+size1]
shr ecx,1 jc >1
bsf edi,ecx cmp edi,[ebp+bit1] jc >1
xor esi,esi
# sub ecx,esi add ecx,eax
edi=[ebp+bufer_aux] movblock(edi,ecx,d[ebp+size])
movblock(ecx,edx,d[ebp+size])
jmp l1c
#l1 movblock(eax,esi,d[ebp+size])
#l1b add eax,d[ebp+size] cmp eax,edx je l3
#l1c esi=eax call ebx jc l1b
#l2 movblock(edx,esi,d[ebp+size])
#l2b sub edx,[ebp+size] cmp eax,edx je l3
esi=edx call ebx ja l2b jmp l1

#xchgblocks
pushad
edi=[esp+arg1] esi=[esp+arg1+4] ecx=[esp+arg1+8]
xor edx,edx sub edx,edi and edx,3 xchg ecx,edx sub edx,ecx jle >4
jecxz >2
# al=[edi] xchg al,[esi] inc esi stosb loop <1
# ecx=edx shr ecx,2 jz >2
# eax=[edi] xchg eax,[esi] add esi,4 stosd loop <1
and edx,3
# add ecx,edx jz >2
# al=[edi] xchg al,[esi] inc esi stosb loop <1
# popad
retp 12

#movblock
pushad
edi=[esp+arg1] esi=[esp+arg1+4] ecx=[esp+arg1+8]
movedata()
popad
retp 12

}

 |  Next  |  Last
Pages: 1 2
Prev: Windows Assembly
Next: resource files and assembler