From: Randall Hyde on
Hi All,

I'm currently in the process of correcting a defect in the
HLA Standard Library case insensitive string comparison
routines. While I'm at it, I'm trying to speed it up a bit.
I've written the following code:

procedure stricmp; @nodisplay; @noframe; align(4);
begin stricmp;

push( ebx );


// Compare the two strings until we encounter a zero byte
// or until the corresonding characters are different.

cmpLoop:

mov( [esi], al );
mov( [edi], ah );
add( 1, esi );
add( 1, edi );
cmp( al, 0 );
je notAlpha;
cmp( al, ah );
je cmpLoop;
mov( ax, bx );
and( $5f5f, bx );
cmp( bl, 'A' );
jb notAlpha;
cmp( bl, 'Z' );
ja notAlpha;
cmp( bl, bh );
je cmpLoop;

notAlpha:

// At this point, we've either encountered a zero byte in the source
// string or we've encountered two bytes that are not the same in
// the two strings. In either case, return the result of the
// comparison in the flags.

pop( ebx );
cmp( al, ah );
ret();

end stricmp;


Any suggestions on speeding it up would be appreciated.

I wrote the following test program to try out various versions:

program t;
#include( "stdlib.hhf" )

static
// Str1 is the 255-char sequence #1..#255:

str1 :string := "" +
#for( chr:= 1 to 255 )
char( chr ) +
#endfor
"";

// As above, but substitute uppercase for lowercase:

str2 :string := "" +
#for( chr := 1 to 255 )
@uppercase( char( chr ), 0 ) +
#endfor
"";

// As above, but subsitute lowercase for uppercase:

str3 :string := "" +
#for( chr := 1 to 255 )
@lowercase( char( chr ), 0 ) +
#endfor
"";

// A string that won't match the above
// ( #1..'@' + '.'):

str4 :string := "" +
#for( chr := 1 to uns8('a')-1 )
char( chr ) +
#endfor
".";


procedure stricmp; @nodisplay; @noframe; align(16);
begin stricmp;

push( ebx );


// Compare the two strings until we encounter a zero byte
// or until the corresonding characters are different.

cmpLoop:

mov( [esi], al );
mov( [edi], ah );
add( 1, esi );
add( 1, edi );
cmp( al, 0 );
je notAlpha;
cmp( al, ah );
je cmpLoop;
mov( ax, bx );
and( $5f5f, bx );
cmp( bl, 'A' );
jb notAlpha;
cmp( bl, 'Z' );
ja notAlpha;
cmp( bl, bh );
je cmpLoop;

notAlpha:

// At this point, we've either encountered a zero byte in the source
// string or we've encountered two bytes that are not the same in
// the two strings. In either case, return the result of the
// comparison in the flags.

pop( ebx );
cmp( al, ah );
ret();

end stricmp;




procedure stricmp1; @nodisplay; @noframe; align(16);
begin stricmp1;

push( ebx );


// Compare the two strings until we encounter a zero byte
// or until the corresonding characters are different.

cmpLoop:

mov( [esi], al );
mov( [edi], ah );
add( 1, esi );
add( 1, edi );
cmp( al, 0 );
je notAlpha;
cmp( al, ah );
je cmpLoop;
mov( al, bl );
mov( ah, bh );
and( $5f, bl );
and( $5f, bh );
cmp( bl, 'A' );
jb notAlpha;
cmp( bl, 'Z' );
ja notAlpha;
cmp( bl, bh );
je cmpLoop;

notAlpha:

// At this point, we've either encountered a zero byte in the source
// string or we've encountered two bytes that are not the same in
// the two strings. In either case, return the result of the
// comparison in the flags.

pop( ebx );
cmp( al, ah );
ret();

end stricmp1;


procedure stricmp2; @nodisplay; @noframe; align(16);
static
xlattbl: char[256] :=
[
#for( chr := 0 to 254 )

@uppercase( char( chr ), 0),

#endfor
#255
];

begin stricmp2;

push( ebx );
push( ecx );
lea( ebx, xlattbl );

// Compare the two strings until we encounter a zero byte
// or until the corresonding characters are different.

cmpLoop:

mov( [edi], al );
xlat();
add( 1, edi );
mov( al, ah );
mov( [esi], al );
add( 1, esi );
xlat();

cmp( ah, 0 );
je notAlpha;
cmp( al, ah );
je cmpLoop;

notAlpha:

// At this point, we've either encountered a zero byte in the source
// string or we've encountered two bytes that are not the same in
// the two strings. In either case, return the result of the
// comparison in the flags.

pop( ecx );
pop( ebx );
cmp( al, ah );
ret();

end stricmp2;




procedure stricmp3; @nodisplay; @noframe; align(16);
static
xlattbl: char[256] :=
[
#for( chr := 0 to 254 )

@uppercase( char( chr ), 0),

#endfor
#255
];

begin stricmp3;

push( ebx );
push( ecx );
lea( ebx, xlattbl );

// Compare the two strings until we encounter a zero byte
// or until the corresonding characters are different.

cmpLoop:

movzx( (type byte [esi]), ecx );
add( 1, esi );
mov( [ebx+ecx], al );
movzx( (type byte [edi]), ecx );
add( 1, edi );
mov( [ebx+ecx], ah );

cmp( ah, 0 );
je notAlpha;
cmp( al, ah );
je cmpLoop;

notAlpha:

// At this point, we've either encountered a zero byte in the source
// string or we've encountered two bytes that are not the same in
// the two strings. In either case, return the result of the
// comparison in the flags.

pop( ecx );
pop( ebx );
cmp( al, ah );
ret();

end stricmp3;


static
timeStart :int64;
timeEnd :int64;

begin t;

// The the cache and pages:

lea( esi, stricmp );
mov( 1024, ecx );
repeat
mov( [esi], al );
inc( esi );
dec( ecx );
until( @z );

stdout.put( nl "Test 3:" nl );

align(16);
nop;
nop;
nop;
nop;
rdtsc();
mov( eax, (type dword timeStart));
mov( edx, (type dword timeStart[4]));

mov( str1, esi );
mov( str1, edi );
stricmp2();

mov( str1, esi );
mov( str2, edi );
stricmp2();

mov( str1, esi );
mov( str3, edi );
stricmp2();

mov( str2, esi );
mov( str3, edi );
stricmp2();

mov( str3, esi );
mov( str2, edi );
stricmp2();

mov( str1, esi );
mov( str4, edi );
stricmp2();

rdtsc();
sub( (type dword timeStart), eax );
mov( eax, (type dword timeEnd) );
sbb( (type dword timeStart[4]), edx );
mov( edx, (type dword timeEnd[4]));
stdout.put( "Sequence 3: ", timeEnd, nl );



stdout.put( nl "Test 4:" nl );

align(16);
nop;
nop;
nop;
nop;
rdtsc();
mov( eax, (type dword timeStart));
mov( edx, (type dword timeStart[4]));

mov( str1, esi );
mov( str1, edi );
stricmp3();

mov( str1, esi );
mov( str2, edi );
stricmp3();

mov( str1, esi );
mov( str3, edi );
stricmp3();

mov( str2, esi );
mov( str3, edi );
stricmp3();

mov( str3, esi );
mov( str2, edi );
stricmp3();

mov( str1, esi );
mov( str4, edi );
stricmp3();

rdtsc();
sub( (type dword timeStart), eax );
mov( eax, (type dword timeEnd) );
sbb( (type dword timeStart[4]), edx );
mov( edx, (type dword timeEnd[4]));
stdout.put( "Sequence 4: ", timeEnd, nl );



stdout.put( nl "Test 2:" nl );

align(16);
nop;
nop;
nop;
nop;
rdtsc();
mov( eax, (type dword timeStart));
mov( edx, (type dword timeStart[4]));

mov( str1, esi );
mov( str1, edi );
stricmp1();

mov( str1, esi );
mov( str2, edi );
stricmp1();

mov( str1, esi );
mov( str3, edi );
stricmp1();

mov( str2, esi );
mov( str3, edi );
stricmp1();

mov( str3, esi );
mov( str2, edi );
stricmp1();

mov( str1, esi );
mov( str4, edi );
stricmp1();

rdtsc();
sub( (type dword timeStart), eax );
mov( eax, (type dword timeEnd) );
sbb( (type dword timeStart[4]), edx );
mov( edx, (type dword timeEnd[4]));
stdout.put( "Sequence 2: ", timeEnd, nl );


stdout.put( nl "Test 1:" nl );

align(16);
nop;
nop;
nop;
nop;
rdtsc();
mov( eax, (type dword timeStart));
mov( edx, (type dword timeStart[4]));

mov( str1, esi );
mov( str1, edi );
stricmp();

mov( str1, esi );
mov( str2, edi );
stricmp();

mov( str1, esi );
mov( str3, edi );
stricmp();

mov( str2, esi );
mov( str3, edi );
stricmp();

mov( str3, esi );
mov( str2, edi );
stricmp();

mov( str1, esi );
mov( str4, edi );
stricmp();

rdtsc();
sub( (type dword timeStart), eax );
mov( eax, (type dword timeEnd) );
sbb( (type dword timeStart[4]), edx );
mov( edx, (type dword timeEnd[4]));
stdout.put( "Sequence 1: ", timeEnd, nl );

end t;


Here is some typical output on a 2.6 GHz PIV:

Test 3:
Sequence 3: 19832

Test 4:
Sequence 4: 9188

Test 2:
Sequence 2: 8684

Test 1:
Sequence 1: 8332

Note that Test 1 and Test 2 seem to be position dependent.
If I move them around, they tend to swap positions in the
sorted list of times. So my assumption is that their running
times are roughly equivalent.

I'm also wondering about the results to return when the strings
are not equal. In particular, the algorithm I've provided compares
the original characters and sets the flags if those characters do
not match. Therefore, 'a' > 'B'. I'm wondering if this is reasonable,
or if I should return the comparisions of the "converted" characters
(if they were uppercase)? I don't really care what *other* languages
(e.g., C/C++) do, I'm interested in a discussion of what is reasonable
to do here.
Cheers,
Randy Hyde






From: hutch-- on
Randy,

For string comparison, my own approach is to return zero if they don't
match or return the length if they do. Variations are to return where
the strings failed to match and something like -1 if they match.

With the speed issue, I would be inclined to go for a table approach
even though the code size is larger including the table. XLATB was a
reasonable performer last time I tested it but I would be inclined to
manually code the compare / swap so it evens out the performance on
different hardware.

Do the normal stuff like aligning the table and test aligning the
labels and you should come close to a decent all rounder.

Regards,

hutch at movsd dot com

From: Sevag Krikorian on
Randall Hyde wrote:
> Hi All,
>
> I'm currently in the process of correcting a defect in the
> HLA Standard Library case insensitive string comparison
> routines. While I'm at it, I'm trying to speed it up a bit.
> I've written the following code:
>
> procedure stricmp; @nodisplay; @noframe; align(4);
> begin stricmp;
>
> push( ebx );
>
>
> // Compare the two strings until we encounter a zero byte
> // or until the corresonding characters are different.
>
> cmpLoop:
>
> mov( [esi], al );
> mov( [edi], ah );
> add( 1, esi );
> add( 1, edi );
> cmp( al, 0 );
> je notAlpha;
> cmp( al, ah );
> je cmpLoop;
> mov( ax, bx );
> and( $5f5f, bx );
> cmp( bl, 'A' );
> jb notAlpha;
> cmp( bl, 'Z' );
> ja notAlpha;
> cmp( bl, bh );
> je cmpLoop;
>
> notAlpha:
>
> // At this point, we've either encountered a zero byte in the source
> // string or we've encountered two bytes that are not the same in
> // the two strings. In either case, return the result of the
> // comparison in the flags.
>
> pop( ebx );
> cmp( al, ah );
> ret();
>
> end stricmp;
>
>
> Any suggestions on speeding it up would be appreciated.
>
> I wrote the following test program to try out various versions:
>
> program t;
> #include( "stdlib.hhf" )
>
> static
> // Str1 is the 255-char sequence #1..#255:
>
> str1 :string := "" +
> #for( chr:= 1 to 255 )
> char( chr ) +
> #endfor
> "";
>
> // As above, but substitute uppercase for lowercase:
>
> str2 :string := "" +
> #for( chr := 1 to 255 )
> @uppercase( char( chr ), 0 ) +
> #endfor
> "";
>
> // As above, but subsitute lowercase for uppercase:
>
> str3 :string := "" +
> #for( chr := 1 to 255 )
> @lowercase( char( chr ), 0 ) +
> #endfor
> "";
>
> // A string that won't match the above
> // ( #1..'@' + '.'):
>
> str4 :string := "" +
> #for( chr := 1 to uns8('a')-1 )
> char( chr ) +
> #endfor
> ".";
>
>
> procedure stricmp; @nodisplay; @noframe; align(16);
> begin stricmp;
>
> push( ebx );
>
>
> // Compare the two strings until we encounter a zero byte
> // or until the corresonding characters are different.
>
> cmpLoop:
>
> mov( [esi], al );
> mov( [edi], ah );
> add( 1, esi );
> add( 1, edi );
> cmp( al, 0 );
> je notAlpha;
> cmp( al, ah );
> je cmpLoop;
> mov( ax, bx );
> and( $5f5f, bx );
> cmp( bl, 'A' );
> jb notAlpha;
> cmp( bl, 'Z' );
> ja notAlpha;
> cmp( bl, bh );
> je cmpLoop;
>
> notAlpha:
>
> // At this point, we've either encountered a zero byte in the source
> // string or we've encountered two bytes that are not the same in
> // the two strings. In either case, return the result of the
> // comparison in the flags.
>
> pop( ebx );
> cmp( al, ah );
> ret();
>
> end stricmp;
>
>
>
>
> procedure stricmp1; @nodisplay; @noframe; align(16);
> begin stricmp1;
>
> push( ebx );
>
>
> // Compare the two strings until we encounter a zero byte
> // or until the corresonding characters are different.
>
> cmpLoop:
>
> mov( [esi], al );
> mov( [edi], ah );
> add( 1, esi );
> add( 1, edi );
> cmp( al, 0 );
> je notAlpha;
> cmp( al, ah );
> je cmpLoop;
> mov( al, bl );
> mov( ah, bh );
> and( $5f, bl );
> and( $5f, bh );
> cmp( bl, 'A' );
> jb notAlpha;
> cmp( bl, 'Z' );
> ja notAlpha;
> cmp( bl, bh );
> je cmpLoop;
>
> notAlpha:
>
> // At this point, we've either encountered a zero byte in the source
> // string or we've encountered two bytes that are not the same in
> // the two strings. In either case, return the result of the
> // comparison in the flags.
>
> pop( ebx );
> cmp( al, ah );
> ret();
>
> end stricmp1;
>
>
> procedure stricmp2; @nodisplay; @noframe; align(16);
> static
> xlattbl: char[256] :=
> [
> #for( chr := 0 to 254 )
>
> @uppercase( char( chr ), 0),
>
> #endfor
> #255
> ];
>
> begin stricmp2;
>
> push( ebx );
> push( ecx );
> lea( ebx, xlattbl );
>
> // Compare the two strings until we encounter a zero byte
> // or until the corresonding characters are different.
>
> cmpLoop:
>
> mov( [edi], al );
> xlat();
> add( 1, edi );
> mov( al, ah );
> mov( [esi], al );
> add( 1, esi );
> xlat();
>
> cmp( ah, 0 );
> je notAlpha;
> cmp( al, ah );
> je cmpLoop;
>
> notAlpha:
>
> // At this point, we've either encountered a zero byte in the source
> // string or we've encountered two bytes that are not the same in
> // the two strings. In either case, return the result of the
> // comparison in the flags.
>
> pop( ecx );
> pop( ebx );
> cmp( al, ah );
> ret();
>
> end stricmp2;
>
>
>
>
> procedure stricmp3; @nodisplay; @noframe; align(16);
> static
> xlattbl: char[256] :=
> [
> #for( chr := 0 to 254 )
>
> @uppercase( char( chr ), 0),
>
> #endfor
> #255
> ];
>
> begin stricmp3;
>
> push( ebx );
> push( ecx );
> lea( ebx, xlattbl );
>
> // Compare the two strings until we encounter a zero byte
> // or until the corresonding characters are different.
>
> cmpLoop:
>
> movzx( (type byte [esi]), ecx );
> add( 1, esi );
> mov( [ebx+ecx], al );
> movzx( (type byte [edi]), ecx );
> add( 1, edi );
> mov( [ebx+ecx], ah );
>
> cmp( ah, 0 );
> je notAlpha;
> cmp( al, ah );
> je cmpLoop;
>
> notAlpha:
>
> // At this point, we've either encountered a zero byte in the source
> // string or we've encountered two bytes that are not the same in
> // the two strings. In either case, return the result of the
> // comparison in the flags.
>
> pop( ecx );
> pop( ebx );
> cmp( al, ah );
> ret();
>
> end stricmp3;
>
>
> static
> timeStart :int64;
> timeEnd :int64;
>
> begin t;
>
> // The the cache and pages:
>
> lea( esi, stricmp );
> mov( 1024, ecx );
> repeat
> mov( [esi], al );
> inc( esi );
> dec( ecx );
> until( @z );
>
> stdout.put( nl "Test 3:" nl );
>
> align(16);
> nop;
> nop;
> nop;
> nop;
> rdtsc();
> mov( eax, (type dword timeStart));
> mov( edx, (type dword timeStart[4]));
>
> mov( str1, esi );
> mov( str1, edi );
> stricmp2();
>
> mov( str1, esi );
> mov( str2, edi );
> stricmp2();
>
> mov( str1, esi );
> mov( str3, edi );
> stricmp2();
>
> mov( str2, esi );
> mov( str3, edi );
> stricmp2();
>
> mov( str3, esi );
> mov( str2, edi );
> stricmp2();
>
> mov( str1, esi );
> mov( str4, edi );
> stricmp2();
>
> rdtsc();
> sub( (type dword timeStart), eax );
> mov( eax, (type dword timeEnd) );
> sbb( (type dword timeStart[4]), edx );
> mov( edx, (type dword timeEnd[4]));
> stdout.put( "Sequence 3: ", timeEnd, nl );
>
>
>
> stdout.put( nl "Test 4:" nl );
>
> align(16);
> nop;
> nop;
> nop;
> nop;
> rdtsc();
> mov( eax, (type dword timeStart));
> mov( edx, (type dword timeStart[4]));
>
> mov( str1, esi );
> mov( str1, edi );
> stricmp3();
>
> mov( str1, esi );
> mov( str2, edi );
> stricmp3();
>
> mov( str1, esi );
> mov( str3, edi );
> stricmp3();
>
> mov( str2, esi );
> mov( str3, edi );
> stricmp3();
>
> mov( str3, esi );
> mov( str2, edi );
> stricmp3();
>
> mov( str1, esi );
> mov( str4, edi );
> stricmp3();
>
> rdtsc();
> sub( (type dword timeStart), eax );
> mov( eax, (type dword timeEnd) );
> sbb( (type dword timeStart[4]), edx );
> mov( edx, (type dword timeEnd[4]));
> stdout.put( "Sequence 4: ", timeEnd, nl );
>
>
>
> stdout.put( nl "Test 2:" nl );
>
> align(16);
> nop;
> nop;
> nop;
> nop;
> rdtsc();
> mov( eax, (type dword timeStart));
> mov( edx, (type dword timeStart[4]));
>
> mov( str1, esi );
> mov( str1, edi );
> stricmp1();
>
> mov( str1, esi );
> mov( str2, edi );
> stricmp1();
>
> mov( str1, esi );
> mov( str3, edi );
> stricmp1();
>
> mov( str2, esi );
> mov( str3, edi );
> stricmp1();
>
> mov( str3, esi );
> mov( str2, edi );
> stricmp1();
>
> mov( str1, esi );
> mov( str4, edi );
> stricmp1();
>
> rdtsc();
> sub( (type dword timeStart), eax );
> mov( eax, (type dword timeEnd) );
> sbb( (type dword timeStart[4]), edx );
> mov( edx, (type dword timeEnd[4]));
> stdout.put( "Sequence 2: ", timeEnd, nl );
>
>
> stdout.put( nl "Test 1:" nl );
>
> align(16);
> nop;
> nop;
> nop;
> nop;
> rdtsc();
> mov( eax, (type dword timeStart));
> mov( edx, (type dword timeStart[4]));
>
> mov( str1, esi );
> mov( str1, edi );
> stricmp();
>
> mov( str1, esi );
> mov( str2, edi );
> stricmp();
>
> mov( str1, esi );
> mov( str3, edi );
> stricmp();
>
> mov( str2, esi );
> mov( str3, edi );
> stricmp();
>
> mov( str3, esi );
> mov( str2, edi );
> stricmp();
>
> mov( str1, esi );
> mov( str4, edi );
> stricmp();
>
> rdtsc();
> sub( (type dword timeStart), eax );
> mov( eax, (type dword timeEnd) );
> sbb( (type dword timeStart[4]), edx );
> mov( edx, (type dword timeEnd[4]));
> stdout.put( "Sequence 1: ", timeEnd, nl );
>
> end t;
>
>
> Here is some typical output on a 2.6 GHz PIV:
>
> Test 3:
> Sequence 3: 19832
>
> Test 4:
> Sequence 4: 9188
>
> Test 2:
> Sequence 2: 8684
>
> Test 1:
> Sequence 1: 8332
>
> Note that Test 1 and Test 2 seem to be position dependent.
> If I move them around, they tend to swap positions in the
> sorted list of times. So my assumption is that their running
> times are roughly equivalent.
>
> I'm also wondering about the results to return when the strings
> are not equal. In particular, the algorithm I've provided compares
> the original characters and sets the flags if those characters do
> not match. Therefore, 'a' > 'B'. I'm wondering if this is reasonable,
> or if I should return the comparisions of the "converted" characters
> (if they were uppercase)? I don't really care what *other* languages
> (e.g., C/C++) do, I'm interested in a discussion of what is reasonable
> to do here.
> Cheers,
> Randy Hyde
>
>
>
>
>
>

Here are the typical results I get from a 1.8 Ghz AMD64


Test 3:
Sequence 3: 20417

Test 4:
Sequence 4: 12352

Test 2:
Sequence 2: 5685

Test 1:
Sequence 1: 5449



I notice that AMD64 takes a slight hit to performance when
it comes to xlat, with a slight gain in other areas
(compared to PIV).


I was surprised that changing the add (1, n) instructions to
inc (n) were actually less efficient, adding ~1000 to the
test times. I was under the impression that inc was smaller
and faster.


As for the returns, I think the flags approach is good, but
since the comparison is on case insensitive strings, it
would be better to return the converted characters, therefore
'A' > 'B', even at a slight cost in performance, but only for
a 'formal' routine intended to end up in a library (with esi
and edi preserved as well).

That is, if you care for such information. Probably 90% of the
time, the program is only interested if the strings are equal
or not.

Yes, I think I'll replace my current zicmp routine with stricmp
which was timing at arount ~15000



--
[kain]
http://www.geocities.com/kahlinor
From: arargh502NOSPAM on
On Sun, 06 Mar 2005 23:39:02 GMT, "Randall Hyde"
<randyhyde(a)earthlink.net> wrote:

>Hi All,
>
>I'm currently in the process of correcting a defect in the
>HLA Standard Library case insensitive string comparison
>routines. While I'm at it, I'm trying to speed it up a bit.
>I've written the following code:
>
>procedure stricmp; @nodisplay; @noframe; align(4);
>begin stricmp;
>
> push( ebx );
>
>
> // Compare the two strings until we encounter a zero byte
> // or until the corresonding characters are different.
>
> cmpLoop:
Probably align to 4 or 16 here
>
> mov( [esi], al );
> mov( [edi], ah );
> add( 1, esi );
> add( 1, edi );
> cmp( al, 0 );
> je notAlpha;
> cmp( al, ah );
> je cmpLoop;
> mov( ax, bx );

Why bother to move it?
No matter what, ax is not needed anymore that I can see.

Also if use32 save a byte by "mov( eax, ebx );"


> and( $5f5f, bx );
Could preload $5f5f into a reg

> cmp( bl, 'A' );
> jb notAlpha;
> cmp( bl, 'Z' );
> ja notAlpha;
What about bh?

Could combine these two jmps to one using setcc and some more regs
If you process bh that would be 4 jmps to one.


> cmp( bl, bh );
> je cmpLoop;

If they are not equal is it possible that the cmp below can get an
incorrect result in some cases?

>
> notAlpha:
>
> // At this point, we've either encountered a zero byte in the source
> // string or we've encountered two bytes that are not the same in
> // the two strings. In either case, return the result of the
> // comparison in the flags.
>
> pop( ebx );
> cmp( al, ah );
> ret();
>
>end stricmp;
>
>
>Any suggestions on speeding it up would be appreciated.

--
Arargh502 at [drop the 'http://www.' from ->] http://www.arargh.com
BCET Basic Compiler Page: http://www.arargh.com/basic/index.html

To reply by email, remove the garbage from the reply address.
From: Frank Kotler on
Randall Hyde wrote:

> mov( ax, bx );
> and( $5f5f, bx );

I wondered why use the 16-bit registers here, and incur the
size override penalty. Ought to work with eax and ebx, no?

I tried to try this... and discovered a strange problem. HLA
was silently compiling me a zero-length .asm file!!! After
much trial and error, I tracked it down to...

> str1 :string := "" +
> #for( chr:= 1 to 255 )
> char( chr ) +
> #endfor
> "";

The Linux(?) version of HLA seems to be treating
*something*(?) in here(?) as EOF(?). Changing the 255 to 127
(everywhere) allowed me to get a working file out of it, in
any case.

I won't post the results (K6-300), since they're skewed, but
the "mov (eax, ebx);"/"and ($5f5f, ebx);" *does* seem to be
faster...

Best,
Frank