From: Skybuck Flying on
Ok,

I ported my simulated-int64 Delphi 2007 version to Visual Studio 2008 to
compare the assembler outputs.

Here is the Visual Studio 2008 C/C++ version and its output:

// *** Begin of C/C++ Code ***

#include <stdio.h>
#include <windows.h>

/*

Skybuck's WriteLongword (simulated int64 version) ported to C/C++

Somebody gave me an idea:

Port the Delphi code to C/C++ and then examine the generated assembler
instructions.

See comments.

*/

// Delphi-style fixed-width type names, to make programming easier for me as
// a Delphi programmer ;) :)
// The sizes in the names assume the usual x86 Windows data model
// (char = 8, short = 16, int = 32, long long = 64 bits).

// signed integer types
// int8 is spelled "signed char" on purpose: plain "char" has
// implementation-defined signedness (it becomes unsigned with e.g. MSVC /J),
// which would silently turn int8 into an unsigned type.
typedef signed char int8;
typedef short int int16;
typedef int int32;
typedef long long int64;

// unsigned integer types
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;
typedef unsigned long long uint64;

// untyped pointer type (Delphi "pointer")
typedef void * pointer;

// unsigned integer pointers
typedef uint8* Puint8;
typedef uint16* Puint16;
typedef uint32* Puint32;
typedef uint64* Puint64;

// signed integer pointers
typedef int8* Pint8;
typedef int16* Pint16;
typedef int32* Pint32;
typedef int64* Pint64;

// Delphi 2007: 89 instructions with optimizations on !!! holycow. it's the
int64 stuff.
// Visual Studio 2008: 58 instructions, but does use a few slower ones here
and there
// I would have to count the latencies to known which one is faster
// and then ofcourse a benchmark could be interesting as well ;)
void WriteLongword( uint32 Value, uint8 BitCount, pointer ToBase, uint32
ToBitPointer )
{
int64 Content;
int64 Mask;

uint32 ByteIndex;
uint8 BitIndex;

// copy value to a longword
Content = Value;

// calculate mask
Mask = ~(18446744073709551615 << BitCount);

// cut of accessive bits from content
Content = Content & Mask;

// ok now determine where the longword must be written to. which byte
ByteIndex = ToBitPointer >> 3; // div 8

// ok now determine at which bit it must be written to. which bit position
BitIndex = ToBitPointer & 7; // mod 8

// now simply shift the mask and the content this ammount of bits
Mask = Mask << BitIndex;
Content = Content << BitIndex;

// now simply clear the bits first in the buffer which are to be
overwritten. clear it with the mask.
// first invert mask so the rest remains
*( (Pint64)( uint32(ToBase) + ByteIndex ) ) = *( (Pint64)( uint32(ToBase) +
ByteIndex ) ) & (~ Mask);

// now simply or the content into it
*( (Pint64)( uint32(ToBase) + ByteIndex ) ) = *( (Pint64)( uint32(ToBase) +
ByteIndex) ) | Content;
}

// Generated Visual Studio 2008 assembler:

/*

// 37 instructions + 3 * 7 (= 21) = 58 instructions
// Impressive but it does use some slower instructions here and there ;)
void WriteLongword( uint32 Value, uint8 BitCount, pointer ToBase, uint32
ToBitPointer )
{
00401000 sub esp,8
00401003 push ebx
00401004 push ebp
00401005 mov ebx,eax
00401007 push esi
int64 Content;
int64 Mask;

uint32 ByteIndex;
uint8 BitIndex;

// copy value to a longword
Content = Value;

// calculate mask
Mask = ~(18446744073709551615 << BitCount);
00401008 or eax,0FFFFFFFFh
0040100B mov esi,edx
0040100D movzx ecx,cl
00401010 or edx,eax
00401012 call _allshl (401950h)
00401017 not eax

// cut of accessive bits from content
Content = Content & Mask;
00401019 and esi,eax
0040101B mov dword ptr [esp+0Ch],esi

// ok now determine where the longword must be written to. which byte
ByteIndex = ToBitPointer >> 3; // div 8
0040101F mov esi,ebx

// ok now determine at which bit it must be written to. which bit position
BitIndex = ToBitPointer & 7; // mod 8
00401021 and bl,7
00401024 movzx ebx,bl
00401027 not edx
00401029 xor ebp,ebp

// now simply shift the mask and the content this ammount of bits
Mask = Mask << BitIndex;
0040102B mov ecx,ebx
0040102D and ebp,edx
0040102F shr esi,3
00401032 call _allshl (401950h)
Content = Content << BitIndex;

// now simply clear the bits first in the buffer which are to be
overwritten. clear it with the mask.
// first invert mask so the rest remains
*( (Pint64)( uint32(ToBase) + ByteIndex ) ) = *( (Pint64)( uint32(ToBase) +
ByteIndex ) ) & (~ Mask);
00401037 not eax
00401039 and dword ptr [esi+edi],eax
0040103C mov eax,dword ptr [esp+0Ch]
00401040 not edx
00401042 and dword ptr [esi+edi+4],edx
00401046 mov edx,ebp
00401048 mov ecx,ebx
0040104A call _allshl (401950h)

// now simply or the content into it
*( (Pint64)( uint32(ToBase) + ByteIndex ) ) = *( (Pint64)( uint32(ToBase) +
ByteIndex) ) | Content;
0040104F or dword ptr [esi+edi],eax
00401052 or dword ptr [esi+edi+4],edx
}
00401056 pop esi
00401057 pop ebp
00401058 pop ebx
00401059 add esp,8
0040105C ret


// Extra Routine (called 3 times):

_allshl PROC NEAR

;
; Handle shifts of 64 or more bits (all get 0)
;
cmp cl, 64
00401950 cmp cl,40h
jae short RETZERO
00401953 jae RETZERO (40196Ah)

;
; Handle shifts of between 0 and 31 bits
;
cmp cl, 32
00401955 cmp cl,20h
jae short MORE32
00401958 jae MORE32 (401960h)
shld edx,eax,cl
0040195A shld edx,eax,cl
shl eax,cl
0040195D shl eax,cl
ret
0040195F ret


*/

// Demo driver: writes one fixed and one GetTickCount()-derived bit field into
// a small buffer with WriteLongword and prints the first four buffer bytes
// after each write.
int main()
{
    // Zero the whole buffer, not just the first bytes: every WriteLongword
    // call reads and rewrites 8 bytes starting at the target byte.
    uint8 Buffer[16] = { 0 };
    uint32 Value;
    uint8 Bits;
    uint32 BitIndex;
    int i;

    Value = 0xFFFFFFFF; // 4294967295, all 32 bits set
    Bits = 31;
    BitIndex = 1;

    // inlining optimizations turned off (to __inline only)
    // 58 instructions in c/c++
    WriteLongword( Value, Bits, Buffer, BitIndex );

    for ( i = 0; i < 4; i++ )
    {
        printf( "%d \n", Buffer[i] );
    }

    // one more time ;)
    Value = GetTickCount();
    Bits = GetTickCount() & 31;

    // Keep the bit offset in 0..63: the write then touches bytes 7..14 at
    // most, which fits the 16-byte buffer. The previous mask (& 100) allowed
    // bit offset 100, i.e. an 8-byte access at bytes 12..19 - past the end
    // of Buffer.
    BitIndex = GetTickCount() & 63;

    WriteLongword( Value, Bits, Buffer, BitIndex );

    for ( i = 0; i < 4; i++ )
    {
        printf( "%d \n", Buffer[i] );
    }

    return 0;
}

// *** End of C/C++ Code ***

> Here is my 32 bit Delphi version (it actually needs 64 bit support):
>
> procedure WriteLongword( Value : longword; BitCount : byte; ToBase :
> pointer; ToBitPointer : longword );
> var
> Content : int64;
> Mask : int64;
>
> ByteIndex : longword;
> BitIndex : byte;
> begin
> // copy value to a longword
> Content := Value;
>
> // calculate mask
> Mask := not (18446744073709551615 shl BitCount);
>
> // cut of accessive bits from content
> Content := Content and Mask;
>
> // ok now determine where the longword must be written to. which byte
> ByteIndex := ToBitPointer shr 3; // div 8
>
> // ok now determine at which bit it must be written to. which bit position
> BitIndex := ToBitPointer and 7; // mod 8
>
> // now simply shift the mask and the content this ammount of bits
> Mask := Mask shl BitIndex;
> Content := Content shl BitIndex;
>
> // now simply clear the bits first in the buffer which are to be
> overwritten. clear it with the mask.
> // first invert mask so the rest remains
> int64( pointer(Longword(ToBase) + ByteIndex)^ ) := int64(
> pointer(Longword(ToBase) + ByteIndex)^ ) and (not Mask);
>
> // now simply or the content into it
> int64( pointer(Longword(ToBase) + ByteIndex)^ ) := int64(
> pointer(Longword(ToBase) + ByteIndex)^ ) or Content;
> end;
>
> (The same algorithm is used for the 8 bit and 16 bit versions, with some
> constant and typecast modifications).
>
> Here is the generated x86 assembler output for the 32 bit version:
>
> Project1.dpr.193: begin
> 00408FE8 55 push ebp
> 00408FE9 8BEC mov ebp,esp
> 00408FEB 83C4D8 add esp,-$28
> 00408FEE 894DF4 mov [ebp-$0c],ecx
> 00408FF1 8855FB mov [ebp-$05],dl
> 00408FF4 8945FC mov [ebp-$04],eax
> Project1.dpr.195: Content := Value;
> 00408FF7 8B45FC mov eax,[ebp-$04]
> 00408FFA 33D2 xor edx,edx
> 00408FFC 8945E8 mov [ebp-$18],eax
> 00408FFF 8955EC mov [ebp-$14],edx
> Project1.dpr.198: Mask := not (18446744073709551615 shl BitCount);
> 00409002 B8FFFFFFFF mov eax,$ffffffff
> 00409007 8BD0 mov edx,eax
> 00409009 8A4DFB mov cl,[ebp-$05]
> 0040900C E827BEFFFF call @_llshl
> 00409011 F7D0 not eax
> 00409013 F7D2 not edx
> 00409015 8945E0 mov [ebp-$20],eax
> 00409018 8955E4 mov [ebp-$1c],edx
> Project1.dpr.201: Content := Content and Mask;
> 0040901B 8B45E8 mov eax,[ebp-$18]
> 0040901E 8B55EC mov edx,[ebp-$14]
> 00409021 2345E0 and eax,[ebp-$20]
> 00409024 2355E4 and edx,[ebp-$1c]
> 00409027 8945E8 mov [ebp-$18],eax
> 0040902A 8955EC mov [ebp-$14],edx
> Project1.dpr.204: ByteIndex := ToBitPointer shr 3; // div 8
> 0040902D 8B4508 mov eax,[ebp+$08]
> 00409030 C1E803 shr eax,$03
> 00409033 8945DC mov [ebp-$24],eax
> Project1.dpr.207: BitIndex := ToBitPointer and 7; // mod 8
> 00409036 8A4508 mov al,[ebp+$08]
> 00409039 2407 and al,$07
> 0040903B 8845DB mov [ebp-$25],al
> Project1.dpr.210: Mask := Mask shl BitIndex;
> 0040903E 8B45E0 mov eax,[ebp-$20]
> 00409041 8B55E4 mov edx,[ebp-$1c]
> 00409044 8A4DDB mov cl,[ebp-$25]
> 00409047 E8ECBDFFFF call @_llshl
> 0040904C 8945E0 mov [ebp-$20],eax
> 0040904F 8955E4 mov [ebp-$1c],edx
> Project1.dpr.211: Content := Content shl BitIndex;
> 00409052 8B45E8 mov eax,[ebp-$18]
> 00409055 8B55EC mov edx,[ebp-$14]
> 00409058 8A4DDB mov cl,[ebp-$25]
> 0040905B E8D8BDFFFF call @_llshl
> 00409060 8945E8 mov [ebp-$18],eax
> 00409063 8955EC mov [ebp-$14],edx
> Project1.dpr.215: int64( pointer(Longword(ToBase) + ByteIndex)^ ) :=
> int64( pointer(Longword(ToBase) + ByteIndex)^ ) and (not Mask);
> 00409066 8B45E0 mov eax,[ebp-$20]
> 00409069 8B55E4 mov edx,[ebp-$1c]
> 0040906C F7D0 not eax
> 0040906E F7D2 not edx
> 00409070 8B4DF4 mov ecx,[ebp-$0c]
> 00409073 034DDC add ecx,[ebp-$24]
> 00409076 2301 and eax,[ecx]
> 00409078 235104 and edx,[ecx+$04]
> 0040907B 8B4DF4 mov ecx,[ebp-$0c]
> 0040907E 034DDC add ecx,[ebp-$24]
> 00409081 8901 mov [ecx],eax
> 00409083 895104 mov [ecx+$04],edx
> Project1.dpr.218: int64( pointer(Longword(ToBase) + ByteIndex)^ ) :=
> int64( pointer(Longword(ToBase) + ByteIndex)^ ) or Content;
> 00409086 8B45F4 mov eax,[ebp-$0c]
> 00409089 0345DC add eax,[ebp-$24]
> 0040908C 8B5004 mov edx,[eax+$04]
> 0040908F 8B00 mov eax,[eax]
> 00409091 0B45E8 or eax,[ebp-$18]
> 00409094 0B55EC or edx,[ebp-$14]
> 00409097 8B4DF4 mov ecx,[ebp-$0c]
> 0040909A 034DDC add ecx,[ebp-$24]
> 0040909D 8901 mov [ecx],eax
> 0040909F 895104 mov [ecx+$04],edx
> Project1.dpr.219: end;
> 004090A2 8BE5 mov esp,ebp
> 004090A4 5D pop ebp
> 004090A5 C20400 ret $0004
>
> Plus here is the extra routine it uses:
>
> @_llshl:
> 00404E38 80F920 cmp cl,$20
> 00404E3B 7C11 jl $00404e4e
> 00404E3D 80F940 cmp cl,$40
> 00404E40 7C05 jl $00404e47
> 00404E42 31D2 xor edx,edx
> 00404E44 31C0 xor eax,eax
> 00404E46 C3 ret
> 00404E47 89C2 mov edx,eax
> 00404E49 D3E2 shl edx,cl
> 00404E4B 31C0 xor eax,eax
> 00404E4D C3 ret
> 00404E4E 0FA5C2 shld edx,eax,cl
> 00404E51 D3E0 shl eax,cl
> 00404E53 C3 ret
> 00404E54 C3 ret
>

Bye,
Skybuck.


From: Skybuck Flying on

"Skybuck Flying" <BloodyShame(a)hotmail.com> wrote in message
news:9605d$481f5fce$541983fa$14308(a)cache2.tilbu1.nb.home.nl...
> Ok,
>
> I ported my simulated-int64-Delphi-2007-version to Visual Studio 2008, to
> compare assembler outputs,
>
> Here is the Visual Studio 2008 C/C++ version and it's output:
>
> // *** Begin of C/C++ Code ***
>
> #include <stdio.h>
> #include <windows.h>
>
> /*
>
> Skybuck's WriteLongword (simulated int64 version) ported to C/C++
>
> Somebody gave me an idea:
>
> Port the Delphi code to C/C++ and then examine the generated assembler
> instructions.
>
> See comments.
>
> */
>
> // types to make programming more easy for me as Delphi programmer ;) :)
>
> // signed integer types
> typedef char int8;
> typedef short int int16;
> typedef int int32;
> typedef long long int64;
>
> // unsigned integer types
> typedef unsigned char uint8;
> typedef unsigned short uint16;
> typedef unsigned int uint32;
> typedef unsigned long long uint64;
>
> // pointer type
> typedef void * pointer;
>
> // unsigned integer pointers
> typedef uint8* Puint8;
> typedef uint16* Puint16;
> typedef uint32* Puint32;
> typedef uint64* Puint64;
>
> // signed integer points
> typedef int8* Pint8;
> typedef int16* Pint16;
> typedef int32* Pint32;
> typedef int64* Pint64;
>
> // Delphi 2007: 89 instructions with optimizations on !!! holycow. it's
> the int64 stuff.
> // Visual Studio 2008: 58 instructions, but does use a few slower ones
> here and there
> // I would have to count the latencies to known which one is faster
> // and then ofcourse a benchmark could be interesting as well ;)
> void WriteLongword( uint32 Value, uint8 BitCount, pointer ToBase, uint32
> ToBitPointer )
> {
> int64 Content;
> int64 Mask;
>
> uint32 ByteIndex;
> uint8 BitIndex;
>
> // copy value to a longword
> Content = Value;
>
> // calculate mask
> Mask = ~(18446744073709551615 << BitCount);
>
> // cut of accessive bits from content
> Content = Content & Mask;
>
> // ok now determine where the longword must be written to. which byte
> ByteIndex = ToBitPointer >> 3; // div 8
>
> // ok now determine at which bit it must be written to. which bit position
> BitIndex = ToBitPointer & 7; // mod 8
>
> // now simply shift the mask and the content this ammount of bits
> Mask = Mask << BitIndex;
> Content = Content << BitIndex;
>
> // now simply clear the bits first in the buffer which are to be
> overwritten. clear it with the mask.
> // first invert mask so the rest remains
> *( (Pint64)( uint32(ToBase) + ByteIndex ) ) = *( (Pint64)( uint32(ToBase)
> + ByteIndex ) ) & (~ Mask);
>
> // now simply or the content into it
> *( (Pint64)( uint32(ToBase) + ByteIndex ) ) = *( (Pint64)( uint32(ToBase)
> + ByteIndex) ) | Content;
> }
>
> // Generated Visual Studio 2008 assembler:
>
> /*
>
> // 37 instructions + 3 * 7 (= 21) = 58 instructions
> // Impressive but it does use some slower instructions here and there ;)
> void WriteLongword( uint32 Value, uint8 BitCount, pointer ToBase, uint32
> ToBitPointer )
> {
> 00401000 sub esp,8
> 00401003 push ebx
> 00401004 push ebp
> 00401005 mov ebx,eax
> 00401007 push esi
> int64 Content;
> int64 Mask;
>
> uint32 ByteIndex;
> uint8 BitIndex;
>
> // copy value to a longword
> Content = Value;
>
> // calculate mask
> Mask = ~(18446744073709551615 << BitCount);
> 00401008 or eax,0FFFFFFFFh
> 0040100B mov esi,edx
> 0040100D movzx ecx,cl
> 00401010 or edx,eax
> 00401012 call _allshl (401950h)
> 00401017 not eax
>
> // cut of accessive bits from content
> Content = Content & Mask;
> 00401019 and esi,eax
> 0040101B mov dword ptr [esp+0Ch],esi
>
> // ok now determine where the longword must be written to. which byte
> ByteIndex = ToBitPointer >> 3; // div 8
> 0040101F mov esi,ebx
>
> // ok now determine at which bit it must be written to. which bit position
> BitIndex = ToBitPointer & 7; // mod 8
> 00401021 and bl,7
> 00401024 movzx ebx,bl
> 00401027 not edx
> 00401029 xor ebp,ebp
>
> // now simply shift the mask and the content this ammount of bits
> Mask = Mask << BitIndex;
> 0040102B mov ecx,ebx
> 0040102D and ebp,edx
> 0040102F shr esi,3
> 00401032 call _allshl (401950h)
> Content = Content << BitIndex;
>
> // now simply clear the bits first in the buffer which are to be
> overwritten. clear it with the mask.
> // first invert mask so the rest remains
> *( (Pint64)( uint32(ToBase) + ByteIndex ) ) = *( (Pint64)( uint32(ToBase)
> + ByteIndex ) ) & (~ Mask);
> 00401037 not eax
> 00401039 and dword ptr [esi+edi],eax
> 0040103C mov eax,dword ptr [esp+0Ch]
> 00401040 not edx
> 00401042 and dword ptr [esi+edi+4],edx
> 00401046 mov edx,ebp
> 00401048 mov ecx,ebx
> 0040104A call _allshl (401950h)
>
> // now simply or the content into it
> *( (Pint64)( uint32(ToBase) + ByteIndex ) ) = *( (Pint64)( uint32(ToBase)
> + ByteIndex) ) | Content;
> 0040104F or dword ptr [esi+edi],eax
> 00401052 or dword ptr [esi+edi+4],edx
> }
> 00401056 pop esi
> 00401057 pop ebp
> 00401058 pop ebx
> 00401059 add esp,8
> 0040105C ret
>
>
> // Extra Routine (called 3 times):
>
> _allshl PROC NEAR
>
> ;
> ; Handle shifts of 64 or more bits (all get 0)
> ;
> cmp cl, 64
> 00401950 cmp cl,40h
> jae short RETZERO
> 00401953 jae RETZERO (40196Ah)
>
> ;
> ; Handle shifts of between 0 and 31 bits
> ;
> cmp cl, 32
> 00401955 cmp cl,20h
> jae short MORE32
> 00401958 jae MORE32 (401960h)
> shld edx,eax,cl
> 0040195A shld edx,eax,cl
> shl eax,cl
> 0040195D shl eax,cl
> ret
> 0040195F ret

Hmm, this little piece of code belongs to _allshl as well, but it was not executed.

It gets executed for shift counts of 32 through 63 (the RETZERO part for shifts of 64 or more is still not shown here).

;
; Handle shifts of between 32 and 63 bits
;
MORE32:
mov edx,eax
xor eax,eax
and cl,31
shl edx,cl
ret

Bye,
Skybuck.

>
> */
>
> int main()
> {
> uint8 Buffer[16];
> uint32 Value;
> uint8 Bits;
> uint32 BitIndex;
>
> Buffer[0] = 0;
> Buffer[1] = 0;
> Buffer[2] = 0;
> Buffer[3] = 0;
> Buffer[4] = 0;
> Buffer[5] = 0;
>
> Value = 4294967295;
> Bits = 31;
> BitIndex = 1;
>
> // inlining optimizations turned off (to __inline only)
> // 58 instructions in c/c++
> WriteLongword( Value, Bits, &Buffer, BitIndex );
>
> printf( "%d \n", Buffer[0] );
> printf( "%d \n", Buffer[1] );
> printf( "%d \n", Buffer[2] );
> printf( "%d \n", Buffer[3] );
>
> // one more time ;)
> Value = GetTickCount();
> Bits = GetTickCount() & 31;
> BitIndex = GetTickCount() & 100;
>
> WriteLongword( Value, Bits, &Buffer, BitIndex );
>
> printf( "%d \n", Buffer[0] );
> printf( "%d \n", Buffer[1] );
> printf( "%d \n", Buffer[2] );
> printf( "%d \n", Buffer[3] );
>
> return 0;
> }
>
> // *** End of C/C++ Code ***
>
>> Here is my 32 bit Delphi version (it actually needs 64 bit support):
>>
>> procedure WriteLongword( Value : longword; BitCount : byte; ToBase :
>> pointer; ToBitPointer : longword );
>> var
>> Content : int64;
>> Mask : int64;
>>
>> ByteIndex : longword;
>> BitIndex : byte;
>> begin
>> // copy value to a longword
>> Content := Value;
>>
>> // calculate mask
>> Mask := not (18446744073709551615 shl BitCount);
>>
>> // cut of accessive bits from content
>> Content := Content and Mask;
>>
>> // ok now determine where the longword must be written to. which byte
>> ByteIndex := ToBitPointer shr 3; // div 8
>>
>> // ok now determine at which bit it must be written to. which bit
>> position
>> BitIndex := ToBitPointer and 7; // mod 8
>>
>> // now simply shift the mask and the content this ammount of bits
>> Mask := Mask shl BitIndex;
>> Content := Content shl BitIndex;
>>
>> // now simply clear the bits first in the buffer which are to be
>> overwritten. clear it with the mask.
>> // first invert mask so the rest remains
>> int64( pointer(Longword(ToBase) + ByteIndex)^ ) := int64(
>> pointer(Longword(ToBase) + ByteIndex)^ ) and (not Mask);
>>
>> // now simply or the content into it
>> int64( pointer(Longword(ToBase) + ByteIndex)^ ) := int64(
>> pointer(Longword(ToBase) + ByteIndex)^ ) or Content;
>> end;
>>
>> (The same algorithm is used for the 8 bit and 16 bit versions, with some
>> constant and typecast modifications).
>>
>> Here is the generated x86 assembler output for the 32 bit version:
>>
>> Project1.dpr.193: begin
>> 00408FE8 55 push ebp
>> 00408FE9 8BEC mov ebp,esp
>> 00408FEB 83C4D8 add esp,-$28
>> 00408FEE 894DF4 mov [ebp-$0c],ecx
>> 00408FF1 8855FB mov [ebp-$05],dl
>> 00408FF4 8945FC mov [ebp-$04],eax
>> Project1.dpr.195: Content := Value;
>> 00408FF7 8B45FC mov eax,[ebp-$04]
>> 00408FFA 33D2 xor edx,edx
>> 00408FFC 8945E8 mov [ebp-$18],eax
>> 00408FFF 8955EC mov [ebp-$14],edx
>> Project1.dpr.198: Mask := not (18446744073709551615 shl BitCount);
>> 00409002 B8FFFFFFFF mov eax,$ffffffff
>> 00409007 8BD0 mov edx,eax
>> 00409009 8A4DFB mov cl,[ebp-$05]
>> 0040900C E827BEFFFF call @_llshl
>> 00409011 F7D0 not eax
>> 00409013 F7D2 not edx
>> 00409015 8945E0 mov [ebp-$20],eax
>> 00409018 8955E4 mov [ebp-$1c],edx
>> Project1.dpr.201: Content := Content and Mask;
>> 0040901B 8B45E8 mov eax,[ebp-$18]
>> 0040901E 8B55EC mov edx,[ebp-$14]
>> 00409021 2345E0 and eax,[ebp-$20]
>> 00409024 2355E4 and edx,[ebp-$1c]
>> 00409027 8945E8 mov [ebp-$18],eax
>> 0040902A 8955EC mov [ebp-$14],edx
>> Project1.dpr.204: ByteIndex := ToBitPointer shr 3; // div 8
>> 0040902D 8B4508 mov eax,[ebp+$08]
>> 00409030 C1E803 shr eax,$03
>> 00409033 8945DC mov [ebp-$24],eax
>> Project1.dpr.207: BitIndex := ToBitPointer and 7; // mod 8
>> 00409036 8A4508 mov al,[ebp+$08]
>> 00409039 2407 and al,$07
>> 0040903B 8845DB mov [ebp-$25],al
>> Project1.dpr.210: Mask := Mask shl BitIndex;
>> 0040903E 8B45E0 mov eax,[ebp-$20]
>> 00409041 8B55E4 mov edx,[ebp-$1c]
>> 00409044 8A4DDB mov cl,[ebp-$25]
>> 00409047 E8ECBDFFFF call @_llshl
>> 0040904C 8945E0 mov [ebp-$20],eax
>> 0040904F 8955E4 mov [ebp-$1c],edx
>> Project1.dpr.211: Content := Content shl BitIndex;
>> 00409052 8B45E8 mov eax,[ebp-$18]
>> 00409055 8B55EC mov edx,[ebp-$14]
>> 00409058 8A4DDB mov cl,[ebp-$25]
>> 0040905B E8D8BDFFFF call @_llshl
>> 00409060 8945E8 mov [ebp-$18],eax
>> 00409063 8955EC mov [ebp-$14],edx
>> Project1.dpr.215: int64( pointer(Longword(ToBase) + ByteIndex)^ ) :=
>> int64( pointer(Longword(ToBase) + ByteIndex)^ ) and (not Mask);
>> 00409066 8B45E0 mov eax,[ebp-$20]
>> 00409069 8B55E4 mov edx,[ebp-$1c]
>> 0040906C F7D0 not eax
>> 0040906E F7D2 not edx
>> 00409070 8B4DF4 mov ecx,[ebp-$0c]
>> 00409073 034DDC add ecx,[ebp-$24]
>> 00409076 2301 and eax,[ecx]
>> 00409078 235104 and edx,[ecx+$04]
>> 0040907B 8B4DF4 mov ecx,[ebp-$0c]
>> 0040907E 034DDC add ecx,[ebp-$24]
>> 00409081 8901 mov [ecx],eax
>> 00409083 895104 mov [ecx+$04],edx
>> Project1.dpr.218: int64( pointer(Longword(ToBase) + ByteIndex)^ ) :=
>> int64( pointer(Longword(ToBase) + ByteIndex)^ ) or Content;
>> 00409086 8B45F4 mov eax,[ebp-$0c]
>> 00409089 0345DC add eax,[ebp-$24]
>> 0040908C 8B5004 mov edx,[eax+$04]
>> 0040908F 8B00 mov eax,[eax]
>> 00409091 0B45E8 or eax,[ebp-$18]
>> 00409094 0B55EC or edx,[ebp-$14]
>> 00409097 8B4DF4 mov ecx,[ebp-$0c]
>> 0040909A 034DDC add ecx,[ebp-$24]
>> 0040909D 8901 mov [ecx],eax
>> 0040909F 895104 mov [ecx+$04],edx
>> Project1.dpr.219: end;
>> 004090A2 8BE5 mov esp,ebp
>> 004090A4 5D pop ebp
>> 004090A5 C20400 ret $0004
>>
>> Plus here is the extra routine it uses:
>>
>> @_llshl:
>> 00404E38 80F920 cmp cl,$20
>> 00404E3B 7C11 jl $00404e4e
>> 00404E3D 80F940 cmp cl,$40
>> 00404E40 7C05 jl $00404e47
>> 00404E42 31D2 xor edx,edx
>> 00404E44 31C0 xor eax,eax
>> 00404E46 C3 ret
>> 00404E47 89C2 mov edx,eax
>> 00404E49 D3E2 shl edx,cl
>> 00404E4B 31C0 xor eax,eax
>> 00404E4D C3 ret
>> 00404E4E 0FA5C2 shld edx,eax,cl
>> 00404E51 D3E0 shl eax,cl
>> 00404E53 C3 ret
>> 00404E54 C3 ret
>>
>
> Bye,
> Skybuck.
>