From: Skybuck Flying on
Hello,

Here is my second entry for the contest.

I was hoping that by using assembler instructions, the instruction count
could be reduced.

But the opposite has happened.

This version is even worse:

107 instructions !

And it's using slower instructions too, like shld and a branch !

However the keep low bits routine was not yet inlined...

And maybe the ShiftLeft routine can be inlined but for now it's kinda sucky.

The algorithm is cool though... just sucky speed-wise.

It's quite amazing to see that my original simulated-int64 version
actually has fewer instructions ?!?!?

WOW ;)

Oh well enjoy this version for what it's worth:

Maybe there are further optimizations possible ?

No benchmarking done yet :)

// *** Begin of Code ***

// Returns Value with only its lowest "Bits" bits kept; all higher bits are
// cleared. For Bits >= 32 the value is returned unchanged.
//
// Value : the 32 bit input.
// Bits  : number of low bits to keep (0 yields 0, 32 or more yields Value).
function KeepLowBits( Value : longword; Bits : longword ) : longword;
begin
Result := Value; // 32 bits (or more) case: nothing to clear.
if Bits <= 31 then
begin
// The shl instruction is limited to a shift count of 31, hence the range
// check above instead of shifting by Bits unconditionally.
Result := Result and not (4294967295 shl Bits);
end;
end;

// Double-precision shift left: returns Left shifted left by Shift bits, with
// the vacated low bits filled from the most significant bits of Right.
//
// ShiftLeft( X, 0, N ) therefore gives X shl N, and ShiftLeft( 0, X, N ) gives
// the N bits of X that such a shift pushes out at the top.
//
// The body relies on the arguments arriving in eax (Left), edx (Right) and
// ecx (Shift) and on the result being returned in eax, which is how the
// callers in the generated listing set up the call. 32 bit x86 only.
// NOTE(review): callers here only pass Shift in 0..7; behaviour for
// Shift >= 32 follows the shld instruction's count handling - confirm before
// using larger counts.
function ShiftLeft( Left : longword; Right : Longword; Shift : longword ) :
longword;
asm
// eax := (eax shl cl) with low bits taken from the top of edx
shld eax, edx, cl
end;

// Writes the lowest "Bits" bits of Value into memory, starting at bit
// DestBitIndex counted from DestAddress. All other destination bits are
// preserved.
//
// Value        : source bits (only the lowest Bits bits are used).
// Bits         : number of bits to write; 32 or more writes all of Value.
// DestAddress  : base address of the destination buffer.
// DestBitIndex : bit offset from DestAddress; byte = DestBitIndex shr 3,
//                bit within that byte = DestBitIndex and 7.
//
// The value is shifted left by 0..7 bits, so it can span up to 40 bits: a
// first longword at the byte address and, when bits spill over, a second
// longword 4 bytes further on. The first longword is always read and written
// in full, so at least 4 bytes must be accessible at the byte address.
//
// The second longword is only touched when bits actually spill into it;
// accessing it unconditionally would read and write 4 bytes the call does not
// own and could run past the end of the buffer.
// Note: the generated assembler listing in this post was produced from the
// version without that guard.
//
// longword(DestAddress) assumes 32 bit pointers, in line with the 32 bit
// inline assembler used by ShiftLeft.
procedure WriteLongwordBits( Value : longword; Bits : longword; DestAddress
: pointer; DestBitIndex : longword );
var
vContent : longword;
vMask : longword;
vShift : longword;

vFirstContent : longword;
vSecondContent : longword;

vFirstMask : longword;
vSecondMask : longword;

vFirstAddress : longword;
vSecondAddress : longword;
begin
// Content to store and a mask with a 1 for every bit that will be replaced.
vContent := KeepLowBits( Value, Bits );
vMask := KeepLowBits( 4294967295, Bits );

vShift := DestBitIndex and 7; // mod 8

// Low part: what stays inside the first longword after shifting.
vFirstContent := ShiftLeft( vContent, 0, vShift );
// High part: the bits shifted out of the first longword.
vSecondContent := ShiftLeft( 0, vContent, vShift );

vFirstMask := ShiftLeft( vMask, 0, vShift );
vSecondMask := ShiftLeft( 0, vMask, vShift );

vFirstAddress := longword(DestAddress) + (DestBitIndex shr 3); // div 8
vSecondAddress := vFirstAddress + 4;

Plongword(vFirstAddress)^ := (Plongword(vFirstAddress)^ and not vFirstMask)
or vFirstContent;

// vSecondContent is a subset of vSecondMask, so a zero mask means there is
// nothing to store in the second longword.
if vSecondMask <> 0 then
begin
Plongword(vSecondAddress)^ := (Plongword(vSecondAddress)^ and not
vSecondMask) or vSecondContent;
end;
end;

// Generated Assembler:

{

// 63 instructions + 2 * 18 (= 36) + 4 * 2 (= 8) = 107 instructions !!!??? WOW
Project1.dpr.1479: begin
00409098 55 push ebp
00409099 8BEC mov ebp,esp
0040909B 83C4D0 add esp,-$30
0040909E 894DF4 mov [ebp-$0c],ecx
004090A1 8955F8 mov [ebp-$08],edx
004090A4 8945FC mov [ebp-$04],eax
Project1.dpr.1480: vContent := KeepLowBits( Value, Bits );
004090A7 8B55F8 mov edx,[ebp-$08]
004090AA 8B45FC mov eax,[ebp-$04]
004090AD E8DEFEFFFF call KeepLowBits
004090B2 8945F0 mov [ebp-$10],eax
Project1.dpr.1481: vMask := KeepLowBits( 4294967295, Bits );
004090B5 8B55F8 mov edx,[ebp-$08]
004090B8 83C8FF or eax,-$01
004090BB E8D0FEFFFF call KeepLowBits
004090C0 8945EC mov [ebp-$14],eax
Project1.dpr.1483: vShift := DestBitIndex and 7;
004090C3 8B4508 mov eax,[ebp+$08]
004090C6 83E007 and eax,$07
004090C9 8945E8 mov [ebp-$18],eax
Project1.dpr.1485: vFirstContent := ShiftLeft( vContent, 0, vShift );
004090CC 8B4DE8 mov ecx,[ebp-$18]
004090CF 33D2 xor edx,edx
004090D1 8B45F0 mov eax,[ebp-$10]
004090D4 E8E3FEFFFF call ShiftLeft
004090D9 8945E4 mov [ebp-$1c],eax
Project1.dpr.1486: vSecondContent := ShiftLeft( 0, vContent, vShift );
004090DC 8B4DE8 mov ecx,[ebp-$18]
004090DF 8B55F0 mov edx,[ebp-$10]
004090E2 33C0 xor eax,eax
004090E4 E8D3FEFFFF call ShiftLeft
004090E9 8945E0 mov [ebp-$20],eax
Project1.dpr.1488: vFirstMask := ShiftLeft( vMask, 0, vShift );
004090EC 8B4DE8 mov ecx,[ebp-$18]
004090EF 33D2 xor edx,edx
004090F1 8B45EC mov eax,[ebp-$14]
004090F4 E8C3FEFFFF call ShiftLeft
004090F9 8945DC mov [ebp-$24],eax
Project1.dpr.1489: vSecondMask := ShiftLeft( 0, vMask, vShift );
004090FC 8B4DE8 mov ecx,[ebp-$18]
004090FF 8B55EC mov edx,[ebp-$14]
00409102 33C0 xor eax,eax
00409104 E8B3FEFFFF call ShiftLeft
00409109 8945D8 mov [ebp-$28],eax
Project1.dpr.1491: vFirstAddress := longword(DestAddress) + (DestBitIndex
shr 3); // div 8
0040910C 8B4508 mov eax,[ebp+$08]
0040910F C1E803 shr eax,$03
00409112 0345F4 add eax,[ebp-$0c]
00409115 8945D4 mov [ebp-$2c],eax
Project1.dpr.1492: vSecondAddress := vFirstAddress + 4;
00409118 8B45D4 mov eax,[ebp-$2c]
0040911B 83C004 add eax,$04
0040911E 8945D0 mov [ebp-$30],eax
Project1.dpr.1494: Plongword(vFirstAddress)^ := (Plongword(vFirstAddress)^
and not vFirstMask) or vFirstContent;
00409121 8B45D4 mov eax,[ebp-$2c]
00409124 8B00 mov eax,[eax]
00409126 8B55DC mov edx,[ebp-$24]
00409129 F7D2 not edx
0040912B 23C2 and eax,edx
0040912D 0B45E4 or eax,[ebp-$1c]
00409130 8B55D4 mov edx,[ebp-$2c]
00409133 8902 mov [edx],eax
Project1.dpr.1495: Plongword(vSecondAddress)^ := (Plongword(vSecondAddress)^
and not vSecondMask) or vSecondContent;
00409135 8B45D0 mov eax,[ebp-$30]
00409138 8B00 mov eax,[eax]
0040913A 8B55D8 mov edx,[ebp-$28]
0040913D F7D2 not edx
0040913F 23C2 and eax,edx
00409141 0B45E0 or eax,[ebp-$20]
00409144 8B55D0 mov edx,[ebp-$30]
00409147 8902 mov [edx],eax
Project1.dpr.1496: end;
00409149 8BE5 mov esp,ebp
0040914B 5D pop ebp
0040914C C20400 ret $0004

Extra Routine KeepLowBits:

// it has become longer ?!?! WOW ?!?!?!
// 18 instructions
unit_BitManipulation_KeepBits_version_001.pas.11: begin
00408F90 55 push ebp
00408F91 8BEC mov ebp,esp
00408F93 83C4F4 add esp,-$0c
00408F96 8955F8 mov [ebp-$08],edx
00408F99 8945FC mov [ebp-$04],eax
unit_BitManipulation_KeepBits_version_001.pas.12: Result := Value; // 32
bits case.
00408F9C 8B45FC mov eax,[ebp-$04]
00408F9F 8945F4 mov [ebp-$0c],eax
unit_BitManipulation_KeepBits_version_001.pas.13: if Bits <= 31 then
00408FA2 837DF81F cmp dword ptr [ebp-$08],$1f
00408FA6 770D jnbe $00408fb5
unit_BitManipulation_KeepBits_version_001.pas.15: Result := Result and not
(4294967295 shl Bits); // shl instruction limited to 31.
00408FA8 8B4DF8 mov ecx,[ebp-$08]
00408FAB 83C8FF or eax,-$01
00408FAE D3E0 shl eax,cl
00408FB0 F7D0 not eax
00408FB2 2145F4 and [ebp-$0c],eax
unit_BitManipulation_KeepBits_version_001.pas.17: end;
00408FB5 8B45F4 mov eax,[ebp-$0c]
00408FB8 8BE5 mov esp,ebp
00408FBA 5D pop ebp
00408FBB C3 ret

Extra Routine ShiftLeft:

// 2 instructions
unit_BitManipulation_Shift_version_001.pas.12: shld eax, edx, cl
00408FBC 0FA5D0 shld eax,edx,cl
unit_BitManipulation_Shift_version_001.pas.13: end;
00408FBF C3 ret

}

// *** End of Code ***

Bye,
Skybuck.