|
Prev: Skybuck presents ShiftLeft( Left, Right, Shift ) and ShiftRight( Right, Left, Shift )
Next: Wanna do a WriteLongwordBits contest ? (Skybuck's Third Entry)
From: Skybuck Flying on 5 May 2008 13:38 Hello, Here is my second entry for the contest. I was hoping that by using assembler instructions, the instruction count could be reduced. But the opposite has happened. This version is even worse: 107 instructions ! And it's using slower instructions too, like shld and a branch ! However the keep low bits routine was not yet inlined... And maybe the ShiftLeft routine can be inlined but for now it's kinda sucky. The algorithm is cool though... just sucky speed-wise. It's quite amazing to see that my original simulated-int64 version has actually fewer instructions ?!?!? WOW ;) Oh well enjoy this version for what it's worth: Maybe there are further optimizations possible ? No benchmarking done yet :) // *** Begin of Code *** function KeepLowBits( Value : longword; Bits : longword ) : longword; begin Result := Value; // 32 bits case. if Bits <= 31 then begin Result := Result and not (4294967295 shl Bits); // shl instruction limited to 31. end; end; function ShiftLeft( Left : longword; Right : Longword; Shift : longword ) : longword; asm shld eax, edx, cl end; // correct procedure WriteLongwordBits( Value : longword; Bits : longword; DestAddress : pointer; DestBitIndex : longword ); var vContent : longword; vMask : longword; vShift : longword; vFirstContent : longword; vSecondContent : longword; vFirstMask : longword; vSecondMask : longword; vFirstAddress : longword; vSecondAddress : longword; begin vContent := KeepLowBits( Value, Bits ); vMask := KeepLowBits( 4294967295, Bits ); vShift := DestBitIndex and 7; // mod 8 vFirstContent := ShiftLeft( vContent, 0, vShift ); vSecondContent := ShiftLeft( 0, vContent, vShift ); vFirstMask := ShiftLeft( vMask, 0, vShift ); vSecondMask := ShiftLeft( 0, vMask, vShift ); vFirstAddress := longword(DestAddress) + (DestBitIndex shr 3); // div 8 vSecondAddress := vFirstAddress + 4; Plongword(vFirstAddress)^ := (Plongword(vFirstAddress)^ and not vFirstMask) or vFirstContent; 
Plongword(vSecondAddress)^ := (Plongword(vSecondAddress)^ and not vSecondMask) or vSecondContent; end; // Generated Assembler: { // 63 instructions + 2 * 18 (= 36) + 4 * 2 (= 8) = 107 instructions !!!??? WOW Project1.dpr.1479: begin 00409098 55 push ebp 00409099 8BEC mov ebp,esp 0040909B 83C4D0 add esp,-$30 0040909E 894DF4 mov [ebp-$0c],ecx 004090A1 8955F8 mov [ebp-$08],edx 004090A4 8945FC mov [ebp-$04],eax Project1.dpr.1480: vContent := KeepLowBits( Value, Bits ); 004090A7 8B55F8 mov edx,[ebp-$08] 004090AA 8B45FC mov eax,[ebp-$04] 004090AD E8DEFEFFFF call KeepLowBits 004090B2 8945F0 mov [ebp-$10],eax Project1.dpr.1481: vMask := KeepLowBits( 4294967295, Bits ); 004090B5 8B55F8 mov edx,[ebp-$08] 004090B8 83C8FF or eax,-$01 004090BB E8D0FEFFFF call KeepLowBits 004090C0 8945EC mov [ebp-$14],eax Project1.dpr.1483: vShift := DestBitIndex and 7; 004090C3 8B4508 mov eax,[ebp+$08] 004090C6 83E007 and eax,$07 004090C9 8945E8 mov [ebp-$18],eax Project1.dpr.1485: vFirstContent := ShiftLeft( vContent, 0, vShift ); 004090CC 8B4DE8 mov ecx,[ebp-$18] 004090CF 33D2 xor edx,edx 004090D1 8B45F0 mov eax,[ebp-$10] 004090D4 E8E3FEFFFF call ShiftLeft 004090D9 8945E4 mov [ebp-$1c],eax Project1.dpr.1486: vSecondContent := ShiftLeft( 0, vContent, vShift ); 004090DC 8B4DE8 mov ecx,[ebp-$18] 004090DF 8B55F0 mov edx,[ebp-$10] 004090E2 33C0 xor eax,eax 004090E4 E8D3FEFFFF call ShiftLeft 004090E9 8945E0 mov [ebp-$20],eax Project1.dpr.1488: vFirstMask := ShiftLeft( vMask, 0, vShift ); 004090EC 8B4DE8 mov ecx,[ebp-$18] 004090EF 33D2 xor edx,edx 004090F1 8B45EC mov eax,[ebp-$14] 004090F4 E8C3FEFFFF call ShiftLeft 004090F9 8945DC mov [ebp-$24],eax Project1.dpr.1489: vSecondMask := ShiftLeft( 0, vMask, vShift ); 004090FC 8B4DE8 mov ecx,[ebp-$18] 004090FF 8B55EC mov edx,[ebp-$14] 00409102 33C0 xor eax,eax 00409104 E8B3FEFFFF call ShiftLeft 00409109 8945D8 mov [ebp-$28],eax Project1.dpr.1491: vFirstAddress := longword(DestAddress) + (DestBitIndex shr 3); // div 8 0040910C 8B4508 mov eax,[ebp+$08] 
0040910F C1E803 shr eax,$03 00409112 0345F4 add eax,[ebp-$0c] 00409115 8945D4 mov [ebp-$2c],eax Project1.dpr.1492: vSecondAddress := vFirstAddress + 4; 00409118 8B45D4 mov eax,[ebp-$2c] 0040911B 83C004 add eax,$04 0040911E 8945D0 mov [ebp-$30],eax Project1.dpr.1494: Plongword(vFirstAddress)^ := (Plongword(vFirstAddress)^ and not vFirstMask) or vFirstContent; 00409121 8B45D4 mov eax,[ebp-$2c] 00409124 8B00 mov eax,[eax] 00409126 8B55DC mov edx,[ebp-$24] 00409129 F7D2 not edx 0040912B 23C2 and eax,edx 0040912D 0B45E4 or eax,[ebp-$1c] 00409130 8B55D4 mov edx,[ebp-$2c] 00409133 8902 mov [edx],eax Project1.dpr.1495: Plongword(vSecondAddress)^ := (Plongword(vSecondAddress)^ and not vSecondMask) or vSecondContent; 00409135 8B45D0 mov eax,[ebp-$30] 00409138 8B00 mov eax,[eax] 0040913A 8B55D8 mov edx,[ebp-$28] 0040913D F7D2 not edx 0040913F 23C2 and eax,edx 00409141 0B45E0 or eax,[ebp-$20] 00409144 8B55D0 mov edx,[ebp-$30] 00409147 8902 mov [edx],eax Project1.dpr.1496: end; 00409149 8BE5 mov esp,ebp 0040914B 5D pop ebp 0040914C C20400 ret $0004 Extra Routine KeepLowBits: // it has become longer ?!?! WOW ?!?!?! // 18 instructions unit_BitManipulation_KeepBits_version_001.pas.11: begin 00408F90 55 push ebp 00408F91 8BEC mov ebp,esp 00408F93 83C4F4 add esp,-$0c 00408F96 8955F8 mov [ebp-$08],edx 00408F99 8945FC mov [ebp-$04],eax unit_BitManipulation_KeepBits_version_001.pas.12: Result := Value; // 32 bits case. 00408F9C 8B45FC mov eax,[ebp-$04] 00408F9F 8945F4 mov [ebp-$0c],eax unit_BitManipulation_KeepBits_version_001.pas.13: if Bits <= 31 then 00408FA2 837DF81F cmp dword ptr [ebp-$08],$1f 00408FA6 770D jnbe $00408fb5 unit_BitManipulation_KeepBits_version_001.pas.15: Result := Result and not (4294967295 shl Bits); // shl instruction limited to 31. 
00408FA8 8B4DF8 mov ecx,[ebp-$08] 00408FAB 83C8FF or eax,-$01 00408FAE D3E0 shl eax,cl 00408FB0 F7D0 not eax 00408FB2 2145F4 and [ebp-$0c],eax unit_BitManipulation_KeepBits_version_001.pas.17: end; 00408FB5 8B45F4 mov eax,[ebp-$0c] 00408FB8 8BE5 mov esp,ebp 00408FBA 5D pop ebp 00408FBB C3 ret Extra Routine ShiftLeft: // 2 instructions unit_BitManipulation_Shift_version_001.pas.12: shld eax, edx, cl 00408FBC 0FA5D0 shld eax,edx,cl unit_BitManipulation_Shift_version_001.pas.13: end; 00408FBF C3 ret } // *** End of Code *** Bye, Skybuck. |