|
Prev: Wanna do a WriteLongwordBits contest ? (Skybuck's Second Entry)
Next: Wanna do a WriteLongwordBits contest ? (Skybuck's SimInt64 C/C++ version)
From: Skybuck Flying on 5 May 2008 14:02

Ok,

This version 2 uses 85 instructions. Still 2 more instructions than the
simulated-int64 version ;)

// Optimizations applied:
// + KeepLowBits function inlined.
// + Variables reduced by re-using stack space via absolute directive
//   Instructions re-ordered to make that possible.
// + DestBitIndex shr 3 done once for address calculation.

Now it remains to be seen during benchmarks which version will be the
fastest.

In real code I'll probably not need to call this routine many times, I
might get away with if statements for the number of bits and then
selecting the 8 bit, 16 bit or this 32 bit version ;) but still I like to
have a fast 32 bit version just in case ;)

// *** Begin of Code ***

// Returns Value with every bit at position Bits and above cleared, i.e.
// only the lowest Bits bits of Value are kept.
// For Bits >= 32 nothing is cleared and Value is returned unchanged.
function KeepLowBits( Value : longword; Bits : longword ) : longword; inline;
begin
  Result := Value; // 32 bits case: nothing to clear.
  if Bits <= 31 then
  begin
    // Build a mask of the low Bits bits and apply it. The shift is only
    // performed for counts of 31 or less (shl instruction limited to 31).
    Result := Result and not (4294967295 shl Bits);
  end;
end;

// correct
// 85 instructions
//
// Merges the low Bits bits of Value into memory, starting at bit offset
// DestBitIndex counted from DestAddress.
//
//   Value        : source bits; only the low Bits bits are used.
//   Bits         : number of bits to write; values of 32 or more select
//                  all 32 bits (see KeepLowBits).
//   DestAddress  : base address of the destination memory.
//   DestBitIndex : bit offset from DestAddress; split below into a byte
//                  offset (shr 3) and a shift within that byte (and 7).
//
// Two read-modify-write longword accesses are performed, one at
// DestAddress + byte offset and one 4 bytes further. In each access only
// the bits selected by the corresponding mask are replaced.
//
// ShiftLeft is not defined in this snippet. The listing further down shows
// an "Extra Routine" consisting of a single "shld eax,edx,cl", so
// ShiftLeft( A, B, N ) presumably returns A shifted left by N with the
// vacated low bits filled from the top bits of B - TODO confirm.
//
// NOTE(review): the second longword is read and written unconditionally,
// so the 8 bytes starting at DestAddress + byte offset are always touched.
// Presumably callers must guarantee those bytes are accessible - confirm.
//
// NOTE(review): longword(DestAddress) assumes a pointer fits in a longword
// (the listing below is 32-bit code) - confirm before using on any target
// with wider pointers.
procedure WriteLongwordBitsV2( Value : longword; Bits : longword; DestAddress : pointer; DestBitIndex : longword );
var
  vContent : longword; // Value limited to its low Bits bits
  vMask : longword;    // mask with the low Bits bits set
  vShift : longword;   // bit position inside the first destination byte (0..7)

  vFirstContent : longword;
  vFirstMask : longword;
  vFirstAddress : longword;

  // recycle the variables above, little bit dangerous because
  // compiler might be buggy, but so far it seems to be working.
  // Because of this aliasing, every vFirst* value must be fully consumed
  // before the matching vSecond* variable is assigned; do not reorder the
  // statements below.
  vSecondContent : longword absolute vFirstContent;
  vSecondMask : longword absolute vFirstMask;
  vSecondAddress : longword absolute vFirstAddress;
begin
  vContent := KeepLowBits( Value, Bits );
  vMask := KeepLowBits( 4294967295, Bits );

  vShift := DestBitIndex and 7;

  // From here on DestBitIndex holds the byte offset, not the bit offset.
  DestBitIndex := DestBitIndex shr 3; // div 8

  // First longword: content and mask moved up by vShift bits.
  vFirstContent := ShiftLeft( vContent, 0, vShift );
  vFirstMask := ShiftLeft( vMask, 0, vShift );
  vFirstAddress := longword(DestAddress) + DestBitIndex;
  // Clear the masked bits in memory, then OR in the new content.
  Plongword(vFirstAddress)^ := (Plongword(vFirstAddress)^ and not vFirstMask) or vFirstContent;

  // Second longword, 4 bytes after the first. Content and mask are passed
  // as the second ShiftLeft operand here, with 0 as the first.
  vSecondContent := ShiftLeft( 0, vContent, vShift );
  vSecondMask := ShiftLeft( 0, vMask, vShift );
  vSecondAddress := longword(DestAddress) + DestBitIndex + 4;
  Plongword(vSecondAddress)^ := (Plongword(vSecondAddress)^ and not vSecondMask) or vSecondContent;
end;

// Generated Assembler:

{ 77 instructions + 4 * 2 (= 8) = 85 instructions Project1.dpr.1648: begin 0040906C 55 push ebp 0040906D 8BEC mov ebp,esp 0040906F 83C4D4 add esp,-$2c 00409072 894DE8 mov [ebp-$18],ecx 00409075 8955EC mov [ebp-$14],edx 00409078 8945F0 mov [ebp-$10],eax Project1.dpr.1649: vContent := KeepLowBits( Value, Bits ); 0040907B 8B45F0 mov eax,[ebp-$10] 0040907E 8945D8 mov [ebp-$28],eax 00409081 837DEC1F cmp dword ptr [ebp-$14],$1f 00409085 770D jnbe $00409094 00409087 8B4DEC mov ecx,[ebp-$14] 0040908A 83C8FF or eax,-$01 0040908D D3E0 shl eax,cl 0040908F F7D0 not eax 00409091 2145D8 and [ebp-$28],eax 00409094 8B45D8 mov eax,[ebp-$28] 00409097 8945E4 mov [ebp-$1c],eax Project1.dpr.1650: vMask := KeepLowBits( 4294967295, Bits ); 0040909A C745D4FFFFFFFF mov [ebp-$2c],$ffffffff 004090A1 837DEC1F cmp dword ptr [ebp-$14],$1f 004090A5 770D jnbe $004090b4 004090A7 8B4DEC mov ecx,[ebp-$14] 004090AA 83C8FF or eax,-$01 004090AD D3E0 shl eax,cl 004090AF F7D0 not eax 004090B1 2145D4 and [ebp-$2c],eax 004090B4 8B45D4 mov eax,[ebp-$2c] 004090B7 8945E0 mov [ebp-$20],eax Project1.dpr.1652: vShift := DestBitIndex and 7; 004090BA 8B4508 mov eax,[ebp+$08] 004090BD 83E007
and eax,$07 004090C0 8945DC mov [ebp-$24],eax Project1.dpr.1654: DestBitIndex := DestBitIndex shr 3; // div 32 004090C3 C16D0803 shr dword ptr [ebp+$08],$03 Project1.dpr.1656: vFirstContent := ShiftLeft( vContent, 0, vShift ); 004090C7 8B4DDC mov ecx,[ebp-$24] 004090CA 33D2 xor edx,edx 004090CC 8B45E4 mov eax,[ebp-$1c] 004090CF E8BCFEFFFF call ShiftLeft 004090D4 8945FC mov [ebp-$04],eax Project1.dpr.1657: vFirstMask := ShiftLeft( vMask, 0, vShift ); 004090D7 8B4DDC mov ecx,[ebp-$24] 004090DA 33D2 xor edx,edx 004090DC 8B45E0 mov eax,[ebp-$20] 004090DF E8ACFEFFFF call ShiftLeft 004090E4 8945F8 mov [ebp-$08],eax Project1.dpr.1658: vFirstAddress := longword(DestAddress) + DestBitIndex; 004090E7 8B45E8 mov eax,[ebp-$18] 004090EA 034508 add eax,[ebp+$08] 004090ED 8945F4 mov [ebp-$0c],eax Project1.dpr.1659: Plongword(vFirstAddress)^ := (Plongword(vFirstAddress)^ and not vFirstMask) or vFirstContent; 004090F0 8B45F4 mov eax,[ebp-$0c] 004090F3 8B00 mov eax,[eax] 004090F5 8B55F8 mov edx,[ebp-$08] 004090F8 F7D2 not edx 004090FA 23C2 and eax,edx 004090FC 0B45FC or eax,[ebp-$04] 004090FF 8B55F4 mov edx,[ebp-$0c] 00409102 8902 mov [edx],eax Project1.dpr.1661: vSecondContent := ShiftLeft( 0, vContent, vShift ); 00409104 8B4DDC mov ecx,[ebp-$24] 00409107 8B55E4 mov edx,[ebp-$1c] 0040910A 33C0 xor eax,eax 0040910C E87FFEFFFF call ShiftLeft 00409111 8945FC mov [ebp-$04],eax Project1.dpr.1662: vSecondMask := ShiftLeft( 0, vMask, vShift ); 00409114 8B4DDC mov ecx,[ebp-$24] 00409117 8B55E0 mov edx,[ebp-$20] 0040911A 33C0 xor eax,eax 0040911C E86FFEFFFF call ShiftLeft 00409121 8945F8 mov [ebp-$08],eax Project1.dpr.1663: vSecondAddress := longword(DestAddress) + DestBitIndex + 4; 00409124 8B45E8 mov eax,[ebp-$18] 00409127 034508 add eax,[ebp+$08] 0040912A 83C004 add eax,$04 0040912D 8945F4 mov [ebp-$0c],eax Project1.dpr.1664: Plongword(vSecondAddress)^ := (Plongword(vSecondAddress)^ and not vSecondMask) or vSecondContent; 00409130 8B45F4 mov eax,[ebp-$0c] 00409133 8B00 mov eax,[eax] 
00409135 8B55F8 mov edx,[ebp-$08] 00409138 F7D2 not edx 0040913A 23C2 and eax,edx 0040913C 0B45FC or eax,[ebp-$04] 0040913F 8B55F4 mov edx,[ebp-$0c] 00409142 8902 mov [edx],eax Project1.dpr.1665: end; 00409144 8BE5 mov esp,ebp 00409146 5D pop ebp 00409147 C20400 ret $0004 Extra Routine: Unit_BitManipulation_Shift_version_001.pas.12: shld eax, edx, cl 00408F90 0FA5D0 shld eax,edx,cl Unit_BitManipulation_Shift_version_001.pas.13: end; 00408F93 C3 ret } // *** End of Code *** Bye, Skybuck.
From: Skybuck Flying on 5 May 2008 14:35
"Skybuck Flying" <BloodyShame(a)hotmail.com> wrote in message news:15897$481f4a34$541983fa$14271(a)cache2.tilbu1.nb.home.nl... > Ok, > > This version 2 uses 85 instructions. Still 2 more instructions than the > simulated-int64 version ;) > > // Optimizations applied: > // + KeepLowBits function inlined. > // + Variables reduced by re-using stack space via absolute directive > // Instructions re-ordered to make that possible. > // + DestBitIndex shr 3 done once for address calculation. > > Now it remains to be seen during benchmarks which version will be the > fastest. > > In real code I'll probably not need to call this routine many times, I > might get away with if statements for the number of bits and then > selecting the 8 bit, 16 bit or this 32 bit version ;) but still I like to > have a fast 32 bit version just in case ;) > > // *** Begin of Code *** > > function KeepLowBits( Value : longword; Bits : longword ) : longword; > inline; > begin > Result := Value; // 32 bits case. > if Bits <= 31 then > begin > Result := Result and not (4294967295 shl Bits); // shl instruction > limited to 31. > end; > end; > > // correct > // 85 instructions > procedure WriteLongwordBitsV2( Value : longword; Bits : longword; > DestAddress : pointer; DestBitIndex : longword ); > var > vContent : longword; > vMask : longword; > vShift : longword; > > vFirstContent : longword; > vFirstMask : longword; > vFirstAddress : longword; > > // recycle the variables above, little bit dangerous because > // compiler might be buggy, but so far it seems to be working. 
> vSecondContent : longword absolute vFirstContent; > vSecondMask : longword absolute vFirstMask; > vSecondAddress : longword absolute vFirstAddress; > begin > vContent := KeepLowBits( Value, Bits ); > vMask := KeepLowBits( 4294967295, Bits ); > > vShift := DestBitIndex and 7; > > DestBitIndex := DestBitIndex shr 3; // div 32 Oh little comment typo: DestBitIndex := DestBitIndex shr 3; // div 8 Ah that's better :) LOL Bye, Skybuck ;) > > vFirstContent := ShiftLeft( vContent, 0, vShift ); > vFirstMask := ShiftLeft( vMask, 0, vShift ); > vFirstAddress := longword(DestAddress) + DestBitIndex; > Plongword(vFirstAddress)^ := (Plongword(vFirstAddress)^ and not > vFirstMask) or vFirstContent; > > vSecondContent := ShiftLeft( 0, vContent, vShift ); > vSecondMask := ShiftLeft( 0, vMask, vShift ); > vSecondAddress := longword(DestAddress) + DestBitIndex + 4; > Plongword(vSecondAddress)^ := (Plongword(vSecondAddress)^ and not > vSecondMask) or vSecondContent; > end; > > // Generated Assembler: > > { > > 77 instructions + 4 * 2 (= 8) = 85 instructions > Project1.dpr.1648: begin > 0040906C 55 push ebp > 0040906D 8BEC mov ebp,esp > 0040906F 83C4D4 add esp,-$2c > 00409072 894DE8 mov [ebp-$18],ecx > 00409075 8955EC mov [ebp-$14],edx > 00409078 8945F0 mov [ebp-$10],eax > Project1.dpr.1649: vContent := KeepLowBits( Value, Bits ); > 0040907B 8B45F0 mov eax,[ebp-$10] > 0040907E 8945D8 mov [ebp-$28],eax > 00409081 837DEC1F cmp dword ptr [ebp-$14],$1f > 00409085 770D jnbe $00409094 > 00409087 8B4DEC mov ecx,[ebp-$14] > 0040908A 83C8FF or eax,-$01 > 0040908D D3E0 shl eax,cl > 0040908F F7D0 not eax > 00409091 2145D8 and [ebp-$28],eax > 00409094 8B45D8 mov eax,[ebp-$28] > 00409097 8945E4 mov [ebp-$1c],eax > Project1.dpr.1650: vMask := KeepLowBits( 4294967295, Bits ); > 0040909A C745D4FFFFFFFF mov [ebp-$2c],$ffffffff > 004090A1 837DEC1F cmp dword ptr [ebp-$14],$1f > 004090A5 770D jnbe $004090b4 > 004090A7 8B4DEC mov ecx,[ebp-$14] > 004090AA 83C8FF or eax,-$01 > 004090AD D3E0 shl eax,cl > 
004090AF F7D0 not eax > 004090B1 2145D4 and [ebp-$2c],eax > 004090B4 8B45D4 mov eax,[ebp-$2c] > 004090B7 8945E0 mov [ebp-$20],eax > Project1.dpr.1652: vShift := DestBitIndex and 7; > 004090BA 8B4508 mov eax,[ebp+$08] > 004090BD 83E007 and eax,$07 > 004090C0 8945DC mov [ebp-$24],eax > Project1.dpr.1654: DestBitIndex := DestBitIndex shr 3; // div 32 > 004090C3 C16D0803 shr dword ptr [ebp+$08],$03 > Project1.dpr.1656: vFirstContent := ShiftLeft( vContent, 0, vShift ); > 004090C7 8B4DDC mov ecx,[ebp-$24] > 004090CA 33D2 xor edx,edx > 004090CC 8B45E4 mov eax,[ebp-$1c] > 004090CF E8BCFEFFFF call ShiftLeft > 004090D4 8945FC mov [ebp-$04],eax > Project1.dpr.1657: vFirstMask := ShiftLeft( vMask, 0, vShift ); > 004090D7 8B4DDC mov ecx,[ebp-$24] > 004090DA 33D2 xor edx,edx > 004090DC 8B45E0 mov eax,[ebp-$20] > 004090DF E8ACFEFFFF call ShiftLeft > 004090E4 8945F8 mov [ebp-$08],eax > Project1.dpr.1658: vFirstAddress := longword(DestAddress) + DestBitIndex; > 004090E7 8B45E8 mov eax,[ebp-$18] > 004090EA 034508 add eax,[ebp+$08] > 004090ED 8945F4 mov [ebp-$0c],eax > Project1.dpr.1659: Plongword(vFirstAddress)^ := (Plongword(vFirstAddress)^ > and not vFirstMask) or vFirstContent; > 004090F0 8B45F4 mov eax,[ebp-$0c] > 004090F3 8B00 mov eax,[eax] > 004090F5 8B55F8 mov edx,[ebp-$08] > 004090F8 F7D2 not edx > 004090FA 23C2 and eax,edx > 004090FC 0B45FC or eax,[ebp-$04] > 004090FF 8B55F4 mov edx,[ebp-$0c] > 00409102 8902 mov [edx],eax > Project1.dpr.1661: vSecondContent := ShiftLeft( 0, vContent, vShift ); > 00409104 8B4DDC mov ecx,[ebp-$24] > 00409107 8B55E4 mov edx,[ebp-$1c] > 0040910A 33C0 xor eax,eax > 0040910C E87FFEFFFF call ShiftLeft > 00409111 8945FC mov [ebp-$04],eax > Project1.dpr.1662: vSecondMask := ShiftLeft( 0, vMask, vShift ); > 00409114 8B4DDC mov ecx,[ebp-$24] > 00409117 8B55E0 mov edx,[ebp-$20] > 0040911A 33C0 xor eax,eax > 0040911C E86FFEFFFF call ShiftLeft > 00409121 8945F8 mov [ebp-$08],eax > Project1.dpr.1663: vSecondAddress := longword(DestAddress) + DestBitIndex 
> + 4; > 00409124 8B45E8 mov eax,[ebp-$18] > 00409127 034508 add eax,[ebp+$08] > 0040912A 83C004 add eax,$04 > 0040912D 8945F4 mov [ebp-$0c],eax > Project1.dpr.1664: Plongword(vSecondAddress)^ := > (Plongword(vSecondAddress)^ and not vSecondMask) or vSecondContent; > 00409130 8B45F4 mov eax,[ebp-$0c] > 00409133 8B00 mov eax,[eax] > 00409135 8B55F8 mov edx,[ebp-$08] > 00409138 F7D2 not edx > 0040913A 23C2 and eax,edx > 0040913C 0B45FC or eax,[ebp-$04] > 0040913F 8B55F4 mov edx,[ebp-$0c] > 00409142 8902 mov [edx],eax > Project1.dpr.1665: end; > 00409144 8BE5 mov esp,ebp > 00409146 5D pop ebp > 00409147 C20400 ret $0004 > > Extra Routine: > > Unit_BitManipulation_Shift_version_001.pas.12: shld eax, edx, cl > 00408F90 0FA5D0 shld eax,edx,cl > Unit_BitManipulation_Shift_version_001.pas.13: end; > 00408F93 C3 ret > > } > > // *** End of Code *** > > Bye, > Skybuck. > |