|
Prev: Skybuck presents alternative division method, to be benchmarked ;)
Next: How many branches in a loop can be predicted successfully ?
From: Rod Pemberton on 6 May 2008 05:40 "Prime Mover" <epleite(a)hotmail.com> wrote in message news:a86ba4bc-7db2-46d9-81ce-b36d4014d0da(a)d45g2000hsc.googlegroups.com... > > For instance, I also coudn't find a clear example of conversion of a > simple program > like this below in C to assembly: > > a=8; > b=7; > i=1; > do > { > if (a<b) > c = a + b*i; > else > c = a - b*i; > i++; > } while (i<=20); > > Thank you all. I'm surprised that the easiest way to see how a C compiler converts to assembly wasn't mentioned: look at the compiler's output. Maybe it's not quite for a beginner. Anyway, for GCC (the S must be uppercase): gcc -S sample.c You can also have GCC insert additional info with -g: gcc -g -S sample.c The "catch" is that the output is AT&T syntax (args are "reversed", % on register, $ on constant, size letter on end of instruction...). On your code - with additions to compile - with a, b, c, i, as "global" integers - without optimization: (file scope creates assembly closer to what you'd code by hand) ..globl _arith _arith: pushl %ebp movl %esp, %ebp movl $8, _a movl $7, _b movl $1, _i L2: movl _a, %eax cmpl _b, %eax jge L5 movl _b, %eax imull _i, %eax addl _a, %eax movl %eax, _c jmp L6 L5: movl _b, %eax movl %eax, %edx imull _i, %edx movl _a, %eax subl %edx, %eax movl %eax, _c L6: incl _i cmpl $20, _i jg L1 jmp L2 L1: popl %ebp ret On your code - with additions to compile - with a, b, c, i, as "global" floats - without optimization: ..globl _arith _arith: pushl %ebp movl %esp, %ebp movl $0x41000000, %eax movl %eax, _a movl $0x40e00000, %eax movl %eax, _b movl $0x3f800000, %eax movl %eax, _i L2: flds _a flds _b fucompp fnstsw %ax testb $69, %ah je L6 jmp L5 L6: flds _b fmuls _i fadds _a fstps _c jmp L7 L5: flds _b fmuls _i flds _a fsubp %st, %st(1) fstps _c L7: flds _i fld1 faddp %st, %st(1) fstps _i flds _i flds LC3 fucompp fnstsw %ax testb $5, %ah je L2 popl %ebp ret You could also disassemble the produced objects with NASM's Ndisasm.exe for a more 
readable assembly format. Rod Pemberton
From: rio on 6 May 2008 10:39
"Prime Mover" <epleite(a)hotmail.com> ha scritto nel messaggio news:084f6c8e-4ba3-4351-b87b-1e8b6442539f(a)w7g2000hsa.googlegroups.com... > Thank you all for all the information and tips. It is been really > helpful. > > I am trying to convert this algorithm below to assembly: > > %%%%%%%%%%%% > Unsigned Multiplication Algorithm > > AQ = Q*M > > A = 0; > Q = operand1 > M = operand2 > C = carry > > for (count=1 to count <= nbits) > > begin > > if Q[0] = 1 then > A = A+M > end if > > shift CAQ 1 bit to the right % CAQ is the C,A,Q grouped > > end > %%%%%%%%%%%%%%%%% > > I am doing like this, for 8-bit numbers: > > mov AH, 0 > mov AL, Q > mov BL, M > mov CL, 8; counter starts with 8 > mov CF, 0; carry > L0: > cmp AL(0), 1; I need to access the first bit of AL!!!! don't know if > this is right > jne L1 > add AH, BL > jmp L1 > L1: > shr AX, 1 > dec CF > cmp CF, 0 > jng L0 > > Does that make sense??? in what i know CF is not a register it should be a bit set or not in a special register. for me the algo has to fit for CPU operations and registers i write some of the type of edx:eax mul3(int32 p1, int32 p2) that seems here 6 times more slow than the CPU mul ------------------------------------------ $ this mul3 is ok D(mul3)=6.000000 D(CPU)=1.000000 ----------------------------------- ; $ asc mb.m mb.asm */ ; $ nasm -felf -g mb.asm */ section .data global mul3 global mulck global mulCPU section .text ; edx:eax mul3(int32 p1, int32 p2) */ ; edx:eax=p1*p2 ; 0i, 4b, 8ra, 12P_1, 16P_2 */ mul3: push ebx push esi mov eax, 0 mov edx, 0 mov ebx, [esp+12] mov esi, 0 mov ecx, [esp+16] ..0: shr ecx, 1 jnc .1 add eax, ebx adc edx, esi ..1: shl esi, 1 shl ebx, 1 adc esi, 0 cmp ecx, 0 jne .0 pop esi pop ebx ret mulck: push ebx push esi mov eax, [esp+12] mul dword [esp+16] mov ebx, eax mov esi, edx mov eax, [esp+12] mov edx, [esp+16] push edx push eax call mul3 add esp, 8 cmp ebx, eax jne .0 cmp esi, edx jne .0 mov eax, 1 jmp short .f ..0: mov eax, 0 ..f: pop esi pop ebx ret ; 0ra, 4P1, 
8p2*/ mulCPU: mov eax, [esp+4] mul dword [esp+8] ret ------------------------------------------- /* gcc this.c mb.o -o this */ #include <stdio.h> #include <time.h> unsigned mul3(unsigned, unsigned); unsigned mulck(unsigned, unsigned); unsigned mulCPU(unsigned, unsigned); double tempo( unsigned (*f)(unsigned, unsigned)) {double r; time_t i, e; unsigned a, a1; i=time(0); a1=0xFFFF; for(a=1; a<0x2000000; ++a, ++a1) {//if(a%0x40000000==0) {printf(" %u ", a); fflush(stdout);} (*f)(a, a1); } e=time(0); r=difftime(e, i); return r; } int check(unsigned (*f)(unsigned, unsigned)) {unsigned a=1, b, bb, i=0, a1, a2, c; l0:; a1=rand(); a2=rand(); bb=mulCPU(a1, a2); b = (*f)(a1, a2); if( mulck(a1, a2)==0 ) {++i; printf("%u*%u=%u but it should be %u or %u\n", a1, a2, b, bb, a1*a2); fflush(stdout); } if(a%0x80000000==0) {printf(" %u ", a); fflush(stdout);} if(i>=15) {printf("Contatore generale a=%u\n", a); return 0; } ++a; if(a<0x2000000) goto l0; return 1; } int main(void) {double r; unsigned j; (j=check(mul3))==1 ? printf("mul3 is ok\n"): printf("Error for mul3 \n"); if(j!=1) return 0; r=tempo(mul3); printf("D(mul3)=%f\n", r); r=tempo(mulCPU); printf("D(CPU)=%f\n", r); return 0; } --------------------------- /* $ asc mb.m mb.asm */ /* $ nasm -felf -g mb.asm */ section .data global mul3 global mulck global mulCPU section .text /* to note that in this way mul3 for exist has the need only of 5 registers of same number of bits the carry flag >>=, <<=, ++, ++=, cmp, jnz, jnc, =, instructions and the ABI 32bits OS (that i have not preserve a, c, r). 
mul3 is independent of register size */ /* edx:eax mul3(int32 p1, int32 p2) */ /* 0i, 4b, 8ra, 12P_1, 16P_2 */ mul3: < b, i a=0; r=0; b=[s+12]; i=0; c=[s+16]; ..0: c>>=1; jnc .1| a+=b; r++=i; ..1: i<<=1; b<<=1; i++=0; c#.0; > b, i ret mulck: < b, i a=[s+12]; mul D [s+16]; b=a; i=r; a=[s+12]; r=[s+16]; mul3<(a, r); b==a!#.0 i==r!#.0| a=1; #.f ..0: a=0; ..f: > b, i ret /* 0ra, 4P1, 8p2*/ mulCPU: a=[s+4]; mul D [s+8]; ret -------------------------------- |