| 	
Prev: Was NOP deprecated for AA64? NASM not disassembling... Next: The project BIEW was renamed into BEYE 	
		 From: Branimir Maksimovic on 25 Feb 2010 07:07 For exercize I taken knucleotide benchmark. http://shootout.alioth.debian.org/u32/performance.php?test=knucleotide My naive assembler program (fasm):

;-----------------------------------------------------------------------
; knucleotide.asm - k-nucleotide frequency counter (benchmark exercise)
;
; Syntax : flat assembler (fasm), Intel operand order
; Target : 32-bit x86 Linux, ELF object linked against libc (gcc -m32)
; ABI    : cdecl for libc calls (arguments on the stack, caller cleans
;          up); raw Linux system calls through int 0x80
;          (eax = number, ebx/ecx/edx = arguments).
;
; Reads stdin, skips everything up to the line starting with ">THREE",
; packs the following sequence lines into one byte per nucleotide
; (A=0 C=1 G=2 other=3) and counts k-mers in an open-addressing hash
; table whose entries are 8 bytes: { dd key pointer, dd count }.
;
; The macros below are expanded inline and freely clobber registers;
; each header lists what it destroys.
;-----------------------------------------------------------------------

; Growable-array descriptor: data pointer, capacity in bytes, element count.
struc vector d,s
{
        .data     dd d
        .size     dd s
        .elements dd 0
}

; Call a cdecl procedure: push the arguments right-to-left, call, then
; drop them. Clobbers whatever the callee clobbers (eax, ecx, edx).
macro ccall proc,[arg]                  ; call CDECL procedure
{
 common
        local size
        size = 0
 reverse
        pushd   arg
        size = size+4
 common
        call    proc
        add     esp,size
}

; Terminate the process with exit status rc. Does not return.
macro sys_exit rc
{
        mov     eax,1                   ; exit
        mov     ebx,rc
        int     0x80
}

; Raw read(2). Out: eax = byte count, 0 at end of input, negative on error.
; Clobbers ebx, ecx, edx.
macro sys_read fd, buf, size
{
        mov     eax, 3                  ; sys_read
        mov     ebx, fd
        mov     ecx, buf
        mov     edx, size
        int     0x80
}

; Raw write(2). Out: eax = result. Clobbers ebx, ecx, edx.
macro sys_write fd, buf, size
{
        mov     eax, 4                  ; sys_write
        mov     ebx, fd
        mov     ecx, buf
        mov     edx, size
        int     0x80
}

; Buffered read of `size` bytes from fd into buf, going through filebuf.
; fptr/fend delimit the unread part of filebuf.
; Out: eax = bytes copied, or the sys_read result (<= 0) when no data
;      could be obtained.
; Clobbers ebx, ecx, edx, esi, edi.
; NOTE(review): when a request spans a refill (the `jnz l1` path) the
; remaining count in ebx is destroyed by sys_read and buf is restarted,
; so only size = 1 (the only use in this file) behaves correctly.
macro read fd, buf,size
{
local l1,l2,l3
        mov     eax, dword [fptr]
        and     eax,eax
        jnz     l2                      ; buffer already primed
l1:
        sys_read fd,filebuf,fsize
        test    eax,eax
        jle     l3                      ; end of input or -errno: stop, never
                                        ; build fend from a negative count
        lea     eax, [eax+filebuf]
        mov     dword [fend], eax
        mov     dword [fptr], filebuf
l2:
        mov     ecx, size
        mov     ebx, size
        mov     eax, dword [fend]
        sub     eax, dword [fptr]       ; eax = bytes still buffered
        jz      l1                      ; buffer drained -> refill
        cmp     eax,ecx
        cmovl   ecx,eax                 ; ecx = min(requested, buffered)
        mov     eax,ecx
        sub     ebx, ecx                ; ebx = bytes still owed
        strncpy buf,dword [fptr], ecx, 0
        and     ebx,ebx
        mov     dword [fptr],esi        ; mov keeps the flags of the and
        jnz     l1
l3:
}

; Read one line from fd into buf (capacity `size` bytes, terminator
; included). The newline is not stored.
; Out: edi -> terminating NUL, so the line length is edi - buf;
;      ecx = unused capacity; eax = result of the last read.
; Clobbers eax, ebx, ecx, edx, esi, edi.
macro getLine fd, buf, size
{
local l1,l2
        mov     ecx, size
        dec     ecx                     ; keep one byte for the NUL so that
                                        ; a full buffer cannot overflow
        mov     edi, buf
l1:
        and     ecx,ecx
        jz      l2
        push    ecx
        push    edi
        read    fd,dword[esp],1         ; [esp] = saved edi = write cursor
        pop     edi
        pop     ecx
        cmp     eax,1
        jne     l2;
        dec     ecx
        inc     edi
        cmp     byte [edi-1], 0xa
        jnz     l1
        dec     edi                     ; drop the newline
l2:
        mov     byte [edi],0
}

; Fill `count` dwords at s with c. Clobbers eax, ecx, edi.
macro dwordnset s, c, count
{
        mov     edi,s
        mov     eax,c
        mov     ecx,count
        cld
        rep stosd
}

; Fill `size` bytes at s with the low byte of c. Clobbers eax, ecx, edi.
macro strnset s,c, size
{
        mov     edi,s
        mov     eax,c
        mov     ecx,size
        cld
        rep stosb
}

; Compare `size` dwords; dir = 0 ascending, non-zero descending.
; Out: flags of the last comparison. Clobbers ecx, esi, edi.
macro dwordncmp s1, s2, size, dir
{
        if ~ dir
        cld
        else
        std
        end if
        mov     esi,s2
        mov     edi,s1
        mov     ecx,size
        repe cmpsd
        if dir
        cld
        end if
}

; Compare `size` bytes; dir = 0 ascending, non-zero descending.
; Out: flags of the last comparison (ZF set when all bytes matched).
; Clobbers ecx, esi, edi.
macro strncmp s1, s2, size, dir
{
        if ~ dir
        cld
        else
        std
        end if
        mov     esi,s2
        mov     edi,s1
        mov     ecx,size
        repe cmpsb
        if dir
        cld
        end if
}

; Copy `size` dwords from s2 to s1; dir as above. Clobbers ecx, esi, edi.
macro dwordncpy s1,s2, size, dir
{
        if ~ dir
        cld
        else
        std
        end if
        mov     esi,s2
        mov     edi,s1
        mov     ecx, size
        rep movsd
        if dir
        cld
        end if
}

; Copy `size` bytes from s2 to s1; dir as above.
; Out: esi/edi advanced past the copied data. Clobbers ecx, esi, edi.
macro strncpy s1,s2, size, dir
{
        if ~ dir
        cld
        else
        std
        end if
        mov     esi,s2
        mov     edi,s1
        mov     ecx, size
        rep movsb
        if dir
        cld
        end if
}

; al = 2-bit code of the nucleotide letter in src:
; A/a -> 0, C/c -> 1, G/g -> 2, anything else -> 3.
macro to_num src
{
local l1,l2,l3,l4,e1
        cmp     src,'A'
        je      l1
        cmp     src,'a'
        je      l1
        cmp     src,'C'
        je      l2
        cmp     src,'c'
        je      l2
        cmp     src,'G'
        je      l3
        cmp     src,'g'
        je      l3
;       cmp     src,'T'
;       je      l4
;       cmp     src,'t'
        jmp     l4
l1:
        mov     al,0
        jmp     e1
l2:
        mov     al,1
        jmp     e1
l3:
        mov     al,2
        jmp     e1
l4:
        mov     al,3
e1:
}

; al = letter for the code in src: 0 -> 'A', 1 -> 'C', 2 -> 'G', else 'T'.
macro to_char src
{
local l1,l2,l3,l4,e1
        cmp     src,0
        je      l1
        cmp     src,1
        je      l2
        cmp     src,2
        je      l3
;       cmp     src,3
        jmp     l4
l1:
        mov     al,'A'
        jmp     e1
l2:
        mov     al,'C'
        jmp     e1
l3:
        mov     al,'G'
        jmp     e1
l4:
        mov     al,'T'
e1:
}

; Convert `size` letters at src to codes at dst (see to_num).
; size must be at least 1: the loop tests the count after the first byte.
; Clobbers al, ecx, esi, edi.
macro pack_str dst,src,size
{
local l1
        mov     esi,src
        mov     edi,dst
        mov     ecx,size
l1:
        to_num  byte [esi]
        mov     byte [edi], al
        inc     edi
        inc     esi
        dec     ecx
        jnz     l1
}

; Convert `size` codes at src back to letters at dst (see to_char).
; size must be at least 1. Clobbers al, ecx, esi, edi.
macro unpack_str dst,src,size
{
local l1
        mov     esi,src
        mov     edi,dst
        mov     ecx,size
l1:
        to_char byte [esi]
        mov     byte [edi], al
        inc     edi
        inc     esi
        dec     ecx
        jnz     l1
}

; realloc the buffer whose pointer is stored at `data` to size*block
; bytes and store that byte count at `oldsize`. On failure print err1
; through perror and exit with status -1.
; Clobbers eax, ebx, ecx, edx.
macro initvector data,oldsize,size,block
{
local e1,e2
        mov     eax, size
        imul    eax, block
        push    eax                     ; keep the byte count across the call
        ccall   realloc,dword[data],eax
        pop     ebx
        and     eax,eax
        jz      e1
        mov     dword[data],eax
        mov     dword[oldsize],ebx
        jmp     e2
e1:
        ccall   perror, err1
        sys_exit -1
e2:
}

; eax = polynomial hash (multiplier 31) of the `size` bytes at str.
; size must be at least 1. Clobbers ebx, ecx, edx.
macro hash str,size
{
local l1
        mov     ecx, size
        mov     eax,0
        mov     ebx,str
l1:
        imul    eax,eax,31
        movzx   edx,byte [ebx]
        add     eax, edx
        inc     ebx
        dec     ecx
        jnz     l1
}

; Look up (inserting if absent) the key srchstr/srchlen in the table at
; `data`. The hash is reduced to 18 bits and scaled by 32, giving a byte
; offset below SIZE*4 at which probing starts.
; Out: eax -> table entry { key pointer, count }.
; Clobbers ebx, ecx, edx, esi, edi; pushes two dwords that strfind pops.
macro hashfind data,elements,block,srchstr,srchlen
{
        pushd   srchstr
        pushd   srchlen
        hash    srchstr,srchlen
        mov     ebx,data
        and     eax,0x3ffff
        shl     eax,5
        strfind elements,block
;       lea     eax,[ebx+eax*block]
}

; Linear probe used by hashfind.
; In : ebx = table base, eax = starting byte offset,
;      stack top = key length, next dword = key pointer.
; An entry whose key pointer is 0 is free: it is claimed for the key
; with count 0 and [elements] is incremented.
; Out: eax -> matching or newly claimed entry.
; NOTE(review): there is no occupancy check, so a completely full table
; would probe forever.
macro strfind elements,block
{
local l1,l2,e1
        pop     ecx                     ; len
        pop     edx                     ; s
l1:
        cmp     dword[ebx+eax],0
        jne     l2
        mov     dword[ebx+eax],edx
        mov     dword[ebx+eax+4],0
        inc     dword[elements]
        jmp     e1
l2:
        push    ecx
        strncmp dword[ebx+eax],edx,ecx,0
        pop     ecx                     ; pop leaves the compare flags intact
        je      e1
        add     eax,block
        and     eax,TABLE_BYTES-1       ; wrap to the first slot instead of
                                        ; probing past the end of the table
        jmp     l1
e1:
        lea     eax,[ebx+eax]
}

; Alternative lookup kept from an earlier version; every use in main is
; commented out.
macro find data,elements,block,srchstr,srchlen ; binary search and insert
{
local l1,l2,e1,e2,e3
        mov     ecx,srchstr
        mov     eax,dword[elements]
        mov     ebx,data
        lea     edx,[ebx+eax*block]
l1:
        and     eax,eax
        jz      e1
        cmp     edx,ebx
        jle     e1
        shr     eax,1
        push    ecx
        strncmp dword[ebx+eax*block],ecx,srchlen,0
        pop     ecx
        jl      l1
        je      e3
        and     eax,eax
        jnz     l2
        inc     eax
l2:
        lea     ebx, [ebx+eax*block]
        jmp     l1
e1:
        mov     eax,data
        mov     edx,dword[elements]
        lea     eax, [eax+edx*block]
        mov     edx, eax
        add     edx,block
        dec     edx
        sub     eax,ebx
        jl      e2
        push    ecx
        lea     ecx, [edx-block]
        if 0
        pusha
        ccall   printf,fmt5,eax,dword[ecx]
        popa
        end if
        strncpy edx,ecx,eax,1
        pop     ecx
        if 0
        pusha
        ccall   printf,fmt5,eax,ecx
        popa
        end if
        mov     dword [ebx],ecx
        mov     dword [ebx+4],0         ; heh
        inc     dword[elements]
        xor     eax,eax
        jmp     e3
e2:
        ccall   printf,fmt,err2
        sys_exit -1
e3:
        lea     eax,[ebx+eax*block]
}

; Count every `size`-byte window of the packed sequence dta[0..sdta) in
; the hash table. Does nothing when the sequence is shorter than `size`.
; Clobbers eax, ebx, ecx, edx, esi, edi.
macro calc_frequencies size
{
local l1,e1
        mov     ecx,dword [sdta]
        inc     ecx
        sub     ecx,size                ; ecx = number of windows
        test    ecx,ecx
        jle     e1                      ; empty or too-short input: the loop
                                        ; below always runs at least once
        xor     eax,eax
l1:
        push    ecx                     ; end
        push    eax                     ; counter
        mov     ebx,dword [dta]
        add     ebx,eax
        hashfind dword [hashtable.data], hashtable.elements,8,ebx,size
;       find dword [hashtable.data], hashtable.elements,8,ebx,size
        inc     dword[eax+4]
        pop     eax
        pop     ecx
        inc     eax
        cmp     eax,ecx
        jne     l1
e1:
}

; printf (format fmt) every non-null pointer among the cnt dwords at ptr.
; Unused in main. Clobbers eax, ebx, ecx, edx.
macro print_strs ptr,cnt
{
local l1,l2,e1
        mov     ebx,ptr
        mov     ecx,cnt
        cmp     dword[ebx],0
        je      l2
l1:
        push    ebx
        push    ecx
        ccall   printf,fmt, dword [ebx]
        pop     ecx
        pop     ebx
l2:
        dec     ecx
        jz      e1
        add     ebx,4
        cmp     dword[ebx],0
        je      l2
        jmp     l1
e1:
}

STDIN   equ 0
STDOUT  equ 1
STDERR  equ 2
fsize   equ 16384                       ; bytes in filebuf

format ELF

SIZE        equ 2097152                 ; hash table size in dwords
TABLE_BYTES equ SIZE*4                  ; same in bytes; a power of two

section '.text' executable

public main
extrn printf
extrn perror
extrn realloc
extrn free

;-----------------------------------------------------------------------
; int main(void)
; Out: eax = 0 on normal completion; exits with status -1 when realloc
;      fails.
;-----------------------------------------------------------------------
main:
        push    ebx                     ; ebx/esi/edi are callee-saved in
        push    esi                     ; cdecl and every macro below
        push    edi                     ; destroys them

scan:                                   ; skip input up to the ">THREE" line
        getLine STDIN, buf, 256
        movzx   eax, byte [buf]
        and     eax,eax
        jz      e1                      ; empty line or end of input
        strncmp buf,three,6,0
        jne     scan                    ; decide on ZF: ecx is also 0 when
                                        ; only the last byte differs

l1:                                     ; append each sequence line to dta
        getLine STDIN, buf, 256
        movzx   eax, byte [buf]
        and     eax,eax
        jz      e1
        cmp     eax,'>'
        je      e1
        mov     eax,edi                 ; edi -> terminating NUL
        sub     eax,buf                 ; eax = line length, >= 1 here
        push    eax
        add     eax, dword [sdta]
        ccall   realloc,dword[dta], eax
        and     eax,eax
        jz      e2
        mov     dword[dta],eax
        pop     eax
        mov     ebx, dword [sdta]
        add     ebx, dword [dta]        ; ebx -> first free byte of dta
        push    eax
        pack_str ebx,buf,eax
        pop     eax
        add     dword[sdta],eax
        jmp     l1

e1:
        initvector hashtable.data,hashtable.size,SIZE,4
        dwordnset dword[hashtable.data],0,SIZE
;       dwordnset dword[hashtable.data],rstm,100
;       find dword [hashtable.data],hashtable.elements,8,msg,1
;       find dword [hashtable.data],hashtable.elements,8,msg7,1
;       find dword [hashtable.data],hashtable.elements,8,msg1,1
;       find dword [hashtable.data],hashtable.elements,8,msg2,1
;       find dword [hashtable.data],hashtable.elements,8,msg3,1
;       find dword [hashtable.data],hashtable.elements,8,msg4,1
;       find dword [hashtable.data],hashtable.elements,8,msg5,1
;       find dword [hashtable.data],hashtable.elements,8,msg6,1
;       find dword [hashtable.data],hashtable.elements,8,msg,1
;       find dword [hashtable.data],hashtable.elements,8,msg4,1

        calc_frequencies 1

        mov     dword [hashtable.elements],0
        dwordnset dword[hashtable.data],0,SIZE
        calc_frequencies 2

        mov     dword [hashtable.elements],0
        dwordnset dword[hashtable.data],0,SIZE
        calc_frequencies 3
        pack_str lngbuf,lngstr4,3
        hashfind dword [hashtable.data],hashtable.elements,8,lngbuf,3
        ccall   printf, fmt1, dword [eax+4]
        ccall   printf, fmt1, dword [hashtable.elements]

        mov     dword [hashtable.elements],0
        dwordnset dword[hashtable.data],0,SIZE
        calc_frequencies 4
        pack_str lngbuf,lngstr3,4
        hashfind dword [hashtable.data],hashtable.elements,8,lngbuf,4
        ccall   printf, fmt1, dword [eax+4]
        ccall   printf, fmt1, dword [hashtable.elements]

        mov     dword [hashtable.elements],0
        dwordnset dword[hashtable.data],0,SIZE
        calc_frequencies 6
        pack_str lngbuf,lngstr2,6
        hashfind dword [hashtable.data],hashtable.elements,8,lngbuf,6
        ccall   printf, fmt1, dword [eax+4]
        ccall   printf, fmt1, dword [hashtable.elements]

        mov     dword [hashtable.elements],0
        dwordnset dword[hashtable.data],0,SIZE
        calc_frequencies 12
        pack_str lngbuf,lngstr1,12
        hashfind dword [hashtable.data],hashtable.elements,8,lngbuf,12
        ccall   printf, fmt1, dword [eax+4]
        ccall   printf, fmt1, dword [hashtable.elements]

        mov     dword [hashtable.elements],0
        dwordnset dword[hashtable.data],0,SIZE
        calc_frequencies 18
        pack_str lngbuf,lngstr,18
        hashfind dword [hashtable.data],hashtable.elements,8,lngbuf,18
        ccall   printf, fmt1, dword [eax+4]
;       print_strs eax,20
        ccall   printf, fmt1, dword [hashtable.elements]
        ccall   printf, fmt1, dword [sdta]
;       sys_write STDOUT, dword [dta], dword[sdta]

        xor     eax,eax
        pop     edi
        pop     esi
        pop     ebx
        ret

e2:                                     ; realloc failed; the stack is not
        ccall   perror, err1            ; rebalanced because sys_exit never
        sys_exit -1                     ; returns

section '.data' writeable align 4

fmt     db "%10s",0xa,0
fmt1    db "%u",0xa,0
fmt2    db "%p",0xa,0
fmt3    db "%c",0xa,0
fmt4    db "%s %u %u",0xa,0
fmt5    db "%u %s",0xa,0
err1    db "realloc failed",0
err2    db "index error",0
msg     db "a",0
msg1    db "b",0
msg2    db "c",0
msg3    db "d",0
msg4    db "e",0
msg5    db "f",0
msg6    db "g",0
msg7    db "h",0
lngstr  db "ggtattttaatttatagt",0
lngstr1 db "GGTATTTTAATT",0
lngstr2 db "GGTATT",0
lngstr3 db "GGTA",0
lngstr4 db "GGT",0
three   db ">THREE"                     ; compared by length, no terminator
align 4
fptr    dd 0                            ; next unread byte in filebuf, 0 = not primed
fend    dd 0                            ; one past the last valid byte in filebuf
dta     dd 0                            ; packed sequence (realloc'ed)
sdta    dd 0                            ; number of bytes in dta
hashtable vector 0,0

section '.bss' writeable align 4

buf     rb 256                          ; current input line
align 4
filebuf rb fsize                        ; read-ahead buffer for stdin
align 4
lngbuf  rb 18                           ; packed form of the query string

I didn't complete it yet (need to write sort routine, but that consumes just few ticks) but here is speed: bmaxa(a)maxa:~/fasm/knucleotide$ cat start.sh fasm -m 32768 knucleotide.asm gcc -m32 knucleotide.o -o knucleotide strip knucleotide bmaxa(a)maxa:~/fasm/knucleotide$ bmaxa(a)maxa:~/fasm/knucleotide$ ./start.sh flat assembler version 1.68 (32768 kilobytes memory) 4 passes, 4514 bytes. bmaxa(a)maxa:~/fasm/knucleotide$ time ./knucleotide < ~/long-input.txt 1471758 64 446535 256 47336 4096 893 138127 893 139882 125000000 real 0m32.982s user 0m32.820s sys 0m0.070s second place c++ program time: bmaxa(a)maxa:~/fasm/knucleotide$ time ./knucleotidecpp < ~/long-input.txt A 30.295 T 30.151 C 19.800 G 19.754 AA 9.177 TA 9.132 AT 9.131 TT 9.091 CA 6.002 AC 6.001 AG 5.987 GA 5.984 CT 5.971 TC 5.971 GT 5.957 TG 5.956 CC 3.917 GC 3.911 CG 3.909 GG 3.902 1471758 GGT 446535 GGTA 47336 GGTATT 893 GGTATTTTAATT 893 GGTATTTTAATTTATAGT real 0m12.301s user 0m22.840s sys 0m0.200s 22 seconds, which is faster (multithreaded 12 secs), but Im faster then C and java with most naive approach ;) but consumes 360 mb virt/144 res on my machine All in all, writing code assembler is pretty fast, as Im not versed in it (yet), but have experience in 90ies ;) Greets 	
		 From: Branimir Maksimovic on 25 Feb 2010 07:12 Branimir Maksimovic wrote: > > but consumes 360 mb virt/144 res on my machine Yes, the C++ program is faster, but it consumes more memory than the asm version. I wasn't clear ;) > > All in all, writing code assembler is pretty fast, > as Im not versed in it (yet), but have experience > in 90ies ;) > > Greets 	
		 From: James Harris on 25 Feb 2010 08:12 On 25 Feb, 12:12, Branimir Maksimovic <bm...(a)hotmail.com> wrote: > Branimir Maksimovic wrote: > > > but consumes 360 mb virt/144 res on my machine > > yes c++ program is faster but consumes more memory > than asm. I wasn't clear ;) > > > > > All in all, writing code assembler is pretty fast, > > as Im not versed in it (yet), but have experience > > in 90ies ;) You may get some replies if you post a *much* smaller query. Your initial post was enormous! 	
		 From: Branimir Maksimovic on 25 Feb 2010 23:53 James Harris wrote: > On 25 Feb, 12:12, Branimir Maksimovic <bm...(a)hotmail.com> wrote: >> Branimir Maksimovic wrote: >> >>> but consumes 360 mb virt/144 res on my machine >> yes c++ program is faster but consumes more memory >> than asm. I wasn't clear ;) >> >> >> >>> All in all, writing code assembler is pretty fast, >>> as Im not versed in it (yet), but have experience >>> in 90ies ;) > > You may get some replies if you post a *much* smaller query. Your > initial post was enormous! Heh, yes, but I wanted to post the complete source so that anyone can compile it. Perhaps someone will say whether it's crappy or OK ;) I didn't apply any special hardware-specific optimisation techniques and still got decent speed ;) Greets 	
		 From: Bobbias on 1 Mar 2010 12:28 On Feb 25, 8:53 pm, Branimir Maksimovic <bm...(a)hotmail.com> wrote: > James Harris wrote: > > On 25 Feb, 12:12, Branimir Maksimovic <bm...(a)hotmail.com> wrote: > >> Branimir Maksimovic wrote: > > >>> but consumes 360 mb virt/144 res on my machine > >> yes c++ program is faster but consumes more memory > >> than asm. I wasn't clear ;) > > >>> All in all, writing code assembler is pretty fast, > >>> as Im not versed in it (yet), but have experience > >>> in 90ies ;) > > > You may get some replies if you post a *much* smaller query. Your > > initial post was enormous! > > He, yes, but I wanted to post complete source so anyone can compile. > Perhaps someone will say it's crappy or ok ;) > I didn't apply any special optimisation technique regarding hardware and > got decent speed ;) > > Greets It would have been a better idea to upload the source to something like http://pastebin.com/ and link to the page there, giving those who are interested the ability to read the source, and those who aren't interested the chance to avoid scrolling for 5 minutes to get to another post, lol. 
		  | 
Next
 | 
Last
 Pages: 1 2 Prev: Was NOP deprecated for AA64? NASM not disassembling... Next: The project BIEW was renamed into BEYE |