The Computer Language Shootout

Kenneth Johansson <>
Fri Mar 17 04:22:54 CET 2006


http://shootout.alioth.debian.org/gp4/benchmark.php?test=knucleotide&lang=all

I wrote an Erlang implementation of the knucleotide benchmark.

And while the code is much shorter than the C and Fortran versions, it's
longer than the Ruby and Python ones. But since this is my first real try
at Erlang, I'm sure someone here can improve it significantly.

Speed is also a problem: on my computer it's 8 times slower than the
Python version.

-------------- next part --------------
-module(knucleotide).
-export([main/0]).

%% Upper-case every character in the range a..z and drop all newline
%% characters; every other element is passed through untouched.
to_upper_no_nl(Str) ->
    [upcase(Ch) || Ch <- Str, Ch /= $\n].

%% Map a lower-case ASCII letter to its upper-case counterpart.
upcase(Ch) when Ch >= $a, Ch =< $z -> Ch - ($a - $A);
upcase(Ch) -> Ch.

%% Read lines from stdin and discard them until a line containing the
%% ">THREE Homo sapiens frequency" header has been consumed.
%%
%% Returns the atom i_dont_care once the header is found; the useful
%% effect is that stdin is left positioned just after the header line.
%% Raises error({section_not_found, Header}) when stdin ends before the
%% header is seen and error({read_failed, Reason}) when the read itself
%% fails, instead of handing a non-string to string:str/2.
seek_three() ->
    Header = ">THREE Homo sapiens frequency",
    case io:get_line('') of
        eof ->
            error({section_not_found, Header});
        {error, Reason} ->
            error({read_failed, Reason});
        Line ->
            case string:str(Line, Header) of
                0 -> seek_three();
                _ -> i_dont_care
            end
    end.
  
%% Read lines from stdin until eof and return them as one flat list.
%% Every line is passed through to_upper_no_nl/1 and the results are
%% placed, in input order, after the initial Seq.
%%
%% The pieces are collected in reverse and joined once at eof. Appending
%% each line with ++ inside the loop would recopy the whole accumulated
%% sequence for every line read.
dna_seq(Seq) ->
    collect_lines([Seq]).

%% Skip ahead past the >THREE header, then read the rest of stdin.
dna_seq() ->
    seek_three(),
    dna_seq([]).

%% Chunks holds the pieces gathered so far, most recent first.
collect_lines(Chunks) ->
    case io:get_line('') of
        eof  -> lists:append(lists:reverse(Chunks));
        Line -> collect_lines([to_upper_no_nl(Line) | Chunks])
    end.

%% Build a dictionary keyed by every Len-element window of Dna, where the
%% value is the number of times that window occurs.
%% Returns {Dict, Total}, Total being the number of windows counted.
gen_freq(Dna, Len) ->
    count_windows(Dna, Len, length(Dna), dict:new(), 0).

%% Remaining is the number of elements left in Seq; it is tracked
%% alongside the list so the length is only computed once up front.
count_windows([_ | Rest] = Seq, Len, Remaining, Counts, Total)
  when Remaining >= Len ->
    {Window, _} = lists:split(Len, Seq),
    Updated = dict:update_counter(Window, 1, Counts),
    count_windows(Rest, Len, Remaining - 1, Updated, Total + 1);
%% Either the input is exhausted or fewer than Len elements are left.
count_windows(_Seq, _Len, _Remaining, Counts, Total) ->
    {Counts, Total}.


%% Print a frequency table: one "KEY PERCENT" line per entry, followed by
%% an empty line. Frequency is a dict mapping keys to counts and Tot is
%% the count that corresponds to 100 percent.
%%
%% Rows are ordered by descending count; entries with equal counts are
%% ordered by ascending key, so the output does not depend on the order
%% in which dict:to_list/1 happens to return the entries.
printf({Frequency, Tot}) ->
    Rows = lists:sort(fun by_count_desc_then_key/2, dict:to_list(Frequency)),
    printf(Rows, Tot).

%% Print already ordered {Key, Count} rows, then the terminating newline.
printf([], _) ->
    io:fwrite("\n");
printf([{Nucleoid, Cnt} | T], Tot) ->
    io:fwrite("~s ~.3f\n", [Nucleoid, (Cnt * 100.0) / Tot]),
    printf(T, Tot).

%% Ordering fun for lists:sort/2: true when the first row may be placed
%% before the second one.
by_count_desc_then_key({KeyA, CntA}, {KeyB, CntB}) ->
    CntA > CntB orelse (CntA == CntB andalso KeyA =< KeyB).

%% Print the count gen_freq/2 recorded for Pattern in Dna, formatted as
%% "COUNT<tab>PATTERN". A pattern missing from the table is printed with
%% a count of 0.
write_count(Dna, Pattern) ->
    {Table, _Total} = gen_freq(Dna, length(Pattern)),
    Occurrences =
        case dict:find(Pattern, Table) of
            {ok, Found} -> Found;
            error -> 0
        end,
    io:fwrite("~w\t~s\n", [Occurrences, Pattern]).

%% Program entry point: read the DNA sequence from stdin, print the
%% frequency tables for window lengths 1 and 2, print the counts for the
%% fixed patterns below, then stop the runtime with exit status 0.
main() ->
    Dna = dna_seq(),
    lists:foreach(fun(Len) -> printf(gen_freq(Dna, Len)) end, [1, 2]),
    Patterns = ["GGT",
                "GGTA",
                "GGTATT",
                "GGTATTTTAATT",
                "GGTATTTTAATTTATAGT"],
    lists:foreach(fun(Pattern) -> write_count(Dna, Pattern) end, Patterns),
    halt(0).


More information about the erlang-questions mailing list