The Computer Language Shootout
Kenneth Johansson
ken@REDACTED
Fri Mar 17 04:22:54 CET 2006
http://shootout.alioth.debian.org/gp4/benchmark.php?test=knucleotide&lang=all
I wrote an Erlang implementation of the k-nucleotide benchmark.
While the code is much shorter than the C and Fortran versions, it is longer
than the Ruby and Python ones. But since this is my first real attempt at
Erlang, I'm sure someone here can improve it significantly.
Speed is also a problem: on my computer it is 8 times slower than the
Python version.
-------------- next part --------------
-module(knucleotide).
-export([main/0]).
%% Upper-case the characters a..z of Str and drop every newline character.
%% All other elements are passed through unchanged, in their original order.
to_upper_no_nl(Str) ->
    [to_upper_char(Ch) || Ch <- Str, Ch /= $\n].

%% Map a single lower-case ASCII letter to its upper-case counterpart;
%% anything outside a..z is returned as is.
to_upper_char(Ch) when Ch >= $a, Ch =< $z ->
    Ch - ($a - $A);
to_upper_char(Ch) ->
    Ch.
%% Read lines from stdin and discard them until a line containing the
%% ">THREE Homo sapiens frequency" header has been consumed.
%%
%% If stdin ends before the header is seen, the function simply returns:
%% io:get_line/1 yields the atom eof in that case, and passing it on to
%% string:str/2 would crash with an unhelpful function_clause error.
%% The return value carries no information; callers ignore it.
seek_three() ->
    case io:get_line('') of
        eof ->
            %% No ">THREE" section in the input; nothing left to skip.
            i_dont_care;
        Line when is_list(Line) ->
            case string:str(Line, ">THREE Homo sapiens frequency") of
                0 -> seek_three();
                _ -> i_dont_care
            end
    end.
%% Read lines from stdin until eof and return Seq followed by all of them.
%% Each line is upper-cased and stripped of newlines by to_upper_no_nl/1.
%%
%% The lines are gathered first and concatenated once at the end; appending
%% every line with `Seq ++ Line` inside the loop would copy the whole
%% accumulated sequence again for each line read (quadratic in input size).
dna_seq(Seq) ->
    Seq ++ lists:append(dna_lines([])).

%% Skip ahead to the ">THREE" section of stdin, then read the rest of the
%% input as one upper-case sequence.
dna_seq() ->
    seek_three(),
    dna_seq([]).

%% Collect the remaining stdin lines, cleaned by to_upper_no_nl/1, in input
%% order. Acc holds the lines read so far, most recent first.
dna_lines(Acc) ->
    case io:get_line('') of
        eof -> lists:reverse(Acc);
        Line -> dna_lines([to_upper_no_nl(Line) | Acc])
    end.
%% Count every Len-long window of Dna.
%% Returns {Table, Windows}: Table is a dict mapping each window (a list of
%% Len elements) to the number of times it occurs in Dna, and Windows is the
%% total number of windows that were counted.
gen_freq(Dna, Len) ->
    gen_freq(Dna, Len, dict:new(), 0, length(Dna)).

%% Worker: Remaining is the length of the not yet consumed part of the
%% sequence, so a full window is available only while Remaining >= Len.
gen_freq([_ | Rest] = Seq, Len, Table, Seen, Remaining) when Remaining >= Len ->
    {Window, _} = lists:split(Len, Seq),
    gen_freq(Rest, Len, dict:update_counter(Window, 1, Table), Seen + 1, Remaining - 1);
gen_freq(_, _, Table, Seen, _) ->
    %% Either the sequence is exhausted or it is shorter than one window.
    {Table, Seen}.
%% Print the frequency table produced by gen_freq/2.
%% Frequency is a dict of Key -> Count and Tot is the total number of
%% counted windows. One line "Key Percentage" (three decimals) is written
%% per entry, followed by an empty line.
%%
%% Entries are ordered by descending count and, for equal counts, by
%% ascending key. Reversing a keysort on the count alone would leave the
%% order of equally frequent keys dependent on the dict's internal layout.
printf({Frequency, Tot}) ->
    ByCountThenKey =
        fun({KeyA, CntA}, {KeyB, CntB}) ->
            {-CntA, KeyA} =< {-CntB, KeyB}
        end,
    printf(lists:sort(ByCountThenKey, dict:to_list(Frequency)), Tot).

%% Write the already ordered {Key, Count} rows, then the terminating
%% empty line.
printf([], _) ->
    io:fwrite("\n");
printf([{Nucleoid, Cnt} | T], Tot) ->
    io:fwrite("~s ~.3f\n", [Nucleoid, (Cnt * 100.0) / Tot]),
    printf(T, Tot).
%% Print how many times Pattern occurs in Dna as "Count<TAB>Pattern".
%% A pattern that never occurs is reported with a count of 0.
write_count(Dna, Pattern) ->
    {Table, _Windows} = gen_freq(Dna, length(Pattern)),
    Occurrences =
        case dict:find(Pattern, Table) of
            {ok, Found} -> Found;
            error -> 0
        end,
    io:fwrite("~w\t~s\n", [Occurrences, Pattern]).
%% Entry point: read the sequence from stdin, print the frequency tables
%% for windows of length 1 and 2, print the occurrence counts of the fixed
%% benchmark patterns, then stop the runtime with exit status 0.
main() ->
    Dna = dna_seq(),
    lists:foreach(
        fun(Len) -> printf(gen_freq(Dna, Len)) end,
        [1, 2]
    ),
    lists:foreach(
        fun(Pattern) -> write_count(Dna, Pattern) end,
        ["GGT", "GGTA", "GGTATT", "GGTATTTTAATT", "GGTATTTTAATTTATAGT"]
    ),
    halt(0).
More information about the erlang-questions
mailing list