The Computer Language Shootout

Kenneth Johansson ken@REDACTED
Sat Mar 18 01:44:37 CET 2006


On Fri, 2006-03-17 at 11:48 +0100, Vlad Dumitrescu wrote:
> Hi,
> Also, an even better result (~half the time) can be obtained by using
> binaries instead of lists:

Nice. I copied that and made some small line-reducing changes. If no one
has any other suggestions, I will submit this version some time next week.

-------------- next part --------------
-module(knucleotide).
-export([main/0]).
%% Upper-case the characters a..z of Str and drop every newline character.
%% All other characters are passed through unchanged, in their original order.
to_upper_no_nl(Str) ->
    [upcase_ascii(Ch) || Ch <- Str, Ch /= $\n].

%% Map a single lower-case ASCII letter to its upper-case counterpart;
%% anything outside a..z is returned as is.
upcase_ascii(Ch) when Ch >= $a, Ch =< $z -> Ch - ($a - $A);
upcase_ascii(Ch)                         -> Ch.

%% Read lines from stdin and discard them until a line starting with ">TH"
%% (i.e. the ">THREE ..." header) is reached.
%%
%% Returns the atom `found' once the header line has been consumed.
%% Raises `eof' if the input ends before the header is seen, and
%% `{read_error, Reason}' if io:get_line/1 reports `{error, Reason}'.
seek_three() ->
    case io:get_line('') of
        ">TH" ++ _      -> found;
        eof             -> erlang:error(eof);
        %% Without this clause an I/O error would fall into the catch-all
        %% below and be retried forever.
        {error, Reason} -> erlang:error({read_error, Reason});
        _               -> seek_three()
    end.
  
%% Skip ahead to the ">TH..." header, then read every remaining line from
%% stdin until eof. Each line is upper-cased and stripped of newlines, and
%% the lines are joined, in input order, into one binary.
dna_seq() ->
    seek_three(),
    dna_seq([]).

%% Collected holds the lines processed so far, most recent first.
dna_seq(Collected) ->
    dna_seq(io:get_line(''), Collected).

%% Dispatch on the result of the latest read: finish on eof, otherwise
%% normalise the line, push it onto the accumulator and keep reading.
dna_seq(eof, Collected) ->
    list_to_binary(lists:reverse(Collected));
dna_seq(Line, Collected) ->
    dna_seq([to_upper_no_nl(Line) | Collected]).

%% Build a dictionary whose keys are the Len-byte fragments of Dna and whose
%% values are the number of times each fragment occurs, sliding one byte at
%% a time over the sequence.
%% Returns {Dict, Count}, where Count is the number of fragments counted.
gen_freq(Dna, Len) ->
    slide(Dna, Len, size(Dna), dict:new(), 0).

%% Left is the number of bytes still remaining in the first argument.
slide(<<>>, _Len, _Left, Counts, Windows) ->
    {Counts, Windows};
slide(_Dna, Len, Left, Counts, Windows) when Left < Len ->
    %% Fewer bytes remain than one fragment needs: nothing more to count.
    {Counts, Windows};
slide(Dna, Len, Left, Counts, Windows) ->
    <<Fragment:Len/binary, _/binary>> = Dna,
    Updated = dict:update_counter(Fragment, 1, Counts),
    <<_, Rest/binary>> = Dna,
    slide(Rest, Len, Left - 1, Updated, Windows + 1).

%% Print the frequency table.
%%
%% Takes the {Dict, Total} pair produced by gen_freq/2 and writes one line
%% per key: the key followed by its share of Total as a percentage with
%% three decimals. Entries are ordered by descending count; entries with
%% equal counts are ordered by ascending key, so the output does not depend
%% on the dictionary's internal ordering. A blank line ends the table.
printf({Frequency, Tot}) ->
    ByCountDescKeyAsc =
        fun({KeyA, CntA}, {KeyB, CntB}) ->
            %% A sorts before B when its count is larger, or when the
            %% counts are equal and its key is not greater.
            {CntB, KeyA} =< {CntA, KeyB}
        end,
    printf(lists:sort(ByCountDescKeyAsc, dict:to_list(Frequency)), Tot).

printf([], _Tot) ->
    io:fwrite("\n");
printf([{Nucleoid, Cnt} | Rest], Tot) ->
    %% ~s accepts a binary directly, so no list conversion is needed.
    io:fwrite("~s ~.3f\n", [Nucleoid, (Cnt * 100.0) / Tot]),
    printf(Rest, Tot).

%% Count how many times Pattern occurs in Dna and print the count, a tab
%% and the pattern on one line. A pattern that never occurs is reported
%% with a count of 0.
write_count(Dna, Pattern) ->
    {Counts, _Total} = gen_freq(Dna, size(Pattern)),
    Occurrences =
        case dict:find(Pattern, Counts) of
            {ok, Found} -> Found;
            error       -> 0
        end,
    io:fwrite("~w\t~s\n", [Occurrences, binary_to_list(Pattern)]).

%% Entry point: read the sequence from stdin, print the frequency tables
%% for fragments of length 1 and 2, print the occurrence count of each
%% fixed pattern, then stop the runtime with exit status 0.
main() ->
    Dna = dna_seq(),
    PrintTable = fun(Len) -> printf(gen_freq(Dna, Len)) end,
    lists:foreach(PrintTable, [1,2]),
    Patterns = [<<"GGT">>,
                <<"GGTA">>,
                <<"GGTATT">>,
                <<"GGTATTTTAATT">>,
                <<"GGTATTTTAATTTATAGT">>],
    PrintCount = fun(Pattern) -> write_count(Dna, Pattern) end,
    lists:foreach(PrintCount, Patterns),
    halt(0).


More information about the erlang-questions mailing list