line oriented input, is there an alternative to the io module?

mats cronqvist mats.cronqvist@REDACTED
Wed Sep 29 16:39:20 CEST 2004


On Fri, 17 Sep 2004 14:26:57 +0200, Bengt Kleberg  
<bengt.kleberg@REDACTED> wrote:


> if i want to (quickly) read the integers in a file, line by line, is  
> there a faster alternative to
> io:fread( '', "~d" )


   using file:read can be quite a bit faster.
   i wrote something called bio (for block io) that has an interface
similar to lists:foldl:

   bio:string(Filename, Function, Accumulator)
   will open Filename and call Function for each line in the file.
   Function is a fun with arguments (String,Acc) that returns NewAcc,
   String is a line of text.

   if FN is a text file that contains 10,000 lines with one integer on each
line, this:

bio:string(FN,fun(T,A)->[list_to_integer(T)|A] end,[])

   is about 10 times faster than something similar using io:fread.

   mats


%%%-------------------------------------------------------------------
%%% File    : bio.erl
%%% Author  : Mats Cronqvist <etxmacr@REDACTED>
%%% Description : block io
%%%
%%% Created : 13 Mar 2003 by Mats Cronqvist <etxmacr@REDACTED>
%%%-------------------------------------------------------------------
-module(bio).
-export([string/3,term/3]).

%% Size argument handed to every file:read/2 call in this module.
%% NOTE(review): 8092 may have been intended as 8192 (8 KiB) -- confirm.
-define(BLOCK, 8092).

%% string(FN, Fun, Acc) -> NewAcc
%%   Folds Fun(Line, Acc) over the lines of the file FN. Each Line is a
%%   string without its line terminator. The per-line callback ignores
%%   the continuation slot and merely un-reverses the collected characters.
string(FN, Fun, Acc) ->
     in(FN, Fun, Acc,
        fun(_Cont, RevChars) -> {ok, lists:reverse(RevChars)} end).
%% term(FN, Fun, Acc) -> NewAcc
%%   Folds Fun(Term, Acc) over the Erlang terms found in the file FN.
%%   Every collected line gets its newline put back before being handed,
%%   together with the current scanner continuation, to to_term/2; a term
%%   may therefore span several lines.
term(FN, Fun, Acc) ->
     ScanLine = fun(Cont, RevChars) ->
                    Line = lists:reverse([$\n | RevChars]),
                    to_term(Cont, Line)
                end,
     in(FN, Fun, Acc, ScanLine).

%% in(FN, Fun, Acc, Bfun) -> NewAcc
%%   Opens FN in raw read mode, runs the block-wise read loop over it and
%%   returns the final accumulator.
%%     Fun  - fun(Item, Acc) -> NewAcc, called once per completed item.
%%     Bfun - fun(Cont, RevChars) -> {ok, Item} | {cont, NewCont}, turns
%%            the reversed characters of one line into an item.
%%   Exits with {open_error, Reason, FN} if the file cannot be opened.
%%   The descriptor is closed in an 'after' section so that it is released
%%   even when Fun, Bfun or the read loop raises.
in(FN, Fun, Acc, Bfun) ->
     case file:open(FN, [read, raw]) of
         {ok, FD} ->
             try
                 in(FD, file:read(FD, ?BLOCK), Fun, Bfun, {[], [], Acc})
             after
                 file:close(FD)
             end;
         {error, R} ->
             exit({open_error, R, FN})
     end.
%% in(FD, ReadResult, Fun, Bfun, State) -> NewAcc
%%   Read loop. ReadResult is the value of the latest file:read/2 call and
%%   State is {Cont, O, Acc}: Cont is the continuation handed to Bfun, O
%%   holds the characters of the current (unfinished) line in reverse
%%   order, Acc is the caller's accumulator.
%%     eof, nothing pending  -> return Acc.
%%     eof, pending chars    -> treat them as a final line without a
%%                              terminator; exit {incomplete_input, Cont}
%%                              if Bfun still wants more input.
%%     {error, Reason}       -> exit {read_error, Reason}.
%%     {ok, List}            -> feed the block to do/4 and read the next.
in(_FD, eof, _Fun, _Bfun, {_Cont, [], Acc}) -> Acc;
in(_FD, eof, Fun, Bfun, {Cont, O, Acc}) ->
     case Bfun(Cont, O) of
         {ok, Term} -> Fun(Term, Acc);
         {cont, NCont} -> exit({incomplete_input, NCont})
     end;
in(_FD, {error, R}, _Fun, _Bfun, _State) ->
     %% report a failed read explicitly instead of as a function_clause
     exit({read_error, R});
in(FD, {ok, List}, Fun, Bfun, State) ->
     in(FD, file:read(FD, ?BLOCK), Fun, Bfun, do(List, Fun, Bfun, State)).

%% do(Chars, Fun, Bfun, State) -> NewState
%%   Consumes one block of characters. State is {Cont, O, Acc}: Cont is
%%   the continuation handed to Bfun, O the reversed characters of the
%%   line being collected, Acc the caller's accumulator.
%%   On every LF (10) the collected line is passed to Bfun:
%%     {ok, Item}    -> Acc becomes Fun(Item, Acc), Cont is reset to [].
%%     {cont, NCont} -> NCont is kept for the next line.
%%   A CR (13) directly in front of the LF is dropped (DOS line endings).
%%   The CR is removed from the collected line when the LF is seen, so a
%%   CR LF pair that straddles two read blocks is handled as well.
do([], _Fun, _Bfun, State) -> State;
do([10|R], Fun, Bfun, {Cont, O, Acc}) ->
     case Bfun(Cont, strip_cr(O)) of
         {cont, NCont} -> do(R, Fun, Bfun, {NCont, [], Acc});
         {ok, Term} -> do(R, Fun, Bfun, {[], [], Fun(Term, Acc)})
     end;
do([H|R], Fun, Bfun, {Cont, O, Acc}) ->
     do(R, Fun, Bfun, {Cont, [H|O], Acc}).

%% Drops a single trailing CR from a reversed line (it is at the head).
strip_cr([13|O]) -> O;
strip_cr(O) -> O.

%% to_term(Cont, Str) -> {ok, Term} | {cont, NewCont}
%%   Feeds Str to erl_scan:tokens/3, resuming from Cont ([] to start).
%%     {cont, NewCont} - the scanner needs more input; pass NewCont back
%%                       in together with the next line.
%%     {ok, Term}      - a complete, dot-terminated term was scanned with
%%                       nothing left over and parsed successfully.
%%   Exits with {scanner_failed, Str} on any other scanner outcome
%%   (including leftover input after the dot) and with
%%   {parser_failed, Reason, Str} when erl_parse:parse_term/1 rejects the
%%   tokens.
to_term(Cont, Str) ->
     %% Only the scanner call is protected; the clause bodies below run
     %% outside the try, so their exits pass through untouched.
     try erl_scan:tokens(Cont, Str, 1) of
         {done, {ok, Toks, _}, []} ->
             case erl_parse:parse_term(Toks) of
                 {ok, Term} -> {ok, Term};
                 {error, R} -> exit({parser_failed, R, Str})
             end;
         {more, Ncont} -> {cont, Ncont};
         _ -> exit({scanner_failed, Str})
     catch
         _:_ -> exit({scanner_failed, Str})
     end.



More information about the erlang-questions mailing list