[erlang-questions] Speeding up text file I/O

Tue Jan 8 11:51:48 CET 2008

* Dmitrii Dimandt:

> """start quote""
>
> This is incorrect. My unrolled function is twice as fast as the 
> one-liner when it is compiled with the native flag (I didn't translate 
> the flag part - my bad, D.) The number of lines has been carefully 
> measured through various tests.

Aha.  Why is the native flag so underdocumented?

Anyway, the original, unrolled find_8 version together with this code
(which avoids splitting the buffer unnecessarily) is a tiny bit faster
on my machine:

%% file_reader(File, Len) -> Handle
%%   Handle = {NextF, binary(), Pos} | eof
%%   NextF  = fun(() -> Handle)
%%
%% Builds a lazy chunked reader over File. The handle returned here has
%% an empty buffer and position 0. Calling NextF() issues
%% file:read(File, Len) and yields a new handle whose buffer is the data
%% just read (position 0 again), or the atom eof when file:read/2
%% reports eof. Any other result of file:read/2 (for example
%% {error, Reason}) is not matched and raises a case_clause error.
file_reader(File, Len) ->
    file_reader(File, Len, <<>>).

%% Wraps an already-read Buffer in a handle positioned at offset 0.
file_reader(File, ChunkSize, Buffer) ->
    ReadNext = fun() -> read_next_chunk(File, ChunkSize) end,
    {ReadNext, Buffer, 0}.

%% Reads one more chunk and turns it into the next handle (or eof).
read_next_chunk(File, ChunkSize) ->
    case file:read(File, ChunkSize) of
        eof -> eof;
        {ok, Chunk} -> file_reader(File, ChunkSize, Chunk)
    end.

%% get_line(Handle) -> {NextHandle, Line}
%%   Handle     = {NextF, Buffer, Pos}   (as produced by file_reader)
%%   NextHandle = {NextF, Buffer, Pos1} | eof
%%   Line       = binary()
%%
%% Extracts the bytes from offset Pos of Buffer up to, but not
%% including, the next byte with value 10 (line feed). The returned
%% handle points just past that byte and shares the same Buffer, so the
%% buffer itself is not split.
%%
%% When the current buffer holds no line feed at or after Pos, the
%% unread tail of the buffer is kept, the next chunk is fetched with
%% NextF(), and the search continues there; the pieces are joined into a
%% single binary. If NextF() yields eof, the tail collected so far
%% (possibly <<>>) is returned together with the handle eof.
%%
%% There is no clause for the bare eof handle: callers must stop once
%% they receive eof as NextHandle.
%%
%% NOTE(review): find_8/3 is defined elsewhere; it is used here as
%% returning not_found or the offset in Buffer of the first byte equal
%% to 10 at or after Pos - confirm against its definition.
get_line({NextF, Buffer, Pos}) ->
    case find_8(Buffer, 10, Pos) of
        not_found ->
            %% Keep everything not yet consumed from this buffer.
            <<_:Pos/bytes, Tail/bytes>> = Buffer,
            continue_line(NextF(), Tail);
        NewlinePos ->
            LineSize = NewlinePos - Pos,
            <<_:Pos/bytes, Line:LineSize/bytes, _/bytes>> = Buffer,
            %% Skip the line feed itself for the next call.
            {{NextF, Buffer, NewlinePos + 1}, Line}
    end.

%% Finishes a line whose beginning (Prefix) was found at the end of the
%% previous buffer. At end of file the prefix is the whole remaining
%% line; otherwise the rest of the line is read from the next handle and
%% appended to the prefix.
continue_line(eof, Prefix) ->
    {eof, Prefix};
continue_line(Handle, Prefix) ->
    {NextHandle, Suffix} = get_line(Handle),
    {NextHandle, <<Prefix/bytes, Suffix/bytes>>}.

(This code is only lightly tested; there could be a fatal bug.)