[erlang-questions] Fast directory walker

Frank Muller frank.muller.erl@REDACTED
Sat Dec 10 18:33:30 CET 2016


All in one file:

-module(dir).

-include_lib("kernel/include/file.hrl").

-export([ walker/1,
         walker1/1,
         walker2/1 ]).

%% original
%% Counts the regular files at or below Path.
%%
%% Returns 1 when Path itself is a regular file. Anything else is treated as
%% a possible directory: its entries are listed and counted recursively.
%% A path that cannot be listed (missing, not a directory, no permission)
%% contributes 0.
%%
%% Symlinks are not handled specially for now.
walker(Path) ->
    %% io:format("~s~n", [Path]),
    case file:read_file_info(Path) of
        {ok, #file_info{type = regular}} ->
            1;
        _ ->
            %% List the directory directly instead of globbing Path ++ "/*":
            %% a glob would interpret wildcard metacharacters ([, {, *, ?)
            %% that occur in real directory names and miss their contents.
            case file:list_dir(Path) of
                {ok, Names} ->
                    lists:sum([walker(filename:join(Path, Name)) || Name <- Names]);
                {error, _Reason} ->
                    0
            end
    end.

%% Sergej version + [raw] option
%% Entry point: lists Path with prim_file and hands the entries to
%% walker1/3 with a zero accumulator. Returns 0 when Path cannot be listed.
walker1(Path) ->
    Listing = prim_file:list_dir(Path),
    case Listing of
        {ok, Entries} -> walker1(Path, Entries, 0);
        _NotListable -> 0
    end.
%% walker1(Dir, Names, Total) -> NewTotal
%%
%% Walks the entry names of directory Dir, adding the size of every regular
%% file to Total and descending into every sub-directory. The "." and ".."
%% entries, entries of any other type, and entries whose info or listing
%% cannot be read leave the total unchanged.
walker1(_Dir, [], Total) ->
    Total;
walker1(Dir, [Name | Rest], Total) when Name =:= "."; Name =:= ".." ->
    walker1(Dir, Rest, Total);
walker1(Dir, [Name | Rest], Total) ->
    Full = Dir ++ "/" ++ Name,
    NewTotal =
        case prim_file:read_file_info(Full, [raw]) of
            {ok, #file_info{type = regular, size = Bytes}} ->
                Total + Bytes;
            {ok, #file_info{type = directory}} ->
                %% Thread the running total through the sub-directory walk.
                case prim_file:list_dir(Full) of
                    {ok, Entries} -> walker1(Full, Entries, Total);
                    _ -> Total
                end;
            _ ->
                Total
        end,
    walker1(Dir, Rest, NewTotal).



%% Sergej version + Max's hint
%% Entry point: opens one prim_file port, lists Path through it and hands the
%% entries to walker2/4 with a zero accumulator. Returns 0 when Path cannot
%% be listed.
%%
%% NOTE(review): prim_file:start/0, stop/1 and the port-taking list_dir/2
%% belong to the port-based prim_file API of older OTP releases; confirm the
%% target OTP version still provides them.
walker2(Path) ->
    {ok, Port} = prim_file:start(),
    try
        case prim_file:list_dir(Port, Path) of
            {ok, L} ->
                walker2(Port, Path, L, 0);
            _ ->
                0
        end
    after
        %% Close the port on every exit path; otherwise each call leaks one.
        prim_file:stop(Port)
    end.

%% walker2(Port, Pth, Names, Sz) -> NewSz
%%
%% Walks the entry names of directory Pth using the already-open prim_file
%% port Port, adding the size of every regular file to Sz and descending
%% into every sub-directory. The "." and ".." entries, entries of any other
%% type, and entries whose info or listing cannot be read leave Sz unchanged.
walker2(Port,Pth,["."|T],Sz) ->
    walker2(Port,Pth,T,Sz);
walker2(Port,Pth,[".."|T],Sz) ->
    walker2(Port,Pth,T,Sz);
walker2(Port,Pth,[H|T],Sz) ->
    Nm = Pth++"/"++H,
    %% Query file info through the shared port as well. The port-less
    %% read_file_info(Nm, [raw]) call did not use Port at all, so only the
    %% directory listings benefited from reusing it.
    case prim_file:read_file_info(Port, Nm) of
        {ok,#file_info{type = regular, size = FS}} ->
            walker2(Port,Pth,T,Sz+FS);
        {ok,#file_info{type = directory}} ->
            case prim_file:list_dir(Port,Nm) of
                {ok,L} ->
                    %% Thread the running total through the sub-directory.
                    walker2(Port,Pth,T,walker2(Port,Nm,L,Sz));
                _ ->
                    walker2(Port,Pth,T,Sz)
            end;
        _ ->
            walker2(Port,Pth,T,Sz)
    end;
walker2(_,_,[],Sz) ->
    Sz.


1> timer:tc(fun() -> dir:walker("/usr/share") end).
{1538933,28941}
2> timer:tc(fun() -> dir:walker1("/usr/share") end).
{1492408,447632520}
3> timer:tc(fun() -> dir:walker2("/usr/share") end).
{1477578,447632520}

We are getting close to 1 second (timer:tc reports microseconds). Any other ideas for improvement?

/Frank

Le sam. 10 déc. 2016 à 15:30, Mikael Pettersson <mikpelinux@REDACTED> a
écrit :

> Stanislaw Klekot writes:
>
>  > On Fri, Dec 09, 2016 at 11:15:58PM +0000, Frank Muller wrote:
>
>  > > I would like to improve the speed of my directory walker.
>
>  > >
>
>  > > walk(Dir) ->
>
>  > >     {ok, Files} = prim_file:list_dir(Dir),
>
>  > >     walk(Dir, Files).
>
>  >
>
>  > Why prim_file:list_dir() instead of file:list_dir()? The former is
>
>  > undocumented internal function.
>
>
>
> list_dir can be a very time-consuming operation, and in those cases
>
> using file:list_dir would block the single file server for everything
>
> else.  We routinely use prim_file:list_dir to reduce the negative
>
> effects of accessing large directories.
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://erlang.org/pipermail/erlang-questions/attachments/20161210/84d8b9b3/attachment.htm>


More information about the erlang-questions mailing list