[erlang-questions] Fast directory walker

Dan Gudmundsson dangud@REDACTED
Sat Dec 10 19:46:34 CET 2016


erl +A0 to turn off async file access?
Or isn’t that used for list dir and file info?

Den lör 10 dec 2016 18:33 Frank Muller <frank.muller.erl@REDACTED> skrev:

> All in one file:
>
> -module(dir).
>
> -include_lib("kernel/include/file.hrl").
>
> -export([ walker/1,
>          walker1/1,
>          walker2/1 ]).
>
> %% original
> walker(Path) ->
>    %% io:format("~s~n", [Path]),
>    case file:read_file_info(Path) of
>
>        {ok, #file_info{type = regular}} ->
>            1;
>        _ -> %% don't care about symlinks for now, assume a directory
>
>            Children = filelib:wildcard(Path ++ "/*"),
>            lists:foldl(fun(P, N) -> N + walker(P) end, 0, Children)
>    end.
>
> %% Sergej version + [raw] option
>
> walker1(Path) ->
>    case prim_file:list_dir(Path) of
>        {ok,L} ->
>            walker1(Path,L,0);
>        _ ->
>            0
>    end.
> walker1(Pth,["."|T],Sz) ->
>    walker1(Pth,T,Sz);
> walker1(Pth,[".."|T],Sz) ->
>    walker1(Pth,T,Sz);
> walker1(Pth,[H|T],Sz) ->
>    Nm = Pth++"/"++H,
>    case prim_file:read_file_info(Nm, [raw]) of
>
>        {ok,#file_info{type = regular, size = FS}} ->
>            walker1(Pth,T,Sz+FS);
>        {ok,#file_info{type = directory}} ->
>        case prim_file:list_dir(Nm) of
>                {ok,L} ->
>                    walker1(Pth, T, walker1(Nm,L,Sz));
>                _ ->
>                    walker1(Pth, T, Sz)
>            end;
>        _ ->
>            walker1(Pth,T,Sz)
>    end;
> walker1(_,[],Sz) ->
>    Sz.
>
>
>
> %% Sergej version + Max's hint
> walker2(Path) ->
>    {ok, Port} = prim_file:start(),
>    case prim_file:list_dir(Port, Path) of
>        {ok,L} ->
>            walker2(Port,Path,L,0);
>        _ ->
>            0
>    end.
>
> walker2(Port,Pth,["."|T],Sz) ->
>    walker2(Port,Pth,T,Sz);
> walker2(Port,Pth,[".."|T],Sz) ->
>    walker2(Port,Pth,T,Sz);
> walker2(Port,Pth,[H|T],Sz) ->
>
>    Nm = Pth++"/"++H,
>    case prim_file:read_file_info(Nm, [raw]) of
>
>        {ok,#file_info{type = regular, size = FS}} ->
>            walker2(Port,Pth,T,Sz+FS);
>
>        {ok,#file_info{type = directory}} ->
>            case prim_file:list_dir(Port,Nm) of
>                {ok,L} ->
>                    walker2(Port,Pth,T,walker2(Port,Nm,L,Sz));
>                _ ->
>                    walker2(Port,Pth, T, Sz)
>            end;
>        _ ->
>            walker2(Port,Pth,T,Sz)
>    end;
> walker2(_,_,[],Sz) ->
>    Sz.
>
>
> 1> timer:tc(fun() -> dir:walker("/usr/share") end).
> {1538933,28941}
> 2> timer:tc(fun() -> dir:walker1("/usr/share") end).
> {1492408,447632520}
> 3> timer:tc(fun() -> dir:walker2("/usr/share") end).
> {1477578,447632520}
>
> Getting close to 1sec. Any other ideas for improvement?
>
>
> /Frank
>
> Le sam. 10 déc. 2016 à 15:30, Mikael Pettersson <mikpelinux@REDACTED> a
> écrit :
>
> Stanislaw Klekot writes:
>
>  > On Fri, Dec 09, 2016 at 11:15:58PM +0000, Frank Muller wrote:
>
>  > > I would like to improve the speed of my directory walker.
>
>  > >
>
>  > > walk(Dir) ->
>
>  > >     {ok, Files} = prim_file:list_dir(Dir),
>
>  > >     walk(Dir, Files).
>
>  >
>
>  > Why prim_file:list_dir() instead of file:list_dir()? The former is
>
>  > undocumented internal function.
>
>
>
> list_dir can be a very time-consuming operation, and in those cases
>
> using file:list_dir would block the single file server for everything
>
> else.  We routinely use prim_file:list_dir to reduce the negative
>
> effects of accessing large directories.
>
> _______________________________________________
> erlang-questions mailing list
> erlang-questions@REDACTED
> http://erlang.org/mailman/listinfo/erlang-questions
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://erlang.org/pipermail/erlang-questions/attachments/20161210/bc106ff2/attachment.htm>


More information about the erlang-questions mailing list