[erlang-questions] Fast directory walker
Frank Muller
frank.muller.erl@REDACTED
Sat Dec 10 18:33:30 CET 2016
All in one file:
-module(dir).
-include_lib("kernel/include/file.hrl").
-export([ walker/1,
walker1/1,
walker2/1 ]).
%% original
%% Counts the regular files found at or below Path.
%%
%% Returns 1 when Path is a regular file, the number of regular files
%% below it when Path is a directory, and 0 for anything else (unreadable
%% paths, devices, ...). file:read_file_info/1 follows symbolic links, so
%% a link is counted as whatever it points to.
walker(Path) ->
    %% io:format("~s~n", [Path]),
    case file:read_file_info(Path) of
        {ok, #file_info{type = regular}} ->
            1;
        {ok, #file_info{type = directory}} ->
            %% List the directory literally rather than globbing
            %% Path ++ "/*": wildcard characters such as "[", "{" or "*"
            %% inside a directory name must not be expanded as a pattern.
            case file:list_dir(Path) of
                {ok, Names} ->
                    lists:foldl(
                        fun(Name, N) -> N + walker(filename:join(Path, Name)) end,
                        0,
                        Names
                    );
                {error, _Reason} ->
                    %% Directory vanished or is not readable: nothing to count.
                    0
            end;
        _ ->
            %% Neither a regular file nor a directory, or not readable.
            0
    end.
%% Sergej version + [raw] option
%% Entry point: lists Path with prim_file:list_dir/1 and hands the entries
%% to walker1/3 with a running total of 0. Returns 0 when Path cannot be
%% listed.
walker1(Path) ->
    walker1_from_listing(Path, prim_file:list_dir(Path)).

%% Dispatches on the result of the directory listing.
walker1_from_listing(Path, {ok, Entries}) ->
    walker1(Path, Entries, 0);
walker1_from_listing(_Path, _NotListed) ->
    0.
%% walker1(Dir, Entries, Total) -> NewTotal
%%
%% Walks the entry names of directory Dir and adds the size of every
%% regular file found at or below them to Total. Entries whose file info
%% cannot be read, or that are neither regular files nor directories,
%% leave the total unchanged.
walker1(_Dir, [], Total) ->
    Total;
walker1(Dir, [Name | Rest], Total) when Name =:= "."; Name =:= ".." ->
    %% Never descend into the directory itself or its parent.
    walker1(Dir, Rest, Total);
walker1(Dir, [Name | Rest], Total) ->
    Full = Dir ++ "/" ++ Name,
    Info = prim_file:read_file_info(Full, [raw]),
    walker1(Dir, Rest, walker1_entry(Full, Info, Total)).

%% Folds a single entry into the running total, given its file info result.
walker1_entry(_Full, {ok, #file_info{type = regular, size = Bytes}}, Total) ->
    Total + Bytes;
walker1_entry(Full, {ok, #file_info{type = directory}}, Total) ->
    case prim_file:list_dir(Full) of
        {ok, Entries} ->
            walker1(Full, Entries, Total);
        _ ->
            Total
    end;
walker1_entry(_Full, _Other, Total) ->
    Total.
%% Sergej version + Max's hint
%% Entry point: opens one file-driver port with prim_file:start/0 and
%% reuses it for the whole walk started by walker2/4. Returns the total
%% accumulated by walker2/4, or 0 when Path cannot be listed.
walker2(Path) ->
    {ok, Port} = prim_file:start(),
    try
        case prim_file:list_dir(Port, Path) of
            {ok, L} ->
                walker2(Port, Path, L, 0);
            _ ->
                0
        end
    after
        %% Always release the port, even if the walk crashes; otherwise
        %% every call to walker2/1 leaves one more open port behind.
        prim_file:stop(Port)
    end.
%% walker2(Port, Pth, Entries, Sz) -> NewSz
%%
%% Walks the entry names of directory Pth and adds the size of every
%% regular file found at or below them to Sz. Port is the file-driver port
%% opened by walker2/1; it is used for every file-system request so that no
%% request has to go through a different channel than the one passed in.
%% Entries whose file info cannot be read, or that are neither regular
%% files nor directories, leave the total unchanged.
walker2(Port, Pth, ["." | T], Sz) ->
    %% Never descend into the directory itself.
    walker2(Port, Pth, T, Sz);
walker2(Port, Pth, [".." | T], Sz) ->
    %% Never climb back into the parent directory.
    walker2(Port, Pth, T, Sz);
walker2(Port, Pth, [H | T], Sz) ->
    Nm = Pth ++ "/" ++ H,
    %% Use the shared Port here as well, consistent with list_dir/2 below;
    %% the original call ignored the port that is threaded through.
    case prim_file:read_file_info(Port, Nm) of
        {ok, #file_info{type = regular, size = FS}} ->
            walker2(Port, Pth, T, Sz + FS);
        {ok, #file_info{type = directory}} ->
            case prim_file:list_dir(Port, Nm) of
                {ok, L} ->
                    %% Finish the sub-directory first, then continue with
                    %% the remaining siblings using the updated total.
                    walker2(Port, Pth, T, walker2(Port, Nm, L, Sz));
                _ ->
                    walker2(Port, Pth, T, Sz)
            end;
        _ ->
            walker2(Port, Pth, T, Sz)
    end;
walker2(_, _, [], Sz) ->
    Sz.
1> timer:tc(fun() -> dir:walker("/usr/share") end).
{1538933,28941}
2> timer:tc(fun() -> dir:walker1("/usr/share") end).
{1492408,447632520}
3> timer:tc(fun() -> dir:walker2("/usr/share") end).
{1477578,447632520}
Getting close to 1sec. Any other ideas for improvement?
/Frank
Le sam. 10 déc. 2016 à 15:30, Mikael Pettersson <mikpelinux@REDACTED> a
écrit :
> Stanislaw Klekot writes:
>
> > On Fri, Dec 09, 2016 at 11:15:58PM +0000, Frank Muller wrote:
>
> > > I would like to improve the speed of my directory walker.
>
> > >
>
> > > walk(Dir) ->
>
> > > {ok, Files} = prim_file:list_dir(Dir),
>
> > > walk(Dir, Files).
>
> >
>
> > Why prim_file:list_dir() instead of file:list_dir()? The former is
>
> > undocumented internal function.
>
>
>
> list_dir can be a very time-consuming operation, and in those cases
>
> using file:list_dir would block the single file server for everything
>
> else. We routinely use prim_file:list_dir to reduce the negative
>
> effects of accessing large directories.
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://erlang.org/pipermail/erlang-questions/attachments/20161210/84d8b9b3/attachment.htm>
More information about the erlang-questions
mailing list