[erlang-questions] Strange performance degradation in dict when it's storing lists

Park, Sungjin <>
Mon Aug 22 10:35:29 CEST 2016


I observed a strange performance degradation in dict.  Let me share the
code I used in the test first.


    %% Dict-backed lookup server used to measure concurrent read latency.
    -module(data).
    %% Public API. The arity exported for start_link must be 0: that is the
    %% only start_link clause defined in this module.
    -export([start_link/0, get/1, get_concurrent/1]).
    %% init/0 is exported so that proc_lib:start_link/3 can invoke it.
    -export([init/0]).

    %% Start the server process through proc_lib, linked to the caller.
    %% The call is synchronous: it returns the term that init/0 passes to
    %% proc_lib:init_ack/1.
    start_link() ->
      InitArgs = [],
      proc_lib:start_link(?MODULE, init, InitArgs).

    %% Entry point of the server process.
    %% Registers the process under the module name, builds the lookup table,
    %% acknowledges the starter with {ok, Pid} and enters the receive loop.
    init() ->
      register(?MODULE, self()),
      % Table contents: every key K in 0..255 maps to the list [1..K]:
      %   0 => [], 1 => [1], 2 => [1,2], ...
      Entries = [{Key, value(Key)} || Key <- lists:seq(0, 255)],
      Table = dict:from_list(Entries),
      proc_lib:init_ack({ok, self()}),
      loop(Table).

    %% Build the value stored under a key: the ascending list 1..Len.
    %% A key of 0 yields the empty list.
    value(Len) ->
      First = 1,
      lists:seq(First, Len).

    %% Server loop. A {get, Key, From} request is answered by sending From the
    %% stored value, or the atom undefined when the key is absent. Any other
    %% message is discarded. The dict itself is never modified.
    loop(Dict) ->
      receive
        {get, Key, From} ->
          Reply =
            case dict:find(Key, Dict) of
              {ok, Found} -> Found;
              error -> undefined
            end,
          From ! Reply,
          loop(Dict);
        _Other ->
          loop(Dict)
      end.

    %% Ask the registered server for the value stored under Key and block
    %% until a message arrives. The reply is untagged, so the first message of
    %% any kind found in the caller's mailbox is what gets returned.
    %% NOTE: the name coincides with the auto-imported BIF erlang:get/1; the
    %% code here only invokes it with an explicit module (via timer:tc/3).
    get(Key) ->
      Caller = self(),
      ?MODULE ! {get, Key, Caller},
      receive
        Reply -> Reply
      end.

    %% Spawn N worker processes that each time a single get/1 call, and
    %% return the average execution time in microseconds (the unit reported
    %% by timer:tc/3).
    %% N must be at least 1: with N = 0 no timings are collected and the final
    %% division fails with badarith.
    -spec get_concurrent(pos_integer()) -> float().
    get_concurrent(N) ->
      Profiler = self(),
      % Workers are spawned with proc_lib:spawn_link/1; the module name was
      % previously misspelled, which made every call fail with undef.
      Workers = [
        proc_lib:spawn_link(
          fun () ->
            % Keys 0..255 are stored, so reduce modulo 256; a modulus of 255
            % could never select key 255.
            Key = erlang:system_time() rem 256,
            Result = timer:tc(?MODULE, get, [Key]),
            % Tag the timing with the worker pid so the collector can tell
            % which workers have reported.
            Profiler ! {self(), Result}
          end
        ) || _ <- lists:seq(1, N)
      ],
      Ts = receive_all(Workers, []),
      lists:sum(Ts) / length(Ts).

    %% Collect one {Pid, {Time, Result}} report per pending worker.
    %% Each report removes its sender from the pending list and prepends the
    %% elapsed time to the accumulator; the accumulated times are returned
    %% once no workers remain. Blocks until every worker has reported.
    receive_all([], Times) ->
      Times;
    receive_all(Pending, Times) ->
      receive
        {Pid, {Elapsed, _Result}} ->
          StillPending = lists:delete(Pid, Pending),
          receive_all(StillPending, [Elapsed | Times])
      end.


When I ran the test in the shell, I got:

    1> data:start_link().
    {ok, <0.6497.46>}
    2> timer:tc(data, get, [5]).
    {23,[1,2,3,4,5]}


I could get a value in 23 microseconds and expected the results for
concurrent gets to be not much slower, but:

    3> data:get_concurrent(100000).
    19442.828


The value 19442.828 microseconds seemed far too large, so I tested with
other kinds of values, such as large binaries and tuples.  This time the
same get_concurrent(100000) call gave me roughly 200 microseconds.

I also tried the same with an ets table instead of a dict, and there the
performance did not degrade depending on the value type.

-- 
Park, Sungjin
-------------------------------------------------------------------------------------------------------------------
Peculiar travel suggestions are dancing lessons from god.
  -- The Books of Bokonon
-------------------------------------------------------------------------------------------------------------------
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://erlang.org/pipermail/erlang-questions/attachments/20160822/9cdb89f0/attachment.html>


More information about the erlang-questions mailing list