[erlang-questions] Hipe and Binary - Bitstring

Oliver Bollmann oliver.bollmann@REDACTED
Thu Mar 28 01:02:44 CET 2019


Hi Kostis,

attached the missing module!

The fact is: the factor is stable, 3 seconds with native, 20 minutes 
without native,
the result of the function is the same!

Between the 1,000 steps i do not get any traces messages on native!

If i trace the gc, the last lines, at the end, with native are:

2019-03-28 00:08:47.834 gc_minor_start [{wordsize,0},{old_heap_block_size,0},{heap_block_size,410034027},{mbuf_size,105},{recent_size,285921217},{stack_size,30},{old_heap_size,0},{heap_size,410033946},{bin_vheap_size,4539999},{bin_vheap_block_size,7427891},{bin_old_vheap_size,0},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:08:51.778 gc_minor_end [{wordsize,124112863},{old_heap_block_size,590448998},{heap_block_size,79467343},{mbuf_size,0},{recent_size,120},{stack_size,30},{old_heap_size,285921068},{heap_size,120},{bin_vheap_size,2},{bin_vheap_block_size,3713945},{bin_old_vheap_size,3736072},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:08:53.716 gc_minor_start [{wordsize,0},{old_heap_block_size,590448998},{heap_block_size,79467343},{mbuf_size,0},{recent_size,120},{stack_size,60},{old_heap_size,285921068},{heap_size,79467276},{bin_vheap_size,651374},{bin_vheap_block_size,3713945},{bin_old_vheap_size,3736072},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:08:53.716 gc_minor_end [{wordsize,0},{old_heap_block_size,590448998},{heap_block_size,79467343},{mbuf_size,0},{recent_size,120},{stack_size,60},{old_heap_size,285921068},{heap_size,79467276},{bin_vheap_size,651374},{bin_vheap_block_size,3713945},{bin_old_vheap_size,3736072},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:08:53.716 gc_major_start [{wordsize,0},{old_heap_block_size,590448998},{heap_block_size,79467343},{mbuf_size,0},{recent_size,120},{stack_size,60},{old_heap_size,285921068},{heap_size,79467276},{bin_vheap_size,651374},{bin_vheap_block_size,3713945},{bin_old_vheap_size,3736072},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:08:57.587 gc_major_end [{wordsize,79467130},{old_heap_block_size,0},{heap_block_size,410034027},{mbuf_size,0},{recent_size,285921226},{stack_size,60},{old_heap_size,0},{heap_size,285921226},{bin_vheap_size,3736074},{bin_vheap_block_size,6009163},{bin_old_vheap_size,0},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:09:03.228 gc_minor_start [{wordsize,0},{old_heap_block_size,0},{heap_block_size,410034027},{mbuf_size,15972166},{recent_size,285921226},{stack_size,19},{old_heap_size,0},{heap_size,402266882},{bin_vheap_size,4689732},{bin_vheap_block_size,6009163},{bin_old_vheap_size,0},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:09:07.676 gc_minor_end [{wordsize,116345800},{old_heap_block_size,590448998},{heap_block_size,79467343},{mbuf_size,0},{recent_size,15972177},{stack_size,19},{old_heap_size,285921071},{heap_size,15972177},{bin_vheap_size,0},{bin_vheap_block_size,3004581},{bin_old_vheap_size,3736072},{bin_old_vheap_block_size,1090210}]


On not using native i get between the 1,000 steps:

...
2019-03-28 00:54:45.408 gc_minor_start [{wordsize,10970},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516968},{heap_size,11001},{bin_vheap_size,31252},{bin_vheap_block_size,46422},{bin_old_vheap_size,5156641},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:46.123 gc_minor_end [{wordsize,0},{old_heap_block_size,410034027},{heap_block_size,28690},{mbuf_size,0},{recent_size,10980},{stack_size,38},{old_heap_size,287516989},{heap_size,10980},{bin_vheap_size,0},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:46.135 gc_minor_start [{wordsize,15627},{old_heap_block_size,410034027},{heap_block_size,28690},{mbuf_size,0},{recent_size,10980},{stack_size,38},{old_heap_size,287516989},{heap_size,22186},{bin_vheap_size,62502},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:46.838 gc_minor_end [{wordsize,22165},{old_heap_block_size,410034027},{heap_block_size,28690},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516989},{heap_size,21},{bin_vheap_size,31252},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:46.838 gc_minor_start [{wordsize,0},{old_heap_block_size,410034027},{heap_block_size,28690},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516989},{heap_size,161},{bin_vheap_size,46878},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:47.549 gc_minor_end [{wordsize,137},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516992},{heap_size,21},{bin_vheap_size,15626},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:47.946 gc_minor_start [{wordsize,15627},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516992},{heap_size,135},{bin_vheap_size,62502},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:48.313 gc_minor_end [{wordsize,114},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516992},{heap_size,21},{bin_vheap_size,31252},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:48.313 gc_minor_start [{wordsize,0},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516992},{heap_size,155},{bin_vheap_size,46878},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:49.030 gc_minor_end [{wordsize,131},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516995},{heap_size,21},{bin_vheap_size,15626},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:49.788 gc_minor_start [{wordsize,15627},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516995},{heap_size,135},{bin_vheap_size,62502},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:49.789 gc_minor_end [{wordsize,114},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516995},{heap_size,21},{bin_vheap_size,31252},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:49.789 gc_minor_start [{wordsize,0},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516995},{heap_size,161},{bin_vheap_size,46878},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:50.508 gc_minor_end [{wordsize,137},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516998},{heap_size,21},{bin_vheap_size,15626},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:50.520 gc_minor_start [{wordsize,15627},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516998},{heap_size,135},{bin_vheap_size,62502},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:51.271 gc_minor_end [{wordsize,114},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516998},{heap_size,21},{bin_vheap_size,31252},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:51.271 gc_minor_start [{wordsize,0},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516998},{heap_size,155},{bin_vheap_size,46878},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:51.981 gc_minor_end [{wordsize,131},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287517001},{heap_size,21},{bin_vheap_size,15626},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:51.993 gc_minor_start [{wordsize,15627},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287517001},{heap_size,135},{bin_vheap_size,62503},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:52.738 gc_minor_end [{wordsize,114},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287517001},{heap_size,21},{bin_vheap_size,31252},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:52.739 gc_minor_start [{wordsize,0},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287517001},{heap_size,155},{bin_vheap_size,46878},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
...

Oliver

On 27.03.19 21:30, Kostis Sagonas wrote:
> On 3/27/19 1:04 PM, John Högberg wrote:
>> Hi Oliver,
>>
>> I've tried to reproduce this discrepancy on my machine, but I can 
>> only see a modest difference on latest OTP 21 (the results are in 
>> microseconds):
>>
>> Erlang/OTP 21 [erts-10.3.1] [source] [64-bit] [smp:24:24] 
>> [ds:24:24:10] [async-threads:1] [hipe]
>>
>>
>> Eshell V10.3.1  (abort with ^G)
>>
>> 1> c(t, []).
>>
>> {ok,t}
>>
>> 2> t:bench(one).
>>
>> 15957304
>>
>> 3> t:bench(union).
>>
>> 559470
>>
>> 4> c(t, [native]).
>>
>> {ok,t}
>>
>> 5> t:bench(one).
>>
>> 3611371
>>
>> 6> t:bench(union).
>>
>> 501871
>>
>>
>> I've attached the source code I used for this test, am I missing 
>> something?
>
>
> We are obviously missing the benchmark program that Oliver used to get 
> numbers.  But the "400x faster" figure cannot possibly be right.
>
> Personally, I cannot see how one could turn function union/2, which is 
> a one-liner with two bitstring matches and one construction, into a 
> benchmark.  So, it's not surprising that the performance difference 
> there is very small.
>
> On the other hand, I would not call the performance difference between 
> BEAM and HiPE that you observed "modest".  Four times faster execution 
> is IMO something that deserves a better adjective.
>
> Kostis
>
>
>> On Wed, 2019-03-27 at 10:09 +0100, Oliver Bollmann wrote:
>>>
>>> I use, with binaries like <<1:1000000>>,
>>>
>>> one(F,<<Size:64/unsigned,Bitmap:Size/bitstring,_/bitstring>>) ->
>>>    one(F,Bitmap,0,[]).
>>> one(F, <<0:1,R/bitstring>>,N,Acc) ->
>>>    one(F,R,N +1,Acc);
>>> one(F, <<1:1,R/bitstring>>,N,Acc) ->
>>>    one(F,R,N +1, [F(N) |Acc]);
>>> one(_, <<>>,_,Acc) ->Acc.
>>> union(<<Size:64/unsigned,L:Size/unsigned,P/bitstring>>,
>>> <<Size:64/unsigned,R:Size/unsigned,_/bitstring>>) ->
>>>    <<Size:64/unsigned, (L bor R):Size/unsigned,P/bitstring>>.
>>>
>>> and call this functions 1,000,000 times, this takes for 1,000 calls 
>>> about 20 minutes,
>>>
>>> if i compile with native -compile([native,{hipe, o2}])it takes 3 
>>> seconds for 1,000 calls, so it is about 400x faster !!
>>>
>>> OS: OSX
>>>
>>> What is the secret?
>>>
>>> -- 
>>> Grüße
>>> Oliver Bollmann
>>> _______________________________________________
>>> erlang-questions mailing list
>>> erlang-questions@REDACTED <mailto:erlang-questions@REDACTED>
>>> http://erlang.org/mailman/listinfo/erlang-questions
>>>
>>
>> _______________________________________________
>> erlang-questions mailing list
>> erlang-questions@REDACTED
>> http://erlang.org/mailman/listinfo/erlang-questions
>>
>
>
-- 
Grüße
Oliver Bollmann

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://erlang.org/pipermail/erlang-questions/attachments/20190328/74a80890/attachment.htm>
-------------- next part --------------
-module(bitmap_hipe_test).

%-compile([native]).

%Erlang 20.3
% native on
%bitmap_hipe_test:run(1000000,100,1000,true,false).
%{3634346,done}
% native off
%bitmap_hipe_test:run(1000000,100,1000,true,false).
%{13435975,done}
%
% factor 3,7

%% API
-export([run/0,run/5]).

run() -> run(1000000,100,1000,true,true).

run(Size,Count,Loop,UseTC,UseProf) ->
  % first make count bitmaps
  Bitmaps = make_bitmaps(Size,Count),
  % function F
  F = fun(N) -> case get(N) of _ -> N end end,
  FRun = fun() -> run_(F,Bitmaps,Bitmaps,Loop) end,
  profiling(UseProf,start),
  Res = case UseTC of true -> timer:tc(FRun); _-> FRun() end,
  profiling(UseProf,stop),
  Res.

profiling(false,_) -> ok;
profiling(true,start) -> consider_profiling();
profiling(true,stop) -> profile_output().


run_(_,_,_,0) -> done;
run_(F,[],OrgBitmaps,Loop) ->
  run_(F,OrgBitmaps,OrgBitmaps,Loop-1);
run_(F,[Bitmap1,Bitmap2|Bitmaps],OrgBitmaps,Loop) ->
  one(F,Bitmap1),
  union(Bitmap1,Bitmap2),
  run_(F,[Bitmap2|Bitmaps],OrgBitmaps,Loop-1);
run_(F,[Bitmap|Bitmaps],OrgBitmaps,Loop) ->
  one(F,Bitmap),
  run_(F,Bitmaps,OrgBitmaps,Loop-1).

one(F,<<Size:64/unsigned, Bitmap:Size/bitstring, _/bitstring>>) ->
  one(F,Bitmap,0,[]).
one(F, <<0:1, R/bitstring>>, N, Acc) ->
  one(F, R, N + 1, Acc);
one(F, <<1:1, R/bitstring>>, N, Acc) ->
  one(F, R, N + 1, [F(N) | Acc]);
one(_, <<>>, _, Acc) -> Acc.

union(<<Size:64/unsigned, L:Size/unsigned, P/bitstring>>,
    <<Size:64/unsigned, R:Size/unsigned, _/bitstring>>) ->
  <<Size:64/unsigned, (L bor R):Size/unsigned, P/bitstring>>.

make_bitmaps(Size,Count) ->
  make_bitmaps(Size,Count,[]).

make_bitmaps(_Size,0,Acc) -> Acc;
make_bitmaps(Size,Count,Acc) ->
  make_bitmaps(Size,Count-1,[random_bitmap(Size)|Acc]).

random_bitmap(Size) ->
  % we set 20% of the bitmap, about
  Bytes = bytes(Size) - 8,
  Bits = Bytes * 8,
  Elements = lists:usort([0|[begin R = rand:uniform(Bits-1), erlang:phash2(R,2)*R end || _ <- lists:seq(1, 100000)]]),
  from_list(Elements,[{size,Size}]).

new([{size, Size}]) when Size > 0->
  %% bytes gives us the total bytes of a bitmap, including the
  %% 8 byte size prefix, so we need to substract that.
  Bytes = bytes(Size) - 8,
  Bits = Bytes * 8,
  <<Size:64/unsigned, 0:Bits/unsigned>>.

bytes(Size) ->
  ceiling(Size / 8) + 8.

ceiling(X) when X < 0 ->
  trunc(X);

ceiling(X) ->
  T = trunc(X),
  case X - T == 0 of
    true -> T;
    false -> T + 1
  end.

size(<<Size:64/unsigned, _/binary>>) ->
  Size.

from_list(Elements, [{size, Size}]) ->
  R = from_list_(0, Elements, <<>>),
  Bytes = bytes(Size) - 8,
  Bits = Bytes * 8,
  Missing = Bits - bit_size(R),
  <<Size:64/unsigned, R/bitstring, 0:Missing>>.

from_list_(Pos, [X | R], Acc) when X > Pos ->
  Missing = X - Pos,
  from_list_(X + 1, R, <<Acc/bitstring, 0:Missing, 1:1>>);

from_list_(Pos, [Pos | R], Acc) ->
  from_list_(Pos + 1, R, <<Acc/bitstring, 1:1>>);

from_list_(_, [], R) ->
  R.

profile_output() ->
  eprof:stop_profiling(),
  %eprof:log("procs.profile"),
  %eprof:analyze(procs),
  eprof:log("total.profile"),
  eprof:analyze(total).

consider_profiling() ->
  {ok, Pid} = case eprof:start() of
                {error,{already_started,Pid}} -> {ok,Pid};
                {ok, Pid} -> {ok, Pid}
              end,
  eprof:start_profiling([self()]).%processes() -- [Pid]).


More information about the erlang-questions mailing list