[erlang-questions] Hipe and Binary - Bitstring
Oliver Bollmann
oliver.bollmann@REDACTED
Thu Mar 28 01:02:44 CET 2019
Hi Kostis,
attached the missing module!
The fact is: the factor is stable, 3 seconds with native, 20 minutes
without native,
the result of the function is the same!
Between the 1,000 steps i do not get any traces messages on native!
If i trace the gc, the last lines, at the end, with native are:
2019-03-28 00:08:47.834 gc_minor_start [{wordsize,0},{old_heap_block_size,0},{heap_block_size,410034027},{mbuf_size,105},{recent_size,285921217},{stack_size,30},{old_heap_size,0},{heap_size,410033946},{bin_vheap_size,4539999},{bin_vheap_block_size,7427891},{bin_old_vheap_size,0},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:08:51.778 gc_minor_end [{wordsize,124112863},{old_heap_block_size,590448998},{heap_block_size,79467343},{mbuf_size,0},{recent_size,120},{stack_size,30},{old_heap_size,285921068},{heap_size,120},{bin_vheap_size,2},{bin_vheap_block_size,3713945},{bin_old_vheap_size,3736072},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:08:53.716 gc_minor_start [{wordsize,0},{old_heap_block_size,590448998},{heap_block_size,79467343},{mbuf_size,0},{recent_size,120},{stack_size,60},{old_heap_size,285921068},{heap_size,79467276},{bin_vheap_size,651374},{bin_vheap_block_size,3713945},{bin_old_vheap_size,3736072},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:08:53.716 gc_minor_end [{wordsize,0},{old_heap_block_size,590448998},{heap_block_size,79467343},{mbuf_size,0},{recent_size,120},{stack_size,60},{old_heap_size,285921068},{heap_size,79467276},{bin_vheap_size,651374},{bin_vheap_block_size,3713945},{bin_old_vheap_size,3736072},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:08:53.716 gc_major_start [{wordsize,0},{old_heap_block_size,590448998},{heap_block_size,79467343},{mbuf_size,0},{recent_size,120},{stack_size,60},{old_heap_size,285921068},{heap_size,79467276},{bin_vheap_size,651374},{bin_vheap_block_size,3713945},{bin_old_vheap_size,3736072},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:08:57.587 gc_major_end [{wordsize,79467130},{old_heap_block_size,0},{heap_block_size,410034027},{mbuf_size,0},{recent_size,285921226},{stack_size,60},{old_heap_size,0},{heap_size,285921226},{bin_vheap_size,3736074},{bin_vheap_block_size,6009163},{bin_old_vheap_size,0},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:09:03.228 gc_minor_start [{wordsize,0},{old_heap_block_size,0},{heap_block_size,410034027},{mbuf_size,15972166},{recent_size,285921226},{stack_size,19},{old_heap_size,0},{heap_size,402266882},{bin_vheap_size,4689732},{bin_vheap_block_size,6009163},{bin_old_vheap_size,0},{bin_old_vheap_block_size,1090210}]
2019-03-28 00:09:07.676 gc_minor_end [{wordsize,116345800},{old_heap_block_size,590448998},{heap_block_size,79467343},{mbuf_size,0},{recent_size,15972177},{stack_size,19},{old_heap_size,285921071},{heap_size,15972177},{bin_vheap_size,0},{bin_vheap_block_size,3004581},{bin_old_vheap_size,3736072},{bin_old_vheap_block_size,1090210}]
On not using native i get between the 1,000 steps:
...
2019-03-28 00:54:45.408 gc_minor_start [{wordsize,10970},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516968},{heap_size,11001},{bin_vheap_size,31252},{bin_vheap_block_size,46422},{bin_old_vheap_size,5156641},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:46.123 gc_minor_end [{wordsize,0},{old_heap_block_size,410034027},{heap_block_size,28690},{mbuf_size,0},{recent_size,10980},{stack_size,38},{old_heap_size,287516989},{heap_size,10980},{bin_vheap_size,0},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:46.135 gc_minor_start [{wordsize,15627},{old_heap_block_size,410034027},{heap_block_size,28690},{mbuf_size,0},{recent_size,10980},{stack_size,38},{old_heap_size,287516989},{heap_size,22186},{bin_vheap_size,62502},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:46.838 gc_minor_end [{wordsize,22165},{old_heap_block_size,410034027},{heap_block_size,28690},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516989},{heap_size,21},{bin_vheap_size,31252},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:46.838 gc_minor_start [{wordsize,0},{old_heap_block_size,410034027},{heap_block_size,28690},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516989},{heap_size,161},{bin_vheap_size,46878},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:47.549 gc_minor_end [{wordsize,137},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516992},{heap_size,21},{bin_vheap_size,15626},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:47.946 gc_minor_start [{wordsize,15627},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516992},{heap_size,135},{bin_vheap_size,62502},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:48.313 gc_minor_end [{wordsize,114},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516992},{heap_size,21},{bin_vheap_size,31252},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:48.313 gc_minor_start [{wordsize,0},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516992},{heap_size,155},{bin_vheap_size,46878},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:49.030 gc_minor_end [{wordsize,131},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516995},{heap_size,21},{bin_vheap_size,15626},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:49.788 gc_minor_start [{wordsize,15627},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516995},{heap_size,135},{bin_vheap_size,62502},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:49.789 gc_minor_end [{wordsize,114},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516995},{heap_size,21},{bin_vheap_size,31252},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:49.789 gc_minor_start [{wordsize,0},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516995},{heap_size,161},{bin_vheap_size,46878},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:50.508 gc_minor_end [{wordsize,137},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516998},{heap_size,21},{bin_vheap_size,15626},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:50.520 gc_minor_start [{wordsize,15627},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516998},{heap_size,135},{bin_vheap_size,62502},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:51.271 gc_minor_end [{wordsize,114},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516998},{heap_size,21},{bin_vheap_size,31252},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:51.271 gc_minor_start [{wordsize,0},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287516998},{heap_size,155},{bin_vheap_size,46878},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:51.981 gc_minor_end [{wordsize,131},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287517001},{heap_size,21},{bin_vheap_size,15626},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:51.993 gc_minor_start [{wordsize,15627},{old_heap_block_size,410034027},{heap_block_size,233},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287517001},{heap_size,135},{bin_vheap_size,62503},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:52.738 gc_minor_end [{wordsize,114},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287517001},{heap_size,21},{bin_vheap_size,31252},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
2019-03-28 00:54:52.739 gc_minor_start [{wordsize,0},{old_heap_block_size,410034027},{heap_block_size,17731},{mbuf_size,0},{recent_size,21},{stack_size,38},{old_heap_size,287517001},{heap_size,155},{bin_vheap_size,46878},{bin_vheap_block_size,46422},{bin_old_vheap_size,5187893},{bin_old_vheap_block_size,5708181}]
...
Oliver
On 27.03.19 21:30, Kostis Sagonas wrote:
> On 3/27/19 1:04 PM, John Högberg wrote:
>> Hi Oliver,
>>
>> I've tried to reproduce this discrepancy on my machine, but I can
>> only see a modest difference on latest OTP 21 (the results are in
>> microseconds):
>>
>> Erlang/OTP 21 [erts-10.3.1] [source] [64-bit] [smp:24:24]
>> [ds:24:24:10] [async-threads:1] [hipe]
>>
>>
>> Eshell V10.3.1 (abort with ^G)
>>
>> 1> c(t, []).
>>
>> {ok,t}
>>
>> 2> t:bench(one).
>>
>> 15957304
>>
>> 3> t:bench(union).
>>
>> 559470
>>
>> 4> c(t, [native]).
>>
>> {ok,t}
>>
>> 5> t:bench(one).
>>
>> 3611371
>>
>> 6> t:bench(union).
>>
>> 501871
>>
>>
>> I've attached the source code I used for this test, am I missing
>> something?
>
>
> We are obviously missing the benchmark program that Oliver used to get
> numbers. But the "400x faster" figure cannot possibly be right.
>
> Personally, I cannot see how one could turn function union/2, which is
> a one-liner with two bitstring matches and one construction, into a
> benchmark. So, it's not surprising that the performance difference
> there is very small.
>
> On the other hand, I would not call the performance difference between
> BEAM and HiPE that you observed "modest". Four times faster execution
> is IMO something that deserves a better adjective.
>
> Kostis
>
>
>> On Wed, 2019-03-27 at 10:09 +0100, Oliver Bollmann wrote:
>>>
>>> I use, with binaries like <<1:1000000>>,
>>>
>>> one(F,<<Size:64/unsigned,Bitmap:Size/bitstring,_/bitstring>>) ->
>>> one(F,Bitmap,0,[]).
>>> one(F, <<0:1,R/bitstring>>,N,Acc) ->
>>> one(F,R,N +1,Acc);
>>> one(F, <<1:1,R/bitstring>>,N,Acc) ->
>>> one(F,R,N +1, [F(N) |Acc]);
>>> one(_, <<>>,_,Acc) ->Acc.
>>> union(<<Size:64/unsigned,L:Size/unsigned,P/bitstring>>,
>>> <<Size:64/unsigned,R:Size/unsigned,_/bitstring>>) ->
>>> <<Size:64/unsigned, (L bor R):Size/unsigned,P/bitstring>>.
>>>
>>> and call this functions 1,000,000 times, this takes for 1,000 calls
>>> about 20 minutes,
>>>
>>> if i compile with native -compile([native,{hipe, o2}])it takes 3
>>> seconds for 1,000 calls, so it is about 400x faster !!
>>>
>>> OS: OSX
>>>
>>> What is the secret?
>>>
>>> --
>>> Grüße
>>> Oliver Bollmann
>>> _______________________________________________
>>> erlang-questions mailing list
>>> erlang-questions@REDACTED <mailto:erlang-questions@REDACTED>
>>> http://erlang.org/mailman/listinfo/erlang-questions
>>>
>>
>> _______________________________________________
>> erlang-questions mailing list
>> erlang-questions@REDACTED
>> http://erlang.org/mailman/listinfo/erlang-questions
>>
>
>
--
Grüße
Oliver Bollmann
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://erlang.org/pipermail/erlang-questions/attachments/20190328/74a80890/attachment.htm>
-------------- next part --------------
-module(bitmap_hipe_test).
%-compile([native]).
%Erlang 20.3
% native on
%bitmap_hipe_test:run(1000000,100,1000,true,false).
%{3634346,done}
% native off
%bitmap_hipe_test:run(1000000,100,1000,true,false).
%{13435975,done}
%
% factor 3,7
%% API
-export([run/0,run/5]).
run() -> run(1000000,100,1000,true,true).
run(Size,Count,Loop,UseTC,UseProf) ->
% first make count bitmaps
Bitmaps = make_bitmaps(Size,Count),
% function F
F = fun(N) -> case get(N) of _ -> N end end,
FRun = fun() -> run_(F,Bitmaps,Bitmaps,Loop) end,
profiling(UseProf,start),
Res = case UseTC of true -> timer:tc(FRun); _-> FRun() end,
profiling(UseProf,stop),
Res.
profiling(false,_) -> ok;
profiling(true,start) -> consider_profiling();
profiling(true,stop) -> profile_output().
run_(_,_,_,0) -> done;
run_(F,[],OrgBitmaps,Loop) ->
run_(F,OrgBitmaps,OrgBitmaps,Loop-1);
run_(F,[Bitmap1,Bitmap2|Bitmaps],OrgBitmaps,Loop) ->
one(F,Bitmap1),
union(Bitmap1,Bitmap2),
run_(F,[Bitmap2|Bitmaps],OrgBitmaps,Loop-1);
run_(F,[Bitmap|Bitmaps],OrgBitmaps,Loop) ->
one(F,Bitmap),
run_(F,Bitmaps,OrgBitmaps,Loop-1).
one(F,<<Size:64/unsigned, Bitmap:Size/bitstring, _/bitstring>>) ->
one(F,Bitmap,0,[]).
one(F, <<0:1, R/bitstring>>, N, Acc) ->
one(F, R, N + 1, Acc);
one(F, <<1:1, R/bitstring>>, N, Acc) ->
one(F, R, N + 1, [F(N) | Acc]);
one(_, <<>>, _, Acc) -> Acc.
union(<<Size:64/unsigned, L:Size/unsigned, P/bitstring>>,
<<Size:64/unsigned, R:Size/unsigned, _/bitstring>>) ->
<<Size:64/unsigned, (L bor R):Size/unsigned, P/bitstring>>.
make_bitmaps(Size,Count) ->
make_bitmaps(Size,Count,[]).
make_bitmaps(_Size,0,Acc) -> Acc;
make_bitmaps(Size,Count,Acc) ->
make_bitmaps(Size,Count-1,[random_bitmap(Size)|Acc]).
random_bitmap(Size) ->
% we set 20% of the bitmap, about
Bytes = bytes(Size) - 8,
Bits = Bytes * 8,
Elements = lists:usort([0|[begin R = rand:uniform(Bits-1), erlang:phash2(R,2)*R end || _ <- lists:seq(1, 100000)]]),
from_list(Elements,[{size,Size}]).
new([{size, Size}]) when Size > 0->
%% bytes gives us the total bytes of a bitmap, including the
%% 8 byte size prefix, so we need to substract that.
Bytes = bytes(Size) - 8,
Bits = Bytes * 8,
<<Size:64/unsigned, 0:Bits/unsigned>>.
bytes(Size) ->
ceiling(Size / 8) + 8.
ceiling(X) when X < 0 ->
trunc(X);
ceiling(X) ->
T = trunc(X),
case X - T == 0 of
true -> T;
false -> T + 1
end.
size(<<Size:64/unsigned, _/binary>>) ->
Size.
from_list(Elements, [{size, Size}]) ->
R = from_list_(0, Elements, <<>>),
Bytes = bytes(Size) - 8,
Bits = Bytes * 8,
Missing = Bits - bit_size(R),
<<Size:64/unsigned, R/bitstring, 0:Missing>>.
from_list_(Pos, [X | R], Acc) when X > Pos ->
Missing = X - Pos,
from_list_(X + 1, R, <<Acc/bitstring, 0:Missing, 1:1>>);
from_list_(Pos, [Pos | R], Acc) ->
from_list_(Pos + 1, R, <<Acc/bitstring, 1:1>>);
from_list_(_, [], R) ->
R.
profile_output() ->
eprof:stop_profiling(),
%eprof:log("procs.profile"),
%eprof:analyze(procs),
eprof:log("total.profile"),
eprof:analyze(total).
consider_profiling() ->
{ok, Pid} = case eprof:start() of
{error,{already_started,Pid}} -> {ok,Pid};
{ok, Pid} -> {ok, Pid}
end,
eprof:start_profiling([self()]).%processes() -- [Pid]).
More information about the erlang-questions
mailing list