Traversing a binary
Per Gustafsson
per.gustafsson@REDACTED
Tue Oct 5 10:38:16 CEST 2004
I've tested the opposite of this, something that avoids bignums and folds
the sum whenever it gets close to being a bignum.
The program exists in four different versions, the diffrence between the
programs are wheter or not they create subbinaries and wheter or not they
take out the 16-bit integer in a little-endian way.
I compare the results with a handcoded version of Tony's program using 992
as the parameter.
When I compute the checksum of a binary of about 64k 3000 times on a 2.4
GHz pentium 4 I get the following results:
No sub-binaries, little-endian:
BEAM: 9.72 s
HiPE: 0.98 s
Sub-binaries, little-endian:
BEAM: 9.46 s
HiPE: 0.86 s
No sub-binaries, big-endian:
BEAM: 3.37 s
HiPE: 1.25 s
Sub-binaries, big-endian:
BEAM: 4.51 s
HiPE: 1.14 s
Tony's Bignum version n=992:
BEAM: 2.96 s
HiPE: 3.72 s
Obviously the little-endian version is only better on a little-endian
machine, but it seems that the bignum approach is competitive as long as
one does not native compile.
The testruns are displayed below:
39> c(csum).
{ok,csum}
40> hipe:c(csum).
{ok,csum}
41> csum:test().
{66264,39354,39354,39354,39354,39354,970,870,1220,1130,3890}
42> c(csum).
{ok,csum}
43> hipe:c(csum).
{ok,csum}
44> csum:test().
{66048,17289,980,860,1250,1140,3720}
45> c(csum).
{ok,csum}
46> csum:test().
{66048,17289,9720,9460,3370,4510,2960}
47>
-------------- next part --------------
%%%-------------------------------------------------------------------
%%% File : checksum.erl
%%% Author : Javier Paris Fernandez <paris@REDACTED>
%%% Description : Ip Protocol Checksum
%%%
%%% Created : 4 Aug 2004 by Javier Paris Fernandez <paris@REDACTED>
%%%-------------------------------------------------------------------
-module(csum).
-export([test/0]).
-define(INT16MAX, 65535).
test() ->
{ok,B} = file:read_file('csum.erl'),
C = <<B/binary,B/binary,B/binary,B/binary>>,
Bin = <<C/binary,C/binary,C/binary>>,
Size=size(Bin),
garbage_collect(),
statistics(runtime),
V=loop1(Bin, 3000),
{_,R1}=statistics(runtime),
garbage_collect(),
statistics(runtime),
V=loop2(Bin, 3000),
{_,R2}=statistics(runtime),
garbage_collect(),
statistics(runtime),
V=loop3(Bin, 3000),
{_,R3}=statistics(runtime),
garbage_collect(),
statistics(runtime),
V=loop4(Bin, 3000),
{_,R4}=statistics(runtime),
garbage_collect(),
statistics(runtime),
V=loop5(Bin, 3000),
{_,R5}=statistics(runtime),
{Size,V,R1,R2,R3,R4, R5}.
loop1(Inp, 1) ->
checksum_1(0,Inp,0);
loop1(Inp, N) ->
checksum_1(0,Inp,0),
loop1(Inp, N-1).
loop2(Inp, 1) ->
checksum_2(Inp,0);
loop2(Inp, N) ->
checksum_2(Inp,0),
loop2(Inp, N-1).
loop3(Inp, 1) ->
checksum_3(0,Inp,0);
loop3(Inp, N) ->
checksum_3(0,Inp,0),
loop3(Inp, N-1).
loop4(Inp, 1) ->
checksum_4(Inp,0);
loop4(Inp, N) ->
checksum_4(Inp,0),
loop4(Inp, N-1).
loop5(Inp, 1) ->
checksum_5(Inp);
loop5(Inp, N) ->
checksum_5(Inp),
loop5(Inp, N-1).
%% uses little-endian byteordering and does not create sub-binaries
checksum_1(N,Bin,Csum) when Csum > 16#6ffffff ->
checksum_1(N, Bin, Csum band ?INT16MAX + (Csum bsr 16));
checksum_1(N,Bin,Csum) ->
case Bin of
<<_:N/binary, N1:16/integer-little,N2:16/integer-little,N3:16/integer-little,
N4:16/integer-little,N5:16/integer-little,N6:16/integer-little,
N7:16/integer-little,N8:16/integer-little,_/binary>> ->
checksum_1(N+16,Bin,Csum+(N1+N2)+(N3+N4)+(N5+N6)+(N7+N8));
<<_:N/binary, N1:16/integer-little,_/binary>> ->
checksum_1(N+2,Bin,Csum+N1);
<<_:N/binary, Num:8/integer-little>> ->
checksum_1(N+1,Bin,Csum+(Num bsl 8));
_ when Csum > ?INT16MAX ->
Val = (Csum band ?INT16MAX + (Csum bsr 16)),
checksum_1(N,Bin,Val);
_ ->
Val = (((Csum band 16#ff) bsl 8) bor ((Csum band 16#ff00) bsr 8)),
(bnot Val) band ?INT16MAX
end.
%% uses little-endian byteordering and creates sub-binaries
checksum_2(Bin,Csum) when Csum > 16#6ffffff ->
checksum_2(Bin, Csum band ?INT16MAX + (Csum bsr 16));
checksum_2(<<N1:16/integer-little, N2:16/integer-little, N3:16/integer-little,
N4:16/integer-little, N5:16/integer-little, N6:16/integer-little,
N7:16/integer-little, N8:16/integer-little, N9:16/integer-little,
N10:16/integer-little, N11:16/integer-little, N12:16/integer-little,
N13:16/integer-little, N14:16/integer-little, N15:16/integer-little,
N16:16/integer-little,
Rem/binary>>, Csum) ->
checksum_2(Rem, Csum+(N1+N2)+(N3+N4)+(N5+N6)+(N7+N8)+
(N9+N10)+(N11+N12)+(N13+N14)+(N15+N16) );
checksum_2(<<N1:16/integer-little, Rem/binary>>,Csum) ->
checksum_2(Rem, Csum+N1);
checksum_2(<<N1:8/integer>>,Csum) ->
checksum_2(<<>>,Csum+(N1 bsl 8));
checksum_2(<<>> = Bin,Csum) when Csum > ?INT16MAX ->
NewCsum=Csum band ?INT16MAX + (Csum bsr 16),
checksum_2(Bin,NewCsum);
checksum_2(<<>>,Csum) ->
Val=(((Csum band 16#ff) bsl 8) bor ((Csum band 16#ff00) bsr 8)),
(bnot Val) band ?INT16MAX.
%% uses big-endian byteordering and does not create sub-binaries
checksum_3(N,Bin,Csum) when Csum > 16#6ffffff ->
checksum_3(N, Bin, Csum band ?INT16MAX + (Csum bsr 16));
checksum_3(N,Bin,Csum) ->
case Bin of
<<_:N/binary, N1:16/integer,N2:16/integer,N3:16/integer,
N4:16/integer,N5:16/integer,N6:16/integer,
N7:16/integer,N8:16/integer,_/binary>> ->
checksum_3(N+16,Bin,Csum+N1+N2+N3+N4+N5+N6+N7+N8);
<<_:N/binary, N1:16/integer,_/binary>> ->
checksum_3(N+2,Bin,Csum+N1);
<<_:N/binary, Num:8/integer>> ->
checksum_3(N+1,Bin,Csum+(Num bsl 8));
_ when Csum > ?INT16MAX ->
Val = (Csum band ?INT16MAX + (Csum bsr 16)),
checksum_3(N,Bin,Val);
_ ->
(bnot Csum) band ?INT16MAX
end.
%% uses big-endian byteordering and creates sub-binaries
checksum_4(Bin,Csum) when Csum > 16#6ffffff ->
checksum_4(Bin, Csum band 16#ffff + (Csum bsr 16));
checksum_4(<<N1:16/integer,N2:16/integer,N3:16/integer,
N4:16/integer,N5:16/integer,N6:16/integer,
N7:16/integer,N8:16/integer, Rem/binary>>,
Csum) ->
checksum_4(Rem, Csum+N1+N2+N3+N4+N5+N6+N7+N8);
checksum_4(<<N1:16/integer, Rem/binary>>,Csum) ->
checksum_4(Rem, Csum+N1);
checksum_4(<<N1:8/integer>>,Csum) ->
checksum_4(<<>>,Csum+(N1 bsl 8));
checksum_4(<<>>,Csum) when Csum > ?INT16MAX ->
Val=(Csum band ?INT16MAX) + (Csum bsr 16),
checksum_4(<<>>,Val);
checksum_4(<<>>,Csum) ->
(bnot Csum) band ?INT16MAX.
checksum_5(Inp) ->
cs_992(0, Inp, 0).
%% General loop
cs_992(I, Bin, Csum) ->
case Bin of
<<_:I/binary, Num:992, _/binary>> -> cs_992(I+124, Bin,Csum+Num);
<<_:I/binary, Rest/binary>> ->
RestBits = size(Rest)*8,
<<Num:RestBits>> = Rest,
cs_fold(Csum + (Num bsl (992-RestBits)))
end.
cs_fold(Csum) when Csum > ?INT16MAX ->
cs_fold((Csum bsr 16) + (Csum band ?INT16MAX));
cs_fold(Csum) -> (bnot Csum) band ?INT16MAX.
More information about the erlang-questions
mailing list