Traversing a binary

Tue Oct 5 10:38:16 CEST 2004

I've tested the opposite of this, something that avoids bignums and folds
the sum whenever it gets close to being a bignum.

The program exists in four different versions, the diffrence between the
programs are wheter or not they create subbinaries and wheter or not they
take out the 16-bit integer in a little-endian way.

I compare the results with a handcoded version of Tony's program using 992
as the parameter.

When I compute the checksum of a binary of about 64k 3000 times on a 2.4
GHz pentium 4 I get the following results:

No sub-binaries, little-endian:
BEAM: 9.72 s
HiPE: 0.98 s

Sub-binaries, little-endian:
BEAM: 9.46 s
HiPE: 0.86 s

No sub-binaries, big-endian:
BEAM: 3.37 s
HiPE: 1.25 s

Sub-binaries, big-endian:
BEAM: 4.51 s
HiPE: 1.14 s

Tony's Bignum version n=992:
BEAM: 2.96 s
HiPE: 3.72 s

Obviously the little-endian version is only better on a little-endian
machine, but it seems that the bignum approach is competitive as long as
one does not native compile.

The testruns are displayed below:

39> c(csum).
{ok,csum}
40> hipe:c(csum).
{ok,csum}
41> csum:test().
{66264,39354,39354,39354,39354,39354,970,870,1220,1130,3890}
42> c(csum).
{ok,csum}
43> hipe:c(csum).
{ok,csum}
44> csum:test().
{66048,17289,980,860,1250,1140,3720}
45> c(csum).
{ok,csum}
46> csum:test().
{66048,17289,9720,9460,3370,4510,2960}
47>

-------------- next part --------------
%%%-------------------------------------------------------------------
%%% File    : checksum.erl
%%% Author  : Javier Paris Fernandez <paris@REDACTED>
%%% Description : Ip Protocol Checksum
%%%
%%% Created :  4 Aug 2004 by Javier Paris Fernandez <paris@REDACTED>
%%%-------------------------------------------------------------------
-module(csum).

-export([test/0]).

-define(INT16MAX, 65535).
test() ->
  {ok,B} = file:read_file('csum.erl'),
  C = <<B/binary,B/binary,B/binary,B/binary>>,
  Bin = <<C/binary,C/binary,C/binary>>,
  Size=size(Bin),
  garbage_collect(),
  statistics(runtime),
  V=loop1(Bin, 3000),
  {_,R1}=statistics(runtime), 
  garbage_collect(),
  statistics(runtime),
  V=loop2(Bin, 3000),
  {_,R2}=statistics(runtime), 
  garbage_collect(),
  statistics(runtime),
  V=loop3(Bin, 3000),
  {_,R3}=statistics(runtime), 
  garbage_collect(),
  statistics(runtime),
  V=loop4(Bin, 3000),
  {_,R4}=statistics(runtime),
  garbage_collect(),
  statistics(runtime),
  V=loop5(Bin, 3000),
 {_,R5}=statistics(runtime),
  {Size,V,R1,R2,R3,R4, R5}. 

loop1(Inp, 1) ->
  checksum_1(0,Inp,0);
loop1(Inp, N) ->
  checksum_1(0,Inp,0),
  loop1(Inp, N-1).
loop2(Inp, 1) ->
  checksum_2(Inp,0);
loop2(Inp, N) ->
  checksum_2(Inp,0),
  loop2(Inp, N-1).
loop3(Inp, 1) ->
  checksum_3(0,Inp,0);
loop3(Inp, N) ->
  checksum_3(0,Inp,0),
  loop3(Inp, N-1).
loop4(Inp, 1) ->
  checksum_4(Inp,0);
loop4(Inp, N) ->
  checksum_4(Inp,0),
  loop4(Inp, N-1).
loop5(Inp, 1) ->
  checksum_5(Inp);
loop5(Inp, N) ->
  checksum_5(Inp),
  loop5(Inp, N-1).

%% uses little-endian byteordering and does not create sub-binaries
checksum_1(N,Bin,Csum) when Csum > 16#6ffffff ->
  checksum_1(N, Bin, Csum band ?INT16MAX + (Csum bsr 16));
checksum_1(N,Bin,Csum) ->
  case Bin of 
    <<_:N/binary, N1:16/integer-little,N2:16/integer-little,N3:16/integer-little,
     N4:16/integer-little,N5:16/integer-little,N6:16/integer-little,
     N7:16/integer-little,N8:16/integer-little,_/binary>> ->
      checksum_1(N+16,Bin,Csum+(N1+N2)+(N3+N4)+(N5+N6)+(N7+N8));
    <<_:N/binary, N1:16/integer-little,_/binary>> ->
      checksum_1(N+2,Bin,Csum+N1);
    <<_:N/binary, Num:8/integer-little>> ->
      checksum_1(N+1,Bin,Csum+(Num bsl 8));
    _ when Csum > ?INT16MAX ->
       Val = (Csum band ?INT16MAX + (Csum bsr 16)),
      checksum_1(N,Bin,Val);
    _ -> 
      Val = (((Csum band 16#ff) bsl 8) bor ((Csum band 16#ff00) bsr 8)),
      (bnot Val) band ?INT16MAX
 end.

%% uses little-endian byteordering and creates sub-binaries
checksum_2(Bin,Csum) when Csum > 16#6ffffff ->
  checksum_2(Bin, Csum band ?INT16MAX + (Csum bsr 16));
checksum_2(<<N1:16/integer-little, N2:16/integer-little, N3:16/integer-little, 
		  N4:16/integer-little, N5:16/integer-little, N6:16/integer-little, 
		  N7:16/integer-little, N8:16/integer-little, N9:16/integer-little,
		  N10:16/integer-little, N11:16/integer-little, N12:16/integer-little,
		  N13:16/integer-little, N14:16/integer-little, N15:16/integer-little,
		  N16:16/integer-little,
		  Rem/binary>>, Csum) ->
    checksum_2(Rem, Csum+(N1+N2)+(N3+N4)+(N5+N6)+(N7+N8)+
	     (N9+N10)+(N11+N12)+(N13+N14)+(N15+N16) );
checksum_2(<<N1:16/integer-little, Rem/binary>>,Csum) ->
   checksum_2(Rem, Csum+N1);
checksum_2(<<N1:8/integer>>,Csum) ->
  checksum_2(<<>>,Csum+(N1 bsl 8));
checksum_2(<<>> = Bin,Csum) when Csum > ?INT16MAX  ->
  NewCsum=Csum band ?INT16MAX + (Csum bsr 16),
  checksum_2(Bin,NewCsum);
checksum_2(<<>>,Csum) ->
  Val=(((Csum band 16#ff) bsl 8) bor ((Csum band 16#ff00) bsr 8)),
  (bnot Val) band ?INT16MAX.  

%% uses big-endian byteordering and does not create sub-binaries
checksum_3(N,Bin,Csum) when Csum > 16#6ffffff ->
  checksum_3(N, Bin, Csum band ?INT16MAX + (Csum bsr 16));
checksum_3(N,Bin,Csum) ->
  case Bin of 
    <<_:N/binary, N1:16/integer,N2:16/integer,N3:16/integer,
     N4:16/integer,N5:16/integer,N6:16/integer,
     N7:16/integer,N8:16/integer,_/binary>> ->
      checksum_3(N+16,Bin,Csum+N1+N2+N3+N4+N5+N6+N7+N8);
    <<_:N/binary, N1:16/integer,_/binary>> ->
      checksum_3(N+2,Bin,Csum+N1);
    <<_:N/binary, Num:8/integer>> ->
      checksum_3(N+1,Bin,Csum+(Num bsl 8));
    _ when Csum > ?INT16MAX ->
       Val = (Csum band ?INT16MAX + (Csum bsr 16)),
      checksum_3(N,Bin,Val);
    _ -> 
      (bnot Csum) band ?INT16MAX
  end.

%% uses big-endian byteordering and creates sub-binaries
checksum_4(Bin,Csum) when Csum > 16#6ffffff ->
  checksum_4(Bin, Csum band 16#ffff + (Csum bsr 16));
checksum_4(<<N1:16/integer,N2:16/integer,N3:16/integer,
     N4:16/integer,N5:16/integer,N6:16/integer,
     N7:16/integer,N8:16/integer, Rem/binary>>, 
        Csum) ->
    checksum_4(Rem, Csum+N1+N2+N3+N4+N5+N6+N7+N8);
checksum_4(<<N1:16/integer, Rem/binary>>,Csum) ->
   checksum_4(Rem, Csum+N1);
checksum_4(<<N1:8/integer>>,Csum) ->
  checksum_4(<<>>,Csum+(N1 bsl 8));
checksum_4(<<>>,Csum) when Csum > ?INT16MAX ->
  Val=(Csum band ?INT16MAX) + (Csum bsr 16),
   checksum_4(<<>>,Val);
checksum_4(<<>>,Csum) ->
  (bnot Csum) band ?INT16MAX.

checksum_5(Inp) ->
  cs_992(0, Inp, 0).

%% General loop

cs_992(I, Bin, Csum) ->
     case Bin of
        <<_:I/binary, Num:992, _/binary>> -> cs_992(I+124, Bin,Csum+Num);
        <<_:I/binary, Rest/binary>> ->
            RestBits = size(Rest)*8,
            <<Num:RestBits>> = Rest,
            cs_fold(Csum + (Num bsl (992-RestBits)))
     end.

cs_fold(Csum) when Csum > ?INT16MAX ->
     cs_fold((Csum bsr 16) + (Csum band ?INT16MAX));
cs_fold(Csum) -> (bnot Csum) band ?INT16MAX.