Traversing a binary

Mon Oct 4 17:01:32 CEST 2004

Erik Stenman <erik.stenman@REDACTED> writes:

> I'm no Tcp expert, but are you sure?
> Is it not defined as the 16-bit 1's complement sum?
> I.e. something like:
> '+'(A,B) ->
>     S = A + B,
>     S band 16#ffff + (S bsr 16).
> 
> One problem with your code is that it generates bignums
> (heap-allocated large integers),
> but if it is the 16-bit 1's complement sum, then you don't need large
> integers.

Maybe bignums could be some fun?

In RFC 1071 ("Computing the internet checksum") they have this tip:

   (C)  Parallel Summation

        On machines that have word-sizes that are multiples of 16 bits,
        it is possible to develop even more efficient implementations.
        Because addition is associative, we do not have to sum the
        integers in the order they appear in the message.  Instead we
        can add them in "parallel" by exploiting the larger word size.

        To compute the checksum in parallel, simply do a 1's complement
        addition of the message using the native word size of the
        machine.  For example, on a 32-bit machine we can add 4 bytes at
        a time: [A,B,C,D]+'... When the sum has been computed, we "fold"
        the long sum into 16 bits by adding the 16-bit segments.  Each
        16-bit addition may produce new end-around carries that must be
        added.

Since the Erlang machine is arbitrary-precision one could load the
whole packet into a bignum and call that the "machine word" checksum,
then repeatedly "fold" it in half until it's down to 16 bits.

At first glance it isn't obvious to me if that's a good idea, but the
interesting thing is that it offloads the real work onto the runtime
system (bignum arithmetic) which is already coded in C.

I did a mock-up implementation that seems to compare well with the
non-HIPE-compiled Erlang versions on some inputs. (The overall results
seeming to vary quite a bit between inputs). I include it below as
checksum_4 more because I think it's cute than that I think it's the
right way. :-)

-module(tc).
-export([checksum_1/2,t/0,c/3,checksum_2/2,checksum_3/2,checksum_4/1]).
-compile(inline).

t() ->
    {ok,B} = file:read_file("tc.erl"),
    C = <<B/binary,B/binary,B/binary,B/binary,B/binary>>,
    D = <<C/binary,C/binary,C/binary,C/binary,C/binary>>,
    E = <<D/binary,D/binary,D/binary,D/binary,D/binary,
	 D/binary,D/binary,D/binary,D/binary,D/binary,
	 D/binary,D/binary,D/binary,D/binary,D/binary,
	 D/binary,D/binary,D/binary,D/binary,D/binary,
	 D/binary,D/binary,42:8/integer>>,
    %% checksum_4 crashes on this:
%%    F = <<E/binary,E/binary,E/binary,E/binary,E/binary>>,
    %% and performs better on this:
%%    F = D,
    F = E,
    io:format("N=~p~n", [size(F)]),
    garbage_collect(),
    io:format("M1: ~w~n",[timer:tc(tc,checksum_1,[F,0])]),
    garbage_collect(),
    io:format("M2: ~w~n",[timer:tc(tc,checksum_2,[F,0])]),
    garbage_collect(),
    io:format("M3: ~w~n",[timer:tc(tc,checksum_3,[F,0])]),
    garbage_collect(),
    io:format("M4: ~w~n",[timer:tc(tc,checksum_4,[F])]),
    garbage_collect(),
    io:format("M5: ~w~n",[timer:tc(tc,c,[0,F,0])]).

checksum_1(<<Num:16/integer, Remainder/binary>>, Csum) ->
    checksum_1(Remainder, Csum + Num);
checksum_1(<<Num:8/integer>>,CSum) -> 
    checksum_1(<<>>,CSum+(Num bsl 8));
checksum_1(_, Csum) when Csum > 16#ffff ->
    checksum_1(<<>>,Csum band 16#ffff + (Csum bsr 16));
checksum_1(_,CSum) -> CSum.

checksum_2(Bin = <<N1:16/integer, N2:16/integer, N3:16/integer, 
	N4:16/integer, N5:16/integer, N6:16/integer, 
	N7:16/integer, N8:16/integer, Rem/binary>>, 
	Csum) when size(Bin)>=16 ->
    checksum_2(Rem, Csum+N1+N2+N3+N4+N5+N6+N7+N8);
checksum_2(Bin,Csum) ->
    checksum_1(Bin,Csum).

checksum_3(Bin = <<N1:16/integer, N2:16/integer, N3:16/integer, 
	N4:16/integer, N5:16/integer, N6:16/integer, 
	N7:16/integer, N8:16/integer, Rem/binary>>, 
	Csum) when size(Bin)>=16 ->
    checksum_3(Rem, '+'(Csum,'+'(N1,'+'(N2,'+'(N3,'+'(N4,'+'(N5,'+'(N6,'+'(N7,N8)))))))));
checksum_3(Bin,Csum) ->
    c(0,Bin,Csum).

checksum_4(Bin0) ->
    %% Pad the binary to even size if needed.
    Bin = case size(Bin0) rem 2 of
	      0 -> Bin0;
	      1 -> <<Bin0/binary, 0>>
	  end,
    Bits = size(Bin) * 8,
    %% C is the "machine word" (arbitrary precision) checksum
    <<C:Bits>> = Bin,
    checksum_4(mul16(Bits, 16), C).

checksum_4(16, C) -> C;
checksum_4(B0, C0) when B0 > 16 ->
    B1 = B0 div 2,
    Mask = (1 bsl B1) -1,
    High = C0 bsr B1,
    Low  = C0 band Mask,
    C1 = High + Low,
    checksum_4(B1, C1 band Mask + (C1 bsr B1)).

%% Return the smallest multiple of 16 that is greater than N.
%% (Where's the logarithm function?)
mul16(N, Acc) when Acc > N -> Acc;
mul16(N, Acc) -> mul16(N, Acc*16).

c(N,Bin,Csum) ->
    case Bin of 
	<<_:N/binary, Num:16/integer,_/binary>> ->
	    c(N+2,Bin,'+'(Csum,Num));
        <<_:N/binary, Num:8/integer>> ->
	    '+'(Csum,Num bsl 8);
	_ -> Csum
    end.

'+'(A,B) ->
    S = A + B,
    S band 16#ffff + (S bsr 16).