base64 encoding using the new binary syntax

Bjorn Gustavsson bjorn@REDACTED
Tue Sep 26 10:47:41 CEST 2000


The bit syntax as currently implemented always copies binaries
in construction expressions such as:

	<<Acc/binary, P:6, Q:6, R:6, S:6>>

It is actually faster to construct lists of binaries and use
list_to_binary/1 to combine them. For an example, see my
version of the mime module below.

/Bjorn

> Hi,
> 
> About a year ago there was some discussion on the erlang list (or
> maybe it was an internal Ericsson discussion) about base64 encoding
> (as used in MIME) using the binary syntax.
> 
> Since the binary syntax is here now, maybe people are interested in
> exploring its performance characteristics. Here's something to start
> the discussion. Performance of the attached module when en(de)coding
> rfc2045.txt (71kb) on my 167MHz ultrasparc:
> 
>              Erlang bitsyntax        unix 'mimencode' program
>    -------------------------------------------------------------------
>    encoding: 1.07 s                  0.045s
>    decoding: 10.2 s                  0.035s
> 
> The 20x slowdown on encoding looks familiar ;-). I haven't
> investigated why the decoding is so much slower, I'd guess there's an
> equivalent but uglier and faster approach.
> 
> Matthias


-- 
Björn Gustavsson            Ericsson Utvecklings AB
bjorn@REDACTED      ÄT2/UAB/F/P
			    BOX 1505
+46 8 727 56 87 	    125 25 Älvsjö


-module(mime2).
-export([pack/1, unpack/1]).

pack(List) when list(List) ->
    pack(list_to_binary(List));

pack(Bin) when binary(Bin) ->
    acc_pack(Bin, []).

acc_pack(<<Bin:54/binary, T/binary>>, Acc) ->
    acc_pack(T, [Acc,enc(Bin),$\n]);
acc_pack(Bin, Acc) ->
    list_to_binary([Acc,enc(Bin),$\n]).

unpack(Bin) when binary(Bin) ->
    list_to_binary(dec(Bin)).

%% base-64 encoding: take 6 bits at a time from the head of the binary
%% and emit it as 8 bit characters.

enc(Bin) ->
    list_to_binary(enc1(Bin)).

enc1(<<A:6, B:6, C:6, D:6, T/binary>>) ->
    AA = int_to_b64(A),
    BB = int_to_b64(B),
    CC = int_to_b64(C),
    DD = int_to_b64(D),
    [AA, BB, CC, DD| enc1(T)];

enc1(<<A:6, B:6, C:4>>) -> 
    AA = int_to_b64(A),
    BB = int_to_b64(B),
    CC = int_to_b64(C bsl 2),
    [AA, BB, CC, $=];

enc1(<<A:6, B:2>>) -> 
    AA = int_to_b64(A),
    BB = int_to_b64(B bsl 4),
    [AA, BB, $=, $=];

enc1(<<>>) -> [].

%% Decoding. Works by consuming groups of 4 input characters to create
%% a group of 3 output characters, with the three special-cases for
%% end-of-input first:

dec(Bin) ->
    dec(512, Bin, [], []).

dec(0, Bin, List, Acc) ->
    dec(512, Bin, List, [list_to_binary(Acc)]);
dec(N, <<>>, [], Acc) -> Acc;
dec(N, <<>>, [R,Q,P], Acc) -> [Acc|<<P:6, Q:6, (R bsr 2):4>>];
dec(N, <<>>, [Q,P], Acc) -> [Acc|<<P:6, (Q bsr 4):2>>];
dec(N, Bin, [S,R,Q,P], Acc) ->
    dec(N-1, Bin, [], [Acc|<<P:6, Q:6, R:6, S:6>>]);
dec(N, <<A:8, T/binary>>, List, Acc) ->
    case b64_to_int(A) of
 	ignore -> dec(N, T, List, Acc);
 	Sixbits -> dec(N, T, [Sixbits|List], Acc)
    end.

b64_to_int(X) when X >= $A, X =< $Z -> X - $A;
b64_to_int(X) when X >= $a, X =< $z -> X - $a + 26;
b64_to_int(X) when X >= $0, X =< $9 -> X - $0 + 52;
b64_to_int($+) -> 62;
b64_to_int($/) -> 63;
b64_to_int(_) -> ignore.

int_to_b64(X) when X >= 0, X =< 25 -> X + $A;
int_to_b64(X) when X >= 26, X =< 51 -> X - 26 + $a;
int_to_b64(X) when X >= 52, X =< 61 -> X - 52 + $0;
int_to_b64(62) -> $+;
int_to_b64(63) -> $/.



Matthias.Lang@REDACTED writes:

> Hi,
> 
> About a year ago there was some discussion on the erlang list (or
> maybe it was an internal Ericsson discussion) about base64 encoding
> (as used in MIME) using the binary syntax.
> 
> Since the binary syntax is here now, maybe people are interested in
> exploring its performance characteristics. Here's something to start
> the discussion. Performance of the attached module when en(de)coding
> rfc2045.txt (71kb) on my 167MHz ultrasparc:
> 
>              Erlang bitsyntax        unix 'mimencode' program
>    -------------------------------------------------------------------
>    encoding: 1.07 s                  0.045s
>    decoding: 10.2 s                  0.035s
> 
> The 20x slowdown on encoding looks familiar ;-). I haven't
> investigated why the decoding is so much slower, I'd guess there's an
> equivalent but uglier and faster approach.
> 
> Matthias
> 
> (I appreciate that a port program or a linked-in driver is a better
> approach if speed is crucial. That's not the point here.)
> 
> %% Base 64 encoder/decoder. See RFC2045
> -module(mime).
> -export([pack/1, unpack/1]).
> 
> pack(List) when list(List) ->
>     pack(list_to_binary(List));
> 
> pack(Bin) when binary(Bin) ->
>     acc_pack(Bin, <<>>).
> 
> acc_pack(<<Bin:54/binary, T/binary>>, Acc) ->
> 	acc_pack(T, <<Acc/binary, (enc(Bin))/binary, "\n">>);
> 
> acc_pack(Bin, Acc) ->
>     <<Acc/binary, (enc(Bin))/binary, "\n">>.
> 
> unpack(Bin) when binary(Bin) ->
>     dec(Bin, [], <<>>).
> 
> %% base-64 encoding: take 6 bits at a time from the head of the binary
> %% and emit it as 8 bit characters.
> 
> enc(<<A:6, B:6, C:6, D:6, T/binary>>) ->
>     AA = int_to_b64(A),
>     BB = int_to_b64(B),
>     CC = int_to_b64(C),
>     DD = int_to_b64(D),
>     <<AA:8, BB:8, CC:8, DD:8, (enc(T))/binary>>;
> 
> enc(<<A:6, B:6, C:4>>) -> 
>     AA = int_to_b64(A),
>     BB = int_to_b64(B),
>     CC = int_to_b64(C bsl 2),
>     <<AA:8, BB:8, CC:8, $=:8>>;
> 
> enc(<<A:6, B:2>>) -> 
>     AA = int_to_b64(A),
>     BB = int_to_b64(B bsl 4),
>     <<AA:8, BB:8, $=:8, $=:8>>;
> 
> enc(<<>>) -> <<>>.
> 
> %% Decoding. Works by consuming groups of 4 input characters to create
> %% a group of 3 output characters, with the three special-cases for
> %% end-of-input first:
> 
> dec(<<>>, [], Acc) -> Acc;
> dec(<<>>, [R,Q,P], Acc) -> <<Acc/binary, P:6, Q:6, (R bsr 2):4>>;
> dec(<<>>, [Q,P], Acc) -> <<Acc/binary, P:6, (Q bsr 4):2>>;
> 
> dec(Bin, [S,R,Q,P], Acc) -> dec(Bin, [], <<Acc/binary, P:6, Q:6, R:6, S:6>>);
> 
> dec(<<A:8, T/binary>>, List, Acc) ->
>     case b64_to_int(A) of
> 	ignore -> dec(T, List, Acc);
> 	Sixbits -> dec(T, [Sixbits|List], Acc)
>     end.
> 
> b64_to_int(X) when X >= $A, X =< $Z -> X - $A;
> b64_to_int(X) when X >= $a, X =< $z -> X - $a + 26;
> b64_to_int(X) when X >= $0, X =< $9 -> X - $0 + 52;
> b64_to_int($+) -> 62;
> b64_to_int($/) -> 63;
> b64_to_int(_) -> ignore.
> 
> int_to_b64(X) when X >= 0, X =< 25 -> X + $A;
> int_to_b64(X) when X >= 26, X =< 51 -> X - 26 + $a;
> int_to_b64(X) when X >= 52, X =< 61 -> X - 52 + $0;
> int_to_b64(62) -> $+;
> int_to_b64(63) -> $/.
> 
> 
> %%----------------------------------------------------------------------
> Warning: earlier versions of R7A, possibly including the one on the
> erlang.org site have a bug in the binary syntax which causes the wrong
> clause to be chosen in some cases. The above mime decoder avoids the
> bug. If your erlang has the bug, clause:test() will return multi_byte_binary:
> 
> -module(clause).
> -export([test/0, f/1]).
> 
> test() ->
> 	A = list_to_binary([1]),
> 	<<B:8>> = A,
> 	f(A).
> 
> f(<<A:8>>) -> single_byte_binary;
> f(<<A:8, T/binary>>) -> multi_byte_binary.
> 



More information about the erlang-questions mailing list