base64 encoding using the new binary syntax

Matthias.Lang <>
Mon Sep 25 19:00:59 CEST 2000


Hi,

About a year ago there was some discussion on the erlang list (or
maybe it was an internal Ericsson discussion) about base64 encoding
(as used in MIME) using the binary syntax.

Since the binary syntax is here now, maybe people are interested in
exploring its performance characteristics. Here's something to start
the discussion. Performance of the attached module when en(de)coding
rfc2045.txt (71kb) on my 167MHz ultrasparc:

             Erlang bitsyntax        unix 'mimencode' program
   -------------------------------------------------------------------
   encoding: 1.07 s                  0.045s
   decoding: 10.2 s                  0.035s

The 20x slowdown on encoding looks familiar ;-). I haven't
investigated why the decoding is so much slower; I'd guess there's an
equivalent but uglier and faster approach.

Matthias

(I appreciate that a port program or a linked-in driver is a better
approach if speed is crucial. That's not the point here.)

%% Base 64 encoder/decoder. See RFC2045
-module(mime).
-export([pack/1, unpack/1]).

%% pack(Data) -> binary()
%%
%% Base64-encodes Data and returns the encoded text as a binary.
%% Data may be a list (converted with list_to_binary/1 first) or a
%% binary. The actual encoding and line splitting is done by
%% acc_pack/2.
%%
%% The guards use is_list/1 and is_binary/1; the prefix-less forms
%% list/1 and binary/1 are obsolete guard tests.
pack(List) when is_list(List) ->
    pack(list_to_binary(List));

pack(Bin) when is_binary(Bin) ->
    acc_pack(Bin, <<>>).

%% acc_pack(Bin, Acc) -> binary()
%%
%% Encodes Bin 54 bytes at a time and appends each encoded chunk to
%% Acc, followed by "\n". 54 input bytes encode to 72 base64
%% characters, so every full output line is 72 characters long.
acc_pack(<<Chunk:54/binary, Rest/binary>>, Acc) ->
    acc_pack(Rest, <<Acc/binary, (enc(Chunk))/binary, "\n">>);

%% Nothing left to encode. Without this clause an empty input, or an
%% input whose size is an exact multiple of 54 bytes, would fall into
%% the clause below and get an extra, empty output line.
acc_pack(<<>>, Acc) ->
    Acc;

%% Final, short chunk (1..53 bytes): encode it as the last line.
acc_pack(Bin, Acc) ->
    <<Acc/binary, (enc(Bin))/binary, "\n">>.

%% unpack(Bin) -> binary()
%%
%% Decodes the base64 text in the binary Bin and returns the decoded
%% bytes as a binary. The work is done by dec/3, started with no
%% pending six-bit values and an empty output accumulator.
%%
%% The guard uses is_binary/1; the prefix-less binary/1 is an obsolete
%% guard test.
unpack(Bin) when is_binary(Bin) ->
    dec(Bin, [], <<>>).

%% base-64 encoding: take 6 bits at a time from the head of the binary
%% and emit it as 8 bit characters.

%% enc(Bin) -> binary()
%%
%% Encodes a binary as base64 characters without any line breaks.
%% The four patterns are mutually exclusive (0, 1, 2, or 3+ bytes), so
%% their order does not affect which clause is selected.

%% No input left.
enc(<<>>) ->
    <<>>;

%% One trailing byte: 6 bits plus 2 leftover bits. The leftover bits
%% are shifted into the high end of a six-bit value and the group is
%% padded with two "=" characters.
enc(<<Hi:6, Lo:2>>) ->
    <<(int_to_b64(Hi)):8, (int_to_b64(Lo bsl 4)):8, "==">>;

%% Two trailing bytes: 6 + 6 bits plus 4 leftover bits, padded with a
%% single "=" character.
enc(<<P:6, Q:6, Lo:4>>) ->
    <<(int_to_b64(P)):8, (int_to_b64(Q)):8, (int_to_b64(Lo bsl 2)):8, "=">>;

%% A full three-byte group becomes four output characters; the rest of
%% the binary is encoded recursively and appended.
enc(<<P:6, Q:6, R:6, S:6, Rest/binary>>) ->
    Quad = <<(int_to_b64(P)):8, (int_to_b64(Q)):8,
             (int_to_b64(R)):8, (int_to_b64(S)):8>>,
    <<Quad/binary, (enc(Rest))/binary>>.

%% Decoding. Works by consuming groups of 4 input characters to create
%% a group of 3 output characters, with the three special-cases for
%% end-of-input first:

%% dec(Input, Pending, Out) -> binary()
%%
%% Input   - base64 text still to be consumed
%% Pending - six-bit values collected so far, most recent first
%% Out     - decoded bytes produced so far
%%
%% Clause order matters: the end-of-input cases come first, then the
%% "four values collected" case, and only then is another input
%% character consumed.

%% End of input with nothing pending.
dec(<<>>, [], Out) ->
    Out;

%% End of input with three values pending: 6 + 6 + 4 bits = two bytes.
dec(<<>>, [C3, C2, C1], Out) ->
    Tail = (C1 bsl 10) bor (C2 bsl 4) bor (C3 bsr 2),
    <<Out/binary, Tail:16>>;

%% End of input with two values pending: 6 + 2 bits = one byte.
dec(<<>>, [C2, C1], Out) ->
    <<Out/binary, ((C1 bsl 2) bor (C2 bsr 4)):8>>;

%% Four values pending: emit three bytes and start a new group.
dec(Input, [C4, C3, C2, C1], Out) ->
    Group = (C1 bsl 18) bor (C2 bsl 12) bor (C3 bsl 6) bor C4,
    dec(Input, [], <<Out/binary, Group:24>>);

%% Consume one input character. Characters for which b64_to_int/1
%% returns 'ignore' are dropped; anything else is pushed onto Pending.
dec(<<Char:8, Rest/binary>>, Pending, Out) ->
    Next = case b64_to_int(Char) of
               ignore -> Pending;
               Sixbits -> [Sixbits | Pending]
           end,
    dec(Rest, Next, Out).

%% b64_to_int(Char) -> 0..63 | ignore
%%
%% Maps a base64 alphabet character to its six-bit value:
%% A-Z -> 0..25, a-z -> 26..51, 0-9 -> 52..61, "+" -> 62, "/" -> 63.
%% Anything else yields the atom 'ignore'.
b64_to_int($+) -> 62;
b64_to_int($/) -> 63;
b64_to_int(C) when $A =< C, C =< $Z -> C - $A;
b64_to_int(C) when $a =< C, C =< $z -> 26 + (C - $a);
b64_to_int(C) when $0 =< C, C =< $9 -> 52 + (C - $0);
b64_to_int(_Other) -> ignore.

%% int_to_b64(N) -> Char
%%
%% Maps a six-bit value to its base64 alphabet character:
%% 0..25 -> A-Z, 26..51 -> a-z, 52..61 -> 0-9, 62 -> "+", 63 -> "/".
%% No clause matches values outside 0..63.
int_to_b64(63) -> $/;
int_to_b64(62) -> $+;
int_to_b64(N) when N >= 52, N =< 61 -> $0 + (N - 52);
int_to_b64(N) when N >= 26, N =< 51 -> $a + (N - 26);
int_to_b64(N) when N >= 0, N =< 25 -> $A + N.


%%----------------------------------------------------------------------
Warning: earlier versions of R7A, possibly including the one on the
erlang.org site, have a bug in the binary syntax which causes the wrong
clause to be chosen in some cases. The above mime decoder avoids the
bug. If your Erlang has the bug, clause:test() will return multi_byte_binary:

-module(clause).
-export([test/0, f/1]).

%% test() -> single_byte_binary | multi_byte_binary
%%
%% Builds a one-byte binary, matches it against <<B:8>>, and passes it
%% to f/1. With correct clause selection f/1 returns
%% single_byte_binary; an Erlang with the clause-selection bug returns
%% multi_byte_binary instead.
test() ->
	A = list_to_binary([1]),
	%% B is never used afterwards.
	%% NOTE(review): presumably this match is needed to provoke the
	%% bug -- confirm before removing it.
	<<B:8>> = A,
	f(A).

%% f(Bin) -> single_byte_binary | multi_byte_binary
%%
%% Classifies a binary by which pattern matches first. A one-byte
%% binary matches both patterns (the second with an empty tail T), so
%% the result for it is decided by clause order: the first clause must
%% win and return single_byte_binary. The bound variables A and T are
%% not used in either body.
f(<<A:8>>) -> single_byte_binary;
f(<<A:8, T/binary>>) -> multi_byte_binary.




More information about the erlang-questions mailing list