base64 encoding using the new binary syntax
Bjorn Gustavsson
bjorn@REDACTED
Tue Sep 26 10:47:41 CEST 2000
The bit syntax as currently implemented always copies binaries
in construction expressions such as:
<<Acc/binary, P:6, Q:6, R:6, S:6>>
It is actually faster to construct lists of binaries and use
list_to_binary/1 to combine them. For an example, see my
version of the mime module below.
/Bjorn
> Hi,
>
> About a year ago there was some discussion on the erlang list (or
> maybe it was an internal Ericsson discussion) about base64 encoding
> (as used in MIME) using the binary syntax.
>
> Since the binary syntax is here now, maybe people are interested in
> exploring its performance characteristics. Here's something to start
> the discussion. Performance of the attached module when en(de)coding
> rfc2045.txt (71kb) on my 167MHz ultrasparc:
>
> Erlang bitsyntax unix 'mimencode' program
> -------------------------------------------------------------------
> encoding: 1.07 s 0.045s
> decoding: 10.2 s 0.035s
>
> The 20x slowdown on encoding looks familiar ;-). I haven't
> investigated why the decoding is so much slower, I'd guess there's an
> equivalent but uglier and faster approach.
>
> Matthias
--
Björn Gustavsson Ericsson Utvecklings AB
bjorn@REDACTED ÄT2/UAB/F/P
BOX 1505
+46 8 727 56 87 125 25 Älvsjö
-module(mime2).
-export([pack/1, unpack/1]).
pack(List) when list(List) ->
pack(list_to_binary(List));
pack(Bin) when binary(Bin) ->
acc_pack(Bin, []).
acc_pack(<<Bin:54/binary, T/binary>>, Acc) ->
acc_pack(T, [Acc,enc(Bin),$\n]);
acc_pack(Bin, Acc) ->
list_to_binary([Acc,enc(Bin),$\n]).
unpack(Bin) when binary(Bin) ->
list_to_binary(dec(Bin)).
%% base-64 encoding: take 6 bits at a time from the head of the binary
%% and emit it as 8 bit characters.
enc(Bin) ->
list_to_binary(enc1(Bin)).
enc1(<<A:6, B:6, C:6, D:6, T/binary>>) ->
AA = int_to_b64(A),
BB = int_to_b64(B),
CC = int_to_b64(C),
DD = int_to_b64(D),
[AA, BB, CC, DD| enc1(T)];
enc1(<<A:6, B:6, C:4>>) ->
AA = int_to_b64(A),
BB = int_to_b64(B),
CC = int_to_b64(C bsl 2),
[AA, BB, CC, $=];
enc1(<<A:6, B:2>>) ->
AA = int_to_b64(A),
BB = int_to_b64(B bsl 4),
[AA, BB, $=, $=];
enc1(<<>>) -> [].
%% Decoding. Works by consuming groups of 4 input characters to create
%% a group of 3 output characters, with the three special-cases for
%% end-of-input first:
dec(Bin) ->
dec(512, Bin, [], []).
dec(0, Bin, List, Acc) ->
dec(512, Bin, List, [list_to_binary(Acc)]);
dec(N, <<>>, [], Acc) -> Acc;
dec(N, <<>>, [R,Q,P], Acc) -> [Acc|<<P:6, Q:6, (R bsr 2):4>>];
dec(N, <<>>, [Q,P], Acc) -> [Acc|<<P:6, (Q bsr 4):2>>];
dec(N, Bin, [S,R,Q,P], Acc) ->
dec(N-1, Bin, [], [Acc|<<P:6, Q:6, R:6, S:6>>]);
dec(N, <<A:8, T/binary>>, List, Acc) ->
case b64_to_int(A) of
ignore -> dec(N, T, List, Acc);
Sixbits -> dec(N, T, [Sixbits|List], Acc)
end.
b64_to_int(X) when X >= $A, X =< $Z -> X - $A;
b64_to_int(X) when X >= $a, X =< $z -> X - $a + 26;
b64_to_int(X) when X >= $0, X =< $9 -> X - $0 + 52;
b64_to_int($+) -> 62;
b64_to_int($/) -> 63;
b64_to_int(_) -> ignore.
int_to_b64(X) when X >= 0, X =< 25 -> X + $A;
int_to_b64(X) when X >= 26, X =< 51 -> X - 26 + $a;
int_to_b64(X) when X >= 52, X =< 61 -> X - 52 + $0;
int_to_b64(62) -> $+;
int_to_b64(63) -> $/.
Matthias.Lang@REDACTED writes:
> Hi,
>
> About a year ago there was some discussion on the erlang list (or
> maybe it was an internal Ericsson discussion) about base64 encoding
> (as used in MIME) using the binary syntax.
>
> Since the binary syntax is here now, maybe people are interested in
> exploring its performance characteristics. Here's something to start
> the discussion. Performance of the attached module when en(de)coding
> rfc2045.txt (71kb) on my 167MHz ultrasparc:
>
> Erlang bitsyntax unix 'mimencode' program
> -------------------------------------------------------------------
> encoding: 1.07 s 0.045s
> decoding: 10.2 s 0.035s
>
> The 20x slowdown on encoding looks familiar ;-). I haven't
> investigated why the decoding is so much slower, I'd guess there's an
> equivalent but uglier and faster approach.
>
> Matthias
>
> (I appreciate that a port program or a linked-in driver is a better
> approach if speed is crucial. That's not the point here.)
>
> %% Base 64 encoder/decoder. See RFC2045
> -module(mime).
> -export([pack/1, unpack/1]).
>
> pack(List) when list(List) ->
> pack(list_to_binary(List));
>
> pack(Bin) when binary(Bin) ->
> acc_pack(Bin, <<>>).
>
> acc_pack(<<Bin:54/binary, T/binary>>, Acc) ->
> acc_pack(T, <<Acc/binary, (enc(Bin))/binary, "\n">>);
>
> acc_pack(Bin, Acc) ->
> <<Acc/binary, (enc(Bin))/binary, "\n">>.
>
> unpack(Bin) when binary(Bin) ->
> dec(Bin, [], <<>>).
>
> %% base-64 encoding: take 6 bits at a time from the head of the binary
> %% and emit it as 8 bit characters.
>
> enc(<<A:6, B:6, C:6, D:6, T/binary>>) ->
> AA = int_to_b64(A),
> BB = int_to_b64(B),
> CC = int_to_b64(C),
> DD = int_to_b64(D),
> <<AA:8, BB:8, CC:8, DD:8, (enc(T))/binary>>;
>
> enc(<<A:6, B:6, C:4>>) ->
> AA = int_to_b64(A),
> BB = int_to_b64(B),
> CC = int_to_b64(C bsl 2),
> <<AA:8, BB:8, CC:8, $=:8>>;
>
> enc(<<A:6, B:2>>) ->
> AA = int_to_b64(A),
> BB = int_to_b64(B bsl 4),
> <<AA:8, BB:8, $=:8, $=:8>>;
>
> enc(<<>>) -> <<>>.
>
> %% Decoding. Works by consuming groups of 4 input characters to create
> %% a group of 3 output characters, with the three special-cases for
> %% end-of-input first:
>
> dec(<<>>, [], Acc) -> Acc;
> dec(<<>>, [R,Q,P], Acc) -> <<Acc/binary, P:6, Q:6, (R bsr 2):4>>;
> dec(<<>>, [Q,P], Acc) -> <<Acc/binary, P:6, (Q bsr 4):2>>;
>
> dec(Bin, [S,R,Q,P], Acc) -> dec(Bin, [], <<Acc/binary, P:6, Q:6, R:6, S:6>>);
>
> dec(<<A:8, T/binary>>, List, Acc) ->
> case b64_to_int(A) of
> ignore -> dec(T, List, Acc);
> Sixbits -> dec(T, [Sixbits|List], Acc)
> end.
>
> b64_to_int(X) when X >= $A, X =< $Z -> X - $A;
> b64_to_int(X) when X >= $a, X =< $z -> X - $a + 26;
> b64_to_int(X) when X >= $0, X =< $9 -> X - $0 + 52;
> b64_to_int($+) -> 62;
> b64_to_int($/) -> 63;
> b64_to_int(_) -> ignore.
>
> int_to_b64(X) when X >= 0, X =< 25 -> X + $A;
> int_to_b64(X) when X >= 26, X =< 51 -> X - 26 + $a;
> int_to_b64(X) when X >= 52, X =< 61 -> X - 52 + $0;
> int_to_b64(62) -> $+;
> int_to_b64(63) -> $/.
>
>
> %%----------------------------------------------------------------------
> Warning: earlier versions of R7A, possibly including the one on the
> erlang.org site have a bug in the binary syntax which causes the wrong
> clause to be chosen in some cases. The above mime decoder avoids the
> bug. If your erlang has the bug, clause:test() will return multi_byte_binary:
>
> -module(clause).
> -export([test/0, f/1]).
>
> test() ->
> A = list_to_binary([1]),
> <<B:8>> = A,
> f(A).
>
> f(<<A:8>>) -> single_byte_binary;
> f(<<A:8, T/binary>>) -> multi_byte_binary.
>
More information about the erlang-questions
mailing list