Pickler combinators

Joel Reymont joelr1@REDACTED
Sun Dec 25 21:58:51 CET 2005


Folks,

Inspired by http://research.microsoft.com/~akenn/fun/ 
picklercombinators.pdf and as part of my Haskell to Erlang rewrite I  
would like to offer my take on Pickler Combinators.

Parsing binaries is great but all the manual gets boring after a  
while. This code streamlines things quite a bit (see Unit Test code).  
My goal was to have a single spec for pickling and unpickling, to be  
able to pickle nested structures and to be able to have "enums".

Please let me know if I made any performance-related mistakes and  
feel free to make suggestions.

	Thanks, Joel

P.S. I get data in little-endian format so I didn't bother with be_  
picklers.

-module(pickle).

-export([pickle/2, unpickle/2, test/0]).
-export([byte/0, short/0, sshort/0,
	 int/0, sint/0, long/0, slong/0]).
-export([le_short/0, le_sshort/0, le_int/0,
	 le_sint/0, le_long/0, le_slong/0]).
-export([list/2, choice/2, optional/1, wrap/2,
	 tuple/1, record/2]).

-compile([export_all]).

%% Pickle and unpickle

pickle({Pickler, _}, Value) ->
     lists:reverse(Pickler([], Value)).

unpickle({_, Pickler}, Bin) ->
     element(1, Pickler(Bin)).

%% Byte

byte() ->
     {fun write_byte/2, fun read_byte/1}.

write_byte(Acc, Sbyte) ->
     [<<Sbyte:8>>|Acc].

read_byte(Bin) ->
     <<Sbyte:8, Rest/binary>> = Bin,
     {Sbyte, Rest}.

%% Unsigned short

short() ->
     {fun write_short/2, fun read_short/1}.

write_short(Acc, Word) ->
     [<<Word:16>>|Acc].

read_short(Bin) ->
     <<Word:16, Rest/binary>> = Bin,
     {Word, Rest}.

%% Signed short

sshort() ->
     {fun write_sshort/2, fun read_sshort/1}.

write_sshort(Acc, Word) ->
     [<<Word:16/signed>>|Acc].

read_sshort(Bin) ->
     <<Word:16/signed, Rest/binary>> = Bin,
     {Word, Rest}.

%% Unsigned little-endian short

le_short() ->
     {fun write_le_short/2, fun read_le_short/1}.

write_le_short(Acc, Word) ->
     [<<Word:16/little>>|Acc].

read_le_short(Bin) ->
     <<Word:16/little, Rest/binary>> = Bin,
     {Word, Rest}.

%% Signed little-endian short

le_sshort() ->
     {fun write_le_sshort/2, fun read_le_sshort/1}.

write_le_sshort(Acc, Word) ->
     [<<Word:16/little-signed>>|Acc].

read_le_sshort(Bin) ->
     <<Word:16/little-signed, Rest/binary>> = Bin,
     {Word, Rest}.

%% Unsigned int

int() ->
     {fun write_int/2, fun read_int/1}.

write_int(Acc, Word) ->
     [<<Word:32>>|Acc].

read_int(Bin) ->
     <<Word:32, Rest/binary>> = Bin,
     {Word, Rest}.

%% Signed int

sint() ->
     {fun write_sint/2, fun read_sint/1}.

write_sint(Acc, Word) ->
     [<<Word:32/signed>>|Acc].

read_sint(Bin) ->
     <<Word:32/signed, Rest/binary>> = Bin,
     {Word, Rest}.

%% Unsigned little-endian int

le_int() ->
     {fun write_le_int/2, fun read_le_int/1}.

write_le_int(Acc, Word) ->
     [<<Word:32/little>>|Acc].

read_le_int(Bin) ->
     <<Word:32/little, Rest/binary>> = Bin,
     {Word, Rest}.

%% Signed little-endian int

le_sint() ->
     {fun write_le_sint/2, fun read_le_sint/1}.

write_le_sint(Acc, Word) ->
     [<<Word:32/little-signed>>|Acc].

read_le_sint(Bin) ->
     <<Word:32/little-signed, Rest/binary>> = Bin,
     {Word, Rest}.

%% Unsigned long

long() ->
     {fun write_long/2, fun read_long/1}.

write_long(Acc, Word) ->
     [<<Word:64>>|Acc].

read_long(Bin) ->
     <<Word:64, Rest/binary>> = Bin,
     {Word, Rest}.

%% Signed long

slong() ->
     {fun write_slong/2, fun read_slong/1}.

write_slong(Acc, Word) ->
     [<<Word:64/signed>>|Acc].

read_slong(Bin) ->
     <<Word:64/signed, Rest/binary>> = Bin,
     {Word, Rest}.

%% Unsigned little-endian long

le_long() ->
     {fun write_le_long/2, fun read_le_long/1}.

write_le_long(Acc, Word) ->
     [<<Word:64/little>>|Acc].

read_le_long(Bin) ->
     <<Word:64/little, Rest/binary>> = Bin,
     {Word, Rest}.

%% Signed little-endian long

le_slong() ->
     {fun write_le_slong/2, fun read_le_slong/1}.

write_le_slong(Acc, Word) ->
     [<<Word:64/little-signed>>|Acc].

read_le_slong(Bin) ->
     <<Word:64/little-signed, Rest/binary>> = Bin,
     {Word, Rest}.

%% List. We supply a pickler for list length
%% as well as a pickler for list elements.

list(Len, Elem) ->
     {fun(Acc, List) -> write_list(Len, Elem, Acc, List) end,
      fun(Bin) -> read_list(Len, Elem, Bin) end }.

write_list({Len, _}, {Elem, _}, Acc, List) ->
     Acc1 = Len(Acc, length(List)),
     Fun = fun(A, Acc2) -> Elem(Acc2, A) end,
     lists:foldr(Fun, Acc1, List).

read_list({_, Len}, {_, Elem}, Bin) ->
     {N, Bin1} = Len(Bin),
     read_list(N, [], Elem, Bin1).

read_list(0, Acc, _, Bin) -> {Acc, Bin};
read_list(N, Acc, Elem, Bin) ->
     {E, Bin1} = Elem(Bin),
     read_list(N - 1, [E|Acc], Elem, Bin1).

%% Alternative selection. This could probably use some
%% deeper thinking. Otherwise, we take a pickler for the tag
%% as well as a tuple of two functions. The first one
%% returns the tag value and the pickler based on a supplied
%% value. The second one selects a pickler based on the tag value.

choice(Tag, Choice) ->
     {fun(Acc, Value) -> write_choice(Tag, Choice, Acc, Value) end,
      fun(Bin) -> read_choice(Tag, Choice, Bin) end }.

write_choice({Tag, _}, {Choice, _}, Acc, Value)
   when is_function(Tag),
        is_function(Choice) ->
     {T, {Pickler, _}} = Choice(Value),
     Acc1 = Tag(Acc, T),
     Pickler(Acc1, Value).

read_choice({_, Tag}, {_, Choice}, Bin)
   when is_function(Tag),
        is_function(Choice) ->
     {T, Bin1} = Tag(Bin),
     {_, Pickler} = Choice(T),
     Pickler(Bin1).

%% Optional value. Use 'none' to indicate no value.

optional(Pickler) ->
     {fun(Acc, Value) -> write_optional(Pickler, Acc, Value) end,
      fun(Bin) -> read_optional(Pickler, Bin) end}.
	
write_optional(_, Acc, none) ->
     [<<0>>|Acc];

write_optional({Pickler, _}, Acc, Value) ->
     Pickler([<<1>>|Acc], Value).

read_optional({_, Pickler}, Bin) ->
     <<Opt:8, Bin1/binary>> = Bin,
     case Opt of
	0 -> {none, Bin1};
	_ -> Pickler(Bin1)
     end.
		
%% Wrapper. Take a pickler and a wrapper tuple of two functions
%% where the first one is used to convert the value before
%% pickling and the second one after unpickling.

wrap(Wrap, Pickler) ->
     {fun(Acc, Value) -> write_wrap(Wrap, Pickler, Acc, Value) end,
      fun(Bin) -> read_wrap(Wrap, Pickler, Bin) end}.

write_wrap({Wrap, _}, {Pickler, _}, Acc, Value) ->
     Pickler(Acc, Wrap(Value)).

read_wrap({_, Wrap}, {_, Pickler}, Bin) ->
     {Value, Bin1} = Pickler(Bin),
     {Wrap(Value), Bin1}.

%% Erlang does not support enumerations but I want to have
%% {cow, sheep, horse} as well as [{cow, 10}, {sheep, 100}]
%% and be able to marshal these back and forth. Enumerated
%% values start from 1 for the tuple case.

prep_enum_tuple(Enum)
   when is_tuple(Enum) ->
     prep_enum_tuple(Enum, size(Enum), [], []).

prep_enum_tuple(_, 0, Acc1, Acc2) ->
     {Acc1, Acc2};

prep_enum_tuple(Enum, N, Acc1, Acc2) ->
     prep_enum_tuple(Enum, N - 1,
		    [{element(N, Enum), N}|Acc1],
		    [{N, element(N, Enum)}|Acc2]).

prep_enum_list(Enum)
   when is_list(Enum) ->
     % expect a list of {tag, #value} pairs
     Inv = fun({Key, Val}) -> {Val, Key} end,
     InvEnum = lists:map(Inv, Enum),
     {Enum, InvEnum}.

wrap_enum(Enum)
   when is_tuple(Enum) ->
     wrap_enum_1(prep_enum_tuple(Enum));

wrap_enum(Enum)
   when is_list(Enum) ->
     wrap_enum_1(prep_enum_list(Enum)).

wrap_enum_1({List1, List2}) ->
     F = fun(A, B) -> A < B end,
     % gb_trees needs an ordered list
     Dict1 = lists:sort(F, List1),
     Dict2 = lists:sort(F, List2),
     Tree1 = gb_trees:from_orddict(Dict1),
     Tree2 = gb_trees:from_orddict(Dict2),
     {fun(Key) -> gb_trees:get(Key, Tree1) end,
      fun(Key) -> gb_trees:get(Key, Tree2) end}.	

enum(Enum, Pickler) ->
     wrap(wrap_enum(Enum), Pickler).

%% Tuple. Uses a tuple of picklers of the same size.

tuple(Picklers)
   when is_tuple(Picklers) ->
     wrap({fun tuple_to_list/1,
	  fun list_to_tuple/1},
	 tuple_0(tuple_to_list(Picklers))).

%% Record. We rely on Erlang records being tuples
%% and just add the record tag as the first element
%% when unpickling the record.

record(Tag, Picklers)
   when is_tuple(Picklers) ->
     wrap({fun(Record) -> record_to_list(Tag, Record) end,
	  fun(List) -> list_to_record(Tag, List) end},
	 tuple_0(tuple_to_list(Picklers))).

write_tuple_0([], Acc, _) ->
     Acc;

write_tuple_0([{Pickler, _}|Rest], Acc, [Value|Tuple]) ->
     write_tuple_0(Rest, Pickler(Acc, Value), Tuple).

read_tuple_0(Picklers, Bin) ->
     read_tuple_0(Picklers, Bin, []).

read_tuple_0([], Bin, Acc) ->
     {lists:reverse(Acc), Bin};

read_tuple_0([{_, Pickler}|Rest], Bin, Acc) ->
     {Value, Bin1} = Pickler(Bin),
     read_tuple_0(Rest, Bin1, [Value|Acc]).

%% It's convenient to be able to convert the tuple
%% to a list first as there's no erlang:prepend_element/2.

tuple_0(Picklers)
   when is_list(Picklers) ->
     {fun(Acc, Value) -> write_tuple_0(Picklers, Acc, Value) end,
      fun(Bin) -> read_tuple_0(Picklers, Bin) end}.

record_to_list(Tag, Record)
   when is_atom(Tag) ->
     lists:nthtail(1, tuple_to_list(Record)).

list_to_record(Tag, List)
   when is_atom(Tag),
        is_list(List) ->
     list_to_tuple([Tag|List]).

%% Unit test

-define(error1(Expr, Expected, Actual),
	io:format("~s is ~w instead of ~w at ~w:~w~n",
		  [??Expr, Actual, Expected, ?MODULE, ?LINE])).

-define(match(Expected, Expr),
         fun() ->
		Actual = (catch (Expr)),
		case Actual of
		    Expected ->
			{success, Actual};
		    _ ->
			?error1(Expr, Expected, Actual),
			erlang:error("match failed", Actual)
		end
	end()).

check(Pickler, Value) ->
     X = pickle(Pickler, Value),
     Bin = list_to_binary(X),
     unpickle(Pickler, Bin).

test() ->
     test1(),
     test2(),
     test3(),
     test4(),
     test5(),
     test6(),
     test7(),
     test8(),
     test9(),
     test10(),
     test11(),
     test12(),
     test13(),
     test14(),
     ok.

test1() ->
     X = 16#ff,
     ?match(X, check(byte(), X)).

test2() ->
     X = 16#ffff,
     ?match(X, check(short(), X)).

test3() ->
     X = -1,
     ?match(X, check(sshort(), X)).

test4() ->
     X = 16#ffffffff,
     ?match(X, check(int(), X)).

test5() ->
     X = -1,
     ?match(X, check(sint(), X)).

test6() ->
     X = 16#aabbccddeeff0011,
     ?match(X, check(le_long(), X)).

test7() ->
     X = -1,
     ?match(X, check(slong(), X)).

test8() ->
     X = "Wazzup!",
     ?match(X, check(list(int(), byte()), X)).

%% A choice of serializing either a list or a long.

value2tag(Action)
   when is_list(Action) ->
     {0, list(byte(), byte())};

value2tag(_) ->
     {1, long()}.

tag2value(0) ->
     list(byte(), byte());

tag2value(1) ->
     long().

selector() ->
     {fun value2tag/1, fun tag2value/1}.

test9() ->
     X1 = "Just testing",
     X2 = 16#ffff,
     ?match(X1, check(choice(byte(), selector()), X1)),
     ?match(X2, check(choice(byte(), selector()), X2)).

%% Optional value

test10() ->
     X1 = none,
     X2 = 55,
     ?match(X1, check(optional(byte()), X1)),
     ?match(X2, check(optional(byte()), X2)).

%% Tuple given as a tuple and a list of key/value pairs.

test11() ->
     % tuple enum
     Enum1 = {cow, sheep, horse},
     {FROM1, TO1} = wrap_enum(Enum1),
     ?match(1, FROM1(cow)),
     ?match(2, FROM1(sheep)),
     ?match(3, FROM1(horse)),
     ?match(cow, TO1(1)),
     ?match(sheep, TO1(2)),
     ?match(horse, TO1(3)),
     % list enum
     Enum2 = [{cow, 20}, {sheep, 30}, {horse, 40}],
     {FROM2, TO2} = wrap_enum(Enum2),
     ?match(20, FROM2(cow)),
     ?match(30, FROM2(sheep)),
     ?match(40, FROM2(horse)),
     ?match(cow, TO2(20)),
     ?match(sheep, TO2(30)),
     ?match(horse, TO2(40)).

test12() ->
     Enum1 = {cow, sheep, horse},
     Enum2 = [{cow, 20}, {sheep, 30}, {horse, 40}],
     ?match(cow, check(enum(Enum1, byte()), cow)),
     ?match(sheep, check(enum(Enum2, byte()), sheep)).

test13() ->
     Tuple = {"Joel", 16#ff00, none},
     Spec = {list(byte(),byte()), le_short(), optional(byte())},
     ?match(Tuple, check(tuple(Spec), Tuple)).

%% Nested records.

-record(foo, { a, b }).
-record(bar, { c, d }).
-record(baz, { e, f }).
	
test14() ->
     R1 = #foo {
       a = 10,
       b = #bar {
	c = 20,
	d = #baz {
	  e = 30,
	  f = "Enough nesting!"
	 }
        }
      },
     Pickler = record(foo, {
		       byte(),
		       record(bar, {
				int(),
				record(baz, {
					 le_sshort(),
					 list(byte(), byte())
					})
			       })
		      }),
     ?match(R1, check(Pickler, R1)).


--
http://wagerlabs.com/








More information about the erlang-questions mailing list