Pickler combinators
Joel Reymont
joelr1@REDACTED
Sun Dec 25 21:58:51 CET 2005
Folks,
Inspired by http://research.microsoft.com/~akenn/fun/
picklercombinators.pdf and as part of my Haskell to Erlang rewrite I
would like to offer my take on Pickler Combinators.
Parsing binaries is great but all the manual gets boring after a
while. This code streamlines things quite a bit (see Unit Test code).
My goal was to have a single spec for pickling and unpickling, to be
able to pickle nested structures and to be able to have "enums".
Please let me know if I made any performance-related mistakes and
feel free to make suggestions.
Thanks, Joel
P.S. I get data in little-endian format so I didn't bother with be_
picklers.
-module(pickle).
-export([pickle/2, unpickle/2, test/0]).
-export([byte/0, short/0, sshort/0,
int/0, sint/0, long/0, slong/0]).
-export([le_short/0, le_sshort/0, le_int/0,
le_sint/0, le_long/0, le_slong/0]).
-export([list/2, choice/2, optional/1, wrap/2,
tuple/1, record/2]).
-compile([export_all]).
%% Pickle and unpickle
pickle({Pickler, _}, Value) ->
lists:reverse(Pickler([], Value)).
unpickle({_, Pickler}, Bin) ->
element(1, Pickler(Bin)).
%% Byte
byte() ->
{fun write_byte/2, fun read_byte/1}.
write_byte(Acc, Sbyte) ->
[<<Sbyte:8>>|Acc].
read_byte(Bin) ->
<<Sbyte:8, Rest/binary>> = Bin,
{Sbyte, Rest}.
%% Unsigned short
short() ->
{fun write_short/2, fun read_short/1}.
write_short(Acc, Word) ->
[<<Word:16>>|Acc].
read_short(Bin) ->
<<Word:16, Rest/binary>> = Bin,
{Word, Rest}.
%% Signed short
sshort() ->
{fun write_sshort/2, fun read_sshort/1}.
write_sshort(Acc, Word) ->
[<<Word:16/signed>>|Acc].
read_sshort(Bin) ->
<<Word:16/signed, Rest/binary>> = Bin,
{Word, Rest}.
%% Unsigned little-endian short
le_short() ->
{fun write_le_short/2, fun read_le_short/1}.
write_le_short(Acc, Word) ->
[<<Word:16/little>>|Acc].
read_le_short(Bin) ->
<<Word:16/little, Rest/binary>> = Bin,
{Word, Rest}.
%% Signed little-endian short
le_sshort() ->
{fun write_le_sshort/2, fun read_le_sshort/1}.
write_le_sshort(Acc, Word) ->
[<<Word:16/little-signed>>|Acc].
read_le_sshort(Bin) ->
<<Word:16/little-signed, Rest/binary>> = Bin,
{Word, Rest}.
%% Unsigned int
int() ->
{fun write_int/2, fun read_int/1}.
write_int(Acc, Word) ->
[<<Word:32>>|Acc].
read_int(Bin) ->
<<Word:32, Rest/binary>> = Bin,
{Word, Rest}.
%% Signed int
sint() ->
{fun write_sint/2, fun read_sint/1}.
write_sint(Acc, Word) ->
[<<Word:32/signed>>|Acc].
read_sint(Bin) ->
<<Word:32/signed, Rest/binary>> = Bin,
{Word, Rest}.
%% Unsigned little-endian int
le_int() ->
{fun write_le_int/2, fun read_le_int/1}.
write_le_int(Acc, Word) ->
[<<Word:32/little>>|Acc].
read_le_int(Bin) ->
<<Word:32/little, Rest/binary>> = Bin,
{Word, Rest}.
%% Signed little-endian int
le_sint() ->
{fun write_le_sint/2, fun read_le_sint/1}.
write_le_sint(Acc, Word) ->
[<<Word:32/little-signed>>|Acc].
read_le_sint(Bin) ->
<<Word:32/little-signed, Rest/binary>> = Bin,
{Word, Rest}.
%% Unsigned long
long() ->
{fun write_long/2, fun read_long/1}.
write_long(Acc, Word) ->
[<<Word:64>>|Acc].
read_long(Bin) ->
<<Word:64, Rest/binary>> = Bin,
{Word, Rest}.
%% Signed long
slong() ->
{fun write_slong/2, fun read_slong/1}.
write_slong(Acc, Word) ->
[<<Word:64/signed>>|Acc].
read_slong(Bin) ->
<<Word:64/signed, Rest/binary>> = Bin,
{Word, Rest}.
%% Unsigned little-endian long
le_long() ->
{fun write_le_long/2, fun read_le_long/1}.
write_le_long(Acc, Word) ->
[<<Word:64/little>>|Acc].
read_le_long(Bin) ->
<<Word:64/little, Rest/binary>> = Bin,
{Word, Rest}.
%% Signed little-endian long
le_slong() ->
{fun write_le_slong/2, fun read_le_slong/1}.
write_le_slong(Acc, Word) ->
[<<Word:64/little-signed>>|Acc].
read_le_slong(Bin) ->
<<Word:64/little-signed, Rest/binary>> = Bin,
{Word, Rest}.
%% List. We supply a pickler for list length
%% as well as a pickler for list elements.
list(Len, Elem) ->
{fun(Acc, List) -> write_list(Len, Elem, Acc, List) end,
fun(Bin) -> read_list(Len, Elem, Bin) end }.
write_list({Len, _}, {Elem, _}, Acc, List) ->
Acc1 = Len(Acc, length(List)),
Fun = fun(A, Acc2) -> Elem(Acc2, A) end,
lists:foldr(Fun, Acc1, List).
read_list({_, Len}, {_, Elem}, Bin) ->
{N, Bin1} = Len(Bin),
read_list(N, [], Elem, Bin1).
read_list(0, Acc, _, Bin) -> {Acc, Bin};
read_list(N, Acc, Elem, Bin) ->
{E, Bin1} = Elem(Bin),
read_list(N - 1, [E|Acc], Elem, Bin1).
%% Alternative selection. This could probably use some
%% deeper thinking. Otherwise, we take a pickler for the tag
%% as well as a tuple of two functions. The first one
%% returns the tag value and the pickler based on a supplied
%% value. The second one selects a pickler based on the tag value.
choice(Tag, Choice) ->
{fun(Acc, Value) -> write_choice(Tag, Choice, Acc, Value) end,
fun(Bin) -> read_choice(Tag, Choice, Bin) end }.
write_choice({Tag, _}, {Choice, _}, Acc, Value)
when is_function(Tag),
is_function(Choice) ->
{T, {Pickler, _}} = Choice(Value),
Acc1 = Tag(Acc, T),
Pickler(Acc1, Value).
read_choice({_, Tag}, {_, Choice}, Bin)
when is_function(Tag),
is_function(Choice) ->
{T, Bin1} = Tag(Bin),
{_, Pickler} = Choice(T),
Pickler(Bin1).
%% Optional value. Use 'none' to indicate no value.
optional(Pickler) ->
{fun(Acc, Value) -> write_optional(Pickler, Acc, Value) end,
fun(Bin) -> read_optional(Pickler, Bin) end}.
write_optional(_, Acc, none) ->
[<<0>>|Acc];
write_optional({Pickler, _}, Acc, Value) ->
Pickler([<<1>>|Acc], Value).
read_optional({_, Pickler}, Bin) ->
<<Opt:8, Bin1/binary>> = Bin,
case Opt of
0 -> {none, Bin1};
_ -> Pickler(Bin1)
end.
%% Wrapper. Take a pickler and a wrapper tuple of two functions
%% where the first one is used to convert the value before
%% pickling and the second one after unpickling.
wrap(Wrap, Pickler) ->
{fun(Acc, Value) -> write_wrap(Wrap, Pickler, Acc, Value) end,
fun(Bin) -> read_wrap(Wrap, Pickler, Bin) end}.
write_wrap({Wrap, _}, {Pickler, _}, Acc, Value) ->
Pickler(Acc, Wrap(Value)).
read_wrap({_, Wrap}, {_, Pickler}, Bin) ->
{Value, Bin1} = Pickler(Bin),
{Wrap(Value), Bin1}.
%% Erlang does not support enumerations but I want to have
%% {cow, sheep, horse} as well as [{cow, 10}, {sheep, 100}]
%% and be able to marshal these back and forth. Enumerated
%% values start from 1 for the tuple case.
prep_enum_tuple(Enum)
when is_tuple(Enum) ->
prep_enum_tuple(Enum, size(Enum), [], []).
prep_enum_tuple(_, 0, Acc1, Acc2) ->
{Acc1, Acc2};
prep_enum_tuple(Enum, N, Acc1, Acc2) ->
prep_enum_tuple(Enum, N - 1,
[{element(N, Enum), N}|Acc1],
[{N, element(N, Enum)}|Acc2]).
prep_enum_list(Enum)
when is_list(Enum) ->
% expect a list of {tag, #value} pairs
Inv = fun({Key, Val}) -> {Val, Key} end,
InvEnum = lists:map(Inv, Enum),
{Enum, InvEnum}.
wrap_enum(Enum)
when is_tuple(Enum) ->
wrap_enum_1(prep_enum_tuple(Enum));
wrap_enum(Enum)
when is_list(Enum) ->
wrap_enum_1(prep_enum_list(Enum)).
wrap_enum_1({List1, List2}) ->
F = fun(A, B) -> A < B end,
% gb_trees needs an ordered list
Dict1 = lists:sort(F, List1),
Dict2 = lists:sort(F, List2),
Tree1 = gb_trees:from_orddict(Dict1),
Tree2 = gb_trees:from_orddict(Dict2),
{fun(Key) -> gb_trees:get(Key, Tree1) end,
fun(Key) -> gb_trees:get(Key, Tree2) end}.
enum(Enum, Pickler) ->
wrap(wrap_enum(Enum), Pickler).
%% Tuple. Uses a tuple of picklers of the same size.
tuple(Picklers)
when is_tuple(Picklers) ->
wrap({fun tuple_to_list/1,
fun list_to_tuple/1},
tuple_0(tuple_to_list(Picklers))).
%% Record. We rely on Erlang records being tuples
%% and just add the record tag as the first element
%% when unpickling the record.
record(Tag, Picklers)
when is_tuple(Picklers) ->
wrap({fun(Record) -> record_to_list(Tag, Record) end,
fun(List) -> list_to_record(Tag, List) end},
tuple_0(tuple_to_list(Picklers))).
write_tuple_0([], Acc, _) ->
Acc;
write_tuple_0([{Pickler, _}|Rest], Acc, [Value|Tuple]) ->
write_tuple_0(Rest, Pickler(Acc, Value), Tuple).
read_tuple_0(Picklers, Bin) ->
read_tuple_0(Picklers, Bin, []).
read_tuple_0([], Bin, Acc) ->
{lists:reverse(Acc), Bin};
read_tuple_0([{_, Pickler}|Rest], Bin, Acc) ->
{Value, Bin1} = Pickler(Bin),
read_tuple_0(Rest, Bin1, [Value|Acc]).
%% It's convenient to be able to convert the tuple
%% to a list first as there's no erlang:prepend_element/2.
tuple_0(Picklers)
when is_list(Picklers) ->
{fun(Acc, Value) -> write_tuple_0(Picklers, Acc, Value) end,
fun(Bin) -> read_tuple_0(Picklers, Bin) end}.
record_to_list(Tag, Record)
when is_atom(Tag) ->
lists:nthtail(1, tuple_to_list(Record)).
list_to_record(Tag, List)
when is_atom(Tag),
is_list(List) ->
list_to_tuple([Tag|List]).
%% Unit test
-define(error1(Expr, Expected, Actual),
io:format("~s is ~w instead of ~w at ~w:~w~n",
[??Expr, Actual, Expected, ?MODULE, ?LINE])).
-define(match(Expected, Expr),
fun() ->
Actual = (catch (Expr)),
case Actual of
Expected ->
{success, Actual};
_ ->
?error1(Expr, Expected, Actual),
erlang:error("match failed", Actual)
end
end()).
check(Pickler, Value) ->
X = pickle(Pickler, Value),
Bin = list_to_binary(X),
unpickle(Pickler, Bin).
test() ->
test1(),
test2(),
test3(),
test4(),
test5(),
test6(),
test7(),
test8(),
test9(),
test10(),
test11(),
test12(),
test13(),
test14(),
ok.
test1() ->
X = 16#ff,
?match(X, check(byte(), X)).
test2() ->
X = 16#ffff,
?match(X, check(short(), X)).
test3() ->
X = -1,
?match(X, check(sshort(), X)).
test4() ->
X = 16#ffffffff,
?match(X, check(int(), X)).
test5() ->
X = -1,
?match(X, check(sint(), X)).
test6() ->
X = 16#aabbccddeeff0011,
?match(X, check(le_long(), X)).
test7() ->
X = -1,
?match(X, check(slong(), X)).
test8() ->
X = "Wazzup!",
?match(X, check(list(int(), byte()), X)).
%% A choice of serializing either a list or a long.
value2tag(Action)
when is_list(Action) ->
{0, list(byte(), byte())};
value2tag(_) ->
{1, long()}.
tag2value(0) ->
list(byte(), byte());
tag2value(1) ->
long().
selector() ->
{fun value2tag/1, fun tag2value/1}.
test9() ->
X1 = "Just testing",
X2 = 16#ffff,
?match(X1, check(choice(byte(), selector()), X1)),
?match(X2, check(choice(byte(), selector()), X2)).
%% Optional value
test10() ->
X1 = none,
X2 = 55,
?match(X1, check(optional(byte()), X1)),
?match(X2, check(optional(byte()), X2)).
%% Tuple given as a tuple and a list of key/value pairs.
test11() ->
% tuple enum
Enum1 = {cow, sheep, horse},
{FROM1, TO1} = wrap_enum(Enum1),
?match(1, FROM1(cow)),
?match(2, FROM1(sheep)),
?match(3, FROM1(horse)),
?match(cow, TO1(1)),
?match(sheep, TO1(2)),
?match(horse, TO1(3)),
% list enum
Enum2 = [{cow, 20}, {sheep, 30}, {horse, 40}],
{FROM2, TO2} = wrap_enum(Enum2),
?match(20, FROM2(cow)),
?match(30, FROM2(sheep)),
?match(40, FROM2(horse)),
?match(cow, TO2(20)),
?match(sheep, TO2(30)),
?match(horse, TO2(40)).
test12() ->
Enum1 = {cow, sheep, horse},
Enum2 = [{cow, 20}, {sheep, 30}, {horse, 40}],
?match(cow, check(enum(Enum1, byte()), cow)),
?match(sheep, check(enum(Enum2, byte()), sheep)).
test13() ->
Tuple = {"Joel", 16#ff00, none},
Spec = {list(byte(),byte()), le_short(), optional(byte())},
?match(Tuple, check(tuple(Spec), Tuple)).
%% Nested records.
-record(foo, { a, b }).
-record(bar, { c, d }).
-record(baz, { e, f }).
test14() ->
R1 = #foo {
a = 10,
b = #bar {
c = 20,
d = #baz {
e = 30,
f = "Enough nesting!"
}
}
},
Pickler = record(foo, {
byte(),
record(bar, {
int(),
record(baz, {
le_sshort(),
list(byte(), byte())
})
})
}),
?match(R1, check(Pickler, R1)).
--
http://wagerlabs.com/
More information about the erlang-questions
mailing list