[erlang-questions] BEAM micro-optimization: term comparison (> 0) versus pattern-matching
Mikael Pettersson
mikpelinux@REDACTED
Sun Apr 16 15:41:04 CEST 2017
Mikael Pettersson writes:
> Suppose a frequently called function returns a positive integer (always a fixnum)
> and a boolean "success" or "failure" indicator (even in the failure case the return
> value is significant and will be used). Which is better from a performance
> perspective:
>
> 1. Tag the return value, {success, X} or {failure, Y}, and have callers pattern-match
> on that
>
> or,
>
> 2. Indicate failure by negating Y, and have callers match on
>
> X when X > 0 -> success case;
> MinusY -> failure case % MinusY =< 0 is implicit
>
> Option 2 avoids the consing overheads of option 1, but I'm worried that the X > 0
> guard may be less optimized than a plain structural pattern-match. The last time
> I checked, term-comparisons would check inline for identity, and otherwise call
> the general cmp() function which would then do a lot of type-casing.
>
> In my pure Erlang code I'm using option 1, but I'm reimplementing the function
> as a NIF, and would like to avoid the consing overheads is that's indeed better.
I put together a micro-benchmark module (appended below), and have
the following observations:
- Option 2 above is about 15-25% faster than option 1 above.
- Adding an is_integer/1 guard before the X > 0 in option 2 makes the code slower.
- A major cost in option 1 is having to do case analysis on two atoms (because the
runtime representation of an atom isn't known until the code is loaded into a
running VM). However, replacing the atoms with the fixnums 0 and 1 makes the
code much slower.
- The best version I've found so far (function 'k' in the bechmark module) eliminates
the tags, represents one case by wrapping the integer in a cons cell [X | []], and
the other case as the plain integer. [X | []] is slightly better than {X} since the
latter requires an additional load and test in the pattern-matching path.
On my desktop box (OTP-19.3, Ivy Bridge @ 3.5 GHz), typical output from the benchmark is:
> test:all(100000000).
[{f,{2564769,2}},
{g,{1806006,2}},
{h,{1999038,2}},
{i,{3295790,2}},
{j,{1779385,2}},
{k,{1504119,2}},
{l,{1526827,2}}]
f is option 1 above, g is option 2. h is g with added is_integer/1 check.
i is f but with integer tags not atoms. j wraps the two cases as [X|[]] and {Y}.
k is j but with Y left unwrapped. l is j but swaps the order and checks with
is_integer/1 before matching on [Y|_].
/Mikael
==snip==
-module(test).
-export([all/1, f/1, g/1, h/1, i/1, j/1, k/1, l/1]).
all(N) ->
L = [ {f, fun f/1}
, {g, fun g/1}
, {h, fun h/1}
, {i, fun i/1}
, {j, fun j/1}
, {k, fun k/1}
, {l, fun l/1}
],
[{A, F(N)} || {A, F} <- L].
f(N) -> timer:tc(fun() -> f2(N, [], []) end).
g(N) -> timer:tc(fun() -> g2(N, [], []) end).
h(N) -> timer:tc(fun() -> h2(N, [], []) end).
i(N) -> timer:tc(fun() -> i2(N, [], []) end).
j(N) -> timer:tc(fun() -> j2(N, [], []) end).
k(N) -> timer:tc(fun() -> k2(N, [], []) end).
l(N) -> timer:tc(fun() -> l2(N, [], []) end).
%% f: tag with {success, X} or {failure, Y}
f2(0, X, _Y) -> X;
f2(N, X, Y) ->
case f3(N) of
{success, A} -> f2(N - 1, A, Y);
{failure, B} -> f2(N - 1, X, B)
end.
f3(N) ->
case N band 1 of
0 -> {success, N};
_ -> {failure, N}
end.
%% g: indicate success or failure via the sign
g2(0, X, _Y) -> X;
g2(N, X, Y) ->
case g3(N) of
A when A > 0 -> g2(N - 1, A, Y);
MinusB -> g2(N - 1, X, -MinusB)
end.
g3(N) ->
case N band 1 of
0 -> N;
_ -> -N
end.
%% h: like g, but check is_integer before checking > 0
h2(0, X, _Y) -> X;
h2(N, X, Y) ->
case g3(N) of
A when is_integer(A), A > 0 -> h2(N - 1, A, Y);
MinusB -> h2(N - 1, X, -MinusB)
end.
%% i: like f, but tag with integers 0 and 1 instead
i2(0, X, _Y) -> X;
i2(N, X, Y) ->
case i3(N) of
{0, A} -> i2(N - 1, A, Y);
{1, B} -> i2(N - 1, X, B)
end.
i3(N) ->
case N band 1 of
0 -> {0, N};
_ -> {1, N}
end.
%% j: like f but drop the tags, instead tag as [X|[]] for success or {Y} for failure
j2(0, X, _Y) -> X;
j2(N, X, Y) ->
case j3(N) of
[A | _] -> j2(N - 1, A, Y);
{B} -> j2(N - 1, X, B)
end.
j3(N) ->
case N band 1 of
0 -> [N | []];
_ -> {N}
end.
%% k: like j, but only tag the success case not the failure case
k2(0, X, _Y) -> X;
k2(N, X, Y) ->
case k3(N) of
[A | _] -> k2(N - 1, A, Y);
B -> k2(N - 1, X, B)
end.
k3(N) ->
case N band 1 of
0 -> [N | []];
_ -> N
end.
%% l: like k but the other way around: tag the failure case [Y|[]],
%% check for success with is_integer/1
l2(0, X, _Y) -> X;
l2(N, X, Y) ->
case l3(N) of
A when is_integer(A) -> l2(N - 1, A, Y);
[B | _] -> l2(N - 1, X, B)
end.
l3(N) ->
case N band 1 of
0 -> N;
_ -> [N | []]
end.
==snip==
More information about the erlang-questions
mailing list