[erlang-questions] BEAM micro-optimization: term comparison (> 0) versus pattern-matching

Sun Apr 16 15:41:04 CEST 2017

Mikael Pettersson writes:
 > Suppose a frequently called function returns a positive integer (always a fixnum)
 > and a boolean "success" or "failure" indicator (even in the failure case the return
 > value is significant and will be used).  Which is better from a performance
 > perspective:
 > 
 > 1. Tag the return value, {success, X} or {failure, Y}, and have callers pattern-match
 >    on that
 > 
 > or,
 > 
 > 2. Indicate failure by negating Y, and have callers match on
 > 
 >       X when X > 0 -> success case;
 >       MinusY -> failure case % MinusY =< 0 is implicit
 > 
 > Option 2 avoids the consing overheads of option 1, but I'm worried that the X > 0
 > guard may be less optimized than a plain structural pattern-match.  The last time
 > I checked, term-comparisons would check inline for identity, and otherwise call
 > the general cmp() function which would then do a lot of type-casing.
 > 
 > In my pure Erlang code I'm using option 1, but I'm reimplementing the function
 > as a NIF, and would like to avoid the consing overheads is that's indeed better.

I put together a micro-benchmark module (appended below), and have
the following observations:

- Option 2 above is about 15-25% faster than option 1 above.
- Adding an is_integer/1 guard before the X > 0 in option 2 makes the code slower.
- A major cost in option 1 is having to do case analysis on two atoms (because the
  runtime representation of an atom isn't known until the code is loaded into a
  running VM).  However, replacing the atoms with the fixnums 0 and 1 makes the
  code much slower.
- The best version I've found so far (function 'k' in the bechmark module) eliminates
  the tags, represents one case by wrapping the integer in a cons cell [X | []], and
  the other case as the plain integer.  [X | []] is slightly better than {X} since the
  latter requires an additional load and test in the pattern-matching path.

On my desktop box (OTP-19.3, Ivy Bridge @ 3.5 GHz), typical output from the benchmark is:

> test:all(100000000).
[{f,{2564769,2}},
 {g,{1806006,2}},
 {h,{1999038,2}},
 {i,{3295790,2}},
 {j,{1779385,2}},
 {k,{1504119,2}},
 {l,{1526827,2}}]

f is option 1 above, g is option 2.  h is g with added is_integer/1 check.
i is f but with integer tags not atoms.  j wraps the two cases as [X|[]] and {Y}.
k is j but with Y left unwrapped.  l is j but swaps the order and checks with
is_integer/1 before matching on [Y|_].

/Mikael

==snip==
-module(test).
-export([all/1, f/1, g/1, h/1, i/1, j/1, k/1, l/1]).

all(N) ->
  L = [ {f, fun f/1}
      , {g, fun g/1}
      , {h, fun h/1}
      , {i, fun i/1}
      , {j, fun j/1}
      , {k, fun k/1}
      , {l, fun l/1}
      ],
  [{A, F(N)} || {A, F} <- L].

f(N) -> timer:tc(fun() -> f2(N, [], []) end).
g(N) -> timer:tc(fun() -> g2(N, [], []) end).
h(N) -> timer:tc(fun() -> h2(N, [], []) end).
i(N) -> timer:tc(fun() -> i2(N, [], []) end).
j(N) -> timer:tc(fun() -> j2(N, [], []) end).
k(N) -> timer:tc(fun() -> k2(N, [], []) end).
l(N) -> timer:tc(fun() -> l2(N, [], []) end).

%% f: tag with {success, X} or {failure, Y}

f2(0, X, _Y) -> X;
f2(N, X, Y) ->
  case f3(N) of
    {success, A} -> f2(N - 1, A, Y);
    {failure, B} -> f2(N - 1, X, B)
  end.

f3(N) ->
  case N band 1 of
    0 -> {success, N};
    _ -> {failure, N}
  end.

%% g: indicate success or failure via the sign

g2(0, X, _Y) -> X;
g2(N, X, Y) ->
  case g3(N) of
    A when A > 0 -> g2(N - 1, A, Y);
    MinusB -> g2(N - 1, X, -MinusB)
  end.

g3(N) ->
  case N band 1 of
    0 -> N;
    _ -> -N
  end.

%% h: like g, but check is_integer before checking > 0

h2(0, X, _Y) -> X;
h2(N, X, Y) ->
  case g3(N) of
    A when is_integer(A), A > 0 -> h2(N - 1, A, Y);
    MinusB -> h2(N - 1, X, -MinusB)
  end.

%% i: like f, but tag with integers 0 and 1 instead

i2(0, X, _Y) -> X;
i2(N, X, Y) ->
  case i3(N) of
    {0, A} -> i2(N - 1, A, Y);
    {1, B} -> i2(N - 1, X, B)
  end.

i3(N) ->
  case N band 1 of
    0 -> {0, N};
    _ -> {1, N}
  end.

%% j: like f but drop the tags, instead tag as [X|[]] for success or {Y} for failure

j2(0, X, _Y) -> X;
j2(N, X, Y) ->
  case j3(N) of
    [A | _] -> j2(N - 1, A, Y);
    {B} -> j2(N - 1, X, B)
  end.

j3(N) ->
  case N band 1 of
    0 -> [N | []];
    _ -> {N}
  end.

%% k: like j, but only tag the success case not the failure case

k2(0, X, _Y) -> X;
k2(N, X, Y) ->
  case k3(N) of
    [A | _] -> k2(N - 1, A, Y);
    B -> k2(N - 1, X, B)
  end.

k3(N) ->
  case N band 1 of
    0 -> [N | []];
    _ -> N
  end.

%% l: like k but the other way around: tag the failure case [Y|[]],
%% check for success with is_integer/1

l2(0, X, _Y) -> X;
l2(N, X, Y) ->
  case l3(N) of
    A when is_integer(A) -> l2(N - 1, A, Y);
    [B | _] -> l2(N - 1, X, B)
  end.

l3(N) ->
  case N band 1 of
    0 -> N;
    _ -> [N | []]
  end.
==snip==