[erlang-questions] Illegal Guard?

Tue Feb 9 02:18:53 CET 2016

On 6/02/16 4:25 am, Trond Endrestøl wrote:
> Is there a way of convincing the Erlang compiler that a certain
> function is free of any side effects?
I did some work on this a *long* time ago (back in the 90s) but then the
language changed (funs were added) making my analysis useless.  More
recently, see

http://user.it.uu.se/~kostis/Papers/purity.pdf

The thing is that "purity" isn't quite strong enough.
We want *boundedness* as well: bounded time and
bounded stack space.  It is a real pity that Erlang
allows length(_) in guards because usually what you
really want is something *bounded*: has exactly N
elements, has at least N elements, has at most N
elements.  But at least length(L) is *bounded*.

Note also, as the paper linked above certainly does,
that hot-loading means that you can never be certain
that a function from another module will be pure when
you call it.
>
> Below is what I believe to be a valid case.
>
> While watching prof. Simon Thompson's Erlang Master Classes last year,
> he demonstrated in Master Class 1, Video 6, a parser where parts of
> the Erlang code was duplicated too much for my own taste.
>
> Ref.: http://www.kent.ac.uk/elearning/themes/masterclasses.html
>
> % Master Class 1, Video 6:
>
> % I really wish we could convince the compiler to believe these three
> % functions are without any side effects, and thus suitable as guard
> % tests.
>
> % Using macros is one way of managing the duplicate code.
>
> -spec is_alpha(integer()) -> boolean().
>
> -define(IS_ALPHA(Ch), $a =< Ch andalso Ch =< $z).
>
> is_alpha(Ch) ->
>      ?IS_ALPHA(Ch).
>
> -spec is_num(integer()) -> boolean().
>
> -define(IS_NUM(Ch), $0 =< Ch andalso Ch =< $9).
>
> is_num(Ch) ->
>      ?IS_NUM(Ch).
>
> -spec is_white(integer()) -> boolean().
>
> -define(HT,  9).
> -define(LF, 10).
> -define(VT, 11).
> -define(FF, 12).
> -define(CR, 13).
> -define(SP, 32).
>
> -define(IS_WHITE(Ch),
>      (Ch == ?SP) orelse (Ch == ?HT) orelse (Ch == ?LF) orelse
>      (Ch == ?CR) orelse (Ch == ?FF) orelse (Ch == ?VT)).
>
> is_white(Ch) ->
>      ?IS_WHITE(Ch).
>
> %% Lines omitted
>
> -spec parse(string()) -> {expr(), string()} | string().
>
> parse([$( | Rest]) ->
>      {E1, Rest1}      = parse(Rest),
>      [Op | Rest2]     = parse(Rest1),
>      {E2, Rest3}      = parse(Rest2),
>      [$) | RestFinal] = parse(Rest3),
>      {case Op of
>           $+ ->
>               {'add', E1, E2};
>           $- ->
>               {'sub', E1, E2};
>           $* ->
>               {'mul', E1, E2};
>           $/ ->
>               {'div', E1, E2}
>       end,
>      RestFinal};
> parse([Ch | Rest]) when (Ch == $)) orelse (Ch == $+) orelse
>                          (Ch == $-) orelse (Ch == $*) orelse
>                          (Ch == $/) ->
>      [Ch | Rest];
> parse([Ch | Rest]) when ?IS_ALPHA(Ch) ->
>      {Succeeds, Remainder} = get_while(fun is_alpha/1, Rest),
>      {{var, list_to_atom([Ch | Succeeds])}, Remainder};
> parse([Ch | Rest]) when ?IS_NUM(Ch) ->
>      {Succeeds, Remainder} = get_while(fun is_num/1, Rest),
>      {{num, list_to_integer([Ch | Succeeds])}, Remainder};
> parse([Ch | Rest]) when ?IS_WHITE(Ch) ->
>      {_Succeeds, Remainder} = get_while(fun is_white/1, Rest),
>      parse(Remainder).

Really, user-defined guards would not make this good.
Here's something better.

-spec classify(integer()) -> letter | digit | space | open | error.
%  classify(C) says what C can be at the beginning of an expression.

classify(C) when C >= $a, C =< $z -> letter;
classify(C) when C >= $A, C =< $A -> letter;
classify(C) when C >= $0, C =< $9 -> digit;
classify(C) when C == $\t; C == $\n; C == $\r;
   C == $\f; C == $\v; C == $\s    -> space;
classify($() -> open;
classify(_) -> error.

parse([C|Cs]) ->
     case classify(C)
       of space ->
              parse(Cs)
        ; letter ->
              take(letter, Cs, [C], fun (Ts) -> {var,list_to_atom(Ts)} end)
        ; digit ->
              take(digit,  Cs, [C], fun (Ts) -> 
{num,list_to_integer(Ts)} end)
        ; open ->
              {E1,[Op|Cs1]} = parse(Cs),
              {E2,[$)|Cs2]} = parse(Cs1),
              T = case Op
                    of $+ -> {add,E1,E2}
                     ; $- -> {sub,E1,E2}
                     ; $* -> {mul,E1,E2}
                     ; $/ -> {dvd,E1,E2}
                  end,
              {T,Cs2}
     end.

take(Class, [C|Cs], Ts, Fun) ->
     case classify(C)
       of Class -> take(Class, Cs, [C|Ts], Fun)
        ; _     -> {Fun(lists:reverse(Ts)), [C|Cs]}
     end;
take(_, [], Ts, Fun) ->
     {Fun(lists:reverse(Ts)), []}.

I've written more tokenisers in Prolog and Erlang than I care to remember,
and trust me on this, you do NOT want character literals all over the
place.  Amongst other things, the code above is ASCII dependent, but
the ASCII dependence is now in *one* place, the classify/1 function.
(I'm assuming a fixed set of binary operators here.)