[erlang-questions] regexp sux!

Luke Gorrie luke.gorrie@REDACTED
Fri Jun 1 00:37:08 CEST 2007


Darius Bacon <darius@REDACTED> writes:

> There's no attempt to conform to any standard, and the regex parser
> ought to be replaced with a real one that understands char ranges,
> inverted char classes, escaping, etc.

I was too lazy to add this so instead I wrote a linter for the result
of regexp:parse that tells you if the regexp can loop forever.

I hope it's correct :-)

%%
%% Regexp linter: reject regexps that can put the matcher into an
%% infinite loop, i.e. anything that can consider infinitely many
%% matches of the empty string.
%%

%% Lint a regexp parse tree R (the kind of term produced by
%% regexp:parse) for constructs that could make the matcher loop
%% forever.
%%
%% Returns ok when no such construct is found, or {error,Reason} when
%% match_empty/1 throws {error,Reason} (in this file the only reason
%% thrown is `nonterminating').
lint_regexp(R) ->
    %% try/catch instead of the old-style `catch Expr': only the
    %% {error,Reason} throw is turned into a return value.  Any other
    %% exception propagates with its original class and reason rather
    %% than being flattened into an {'EXIT',_} tuple.
    try match_empty(R) of
        CanBeEmpty when is_boolean(CanBeEmpty) -> ok
    catch
        throw:{error, Reason} -> {error, Reason}
    end.

%% Return true if R can match the empty string, false otherwise.
%% Throws {error,nonterminating} if R could put the matcher into an
%% infinite loop, i.e. if the body of a kclosure or pclosure anywhere
%% inside R can itself match the empty string.
%%
%% Every sub-expression is visited even when the answer for the
%% enclosing node is already known, so that a bad closure nested
%% anywhere in R is reported.
match_empty({kclosure, X}) ->
    check_nonempty(X),
    true;
match_empty({pclosure, X}) ->
    %% X has just been shown not to match the empty string, so
    %% one-or-more repetitions of X cannot match it either.
    check_nonempty(X),
    false;
match_empty({optional, X}) ->
    %% The answer is always true, but X must still be walked:
    %% otherwise a looping closure nested inside the optional
    %% (e.g. {optional,{kclosure,{kclosure,$a}}}) would go unnoticed.
    %% The boolean result of the walk is deliberately discarded.
    _ = match_empty(X),
    true;
match_empty({concat, A, B}) ->
    %% Strict `and' (not `andalso'): B must be checked even when A
    %% already cannot match the empty string.
    match_empty(A) and match_empty(B);
match_empty({'or', A, B}) ->
    %% Strict `or' (not `orelse') for the same reason.
    match_empty(A) or match_empty(B);
match_empty(empty) ->
    true;
match_empty(_) ->
    %% Every other node is treated as consuming at least one character.
    false.

%% Return ok if R cannot match the empty string; otherwise throw
%% {error,nonterminating}.  Any error found while walking R is
%% propagated as well.
check_nonempty(R) ->
    case match_empty(R) of
        true  -> throw({error, nonterminating});
        false -> ok
    end.




More information about the erlang-questions mailing list