[erlang-questions] regexp sux!
Luke Gorrie
luke.gorrie@REDACTED
Sat Jun 2 10:05:36 CEST 2007
Darius Bacon <darius@REDACTED> writes:
> There's no attempt to conform to any standard, and the regex parser
> ought to be replaced with a real one that understands char ranges,
> inverted char classes, escaping, etc.
I was too lazy to add this so instead I wrote a linter for the result
of regexp:parse that tells you if the regexp can loop forever.
I hope it's correct :-)
%%
%% Regexp linter: reject regexps that can put the matcher into an
%% infinite loop, i.e. anything that can consider infinitely many
%% matches of the empty string.
%%
%% lint_regexp(R) -> ok | {error, Reason}
%%
%% R is a parsed regexp term as understood by match_empty/1.
%% Returns ok when match_empty/1 completes (whether or not R can match
%% the empty string), and {error, Reason} when match_empty/1 throws
%% {error, Reason}.
lint_regexp(R) ->
    %% Use try rather than the old-style `catch`: only the linter's own
    %% throw is translated into a return value.  Any other exception
    %% propagates unchanged instead of being turned into an 'EXIT' tuple
    %% that would surface as an unrelated case_clause failure.
    try match_empty(R) of
        CanBeEmpty when is_boolean(CanBeEmpty) -> ok
    catch
        throw:{error, Reason} -> {error, Reason}
    end.
%% match_empty(R) -> boolean()
%%
%% Return true if R can match an empty string, false otherwise.
%% Throws {error,nonterminating} if R, or any sub-expression of R, is a
%% closure whose operand can itself match the empty string (which could
%% put the matcher into an infinite loop).
%%
%% Every sub-expression is visited, because the traversal doubles as the
%% lint pass: a nested bad closure must be found even when it does not
%% influence the boolean result.
match_empty({kclosure, X}) ->
    check_nonempty(X),
    true;
match_empty({pclosure, X}) ->
    %% check_nonempty/1 has just established that X cannot match the
    %% empty string, so one-or-more repetitions of X cannot either.
    check_nonempty(X),
    false;
match_empty({optional, X}) ->
    %% The result is always true, but X must still be traversed so that
    %% a non-terminating closure nested inside the optional is reported.
    match_empty(X),
    true;
match_empty({concat, A, B}) ->
    %% Strict `and` (not `andalso`) on purpose: both operands must be
    %% linted even when the first one already decides the result.
    match_empty(A) and match_empty(B);
match_empty({'or', A, B}) ->
    %% Strict `or` (not `orelse`) for the same reason as above.
    match_empty(A) or match_empty(B);
match_empty(empty) ->
    true;
match_empty(_) ->
    %% Any other node is treated as unable to match the empty string.
    false.

%% check_nonempty(R) -> ok
%%
%% Succeeds with ok when R cannot match the empty string; otherwise
%% throws {error,nonterminating}.  Used for the operand of a closure.
check_nonempty(R) ->
    case match_empty(R) of
        true -> throw({error, nonterminating});
        false -> ok
    end.
More information about the erlang-questions
mailing list