[erlang-questions] Adoption of perl/javascript-style regexp syntax
Ulf Wiger
ulf.wiger@REDACTED
Mon Jun 1 16:17:01 CEST 2009
Dmitrii Dimandt wrote:
>
> I've just come across re and I like it :)
>
> The only issue I have with it is that I have to specify regexps as
> strings. This leads to ugly-as-hell constructs like these:
>
> {ok, Re} = re:compile("(?<!\\\\)#")
>
> It actually tries to find two backslashes there... Or just one? I
> don't know :) What if Erlang could allow this:
>
> Re = /(?<!\\)#/
>
> ?
>
> Benefits:
> - Less error-prone
> - Expressions written this way can be parsed and compiled by the
> compiler (boost in performance, syntax checked at compile-time)
It's not going to boost performance, as this is just
a preprocessor issue. But having to escape the backslashes
when working with regexps is a pain.
Perhaps a better syntax would be to imitate the
LaTeX \verb command. It allows you to specify the
delimiter, and then consumes all chars until it finds
that delimiter, e.g. \verb!gdl4$%\^\$£$!
Since this exact syntax doesn't work in Erlang, a
slight adjustment is in order. The scanner recognizes
backticks today, but the parser doesn't. So, if we
change the scanner to recognize ` as the Erlang version
of \verb, we can write:
1> re:split("foo\nbar",`!\n!).
[<<"foo">>,<<"bar">>]
where
2> `!\n!.
"\\n"
Diff follows. It was a quick hack, so it needs improvement.
--- /home/uwiger/src/otp/otp_src_R13B/lib/stdlib/src/erl_scan.erl 2009-04-16 05:23:36.000000000 -0400
+++ erl_scan.erl 2009-06-01 09:09:49.000000000 -0400
@@ -559,4 +559,2 @@
tok2(Cs, St, Line, Col, Toks, "^", '^', 1);
-scan1([$`|Cs], St, Line, Col, Toks) ->
- tok2(Cs, St, Line, Col, Toks, "`", '`', 1);
scan1([$~|Cs], St, Line, Col, Toks) ->
@@ -565,2 +563,4 @@
tok2(Cs, St, Line, Col, Toks, "&", '&', 1);
+scan1([$`|Cs], St, Line, Col, Toks) ->
+ scan_verb(Cs, St, Line, Col, Toks, []);
%% End of optimization.
@@ -580,2 +580,27 @@
+scan_verb([], _St, Line, Col, Toks, Acc) ->
+ {more, {[],Col,Toks,Line,Acc,fun scan_verb/6}};
+scan_verb([Delim|Cs0], St, Line, Col, Toks, Acc) when Delim =/= $\n,
+ Delim =/= $\\ ->
+ {Str, Cs, Line1, Col1} = scan_verb_chars(
+ Cs0, St, Line, Col, Toks, {Acc,Delim}),
+ tok3(Cs, St, Line1, Col1, Toks, string, Str, Str, 0).
+
+scan_verb_chars([], _St, Line, Col, Toks, {Acc, Delim}) ->
+ {more, {[], Col, Toks, Line, {Acc,Delim}, fun scan_verb_chars/6}};
+scan_verb_chars([Delim|Cs], _St, Line, Col, Toks, {Acc, Delim}) ->
+ {lists:reverse(Acc), Cs, Line, Col};
+scan_verb_chars([C|Cs], St, Line, Col, Toks, {Acc, Delim}) when C =/= Delim->
+ {Line1,Col1} = case C of
+ $\n ->
+ {Line+1, Col};
+ _ ->
+ {Line, inc_col(Col,1)}
+ end,
+ scan_verb_chars(Cs, St, Line1, Col1, Toks, {[C|Acc], Delim}).
+
+inc_col(no_col,_) -> no_col;
+inc_col(C, N) when is_integer(C) -> C+N.
+
+
scan_atom(Cs0, St, Line, Col, Toks, Ncs0) ->
--
Ulf Wiger
CTO, Erlang Training & Consulting Ltd.
http://www.erlang-consulting.com
More information about the erlang-questions
mailing list