[erlang-questions] Adoption of perl/javascript-style regexp syntax

Ulf Wiger <>
Mon Jun 1 16:17:01 CEST 2009


Dmitrii Dimandt wrote:
>
> I've just come across re and I like it :)
> 
> The only issue I have with it is that I have to specify regexps as  
> strings. This leads to ugly-as-hell constructs like these:
> 
> {ok, Re} = re:compile("(?<!\\\\)#")
> 
> It actually tries to find two backslashes there... Or just one? I  
> don't know :) What if Erlang could allow this:
> 
> Re = /(?<!\\)#/
> 
> ?
> 
> Benefits:
> - Less error-prone
> - Expressions written this way can be parsed and compiled by the  
> compiler (boost in performance, syntax checked at compile-time)


It's not going to boost performance, as this is just 
a preprocessor issue. But having to escape the backslashes
when working with regexps is a pain.

Perhaps a better syntax would be to imitate the 
LaTeX \verb command. It allows you to specify the 
delimiter, and then consumes all chars until it finds
that delimiter, e.g. \verb!gdl4$%\^\$£$!

Since this exact syntax doesn't work in Erlang, a
slight adjustment is in order. The scanner recognizes
backticks today, but the parser doesn't. So, if we 
change the scanner to recognize ` as the Erlang version
of \verb, we can write:


1> re:split("foo\nbar",`!\n!).
[<<"foo">>,<<"bar">>]

where

2> `!\n!.
"\\n"


Diff follows. It was a quick hack, so it needs improvement.

--- /home/uwiger/src/otp/otp_src_R13B/lib/stdlib/src/erl_scan.erl       2009-04-16 05:23:36.000000000 -0400
+++ erl_scan.erl        2009-06-01 09:09:49.000000000 -0400
@@ -559,4 +559,2 @@
     tok2(Cs, St, Line, Col, Toks, "^", '^', 1);
-scan1([$`|Cs], St, Line, Col, Toks) ->
-    tok2(Cs, St, Line, Col, Toks, "`", '`', 1);
 scan1([$~|Cs], St, Line, Col, Toks) ->
@@ -565,2 +563,4 @@
     tok2(Cs, St, Line, Col, Toks, "&", '&', 1);
+scan1([$`|Cs], St, Line, Col, Toks) ->
+    scan_verb(Cs, St, Line, Col, Toks, []);
 %% End of optimization.
@@ -580,2 +580,27 @@
 
+scan_verb([], _St, Line, Col, Toks, Acc) ->
+    {more, {[],Col,Toks,Line,Acc,fun scan_verb/6}};
+scan_verb([Delim|Cs0], St, Line, Col, Toks, Acc) when Delim =/= $\n,
+                                                      Delim =/= $\\ ->
+    {Str, Cs, Line1, Col1} =  scan_verb_chars(
+                                Cs0, St, Line, Col, Toks, {Acc,Delim}),
+    tok3(Cs, St, Line1, Col1, Toks, string, Str, Str, 0).
+
+scan_verb_chars([], _St, Line, Col, Toks, {Acc, Delim}) ->
+    {more, {[], Col, Toks, Line, {Acc,Delim}, fun scan_verb_chars/6}};
+scan_verb_chars([Delim|Cs], _St, Line, Col, Toks, {Acc, Delim}) ->
+    {lists:reverse(Acc), Cs, Line, Col};
+scan_verb_chars([C|Cs], St, Line, Col, Toks, {Acc, Delim}) when C =/= Delim->
+    {Line1,Col1} = case C of
+                       $\n ->
+                           {Line+1, Col};
+                       _ ->
+                           {Line, inc_col(Col,1)}
+                   end,
+    scan_verb_chars(Cs, St, Line1, Col1, Toks, {[C|Acc], Delim}).
+
+inc_col(no_col,_) -> no_col;
+inc_col(C, N) when is_integer(C) -> C+N.
+    
+
 scan_atom(Cs0, St, Line, Col, Toks, Ncs0) ->


-- 
Ulf Wiger
CTO, Erlang Training & Consulting Ltd.
http://www.erlang-consulting.com


More information about the erlang-questions mailing list