[erlang-patches] Comprehensive xmerl_xpath patch

Matthew Dempsky <>
Sat Aug 16 06:22:40 CEST 2008


Below is a comprehensive patch including all of the changes I've made
based on pristine R12B-3 (the get_content() change in my first patch
isn't actually needed).  Also included is a small test suite covering
a handful of basic cases.

Issues fixed:

  - fixes matching against attributes when the context node set
    includes non-element nodes (from original patch)

  - fixes a compile error when using -Ddebug=true (from original
    patch)

  - fixes handling of 'following', 'following-sibling', 'preceding',
    and 'preceding-sibling' axes when the context node set includes a
    text node (from original patch)

  - fixes parse error for expressions using the 'mod' operator

  - fixes "/" to evaluate to the document node

  - fixes "[N]" short-circuit logic to work correctly when using a
    reverse axis

  - fixes the "ancestor-or-self" axis to be recognized as a reverse
    axis

  - fixes axis handling to consistently return nodes in document order

The code is also refactored a bit to reuse match_self() and
match_descendant_or_self() where possible rather than reimplementing
them for other axes.

(Assuming I can refrain from digging into the code more, hopefully
this is my last email to erlang-patches@ about this. ;-)


test() ->
    [begin
         {Doc, _} = xmerl_scan:string(XML),
         [test(XML, Doc, XPath, Exp) || {XPath, Exp} <- Tests]
     end || {XML, Tests} <- tests()],
    ok.

test(XML, Doc, XPath, Exp) ->
    Result = xmerl_xpath:string(XPath, Doc),
    Got = [Name || #xmlElement{name = Name} <- Result],
    if
        Exp =:= Got ->
            ok;
        true ->
            io:format("XML = ~s.~nXPath = ~s.~nExp = ~p.~nGot =
~p.~n~n", [XML, XPath, Exp, Got])
    end.

tests() ->
    [{"<a><b/> <c/> <d/> <e/></a>",
      [{"//b/following::*", [c, d, e]},
       {"//b/following::*[1]", [c]},
       {"//b/following::*[position()=1]", [c]},
       {"//b/following::*[3]", [e]},
       {"//b/following::*[position()=3]", [e]},

       {"//e/preceding::*", [b, c, d]},
       {"//e/preceding::*[1]", [d]},
       {"//e/preceding::*[position()=1]", [d]},
       {"//e/preceding::*[3]", [b]},
       {"//e/preceding::*[position()=3]", [b]},

       {"//b/following::*[position() mod 2=0]", [d]},
       {"//b/self::*", [b]}
      ]},

     {"<a><b/> <c><d/></c> <e/> <f><g/></f> <h/> <i><j/></i> <k/></a>",
      [{"//g/preceding::*", [b, c, d, e]},
       {"//g/following::*", [h, i, j, k]},
       {"//g/ancestor::*", [a, f]},
       {"//g/ancestor::*[1]", [f]},
       {"//g/ancestor::*[2]", [a]},
       {"//g/ancestor-or-self::*", [a, f, g]},
       {"//g/ancestor-or-self::*[1]", [g]},
       {"//g/ancestor-or-self::*[2]", [f]},
       {"//g/ancestor-or-self::*[3]", [a]},
       {"/descendant::*", [a, b, c, d, e, f, g, h, i, j, k]},
       {"//f/preceding-sibling::*", [b, c, e]},
       {"//f/following-sibling::*", [h, i, k]},

       {"//f/self::*", [f]},
       {"//f/ancestor::*", [a]},
       {"//f/descendant::*", [g]},
       {"//f/preceding::*", [b, c, d, e]},
       {"//f/following::*", [h, i, j, k]},

       {"//text()[1]/following-sibling::*", [c, e, f, h, i, k]}
      ]}
    ].



--- xmerl_xpath_parse.yrl~	2006-05-04 10:38:38.000000000 -0500
+++ xmerl_xpath_parse.yrl	2008-08-15 14:05:40.000000000 -0500
@@ -259,10 +259,10 @@
 'MultiplicativeExpr' -> 'UnaryExpr' : '$1' .
 'MultiplicativeExpr' -> 'MultiplicativeExpr' 'MultiplyOperator' 'UnaryExpr'
 	: {arith, '$2', '$1', '$3'} .
-'MultiplicativeExpr' -> 'MultiplicativeExpr' 'div' 'UnaryExpr'
+'MultiplicativeExpr' -> 'MultiplicativeExpr' 'div' 'UnaryExpr'
 	: {arith, 'div', '$1', '$3'} .
-'MultiplicativeExpr' -> 'MultiplicativeExpr' 'mod' 'UnaryExpr' :
-	{arith, 'mod', '$1', '$2'} .
+'MultiplicativeExpr' -> 'MultiplicativeExpr' 'mod' 'UnaryExpr'
+	: {arith, 'mod', '$1', '$3'} .


 %% [27]
--- xmerl_xpath.erl~	2008-08-15 22:36:56.000000000 -0500
+++ xmerl_xpath.erl	2008-08-15 22:59:37.000000000 -0500
@@ -148,7 +148,7 @@
     Context=(new_context(Options))#xmlContext{context_node = ContextNode,
 					      whole_document = WholeDoc},
 %io:format("string Context=~p~n",[Context]),
-    #state{context =  NewContext} = match(Str, #state{context = Context}),
+    #state{context = NewContext} = match(Str, #state{context = Context}),
 %io:format("string NewContext=~p~n",[NewContext]),
     [N || #xmlNode{node = N} <- NewContext#xmlContext.nodeset].

@@ -236,7 +236,9 @@
 							 acc = Acc}) ->
     ?dbg("PredExpr = ~p~n", [PredExpr]),
     NewContext = axis(Axis, NodeTest, C, Acc),
-    pred_expr(PredExpr, S#state{context = NewContext}).
+    pred_expr(PredExpr, S#state{context = NewContext});
+path_expr('/', S) ->
+    S.



@@ -250,9 +252,16 @@
 %% simple case: the predicate is a number, e.g. para[5].
 %% No need to iterate over all nodes in the nodeset; we know what to do.
 %%
-eval_pred({number, N}, S = #state{context = C = #xmlContext{nodeset = NS}}) ->
-    case length(NS)>=N of
+eval_pred({number, N0}, S = #state{context = C = #xmlContext{nodeset
= NS, axis_type = AxisType}}) ->
+    Len = length(NS),
+    case Len>=N0 of
 	true ->
+	    N = case AxisType of
+		    forward ->
+			N0;
+		    reverse ->
+			Len + 1 - N0
+		end,
 	    NewNodeSet = [lists:nth(N, NS)],
 	    NewContext = C#xmlContext{nodeset = NewNodeSet},
 	    S#state{context = NewContext};
@@ -365,6 +374,8 @@

 fwd_or_reverse(ancestor, Context) ->
     reverse_axis(Context);
+fwd_or_reverse(ancestor_or_self, Context) ->
+    reverse_axis(Context);
 fwd_or_reverse(preceding_sibling, Context) ->
     reverse_axis(Context);
 fwd_or_reverse(preceding, Context) ->
@@ -378,11 +389,9 @@
     Context#xmlContext{axis_type = forward}.


-
 match_self(Tok, N, Acc, Context) ->
     case node_test(Tok, N, Context) of
 	true ->
-	    %io:format("node_test -> true.~n", []),
 	    [N|Acc];
 	false ->
 	    Acc
@@ -393,7 +402,6 @@
     #xmlNode{parents = Ps, node = Node, type = Type} = N,
     case Type of
 	El when El == element; El == root_node ->
-%	element ->
 	    NewPs = [N|Ps],
 	    match_desc(get_content(Node), NewPs, Tok, Acc, Context);
 	_Other ->
@@ -404,44 +412,29 @@
 %    match_desc(Content, Parents, Tok, [], Context).

 match_desc([E = #xmlElement{}|T], Parents, Tok, Acc, Context) ->
+    Acc1 = match_desc(T, Parents, Tok, Acc, Context),
     N = #xmlNode{type = node_type(E),
 		 node = E,
 		 parents = Parents},
     NewParents = [N|Parents],
-    Acc1 = case node_test(Tok, N, Context) of
-	       true ->
-		   [N|Acc];
-	       false ->
-		   Acc
-	   end,
     Acc2 = match_desc(get_content(E), NewParents, Tok, Acc1, Context),
-    match_desc(T, Parents, Tok, Acc2, Context);
+    match_self(Tok, N, Acc2, Context);
 match_desc([E|T], Parents, Tok, Acc, Context) ->
+    Acc1 = match_desc(T, Parents, Tok, Acc, Context),
     N = #xmlNode{node = E,
 		 type = node_type(E),
 		 parents = Parents},
-    Acc1 = case node_test(Tok, N, Context) of
-	       true ->
-		   [N|Acc];
-	       false ->
-		   Acc
-	   end,
-    match_desc(T, Parents, Tok, Acc1, Context);
+    match_self(Tok, N, Acc1, Context);
 match_desc([], _Parents, _Tok, Acc, _Context) ->
     Acc.
-			
+


 %% "The 'descendant-or-self' axis contains the context node and the
 %% descendants of the context node."
 match_descendant_or_self(Tok, N, Acc, Context) ->
-    Acc1 = case node_test(Tok, N, Context) of
-	       true ->
-		   [N|Acc];
-	       false ->
-		   Acc
-	   end,
-    match_descendant(Tok, N, Acc1, Context).
+    Acc1 = match_descendant(Tok, N, Acc, Context),
+    match_self(Tok, N, Acc1, Context).


 match_child(Tok, N, Acc, Context) ->
@@ -455,12 +448,7 @@
 		      ThisN = #xmlNode{type = node_type(E),
 				       node = E,
 				       parents = NewPs},
-		      case node_test(Tok, ThisN, Context) of
-			  true ->
-			      [ThisN|AccX];
-			  false ->
-			      AccX
-		      end
+		      match_self(Tok, ThisN, AccX, Context)
 	      end, Acc, get_content(Node));
 	_Other ->
 	    Acc
@@ -474,12 +462,7 @@
 	[] ->
 	    Acc;
 	[PN|_] ->
-	    case node_test(Tok, PN, Context) of
-		true ->
-		    [PN|Acc];
-		false ->
-		    Acc
-	    end
+	    match_self(Tok, PN, Acc, Context)
     end.


@@ -489,14 +472,9 @@
 %% always include the root node, unless the context node is the root node."
 match_ancestor(Tok, N, Acc, Context) ->
     Parents = N#xmlNode.parents,
-    lists:foldr(
+    lists:foldl(
       fun(PN, AccX) ->
-	      case node_test(Tok, PN, Context) of
-		  true ->
-		      [PN|AccX];
-		  false ->
-		      AccX
-	      end
+	      match_self(Tok, PN, AccX, Context)
       end, Acc, Parents).


@@ -506,12 +484,7 @@
 %% of the context node; thus, the acestor axis will always include the
 %% root node."
 match_ancestor_or_self(Tok, N, Acc, Context) ->
-    Acc1 = case node_test(Tok, N, Context) of
-	       true ->
-		   [N|Acc];
-	       false ->
-		   Acc
-	   end,
+    Acc1 = match_self(Tok, N, Acc, Context),
     match_ancestor(Tok, N, Acc1, Context).


@@ -525,19 +498,14 @@
     case Ps of
 	[#xmlNode{type = element,
 		  node = #xmlElement{} = PNode}|_] ->
-	    FollowingSiblings = lists:nthtail(Node#xmlElement.pos,
+	    FollowingSiblings = lists:nthtail(get_position(Node),
 					      get_content(PNode)),
 	    lists:foldr(
 	      fun(E, AccX) ->
 		      ThisN = #xmlNode{type = node_type(E),
 				       node = E,
 				       parents = Ps},
-		      case node_test(Tok, ThisN, Context) of
-			  true ->
-			      [ThisN|AccX];
-			  false ->
-			      AccX
-		      end
+		      match_self(Tok, ThisN, AccX, Context)
 	      end, Acc, FollowingSiblings);
 	_Other ->
 	    Acc
@@ -547,28 +515,21 @@
 %% "The 'following' axis contains all nodes in the same document as the
 %% context node that are after the context node in document order, excluding
 %% any descendants and excluding attribute nodes and namespace nodes."
-%% (UW: I interpret this as "following siblings and their descendants")
 match_following(Tok, N, Acc, Context) ->
     #xmlNode{parents = Ps, node = Node} = N,
     case Ps of
 	[#xmlNode{type = element,
-		  node = #xmlElement{} = PNode}|_] ->
-	    FollowingSiblings = lists:nthtail(Node#xmlElement.pos,
+		  node = #xmlElement{} = PNode} = P|_] ->
+	    Acc0 = match_following(Tok, P, Acc, Context),
+	    FollowingSiblings = lists:nthtail(get_position(Node),
 					      get_content(PNode)),
 	    lists:foldr(
 	      fun(E, AccX) ->
 		      ThisN = #xmlNode{type = node_type(E),
 				       node = E,
 				       parents = Ps},
-		      Acc1 =
-			  case node_test(Tok, ThisN, Context) of
-			      true ->
-				  [ThisN|AccX];
-			      false ->
-				  AccX
-			  end,
-		      match_desc(get_content(E), Tok, Ps, Acc1, Context)
-	      end, Acc, FollowingSiblings);
+		      match_descendant_or_self(Tok, ThisN, AccX, Context)
+	      end, Acc0, FollowingSiblings);
 	_Other ->
 	    Acc
     end.
@@ -588,51 +549,39 @@
 	[#xmlNode{type = element,
 		  node = #xmlElement{} = PNode}|_] ->
 	    PrecedingSiblings = lists:sublist(get_content(PNode), 1,
-					      Node#xmlElement.pos-1),
+					      get_position(Node) - 1),
 	    lists:foldr(
 	      fun(E, AccX) ->
 		      ThisN = #xmlNode{type = node_type(E),
 				       node = E,
 				       parents = Ps},
-		      case node_test(Tok, ThisN, Context) of
-			  true ->
-			      [ThisN|AccX];
-			  false ->
-			      AccX
-		      end
+		      match_self(Tok, ThisN, AccX, Context)
 	      end, Acc, PrecedingSiblings);
 	_Other ->
-	    []
+	    Acc
     end.


 %% "The 'preceding' axis contains all nodes in the same document as the context
 %% node that are before the context node in document order, exluding any
 %% ancestors and excluding attribute nodes and namespace nodes."
-%% (UW: I interpret this as "preceding siblings and their descendants".)
 match_preceding(Tok, N, Acc, Context) ->
     #xmlNode{parents = Ps, node = Node} = N,
     case Ps of
 	[#xmlNode{type = element,
-		  node = #xmlElement{} = PNode}|_] ->
+		  node = #xmlElement{} = PNode} = P|_] ->
 	    PrecedingSiblings = lists:sublist(get_content(PNode), 1,
-					      Node#xmlElement.pos-1),
-	    lists:foldr(
-	      fun(E, AccX) ->
-		      ThisN = #xmlNode{type = node_type(E),
-				       node = E,
-				       parents = Ps},
-		      Acc1 =
-			  case node_test(Tok, ThisN, Context) of
-			      true ->
-				  [ThisN|AccX];
-				  false ->
-				      AccX
-			  end,
-		      match_desc(get_content(E), Tok, Ps, Acc1, Context)
-	      end, Acc, PrecedingSiblings);
+					      get_position(Node) - 1),
+	    Acc0 = lists:foldr(
+		     fun(E, AccX) ->
+		     ThisN = #xmlNode{type = node_type(E),
+				      node = E,
+				      parents = Ps},
+		     match_descendant_or_self(Tok, ThisN, AccX, Context)
+		     end, Acc, PrecedingSiblings),
+	    match_preceding(Tok, P, Acc0, Context);
 	_Other ->
-	    []
+	    Acc
     end.


@@ -642,20 +591,15 @@
     case N#xmlNode.type of
 	element ->
 	    #xmlNode{parents = Ps, node = E} = N,
-	    lists:foldl(
+	    lists:foldr(
 	      fun(A, AccX) ->
 		      ThisN = #xmlNode{type = attribute,
 				       node = A,
 				       parents = [N|Ps]},
-		      case node_test(Tok, ThisN, Context) of
-			  true ->
-			      [ThisN|AccX];
-			  false ->
-			      AccX
-		      end
+		      match_self(Tok, ThisN, AccX, Context)
 	      end, Acc, E#xmlElement.attributes);
 	_Other ->
-	    []
+	    Acc
     end.

 node_type(#xmlAttribute{}) ->	attribute;
@@ -672,27 +616,20 @@
 %    erlang:fault(not_yet_implemented).


-update_nodeset(Context = #xmlContext{axis_type = reverse}, NodeSet) ->
-    Context#xmlContext{nodeset = reverse(NodeSet)};
-update_nodeset(Context, NodeSet) ->
-    Context#xmlContext{nodeset = forward(NodeSet)}.
-
-reverse(NodeSet) ->
-    reverse(NodeSet, 1, []).
-
-reverse([H|T], Pos, Acc) ->
-    reverse(T, Pos+1, [H#xmlNode{pos = Pos}|Acc]);
-reverse([], _Pos, Acc) ->
-    Acc.
-
-forward(NodeSet) ->
-    forward(NodeSet, 1).
-
-forward([H|T], Pos) ->
-    [H#xmlNode{pos = Pos}|forward(T, Pos+1)];
-forward([], _Pos) ->
-    [].

+update_nodeset(Context = #xmlContext{axis_type = AxisType}, NodeSet) ->
+    MapFold =
+	case AxisType of
+	    forward ->
+		mapfoldl;
+	    reverse ->
+		mapfoldr
+	end,
+    {Result, _N} =
+	lists:MapFold(fun(Node, N) ->
+		      {Node#xmlNode{pos = N}, N + 1}
+		      end, 1, NodeSet),
+    Context#xmlContext{nodeset = Result}.


 node_test(F, N, Context) when function(F) ->
@@ -736,12 +673,12 @@
     case expanded_name(Prefix, Local, Context) of
 	[] ->
 	    ?dbg("node_test(~p, ~p) -> ~p.~n",
-		 [{Tag, Prefix, Local}, write_node(Name), false]),
+		 [{_Tag, Prefix, Local}, write_node(Name), false]),
 	    false;
 	ExpName ->
 	    Res = (ExpName == {NS#xmlNamespace.default,Name}),
 	    ?dbg("node_test(~p, ~p) -> ~p.~n",
-		 [{Tag, Prefix, Local}, write_node(Name), Res]),
+		 [{_Tag, Prefix, Local}, write_node(Name), Res]),
 	    Res
     end;
 node_test({name, {Tag,_Prefix,_Local}},
@@ -812,3 +749,8 @@
     C;
 get_content(#xmlDocument{content = C}) ->
     [C].
+
+get_position(#xmlElement{pos = N}) ->
+    N;
+get_position(#xmlText{pos = N}) ->
+    N.



More information about the erlang-patches mailing list