[erlang-patches] More xpath issues: following and preceding axes not correct

Matthew Dempsky matthew@REDACTED
Fri Aug 15 20:57:44 CEST 2008


The comments in xmerl_xpath explain that the 'following' and
'preceding' axes are interpreted as "following (resp. preceding)
siblings and their descendants", but the XPath spec implies that they
should include *all* following and preceding nodes when it states that

    The ancestor, descendant, following, preceding and self axes
    partition a document (ignoring attribute and namespace nodes):
    they do not overlap and together they contain all the nodes in the
    document.

E.g., against the XML document below, "//g/preceding::*" should
include elements b, c, d, and e; and "//g/following::*" should include
h, i, j, and k (both in order):

   <a>
    <b/>
    <c><d/></c>
    <e/>
    <f><g/></f>
    <h/>
    <i><j/></i>
    <k/>
   </a>

The patch below corrects this.  However, I'm afraid of silly white
space conflicts (particularly with my last patch), so I just want to
make sure to point out that the calls to match_desc() in
match_following() and match_preceding() had the argument ordering for
Tok and Ps swapped.


--- xmerl_xpath.erl	2008-08-15 13:37:13.000000000 -0500
+++ xmerl_xpath.erl.patched	2008-08-15 13:43:40.000000000 -0500
@@ -547,12 +547,12 @@
 %% "The 'following' axis contains all nodes in the same document as the
 %% context node that are after the context node in document order, excluding
 %% any descendants and excluding attribute nodes and namespace nodes."
-%% (UW: I interpret this as "following siblings and their descendants")
 match_following(Tok, N, Acc, Context) ->
     #xmlNode{parents = Ps, node = Node} = N,
     case Ps of
 	[#xmlNode{type = element,
-		  node = #xmlElement{} = PNode}|_] ->
+		  node = #xmlElement{} = PNode} = P|_] ->
+	    Acc0 = match_following(Tok, P, Acc, Context),
 	    FollowingSiblings = lists:nthtail(get_position(Node),
 					      get_content(PNode)),
 	    lists:foldr(
@@ -560,15 +560,14 @@
 		      ThisN = #xmlNode{type = node_type(E),
 				       node = E,
 				       parents = Ps},
-		      Acc1 =
-			  case node_test(Tok, ThisN, Context) of
-			      true ->
-				  [ThisN|AccX];
-			      false ->
-				  AccX
-			  end,
-		      match_desc(get_content(E), Tok, Ps, Acc1, Context)
-	      end, Acc, FollowingSiblings);
+		      Acc1 = match_desc(get_content(E), Ps, Tok, AccX, Context),
+                      case node_test(Tok, ThisN, Context) of
+                          true ->
+                              [ThisN|Acc1];
+                          false ->
+                              Acc1
+                      end
+	      end, Acc0, FollowingSiblings);
 	_Other ->
 	    Acc
     end.
@@ -609,30 +608,29 @@
 %% "The 'preceding' axis contains all nodes in the same document as the context
 %% node that are before the context node in document order, exluding any
 %% ancestors and excluding attribute nodes and namespace nodes."
-%% (UW: I interpret this as "preceding siblings and their descendants".)
 match_preceding(Tok, N, Acc, Context) ->
     #xmlNode{parents = Ps, node = Node} = N,
     case Ps of
 	[#xmlNode{type = element,
-		  node = #xmlElement{} = PNode}|_] ->
+		  node = #xmlElement{} = PNode} = P|_] ->
+	    Acc0 = match_preceding(Tok, P, Acc, Context),
 	    PrecedingSiblings = lists:sublist(get_content(PNode), 1,
 					      get_position(Node)-1),
-	    lists:foldr(
+	    lists:foldl(
 	      fun(E, AccX) ->
 		      ThisN = #xmlNode{type = node_type(E),
 				       node = E,
 				       parents = Ps},
-		      Acc1 =
-			  case node_test(Tok, ThisN, Context) of
-			      true ->
-				  [ThisN|AccX];
-				  false ->
-				      AccX
-			  end,
-		      match_desc(get_content(E), Tok, Ps, Acc1, Context)
-	      end, Acc, PrecedingSiblings);
+                      Acc1 = case node_test(Tok, ThisN, Context) of
+                                 true ->
+                                     [ThisN|AccX];
+                                 false ->
+                                     AccX
+                             end,
+                      match_desc(get_content(E), Ps, Tok, Acc1, Context)
+	      end, Acc0, PrecedingSiblings);
 	_Other ->
-	    []
+	    Acc
     end.



More information about the erlang-patches mailing list