[erlang-patches] Comprehensive xmerl_xpath patch

Bertil Karlsson <>
Fri Aug 22 12:49:35 CEST 2008


Thanks, I have added those corrections.

/Bertil

Matthew Dempsky wrote:
> Below is a comprehensive patch including all of the changes I've made
> based on pristine R12B-3 (the get_content() change in my first patch
> isn't actually needed).  Also included is a small test suite covering
> a handful of basic cases.
>
> Issues fixed:
>
>   - fixes matching against attributes when the context node set
>     includes non-element nodes (from original patch)
>
>   - fixes a compile error when using -Ddebug=true (from original
>     patch)
>
>   - fixes handling of 'following', 'following-sibling', 'preceding',
>     and 'preceding-sibling' axes when the context node set includes a
>     text node (from original patch)
>
>   - fixes parse error for expressions using the 'mod' operator
>
>   - fixes "/" to evaluate to the document node
>
>   - fixes "[N]" short-circuit logic to work correctly when using a
>     reverse axis
>
>   - fixes the "ancestor-or-self" axis to be recognized as a reverse
>     axis
>
>   - fixes axis handling to consistently return nodes in document order
>
> The code is also refactored a bit to reuse match_self() and
> match_descendant_or_self() where possible rather than reimplementing
> them for other axes.
>
> (Assuming I can refrain from digging into the code more, hopefully
> this is my last email to erlang-patches@ about this. ;-)
>
>
> test() ->
>     [begin
>          {Doc, _} = xmerl_scan:string(XML),
>          [test(XML, Doc, XPath, Exp) || {XPath, Exp} <- Tests]
>      end || {XML, Tests} <- tests()],
>     ok.
>
> test(XML, Doc, XPath, Exp) ->
>     Result = xmerl_xpath:string(XPath, Doc),
>     Got = [Name || #xmlElement{name = Name} <- Result],
>     if
>         Exp =:= Got ->
>             ok;
>         true ->
>             io:format("XML = ~s.~nXPath = ~s.~nExp = ~p.~nGot =
> ~p.~n~n", [XML, XPath, Exp, Got])
>     end.
>
> tests() ->
>     [{"<a><b/> <c/> <d/> <e/></a>",
>       [{"//b/following::*", [c, d, e]},
>        {"//b/following::*[1]", [c]},
>        {"//b/following::*[position()=1]", [c]},
>        {"//b/following::*[3]", [e]},
>        {"//b/following::*[position()=3]", [e]},
>
>        {"//e/preceding::*", [b, c, d]},
>        {"//e/preceding::*[1]", [d]},
>        {"//e/preceding::*[position()=1]", [d]},
>        {"//e/preceding::*[3]", [b]},
>        {"//e/preceding::*[position()=3]", [b]},
>
>        {"//b/following::*[position() mod 2=0]", [d]},
>        {"//b/self::*", [b]}
>       ]},
>
>      {"<a><b/> <c><d/></c> <e/> <f><g/></f> <h/> <i><j/></i> <k/></a>",
>       [{"//g/preceding::*", [b, c, d, e]},
>        {"//g/following::*", [h, i, j, k]},
>        {"//g/ancestor::*", [a, f]},
>        {"//g/ancestor::*[1]", [f]},
>        {"//g/ancestor::*[2]", [a]},
>        {"//g/ancestor-or-self::*", [a, f, g]},
>        {"//g/ancestor-or-self::*[1]", [g]},
>        {"//g/ancestor-or-self::*[2]", [f]},
>        {"//g/ancestor-or-self::*[3]", [a]},
>        {"/descendant::*", [a, b, c, d, e, f, g, h, i, j, k]},
>        {"//f/preceding-sibling::*", [b, c, e]},
>        {"//f/following-sibling::*", [h, i, k]},
>
>        {"//f/self::*", [f]},
>        {"//f/ancestor::*", [a]},
>        {"//f/descendant::*", [g]},
>        {"//f/preceding::*", [b, c, d, e]},
>        {"//f/following::*", [h, i, j, k]},
>
>        {"//text()[1]/following-sibling::*", [c, e, f, h, i, k]}
>       ]}
>     ].
>
>
>
> --- xmerl_xpath_parse.yrl~	2006-05-04 10:38:38.000000000 -0500
> +++ xmerl_xpath_parse.yrl	2008-08-15 14:05:40.000000000 -0500
> @@ -259,10 +259,10 @@
>  'MultiplicativeExpr' -> 'UnaryExpr' : '$1' .
>  'MultiplicativeExpr' -> 'MultiplicativeExpr' 'MultiplyOperator' 'UnaryExpr'
>  	: {arith, '$2', '$1', '$3'} .
> -'MultiplicativeExpr' -> 'MultiplicativeExpr' 'div' 'UnaryExpr'
> +'MultiplicativeExpr' -> 'MultiplicativeExpr' 'div' 'UnaryExpr'
>  	: {arith, 'div', '$1', '$3'} .
> -'MultiplicativeExpr' -> 'MultiplicativeExpr' 'mod' 'UnaryExpr' :
> -	{arith, 'mod', '$1', '$2'} .
> +'MultiplicativeExpr' -> 'MultiplicativeExpr' 'mod' 'UnaryExpr'
> +	: {arith, 'mod', '$1', '$3'} .
>
>
>  %% [27]
> --- xmerl_xpath.erl~	2008-08-15 22:36:56.000000000 -0500
> +++ xmerl_xpath.erl	2008-08-15 22:59:37.000000000 -0500
> @@ -148,7 +148,7 @@
>      Context=(new_context(Options))#xmlContext{context_node = ContextNode,
>  					      whole_document = WholeDoc},
>  %io:format("string Context=~p~n",[Context]),
> -    #state{context =  NewContext} = match(Str, #state{context = Context}),
> +    #state{context = NewContext} = match(Str, #state{context = Context}),
>  %io:format("string NewContext=~p~n",[NewContext]),
>      [N || #xmlNode{node = N} <- NewContext#xmlContext.nodeset].
>
> @@ -236,7 +236,9 @@
>  							 acc = Acc}) ->
>      ?dbg("PredExpr = ~p~n", [PredExpr]),
>      NewContext = axis(Axis, NodeTest, C, Acc),
> -    pred_expr(PredExpr, S#state{context = NewContext}).
> +    pred_expr(PredExpr, S#state{context = NewContext});
> +path_expr('/', S) ->
> +    S.
>
>
>
> @@ -250,9 +252,16 @@
>  %% simple case: the predicate is a number, e.g. para[5].
>  %% No need to iterate over all nodes in the nodeset; we know what to do.
>  %%
> -eval_pred({number, N}, S = #state{context = C = #xmlContext{nodeset = NS}}) ->
> -    case length(NS)>=N of
> +eval_pred({number, N0}, S = #state{context = C = #xmlContext{nodeset
> = NS, axis_type = AxisType}}) ->
> +    Len = length(NS),
> +    case Len>=N0 of
>  	true ->
> +	    N = case AxisType of
> +		    forward ->
> +			N0;
> +		    reverse ->
> +			Len + 1 - N0
> +		end,
>  	    NewNodeSet = [lists:nth(N, NS)],
>  	    NewContext = C#xmlContext{nodeset = NewNodeSet},
>  	    S#state{context = NewContext};
> @@ -365,6 +374,8 @@
>
>  fwd_or_reverse(ancestor, Context) ->
>      reverse_axis(Context);
> +fwd_or_reverse(ancestor_or_self, Context) ->
> +    reverse_axis(Context);
>  fwd_or_reverse(preceding_sibling, Context) ->
>      reverse_axis(Context);
>  fwd_or_reverse(preceding, Context) ->
> @@ -378,11 +389,9 @@
>      Context#xmlContext{axis_type = forward}.
>
>
> -
>  match_self(Tok, N, Acc, Context) ->
>      case node_test(Tok, N, Context) of
>  	true ->
> -	    %io:format("node_test -> true.~n", []),
>  	    [N|Acc];
>  	false ->
>  	    Acc
> @@ -393,7 +402,6 @@
>      #xmlNode{parents = Ps, node = Node, type = Type} = N,
>      case Type of
>  	El when El == element; El == root_node ->
> -%	element ->
>  	    NewPs = [N|Ps],
>  	    match_desc(get_content(Node), NewPs, Tok, Acc, Context);
>  	_Other ->
> @@ -404,44 +412,29 @@
>  %    match_desc(Content, Parents, Tok, [], Context).
>
>  match_desc([E = #xmlElement{}|T], Parents, Tok, Acc, Context) ->
> +    Acc1 = match_desc(T, Parents, Tok, Acc, Context),
>      N = #xmlNode{type = node_type(E),
>  		 node = E,
>  		 parents = Parents},
>      NewParents = [N|Parents],
> -    Acc1 = case node_test(Tok, N, Context) of
> -	       true ->
> -		   [N|Acc];
> -	       false ->
> -		   Acc
> -	   end,
>      Acc2 = match_desc(get_content(E), NewParents, Tok, Acc1, Context),
> -    match_desc(T, Parents, Tok, Acc2, Context);
> +    match_self(Tok, N, Acc2, Context);
>  match_desc([E|T], Parents, Tok, Acc, Context) ->
> +    Acc1 = match_desc(T, Parents, Tok, Acc, Context),
>      N = #xmlNode{node = E,
>  		 type = node_type(E),
>  		 parents = Parents},
> -    Acc1 = case node_test(Tok, N, Context) of
> -	       true ->
> -		   [N|Acc];
> -	       false ->
> -		   Acc
> -	   end,
> -    match_desc(T, Parents, Tok, Acc1, Context);
> +    match_self(Tok, N, Acc1, Context);
>  match_desc([], _Parents, _Tok, Acc, _Context) ->
>      Acc.
> -			
> +
>
>
>  %% "The 'descendant-or-self' axis contains the context node and the
>  %% descendants of the context node."
>  match_descendant_or_self(Tok, N, Acc, Context) ->
> -    Acc1 = case node_test(Tok, N, Context) of
> -	       true ->
> -		   [N|Acc];
> -	       false ->
> -		   Acc
> -	   end,
> -    match_descendant(Tok, N, Acc1, Context).
> +    Acc1 = match_descendant(Tok, N, Acc, Context),
> +    match_self(Tok, N, Acc1, Context).
>
>
>  match_child(Tok, N, Acc, Context) ->
> @@ -455,12 +448,7 @@
>  		      ThisN = #xmlNode{type = node_type(E),
>  				       node = E,
>  				       parents = NewPs},
> -		      case node_test(Tok, ThisN, Context) of
> -			  true ->
> -			      [ThisN|AccX];
> -			  false ->
> -			      AccX
> -		      end
> +		      match_self(Tok, ThisN, AccX, Context)
>  	      end, Acc, get_content(Node));
>  	_Other ->
>  	    Acc
> @@ -474,12 +462,7 @@
>  	[] ->
>  	    Acc;
>  	[PN|_] ->
> -	    case node_test(Tok, PN, Context) of
> -		true ->
> -		    [PN|Acc];
> -		false ->
> -		    Acc
> -	    end
> +	    match_self(Tok, PN, Acc, Context)
>      end.
>
>
> @@ -489,14 +472,9 @@
>  %% always include the root node, unless the context node is the root node."
>  match_ancestor(Tok, N, Acc, Context) ->
>      Parents = N#xmlNode.parents,
> -    lists:foldr(
> +    lists:foldl(
>        fun(PN, AccX) ->
> -	      case node_test(Tok, PN, Context) of
> -		  true ->
> -		      [PN|AccX];
> -		  false ->
> -		      AccX
> -	      end
> +	      match_self(Tok, PN, AccX, Context)
>        end, Acc, Parents).
>
>
> @@ -506,12 +484,7 @@
>  %% of the context node; thus, the acestor axis will always include the
>  %% root node."
>  match_ancestor_or_self(Tok, N, Acc, Context) ->
> -    Acc1 = case node_test(Tok, N, Context) of
> -	       true ->
> -		   [N|Acc];
> -	       false ->
> -		   Acc
> -	   end,
> +    Acc1 = match_self(Tok, N, Acc, Context),
>      match_ancestor(Tok, N, Acc1, Context).
>
>
> @@ -525,19 +498,14 @@
>      case Ps of
>  	[#xmlNode{type = element,
>  		  node = #xmlElement{} = PNode}|_] ->
> -	    FollowingSiblings = lists:nthtail(Node#xmlElement.pos,
> +	    FollowingSiblings = lists:nthtail(get_position(Node),
>  					      get_content(PNode)),
>  	    lists:foldr(
>  	      fun(E, AccX) ->
>  		      ThisN = #xmlNode{type = node_type(E),
>  				       node = E,
>  				       parents = Ps},
> -		      case node_test(Tok, ThisN, Context) of
> -			  true ->
> -			      [ThisN|AccX];
> -			  false ->
> -			      AccX
> -		      end
> +		      match_self(Tok, ThisN, AccX, Context)
>  	      end, Acc, FollowingSiblings);
>  	_Other ->
>  	    Acc
> @@ -547,28 +515,21 @@
>  %% "The 'following' axis contains all nodes in the same document as the
>  %% context node that are after the context node in document order, excluding
>  %% any descendants and excluding attribute nodes and namespace nodes."
> -%% (UW: I interpret this as "following siblings and their descendants")
>  match_following(Tok, N, Acc, Context) ->
>      #xmlNode{parents = Ps, node = Node} = N,
>      case Ps of
>  	[#xmlNode{type = element,
> -		  node = #xmlElement{} = PNode}|_] ->
> -	    FollowingSiblings = lists:nthtail(Node#xmlElement.pos,
> +		  node = #xmlElement{} = PNode} = P|_] ->
> +	    Acc0 = match_following(Tok, P, Acc, Context),
> +	    FollowingSiblings = lists:nthtail(get_position(Node),
>  					      get_content(PNode)),
>  	    lists:foldr(
>  	      fun(E, AccX) ->
>  		      ThisN = #xmlNode{type = node_type(E),
>  				       node = E,
>  				       parents = Ps},
> -		      Acc1 =
> -			  case node_test(Tok, ThisN, Context) of
> -			      true ->
> -				  [ThisN|AccX];
> -			      false ->
> -				  AccX
> -			  end,
> -		      match_desc(get_content(E), Tok, Ps, Acc1, Context)
> -	      end, Acc, FollowingSiblings);
> +		      match_descendant_or_self(Tok, ThisN, AccX, Context)
> +	      end, Acc0, FollowingSiblings);
>  	_Other ->
>  	    Acc
>      end.
> @@ -588,51 +549,39 @@
>  	[#xmlNode{type = element,
>  		  node = #xmlElement{} = PNode}|_] ->
>  	    PrecedingSiblings = lists:sublist(get_content(PNode), 1,
> -					      Node#xmlElement.pos-1),
> +					      get_position(Node) - 1),
>  	    lists:foldr(
>  	      fun(E, AccX) ->
>  		      ThisN = #xmlNode{type = node_type(E),
>  				       node = E,
>  				       parents = Ps},
> -		      case node_test(Tok, ThisN, Context) of
> -			  true ->
> -			      [ThisN|AccX];
> -			  false ->
> -			      AccX
> -		      end
> +		      match_self(Tok, ThisN, AccX, Context)
>  	      end, Acc, PrecedingSiblings);
>  	_Other ->
> -	    []
> +	    Acc
>      end.
>
>
>  %% "The 'preceding' axis contains all nodes in the same document as the context
>  %% node that are before the context node in document order, exluding any
>  %% ancestors and excluding attribute nodes and namespace nodes."
> -%% (UW: I interpret this as "preceding siblings and their descendants".)
>  match_preceding(Tok, N, Acc, Context) ->
>      #xmlNode{parents = Ps, node = Node} = N,
>      case Ps of
>  	[#xmlNode{type = element,
> -		  node = #xmlElement{} = PNode}|_] ->
> +		  node = #xmlElement{} = PNode} = P|_] ->
>  	    PrecedingSiblings = lists:sublist(get_content(PNode), 1,
> -					      Node#xmlElement.pos-1),
> -	    lists:foldr(
> -	      fun(E, AccX) ->
> -		      ThisN = #xmlNode{type = node_type(E),
> -				       node = E,
> -				       parents = Ps},
> -		      Acc1 =
> -			  case node_test(Tok, ThisN, Context) of
> -			      true ->
> -				  [ThisN|AccX];
> -				  false ->
> -				      AccX
> -			  end,
> -		      match_desc(get_content(E), Tok, Ps, Acc1, Context)
> -	      end, Acc, PrecedingSiblings);
> +					      get_position(Node) - 1),
> +	    Acc0 = lists:foldr(
> +		     fun(E, AccX) ->
> +		     ThisN = #xmlNode{type = node_type(E),
> +				      node = E,
> +				      parents = Ps},
> +		     match_descendant_or_self(Tok, ThisN, AccX, Context)
> +		     end, Acc, PrecedingSiblings),
> +	    match_preceding(Tok, P, Acc0, Context);
>  	_Other ->
> -	    []
> +	    Acc
>      end.
>
>
> @@ -642,20 +591,15 @@
>      case N#xmlNode.type of
>  	element ->
>  	    #xmlNode{parents = Ps, node = E} = N,
> -	    lists:foldl(
> +	    lists:foldr(
>  	      fun(A, AccX) ->
>  		      ThisN = #xmlNode{type = attribute,
>  				       node = A,
>  				       parents = [N|Ps]},
> -		      case node_test(Tok, ThisN, Context) of
> -			  true ->
> -			      [ThisN|AccX];
> -			  false ->
> -			      AccX
> -		      end
> +		      match_self(Tok, ThisN, AccX, Context)
>  	      end, Acc, E#xmlElement.attributes);
>  	_Other ->
> -	    []
> +	    Acc
>      end.
>
>  node_type(#xmlAttribute{}) ->	attribute;
> @@ -672,27 +616,20 @@
>  %    erlang:fault(not_yet_implemented).
>
>
> -update_nodeset(Context = #xmlContext{axis_type = reverse}, NodeSet) ->
> -    Context#xmlContext{nodeset = reverse(NodeSet)};
> -update_nodeset(Context, NodeSet) ->
> -    Context#xmlContext{nodeset = forward(NodeSet)}.
> -
> -reverse(NodeSet) ->
> -    reverse(NodeSet, 1, []).
> -
> -reverse([H|T], Pos, Acc) ->
> -    reverse(T, Pos+1, [H#xmlNode{pos = Pos}|Acc]);
> -reverse([], _Pos, Acc) ->
> -    Acc.
> -
> -forward(NodeSet) ->
> -    forward(NodeSet, 1).
> -
> -forward([H|T], Pos) ->
> -    [H#xmlNode{pos = Pos}|forward(T, Pos+1)];
> -forward([], _Pos) ->
> -    [].
>
> +update_nodeset(Context = #xmlContext{axis_type = AxisType}, NodeSet) ->
> +    MapFold =
> +	case AxisType of
> +	    forward ->
> +		mapfoldl;
> +	    reverse ->
> +		mapfoldr
> +	end,
> +    {Result, _N} =
> +	lists:MapFold(fun(Node, N) ->
> +		      {Node#xmlNode{pos = N}, N + 1}
> +		      end, 1, NodeSet),
> +    Context#xmlContext{nodeset = Result}.
>
>
>  node_test(F, N, Context) when function(F) ->
> @@ -736,12 +673,12 @@
>      case expanded_name(Prefix, Local, Context) of
>  	[] ->
>  	    ?dbg("node_test(~p, ~p) -> ~p.~n",
> -		 [{Tag, Prefix, Local}, write_node(Name), false]),
> +		 [{_Tag, Prefix, Local}, write_node(Name), false]),
>  	    false;
>  	ExpName ->
>  	    Res = (ExpName == {NS#xmlNamespace.default,Name}),
>  	    ?dbg("node_test(~p, ~p) -> ~p.~n",
> -		 [{Tag, Prefix, Local}, write_node(Name), Res]),
> +		 [{_Tag, Prefix, Local}, write_node(Name), Res]),
>  	    Res
>      end;
>  node_test({name, {Tag,_Prefix,_Local}},
> @@ -812,3 +749,8 @@
>      C;
>  get_content(#xmlDocument{content = C}) ->
>      [C].
> +
> +get_position(#xmlElement{pos = N}) ->
> +    N;
> +get_position(#xmlText{pos = N}) ->
> +    N.
> _______________________________________________
> erlang-patches mailing list
> 
> http://www.erlang.org/mailman/listinfo/erlang-patches
>
>   




More information about the erlang-patches mailing list