[erlang-patches] Comprehensive xmerl_xpath patch
Bertil Karlsson
bertil.karlsson@REDACTED
Fri Aug 22 12:49:35 CEST 2008
Thanks, I have added those corrections.
/Bertil
Matthew Dempsky wrote:
> Below is a comprehensive patch including all of the changes I've made
> based on pristine R12B-3 (the get_content() change in my first patch
> isn't actually needed). Also included is a small test suite covering
> a handful of basic cases.
>
> Issues fixed:
>
> - fixes matching against attributes when the context node set
> includes non-element nodes (from original patch)
>
> - fixes a compile error when using -Ddebug=true (from original
> patch)
>
> - fixes handling of 'following', 'following-sibling', 'preceding',
> and 'preceding-sibling' axes when the context node set includes a
> text node (from original patch)
>
> - fixes parse error for expressions using the 'mod' operator
>
> - fixes "/" to evaluate to the document node
>
> - fixes "[N]" short-circuit logic to work correctly when using a
> reverse axis
>
> - fixes the "ancestor-or-self" axis to be recognized as a reverse
> axis
>
> - fixes axis handling to consistently return nodes in document order
>
> The code is also refactored a bit to reuse match_self() and
> match_descendant_or_self() where possible rather than reimplementing
> them for other axes.
>
> (Assuming I can refrain from digging into the code more, hopefully
> this is my last email to erlang-patches@ about this. ;-)
>
>
> test() ->
> [begin
> {Doc, _} = xmerl_scan:string(XML),
> [test(XML, Doc, XPath, Exp) || {XPath, Exp} <- Tests]
> end || {XML, Tests} <- tests()],
> ok.
>
> test(XML, Doc, XPath, Exp) ->
> Result = xmerl_xpath:string(XPath, Doc),
> Got = [Name || #xmlElement{name = Name} <- Result],
> if
> Exp =:= Got ->
> ok;
> true ->
> io:format("XML = ~s.~nXPath = ~s.~nExp = ~p.~nGot =
> ~p.~n~n", [XML, XPath, Exp, Got])
> end.
>
> tests() ->
> [{"<a><b/> <c/> <d/> <e/></a>",
> [{"//b/following::*", [c, d, e]},
> {"//b/following::*[1]", [c]},
> {"//b/following::*[position()=1]", [c]},
> {"//b/following::*[3]", [e]},
> {"//b/following::*[position()=3]", [e]},
>
> {"//e/preceding::*", [b, c, d]},
> {"//e/preceding::*[1]", [d]},
> {"//e/preceding::*[position()=1]", [d]},
> {"//e/preceding::*[3]", [b]},
> {"//e/preceding::*[position()=3]", [b]},
>
> {"//b/following::*[position() mod 2=0]", [d]},
> {"//b/self::*", [b]}
> ]},
>
> {"<a><b/> <c><d/></c> <e/> <f><g/></f> <h/> <i><j/></i> <k/></a>",
> [{"//g/preceding::*", [b, c, d, e]},
> {"//g/following::*", [h, i, j, k]},
> {"//g/ancestor::*", [a, f]},
> {"//g/ancestor::*[1]", [f]},
> {"//g/ancestor::*[2]", [a]},
> {"//g/ancestor-or-self::*", [a, f, g]},
> {"//g/ancestor-or-self::*[1]", [g]},
> {"//g/ancestor-or-self::*[2]", [f]},
> {"//g/ancestor-or-self::*[3]", [a]},
> {"/descendant::*", [a, b, c, d, e, f, g, h, i, j, k]},
> {"//f/preceding-sibling::*", [b, c, e]},
> {"//f/following-sibling::*", [h, i, k]},
>
> {"//f/self::*", [f]},
> {"//f/ancestor::*", [a]},
> {"//f/descendant::*", [g]},
> {"//f/preceding::*", [b, c, d, e]},
> {"//f/following::*", [h, i, j, k]},
>
> {"//text()[1]/following-sibling::*", [c, e, f, h, i, k]}
> ]}
> ].
>
>
>
> --- xmerl_xpath_parse.yrl~ 2006-05-04 10:38:38.000000000 -0500
> +++ xmerl_xpath_parse.yrl 2008-08-15 14:05:40.000000000 -0500
> @@ -259,10 +259,10 @@
> 'MultiplicativeExpr' -> 'UnaryExpr' : '$1' .
> 'MultiplicativeExpr' -> 'MultiplicativeExpr' 'MultiplyOperator' 'UnaryExpr'
> : {arith, '$2', '$1', '$3'} .
> -'MultiplicativeExpr' -> 'MultiplicativeExpr' 'div' 'UnaryExpr'
> +'MultiplicativeExpr' -> 'MultiplicativeExpr' 'div' 'UnaryExpr'
> : {arith, 'div', '$1', '$3'} .
> -'MultiplicativeExpr' -> 'MultiplicativeExpr' 'mod' 'UnaryExpr' :
> - {arith, 'mod', '$1', '$2'} .
> +'MultiplicativeExpr' -> 'MultiplicativeExpr' 'mod' 'UnaryExpr'
> + : {arith, 'mod', '$1', '$3'} .
>
>
> %% [27]
> --- xmerl_xpath.erl~ 2008-08-15 22:36:56.000000000 -0500
> +++ xmerl_xpath.erl 2008-08-15 22:59:37.000000000 -0500
> @@ -148,7 +148,7 @@
> Context=(new_context(Options))#xmlContext{context_node = ContextNode,
> whole_document = WholeDoc},
> %io:format("string Context=~p~n",[Context]),
> - #state{context = NewContext} = match(Str, #state{context = Context}),
> + #state{context = NewContext} = match(Str, #state{context = Context}),
> %io:format("string NewContext=~p~n",[NewContext]),
> [N || #xmlNode{node = N} <- NewContext#xmlContext.nodeset].
>
> @@ -236,7 +236,9 @@
> acc = Acc}) ->
> ?dbg("PredExpr = ~p~n", [PredExpr]),
> NewContext = axis(Axis, NodeTest, C, Acc),
> - pred_expr(PredExpr, S#state{context = NewContext}).
> + pred_expr(PredExpr, S#state{context = NewContext});
> +path_expr('/', S) ->
> + S.
>
>
>
> @@ -250,9 +252,16 @@
> %% simple case: the predicate is a number, e.g. para[5].
> %% No need to iterate over all nodes in the nodeset; we know what to do.
> %%
> -eval_pred({number, N}, S = #state{context = C = #xmlContext{nodeset = NS}}) ->
> - case length(NS)>=N of
> +eval_pred({number, N0}, S = #state{context = C = #xmlContext{nodeset
> = NS, axis_type = AxisType}}) ->
> + Len = length(NS),
> + case Len>=N0 of
> true ->
> + N = case AxisType of
> + forward ->
> + N0;
> + reverse ->
> + Len + 1 - N0
> + end,
> NewNodeSet = [lists:nth(N, NS)],
> NewContext = C#xmlContext{nodeset = NewNodeSet},
> S#state{context = NewContext};
> @@ -365,6 +374,8 @@
>
> fwd_or_reverse(ancestor, Context) ->
> reverse_axis(Context);
> +fwd_or_reverse(ancestor_or_self, Context) ->
> + reverse_axis(Context);
> fwd_or_reverse(preceding_sibling, Context) ->
> reverse_axis(Context);
> fwd_or_reverse(preceding, Context) ->
> @@ -378,11 +389,9 @@
> Context#xmlContext{axis_type = forward}.
>
>
> -
> match_self(Tok, N, Acc, Context) ->
> case node_test(Tok, N, Context) of
> true ->
> - %io:format("node_test -> true.~n", []),
> [N|Acc];
> false ->
> Acc
> @@ -393,7 +402,6 @@
> #xmlNode{parents = Ps, node = Node, type = Type} = N,
> case Type of
> El when El == element; El == root_node ->
> -% element ->
> NewPs = [N|Ps],
> match_desc(get_content(Node), NewPs, Tok, Acc, Context);
> _Other ->
> @@ -404,44 +412,29 @@
> % match_desc(Content, Parents, Tok, [], Context).
>
> match_desc([E = #xmlElement{}|T], Parents, Tok, Acc, Context) ->
> + Acc1 = match_desc(T, Parents, Tok, Acc, Context),
> N = #xmlNode{type = node_type(E),
> node = E,
> parents = Parents},
> NewParents = [N|Parents],
> - Acc1 = case node_test(Tok, N, Context) of
> - true ->
> - [N|Acc];
> - false ->
> - Acc
> - end,
> Acc2 = match_desc(get_content(E), NewParents, Tok, Acc1, Context),
> - match_desc(T, Parents, Tok, Acc2, Context);
> + match_self(Tok, N, Acc2, Context);
> match_desc([E|T], Parents, Tok, Acc, Context) ->
> + Acc1 = match_desc(T, Parents, Tok, Acc, Context),
> N = #xmlNode{node = E,
> type = node_type(E),
> parents = Parents},
> - Acc1 = case node_test(Tok, N, Context) of
> - true ->
> - [N|Acc];
> - false ->
> - Acc
> - end,
> - match_desc(T, Parents, Tok, Acc1, Context);
> + match_self(Tok, N, Acc1, Context);
> match_desc([], _Parents, _Tok, Acc, _Context) ->
> Acc.
> -
> +
>
>
> %% "The 'descendant-or-self' axis contains the context node and the
> %% descendants of the context node."
> match_descendant_or_self(Tok, N, Acc, Context) ->
> - Acc1 = case node_test(Tok, N, Context) of
> - true ->
> - [N|Acc];
> - false ->
> - Acc
> - end,
> - match_descendant(Tok, N, Acc1, Context).
> + Acc1 = match_descendant(Tok, N, Acc, Context),
> + match_self(Tok, N, Acc1, Context).
>
>
> match_child(Tok, N, Acc, Context) ->
> @@ -455,12 +448,7 @@
> ThisN = #xmlNode{type = node_type(E),
> node = E,
> parents = NewPs},
> - case node_test(Tok, ThisN, Context) of
> - true ->
> - [ThisN|AccX];
> - false ->
> - AccX
> - end
> + match_self(Tok, ThisN, AccX, Context)
> end, Acc, get_content(Node));
> _Other ->
> Acc
> @@ -474,12 +462,7 @@
> [] ->
> Acc;
> [PN|_] ->
> - case node_test(Tok, PN, Context) of
> - true ->
> - [PN|Acc];
> - false ->
> - Acc
> - end
> + match_self(Tok, PN, Acc, Context)
> end.
>
>
> @@ -489,14 +472,9 @@
> %% always include the root node, unless the context node is the root node."
> match_ancestor(Tok, N, Acc, Context) ->
> Parents = N#xmlNode.parents,
> - lists:foldr(
> + lists:foldl(
> fun(PN, AccX) ->
> - case node_test(Tok, PN, Context) of
> - true ->
> - [PN|AccX];
> - false ->
> - AccX
> - end
> + match_self(Tok, PN, AccX, Context)
> end, Acc, Parents).
>
>
> @@ -506,12 +484,7 @@
> %% of the context node; thus, the acestor axis will always include the
> %% root node."
> match_ancestor_or_self(Tok, N, Acc, Context) ->
> - Acc1 = case node_test(Tok, N, Context) of
> - true ->
> - [N|Acc];
> - false ->
> - Acc
> - end,
> + Acc1 = match_self(Tok, N, Acc, Context),
> match_ancestor(Tok, N, Acc1, Context).
>
>
> @@ -525,19 +498,14 @@
> case Ps of
> [#xmlNode{type = element,
> node = #xmlElement{} = PNode}|_] ->
> - FollowingSiblings = lists:nthtail(Node#xmlElement.pos,
> + FollowingSiblings = lists:nthtail(get_position(Node),
> get_content(PNode)),
> lists:foldr(
> fun(E, AccX) ->
> ThisN = #xmlNode{type = node_type(E),
> node = E,
> parents = Ps},
> - case node_test(Tok, ThisN, Context) of
> - true ->
> - [ThisN|AccX];
> - false ->
> - AccX
> - end
> + match_self(Tok, ThisN, AccX, Context)
> end, Acc, FollowingSiblings);
> _Other ->
> Acc
> @@ -547,28 +515,21 @@
> %% "The 'following' axis contains all nodes in the same document as the
> %% context node that are after the context node in document order, excluding
> %% any descendants and excluding attribute nodes and namespace nodes."
> -%% (UW: I interpret this as "following siblings and their descendants")
> match_following(Tok, N, Acc, Context) ->
> #xmlNode{parents = Ps, node = Node} = N,
> case Ps of
> [#xmlNode{type = element,
> - node = #xmlElement{} = PNode}|_] ->
> - FollowingSiblings = lists:nthtail(Node#xmlElement.pos,
> + node = #xmlElement{} = PNode} = P|_] ->
> + Acc0 = match_following(Tok, P, Acc, Context),
> + FollowingSiblings = lists:nthtail(get_position(Node),
> get_content(PNode)),
> lists:foldr(
> fun(E, AccX) ->
> ThisN = #xmlNode{type = node_type(E),
> node = E,
> parents = Ps},
> - Acc1 =
> - case node_test(Tok, ThisN, Context) of
> - true ->
> - [ThisN|AccX];
> - false ->
> - AccX
> - end,
> - match_desc(get_content(E), Tok, Ps, Acc1, Context)
> - end, Acc, FollowingSiblings);
> + match_descendant_or_self(Tok, ThisN, AccX, Context)
> + end, Acc0, FollowingSiblings);
> _Other ->
> Acc
> end.
> @@ -588,51 +549,39 @@
> [#xmlNode{type = element,
> node = #xmlElement{} = PNode}|_] ->
> PrecedingSiblings = lists:sublist(get_content(PNode), 1,
> - Node#xmlElement.pos-1),
> + get_position(Node) - 1),
> lists:foldr(
> fun(E, AccX) ->
> ThisN = #xmlNode{type = node_type(E),
> node = E,
> parents = Ps},
> - case node_test(Tok, ThisN, Context) of
> - true ->
> - [ThisN|AccX];
> - false ->
> - AccX
> - end
> + match_self(Tok, ThisN, AccX, Context)
> end, Acc, PrecedingSiblings);
> _Other ->
> - []
> + Acc
> end.
>
>
> %% "The 'preceding' axis contains all nodes in the same document as the context
> %% node that are before the context node in document order, exluding any
> %% ancestors and excluding attribute nodes and namespace nodes."
> -%% (UW: I interpret this as "preceding siblings and their descendants".)
> match_preceding(Tok, N, Acc, Context) ->
> #xmlNode{parents = Ps, node = Node} = N,
> case Ps of
> [#xmlNode{type = element,
> - node = #xmlElement{} = PNode}|_] ->
> + node = #xmlElement{} = PNode} = P|_] ->
> PrecedingSiblings = lists:sublist(get_content(PNode), 1,
> - Node#xmlElement.pos-1),
> - lists:foldr(
> - fun(E, AccX) ->
> - ThisN = #xmlNode{type = node_type(E),
> - node = E,
> - parents = Ps},
> - Acc1 =
> - case node_test(Tok, ThisN, Context) of
> - true ->
> - [ThisN|AccX];
> - false ->
> - AccX
> - end,
> - match_desc(get_content(E), Tok, Ps, Acc1, Context)
> - end, Acc, PrecedingSiblings);
> + get_position(Node) - 1),
> + Acc0 = lists:foldr(
> + fun(E, AccX) ->
> + ThisN = #xmlNode{type = node_type(E),
> + node = E,
> + parents = Ps},
> + match_descendant_or_self(Tok, ThisN, AccX, Context)
> + end, Acc, PrecedingSiblings),
> + match_preceding(Tok, P, Acc0, Context);
> _Other ->
> - []
> + Acc
> end.
>
>
> @@ -642,20 +591,15 @@
> case N#xmlNode.type of
> element ->
> #xmlNode{parents = Ps, node = E} = N,
> - lists:foldl(
> + lists:foldr(
> fun(A, AccX) ->
> ThisN = #xmlNode{type = attribute,
> node = A,
> parents = [N|Ps]},
> - case node_test(Tok, ThisN, Context) of
> - true ->
> - [ThisN|AccX];
> - false ->
> - AccX
> - end
> + match_self(Tok, ThisN, AccX, Context)
> end, Acc, E#xmlElement.attributes);
> _Other ->
> - []
> + Acc
> end.
>
> node_type(#xmlAttribute{}) -> attribute;
> @@ -672,27 +616,20 @@
> % erlang:fault(not_yet_implemented).
>
>
> -update_nodeset(Context = #xmlContext{axis_type = reverse}, NodeSet) ->
> - Context#xmlContext{nodeset = reverse(NodeSet)};
> -update_nodeset(Context, NodeSet) ->
> - Context#xmlContext{nodeset = forward(NodeSet)}.
> -
> -reverse(NodeSet) ->
> - reverse(NodeSet, 1, []).
> -
> -reverse([H|T], Pos, Acc) ->
> - reverse(T, Pos+1, [H#xmlNode{pos = Pos}|Acc]);
> -reverse([], _Pos, Acc) ->
> - Acc.
> -
> -forward(NodeSet) ->
> - forward(NodeSet, 1).
> -
> -forward([H|T], Pos) ->
> - [H#xmlNode{pos = Pos}|forward(T, Pos+1)];
> -forward([], _Pos) ->
> - [].
>
> +update_nodeset(Context = #xmlContext{axis_type = AxisType}, NodeSet) ->
> + MapFold =
> + case AxisType of
> + forward ->
> + mapfoldl;
> + reverse ->
> + mapfoldr
> + end,
> + {Result, _N} =
> + lists:MapFold(fun(Node, N) ->
> + {Node#xmlNode{pos = N}, N + 1}
> + end, 1, NodeSet),
> + Context#xmlContext{nodeset = Result}.
>
>
> node_test(F, N, Context) when function(F) ->
> @@ -736,12 +673,12 @@
> case expanded_name(Prefix, Local, Context) of
> [] ->
> ?dbg("node_test(~p, ~p) -> ~p.~n",
> - [{Tag, Prefix, Local}, write_node(Name), false]),
> + [{_Tag, Prefix, Local}, write_node(Name), false]),
> false;
> ExpName ->
> Res = (ExpName == {NS#xmlNamespace.default,Name}),
> ?dbg("node_test(~p, ~p) -> ~p.~n",
> - [{Tag, Prefix, Local}, write_node(Name), Res]),
> + [{_Tag, Prefix, Local}, write_node(Name), Res]),
> Res
> end;
> node_test({name, {Tag,_Prefix,_Local}},
> @@ -812,3 +749,8 @@
> C;
> get_content(#xmlDocument{content = C}) ->
> [C].
> +
> +get_position(#xmlElement{pos = N}) ->
> + N;
> +get_position(#xmlText{pos = N}) ->
> + N.
> _______________________________________________
> erlang-patches mailing list
> erlang-patches@REDACTED
> http://www.erlang.org/mailman/listinfo/erlang-patches
>
>
More information about the erlang-patches
mailing list