[erlang-patches] Comprehensive xmerl_xpath patch
Matthew Dempsky
matthew@REDACTED
Sat Aug 16 06:22:40 CEST 2008
Below is a comprehensive patch including all of the changes I've made
based on pristine R12B-3 (the get_content() change in my first patch
isn't actually needed). Also included is a small test suite covering
a handful of basic cases.
Issues fixed:
- fixes matching against attributes when the context node set
includes non-element nodes (from original patch)
- fixes a compile error when using -Ddebug=true (from original
patch)
- fixes handling of 'following', 'following-sibling', 'preceding',
and 'preceding-sibling' axes when the context node set includes a
text node (from original patch)
- fixes parse error for expressions using the 'mod' operator
- fixes "/" to evaluate to the document node
- fixes "[N]" short-circuit logic to work correctly when using a
reverse axis
- fixes the "ancestor-or-self" axis to be recognized as a reverse
axis
- fixes axis handling to consistently return nodes in document order
The code is also refactored a bit to reuse match_self() and
match_descendant_or_self() where possible rather than reimplementing
them for other axes.
(Assuming I can refrain from digging into the code more, hopefully
this is my last email to erlang-patches@ about this. ;-)
test() ->
[begin
{Doc, _} = xmerl_scan:string(XML),
[test(XML, Doc, XPath, Exp) || {XPath, Exp} <- Tests]
end || {XML, Tests} <- tests()],
ok.
test(XML, Doc, XPath, Exp) ->
Result = xmerl_xpath:string(XPath, Doc),
Got = [Name || #xmlElement{name = Name} <- Result],
if
Exp =:= Got ->
ok;
true ->
io:format("XML = ~s.~nXPath = ~s.~nExp = ~p.~nGot =
~p.~n~n", [XML, XPath, Exp, Got])
end.
tests() ->
[{"<a><b/> <c/> <d/> <e/></a>",
[{"//b/following::*", [c, d, e]},
{"//b/following::*[1]", [c]},
{"//b/following::*[position()=1]", [c]},
{"//b/following::*[3]", [e]},
{"//b/following::*[position()=3]", [e]},
{"//e/preceding::*", [b, c, d]},
{"//e/preceding::*[1]", [d]},
{"//e/preceding::*[position()=1]", [d]},
{"//e/preceding::*[3]", [b]},
{"//e/preceding::*[position()=3]", [b]},
{"//b/following::*[position() mod 2=0]", [d]},
{"//b/self::*", [b]}
]},
{"<a><b/> <c><d/></c> <e/> <f><g/></f> <h/> <i><j/></i> <k/></a>",
[{"//g/preceding::*", [b, c, d, e]},
{"//g/following::*", [h, i, j, k]},
{"//g/ancestor::*", [a, f]},
{"//g/ancestor::*[1]", [f]},
{"//g/ancestor::*[2]", [a]},
{"//g/ancestor-or-self::*", [a, f, g]},
{"//g/ancestor-or-self::*[1]", [g]},
{"//g/ancestor-or-self::*[2]", [f]},
{"//g/ancestor-or-self::*[3]", [a]},
{"/descendant::*", [a, b, c, d, e, f, g, h, i, j, k]},
{"//f/preceding-sibling::*", [b, c, e]},
{"//f/following-sibling::*", [h, i, k]},
{"//f/self::*", [f]},
{"//f/ancestor::*", [a]},
{"//f/descendant::*", [g]},
{"//f/preceding::*", [b, c, d, e]},
{"//f/following::*", [h, i, j, k]},
{"//text()[1]/following-sibling::*", [c, e, f, h, i, k]}
]}
].
--- xmerl_xpath_parse.yrl~ 2006-05-04 10:38:38.000000000 -0500
+++ xmerl_xpath_parse.yrl 2008-08-15 14:05:40.000000000 -0500
@@ -259,10 +259,10 @@
'MultiplicativeExpr' -> 'UnaryExpr' : '$1' .
'MultiplicativeExpr' -> 'MultiplicativeExpr' 'MultiplyOperator' 'UnaryExpr'
: {arith, '$2', '$1', '$3'} .
-'MultiplicativeExpr' -> 'MultiplicativeExpr' 'div' 'UnaryExpr'
+'MultiplicativeExpr' -> 'MultiplicativeExpr' 'div' 'UnaryExpr'
: {arith, 'div', '$1', '$3'} .
-'MultiplicativeExpr' -> 'MultiplicativeExpr' 'mod' 'UnaryExpr' :
- {arith, 'mod', '$1', '$2'} .
+'MultiplicativeExpr' -> 'MultiplicativeExpr' 'mod' 'UnaryExpr'
+ : {arith, 'mod', '$1', '$3'} .
%% [27]
--- xmerl_xpath.erl~ 2008-08-15 22:36:56.000000000 -0500
+++ xmerl_xpath.erl 2008-08-15 22:59:37.000000000 -0500
@@ -148,7 +148,7 @@
Context=(new_context(Options))#xmlContext{context_node = ContextNode,
whole_document = WholeDoc},
%io:format("string Context=~p~n",[Context]),
- #state{context = NewContext} = match(Str, #state{context = Context}),
+ #state{context = NewContext} = match(Str, #state{context = Context}),
%io:format("string NewContext=~p~n",[NewContext]),
[N || #xmlNode{node = N} <- NewContext#xmlContext.nodeset].
@@ -236,7 +236,9 @@
acc = Acc}) ->
?dbg("PredExpr = ~p~n", [PredExpr]),
NewContext = axis(Axis, NodeTest, C, Acc),
- pred_expr(PredExpr, S#state{context = NewContext}).
+ pred_expr(PredExpr, S#state{context = NewContext});
+path_expr('/', S) ->
+ S.
@@ -250,9 +252,16 @@
%% simple case: the predicate is a number, e.g. para[5].
%% No need to iterate over all nodes in the nodeset; we know what to do.
%%
-eval_pred({number, N}, S = #state{context = C = #xmlContext{nodeset = NS}}) ->
- case length(NS)>=N of
+eval_pred({number, N0}, S = #state{context = C = #xmlContext{nodeset
= NS, axis_type = AxisType}}) ->
+ Len = length(NS),
+ case Len>=N0 of
true ->
+ N = case AxisType of
+ forward ->
+ N0;
+ reverse ->
+ Len + 1 - N0
+ end,
NewNodeSet = [lists:nth(N, NS)],
NewContext = C#xmlContext{nodeset = NewNodeSet},
S#state{context = NewContext};
@@ -365,6 +374,8 @@
fwd_or_reverse(ancestor, Context) ->
reverse_axis(Context);
+fwd_or_reverse(ancestor_or_self, Context) ->
+ reverse_axis(Context);
fwd_or_reverse(preceding_sibling, Context) ->
reverse_axis(Context);
fwd_or_reverse(preceding, Context) ->
@@ -378,11 +389,9 @@
Context#xmlContext{axis_type = forward}.
-
match_self(Tok, N, Acc, Context) ->
case node_test(Tok, N, Context) of
true ->
- %io:format("node_test -> true.~n", []),
[N|Acc];
false ->
Acc
@@ -393,7 +402,6 @@
#xmlNode{parents = Ps, node = Node, type = Type} = N,
case Type of
El when El == element; El == root_node ->
-% element ->
NewPs = [N|Ps],
match_desc(get_content(Node), NewPs, Tok, Acc, Context);
_Other ->
@@ -404,44 +412,29 @@
% match_desc(Content, Parents, Tok, [], Context).
match_desc([E = #xmlElement{}|T], Parents, Tok, Acc, Context) ->
+ Acc1 = match_desc(T, Parents, Tok, Acc, Context),
N = #xmlNode{type = node_type(E),
node = E,
parents = Parents},
NewParents = [N|Parents],
- Acc1 = case node_test(Tok, N, Context) of
- true ->
- [N|Acc];
- false ->
- Acc
- end,
Acc2 = match_desc(get_content(E), NewParents, Tok, Acc1, Context),
- match_desc(T, Parents, Tok, Acc2, Context);
+ match_self(Tok, N, Acc2, Context);
match_desc([E|T], Parents, Tok, Acc, Context) ->
+ Acc1 = match_desc(T, Parents, Tok, Acc, Context),
N = #xmlNode{node = E,
type = node_type(E),
parents = Parents},
- Acc1 = case node_test(Tok, N, Context) of
- true ->
- [N|Acc];
- false ->
- Acc
- end,
- match_desc(T, Parents, Tok, Acc1, Context);
+ match_self(Tok, N, Acc1, Context);
match_desc([], _Parents, _Tok, Acc, _Context) ->
Acc.
-
+
%% "The 'descendant-or-self' axis contains the context node and the
%% descendants of the context node."
match_descendant_or_self(Tok, N, Acc, Context) ->
- Acc1 = case node_test(Tok, N, Context) of
- true ->
- [N|Acc];
- false ->
- Acc
- end,
- match_descendant(Tok, N, Acc1, Context).
+ Acc1 = match_descendant(Tok, N, Acc, Context),
+ match_self(Tok, N, Acc1, Context).
match_child(Tok, N, Acc, Context) ->
@@ -455,12 +448,7 @@
ThisN = #xmlNode{type = node_type(E),
node = E,
parents = NewPs},
- case node_test(Tok, ThisN, Context) of
- true ->
- [ThisN|AccX];
- false ->
- AccX
- end
+ match_self(Tok, ThisN, AccX, Context)
end, Acc, get_content(Node));
_Other ->
Acc
@@ -474,12 +462,7 @@
[] ->
Acc;
[PN|_] ->
- case node_test(Tok, PN, Context) of
- true ->
- [PN|Acc];
- false ->
- Acc
- end
+ match_self(Tok, PN, Acc, Context)
end.
@@ -489,14 +472,9 @@
%% always include the root node, unless the context node is the root node."
match_ancestor(Tok, N, Acc, Context) ->
Parents = N#xmlNode.parents,
- lists:foldr(
+ lists:foldl(
fun(PN, AccX) ->
- case node_test(Tok, PN, Context) of
- true ->
- [PN|AccX];
- false ->
- AccX
- end
+ match_self(Tok, PN, AccX, Context)
end, Acc, Parents).
@@ -506,12 +484,7 @@
%% of the context node; thus, the acestor axis will always include the
%% root node."
match_ancestor_or_self(Tok, N, Acc, Context) ->
- Acc1 = case node_test(Tok, N, Context) of
- true ->
- [N|Acc];
- false ->
- Acc
- end,
+ Acc1 = match_self(Tok, N, Acc, Context),
match_ancestor(Tok, N, Acc1, Context).
@@ -525,19 +498,14 @@
case Ps of
[#xmlNode{type = element,
node = #xmlElement{} = PNode}|_] ->
- FollowingSiblings = lists:nthtail(Node#xmlElement.pos,
+ FollowingSiblings = lists:nthtail(get_position(Node),
get_content(PNode)),
lists:foldr(
fun(E, AccX) ->
ThisN = #xmlNode{type = node_type(E),
node = E,
parents = Ps},
- case node_test(Tok, ThisN, Context) of
- true ->
- [ThisN|AccX];
- false ->
- AccX
- end
+ match_self(Tok, ThisN, AccX, Context)
end, Acc, FollowingSiblings);
_Other ->
Acc
@@ -547,28 +515,21 @@
%% "The 'following' axis contains all nodes in the same document as the
%% context node that are after the context node in document order, excluding
%% any descendants and excluding attribute nodes and namespace nodes."
-%% (UW: I interpret this as "following siblings and their descendants")
match_following(Tok, N, Acc, Context) ->
#xmlNode{parents = Ps, node = Node} = N,
case Ps of
[#xmlNode{type = element,
- node = #xmlElement{} = PNode}|_] ->
- FollowingSiblings = lists:nthtail(Node#xmlElement.pos,
+ node = #xmlElement{} = PNode} = P|_] ->
+ Acc0 = match_following(Tok, P, Acc, Context),
+ FollowingSiblings = lists:nthtail(get_position(Node),
get_content(PNode)),
lists:foldr(
fun(E, AccX) ->
ThisN = #xmlNode{type = node_type(E),
node = E,
parents = Ps},
- Acc1 =
- case node_test(Tok, ThisN, Context) of
- true ->
- [ThisN|AccX];
- false ->
- AccX
- end,
- match_desc(get_content(E), Tok, Ps, Acc1, Context)
- end, Acc, FollowingSiblings);
+ match_descendant_or_self(Tok, ThisN, AccX, Context)
+ end, Acc0, FollowingSiblings);
_Other ->
Acc
end.
@@ -588,51 +549,39 @@
[#xmlNode{type = element,
node = #xmlElement{} = PNode}|_] ->
PrecedingSiblings = lists:sublist(get_content(PNode), 1,
- Node#xmlElement.pos-1),
+ get_position(Node) - 1),
lists:foldr(
fun(E, AccX) ->
ThisN = #xmlNode{type = node_type(E),
node = E,
parents = Ps},
- case node_test(Tok, ThisN, Context) of
- true ->
- [ThisN|AccX];
- false ->
- AccX
- end
+ match_self(Tok, ThisN, AccX, Context)
end, Acc, PrecedingSiblings);
_Other ->
- []
+ Acc
end.
%% "The 'preceding' axis contains all nodes in the same document as the context
%% node that are before the context node in document order, exluding any
%% ancestors and excluding attribute nodes and namespace nodes."
-%% (UW: I interpret this as "preceding siblings and their descendants".)
match_preceding(Tok, N, Acc, Context) ->
#xmlNode{parents = Ps, node = Node} = N,
case Ps of
[#xmlNode{type = element,
- node = #xmlElement{} = PNode}|_] ->
+ node = #xmlElement{} = PNode} = P|_] ->
PrecedingSiblings = lists:sublist(get_content(PNode), 1,
- Node#xmlElement.pos-1),
- lists:foldr(
- fun(E, AccX) ->
- ThisN = #xmlNode{type = node_type(E),
- node = E,
- parents = Ps},
- Acc1 =
- case node_test(Tok, ThisN, Context) of
- true ->
- [ThisN|AccX];
- false ->
- AccX
- end,
- match_desc(get_content(E), Tok, Ps, Acc1, Context)
- end, Acc, PrecedingSiblings);
+ get_position(Node) - 1),
+ Acc0 = lists:foldr(
+ fun(E, AccX) ->
+ ThisN = #xmlNode{type = node_type(E),
+ node = E,
+ parents = Ps},
+ match_descendant_or_self(Tok, ThisN, AccX, Context)
+ end, Acc, PrecedingSiblings),
+ match_preceding(Tok, P, Acc0, Context);
_Other ->
- []
+ Acc
end.
@@ -642,20 +591,15 @@
case N#xmlNode.type of
element ->
#xmlNode{parents = Ps, node = E} = N,
- lists:foldl(
+ lists:foldr(
fun(A, AccX) ->
ThisN = #xmlNode{type = attribute,
node = A,
parents = [N|Ps]},
- case node_test(Tok, ThisN, Context) of
- true ->
- [ThisN|AccX];
- false ->
- AccX
- end
+ match_self(Tok, ThisN, AccX, Context)
end, Acc, E#xmlElement.attributes);
_Other ->
- []
+ Acc
end.
node_type(#xmlAttribute{}) -> attribute;
@@ -672,27 +616,20 @@
% erlang:fault(not_yet_implemented).
-update_nodeset(Context = #xmlContext{axis_type = reverse}, NodeSet) ->
- Context#xmlContext{nodeset = reverse(NodeSet)};
-update_nodeset(Context, NodeSet) ->
- Context#xmlContext{nodeset = forward(NodeSet)}.
-
-reverse(NodeSet) ->
- reverse(NodeSet, 1, []).
-
-reverse([H|T], Pos, Acc) ->
- reverse(T, Pos+1, [H#xmlNode{pos = Pos}|Acc]);
-reverse([], _Pos, Acc) ->
- Acc.
-
-forward(NodeSet) ->
- forward(NodeSet, 1).
-
-forward([H|T], Pos) ->
- [H#xmlNode{pos = Pos}|forward(T, Pos+1)];
-forward([], _Pos) ->
- [].
+update_nodeset(Context = #xmlContext{axis_type = AxisType}, NodeSet) ->
+ MapFold =
+ case AxisType of
+ forward ->
+ mapfoldl;
+ reverse ->
+ mapfoldr
+ end,
+ {Result, _N} =
+ lists:MapFold(fun(Node, N) ->
+ {Node#xmlNode{pos = N}, N + 1}
+ end, 1, NodeSet),
+ Context#xmlContext{nodeset = Result}.
node_test(F, N, Context) when function(F) ->
@@ -736,12 +673,12 @@
case expanded_name(Prefix, Local, Context) of
[] ->
?dbg("node_test(~p, ~p) -> ~p.~n",
- [{Tag, Prefix, Local}, write_node(Name), false]),
+ [{_Tag, Prefix, Local}, write_node(Name), false]),
false;
ExpName ->
Res = (ExpName == {NS#xmlNamespace.default,Name}),
?dbg("node_test(~p, ~p) -> ~p.~n",
- [{Tag, Prefix, Local}, write_node(Name), Res]),
+ [{_Tag, Prefix, Local}, write_node(Name), Res]),
Res
end;
node_test({name, {Tag,_Prefix,_Local}},
@@ -812,3 +749,8 @@
C;
get_content(#xmlDocument{content = C}) ->
[C].
+
+get_position(#xmlElement{pos = N}) ->
+ N;
+get_position(#xmlText{pos = N}) ->
+ N.
More information about the erlang-patches
mailing list