Xmerl performance

Sean Hinde sean.hinde@REDACTED
Fri Jan 14 00:09:55 CET 2005


Hi,

We used the expat binding which was written as part of the ejabberd  
project. Thanks to Alexey Shchepin for writing it and making it  
available (under the GPL).

We adapted the SAX type model of the ejabberd expat driver to create a  
DOM with something like the following code. I can't quite recall which  
bits of this were used but it might provide a guide.

Sean

%%%----------------------------------------------------------------------
%%% File    : xml_stream.erl
%%% Author  : Alexey Shchepin <alexey@REDACTED>
%%% Purpose :
%%% Created : 17 Nov 2002 by Alexey Shchepin <alexey@REDACTED>
%%% Id      : $Id: xml_stream.erl,v 1.4 2003/03/15 21:11:19 alexey Exp $
%%%----------------------------------------------------------------------

%% [{xmlelement, Name, Attrs, [{xmlelement, Name, Attrs, []}]}].

-module(xml_stream_sh1).
-author('alexey@REDACTED').
-vsn('$Revision: 1.4 $ ').

-export([start/1, init/1, send_text/2, test/0]).

%% Manual smoke test: assembles a fixed list of parser events (start
%% tags, character data and end tags for a "People" document holding a
%% "comment" and a "Person" element) and returns the result of running
%% create/2 over it with an empty stack.
test() ->
    Comment = [{xmlstart, {"comment", []}},
               {xmlcdata, "This is a comment  "},
               {xmlend, "comment"}],
    Person = [{xmlstart, {"Person", [{"Type", "Personal"}]}},
              {xmlcdata, "3  "},
              {xmlend, "Person"}],
    Events = [{xmlstart, {"People", []}}, {xmlcdata, "1  "}]
        ++ Comment
        ++ [{xmlcdata, "2  "}]
        ++ Person
        ++ [{xmlend, "People"}],
    create(Events, []).


%% create(Events, Stack) -> [{Name, Attrs, Children}] | {error, Reason}
%%
%% Folds a complete list of parser events into a tree.
%%
%% Events is a list of {xmlstart, {Name, Attrs}}, {xmlcdata, Data},
%% {xmlend, Name} and {xmlerror, Reason} tuples (a bare {xmlerror, Reason}
%% tuple in place of the list is also accepted). Stack holds the elements
%% that are currently open, innermost first; pass [] to start.
%%
%% Each element is {Name, Attrs, Children}, where Children is a list of
%% {cdata, Data} tuples and nested elements in document order. On success
%% the result is a one-element list holding the root element.
%%
%% Character data between two start tags, and character data directly
%% after an end tag, is simply discarded. Strictly speaking only
%% whitespace should be dropped there, but no such check is made.
create([{xmlstart, Start1}, {xmlcdata, _}, {xmlstart, Start2} | T], Stack) ->
    create([{xmlstart, Start1}, {xmlstart, Start2} | T], Stack);
create([{xmlstart, {Name, Attrs}} | T], Stack) ->
    create(T, [{Name, Attrs, []} | Stack]);
create([{xmlend, Name}, {xmlcdata, _} | T], Stack) ->
    create([{xmlend, Name} | T], Stack);
create([{xmlend, _Name} | T], Stack) ->
    case Stack of
        [{Name1, Attrs, Els}] ->
            %% Root closed. Children are accumulated newest-first, so put
            %% them back into document order before returning.
            [{Name1, Attrs, lists:reverse(Els)}];
        [] ->
            {error, end_with_no_start};
        [{Name0, Attrs0, Els0}, {Name1, Attrs1, Els1} | St] ->
            %% Only the element being closed is reversed. The parent list
            %% stays newest-first until the parent itself closes;
            %% reversing it here on every child would scramble the order
            %% of three or more siblings.
            Closed = {Name0, Attrs0, lists:reverse(Els0)},
            create(T, [{Name1, Attrs1, [Closed | Els1]} | St])
    end;
create([{xmlcdata, Data} | T], [{Name, Attrs, Els} | Stack]) ->
    create(T, [{Name, Attrs, [{cdata, Data} | Els]} | Stack]);
create([{xmlerror, Reason} | _], _) ->
    %% A parser error in the middle of the event list aborts the build.
    {error, Reason};
create([], _) ->
    %% Events ran out before the root element was closed.
    {error, premature_end};
create({xmlerror, Reason}, _) ->
    {error, Reason}.

%% build(Event, Stack, Last2) ->
%%     {cont, NewStack, NewLast2} | {done, [Element]} | {error, Reason}
%%
%% Incremental tree builder: consumes ONE parser event per call.
%%
%% Event is {xmlstart, {Name, Attrs}}, {xmlcdata, Data}, {xmlend, Name}
%% or {xmlerror, Reason}. Stack holds the currently open elements as
%% {Name, Attrs, Children} tuples, innermost first, with Children kept
%% newest-first until the element is closed. Last2 holds the most recent
%% events (at most two, newest first); it is used to recognise and drop
%% character data that sits between two start tags or directly after an
%% end tag.
%%
%% {done, [Root]} is returned when the end tag of the only open element
%% arrives; Root's children are then in document order.

%% A parser error aborts the build regardless of the current state. The
%% error arrives as the event (first argument), so it is matched there.
build({xmlerror, Reason}, _Stack, _Last2) ->
    {error, Reason};
%% Character data right after an end tag: dropped, state unchanged.
build({xmlcdata, _}, Stack, [{xmlend, _} | _] = Last2) ->
    {cont, Stack, Last2};
%% start, cdata, start: the character data held back in Last2 is dropped.
build({xmlstart, {Name, Attrs}}, Stack, [{xmlcdata, _}, {xmlstart, Val}]) ->
    {cont, [{Name, Attrs, []} | Stack],
     [{xmlstart, {Name, Attrs}}, {xmlstart, Val}]};
%% Character data right after a start tag: hold it back in Last2 without
%% touching Stack until the next event shows whether it should be kept.
build({xmlcdata, Data}, Stack, [{xmlstart, Val} | _]) ->
    {cont, Stack, [{xmlcdata, Data}, {xmlstart, Val}]};
%% Held-back character data followed by anything other than a start tag:
%% commit it to the open element, then process the event normally.
build(Tag, [{Name, Attrs, Els} | Stack], [{xmlcdata, Data}, {xmlstart, _}]) ->
    build(Tag, [{Name, Attrs, [{cdata, Data} | Els]} | Stack],
          [Tag, {xmlcdata, Data}]);
build({xmlstart, {Name, Attrs}}, Stack, Last2) ->
    {cont, [{Name, Attrs, []} | Stack],
     last2({xmlstart, {Name, Attrs}}, Last2)};
build({xmlend, Name}, Stack, Last2) ->
    case Stack of
        [{Name1, Attrs, Els}] ->
            {done, [{Name1, Attrs, lists:reverse(Els)}]};
        [] ->
            {error, end_with_no_start};
        [{Name0, Attr0, Els0}, {Name1, Attr, Els} | St] ->
            %% Close the innermost element and attach it to its parent.
            Closed = {Name0, Attr0, lists:reverse(Els0)},
            {cont, [{Name1, Attr, [Closed | Els]} | St],
             last2({xmlend, Name}, Last2)}
    end;
build({xmlcdata, Data}, [{Name, Attrs, Els} | Stack], Last2) ->
    {cont, [{Name, Attrs, [{cdata, Data} | Els]} | Stack],
     last2({xmlcdata, Data}, Last2)}.

%% Puts Latest at the front of the recent-events window and keeps at most
%% two entries: the new event plus the previous newest one. Only windows
%% of length 0, 1 or 2 are accepted.
last2(Latest, []) ->
    [Latest];
last2(Latest, [Prev]) ->
    [Latest, Prev];
last2(Latest, [Prev, _Dropped]) ->
    [Latest, Prev].

%% Spawns a new (unlinked) process that runs init/1 of this module with
%% CallbackPid as its only argument, and returns the pid of that process.
%% NOTE(review): init/1 sends its argument to the port as a command, so
%% the value passed here looks like input data rather than a pid - confirm.
start(CallbackPid) ->
    InitArgs = [CallbackPid],
    erlang:spawn(?MODULE, init, InitArgs).



%% Body of the process started by start/1. Opens the expat_erl port in
%% binary mode, sends String to it as the first command, then enters
%% loop/4 with an empty element stack and an empty recent-events window.
init(String) ->
    ExpatPort = open_port({spawn, expat_erl}, [binary]),
    FirstCommand = {self(), {command, String}},
    ExpatPort ! FirstCommand,
    loop(String, ExpatPort, [], []).

%% Receive loop of the parser process.
%%
%% * {Port, {data, Bin}}: Bin is decoded with binary_to_term/1 and the
%%   resulting event is handed to build/3. The loop continues on
%%   {cont, _, _} and otherwise ends, returning {ok, Tree} or
%%   {error, Reason}.
%% * {_, {send, Str}}: Str is forwarded to the port as a command (this is
%%   the message shape send_text/2 produces).
%%
%% String is carried along unchanged. No other message shapes are
%% matched, and there is no timeout.
loop(String, Port, Stack, Last2) ->
    receive
        {Port, {data, Bin}} ->
            Event = binary_to_term(Bin),
            Outcome = build(Event, Stack, Last2),
            case Outcome of
                {cont, NextStack, NextLast2} ->
                    loop(String, Port, NextStack, NextLast2);
                {done, Tree} ->
                    {ok, Tree};
                {error, _} = Failure ->
                    Failure
            end;
        {_Sender, {send, Str}} ->
            Port ! {self(), {command, Str}},
            loop(String, Port, Stack, Last2)
    end.

%% Stream-oriented event handler: applies one parser event to Stack and
%% reports progress to CallbackPid through gen_fsm:send_event/2.
%%
%% * {xmlstart, {Name, Attrs}}: pushes a new {xmlelement, ...} onto Stack;
%%   if Stack was empty, {xmlstreamstart, Name, Attrs} is sent first.
%% * {xmlend, EndName}: closes the innermost element. With two or more
%%   elements still open below it, it is attached to its parent. With
%%   exactly one below, it is sent as {xmlstreamelement, El}. With none
%%   below, it is sent and followed by {xmlstreamend, EndName}.
%% * {xmlcdata, CData}: added to the innermost element via
%%   add_subelement/2.
%% * {xmlerror, Err}: {xmlstreamerror, Err} is sent.
%%
%% Returns the new stack, except for {xmlerror, _}, where the value of
%% the gen_fsm:send_event/2 call is returned. Each event is also printed
%% with io:format/2.
process_data(CallbackPid, Stack, Data) ->
    io:format("Data:~p~n",[Data]),
    case Data of
        {xmlstart, {Name, Attrs}} ->
            %% Only the outermost start tag opens the stream.
            case Stack of
                [] ->
                    gen_fsm:send_event(CallbackPid,
                                       {xmlstreamstart, Name, Attrs});
                _ ->
                    true
            end,
            [{xmlelement, Name, Attrs, []} | Stack];
        {xmlend, EndName} ->
            case Stack of
                [{xmlelement, Name, Attrs, Els} | Tail] ->
                    NewEl = {xmlelement, Name, Attrs, lists:reverse(Els)},
                    case Tail of
                        [] ->
                            gen_fsm:send_event(CallbackPid,
                                               {xmlstreamelement, NewEl}),
                            gen_fsm:send_event(CallbackPid,
                                               {xmlstreamend, EndName}),
                            Tail;
                        [_] ->
                            gen_fsm:send_event(CallbackPid,
                                               {xmlstreamelement, NewEl}),
                            Tail;
                        [_, _ | _] ->
                            add_subelement(NewEl, Tail)
                    end
            end;
        {xmlcdata, CData} ->
            io:format("CDATA:~p~n",[{CData, Stack}]),
            add_subelement({xmlcdata, CData}, Stack);
        {xmlerror, Err} ->
            gen_fsm:send_event(CallbackPid, {xmlstreamerror, Err})
    end.


%% Prepends El to the child list of the element on top of Stack and
%% returns the updated stack. An empty stack is returned unchanged, so
%% El is dropped in that case.
add_subelement(El, Stack) ->
    case Stack of
        [] ->
            [];
        [{xmlelement, Tag, TagAttrs, Children} | Below] ->
            Parent = {xmlelement, Tag, TagAttrs, [El | Children]},
            [Parent | Below]
    end.


%% Sends {self(), {send, Text}} to Pid, which is the message shape the
%% second receive clause of loop/4 matches. Returns the message sent.
send_text(Pid, Text) ->
    Request = {self(), {send, Text}},
    Pid ! Request.


On 13 Jan 2005, at 16:30, Erlang Questions wrote:

> Our first attempt is trying to do the parsing from Erlang. If this  
> doesn't
> work fine (from the performance point of view), we'll evaluate using an
> Erlang port or some other mechanism to obtain the parsing results from
> Erlang and do the parsing in C or some other language.
>
> Thanks,
>     Sebastian-
>
> ----- Original Message -----
> From: "todd" <todd@REDACTED>
> To: "Chandrashekhar Mullaparthi"  
> <chandrashekhar.mullaparthi@REDACTED>
> Cc: "Erlang Questions" <erlang@REDACTED>; "Erlang Questions"
> <erlang-questions@REDACTED>
> Sent: Thursday, January 13, 2005 2:06 PM
> Subject: Re: Xmerl performance
>
>
>> I've used expat embedded in a class 5 sonet switch and found that
>> it was reliable and performed well with fixed memory usage.  I never
>> used it with erlang though. It's C based and uses a fairly simple  
>> callback
>> model. At the time it was non-validating, I don't know if that has
>> changed.
>>
>> Chandrashekhar Mullaparthi wrote:
>>
>>> http://expat.sourceforge.net/
>>>
>>> cheers
>>> Chandru
>>>
>>> On 12 Jan 2005, at 18:56, Erlang Questions wrote:
>>>
>>>> Thanks Chandru. Is expat an Erlang module? Where can I find it?
>>>>     Sebastian-
>>>>
>>>>
>>>
>>>
>>
>
>




More information about the erlang-questions mailing list