Erlang Efficiency questions

Thu Mar 15 12:21:50 CET 2001

On Thu, Mar 15, 2001 at 10:36:21AM +0100, Mickael Remond wrote:
> Klacke (klacke@REDACTED) wrote:
> > 
> > This was one of the original goals of the bitsyntax as well, i.e. 
> > to be able to with ease use Binaries as an efficient replacement
> > for strings. I even once wrote a bstring.erl which was a module
> > with an equivalent interface as string.erl
> 
> Did you make some performance comparisons ?
> Would you mind releasing this module ?

I'll attach it here. It's written in our original
bit syntax and it won't compile today.

> 
> > String handling has been (and still is) one of the really
> > weak points of Erlang. At least from a performance point
> > of view, strings as lists (as we have it today) are slow, but
> > very flexible and nice. Right !!. Probably we want both.
> 
> Yes. We want probably both.
> These days with the rise of XML, efficient string support will become
> much more critical.
> 

Yup,

/klacke

-- 
Claes Wikstrom                        -- Caps lock is nowhere and
Alteon WebSystems                     -- everything is under control          
http://www.bluetail.com/~klacke       --

-------------- next part --------------
%%%----------------------------------------------------------------------
%%% File    : bstring.erl
%%% Author  : Claes Wikstrom <klacke@REDACTED>
%%% Purpose : Manipulation of binary strings
%%% Created :  2 Oct 1998 by Claes Wikstrom <klacke@REDACTED>
%%%----------------------------------------------------------------------

-module(bstring).
-author('klacke@REDACTED').

-compile(export_all).

%% len(String) -> Length
%%  Return the number of bytes (characters) in the binary string.
%%  Only binaries are accepted; anything else fails with function_clause.
-spec len(binary()) -> non_neg_integer().
len(S) when is_binary(S) -> byte_size(S).

%% equal(String1, String2) -> true | false
%%  Test whether the two arguments are exactly the same term.
equal(Left, Right) -> Left =:= Right.

%% concat(String1, String2) -> String
%%  Return a new binary string holding String1 immediately followed by
%%  String2.
-spec concat(binary(), binary()) -> binary().
concat(S1, S2) ->
    <<S1/binary, S2/binary>>.

%% chr(String, Char)
%% rchr(String, Char)
%%  Return the first/last index of the character in a string.

%% chr(String, Char) -> Index
%%  Return the 1-based index of the first byte in String equal to Char,
%%  or 0 when Char does not occur.
-spec chr(binary(), integer()) -> non_neg_integer().
chr(S, C) when is_binary(S), is_integer(C), C >= 0, C =< 255 ->
    case binary:match(S, <<C>>) of
        {Pos, _Len} -> Pos + 1;          % binary:match/2 positions are 0-based
        nomatch -> 0
    end;
chr(S, C) when is_binary(S), is_integer(C) ->
    %% A value outside 0..255 can never be a byte of a binary. Answer 0
    %% here rather than let <<C>> silently truncate it to some other byte.
    0.

%% rchr(String, Char) -> Index
%%  Return the 1-based index of the last byte in String equal to Char,
%%  or 0 when Char does not occur.
-spec rchr(binary(), integer()) -> non_neg_integer().
rchr(S, C) -> rchr(S, C, 1, 0).

%% rchr(Rest, Char, Index, Last)
%%  Index is the position of the first byte of Rest within the original
%%  string; Last is the position of the most recent hit (0 = none yet).
rchr(<<X, Cs/binary>>, C, I, _L) when X =:= C ->    % found one, keep looking
    rchr(Cs, C, I + 1, I);
rchr(<<_, Cs/binary>>, C, I, L) ->
    rchr(Cs, C, I + 1, L);
rchr(<<>>, _C, _I, L) ->
    L.

%% ix(String, Pos) -> Char
%%  Return the byte found at the 1-based position Pos of String.
%%  Fails with badmatch when Pos lies outside the string.
-spec ix(binary(), pos_integer()) -> byte().
ix(B, Pos) ->
    Skip = Pos - 1,                      % number of bytes in front of Pos
    <<_:Skip/binary, Ch, _/binary>> = B,
    Ch.

%% str(String, SubString)
%% rstr(String, SubString)
%% index(String, SubString)
%%  Return the first/last index of the sub-string in a string.
%%  index/2 is kept for backwards compatibility.

%% str(String, SubString) -> Index
%%  Return the 1-based index at which SubString first occurs in String,
%%  or 0 when it does not occur. An empty SubString yields 0, which is
%%  what string:str/2 answers for an empty sub-string.
-spec str(binary(), binary()) -> non_neg_integer().
str(S, <<>>) when is_binary(S) ->
    0;                                   % binary:match/2 rejects an empty pattern
str(S, Sub) when is_binary(S), is_binary(Sub) ->
    case binary:match(S, Sub) of
        {Ix, _Len} -> Ix + 1;            % binary:match/2 positions are 0-based
        nomatch -> 0
    end.

%% rstr(String, SubString) -> Index
%%  Return the 1-based index at which SubString last occurs in String,
%%  or 0 when it does not occur. Overlapping occurrences are honoured
%%  (rstr(<<"aaa">>, <<"aa">>) is 2). An empty SubString yields 0, which
%%  is what string:rstr/2 answers for an empty sub-string.
-spec rstr(binary(), binary()) -> non_neg_integer().
rstr(S, <<>>) when is_binary(S) ->
    0;
rstr(S, Sub) when is_binary(S), is_binary(Sub) ->
    %% Start at the right-most offset where Sub could still fit.
    rstr(S, Sub, byte_size(S) - byte_size(Sub)).

%% rstr(String, SubString, Offset)
%%  Scan backwards from the 0-based Offset; the first hit is the last
%%  occurrence.
rstr(_S, _Sub, Offset) when Offset < 0 ->
    0;
rstr(S, Sub, Offset) ->
    case binary:part(S, Offset, byte_size(Sub)) of
        Sub -> Offset + 1;
        _ -> rstr(S, Sub, Offset - 1)
    end.

%% index(String, SubString) -> Index
%%  Old name for str/2; simply forwards to it.
index(String, SubString) ->
    str(String, SubString).

%% bmember(Char, Chars) -> true | false
%%  Test whether Char occurs in the binary string Chars. chr/2 answers 0
%%  exactly when the character is absent.
bmember(Char, Chars) ->
    chr(Chars, Char) =/= 0.

%% span(String, Chars) -> Length.
%% cspan(String, Chars) -> Length.

%% span(String, Chars) -> Length
%%  Return the length of the longest prefix of String made up only of
%%  bytes that occur in the binary string Chars.
span(S, Cs) -> span(S, Cs, 0).

%% span(Rest, Chars, Count) - Count is the number of bytes accepted so far.
span(<<C, S/binary>>, Cs, I) ->
    case bmember(C, Cs) of
        true -> span(S, Cs, I + 1);
        false -> I
    end;
span(<<>>, _Cs, I) ->
    I.

%% cspan(String, Chars) -> Length
%%  Return the length of the longest prefix of String made up only of
%%  bytes that do NOT occur in the binary string Chars.
cspan(S, Cs) -> cspan(S, Cs, 0).

%% cspan(Rest, Chars, Count) - Count is the number of bytes accepted so far.
cspan(<<C, S/binary>>, Cs, I) ->
    case bmember(C, Cs) of
        true -> I;
        false -> cspan(S, Cs, I + 1)
    end;
cspan(<<>>, _Cs, I) ->
    I.

%% substr(String, Start)
%% substr(String, Start, Length)
%%  Extract a sub-string from String.

%% substr(String, Start) -> SubString
%%  Return the part of String from the 1-based position Start up to the
%%  end. Start may be one past the last byte, giving an empty binary;
%%  anything further out fails with badmatch.
-spec substr(binary(), pos_integer()) -> binary().
substr(Str, Start) ->
    Skip = Start - 1,                    % bytes dropped from the front
    <<_:Skip/binary, Tail/binary>> = Str,
    Tail.

%% substr(String, Start, Length) -> SubString
%%  Return Length bytes of String beginning at the 1-based position
%%  Start. Fails with badmatch when that range is not fully inside String.
-spec substr(binary(), pos_integer(), non_neg_integer()) -> binary().
substr(Str, Start, Len) ->
    Skip = Start - 1,                    % bytes dropped from the front
    <<_:Skip/binary, B:Len/binary, _/binary>> = Str,
    B.

%% tokens(String, Separators) -> [Token]
%%  Return the list of tokens in String, in order of appearance, where a
%%  token is a maximal non-empty run of bytes none of which occurs in
%%  Separators. Separators is a list of characters; a binary string of
%%  separator bytes is accepted as well.
%%  token/2 is the name this function was first defined under and is kept
%%  as an alias.

token(S, Seps) ->
    tokens(S, Seps).

-spec tokens(binary(), [integer()] | binary()) -> [binary()].
tokens(S, Seps) when is_binary(Seps) ->
    tokens(S, binary_to_list(Seps));
tokens(S, Seps) when is_binary(S), is_list(Seps) ->
    %% Separators outside 0..255 can never match a byte, so they are dropped
    %% instead of being truncated by <<Sep>>.
    case [<<Sep>> || Sep <- Seps, is_integer(Sep), Sep >= 0, Sep =< 255] of
        [] when S =:= <<>> -> [];
        [] -> [S];                       % binary:split/3 rejects an empty pattern list
        Patterns -> binary:split(S, Patterns, [global, trim_all])
    end.

%% chars(Char, Number) -> String
%% chars(Char, Number, Tail) -> String
%%  Return a binary string made of Number copies of the byte Char,
%%  followed by the binary Tail (empty for chars/2). A negative Number
%%  fails with function_clause.
-spec chars(byte(), non_neg_integer()) -> binary().
chars(C, N) -> chars(C, N, <<>>).

-spec chars(byte(), non_neg_integer(), binary()) -> binary().
chars(C, N, Tail) when is_integer(N), N >= 0, is_binary(Tail) ->
    Fill = binary:copy(<<C>>, N),
    <<Fill/binary, Tail/binary>>.

%%% COPIES %%%

%% copies(String, Number) -> String
%%  Return a binary string made of Number back-to-back copies of String.
%%  Number must be a non-negative integer; a negative count fails with
%%  function_clause instead of recursing without end.
-spec copies(binary(), non_neg_integer()) -> binary().
copies(S, Num) when is_binary(S), is_integer(Num), Num >= 0 ->
    binary:copy(S, Num).

%%% WORDS %%%

%% words(String) -> Count
%% words(String, Char) -> Count
%%  Return the number of words in String, where a word is a maximal
%%  non-empty run of bytes other than the separator Char (a blank for
%%  words/1). Leading, trailing and repeated separators do not create
%%  empty words, and the last word needs no trailing separator.
%%  A string holding no word at all (empty, or only separators) counts 0.
-spec words(binary()) -> non_neg_integer().
words(String) -> words(String, $ ).

-spec words(binary(), byte()) -> non_neg_integer().
words(String, Char)
  when is_binary(String), is_integer(Char), Char >= 0, Char =< 255 ->
    length(binary:split(String, <<Char>>, [global, trim_all])).

%%% SUB_WORDS %%%

%% sub_word(String, Index) -> Word
%% sub_word(String, Index, Char) -> Word
%%  Return the word at the 1-based position Index of String, where words
%%  are maximal non-empty runs of bytes other than the separator Char
%%  (a blank for sub_word/2). An empty binary is returned when String
%%  has no word at that position.
-spec sub_word(binary(), integer()) -> binary().
sub_word(String, Index) ->
    sub_word(String, Index, $ ).

-spec sub_word(binary(), integer(), byte()) -> binary().
sub_word(String, Ix, Char)
  when is_binary(String), is_integer(Ix),
       is_integer(Char), Char >= 0, Char =< 255 ->
    Words = binary:split(String, <<Char>>, [global, trim_all]),
    sub_word_nth(Ix, Words).

%% Pick the N:th (1-based) element of a word list, or <<>> if there is none.
sub_word_nth(N, _Words) when N < 1 -> <<>>;
sub_word_nth(1, [Word | _]) -> Word;
sub_word_nth(N, [_ | Words]) -> sub_word_nth(N - 1, Words);
sub_word_nth(_N, []) -> <<>>.

%%% STRIP %%%

%% strip(String) -> String
%% strip(String, Direction) -> String
%% strip(String, Direction, Char) -> String
%%  Remove every leading and/or trailing occurrence of Char (a blank
%%  unless given) from String. Direction is left, right or both, and
%%  defaults to both.
strip(String) ->
    strip(String, both, $ ).

strip(String, both) -> strip(String, both, $ );
strip(String, left) -> strip(String, left, $ );
strip(String, right) -> strip(String, right, $ ).

strip(String, both, Char) ->
    LeftStripped = strip_left(String, Char),
    strip_right(LeftStripped, Char);
strip(String, left, Char) ->
    strip_left(String, Char);
strip(String, right, Char) ->
    strip_right(String, Char).

%% strip_left(String, Char) -> String
%%  Drop every leading byte of String that equals Char. Any argument
%%  that does not start with Char is returned untouched.
strip_left(<<C, Tail/binary>>, Char) when C =:= Char ->
    strip_left(Tail, Char);
strip_left(B, _Char) ->
    B.

%% strip_right(String, Char) -> String
%%  Drop every trailing byte of String that equals Char. Any argument
%%  that does not end with Char is returned untouched.
strip_right(B, Char) when is_binary(B), byte_size(B) > 0 ->
    %% A binary pattern can only have an unsized segment at the very end,
    %% so compute the head length up front to peel off the last byte.
    HeadLen = byte_size(B) - 1,
    case B of
        <<Head:HeadLen/binary, Last>> when Last =:= Char ->
            strip_right(Head, Char);
        _ ->
            B
    end;
strip_right(B, _Char) ->
    B.

%%% LEFT %%%

%% left(String, Length) -> String
%% left(String, Length, Char) -> String
%%  Return String adjusted to exactly Length bytes, keeping its left
%%  end: a longer string is cut off on the right, a shorter one is
%%  filled out on the right with Char (a blank for left/2).
left(String, Len) -> left(String, Len, $ ).

left(String, Len, Char) ->
    Slen = byte_size(String),            % length/1 only works on lists
    if
        Slen > Len -> substr(String, 1, Len);
        Slen < Len -> l_pad(String, Len - Slen, Char);
        Slen == Len -> String
    end.

%% l_pad(String, Number, Char) -> String
%%  Return String with Number copies of Char appended after it (the
%%  string stays flush left).
l_pad(String, Num, Char) ->
    Btail = chars(Char, Num),
    <<String/binary, Btail/binary>>.

%%% RIGHT %%%

%% right(String, Length) -> String
%% right(String, Length, Char) -> String
%%  Return String adjusted to exactly Length bytes, keeping its right
%%  end: a longer string loses its leading bytes, a shorter one gets
%%  copies of Char (a blank for right/2) put in front of it.
right(String, Len) ->
    right(String, Len, $ ).

right(String, Len, Char) ->
    case size(String) of
        Slen when Slen > Len ->
            First = Slen - Len + 1,
            substr(String, First);
        Slen when Slen < Len ->
            Missing = Len - Slen,
            r_pad(String, Missing, Char);
        Slen when Slen == Len ->
            String
    end.

%% r_pad(String, Number, Char) -> String
%%  Hand String to chars/3 as the tail that follows Number copies of Char.
r_pad(String, Count, PadChar) ->
    chars(PadChar, Count, String).

%%% CENTRE %%%

%% centre(String, Length) -> String
%% centre(String, Length, Char) -> String
%%  Return String adjusted to exactly Length bytes around its middle:
%%  a longer string keeps its middle Length bytes, a shorter one is
%%  padded with Char (a blank for centre/2) on both sides. When the
%%  padding cannot be split evenly the extra byte goes on the right.
centre(String, Len) -> centre(String, Len, $ ).

centre(_String, 0, _Char) ->
    <<>>;                                % always a binary, never a list
centre(String, Len, Char) ->
    Slen = size(String),
    if
        Slen > Len ->
            substr(String, (Slen - Len) div 2 + 1, Len);
        Slen < Len ->
            N = (Len - Slen) div 2,      % padding placed in front
            r_pad(l_pad(String, Len - (Slen + N), Char), N, Char);
        Slen == Len ->
            String
    end.

%%% SUB_STRING %%%

%% sub_string(String, Start) -> SubString
%% sub_string(String, Start, Stop) -> SubString
%%  Return the part of String from position Start up to and including
%%  position Stop (to the end of String for sub_string/2). Both forms
%%  are expressed through substr/2,3.
sub_string(String, Start) ->
    substr(String, Start).

sub_string(String, Start, Stop) ->
    Len = Stop - Start + 1,
    substr(String, Start, Len).

%% The Regular Expression Matching Functions.
%%
%%  These have been rewritten. As their interface has changed slightly
%%  (much to the better) I have moved them to a new module 'regexp' to
%%  avoid another "interface war" about something which doesn't
%%  seriously affect that many people. This interface is kept for
%%  backwards compatibility so I don't get shot for that as well.
%%
%%  /Robert Virding

%% re_sh_to_awk(ShellRegExp) -> Result
%%  Forward to regexp:sh_to_awk/1 and return its result unchanged.
%%  NOTE(review): every other re_* wrapper here calls bregexp, this one
%%  calls regexp - confirm that is intended.
re_sh_to_awk(ShRegExp) ->
    regexp:sh_to_awk(ShRegExp).

%% re_parse(RegExp) -> {regexp, RE} | {error, E}
%%  Run bregexp:parse/1 and re-tag a successful result as {regexp, RE};
%%  an {error, E} result is passed through as it is.
re_parse(RegExp) ->
    case bregexp:parse(RegExp) of
        {error, _} = Error -> Error;
        {ok, Parsed} -> {regexp, Parsed}
    end.

%% re_match(String, RegExp) -> {match, SubString, Start} | nomatch | {error, E}
%%  Run bregexp:match/2. A {match, Start, Len} result is turned into the
%%  matched part of String (cut out with substr/3) together with Start;
%%  nomatch and {error, E} are passed through as they are.
re_match(String, RegExp) ->
    case bregexp:match(String, RegExp) of
        nomatch ->
            nomatch;
        {error, _} = Error ->
            Error;
        {match, Start, Len} ->
            Matched = substr(String, Start, Len),
            {match, Matched, Start}
    end.

%% re_sub(String, RegExp, New) -> {ok, Result} | {error, E}
%%  Run bregexp:sub/3. The third element of its {ok, Result, _} answer
%%  is dropped; an {error, E} result is passed through as it is.
re_sub(String, RegExp, New) ->
    case bregexp:sub(String, RegExp, New) of
        {error, _} = Error -> Error;
        {ok, Result, _Dropped} -> {ok, Result}
    end.

%% re_gsub(String, RegExp, New) -> {ok, Result} | {error, E}
%%  Run bregexp:gsub/3. The third element of its {ok, Result, _} answer
%%  is dropped; an {error, E} result is passed through as it is.
re_gsub(String, RegExp, New) ->
    case bregexp:gsub(String, RegExp, New) of
        {error, _} = Error -> Error;
        {ok, Result, _Dropped} -> {ok, Result}
    end.

%% re_split(String, RegExp) -> Result
%%  Forward to bregexp:split/2 and return its result unchanged.
re_split(Subject, Pattern) ->
    bregexp:split(Subject, Pattern).