Erlang Efficiency questions
Klacke
klacke@REDACTED
Thu Mar 15 12:21:50 CET 2001
On Thu, Mar 15, 2001 at 10:36:21AM +0100, Mickael Remond wrote:
> Klacke (klacke@REDACTED) wrote:
> >
> > This was one of the original goals of the bitsyntax as well, i.e.
> > to be able to with ease use Binaries as an efficient replacement
> > for strings. I even once wrote a bstring.erl which was a module
> > with an equivalent interface as string.erl
>
> Did you make some performance comparisons ?
> Would you mind releasing this module ?
I'll attach it here. It's written in our original
bit syntax and it won't compile today.
>
> > String handling has been (and still is) one of the really
> > weak points of Erlang. At least from a performance point
> > of view, strings as lists (as we have it today) are slow, but
> > very flexible and nice. Right !!. Probably we want both.
>
> Yes. We want probably both.
> These days with the rise of XML, efficient string support will become
> much more critical.
>
Yup,
/klacke
--
Claes Wikstrom -- Caps lock is nowhere and
Alteon WebSystems -- everything is under control
http://www.bluetail.com/~klacke --
-------------- next part --------------
%%%----------------------------------------------------------------------
%%% File : bstring.erl
%%% Author : Claes Wikstrom <klacke@REDACTED>
%%% Purpose : Manipulation of binary strings
%%% Created : 2 Oct 1998 by Claes Wikstrom <klacke@REDACTED>
%%%----------------------------------------------------------------------
-module(bstring).
-author('klacke@REDACTED').
-compile(export_all).
%% len(String) -> Length
%% Return the number of bytes in the binary string.
%% byte_size/1 plus the is_binary/1 guard replace size/1, which would
%% also accept (and silently measure) a tuple passed by mistake.
len(S) when is_binary(S) ->
    byte_size(S).
%% equal(String1, String2) -> true | false
%% Exact-equality test: true only when both arguments are the very
%% same term (the comparison a repeated variable in a head performs).
equal(S1, S2) ->
    S1 =:= S2.
%% concat(String1, String2) -> String
%% Return a new binary holding String1 followed by String2.
%% Ported from the prototype `<A/binary | B>` form to the released
%% bit syntax.
concat(S1, S2) ->
    <<S1/binary, S2/binary>>.
%% chr(String, Char)
%% rchr(String, Char)
%% Return the first/last index of the character in a string.
%% Indices are 1-based; chr/2 returns 0 when Char does not occur.

%% The prototype pattern searched for Char at an unknown offset, which
%% the released bit syntax cannot express, so the binary is walked one
%% byte at a time instead.
chr(S, C) when is_binary(S) ->
    chr(S, C, 1).

%% chr(Rest, Char, Index): Index is the 1-based position, within the
%% original string, of the first byte of Rest.
chr(<<C, _/binary>>, C, I) -> I;
chr(<<_, Rest/binary>>, C, I) -> chr(Rest, C, I + 1);
chr(<<>>, _C, _I) -> 0.
%% rchr(String, Char) -> Index
%% Return the 1-based index of the last occurrence of Char in String,
%% or 0 when Char does not occur.
rchr(S, C) -> rchr(S, C, 1, 0).

%% rchr(Rest, Char, Index, Last): Index is the position of the first
%% byte of Rest; Last is the most recent position where Char was seen
%% (0 while none has been seen).
rchr(<<C, Cs/binary>>, C, I, _L) ->        %Found one, now find next!
    rchr(Cs, C, I + 1, I);
rchr(<<_, Cs/binary>>, C, I, L) ->
    rchr(Cs, C, I + 1, L);
rchr(<<>>, _C, _I, L) -> L.
%% ix(String, Pos) -> Char
%% Return the byte at 1-based position Pos.  The match below fails
%% (badmatch) when Pos lies outside the string.
ix(B, Pos) ->
    %% The size is computed up front so the pattern only needs a
    %% plain bound variable as segment size.
    Skip = Pos - 1,
    <<_:Skip/binary, Ch, _/binary>> = B,
    Ch.
%% str(String, SubString)
%% rstr(String, SubString)
%% index(String, SubString)
%% Return the first/last index of the sub-string in a string.
%% index/2 is kept for backwards compatibility.

%% str/2 returns the 1-based index of the first occurrence, or 0 when
%% SubString does not occur.  An empty SubString is reported as "not
%% found" (0); it is handled separately because binary:match/2 does not
%% accept an empty pattern.
str(_S, <<>>) ->
    0;
str(S, Sub) when is_binary(S), is_binary(Sub) ->
    case binary:match(S, Sub) of
        {Pos, _Len} -> Pos + 1;        % binary:match/2 positions are 0-based
        nomatch -> 0
    end.
%% rstr(String, SubString) -> Index
%% Return the 1-based index of the last occurrence of SubString in
%% String, or 0 when it does not occur (an empty SubString never
%% matches).  The earlier version incremented a counter for every match
%% and returned that counter, i.e. a number of matches rather than a
%% position.
rstr(_S, <<>>) ->
    0;
rstr(S, Sub) when is_binary(S), is_binary(Sub) ->
    %% Start at the right-most offset where Sub could still fit.
    rstr(S, Sub, byte_size(S) - byte_size(Sub)).

%% rstr(String, SubString, Pos): try 0-based offset Pos, then move one
%% byte to the left; scanning right-to-left also finds overlapping
%% occurrences.
rstr(_S, _Sub, Pos) when Pos < 0 ->
    0;
rstr(S, Sub, Pos) ->
    Len = byte_size(Sub),
    case S of
        <<_:Pos/binary, Sub:Len/binary, _/binary>> -> Pos + 1;
        _ -> rstr(S, Sub, Pos - 1)
    end.
%% index(String, SubString) -> Index
%% Compatibility alias: hands both arguments straight to str/2.
index(S, Sub) ->
    str(S, Sub).
%% bmember(Char, Chars) -> true | false
%% True exactly when chr/2 reports a non-zero index for Char in Chars.
bmember(C, Cs) ->
    chr(Cs, C) =/= 0.
%% span(String, Chars) -> Length.
%% cspan(String, Chars) -> Length.

%% span/2: length of the leading run of bytes of String that are all
%% members (per bmember/2) of Chars.
span(S, Cs) -> span(S, Cs, 0).

%% span(Rest, Chars, Count): Count is the number of leading bytes
%% accepted so far.
span(<<C, S/binary>>, Cs, I) ->
    case bmember(C, Cs) of
        true -> span(S, Cs, I + 1);
        false -> I
    end;
span(<<>>, _Cs, I) -> I.
%% cspan(String, Chars) -> Length
%% Length of the leading run of bytes of String that are NOT members
%% (per bmember/2) of Chars.
cspan(S, Cs) -> cspan(S, Cs, 0).

%% cspan(Rest, Chars, Count): Count is the number of leading bytes
%% skipped so far.
cspan(<<C, S/binary>>, Cs, I) ->
    case bmember(C, Cs) of
        true -> I;
        false -> cspan(S, Cs, I + 1)
    end;
cspan(<<>>, _Cs, I) -> I.
%% substr(String, Start)
%% substr(String, Start, Length)
%% Extract a sub-string from String.

%% substr/2 returns everything from 1-based position Start to the end
%% of the string.  The match fails (badmatch) when Start - 1 exceeds
%% the size of the string.
substr(Str, Start) ->
    Skip = Start - 1,
    <<_:Skip/binary, Tail/binary>> = Str,
    Tail.
%% substr(String, Start, Length) -> SubString
%% Return Length bytes of String beginning at 1-based position Start.
%% The match fails (badmatch) when the requested range does not lie
%% inside the string.
substr(Str, Start, Len) ->
    Skip = Start - 1,
    <<_:Skip/binary, B:Len/binary, _/binary>> = Str,
    B.
%% tokens(String, Separators).
%% Return a list of tokens separated by characters in Separators.
%%
%% Separators may be a list of bytes or a binary.  Runs of separators
%% are collapsed, so no empty tokens are produced, and the tokens are
%% returned in the order in which they appear in String.
%%
%% The earlier version handled one separator at a time, dropped the
%% final token when no separator followed it, and never reversed its
%% accumulator.

%% token/2 is the name this module originally exported; it is kept and
%% simply delegates to tokens/2, the name used in the header above.
token(S, Seps) ->
    tokens(S, Seps).

tokens(S, Seps) when is_binary(S), is_binary(Seps) ->
    tokens(S, binary_to_list(Seps));
tokens(S, Seps) when is_binary(S), is_list(Seps) ->
    tokens_skip(S, Seps, []).

%% Skip separator bytes until a token starts or the input ends.
tokens_skip(<<C, Rest/binary>> = Bin, Seps, Acc) ->
    case lists:member(C, Seps) of
        true -> tokens_skip(Rest, Seps, Acc);
        false -> tokens_take(Bin, Seps, 1, Acc)
    end;
tokens_skip(<<>>, _Seps, Acc) ->
    lists:reverse(Acc).

%% Bin starts with Len known token bytes; keep extending the token
%% until a separator or the end of the input is reached.
tokens_take(Bin, Seps, Len, Acc) ->
    case Bin of
        <<Tok:Len/binary, C, Rest/binary>> ->
            case lists:member(C, Seps) of
                true -> tokens_skip(Rest, Seps, [Tok | Acc]);
                false -> tokens_take(Bin, Seps, Len + 1, Acc)
            end;
        _ ->
            %% No byte follows the token: it runs to the end of the input.
            lists:reverse([Bin | Acc])
    end.
%% chars(Char, Number) -> String
%% chars(Char, Number, Tail) -> String
%% Return a binary made of Number copies of the byte Char, followed by
%% Tail (empty for chars/2).
chars(C, N) -> chars(C, N, <<>>).

%% binary:copy/2 builds the padding in one step instead of prepending
%% one byte per recursion level.
chars(C, N, Tail) when is_integer(N), N >= 0, is_binary(Tail) ->
    Pad = binary:copy(<<C>>, N),
    <<Pad/binary, Tail/binary>>.
%%% COPIES %%%

%% copies(String, Number) -> String
%% Return String repeated Number times.  A negative Number now fails
%% with function_clause; previously it recursed without ever reaching
%% the base case.
copies(_, 0) -> <<>>;
copies(S, Num) when is_binary(S), is_integer(Num), Num > 0 ->
    binary:copy(S, Num).
%%% WORDS %%%

%% words(String) -> Count
%% words(String, Char) -> Count
%% Count the words in String, a word being a maximal run of bytes
%% other than the separator Char (a space for words/1).  An empty or
%% all-separator string contains 0 words.
%%
%% The earlier version counted separators, so a final word that was not
%% followed by a separator was not counted ("a b" gave 1).
words(String) -> words(String, $\s).

words(String, Char) when is_binary(String) ->
    words(String, Char, outside, 0).

%% words(Rest, Char, State, Count): State records whether the previous
%% byte belonged to a word (inside) or not (outside); Count goes up
%% each time a new word starts.
words(<<Char, Rest/binary>>, Char, _State, N) ->
    words(Rest, Char, outside, N);
words(<<_, Rest/binary>>, Char, outside, N) ->
    words(Rest, Char, inside, N + 1);
words(<<_, Rest/binary>>, Char, inside, N) ->
    words(Rest, Char, inside, N);
words(<<>>, _Char, _State, N) ->
    N.
%%% SUB_WORDS %%%

%% sub_word(String, Index) -> Word
%% sub_word(String, Index, Char) -> Word
%% Return word number Index (counting from 1) of String, where words
%% are separated by Char (a space for sub_word/2).  An empty binary is
%% returned when String has fewer than Index words.
%%
%% The earlier version started its counter at 0, so Index 1 selected
%% the second word, and it returned an empty binary for a last word
%% that had no trailing separator.
sub_word(String, Index) ->
    sub_word(String, Index, $\s).

sub_word(String, Ix, Char) when is_binary(String) ->
    sub_word(String, Ix, Char, 1).

%% sub_word(Rest, Index, Char, Sofar): Sofar is the number of the next
%% word to be found in Rest.
sub_word(String, Ix, Char, Sofar) ->
    case sub_word_skip(String, Char) of
        <<>> ->
            <<>>;
        AtWord ->
            {Word, Rest} = sub_word_split(AtWord, Char, 1),
            case Sofar =:= Ix of
                true -> Word;
                false -> sub_word(Rest, Ix, Char, Sofar + 1)
            end
    end.

%% Drop leading separator bytes.
sub_word_skip(<<Char, Rest/binary>>, Char) -> sub_word_skip(Rest, Char);
sub_word_skip(Bin, _Char) -> Bin.

%% Bin starts with Len word bytes; return {Word, RestAfterSeparator}.
sub_word_split(Bin, Char, Len) ->
    case Bin of
        <<Word:Len/binary, Char, Rest/binary>> -> {Word, Rest};
        <<_:Len/binary, _, _/binary>> -> sub_word_split(Bin, Char, Len + 1);
        _ -> {Bin, <<>>}        % the word runs to the end of the input
    end.
%%% STRIP %%%

%% strip(String) -> String
%% strip(String, Direction) -> String
%% Remove spaces from the left end, the right end or both ends of
%% String; strip/1 strips both ends.  Direction is left | right | both.
strip(String) ->
    strip(String, both).

%% The space-stripping variants are the three-argument form with the
%% character fixed to a space.
strip(String, Direction)
  when Direction =:= left; Direction =:= right; Direction =:= both ->
    strip(String, Direction, $\s).
%% strip(String, Direction, Char) -> String
%% Remove occurrences of Char from the end(s) of String selected by
%% Direction (left | right | both), using strip_left/2 and
%% strip_right/2.
strip(String, both, Char) ->
    LeftStripped = strip_left(String, Char),
    strip_right(LeftStripped, Char);
strip(String, left, Char) ->
    strip_left(String, Char);
strip(String, right, Char) ->
    strip_right(String, Char).
%% strip_left(String, Char) -> String
%% Drop every leading byte equal to Char.  Anything that does not start
%% with Char (including the empty binary) is returned unchanged.
strip_left(<<C, Tail/binary>>, C) ->
    strip_left(Tail, C);
strip_left(B, _) ->
    B.
%% strip_right(String, Char) -> String
%% Drop every trailing byte equal to Char.  Anything that does not end
%% with Char (including the empty binary) is returned unchanged.
%%
%% The released bit syntax only allows an unsized binary segment at the
%% end of a pattern, so the length of the head is computed explicitly.
strip_right(B, Char) when is_binary(B), byte_size(B) > 0 ->
    Keep = byte_size(B) - 1,
    case B of
        <<Head:Keep/binary, Char>> -> strip_right(Head, Char);
        _ -> B
    end;
strip_right(B, _Char) ->
    B.
%%% LEFT %%%

%% left(String, Length) -> String
%% left(String, Length, Char) -> String
%% Return exactly Length bytes taken from the left of String: a longer
%% string is cut with substr/3, a shorter one is extended with
%% l_pad/3 using Char (a space for left/2).
left(String, Len) -> left(String, Len, $\s).

left(String, Len, Char) ->
    %% byte_size/1, not length/1: String is a binary, and length/1
    %% raises badarg for anything but a list.
    Slen = byte_size(String),
    if
        Slen > Len -> substr(String, 1, Len);
        Slen < Len -> l_pad(String, Len - Slen, Char);
        Slen == Len -> String
    end.
%% l_pad(String, Number, Char) -> String
%% Return String followed by Number copies of Char, i.e. the padding
%% used when left-justifying (see left/3).
l_pad(String, Num, Char) ->
    Btail = chars(Char, Num),
    <<String/binary, Btail/binary>>.
%%% RIGHT %%%

%% right(String, Length) -> String
%% right(String, Length, Char) -> String
%% Return exactly Length bytes taken from the right of String: a longer
%% string loses its head via substr/2, a shorter one is extended with
%% r_pad/3 using Char (a space for right/2).
right(String, Len) ->
    right(String, Len, $\s).

right(String, Len, Char) ->
    case size(String) of
        Slen when Slen == Len -> String;
        Slen when Slen < Len -> r_pad(String, Len - Slen, Char);
        Slen when Slen > Len -> substr(String, Slen - Len + 1)
    end.
%% r_pad(String, Number, Char) -> String
%% Padding used when right-justifying: chars/3 is called with String as
%% its tail argument.
r_pad(String, Num, Char) ->
    chars(Char, Num, String).
%%% CENTRE %%%

%% centre(String, Length) -> String
%% centre(String, Length, Char) -> String
%% Return a binary of Length bytes with String centred in it.  A longer
%% String keeps only its middle Length bytes; a shorter one is padded
%% on both sides with Char (a space for centre/2).  When the padding
%% cannot be split evenly, the extra byte goes to the l_pad/3 call.
centre(String, Len) -> centre(String, Len, $\s).

%% A zero-length result is the empty binary; this clause used to
%% return the empty list, the only non-binary result in the module.
centre(_String, 0, _Char) -> <<>>;        %Strange cases to centre string
centre(String, Len, Char) ->
    Slen = byte_size(String),
    if
        Slen > Len -> substr(String, (Slen - Len) div 2 + 1, Len);
        Slen < Len ->
            N = (Len - Slen) div 2,
            r_pad(l_pad(String, Len - (Slen + N), Char), N, Char);
        Slen == Len -> String
    end.
%%% SUB_STRING %%%

%% sub_string(String, Start) -> SubString
%% sub_string(String, Start, Stop) -> SubString
%% Position-based front ends to substr/2 and substr/3: the
%% three-argument form converts the inclusive Start..Stop range into a
%% length.
sub_string(String, Start) ->
    substr(String, Start).

sub_string(String, Start, Stop) ->
    Len = Stop - Start + 1,
    substr(String, Start, Len).
%% The Regular Expression Matching Functions.
%%
%% These have been rewritten. As their interface has changed slightly
%% (much to the better) I have moved them to a new module 'regexp' to
%% avoid another "interface war" about something which doesn't
%% seriously affect that many people. This interface is kept for
%% backwards compatibility so I don't get shot for that as well.
%%
%% /Robert Virding
%% re_sh_to_awk(ShellRegExp)
%% Pass ShellRegExp on to regexp:sh_to_awk/1 and return its result.
%% NOTE(review): this is the only wrapper here that calls `regexp`
%% rather than `bregexp` -- confirm that is intended.
re_sh_to_awk(ShellRegExp) ->
    regexp:sh_to_awk(ShellRegExp).
%% re_parse(RegExp) -> {regexp, RE} | {error, E}
%% Run bregexp:parse/1 and re-tag a successful result as
%% {regexp, RE}; an {error, E} result is handed back unchanged.
re_parse(RegExp) ->
    case bregexp:parse(RegExp) of
        {error, _} = Error -> Error;
        {ok, Parsed} -> {regexp, Parsed}
    end.
%% re_match(String, RegExp) -> {match, SubString, Start} | nomatch | {error, E}
%% Run bregexp:match/2 and turn a {match, Start, Len} result into the
%% matched sub-string (extracted with substr/3) plus its start
%% position; nomatch and {error, E} are handed back unchanged.
re_match(String, RegExp) ->
    case bregexp:match(String, RegExp) of
        nomatch ->
            nomatch;
        {error, _} = Error ->
            Error;
        {match, Start, Len} ->
            Matched = substr(String, Start, Len),
            {match, Matched, Start}
    end.
%% re_sub(String, RegExp, New) -> {ok, Result} | {error, E}
%% Run bregexp:sub/3 and drop the third element of its {ok, Res, N}
%% result; an {error, E} result is handed back unchanged.
re_sub(String, RegExp, New) ->
    case bregexp:sub(String, RegExp, New) of
        {error, _} = Error -> Error;
        {ok, Result, _Dropped} -> {ok, Result}
    end.
%% re_gsub(String, RegExp, New) -> {ok, Result} | {error, E}
%% Run bregexp:gsub/3 and drop the third element of its {ok, Res, N}
%% result; an {error, E} result is handed back unchanged.
re_gsub(String, RegExp, New) ->
    case bregexp:gsub(String, RegExp, New) of
        {error, _} = Error -> Error;
        {ok, Result, _Dropped} -> {ok, Result}
    end.
%% re_split(String, RegExp)
%% Hand both arguments to bregexp:split/2 and return whatever it
%% returns.
re_split(String, RegExp) ->
    bregexp:split(String, RegExp).
More information about the erlang-questions
mailing list