[erlang-questions] Erlang elseif

Edwin Fine <>
Thu Nov 20 16:21:30 CET 2008


On Thu, Nov 20, 2008 at 8:08 AM, damien morton <> wrote:

> traditionally, at least in C, this stuff is done with a bitvector, or
> rather a vector of bits.
>
Here's my 'C-like' contribution. It's pretty quick, too (the time below
excludes the time taken to read the file into a binary).

32> charclass:bm("/home/efine/erlang/otp_src_R12B-3.tar.gz").
File "/home/efine/erlang/otp_src_R12B-3.tar.gz" size is 42195557 bytes
Speed = 23965801 bytes/sec

%% Character classification for byte values, driven by the lookup table of
%% class bitmasks in class/1.
-module(charclass).

%% Single-bit class flags. Each entry of the table in class/1 is a
%% combination (bitwise or) of these values.
-define(CLS_CNTRL, 2#00000001).
-define(CLS_UPPER, 2#00000010).
-define(CLS_LOWER, 2#00000100).
-define(CLS_DIGIT, 2#00001000).
-define(CLS_PUNCT, 2#00010000).
-define(CLS_BLANK, 2#00100000).
-define(CLS_SPACE, 2#01000000).
-define(CLS_8BIT,  2#10000000).
%% Composite masks built from the single-bit flags above.
-define(CLS_ALPHA, (?CLS_UPPER bor ?CLS_LOWER)).
-define(CLS_ALNUM, (?CLS_UPPER bor ?CLS_LOWER bor ?CLS_DIGIT)).
%% Complement of (cntrl | 8bit): a negative integer in which every bit is set
%% except those two. NOTE: `Cls band ?CLS_PRINT /= 0` is true whenever any
%% other flag is set, including for table entries 65 and 97, which also carry
%% the cntrl flag.
-define(CLS_PRINT, (bnot (?CLS_CNTRL bor ?CLS_8BIT))).

%% Exports every function defined in the module.
-compile([export_all]).

%% True when the class of Ch has the cntrl flag set.
is_cntrl(Ch) -> (class(Ch) band ?CLS_CNTRL) =/= 0.
%% True when the class of Ch has the upper flag set.
is_upper(Ch) -> (class(Ch) band ?CLS_UPPER) =/= 0.
%% True when the class of Ch has the lower flag set.
is_lower(Ch) -> (class(Ch) band ?CLS_LOWER) =/= 0.
%% True when the class of Ch has the digit flag set.
is_digit(Ch) -> (class(Ch) band ?CLS_DIGIT) =/= 0.
%% True when the class of Ch has the punct flag set.
is_punct(Ch) -> (class(Ch) band ?CLS_PUNCT) =/= 0.
%% True when the class of Ch has the upper or the lower flag set.
is_alpha(Ch) -> (class(Ch) band ?CLS_ALPHA) =/= 0.
%% True when the class of Ch has the upper, lower or digit flag set.
is_alnum(Ch) -> (class(Ch) band ?CLS_ALNUM) =/= 0.
%% True when the class of Ch has the blank flag set.
is_blank(Ch) -> (class(Ch) band ?CLS_BLANK) =/= 0.
%% True when the class of Ch has the space flag set.
is_space(Ch) -> (class(Ch) band ?CLS_SPACE) =/= 0.
%% True when Ch is printable, i.e. its class has neither the cntrl nor the
%% 8bit flag set. Testing `band ?CLS_PRINT /= 0` instead would also accept
%% characters 9..13, whose table entries carry the space/blank flags in
%% addition to cntrl.
is_print(Ch) -> class(Ch) band (?CLS_CNTRL bor ?CLS_8BIT) =:= 0.
%% True when the class of Ch has the 8bit flag set.
is_8bit(Ch) -> (class(Ch) band ?CLS_8BIT) =/= 0.

%% Returns the names (atoms, see class_name/1) of every class Ch belongs to,
%% in the fixed order of the Masks list below.
class_names(Ch) ->
    Cls = class(Ch),
    Masks = [
        ?CLS_UPPER, ?CLS_LOWER, ?CLS_DIGIT, ?CLS_PUNCT, ?CLS_PRINT,
        ?CLS_BLANK, ?CLS_SPACE, ?CLS_ALPHA, ?CLS_ALNUM, ?CLS_CNTRL,
        ?CLS_8BIT
    ],
    [class_name(Mask) || Mask <- Masks, has_class(Cls, Mask)].

%% Membership test for one class mask. `print` means "neither cntrl nor 8bit";
%% a plain `band Mask =/= 0` test would wrongly label characters 9..13 as
%% printable, because their entries also carry the space/blank flags.
has_class(Cls, ?CLS_PRINT) -> Cls band (?CLS_CNTRL bor ?CLS_8BIT) =:= 0;
has_class(Cls, Mask)       -> Cls band Mask =/= 0.

%% Maps a class mask to its name atom. Only the eleven masks defined by the
%% CLS_* macros are accepted; any other value fails with function_clause.
%% Single-bit flags are listed first, in bit order, then the composite masks.
class_name(?CLS_CNTRL) -> cntrl;
class_name(?CLS_UPPER) -> upper;
class_name(?CLS_LOWER) -> lower;
class_name(?CLS_DIGIT) -> digit;
class_name(?CLS_PUNCT) -> punct;
class_name(?CLS_BLANK) -> blank;
class_name(?CLS_SPACE) -> space;
class_name(?CLS_8BIT)  -> '8bit';
class_name(?CLS_ALPHA) -> alpha;
class_name(?CLS_ALNUM) -> alnum;
class_name(?CLS_PRINT) -> print.

%% Returns the class bitmask (a combination of the CLS_* flags) for the byte
%% value Ch by indexing a 256-entry constant tuple. element/2 is 1-based,
%% hence Ch + 1; it fails for Ch outside 0..255.
%% In the table, each row is labelled (right-hand comment) with the high hex
%% digit of the character code and each column with the low hex digit.
class(Ch) ->
    element(Ch + 1,
    { %   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
          1,  1,  1,  1,  1,  1,  1,  1,  1, 97, 65, 65, 65, 65,  1,  1, % 0
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, % 1
         96, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, % 2
          8,  8,  8,  8,  8,  8,  8,  8,  8,  8, 16, 16, 16, 16, 16, 16, % 3
         16,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, % 4
          2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 16, % 5
         16,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, % 6
          4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 16, 16, 16, 16,  1, % 7
        128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, % 8
        128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, % 9
        128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, % A
        128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, % B
        128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, % C
        128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, % D
        128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, % E
        128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128  % F
    }).

%% Builds, for every byte value 0..255, a tuple of the character code, the
%% one-character string and the list of class names for that character.
test() ->
    Describe = fun(Code) -> {Code, [Code], class_names(Code)} end,
    lists:map(Describe, lists:seq(0, 255)).

%% Benchmark: reads File into a binary, classifies every byte of it and prints
%% the file size and the classification throughput. Only classify_binary/1 is
%% timed; reading the file is not included. Fails with badmatch if the file
%% cannot be read. Returns ok (the result of the final io:format/2).
bm(File) ->
    {ok, Bin} = file:read_file(File),
    Size = byte_size(Bin),
    io:format("File ~p size is ~B bytes~n", [File, Size]),
    {Micros, _} = timer:tc(?MODULE, classify_binary, [Bin]),
    %% timer:tc/3 can report 0 microseconds for an empty or tiny file;
    %% clamp to 1 so the division below cannot raise badarith.
    Elapsed = max(Micros, 1),
    io:format("Speed = ~b bytes/sec~n", [Size * 1000000 div Elapsed]).

%% Walks the binary one byte at a time, looking up (and discarding) the class
%% of each byte; returns ok once the binary is exhausted.
classify_binary(<<Byte, Tail/binary>>) ->
    class(Byte),
    classify_binary(Tail);
classify_binary(<<>>) ->
    ok.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://erlang.org/pipermail/erlang-questions/attachments/20081120/52831bc6/attachment.html>


More information about the erlang-questions mailing list