Complexity Shock Horror II: the Sequel (was Re: MD5 in erlang.)

Sun Mar 30 03:09:27 CEST 2003

Hi Happi. I am digging into the I/O system for performance reasons, so 
this time I will add the any-base printing to io_lib for R9C.

Not exactly as your suggestion, though.

As I read your code, you suggested:

io_lib:format("~#", [-31,16]) -> "-16#1f"
io_lib:format("~B", [-30]) -> "-11110"
io_lib:format("~b", [18]) -> "00000000000000000000000000010010"
io_lib:format("~.8b", [-18]) -> "11101110"
io_lib:format("~x", [-31]) -> "-0x1f"
io_lib:format("~X", [-31]) -> "-0x1F"

First, I do not want a 0x prefix for hex notation. That is C, not 
Erlang. A user-definable prefix would be better.

Second, I would like to be able to choose either upper or lowercase for 
"~#".

Third, I don't see why binary notation would deserve a letter of its 
own, base 2 without prefix would do the job. So I want a possibility to 
lose the prefix.

So I will most probably implement (if no-one convinces me that I must 
change something):

io_lib:format("~.16b", [-31]) -> "-16#1f"
io_lib:format("~.16B", [-31]) -> "-16#1F"
io_lib:format("~.16x", [-31,"0x"]) -> "-0x1f"
io_lib:format("~.16X", [-31,"0x"]) -> "-0x1F"
io_lib:format("~.2.0u", [18,32]) -> "00000000000000000000000000010010"
io_lib:format("~.2u", [-18,8]) -> "11101110"
io_lib:format("~.16u", [31,16]) -> "1f"
io_lib:format("~.16U", [-31,16]) -> "FFE1"

I.e. b/B for Erlang signed any-Base notation.
     x/X for eXplicitly prefiXed signed any-base notation.
     u/U for unprefixed unsigned words of any width.
     The precision field chooses number base.
     Second mandatory argument for x/X chooses prefix,
       for u/U word width.
     Default base is 10.

I am not too happy with my choice of letters, but the best were taken.

/ Raimo Niskanen, Erlang/OTP

Erik Stenman wrote:
> Chris Pressey wrote:
> [...]
> 
>>Every few months this comes up and every time it does I get more
>>irritated.
> 
> [...]
> 
> Here we go again, my pet peeve ;)
> 
> We have had the possibility to format integers in different bases in our
> local HiPE system for several years now,
> but for some reason the OTP team does not want to add it to the
> distribution.
> Robert Virding has also promised to implement it but that implementation has
> not made it into the distribution either.
> 
> Please, please add this functionality to stdlib!
> (I can write the documentation if that is a problem ;)
> 
> /Erik
> --------------------------------------
> I'm Happi, you should be happy.
> Praeterea censeo "0xCA" scribere Erlang posse.
> 
> 
> Index: lib/stdlib/src/io_lib_format.erl
> ===================================================================
> RCS file:
> /it/project/fo/hipe/repository/otp/lib/stdlib/src/io_lib_format.erl,v
> retrieving revision 1.1.1.1
> retrieving revision 1.4
> diff -u -r1.1.1.1 -r1.4
> --- lib/stdlib/src/io_lib_format.erl 26 Mar 2001 18:36:34 -0000 1.1.1.1
> +++ lib/stdlib/src/io_lib_format.erl 27 Mar 2002 16:47:22 -0000 1.4
> @@ -101,11 +101,16 @@
>  collect_cc([$p|Fmt], [A|Args]) -> {$p,[A],Fmt,Args};
>  collect_cc([$W|Fmt], [A,Depth|Args]) -> {$W,[A,Depth],Fmt,Args};
>  collect_cc([$P|Fmt], [A,Depth|Args]) -> {$P,[A,Depth],Fmt,Args};
> +collect_cc([$#|Fmt], [A,Base|Args]) -> {$#,[A,Base],Fmt,Args};
> +collect_cc([$B|Fmt], [A|Args]) -> {$B,[A],Fmt,Args};
> +collect_cc([$b|Fmt], [A|Args]) -> {$b,[A],Fmt,Args};
>  collect_cc([$s|Fmt], [A|Args]) -> {$s,[A],Fmt,Args};
>  collect_cc([$e|Fmt], [A|Args]) -> {$e,[A],Fmt,Args};
>  collect_cc([$f|Fmt], [A|Args]) -> {$f,[A],Fmt,Args};
>  collect_cc([$g|Fmt], [A|Args]) -> {$g,[A],Fmt,Args};
>  collect_cc([$c|Fmt], [A|Args]) -> {$c,[A],Fmt,Args};
> +collect_cc([$x|Fmt], [A|Args]) -> {$x,[A],Fmt,Args};
> +collect_cc([$X|Fmt], [A|Args]) -> {$X,[A],Fmt,Args};
>  collect_cc([$~|Fmt], Args) -> {$~,[],Fmt,Args};
>  collect_cc([$n|Fmt], Args) -> {$n,[],Fmt,Args};
>  collect_cc([$i|Fmt], [A|Args]) -> {$i,[A],Fmt,Args}.
> @@ -155,6 +160,20 @@
>      term(io_lib:write(A, Depth), F, Adj, P, Pad);
>  control($P, [A,Depth], F, Adj, P, Pad, I) when integer(Depth) ->
>      print(A, Depth, F, Adj, P, Pad, I);
> +control($#, [A,Base], F, Adj, P, Pad, I) when integer(Base),
> +           2 =< Base,
> +           Base =< 16->
> +    string(int_to_base(A, Base), F, Adj, P, Pad);
> +control($B, [A], F, Adj, P, Pad, I) ->
> +  string(
> +    if A < 0 -> [$- | int_to_base(-A, [], 2)];
> +     true ->  int_to_base(A, [], 2)
> +    end,
> +    F, Adj, P, Pad);
> +control($b, [A], F, Adj, none, Pad, I) ->
> +    string(int_to_binary(A, 32), F, Adj, none, Pad);
> +control($b, [A], F, Adj, P, Pad, I) ->
> +    string(int_to_binary(A, P), F, Adj, P, Pad);
>  control($s, [A], F, Adj, P, Pad, I) when atom(A) ->
>      string(atom_to_list(A), F, Adj, P, Pad);
>  control($s, [L], F, Adj, P, Pad, I) ->
> @@ -168,6 +187,10 @@
>      fwrite_g(A, F, Adj, P, Pad);
>  control($c, [A], F, Adj, P, Pad, I) when integer(A) ->
>      char(A band 255, F, Adj, P, Pad);
> +control($x, [A], F, Adj, P, Pad, I) when integer(A) ->
> +    string(int_to_hex(A), F, Adj, P, Pad);
> +control($X, [A], F, Adj, P, Pad, I) when integer(A) ->
> +    string(int_to_Hex(A), F, Adj, P, Pad);
>  control($~, [], F, Adj, P, Pad, I) -> char($~, F, Adj, P, Pad);
>  control($n, [], F, Adj, P, Pad, I) -> newline(F, Adj, P, Pad);
>  control($i, [A], F, Adj, P, Pad, I) -> [].
> @@ -388,3 +411,58 @@
>  flat_length([H|T], L) ->
>      flat_length(T, L + 1);
>  flat_length([], L) -> L.
> +
> +int_to_hex(N) -> int_to_hex(N, $a-10).
> +int_to_Hex(N) -> int_to_hex(N, $A-10).
> +
> +int_to_hex(N, LetterBase) ->
> +    if N < 0 -> [$-, $0, $x | int_to_hex(-N, [], LetterBase)];
> +       true ->  [$0, $x | int_to_hex(N, [], LetterBase)]
> +    end.
> +
> +int_to_hex(N, Tail, LetterBase) ->
> +    NewN = N bsr 4,
> +    Digit = N band 15,
> +    Char =
> + if Digit < 10 -> Digit+$0;
> +    true -> Digit+LetterBase
> + end,
> +    NewTail = [Char | Tail],
> +    if NewN =:= 0 -> NewTail;
> +       true -> int_to_hex(NewN, NewTail, LetterBase)
> +    end.
> +
> +int_to_binary(N, Wordsize) ->
> +    if N < 0 ->
> + Bits = int_to_base(-N, [],2),
> + pad(length(Bits),Bits,Wordsize,$1);
> +       true ->
> + Bits = int_to_base(N, [], 2),
> + pad(length(Bits),Bits,Wordsize,$0)
> +    end.
> +
> +pad(N,Bits,Wordsize, Pad) ->
> +  if N < Wordsize ->
> +      pad(N+1,[Pad|Bits],Wordsize,Pad);
> +     true ->
> +      Bits
> +  end.
> +
> +
> +int_to_base(N, Base) ->
> +    if N < 0 -> [$- | integer_to_list(Base)]
> +    ++ [$# | int_to_base(-N, [], Base)];
> +       true ->  integer_to_list(Base)  ++ [$# | int_to_base(N, [], Base)]
> +    end.
> +
> +int_to_base(N, Tail, Base) ->
> +    NewN = N div Base,
> +    Digit = N - (NewN*Base),
> +    Char =
> + if Digit < 10 -> Digit+$0;
> +    true -> Digit+$a-10
> + end,
> +    NewTail = [Char | Tail],
> +    if NewN =:= 0 -> NewTail;
> +       true -> int_to_base(NewN, NewTail, Base)
> +    end.
>