[erlang-questions] Parsing binaries performance

Per Gustafsson per.gustafsson@REDACTED
Thu Jun 26 10:48:18 CEST 2008


I would suggest that you update your system to R12B-3. On my computer 
your program is roughly 6 times faster on R12B-3 than on R11B-5; this is 
due to a specific optimization for this kind of code. It will still be 
slower than the Java code, but if you use native compilation it should 
approach the same speed.

Per


Sebastian Dehne wrote:
> Hello list,
>
> I'm trying to write a parser in Erlang for a byte-stream (which I 
> receive from the TCP socket), but I realise that my code is slow 
> compared to the Java version which I have. I've attached both versions.
>
> The Java version prints:
> Done: 93
>
> The Erlang version prints:
> $ erl
> Erlang (BEAM) emulator version 5.5.5 [source] [async-threads:0] 
> [kernel-poll:false]
>
> Eshell V5.5.5  (abort with ^G)
> 1> c(test).
> {ok,test}
> 2> Data1 = <<"Somestringcontainingcrlf\r\nandsomemoretextbehind\r\n">>.
> <<"Somestringcontainingcrlf\r\nandsomemoretextbehind\r\n">>
> 3>
> 3> test:testwrapper(Data1, 500000).
> Erlang Elapsed 1570 (runtime) 1586 (wall clock) milliseconds
> ok
>
> Both tests were run on the same machine. I also ran the test several 
> times. Quite a difference between 93ms and 1586ms.
>
> What am I doing wrong? How should I write the code in Erlang such that 
> it matches the performance of Java?
>
>
> Thanks
>
> Sebastian
> ------------------------------------------------------------------------
>
> import java.io.File;
> import java.io.FileInputStream;
> import java.io.IOException;
> import java.util.ArrayList;
> import java.util.LinkedList;
> import java.util.List;
>
> /**
>  * Micro-benchmark: scans a fixed byte array for the first CR LF pair
>  * 500000 times and prints the elapsed wall-clock time in milliseconds.
>  */
> public class Test {
>     /** Entry point; simply runs the benchmark in {@link #test2()}. */
>     public static void main(String[] args) throws Exception {
>         test2();
>     }
>
>     /**
>      * Calls {@link #findPos(byte[])} on the same sample buffer 500000 times
>      * and prints "Done: " followed by the elapsed milliseconds.
>      *
>      * @throws Exception if any call reports that no CR LF pair was found
>      */
>     public static void test2() throws Exception {
>         final byte[] data = "Somestringcontainingcrlf\r\nandsomemoretextbehind\r\n".getBytes();
>
>         long start = System.currentTimeMillis();
>         for (int i = 0; i < 500000; i++) {
>             if (findPos(data) == -1) {
>                 throw new Exception("Error");
>             }
>         }
>         System.out.println("Done: " + (System.currentTimeMillis() - start));
>     }
>
>     /**
>      * Finds the first "\r\n" pair in {@code data}.
>      *
>      * @param data bytes to scan
>      * @return the 1-based position of the '\n' that completes the first
>      *         "\r\n" pair (the number of bytes up to and including it),
>      *         or -1 if {@code data} contains no such pair
>      */
>     public static int findPos(byte[] data) {
>         // True only while the byte just before the current one was '\r'.
>         boolean cr_found = false;
>         int pos = 0;
>         for (byte b : data) {
>             // Incremented before the checks, so pos counts the current byte.
>             ++pos;
>             if (b == '\r') {
>                 cr_found = true;
>             } else if (cr_found && b == '\n') {
>                 return pos;
>             } else {
>                 // Any other byte cancels a pending '\r'.
>                 cr_found = false;
>             }
>         }
>         return -1;
>     }
>
> }
>
>   
> ------------------------------------------------------------------------
>
> -module(test).
> -compile(export_all).
>
> % Runs test(Data, I) and prints how long it took, as CPU time (runtime)
> % and wall-clock time, both in milliseconds.
> testwrapper(Data, I) ->
>   % These first calls only mark the starting point; their results are ignored.
>   statistics(runtime),
>   statistics(wall_clock),
>   test(Data, I),
>   % The second tuple element is the time elapsed since the previous call.
>   {_, Time1} = statistics(runtime),
>   {_, Time2} = statistics(wall_clock),
>   io:format("Erlang Elapsed ~p (runtime) ~p (wall clock) milliseconds~n", [Time1, Time2]).
>
> % Calls find_crlf_or_end(Data, 0) I times, counting I down to 0, and then
> % returns the atom done. Crashes with badmatch if a scan returns anything
> % other than {true, _}.
> test(_Data, 0) ->
>   done;
> test(Data, I) ->
>   {true, _Pos} = find_crlf_or_end(Data, 0),
>   test(Data, I - 1).
>
> % Find the position of the first '\r\n', or the end of the binary.
> % Pos is the offset of the first byte of the binary being scanned (callers
> % start with 0). Returns {true, P}, where P is the offset of the '\r' of the
> % first '\r\n'. If there is no '\r\n', the scan runs to the end and P is the
> % initial Pos plus the number of bytes scanned, so both outcomes return
> % {true, _}.
> find_crlf_or_end(<<$\r, $\n, _/binary>>, Pos) ->
>   {true, Pos};
> % No '\r\n' at the front: drop one byte and keep scanning the rest.
> find_crlf_or_end(<<_, Rest/binary>>, Pos) ->
>   find_crlf_or_end(Rest, Pos + 1);
> % Reached the end of the binary without finding '\r\n'.
> find_crlf_or_end(<<>>, Pos) ->
>   {true, Pos}.
>
>
>   
> ------------------------------------------------------------------------
>
> _______________________________________________
> erlang-questions mailing list
> erlang-questions@REDACTED
> http://www.erlang.org/mailman/listinfo/erlang-questions




More information about the erlang-questions mailing list