gen_tcp very slow to fetch data

zabrane Mikael zabrane3@REDACTED
Mon Nov 16 17:17:32 CET 2009


Hi List !

New to Erlang, I'm trying to implement a simple URL fetcher.
Here's my code (please, feel free to correct it if you find any bug or know
a better approach):

8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8----
-module(fetch).

-export([url/1]).

-define(TIMEOUT,    7000).
-define(TCP_OPTS,   [binary, {packet, raw}, {nodelay, true},
                     {active, true}]).

url(Url) ->
    {ok, _Tag, Host, Port} = split_url(Url),

    Hdrs = [],
    Request = ["GET ", Url, " HTTP/1.1\r\n", Hdrs, "\r\n\r\n"],

    case catch gen_tcp:connect(Host, Port, ?TCP_OPTS) of
      {'EXIT', Why} ->
            {error, {socket_exit, Why}};
        {error, Why} ->
            {error, {socket_error, Why}};
        {ok, Socket} ->
            gen_tcp:send(Socket, list_to_binary(Request)),
            recv(Socket, list_to_binary([]))
    end.

recv(Socket, Bin) ->
    receive
        {tcp, Socket, B} ->
            io:format(".", []),
            recv(Socket, concat_binary([Bin, B]));
        {tcp_closed, Socket} ->
            {ok, Bin};
        Other ->
            {error, {socket, Other}}
after
    ?TIMEOUT ->
            {error, {socket, timeout}}
    end.


split_url([$h,$t,$t,$p,$:,$/,$/|T]) ->  split_url(http, T);
split_url(_X)                       ->  {error, split_url}.

split_url(Tag, X) ->
    case string:chr(X, $:) of
        0 ->
            Port = 80,
            case string:chr(X,$/) of
                0 ->
                    {ok, Tag, X, Port};
                N ->
                    Site = string:substr(X,1,N-1),
                    {ok, Tag, Site, Port}
            end;
        N1 ->
            case string:chr(X,$/) of
                0 ->
                    error;
                N2 ->
                    PortStr = string:substr(X,N1+1, N2-N1-1),
                    case catch list_to_integer(PortStr) of
                        {'EXIT', _} ->
                            {error, port_number};
                        Port ->
                            Site = string:substr(X,1,N1-1),
                            {ok, Tag, Site, Port}
                    end
            end
    end.

8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8------

When testing it, the receiving socket gets very very slow:
$ erl
1> c(fetch).
2> Bin = fetch:url("http://www.google.com").
......{error,{socket,timeout}}

Am I missing something?
What I like to get at the end is a very fast fetcher. Any hint?

Regards
Zabrane


More information about the erlang-questions mailing list