[erlang-questions] gen_tcp very slow to fetch data

Joe Armstrong erlang@REDACTED
Tue Nov 17 09:52:33 CET 2009


On Mon, Nov 16, 2009 at 6:53 PM, Chandru
<chandrashekhar.mullaparthi@REDACTED> wrote:
> You are expecting the server to indicate end of response by closing the
> connection, but because you specify HTTP/1.1 in the request, the server is
> holding up your connection, and you are timing out. Try replacing HTTP/1.1
> with HTTP/1.0 in your request, or parse the response to detect end of
> response.

This will get you into murkier waters - you'll have to check whether a
Content-Length header is present and, if so, read exactly that number of bytes.

I wrote a tutorial about this a while back:

http://www.sics.se/~joe/tutorials/web_server/web_server.html

You'll find a module called http_driver there that parses the response and
collects the appropriate number of bytes.

/Joe



I


>
> cheers
> Chandru
>
> 2009/11/16 zabrane Mikael <zabrane3@REDACTED>
>
>> Hi List !
>>
>> New to Erlang, I'm trying to implement a simple URL fetcher.
>> Here's my code (please, feel free to correct it if you find any bug or know
>> a better approach):
>>
>>
>> 8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8----
>> -module(fetch).
>>
>> -export([url/1]).
>>
>> -define(TIMEOUT,    7000).
>> -define(TCP_OPTS,   [binary, {packet, raw}, {nodelay, true},
>>                     {active, true}]).
>>
>> url(Url) ->
>>    {ok, _Tag, Host, Port} = split_url(Url),
>>
>>    Hdrs = [],
>>    Request = ["GET ", Url, " HTTP/1.1\r\n", Hdrs, "\r\n\r\n"],
>>
>>    case catch gen_tcp:connect(Host, Port, ?TCP_OPTS) of
>>      {'EXIT', Why} ->
>>            {error, {socket_exit, Why}};
>>        {error, Why} ->
>>            {error, {socket_error, Why}};
>>        {ok, Socket} ->
>>            gen_tcp:send(Socket, list_to_binary(Request)),
>>            recv(Socket, list_to_binary([]))
>>    end.
>>
>> recv(Socket, Bin) ->
>>    receive
>>        {tcp, Socket, B} ->
>>            io:format(".", []),
>>            recv(Socket, concat_binary([Bin, B]));
>>        {tcp_closed, Socket} ->
>>            {ok, Bin};
>>        Other ->
>>            {error, {socket, Other}}
>> after
>>    ?TIMEOUT ->
>>            {error, {socket, timeout}}
>>    end.
>>
>>
>> split_url([$h,$t,$t,$p,$:,$/,$/|T]) ->  split_url(http, T);
>> split_url(_X)                       ->  {error, split_url}.
>>
>> split_url(Tag, X) ->
>>    case string:chr(X, $:) of
>>        0 ->
>>            Port = 80,
>>            case string:chr(X,$/) of
>>                0 ->
>>                    {ok, Tag, X, Port};
>>                N ->
>>                    Site = string:substr(X,1,N-1),
>>                    {ok, Tag, Site, Port}
>>            end;
>>        N1 ->
>>            case string:chr(X,$/) of
>>                0 ->
>>                    error;
>>                N2 ->
>>                    PortStr = string:substr(X,N1+1, N2-N1-1),
>>                    case catch list_to_integer(PortStr) of
>>                        {'EXIT', _} ->
>>                            {error, port_number};
>>                        Port ->
>>                            Site = string:substr(X,1,N1-1),
>>                            {ok, Tag, Site, Port}
>>                    end
>>            end
>>    end.
>>
>>
>> 8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8------
>>
>> When testing it, the receiving socket gets very very slow:
>> $ erl
>> 1> c(fetch).
>> 2> Bin = fetch:url("http://www.google.com").
>> ......{error,{socket,timeout}}
>>
>> Am I missing something?
>> What I like to get at the end is a very fast fetcher. Any hint?
>>
>> Regards
>> Zabrane
>>
>


More information about the erlang-questions mailing list