[erlang-questions] gen_tcp very slow to fetch data
Joe Armstrong
erlang@REDACTED
Tue Nov 17 09:52:33 CET 2009
On Mon, Nov 16, 2009 at 6:53 PM, Chandru
<chandrashekhar.mullaparthi@REDACTED> wrote:
> You are expecting the server to indicate end of response by closing the
> connection, but because you specify HTTP/1.1 in the request, the server is
> holding up your connection, and you are timing out. Try replacing HTTP/1.1
> with HTTP/1.0 in your request, or parse the response to detect end of
> response.
This will get you into murkier waters - you'll have to check whether a
Content-Length header is present in the response and, if so, read
exactly that number of bytes for the body.
I wrote a tutorial about this a while back:
http://www.sics.se/~joe/tutorials/web_server/web_server.html
In it you'll find a module called http_driver that does the parsing and
collects the appropriate number of bytes.
/Joe
>
> cheers
> Chandru
>
> 2009/11/16 zabrane Mikael <zabrane3@REDACTED>
>
>> Hi List !
>>
>> New to Erlang, I'm trying to implement a simple URL fetcher.
>> Here's my code (please, feel free to correct it if you find any bug or know
>> a better approach):
>>
>>
>> 8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8----
>> -module(fetch).
>>
>> -export([url/1]).
>>
>> -define(TIMEOUT, 7000).
>> -define(TCP_OPTS, [binary, {packet, raw}, {nodelay, true},
>> {active, true}]).
>>
>> url(Url) ->
>> {ok, _Tag, Host, Port} = split_url(Url),
>>
>> Hdrs = [],
>> Request = ["GET ", Url, " HTTP/1.1\r\n", Hdrs, "\r\n\r\n"],
>>
>> case catch gen_tcp:connect(Host, Port, ?TCP_OPTS) of
>> {'EXIT', Why} ->
>> {error, {socket_exit, Why}};
>> {error, Why} ->
>> {error, {socket_error, Why}};
>> {ok, Socket} ->
>> gen_tcp:send(Socket, list_to_binary(Request)),
>> recv(Socket, list_to_binary([]))
>> end.
>>
>> recv(Socket, Bin) ->
>> receive
>> {tcp, Socket, B} ->
>> io:format(".", []),
>> recv(Socket, concat_binary([Bin, B]));
>> {tcp_closed, Socket} ->
>> {ok, Bin};
>> Other ->
>> {error, {socket, Other}}
>> after
>> ?TIMEOUT ->
>> {error, {socket, timeout}}
>> end.
>>
>>
>> split_url([$h,$t,$t,$p,$:,$/,$/|T]) -> split_url(http, T);
>> split_url(_X) -> {error, split_url}.
>>
>> split_url(Tag, X) ->
>> case string:chr(X, $:) of
>> 0 ->
>> Port = 80,
>> case string:chr(X,$/) of
>> 0 ->
>> {ok, Tag, X, Port};
>> N ->
>> Site = string:substr(X,1,N-1),
>> {ok, Tag, Site, Port}
>> end;
>> N1 ->
>> case string:chr(X,$/) of
>> 0 ->
>> error;
>> N2 ->
>> PortStr = string:substr(X,N1+1, N2-N1-1),
>> case catch list_to_integer(PortStr) of
>> {'EXIT', _} ->
>> {error, port_number};
>> Port ->
>> Site = string:substr(X,1,N1-1),
>> {ok, Tag, Site, Port}
>> end
>> end
>> end.
>>
>>
>> 8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8-----8------
>>
>> When testing it, the receiving socket gets very very slow:
>> $ erl
>> 1> c(fetch).
>> 2> Bin = fetch:url("http://www.google.com").
>> ......{error,{socket,timeout}}
>>
>> Am I missing something?
>> What I like to get at the end is a very fast fetcher. Any hint?
>>
>> Regards
>> Zabrane
>>
>
More information about the erlang-questions mailing list