[erlang-bugs] [erlang-questions] Process/FD leak in SSL R15B01
Ingela Anderton Andin
Ingela.Anderton.Andin@REDACTED
Fri Nov 9 10:03:25 CET 2012
Hi!
Here is the patch to make timeouts server side.
diff --git a/lib/ssl/src/ssl_connection.erl b/lib/ssl/src/ssl_connection.erl
index ff2556c..ce64f05 100644
--- a/lib/ssl/src/ssl_connection.erl
+++ b/lib/ssl/src/ssl_connection.erl
@@ -118,7 +118,7 @@ send(Pid, Data) ->
sync_send_all_state_event(Pid, {application_data,
%% iolist_to_binary should really
%% be called iodata_to_binary()
- erlang:iolist_to_binary(Data)},
infinity).
+ erlang:iolist_to_binary(Data)}).
%%--------------------------------------------------------------------
-spec recv(pid(), integer(), timeout()) ->
@@ -127,7 +127,7 @@ send(Pid, Data) ->
%% Description: Receives data when active = false
%%--------------------------------------------------------------------
recv(Pid, Length, Timeout) ->
- sync_send_all_state_event(Pid, {recv, Length}, Timeout).
+ sync_send_all_state_event(Pid, {recv, Length, Timeout}).
%%--------------------------------------------------------------------
-spec connect(host(), inet:port_number(), port(), {#ssl_options{},
#socket_options{}},
pid(), tuple(), timeout()) ->
@@ -164,7 +164,7 @@ ssl_accept(Port, Socket, Opts, User, CbInfo, Timeout) ->
%% Description: Starts ssl handshake.
%%--------------------------------------------------------------------
handshake(#sslsocket{pid = Pid}, Timeout) ->
- case sync_send_all_state_event(Pid, start, Timeout) of
+ case sync_send_all_state_event(Pid, {start, Timeout}) of
connected ->
ok;
Error ->
@@ -335,15 +335,15 @@ init([Role, Host, Port, Socket, {SSLOpts0, _} =
Options, User, CbInfo]) ->
#state{}) -> gen_fsm_state_return().
%%--------------------------------------------------------------------
hello(start, #state{host = Host, port = Port, role = client,
- ssl_options = SslOpts,
- session = #session{own_certificate = Cert} = Session0,
- session_cache = Cache, session_cache_cb = CacheCb,
- transport_cb = Transport, socket = Socket,
- connection_states = ConnectionStates0,
- renegotiation = {Renegotiation, _}} = State0) ->
+ ssl_options = SslOpts,
+ session = #session{own_certificate = Cert}
= Session0,
+ session_cache = Cache, session_cache_cb =
CacheCb,
+ transport_cb = Transport, socket = Socket,
+ connection_states = ConnectionStates0,
+ renegotiation = {Renegotiation, _}} =
State0) ->
Hello = ssl_handshake:client_hello(Host, Port, ConnectionStates0,
SslOpts,
Cache, CacheCb, Renegotiation,
Cert),
-
+
Version = Hello#client_hello.client_version,
Handshake0 = ssl_handshake:init_handshake_history(),
{BinMsg, ConnectionStates, Handshake} =
@@ -768,7 +768,8 @@ handle_sync_event({application_data, Data}, From,
StateName,
State#state{send_queue = queue:in({From, Data}, Queue)},
get_timeout(State)};
-handle_sync_event(start, StartFrom, hello, State) ->
+handle_sync_event({start, Timeout} = Start, StartFrom, hello, State) ->
+ start_or_recv_cancel_timer(Timeout, StartFrom),
hello(start, State#state{start_or_recv_from = StartFrom});
%% The two clauses below could happen if a server upgrades a socket in
@@ -778,12 +779,14 @@ handle_sync_event(start, StartFrom, hello, State) ->
%% mode before telling the client that it is willing to upgrade
%% and before calling ssl:ssl_accept/2. These clauses are
%% here to make sure it is the users problem and not owers if
-%% they upgrade a active socket.
-handle_sync_event(start, _, connection, State) ->
+%% they upgrade an active socket.
+handle_sync_event({start,_}, _, connection, State) ->
{reply, connected, connection, State, get_timeout(State)};
-handle_sync_event(start, _From, error, {Error, State = #state{}}) ->
+handle_sync_event({start,_}, _From, error, {Error, State = #state{}}) ->
{stop, {shutdown, Error}, {error, Error}, State};
-handle_sync_event(start, StartFrom, StateName, State) ->
+
+handle_sync_event({start, Timeout}, StartFrom, StateName, State) ->
+ start_or_recv_cancel_timer(Timeout, StartFrom),
{next_state, StateName, State#state{start_or_recv_from =
StartFrom}, get_timeout(State)};
handle_sync_event(close, _, StateName, State) ->
@@ -815,12 +818,14 @@ handle_sync_event({shutdown, How0}, _, StateName,
{stop, normal, Error, State}
end;
-handle_sync_event({recv, N}, RecvFrom, connection = StateName, State0) ->
+handle_sync_event({recv, N, Timeout}, RecvFrom, connection = StateName,
State0) ->
+ start_or_recv_cancel_timer(Timeout, RecvFrom),
passive_receive(State0#state{bytes_to_read = N, start_or_recv_from
= RecvFrom}, StateName);
%% Doing renegotiate wait with handling request until renegotiate is
%% finished. Will be handled by next_state_is_connection/2.
-handle_sync_event({recv, N}, RecvFrom, StateName, State) ->
+handle_sync_event({recv, N, Timeout}, RecvFrom, StateName, State) ->
+ start_or_recv_cancel_timer(Timeout, RecvFrom),
{next_state, StateName, State#state{bytes_to_read = N,
start_or_recv_from = RecvFrom},
get_timeout(State)};
@@ -990,7 +995,14 @@ handle_info({'DOWN', MonitorRef, _, _, _}, _,
handle_info(allow_renegotiate, StateName, State) ->
{next_state, StateName, State#state{allow_renegotiate = true},
get_timeout(State)};
-
+
+handle_info({cancel_start_or_recv, RecvFrom}, connection = StateName,
#state{start_or_recv_from = RecvFrom} = State) ->
+ gen_fsm:reply(RecvFrom, {error, timeout}),
+ {next_state, StateName, State#state{start_or_recv_from =
undefined}, get_timeout(State)};
+
+handle_info({cancel_start_or_recv, _RecvFrom}, StateName, State) ->
+ {next_state, StateName, State, get_timeout(State)};
+
handle_info(Msg, StateName, State) ->
Report = io_lib:format("SSL: Got unexpected info: ~p ~n", [Msg]),
error_logger:info_report(Report),
@@ -1201,15 +1213,10 @@ init_diffie_hellman(DbHandle,_, DHParamFile,
server) ->
end.
sync_send_all_state_event(FsmPid, Event) ->
- sync_send_all_state_event(FsmPid, Event, infinity).
-
-sync_send_all_state_event(FsmPid, Event, Timeout) ->
- try gen_fsm:sync_send_all_state_event(FsmPid, Event, Timeout)
+ try gen_fsm:sync_send_all_state_event(FsmPid, Event, infinity)
catch
exit:{noproc, _} ->
{error, closed};
- exit:{timeout, _} ->
- {error, timeout};
exit:{normal, _} ->
{error, closed};
exit:{shutdown, _} ->
@@ -2465,3 +2472,8 @@ default_hashsign(_Version, KeyExchange)
default_hashsign(_Version, KeyExchange)
when KeyExchange == dh_anon ->
{null, anon}.
+
+start_or_recv_cancel_timer(infinity, _RecvFrom) ->
+ ok;
+start_or_recv_cancel_timer(Timeout, RecvFrom) ->
+ erlang:send_after(Timeout, self(), {cancel_start_or_recv, RecvFrom}).
Regards Ingela Erlang/OTP team - Ericsson AB
Loïc Hoguin wrote:
> Thanks!
>
> Ingela Anderton Andin <ingela.anderton.andin@REDACTED> wrote:
>
>> Hi!
>>
>> Well as you have a server increase the timeout in ssl_accept/[2,3], you
>> do not want
>> it to expire unless there is a network failure.
>>
>> Regards Ingela Erlang/OTP team - Ericsson AB
>>
>> Loïc Hoguin wrote:
>>> Alright.
>>>
>>> What should I do for a temporary fix to make sure this is the right
>>> issue?
>>>
>>> On 11/07/2012 04:12 PM, Ingela Anderton Andin wrote:
>>>> Hi!
>>>>
>>>> The problem is that "call-timeouts" in gen_server/fsm suck, as they are
>>>> purely client side. The {ref, connected} is a gen:fsm-reply that should
>>>> have been received by the ssl connect code. Like the recv problem on
>>>> erlang-questions this is solved by making the timer server side, it
>>>> could be argued some of these timeouts in ssl API are not needed, but
>>>> they are legacy... We will fix it. After some investigation I think
>>>> the "best" solution to your other problem will be to call gen_tcp:recv/3 in
>>>> the workaround. We will also clean up the logic in terminate.
>>>>
>>>> Regards Ingela Erlang/OTP team - Ericsson AB
>>>>
>>>>
>>>> Loïc Hoguin wrote:
>>>>> On 11/01/2012 12:38 PM, Loïc Hoguin wrote:
>>>>>> * There is another problem where processes get stuck elsewhere, which
>>>>>> I'm going to try to investigate. Note that the sockets for this
>>>>>> problem
>>>>>> stay in ESTABLISHED.
>>>>> In this particular case, here's what we got:
>>>>>
>>>>> [{current_function,{gen_fsm,loop,7}},
>>>>> {initial_call,{proc_lib,init_p,5}},
>>>>> {status,waiting},
>>>>> {message_queue_len,0},
>>>>> {messages,[]},
>>>>> {links,[<0.897.0>,#Port<0.264729349>]},
>>>>> {dictionary,[{ssl_manager,ssl_manager},
>>>>> {'$ancestors',[ssl_connection_sup,ssl_sup,<0.894.0>]},
>>>>> {'$initial_call',{ssl_connection,init,1}}]},
>>>>> {trap_exit,false},
>>>>> {error_handler,error_handler},
>>>>> {priority,normal},
>>>>> {group_leader,<0.893.0>},
>>>>> {total_heap_size,8362},
>>>>> {heap_size,4181},
>>>>> {stack_size,10},
>>>>> {reductions,7029},
>>>>> {garbage_collection,[{min_bin_vheap_size,46368},
>>>>> {min_heap_size,233},
>>>>> {fullsweep_after,10},
>>>>> {minor_gcs,10}]},
>>>>> {suspending,[]}]
>>>>>
>>>>> Looking further, I notice something weird.
>>>>>
>>>>>> erlang:process_info(Pid, monitors).
>>>>> {monitors,[{process,<0.1055.0>}]}
>>>>>
>>>>> This is a very old pid.
>>>>>
>>>>>> erlang:process_info(OldPid).
>>>>> [{current_function,{prim_inet,accept0,2}},
>>>>> {initial_call,{cowboy_acceptor,acceptor,7}},
>>>>> {status,waiting},
>>>>> {message_queue_len,1602},
>>>>> {messages,[{#Ref<0.0.19.196440>,connected},
>>>>> {#Ref<0.0.21.74727>,connected},
>>>>> {#Ref<0.0.28.93234>,connected},
>>>>> {#Ref<0.0.64.192190>,connected},
>>>>> {#Ref<0.0.167.184831>,connected},
>>>>> {#Ref<0.0.208.24369>,connected},
>>>>> {#Ref<0.0.282.59352>,connected},
>>>>> {#Ref<0.0.340.181599>,connected},
>>>>> {#Ref<0.0.341.57338>,connected},
>>>>> {#Ref<0.0.427.15661>,connected},
>>>>> {#Ref<0.0.430.8560>,connected},
>>>>> {#Ref<0.0.439.40688>,connected},
>>>>> {#Ref<0.0.439.214050>,connected},
>>>>> {#Ref<0.0.440.206978>,connected},
>>>>> {#Ref<0.0.466.173049>,connected},
>>>>> {#Ref<0.0.497.35749>,connected},
>>>>> {#Ref<0.0.514.36774>,connected},
>>>>> {#Ref<0.0.514.109971>,connected},
>>>>> {#Ref<0.0.541.246233>,connected},
>>>>> {#Ref<0.0.544.168339>,connected},
>>>>> {#Ref<0.0.584.43294>,...},
>>>>> {...}|...]},
>>>>> {links,[<0.1028.0>]},
>>>>> {dictionary,[]},
>>>>> {trap_exit,false},
>>>>> {error_handler,error_handler},
>>>>> {priority,normal},
>>>>> {group_leader,<0.868.0>},
>>>>> {total_heap_size,32838},
>>>>> {heap_size,4181},
>>>>> {stack_size,22},
>>>>> {reductions,219876367},
>>>>> {garbage_collection,[{min_bin_vheap_size,46368},
>>>>> {min_heap_size,233},
>>>>> {fullsweep_after,10},
>>>>> {minor_gcs,1}]},
>>>>> {suspending,[]}]
>>>>>
>>>>> So this is the acceptor process. It doesn't make sense though, why
>>>>> would the ssl process still monitor the acceptor? The acceptor code is
>>>>> equivalent to this:
>>>>>
>>>>> {ok, Socket} = ssl:transport_accept(ListenSocket, 2000),
>>>>> ok = ssl:ssl_accept(Socket, 2000),
>>>>> {ok, Pid} = supervisor:start_child(SupPid, Args),
>>>>> Transport:controlling_process(Socket, Pid),
>>>>> %% ...
>>>>>
>>>>> I'm also not sure what {#Ref<0.0.19.196440>,connected} is. Are they
>>>>> supposed to receive this?
>>>>>
>>>>> Any pointer as to where to look next would help.
>>>>>
>>>>> Thanks.
>>>>>
>>>
More information about the erlang-bugs
mailing list