[erlang-bugs] Problem with tw timer support in diameter app (otp_R16B)
Aleksander Nycz
Aleksander.Nycz@REDACTED
Mon May 20 10:41:46 CEST 2013
Hello,
I changed the default value of the *restrict_connections* parameter from
'nodes' to 'false'.
After that I ran a very simple test using the Seagull simulator. The test
scenario was as follows:
1. seagull: send CER
2. seagull: recv CEA
3. seagull: send CCR (init)
4. seagull: recv CCA (init)
5. seagull: send CCR (update)
6. seagull: recv CCA (update)
7. seagull: send CCR (terminate)
8. seagull: recv CCA (terminate)
After step 8, seagull doesn't send a DPR but just closes the transport
connection (TCP).
On the server side everything looks good, but 30 seconds after the CCR
(terminate), when Tw has elapsed, the following error message appears in the log:
13:40:58.187129: <0.5046.0>: error: error_logger: --:--/--: ** Generic
server <0.5046.0> terminating
** Last message in was {timeout,#Ref<0.0.0.14845>,tw}
** When Server state == {watchdog,down,false,30000,0,<0.1009.0>,undefined,
#Ref<0.0.0.14845>,diameter_gen_base_rfc3588,
{recvdata,4259932,diameterNode,
[{diameter_app,diameterNode,dictionaryDCCA,
[dccaCallback],
diameterNode,4,false,
[{answer_errors,report},
{request_errors,answer_3xxx}]}],
{0,32}},
{0,32},
{false,false},
false}
** Reason for termination ==
** {function_clause,
[{diameter_watchdog,set_watchdog,
[stop],
[{file,"base/diameter_watchdog.erl"},{line,451}]},
{diameter_watchdog,handle_info,2,
[{file,"base/diameter_watchdog.erl"},{line,211}]},
{gen_server,handle_msg,5,[{file,"gen_server.erl"},{line,597}]},
{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,227}]}]}
13:40:58.187500: <0.5046.0>: error: error_logger: --:--/--:
[crash_report][[[{initial_call,{diameter_watchdog,init,['Argument__1']}},
{pid,<0.5046.0>},
{registered_name,[]},
{error_info,{exit,{function_clause,[{diameter_watchdog,set_watchdog,[stop],[{file,"base/diameter_watchdog.erl"},{line,451}]},
{diameter_watchdog,handle_info,2,[{file,"base/diameter_watchdog.erl"},{line,211}]},
{gen_server,handle_msg,5,[{file,"gen_server.erl"},{line,597}]},
{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,227}]}]},
[{gen_server,terminate,6,[{file,"gen_server.erl"},{line,737}]},
{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,227}]}]}},
{ancestors,[diameter_watchdog_sup,diameter_sup,<0.946.0>]},
{messages,[]},
{links,[<0.954.0>]},
{dictionary,[{random_seed,{15047,18051,14647}},
{{diameter_watchdog,restart},
{{accept,#Ref<0.0.0.1696>},
[{transport_module,diameter_tcp},
{transport_config,[{reuseaddr,true},{ip,{0,0,0,0}},{port,4068}]},
{capabilities_cb,[#Fun<diameterNode.acceptCER.2>]},
{watchdog_timer,30000},
{reconnect_timer,60000}],
{diameter_service,<0.1009.0>,
{diameter_caps,"zyndram.krakow.comarch","krakow.comarch",[],25429,"Comarch
DIAMETER Server",[],
[12645,10415,8164],
[4],
[],[],[],[],[]},
[{diameter_app,diameterNode,dictionaryDCCA,
[dccaCallback],
diameterNode,4,false,
[{answer_errors,report},{request_errors,answer_3xxx}]}]}}},
{{diameter_watchdog,dwr},
['DWR',{'Origin-Host',"zyndram.krakow.comarch"},{'Origin-Realm',"krakow.comarch"},{'Origin-State-Id',[]}]}]},
{trap_exit,false},
{status,running},
{heap_size,75025},
{stack_size,24},
{reductions,294}],
[]]]
13:40:58.189060: <0.954.0>: error: error_logger: --:--/--:
[supervisor_report][[{supervisor,{local,diameter_watchdog_sup}},
{errorContext,child_terminated},
{reason,{function_clause,[{diameter_watchdog,set_watchdog,[stop],[{file,"base/diameter_watchdog.erl"},{line,451}]},
{diameter_watchdog,handle_info,2,[{file,"base/diameter_watchdog.erl"},{line,211}]},
{gen_server,handle_msg,5,[{file,"gen_server.erl"},{line,597}]},
{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,227}]}]}},
{offender,[{pid,<0.5046.0>},
{name,diameter_watchdog},
{mfargs,{diameter_watchdog,start_link,undefined}},
{restart_type,temporary},
{shutdown,1000},
{child_type,worker}]}]]
As you can see, the function set_watchdog should be called with a
#watchdog{} record, but the atom 'stop' is passed instead.
As a result, a function_clause exception is thrown.
I suggest the following change to the code to correct this problem (file
diameter_watchdog.erl):
$ diff diameter_watchdog.erl_org diameter_watchdog.erl
385a386,393
> transition({timeout, TRef, tw}, #watchdog{tref = TRef, status = T} = S)
> when T == initial;
> T == down ->
> case restart(S) of
> stop -> stop;
> #watchdog{} = NewS -> set_watchdog(NewS)
> end;
>
You can find this solution in the attachment.
Best regards
Aleksander Nycz
--
Aleksander Nycz
Senior Software Engineer
Telco_021 BSS R&D
Comarch SA
Phone: +48 12 646 1216
Mobile: +48 691 464 275
website: www.comarch.pl
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://erlang.org/pipermail/erlang-bugs/attachments/20130520/44c7c439/attachment.htm>
-------------- next part --------------
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2010-2013. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%%
%% This module implements (as a process) the state machine documented
%% in Appendix A of RFC 3539.
%%
-module(diameter_watchdog).
-behaviour(gen_server).
%% towards diameter_service
-export([start/2]).
%% gen_server callbacks
-export([init/1,
handle_call/3,
handle_cast/2,
handle_info/2,
terminate/2,
code_change/3]).
%% diameter_watchdog_sup callback
-export([start_link/1]).
-include_lib("diameter/include/diameter.hrl").
-include("diameter_internal.hrl").
-define(DEFAULT_TW_INIT, 30000). %% RFC 3539 ch 3.4.1
-define(NOMASK, {0,32}). %% default sequence mask
-define(BASE, ?DIAMETER_DICT_COMMON).
-record(watchdog,
{%% PCB - Peer Control Block; see RFC 3539, Appendix A
status = initial :: initial | okay | suspect | down | reopen,
pending = false :: boolean(), %% DWA
tw :: 6000..16#FFFFFFFF | {module(), atom(), list()},
%% {M,F,A} -> integer() >= 0
num_dwa = 0 :: -1 | non_neg_integer(),
%% number of DWAs received during reopen
%% end PCB
parent = self() :: pid(), %% service process
transport :: pid() | undefined, %% peer_fsm process
tref :: reference(), %% reference for current watchdog timer
dictionary :: module(), %% common dictionary
receive_data :: term(),
%% term passed into diameter_service with incoming message
sequence :: diameter:sequence(), %% mask
restrict :: {diameter:restriction(), boolean()},
shutdown = false :: boolean()}).
%% ---------------------------------------------------------------------------
%% start/2
%%
%% Ask diameter_watchdog_sup to start a watchdog process and return a
%% monitor on it together with its pid. The acknowledgement Ack, which
%% the new process waits for before proceeding, is only sent once the
%% monitor has been established. This ensures that a failed
%% capabilities exchange produces the desired exit reason.
%% ---------------------------------------------------------------------------

-spec start(Type, {RecvData, [Opt], SvcOpts, #diameter_service{}})
   -> {reference(), pid()}
 when Type :: {connect|accept, diameter:transport_ref()},
      RecvData :: term(),
      Opt :: diameter:transport_opt(),
      SvcOpts :: [diameter:service_opt()].

start({_,_} = Type, T) ->
    Ack = make_ref(),
    ChildArg = {Ack, Type, self(), T},
    {ok, Pid} = diameter_watchdog_sup:start_child(ChildArg),
    try
        MRef = erlang:monitor(process, Pid),
        {MRef, Pid}
    after
        %% Always release the watchdog, even if monitoring failed.
        send(Pid, Ack)
    end.
%% start_link/1
%%
%% diameter_watchdog_sup callback. Spawns the watchdog process through
%% proc_lib with init/1 as entry point, waiting indefinitely for its
%% acknowledgement.

start_link(T) ->
    SpawnOpts = diameter_lib:spawn_opts(server, []),
    {ok, _} = proc_lib:start_link(?MODULE, init, [T], infinity, SpawnOpts).
%% ===========================================================================
%% ===========================================================================
%% init/1
%%
%% Entry point of the spawned process. Acknowledges the start to
%% proc_lib:start_link/5 before building the initial state in i/1,
%% then enters the gen_server loop with that state.

init(T) ->
    proc_lib:init_ack({ok, self()}),
    State = i(T),
    gen_server:enter_loop(?MODULE, [], State).
%% i/1
%%
%% Build the initial #watchdog{} state. Monitors the starting process
%% Pid and waits for its Ack (see start/2) before doing anything else,
%% then starts the transport (peer_fsm) process.

i({Ack, T, Pid, {RecvData, Opts, SvcOpts, Svc}}) ->
    #diameter_service{applications = Apps,
                      capabilities = Caps}
        = Svc,
    erlang:monitor(process, Pid),
    wait(Ack, Pid),
    random:seed(now()),             %% tw/1 draws its jitter from random
    putr(restart, {T, Opts, Svc}),  %% save seeing it in trace
    putr(dwr, dwr(Caps)),           %%
    {_,_} = Mask = proplists:get_value(sequence, SvcOpts),
    Restrict = proplists:get_value(restrict_connections, SvcOpts),
    Nodes = restrict_nodes(Restrict),
    Dict0 = common_dictionary(Apps),
    TPid = start(T, Opts, Mask, Nodes, Dict0, Svc),
    Tw = proplists:get_value(watchdog_timer, Opts, ?DEFAULT_TW_INIT),
    IsMember = lists:member(node(), Nodes),
    #watchdog{parent = Pid,
              transport = TPid,
              tw = Tw,
              receive_data = RecvData,
              dictionary = Dict0,
              sequence = Mask,
              restrict = {Restrict, IsMember}}.
%% wait/2
%%
%% Block until the acknowledgement Ack arrives. Exit with reason
%% {shutdown, DownMessage} if the monitored process Pid dies first.

wait(Ack, Pid) ->
    receive
        {'DOWN', _, process, Pid, _} = Down ->
            exit({shutdown, Down});
        Ack ->
            ok
    end.
%% start/6
%%
%% Start a peer_fsm (transport) process and return its pid. The
%% monitor reference returned by diameter_peer_fsm:start/3 is ignored.

start(T, Opts, Mask, Nodes, Dict0, Svc) ->
    Config = {Mask, Nodes, Dict0, Svc},
    {_MRef, TPid} = diameter_peer_fsm:start(T, Opts, Config),
    TPid.
%% common_dictionary/1
%%
%% Determine the dictionary of the Diameter common application with
%% Application Id 0. Fail on config errors.

common_dictionary(Apps) ->
    %% Group the configured dictionary modules by application id ...
    ById = lists:foldl(fun(#diameter_app{dictionary = M}, Acc) ->
                               orddict:append(M:id(), M, Acc)
                       end,
                       orddict:new(),
                       Apps),
    %% ... and look for the common one.
    case orddict:fold(fun dict0/3, false, ById) of
        {value, Mod} ->
            Mod;
        false ->
            %% A transport should configure a common dictionary but
            %% don't require it. Not configuring a common dictionary
            %% means a user won't be able to either send or receive
            %% messages in the common dictionary: incoming requests
            %% will be answered with 3007 and outgoing requests cannot
            %% be sent. The dictionary returned here is only used for
            %% messages diameter sends and receives: CER/CEA, DPR/DPA
            %% and DWR/DWA.
            ?BASE
    end.
%% dict0/3
%%
%% Fold function for common_dictionary/1, applied to each application
%% id and the list of dictionary modules configured for it. The
%% accumulator is false until a common dictionary has been found, in
%% which case it's {value, Module}.

%% Each application should be represented by a single dictionary.
dict0(AppId, [_,_|_] = Mods, _) ->
    config_error({multiple_dictionaries, Mods, {application_id, AppId}});

%% An explicit common dictionary.
dict0(?APP_ID_COMMON, [Dict], _) ->
    {value, Dict};

%% A pure relay, in which case the common application is implicit.
%% This uses the fact that the common application will already have
%% been folded.
dict0(?APP_ID_RELAY, _, false) ->
    {value, ?BASE};

%% Anything else leaves the result so far untouched.
dict0(_, _, Found) ->
    Found.
%% config_error/1
%%
%% Raise a configuration error through the ?ERROR macro, tagging the
%% reason with configuration_error.

config_error(Reason) ->
    ?ERROR({configuration_error, Reason}).
%% handle_call/3
%%
%% No calls are supported: every request is answered with nok and the
%% state is left unchanged.

handle_call(_Request, _From, S) ->
    {reply, nok, S}.
%% handle_cast/2
%%
%% No casts are supported: every message is ignored.

handle_cast(_Msg, S) ->
    {noreply, S}.
%% handle_info/2
%%
%% Run an incoming message through the state machine in transition/2
%% and act on the result:
%%
%%   ok           - nothing changed, keep the current state;
%%   #watchdog{}  - new state: notify the service and continue;
%%   stop         - report a transition to down and terminate with
%%                  reason {shutdown, Msg}.

handle_info(Msg, #watchdog{} = Old) ->
    case transition(Msg, Old) of
        ok ->
            {noreply, Old};
        #watchdog{} = New ->
            close(Msg, Old),       %% service expects 'close' message
            event(Msg, Old, New),  %% before 'watchdog'
            {noreply, New};
        stop ->
            ?LOG(stop, Msg),
            event(Msg, Old, Old#watchdog{status = down}),
            {stop, {shutdown, Msg}, Old}
    end.
%% close/2
%%
%% Send the parent a close message when the message being handled is
%% the 'DOWN' of our own transport process with reason
%% {shutdown, Reason}. Do nothing for any other message.

close({'DOWN', _, process, TPid, {shutdown, Reason}},
      #watchdog{transport = TPid, parent = Parent}) ->
    Notification = {close, self(), Reason},
    send(Parent, Notification);
close(_, _) ->
    ok.
%% event/3
%%
%% Send the parent a watchdog message describing a change of status
%% between the old and new state. Nothing is sent if the status is
%% unchanged or if neither state has a transport process.

event(_, #watchdog{status = Same}, #watchdog{status = Same}) ->
    ok;
event(_, #watchdog{transport = undefined},
         #watchdog{transport = undefined}) ->
    ok;
event(Msg,
      #watchdog{status = From, transport = OldTPid, parent = Parent},
      #watchdog{status = To, transport = NewTPid}) ->
    TPid = tpid(OldTPid, NewTPid),
    Extra = data(Msg, TPid, From, To),
    Event = {[TPid | Extra], From, To},
    send(Parent, {watchdog, self(), Event}),
    ?LOG(transition, {self(), Event}).
%% data/4
%%
%% Extra data to include in a watchdog event: the term received in the
%% transport's open message when the connection becomes usable (a move
%% to okay or reopen), nothing otherwise.

%% Leaving reopen for okay: the term was saved under key 'open' when
%% the open message was received and is erased here.
data(Msg, TPid, reopen, okay) ->
    {recv, TPid, 'DWA', _Pkt} = Msg,  %% assert
    {TPid, Saved} = eraser(open),
    [Saved];

%% The open message itself carries the term.
data({open, TPid, _Hosts, Info}, TPid, _From, To)
  when To == okay;
       To == reopen ->
    [Info];

data(_, _, _, _) ->
    [].
%% tpid/2
%%
%% Choose between two transport values: the second if it's a pid, the
%% first otherwise.

tpid(Old, New) ->
    case is_pid(New) of
        true ->
            New;
        false ->
            Old
    end.
%% send/2
%%
%% Send a message to a process. Returns the message.

send(Dest, Msg) ->
    erlang:send(Dest, Msg).
%% terminate/2
%%
%% Nothing to clean up.

terminate(_Reason, _State) ->
    ok.
%% code_change/3
%%
%% The state is carried over unchanged.

code_change(_OldVsn, S, _Extra) ->
    {ok, S}.
%% ===========================================================================
%% ===========================================================================
%% transition/2
%%
%% The state transitions documented here are extracted from RFC 3539,
%% the commentary is ours.
%% Service or watchdog is telling the watchdog of an accepting
%% transport to die after reconnect_timer expiry or reestablished
%% connection (in another transport process) respectively.
%% Returns ok if the state is unchanged, a new #watchdog{} otherwise,
%% or stop if the process should terminate. The result is interpreted
%% by handle_info/2.
transition(close, #watchdog{status = down}) ->
{{accept, _}, _, _} = getr(restart), %% assert
stop;
%% A close in any other state is ignored.
transition(close, #watchdog{}) ->
ok;
%% Service is asking for the peer to be taken down gracefully.
%% There's no transport process to take down: just stop.
transition({shutdown, Pid, _}, #watchdog{parent = Pid,
transport = undefined}) ->
stop;
%% Otherwise forward the request to the transport and remember that
%% shutdown was requested, so that the 'DOWN' of the transport causes
%% this process to stop instead of moving to state down.
transition({shutdown = T, Pid, Reason}, #watchdog{parent = Pid,
transport = TPid}
= S) ->
send(TPid, {T, self(), Reason}),
S#watchdog{shutdown = true};
%% Parent process has died.
transition({'DOWN', _, process, Pid, _Reason},
#watchdog{parent = Pid}) ->
stop;
%% Transport has accepted a connection.
transition({accepted = T, TPid}, #watchdog{transport = TPid,
parent = Pid}) ->
send(Pid, {T, self(), TPid}),
ok;
%% STATE Event Actions New State
%% ===== ------ ------- ----------
%% INITIAL Connection up SetWatchdog() OKAY
%% By construction, the watchdog timer isn't set until we move into
%% state okay as the result of the Peer State Machine reaching the
%% Open state.
%%
%% If we're accepting then we may be resuming a connection that went
%% down in another watchdog process, in which case this is the
%% transition below, from down to reopen. That is, it's not until we
%% know the identity of the peer (ie. now) that we know that we're in
%% state down rather than initial.
transition({open, TPid, Hosts, _} = Open,
#watchdog{transport = TPid,
status = initial,
restrict = {_, R}}
= S) ->
case okay(getr(restart), Hosts, R) of
okay ->
set_watchdog(S#watchdog{status = okay});
reopen ->
transition(Open, S#watchdog{status = down})
end;
%% DOWN Connection up NumDWA = 0
%% SendWatchdog()
%% SetWatchdog()
%% Pending = TRUE REOPEN
transition({open = Key, TPid, _Hosts, T},
#watchdog{transport = TPid,
status = down}
= S) ->
%% Store the info we need to notify the parent to reopen the
%% connection after the requisite DWA's are received, at which
%% time we eraser(open). The reopen message is a later addition,
%% to communicate the new capabilities as soon as they're known.
putr(Key, {TPid, T}),
set_watchdog(send_watchdog(S#watchdog{status = reopen,
num_dwa = 0}));
%% OKAY Connection down CloseConnection()
%% Failover()
%% SetWatchdog() DOWN
%% SUSPECT Connection down CloseConnection()
%% SetWatchdog() DOWN
%% REOPEN Connection down CloseConnection()
%% SetWatchdog() DOWN
%% The transport has died after a shutdown request from the service:
%% we're done.
transition({'DOWN', _, process, TPid, _Reason},
#watchdog{transport = TPid,
shutdown = true}) ->
stop;
%% The transport has died otherwise: move to down (or remain in
%% initial), forget the transport and restart the timer.
transition({'DOWN', _, process, TPid, _Reason},
#watchdog{transport = TPid,
status = T}
= S) ->
set_watchdog(S#watchdog{status = case T of initial -> T; _ -> down end,
pending = false,
transport = undefined});
%% Incoming message.
transition({recv, TPid, Name, Pkt}, #watchdog{transport = TPid} = S) ->
recv(Name, Pkt, S);
%% Current watchdog has timed out.
%% In states initial and down a timeout means a restart of the
%% transport, and restart/1 can return stop instead of a #watchdog{}
%% (for an accepting transport with no restriction on connections).
%% Since set_watchdog/1 only accepts a #watchdog{}, stop has to be
%% returned as is.
transition({timeout, TRef, tw}, #watchdog{tref = TRef, status = T} = S)
when T == initial;
T == down ->
case restart(S) of
stop -> stop;
#watchdog{} = NewS -> set_watchdog(NewS)
end;
%% In the remaining states timeout/1 returns the #watchdog{} on which
%% to restart the timer.
transition({timeout, TRef, tw}, #watchdog{tref = TRef} = S) ->
set_watchdog(timeout(S));
%% Timer was canceled after message was already sent.
transition({timeout, _, tw}, #watchdog{}) ->
ok;
%% State query.
transition({state, Pid}, #watchdog{status = S}) ->
send(Pid, {self(), S}),
ok.
%% ===========================================================================
%% putr/2
%%
%% Store a value in the process dictionary under a key namespaced by
%% this module. Returns the previous value, or undefined if none.

putr(Key, Val) ->
    DictKey = {?MODULE, Key},
    erlang:put(DictKey, Val).
%% getr/1
%%
%% Read a value from the process dictionary using a key namespaced by
%% this module. Returns undefined if there is none.

getr(Key) ->
    DictKey = {?MODULE, Key},
    erlang:get(DictKey).
%% eraser/1
%%
%% Remove and return a value from the process dictionary using a key
%% namespaced by this module. Returns undefined if there is none.

eraser(Key) ->
    DictKey = {?MODULE, Key},
    erlang:erase(DictKey).
%% encode/3
%%
%% Encode a message using the given dictionary and return the
%% resulting binary. The same value, obtained from
%% diameter_session:sequence/1 with the given mask, is used as both
%% End-to-End and Hop-by-Hop Identifier.

encode(Msg, Mask, Dict) ->
    Id = diameter_session:sequence(Mask),
    Pkt = #diameter_packet{header = #diameter_header
                                        {version = ?DIAMETER_VERSION,
                                         end_to_end_id = Id,
                                         hop_by_hop_id = Id},
                           msg = Msg},
    #diameter_packet{bin = Bin} = diameter_codec:encode(Dict, Pkt),
    Bin.
%% okay/3
%%
%% Decide whether a newly opened connection takes the watchdog to
%% state okay or, as a resumed connection, to reopen. The first
%% argument is the term saved under key 'restart' in i/1.

%% An accepted connection is registered with diameter_reg. If
%% connections are restricted then other registrations for the same
%% peer are looked up.
%%
%% Register before matching so that at least one of two registering
%% processes will match the other.
okay({{accept, Ref}, _, _}, Hosts, Restrict) ->
    Key = {?MODULE, connection, Ref, Hosts},
    diameter_reg:add(Key),
    case Restrict of
        true ->
            okay(diameter_reg:match(Key));
        _ ->
            okay
    end;

%% A connecting transport is always okay.
okay({{connect, _}, _, _}, _, _) ->
    okay.
%% okay/1
%%
%% Examine the registrations matching our own connection.

%% The peer hasn't been connected recently: the only registration is
%% our own ...
okay([{_, Pid}]) ->
    Pid = self(),  %% assert
    okay;

%% ... or it has, in which case the other processes are asked to
%% close.
okay(Matches) ->
    Self = self(),
    [_|_] = [send(Pid, close) || {_, Pid} <- Matches, Self /= Pid],
    reopen.
%% set_watchdog/1
%%
%% (Re)start the watchdog timer: cancel any running timer and start a
%% new one that delivers {timeout, TRef, tw} to this process after the
%% interval computed by tw/1. Returns the state with the new timer
%% reference.
%%
%% The argument is typically the result of timeout/1 or restart/1,
%% which can be the atom stop instead of a #watchdog{} (restart/2
%% returns it for an accepting transport with no restriction on
%% connections). Pass stop through untouched, so that the caller
%% terminates the process, rather than crashing with function_clause.

set_watchdog(stop = No) ->
    No;

set_watchdog(#watchdog{tw = TwInit,
                       tref = TRef}
             = S) ->
    cancel(TRef),
    S#watchdog{tref = erlang:start_timer(tw(TwInit), self(), tw)}.
%% cancel/1
%%
%% Cancel a timer, if there is one. Returns ok when there's no timer
%% to cancel, otherwise the result of erlang:cancel_timer/1.

cancel(TRef)
  when TRef =/= undefined ->
    erlang:cancel_timer(TRef);
cancel(undefined) ->
    ok.
%% tw/1
%%
%% Compute the interval, in milliseconds, for the next watchdog timer.
%% An integer of at least 6000 is adjusted by a random jitter, while
%% an {M,F,A} is applied to compute the interval.

tw(T)
  when is_integer(T), T >= 6000 ->
    %% random:uniform(4001) is in 1..4001, so Jitter is in -2000..2000.
    Jitter = random:uniform(4001) - 2001,  %% RFC3539 jitter of +/- 2 sec.
    T + Jitter;
tw({Mod, Fun, Args}) ->
    apply(Mod, Fun, Args).
%% send_watchdog/1
%%
%% Pass the transport process an encoded DWR to send and mark an
%% answer as pending. Only defined when no answer is already pending.
%% The DWR is the one saved under key dwr in i/1.

send_watchdog(#watchdog{pending = false,
                        transport = TPid,
                        dictionary = Dict0,
                        sequence = Mask}
              = S) ->
    Bin = encode(getr(dwr), Mask, Dict0),
    send(TPid, {send, Bin}),
    ?LOG(send, 'DWR'),
    S#watchdog{pending = true}.
%% recv/3
%%
%% Handle an incoming message. rcv/2 updates the watchdog state,
%% throwing the new state (see throwaway/1) if the message should not
%% be passed on. Otherwise rcv/3 passes the message on before the new
%% state is returned.

recv(Name, Pkt, S) ->
    try rcv(Name, S) of
        #watchdog{} = NewS ->
            %% Only reached if rcv/2 returned normally. Exceptions
            %% raised here are not caught below.
            rcv(Name, Pkt, S),
            NewS
    catch
        {?MODULE, throwaway, #watchdog{} = Thrown} ->
            Thrown
    end.
%% rcv/3
%%
%% Pass an incoming message on to diameter_traffic, unless it's one of
%% the base messages listed in the first clause, which are not passed
%% on.

rcv(Name, _, _)
  when Name == 'CER';
       Name == 'CEA';
       Name == 'DWR';
       Name == 'DWA';
       Name == 'DPR';
       Name == 'DPA' ->
    false;

rcv(_, Pkt, #watchdog{transport = TPid,
                      dictionary = Dict0,
                      receive_data = RecvData}) ->
    diameter_traffic:receive_message(TPid, Pkt, Dict0, RecvData).
%% throwaway/1
%%
%% Throw the given state, tagged with this module's name, to be caught
%% in recv/3: the message being handled is discarded.

throwaway(State) ->
    throw({?MODULE, throwaway, State}).
%% rcv/2
%%
%% Update the watchdog state as the result of an incoming message,
%% given the name of the message. Returns the new state, or throws it
%% by way of throwaway/1.
%%
%% The lack of Hop-by-Hop and End-to-End Identifiers checks in a
%% received DWA is intentional. The purpose of the message is to
%% demonstrate life but a peer that consistently bungles it by sending
%% the wrong identifiers causes the connection to toggle between OPEN
%% and SUSPECT, with failover and failback as result, despite there
%% being no real problem with connectivity. Thus, relax and accept any
%% incoming DWA as being in response to an outgoing DWR.

%%   INITIAL       Receive DWA        Pending = FALSE
%%                                    Throwaway()        INITIAL
%%   INITIAL       Receive non-DWA    Throwaway()        INITIAL
%%
%%   DOWN          Receive DWA        Pending = FALSE
%%                                    Throwaway()        DOWN
%%   DOWN          Receive non-DWA    Throwaway()        DOWN

rcv('DWA', #watchdog{status = Status} = W)
  when Status == initial;
       Status == down ->
    throwaway(W#watchdog{pending = false});

rcv(_, #watchdog{status = Status} = W)
  when Status == initial;
       Status == down ->
    throwaway(W);

%%   OKAY          Receive DWA        Pending = FALSE
%%                                    SetWatchdog()      OKAY
%%   OKAY          Receive non-DWA    SetWatchdog()      OKAY

rcv('DWA', #watchdog{status = okay} = W) ->
    set_watchdog(W#watchdog{pending = false});

rcv(_, #watchdog{status = okay} = W) ->
    set_watchdog(W);

%%   SUSPECT       Receive DWA        Pending = FALSE
%%                                    Failback()
%%                                    SetWatchdog()      OKAY
%%   SUSPECT       Receive non-DWA    Failback()
%%                                    SetWatchdog()      OKAY

rcv('DWA', #watchdog{status = suspect} = W) ->
    set_watchdog(W#watchdog{status = okay,
                            pending = false});

rcv(_, #watchdog{status = suspect} = W) ->
    set_watchdog(W#watchdog{status = okay});

%%   REOPEN        Receive DWA &      Pending = FALSE
%%                 NumDWA == 2        NumDWA++
%%                                    Failback()         OKAY

rcv('DWA', #watchdog{status = reopen,
                     num_dwa = 2}
           = W) ->
    W#watchdog{status = okay,
               num_dwa = 3,
               pending = false};

%%   REOPEN        Receive DWA &      Pending = FALSE
%%                 NumDWA < 2         NumDWA++           REOPEN

rcv('DWA', #watchdog{status = reopen,
                     num_dwa = Count}
           = W) ->
    W#watchdog{num_dwa = Count + 1,
               pending = false};

%%   REOPEN        Receive non-DWA    Throwaway()        REOPEN

rcv(_, #watchdog{status = reopen} = W) ->
    throwaway(W).
%% timeout/1
%%
%% Update the watchdog state as the result of watchdog timer expiry.
%% The caller sets the watchdog on the return value.

%%   OKAY          Timer expires &    SendWatchdog()
%%                 !Pending           SetWatchdog()
%%                                    Pending = TRUE     OKAY
%%   REOPEN        Timer expires &    SendWatchdog()
%%                 !Pending           SetWatchdog()
%%                                    Pending = TRUE     REOPEN

timeout(#watchdog{status = Status,
                  pending = false}
        = W)
  when Status == okay;
       Status == reopen ->
    send_watchdog(W);

%%   OKAY          Timer expires &    Failover()
%%                 Pending            SetWatchdog()      SUSPECT

timeout(#watchdog{status = okay,
                  pending = true}
        = W) ->
    W#watchdog{status = suspect};

%%   SUSPECT       Timer expires      CloseConnection()
%%                                    SetWatchdog()      DOWN

timeout(#watchdog{status = suspect,
                  transport = TPid}
        = W) ->
    exit(TPid, {shutdown, watchdog_timeout}),
    W#watchdog{status = down};

%%   REOPEN        Timer expires &    CloseConnection()
%%                 Pending &          SetWatchdog()
%%                 NumDWA < 0                            DOWN

timeout(#watchdog{status = reopen,
                  pending = true,
                  num_dwa = Count,
                  transport = TPid}
        = W)
  when Count < 0 ->
    exit(TPid, {shutdown, watchdog_timeout}),
    W#watchdog{status = down};

%%   REOPEN        Timer expires &    NumDWA = -1
%%                 Pending &          SetWatchdog()
%%                 NumDWA >= 0                           REOPEN

timeout(#watchdog{status = reopen,
                  pending = true,
                  num_dwa = Count}
        = W)
  when 0 =< Count ->
    W#watchdog{num_dwa = -1};

%%   DOWN          Timer expires      AttemptOpen()
%%                                    SetWatchdog()      DOWN
%%   INITIAL       Timer expires      AttemptOpen()
%%                                    SetWatchdog()      INITIAL

%% RFC 3539, 3.4.1:
%%
%%   [5] While the connection is in the closed state, the AAA client MUST
%%       NOT attempt to send further watchdog messages on the connection.
%%       However, after the connection is closed, the AAA client continues
%%       to periodically attempt to reopen the connection.
%%
%%       The AAA client SHOULD wait for the transport layer to report
%%       connection failure before attempting again, but MAY choose to
%%       bound this wait time by the watchdog interval, Tw.

%% Don't bound, restarting the peer process only when the previous
%% process has died. We only need to handle state down since we start
%% the first watchdog when transitioning out of initial.

timeout(#watchdog{status = Status} = W)
  when Status == initial;
       Status == down ->
    restart(W).
%% restart/1
%%
%% Restart the transport if there isn't one, using the term saved
%% under key 'restart' in i/1. Otherwise return the argument as is.
%% Note that restart/2 can return stop instead of a new state.

restart(S) ->
    case S of
        #watchdog{transport = undefined} ->
            restart(getr(restart), S);
        _ ->
            S
    end.
%% restart/2
%%
%% Only restart the transport in the connecting case. For an accepting
%% transport, there's no guarantee that an accepted connection in a
%% restarted transport is from the peer we've lost contact with so
%% have to be prepared for another watchdog to handle it. This is what
%% the diameter_reg registration in this module is for: the peer
%% connection is registered when leaving state initial and this is
%% used by a new accepting watchdog to realize that it's actually in
%% state down rather than initial when receiving notification of an
%% open connection.

%% Connecting: tell the parent that we're reconnecting and start a new
%% transport process.
restart({{connect, _} = T, Opts, Svc},
        #watchdog{parent = Parent,
                  sequence = Mask,
                  restrict = {R,_},
                  dictionary = Dict0}
        = S) ->
    send(Parent, {reconnect, self()}),
    Nodes = restrict_nodes(R),
    TPid = start(T, Opts, Mask, Nodes, Dict0, Svc),
    IsMember = lists:member(node(), Nodes),
    S#watchdog{transport = TPid,
               restrict = {R, IsMember}};

%% No restriction on the number of connections to the same peer: just
%% die. Note that a state machine never enters state REOPEN in this
%% case.
restart({{accept, _}, _, _}, #watchdog{restrict = {_, false}}) ->
    stop;

%% Otherwise hang around until told to die.
%% Don't currently use Opts/Svc in the accept case.
restart({{accept, _}, _, _}, S) ->
    S.
%% dwr/1
%%
%% Build the DWR message, in list form, from the Origin-Host,
%% Origin-Realm and Origin-State-Id of the given capabilities.

dwr(#diameter_caps{} = Caps) ->
    #diameter_caps{origin_host = Host,
                   origin_realm = Realm,
                   origin_state_id = StateId}
        = Caps,
    ['DWR', {'Origin-Host', Host},
            {'Origin-Realm', Realm},
            {'Origin-State-Id', StateId}].
%% restrict_nodes/1
%%
%% Map a restrict_connections value to a list of nodes:
%%
%%   false            - no nodes;
%%   nodes            - the local node and all nodes in nodes();
%%   node             - the local node only;
%%   a list of atoms  - the list itself;
%%   anything else    - whatever diameter_lib:eval/1 returns for it.

restrict_nodes(false) ->
    [];
restrict_nodes(nodes) ->
    [node() | nodes()];
restrict_nodes(node) ->
    [node()];
restrict_nodes([] = Nodes) ->
    Nodes;
restrict_nodes([First | _] = Nodes)
  when is_atom(First) ->
    Nodes;
restrict_nodes(Fun) ->
    diameter_lib:eval(Fun).
-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/pkcs7-signature
Size: 2182 bytes
Desc: Kryptograficzna sygnatura S/MIME
URL: <http://erlang.org/pipermail/erlang-bugs/attachments/20130520/44c7c439/attachment.bin>
More information about the erlang-bugs
mailing list