[erlang-questions] Re: Changing heartbeat timeout while the system is running

Alexey Romanov alexey.v.romanov@REDACTED
Wed Apr 6 08:38:42 CEST 2011


Great, thanks!

Yours, Alexey Romanov



On Wed, Apr 6, 2011 at 4:55 AM, Michael Santos <michael.santos@REDACTED> wrote:
> On Tue, Apr 05, 2011 at 07:59:32AM -0400, Michael Santos wrote:
>> On Tue, Apr 05, 2011 at 03:17:41PM +0400, Alexey Romanov wrote:
>> > From http://www.erlang.org/doc/man/heart.html I expect that heart only
>> > checks the value of HEART_BEAT_TIMEOUT on startup and it can't be
>> > changed while the program runs. Is this correct (I hope not)?
>>
>> Yes, the value is only checked on startup. heart is a port program, and the
>> timeout environment variable is passed to it as a command-line switch.
>>
>> heart already supports changing the reboot command on the fly
>> (using heart:set_cmd/1), so adding support for HEART_BEAT_TIMEOUT and
>> HEART_BEAT_BOOT_DELAY (maybe heart:set_timeout/1, heart:set_delay/1)
>> looks simple. I can look into making a patch, if it'd be helpful.
>
> Attached is a patch adding heart:set_timeout/1 and heart:set_boot_delay/1
> which seems to work ok. I'll add tests and update the docs later.
>
> diff --git a/erts/etc/common/Makefile.in b/erts/etc/common/Makefile.in
> index 4754328..f617dfb 100644
> --- a/erts/etc/common/Makefile.in
> +++ b/erts/etc/common/Makefile.in
> @@ -449,7 +449,8 @@ endif
>
>  ifeq ($(findstring vxworks,$(TARGET)), vxworks)
>  $(BINDIR)/heart: $(OBJDIR)/heart.o $(OBJDIR)/heart_config.o
> -       $(LD) $(LDFLAGS) -o $@ $(OBJDIR)/heart.o $(OBJDIR)/heart_config.o
> +       $(LD) $(LDFLAGS) -o $@ $(OBJDIR)/heart.o $(OBJDIR)/heart_config.o \
> +               $(ERTS_INTERNAL_LIBS)
>
>  $(OBJDIR)/heart_config.o: $(VXETC)/heart_config.c
>        $(CC) $(CFLAGS) -o $@ -c $(VXETC)/heart_config.c
> @@ -467,7 +468,7 @@ else
>
>  $(BINDIR)/heart@REDACTED@: $(OBJDIR)/heart.o $(ENTRY_OBJ)
>        $(LD) $(LDFLAGS) $(ENTRY_LDFLAGS) -o $@ $(OBJDIR)/heart.o \
> -               $(ENTRY_OBJ) $(WINDSOCK)
> +               $(ENTRY_OBJ) $(WINDSOCK) $(ERTS_INTERNAL_LIBS)
>
>  $(OBJDIR)/heart.o: heart.c
>        $(CC) $(CFLAGS) -o $@ -c heart.c
> diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c
> index 778b356..88b9f0a 100644
> --- a/erts/etc/common/heart.c
> +++ b/erts/etc/common/heart.c
> @@ -163,6 +163,12 @@ struct msg {
>  #define  CLEAR_CMD    5
>  #define  GET_CMD      6
>  #define  HEART_CMD    7
> +#define  SET_TIMEOUT  8
> +#define  GET_TIMEOUT  9
> +#define  HEART_TIMEOUT 10
> +#define  SET_BOOT_DELAY    11
> +#define  GET_BOOT_DELAY    12
> +#define  HEART_BOOT_DELAY  13
>
>
>  /*  Maybe interesting to change */
> @@ -207,6 +213,7 @@ static int message_loop(int,int);
>  static void do_terminate(int);
>  static int notify_ack(int);
>  static int heart_cmd_reply(int, char *);
> +static int heart_val_reply(int, int, int);
>  static int write_message(int, struct msg *);
>  static int read_message(int, struct msg *);
>  static int read_skip(int, char *, int, int);
> @@ -506,6 +513,38 @@ message_loop(erlin_fd, erlout_fd)
>                                    free_env_val(env);
>                                }
>                                break;
> +                       case SET_TIMEOUT:
> +                               /* override the HEART_BEAT_TIMEOUT value */
> +                               {
> +                               char buf[10];
> +                               memcpy(&buf, &(mp->fill[0]),
> +                                      tlen-MSG_HDR_PLUS_OP_SIZE);
> +                               buf[tlen-MSG_HDR_PLUS_OP_SIZE] = '\0';
> +                               heart_beat_timeout = atoi(buf);
> +                               notify_ack(erlout_fd);
> +                               }
> +                               break;
> +                       case GET_TIMEOUT:
> +                               /* send back timeout value */
> +                               heart_val_reply(erlout_fd, HEART_TIMEOUT,
> +                                      heart_beat_timeout);
> +                               break;
> +                       case SET_BOOT_DELAY:
> +                               /* override the HEART_BEAT_BOOT_DELAY value */
> +                               {
> +                               char buf[10];
> +                               memcpy(&buf, &(mp->fill[0]),
> +                                      tlen-MSG_HDR_PLUS_OP_SIZE);
> +                               buf[tlen-MSG_HDR_PLUS_OP_SIZE] = '\0';
> +                               heart_beat_boot_delay = atoi(buf);
> +                               notify_ack(erlout_fd);
> +                               }
> +                               break;
> +                       case GET_BOOT_DELAY:
> +                               /* send back delay value */
> +                               heart_val_reply(erlout_fd, HEART_BOOT_DELAY,
> +                                      heart_beat_boot_delay);
> +                               break;
>                        default:
>                                /* ignore all other messages */
>                                break;
> @@ -744,6 +783,23 @@ heart_cmd_reply(int fd, char *s)
>
>
>  /*
> + * sends back an ascii representation of an integer value
> + *
> + */
> +static int
> +heart_val_reply(int fd, int op, int val)
> +{
> +  struct msg m;
> +
> +  erts_snprintf(m.fill, sizeof(m.fill), "%d", val);
> +  m.op = op;
> +  m.len = htons(strlen(m.fill) + 1);   /* Include Op */
> +
> +  return write_message(fd, &m);
> +}
> +
> +
> +/*
>  *  write_message
>  *
>  *  Writes a message to a blocking file descriptor. Returns the total
> diff --git a/lib/kernel/src/heart.erl b/lib/kernel/src/heart.erl
> index e78acfc..3bf7325 100644
> --- a/lib/kernel/src/heart.erl
> +++ b/lib/kernel/src/heart.erl
> @@ -29,7 +29,8 @@
>  %%%
>  %%% It recognizes the flag '-heart'
>  %%%--------------------------------------------------------------------
> --export([start/0, init/2, set_cmd/1, clear_cmd/0, get_cmd/0, cycle/0]).
> +-export([start/0, init/2, set_cmd/1, clear_cmd/0, get_cmd/0, cycle/0,
> +        set_timeout/1, get_timeout/0, set_boot_delay/1, get_boot_delay/0]).
>
>  -define(START_ACK, 1).
>  -define(HEART_BEAT, 2).
> @@ -38,6 +39,12 @@
>  -define(CLEAR_CMD, 5).
>  -define(GET_CMD, 6).
>  -define(HEART_CMD, 7).
> +-define(SET_TIMEOUT, 8).
> +-define(GET_TIMEOUT, 9).
> +-define(HEART_TIMEOUT, 10).
> +-define(SET_DELAY, 11).
> +-define(GET_DELAY, 12).
> +-define(HEART_BOOT_DELAY, 13).
>
>  -define(TIMEOUT, 5000).
>  -define(CYCLE_TIMEOUT, 10000).
> @@ -103,6 +110,29 @@ clear_cmd() ->
>     heart ! {self(), clear_cmd},
>     wait().
>
> +-spec set_timeout(string()) -> 'ok' | {'error', {'bad_timeout', non_neg_integer()}}.
> +
> +set_timeout(Timeout) ->
> +    heart ! {self(), set_timeout, Timeout},
> +    wait().
> +
> +-spec get_timeout() -> 'ok'.
> +
> +get_timeout() ->
> +    heart ! {self(), get_timeout},
> +    wait().
> +
> +-spec set_boot_delay(string()) -> 'ok' | {'error', {'bad_boot_delay', non_neg_integer()}}.
> +
> +set_boot_delay(Delay) ->
> +    heart ! {self(), set_boot_delay, Delay},
> +    wait().
> +
> +-spec get_boot_delay() -> 'ok'.
> +
> +get_boot_delay() ->
> +    heart ! {self(), get_boot_delay},
> +    wait().
>
>  %%% Should be used solely by the release handler!!!!!!!
>  -spec cycle() -> 'ok' | {'error', term()}.
> @@ -190,6 +220,28 @@ loop(Parent, Port, Cmd) ->
>        {From, get_cmd} ->
>            From ! {heart, get_heart_cmd(Port)},
>            loop(Parent, Port, Cmd);
> +       {From, set_timeout, NewTimeout} when NewTimeout > 10, NewTimeout =< 65535 ->
> +           send_heart_timeout(Port, NewTimeout),
> +           wait_ack(Port),
> +           From ! {heart, ok},
> +           loop(Parent, Port, Cmd);
> +       {From, set_timeout, NewTimeout} ->
> +           From ! {heart, {error, {bad_timeout, NewTimeout}}},
> +           loop(Parent, Port, Cmd);
> +       {From, get_timeout} ->
> +           From ! {heart, get_heart_timeout(Port)},
> +           loop(Parent, Port, Cmd);
> +       {From, set_boot_delay, NewDelay} when NewDelay > 10, NewDelay =< 65535 ->
> +           send_heart_boot_delay(Port, NewDelay),
> +           wait_ack(Port),
> +           From ! {heart, ok},
> +           loop(Parent, Port, Cmd);
> +       {From, set_boot_delay, NewDelay} ->
> +           From ! {heart, {error, {bad_boot_delay, NewDelay}}},
> +           loop(Parent, Port, Cmd);
> +       {From, get_boot_delay} ->
> +           From ! {heart, get_heart_boot_delay(Port)},
> +           loop(Parent, Port, Cmd);
>        {From, cycle} ->
>            %% Calls back to loop
>            do_cycle_port_program(From, Parent, Port, Cmd);
> @@ -245,7 +297,7 @@ do_cycle_port_program(Caller, Parent, Port, Cmd) ->
>     end.
>
>
> -%% "Beates" the heart once.
> +%% "Beats" the heart once.
>  send_heart_beat(Port) -> Port ! {self(), {command, [?HEART_BEAT]}}.
>
>  %% Set a new HEART_COMMAND.
> @@ -261,6 +313,28 @@ get_heart_cmd(Port) ->
>            {ok, Cmd}
>     end.
>
> +%% Set a new HEART_BEAT_TIMEOUT.
> +send_heart_timeout(Port, Timeout) ->
> +    Port ! {self(), {command, [?SET_TIMEOUT|integer_to_list(Timeout)]}}.
> +
> +get_heart_timeout(Port) ->
> +    Port ! {self(), {command, [?GET_TIMEOUT]}},
> +    receive
> +       {Port, {data, [?HEART_TIMEOUT | Timeout]}} ->
> +           {ok, list_to_integer(Timeout)}
> +    end.
> +
> +%% Set a new HEART_BEAT_BOOT_DELAY.
> +send_heart_boot_delay(Port, Delay) ->
> +    Port ! {self(), {command, [?SET_DELAY|integer_to_list(Delay)]}}.
> +
> +get_heart_boot_delay(Port) ->
> +    Port ! {self(), {command, [?GET_DELAY]}},
> +    receive
> +       {Port, {data, [?HEART_BOOT_DELAY | Delay]}} ->
> +           {ok, list_to_integer(Delay)}
> +    end.
> +
>  %% Sends shutdown command to the port.
>  send_shutdown(Port) -> Port ! {self(), {command, [?SHUT_DOWN]}}.
>
>



More information about the erlang-questions mailing list