[erlang-questions] Re: Changing heartbeat timeout while the system is running
Alexey Romanov
alexey.v.romanov@REDACTED
Wed Apr 6 08:38:42 CEST 2011
Great, thanks!
Yours, Alexey Romanov
On Wed, Apr 6, 2011 at 4:55 AM, Michael Santos <michael.santos@REDACTED> wrote:
> On Tue, Apr 05, 2011 at 07:59:32AM -0400, Michael Santos wrote:
>> On Tue, Apr 05, 2011 at 03:17:41PM +0400, Alexey Romanov wrote:
>> > From http://www.erlang.org/doc/man/heart.html I expect that heart only
>> > checks the value of HEART_BEAT_TIMEOUT on startup and it can't be
>> > changed while the program runs. Is this correct (I hope not)?
>>
> >> Yes, the value is only checked on startup. heart runs as a port, and the
> >> timeout environment variable is passed to it as a command-line switch.
>>
>> heart already supports changing the reboot command on the fly
>> (using heart:set_cmd/1), so adding support for HEART_BEAT_TIMEOUT and
>> HEART_BEAT_BOOT_DELAY (maybe heart:set_timeout/1, heart:set_delay/1)
>> looks simple. I can look into making a patch, if it'd be helpful.
>
> Attached is a patch adding heart:set_timeout/1 and heart:set_boot_delay/1
> which seems to work ok. I'll add tests and update the docs later.
>
> diff --git a/erts/etc/common/Makefile.in b/erts/etc/common/Makefile.in
> index 4754328..f617dfb 100644
> --- a/erts/etc/common/Makefile.in
> +++ b/erts/etc/common/Makefile.in
> @@ -449,7 +449,8 @@ endif
>
> ifeq ($(findstring vxworks,$(TARGET)), vxworks)
> $(BINDIR)/heart: $(OBJDIR)/heart.o $(OBJDIR)/heart_config.o
> - $(LD) $(LDFLAGS) -o $@ $(OBJDIR)/heart.o $(OBJDIR)/heart_config.o
> + $(LD) $(LDFLAGS) -o $@ $(OBJDIR)/heart.o $(OBJDIR)/heart_config.o \
> + $(ERTS_INTERNAL_LIBS)
>
> $(OBJDIR)/heart_config.o: $(VXETC)/heart_config.c
> $(CC) $(CFLAGS) -o $@ -c $(VXETC)/heart_config.c
> @@ -467,7 +468,7 @@ else
>
> $(BINDIR)/heart@REDACTED@: $(OBJDIR)/heart.o $(ENTRY_OBJ)
> $(LD) $(LDFLAGS) $(ENTRY_LDFLAGS) -o $@ $(OBJDIR)/heart.o \
> - $(ENTRY_OBJ) $(WINDSOCK)
> + $(ENTRY_OBJ) $(WINDSOCK) $(ERTS_INTERNAL_LIBS)
>
> $(OBJDIR)/heart.o: heart.c
> $(CC) $(CFLAGS) -o $@ -c heart.c
> diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c
> index 778b356..88b9f0a 100644
> --- a/erts/etc/common/heart.c
> +++ b/erts/etc/common/heart.c
> @@ -163,6 +163,12 @@ struct msg {
> #define CLEAR_CMD 5
> #define GET_CMD 6
> #define HEART_CMD 7
> +#define SET_TIMEOUT 8
> +#define GET_TIMEOUT 9
> +#define HEART_TIMEOUT 10
> +#define SET_BOOT_DELAY 11
> +#define GET_BOOT_DELAY 12
> +#define HEART_BOOT_DELAY 13
>
>
> /* Maybe interesting to change */
> @@ -207,6 +213,7 @@ static int message_loop(int,int);
> static void do_terminate(int);
> static int notify_ack(int);
> static int heart_cmd_reply(int, char *);
> +static int heart_val_reply(int, int, int);
> static int write_message(int, struct msg *);
> static int read_message(int, struct msg *);
> static int read_skip(int, char *, int, int);
> @@ -506,6 +513,38 @@ message_loop(erlin_fd, erlout_fd)
> free_env_val(env);
> }
> break;
> + case SET_TIMEOUT:
> + /* override the HEART_BEAT_TIMEOUT value */
> + {
> + char buf[10];
> + memcpy(&buf, &(mp->fill[0]),
> + tlen-MSG_HDR_PLUS_OP_SIZE);
> + buf[tlen-MSG_HDR_PLUS_OP_SIZE] = '\0';
> + heart_beat_timeout = atoi(buf);
> + notify_ack(erlout_fd);
> + }
> + break;
> + case GET_TIMEOUT:
> + /* send back timeout value */
> + heart_val_reply(erlout_fd, HEART_TIMEOUT,
> + heart_beat_timeout);
> + break;
> + case SET_BOOT_DELAY:
> + /* override the HEART_BEAT_BOOT_DELAY value */
> + {
> + char buf[10];
> + memcpy(&buf, &(mp->fill[0]),
> + tlen-MSG_HDR_PLUS_OP_SIZE);
> + buf[tlen-MSG_HDR_PLUS_OP_SIZE] = '\0';
> + heart_beat_boot_delay = atoi(buf);
> + notify_ack(erlout_fd);
> + }
> + break;
> + case GET_BOOT_DELAY:
> + /* send back delay value */
> + heart_val_reply(erlout_fd, HEART_BOOT_DELAY,
> + heart_beat_boot_delay);
> + break;
> default:
> /* ignore all other messages */
> break;
> @@ -744,6 +783,23 @@ heart_cmd_reply(int fd, char *s)
>
>
> /*
> + * sends back an ascii representation of an integer value
> + *
> + */
> +static int
> +heart_val_reply(int fd, int op, int val)
> +{
> + struct msg m;
> +
> + erts_snprintf(m.fill, sizeof(m.fill), "%d", val);
> + m.op = op;
> + m.len = htons(strlen(m.fill) + 1); /* Include Op */
> +
> + return write_message(fd, &m);
> +}
> +
> +
> +/*
> * write_message
> *
> * Writes a message to a blocking file descriptor. Returns the total
> diff --git a/lib/kernel/src/heart.erl b/lib/kernel/src/heart.erl
> index e78acfc..3bf7325 100644
> --- a/lib/kernel/src/heart.erl
> +++ b/lib/kernel/src/heart.erl
> @@ -29,7 +29,8 @@
> %%%
> %%% It recognizes the flag '-heart'
> %%%--------------------------------------------------------------------
> --export([start/0, init/2, set_cmd/1, clear_cmd/0, get_cmd/0, cycle/0]).
> +-export([start/0, init/2, set_cmd/1, clear_cmd/0, get_cmd/0, cycle/0,
> + set_timeout/1, get_timeout/0, set_boot_delay/1, get_boot_delay/0]).
>
> -define(START_ACK, 1).
> -define(HEART_BEAT, 2).
> @@ -38,6 +39,12 @@
> -define(CLEAR_CMD, 5).
> -define(GET_CMD, 6).
> -define(HEART_CMD, 7).
> +-define(SET_TIMEOUT, 8).
> +-define(GET_TIMEOUT, 9).
> +-define(HEART_TIMEOUT, 10).
> +-define(SET_DELAY, 11).
> +-define(GET_DELAY, 12).
> +-define(HEART_BOOT_DELAY, 13).
>
> -define(TIMEOUT, 5000).
> -define(CYCLE_TIMEOUT, 10000).
> @@ -103,6 +110,29 @@ clear_cmd() ->
> heart ! {self(), clear_cmd},
> wait().
>
> +-spec set_timeout(string()) -> 'ok' | {'error', {'bad_timeout', non_neg_integer()}}.
> +
> +set_timeout(Timeout) ->
> + heart ! {self(), set_timeout, Timeout},
> + wait().
> +
> +-spec get_timeout() -> 'ok'.
> +
> +get_timeout() ->
> + heart ! {self(), get_timeout},
> + wait().
> +
> +-spec set_boot_delay(string()) -> 'ok' | {'error', {'bad_boot_delay', non_neg_integer()}}.
> +
> +set_boot_delay(Delay) ->
> + heart ! {self(), set_boot_delay, Delay},
> + wait().
> +
> +-spec get_boot_delay() -> 'ok'.
> +
> +get_boot_delay() ->
> + heart ! {self(), get_boot_delay},
> + wait().
>
> %%% Should be used solely by the release handler!!!!!!!
> -spec cycle() -> 'ok' | {'error', term()}.
> @@ -190,6 +220,28 @@ loop(Parent, Port, Cmd) ->
> {From, get_cmd} ->
> From ! {heart, get_heart_cmd(Port)},
> loop(Parent, Port, Cmd);
> + {From, set_timeout, NewTimeout} when NewTimeout > 10, NewTimeout =< 65535 ->
> + send_heart_timeout(Port, NewTimeout),
> + wait_ack(Port),
> + From ! {heart, ok},
> + loop(Parent, Port, Cmd);
> + {From, set_timeout, NewTimeout} ->
> + From ! {heart, {error, {bad_timeout, NewTimeout}}},
> + loop(Parent, Port, Cmd);
> + {From, get_timeout} ->
> + From ! {heart, get_heart_timeout(Port)},
> + loop(Parent, Port, Cmd);
> + {From, set_boot_delay, NewDelay} when NewDelay > 10, NewDelay =< 65535 ->
> + send_heart_boot_delay(Port, NewDelay),
> + wait_ack(Port),
> + From ! {heart, ok},
> + loop(Parent, Port, Cmd);
> + {From, set_boot_delay, NewDelay} ->
> + From ! {heart, {error, {bad_boot_delay, NewDelay}}},
> + loop(Parent, Port, Cmd);
> + {From, get_boot_delay} ->
> + From ! {heart, get_heart_boot_delay(Port)},
> + loop(Parent, Port, Cmd);
> {From, cycle} ->
> %% Calls back to loop
> do_cycle_port_program(From, Parent, Port, Cmd);
> @@ -245,7 +297,7 @@ do_cycle_port_program(Caller, Parent, Port, Cmd) ->
> end.
>
>
> -%% "Beates" the heart once.
> +%% "Beats" the heart once.
> send_heart_beat(Port) -> Port ! {self(), {command, [?HEART_BEAT]}}.
>
> %% Set a new HEART_COMMAND.
> @@ -261,6 +313,28 @@ get_heart_cmd(Port) ->
> {ok, Cmd}
> end.
>
> +%% Set a new HEART_BEAT_TIMEOUT.
> +send_heart_timeout(Port, Timeout) ->
> + Port ! {self(), {command, [?SET_TIMEOUT|integer_to_list(Timeout)]}}.
> +
> +get_heart_timeout(Port) ->
> + Port ! {self(), {command, [?GET_TIMEOUT]}},
> + receive
> + {Port, {data, [?HEART_TIMEOUT | Timeout]}} ->
> + {ok, list_to_integer(Timeout)}
> + end.
> +
> +%% Set a new HEART_BEAT_BOOT_DELAY.
> +send_heart_boot_delay(Port, Delay) ->
> + Port ! {self(), {command, [?SET_DELAY|integer_to_list(Delay)]}}.
> +
> +get_heart_boot_delay(Port) ->
> + Port ! {self(), {command, [?GET_DELAY]}},
> + receive
> + {Port, {data, [?HEART_BOOT_DELAY | Delay]}} ->
> + {ok, list_to_integer(Delay)}
> + end.
> +
> %% Sends shutdown command to the port.
> send_shutdown(Port) -> Port ! {self(), {command, [?SHUT_DOWN]}}.
>
>
More information about the erlang-questions mailing list