[erlang-questions] Re: Changing heartbeat timeout while the system is running
Michael Santos
michael.santos@REDACTED
Wed Apr 6 02:55:50 CEST 2011
On Tue, Apr 05, 2011 at 07:59:32AM -0400, Michael Santos wrote:
> On Tue, Apr 05, 2011 at 03:17:41PM +0400, Alexey Romanov wrote:
> > From http://www.erlang.org/doc/man/heart.html I expect that heart only
> > checks the value of HEART_BEAT_TIMEOUT on startup and it can't be
> > changed while the program runs. Is this correct (I hope not)?
>
> Yes, the value is only checked on startup. heart is a port, the timeout
> environment variable is passed as a command switch.
>
> heart already supports changing the reboot command on the fly
> (using heart:set_cmd/1), so adding support for HEART_BEAT_TIMEOUT and
> HEART_BEAT_BOOT_DELAY (maybe heart:set_timeout/1, heart:set_delay/1)
> looks simple. I can look into making a patch, if it'd be helpful.
Attached is a patch adding heart:set_timeout/1 and heart:set_boot_delay/1
which seems to work ok. I'll add tests and update the docs later.
diff --git a/erts/etc/common/Makefile.in b/erts/etc/common/Makefile.in
index 4754328..f617dfb 100644
--- a/erts/etc/common/Makefile.in
+++ b/erts/etc/common/Makefile.in
@@ -449,7 +449,8 @@ endif
ifeq ($(findstring vxworks,$(TARGET)), vxworks)
$(BINDIR)/heart: $(OBJDIR)/heart.o $(OBJDIR)/heart_config.o
- $(LD) $(LDFLAGS) -o $@ $(OBJDIR)/heart.o $(OBJDIR)/heart_config.o
+ $(LD) $(LDFLAGS) -o $@ $(OBJDIR)/heart.o $(OBJDIR)/heart_config.o \
+ $(ERTS_INTERNAL_LIBS)
$(OBJDIR)/heart_config.o: $(VXETC)/heart_config.c
$(CC) $(CFLAGS) -o $@ -c $(VXETC)/heart_config.c
@@ -467,7 +468,7 @@ else
$(BINDIR)/heart@REDACTED@: $(OBJDIR)/heart.o $(ENTRY_OBJ)
$(LD) $(LDFLAGS) $(ENTRY_LDFLAGS) -o $@ $(OBJDIR)/heart.o \
- $(ENTRY_OBJ) $(WINDSOCK)
+ $(ENTRY_OBJ) $(WINDSOCK) $(ERTS_INTERNAL_LIBS)
$(OBJDIR)/heart.o: heart.c
$(CC) $(CFLAGS) -o $@ -c heart.c
diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c
index 778b356..88b9f0a 100644
--- a/erts/etc/common/heart.c
+++ b/erts/etc/common/heart.c
@@ -163,6 +163,12 @@ struct msg {
#define CLEAR_CMD 5
#define GET_CMD 6
#define HEART_CMD 7
+#define SET_TIMEOUT 8
+#define GET_TIMEOUT 9
+#define HEART_TIMEOUT 10
+#define SET_BOOT_DELAY 11
+#define GET_BOOT_DELAY 12
+#define HEART_BOOT_DELAY 13
/* Maybe interesting to change */
@@ -207,6 +213,7 @@ static int message_loop(int,int);
static void do_terminate(int);
static int notify_ack(int);
static int heart_cmd_reply(int, char *);
+static int heart_val_reply(int, int, int);
static int write_message(int, struct msg *);
static int read_message(int, struct msg *);
static int read_skip(int, char *, int, int);
@@ -506,6 +513,38 @@ message_loop(erlin_fd, erlout_fd)
free_env_val(env);
}
break;
+	    case SET_TIMEOUT:
+		/* override the HEART_BEAT_TIMEOUT value */
+		{
+		    char buf[10];
+		    int vlen = tlen - MSG_HDR_PLUS_OP_SIZE;
+
+		    /*
+		     * Clamp the payload length to the local buffer so that
+		     * an oversized or malformed message cannot write past
+		     * the end of buf.
+		     */
+		    if (vlen < 0)
+			vlen = 0;
+		    else if (vlen > (int) sizeof(buf) - 1)
+			vlen = (int) sizeof(buf) - 1;
+		    memcpy(buf, &(mp->fill[0]), vlen);
+		    buf[vlen] = '\0';
+		    heart_beat_timeout = atoi(buf);
+		    notify_ack(erlout_fd);
+		}
+		break;
+	    case GET_TIMEOUT:
+		/* send back the current timeout value (HEART_TIMEOUT reply) */
+		heart_val_reply(erlout_fd, HEART_TIMEOUT,
+				heart_beat_timeout);
+		break;
+	    case SET_BOOT_DELAY:
+		/* override the HEART_BEAT_BOOT_DELAY value */
+		{
+		    char buf[10];
+		    int vlen = tlen - MSG_HDR_PLUS_OP_SIZE;
+
+		    /*
+		     * Clamp the payload length to the local buffer so that
+		     * an oversized or malformed message cannot write past
+		     * the end of buf.
+		     */
+		    if (vlen < 0)
+			vlen = 0;
+		    else if (vlen > (int) sizeof(buf) - 1)
+			vlen = (int) sizeof(buf) - 1;
+		    memcpy(buf, &(mp->fill[0]), vlen);
+		    buf[vlen] = '\0';
+		    heart_beat_boot_delay = atoi(buf);
+		    notify_ack(erlout_fd);
+		}
+		break;
+	    case GET_BOOT_DELAY:
+		/* send back the current boot delay (HEART_BOOT_DELAY reply) */
+		heart_val_reply(erlout_fd, HEART_BOOT_DELAY,
+				heart_beat_boot_delay);
+		break;
default:
/* ignore all other messages */
break;
@@ -744,6 +783,23 @@ heart_cmd_reply(int fd, char *s)
/*
+ * Sends back the ASCII decimal representation of an integer value,
+ * tagged with the given reply opcode.
+ *
+ */
+static int
+heart_val_reply(int fd, int op, int val)
+{
+    struct msg reply;
+    size_t digits;
+
+    /* op tags the reply; the payload is val rendered as decimal text. */
+    reply.op = op;
+    erts_snprintf(reply.fill, sizeof(reply.fill), "%d", val);
+    digits = strlen(reply.fill);
+
+    /* Length is the digit count (strlen excludes the NUL) plus the op
+     * byte, stored in network byte order. */
+    reply.len = htons(digits + 1);
+
+    return write_message(fd, &reply);
+}
+
+
+/*
* write_message
*
* Writes a message to a blocking file descriptor. Returns the total
diff --git a/lib/kernel/src/heart.erl b/lib/kernel/src/heart.erl
index e78acfc..3bf7325 100644
--- a/lib/kernel/src/heart.erl
+++ b/lib/kernel/src/heart.erl
@@ -29,7 +29,8 @@
%%%
%%% It recognizes the flag '-heart'
%%%--------------------------------------------------------------------
--export([start/0, init/2, set_cmd/1, clear_cmd/0, get_cmd/0, cycle/0]).
+-export([start/0, init/2, set_cmd/1, clear_cmd/0, get_cmd/0, cycle/0,
+ set_timeout/1, get_timeout/0, set_boot_delay/1, get_boot_delay/0]).
-define(START_ACK, 1).
-define(HEART_BEAT, 2).
@@ -38,6 +39,12 @@
-define(CLEAR_CMD, 5).
-define(GET_CMD, 6).
-define(HEART_CMD, 7).
+-define(SET_TIMEOUT, 8).
+-define(GET_TIMEOUT, 9).
+-define(HEART_TIMEOUT, 10).
+-define(SET_DELAY, 11).
+-define(GET_DELAY, 12).
+-define(HEART_BOOT_DELAY, 13).
-define(TIMEOUT, 5000).
-define(CYCLE_TIMEOUT, 10000).
@@ -103,6 +110,29 @@ clear_cmd() ->
heart ! {self(), clear_cmd},
wait().
+%% Asks the heart process to change the heart-beat timeout at runtime.
+%% The heart loop accepts integers with 10 < Timeout =< 65535; any other
+%% term is echoed back as {error, {bad_timeout, Timeout}}.
+-spec set_timeout(Timeout :: integer()) ->
+	'ok' | {'error', {'bad_timeout', term()}}.
+
+set_timeout(Timeout) ->
+    heart ! {self(), set_timeout, Timeout},
+    wait().
+
+%% Asks the heart process for the timeout currently held by the heart
+%% port program. The heart loop replies with {ok, Timeout}.
+-spec get_timeout() -> {'ok', integer()}.
+
+get_timeout() ->
+    heart ! {self(), get_timeout},
+    wait().
+
+%% Asks the heart process to change the boot delay at runtime.
+%% The heart loop accepts integers with 10 < Delay =< 65535; any other
+%% term is echoed back as {error, {bad_boot_delay, Delay}}.
+-spec set_boot_delay(Delay :: integer()) ->
+	'ok' | {'error', {'bad_boot_delay', term()}}.
+
+set_boot_delay(Delay) ->
+    heart ! {self(), set_boot_delay, Delay},
+    wait().
+
+%% Asks the heart process for the boot delay currently held by the heart
+%% port program. The heart loop replies with {ok, Delay}.
+-spec get_boot_delay() -> {'ok', integer()}.
+
+get_boot_delay() ->
+    heart ! {self(), get_boot_delay},
+    wait().
%%% Should be used solely by the release handler!!!!!!!
-spec cycle() -> 'ok' | {'error', term()}.
@@ -190,6 +220,28 @@ loop(Parent, Port, Cmd) ->
{From, get_cmd} ->
From ! {heart, get_heart_cmd(Port)},
loop(Parent, Port, Cmd);
+	{From, set_timeout, NewTimeout}
+	  when is_integer(NewTimeout), NewTimeout > 10, NewTimeout =< 65535 ->
+	    %% is_integer/1 is required: a float such as 11.5 satisfies the
+	    %% range test but would crash the heart process inside
+	    %% integer_to_list/1 when the command is built.
+	    send_heart_timeout(Port, NewTimeout),
+	    wait_ack(Port),
+	    From ! {heart, ok},
+	    loop(Parent, Port, Cmd);
+	{From, set_timeout, NewTimeout} ->
+	    %% Anything else (out of range or not an integer) is rejected
+	    %% without touching the port.
+	    From ! {heart, {error, {bad_timeout, NewTimeout}}},
+	    loop(Parent, Port, Cmd);
+	{From, get_timeout} ->
+	    From ! {heart, get_heart_timeout(Port)},
+	    loop(Parent, Port, Cmd);
+	{From, set_boot_delay, NewDelay}
+	  when is_integer(NewDelay), NewDelay > 10, NewDelay =< 65535 ->
+	    %% is_integer/1 is required: a float such as 11.5 satisfies the
+	    %% range test but would crash the heart process inside
+	    %% integer_to_list/1 when the command is built.
+	    send_heart_boot_delay(Port, NewDelay),
+	    wait_ack(Port),
+	    From ! {heart, ok},
+	    loop(Parent, Port, Cmd);
+	{From, set_boot_delay, NewDelay} ->
+	    %% Anything else (out of range or not an integer) is rejected
+	    %% without touching the port.
+	    From ! {heart, {error, {bad_boot_delay, NewDelay}}},
+	    loop(Parent, Port, Cmd);
+	{From, get_boot_delay} ->
+	    From ! {heart, get_heart_boot_delay(Port)},
+	    loop(Parent, Port, Cmd);
{From, cycle} ->
%% Calls back to loop
do_cycle_port_program(From, Parent, Port, Cmd);
@@ -245,7 +297,7 @@ do_cycle_port_program(Caller, Parent, Port, Cmd) ->
end.
-%% "Beates" the heart once.
+%% "Beats" the heart once.
send_heart_beat(Port) -> Port ! {self(), {command, [?HEART_BEAT]}}.
%% Set a new HEART_COMMAND.
@@ -261,6 +313,28 @@ get_heart_cmd(Port) ->
{ok, Cmd}
end.
+%% Sends a SET_TIMEOUT command to the heart port; the new value travels
+%% as its decimal digits after the opcode byte.
+send_heart_timeout(Port, Timeout) ->
+    Digits = integer_to_list(Timeout),
+    Port ! {self(), {command, [?SET_TIMEOUT | Digits]}}.
+
+%% Queries the heart port for its timeout and blocks until the
+%% HEART_TIMEOUT reply arrives; the digits in the reply are converted
+%% back to an integer.
+get_heart_timeout(Port) ->
+    Request = {command, [?GET_TIMEOUT]},
+    Port ! {self(), Request},
+    receive
+	{Port, {data, [?HEART_TIMEOUT | Digits]}} ->
+	    Value = list_to_integer(Digits),
+	    {ok, Value}
+    end.
+
+%% Sends a SET_DELAY command to the heart port; the new value travels
+%% as its decimal digits after the opcode byte.
+send_heart_boot_delay(Port, Delay) ->
+    Digits = integer_to_list(Delay),
+    Port ! {self(), {command, [?SET_DELAY | Digits]}}.
+
+%% Queries the heart port for its boot delay and blocks until the
+%% HEART_BOOT_DELAY reply arrives; the digits in the reply are converted
+%% back to an integer.
+get_heart_boot_delay(Port) ->
+    Request = {command, [?GET_DELAY]},
+    Port ! {self(), Request},
+    receive
+	{Port, {data, [?HEART_BOOT_DELAY | Digits]}} ->
+	    Value = list_to_integer(Digits),
+	    {ok, Value}
+    end.
+
%% Sends shutdown command to the port.
send_shutdown(Port) -> Port ! {self(), {command, [?SHUT_DOWN]}}.
More information about the erlang-questions
mailing list