[erlang-questions] Re: Changing heartbeat timeout while the system is running

Michael Santos michael.santos@REDACTED
Wed Apr 6 02:55:50 CEST 2011


On Tue, Apr 05, 2011 at 07:59:32AM -0400, Michael Santos wrote:
> On Tue, Apr 05, 2011 at 03:17:41PM +0400, Alexey Romanov wrote:
> > From http://www.erlang.org/doc/man/heart.html I expect that heart only
> > checks the value of HEART_BEAT_TIMEOUT on startup and it can't be
> > changed while the program runs. Is this correct (I hope not)?
> 
> Yes, the value is only checked on startup. heart is a port, the timeout
> environment variable is passed as a command switch.
> 
> heart already supports changing the reboot command on the fly
> (using heart:set_cmd/1), so adding support for HEART_BEAT_TIMEOUT and
> HEART_BEAT_BOOT_DELAY (maybe heart:set_timeout/1, heart:set_delay/1)
> looks simple. I can look into making a patch, if it'd be helpful.

Attached is a patch adding heart:set_timeout/1 and heart:set_boot_delay/1
(plus the matching heart:get_timeout/0 and heart:get_boot_delay/0), which
seems to work OK. I'll add tests and update the docs later.

diff --git a/erts/etc/common/Makefile.in b/erts/etc/common/Makefile.in
index 4754328..f617dfb 100644
--- a/erts/etc/common/Makefile.in
+++ b/erts/etc/common/Makefile.in
@@ -449,7 +449,8 @@ endif
 
 ifeq ($(findstring vxworks,$(TARGET)), vxworks)
 $(BINDIR)/heart: $(OBJDIR)/heart.o $(OBJDIR)/heart_config.o 
-	$(LD) $(LDFLAGS) -o $@ $(OBJDIR)/heart.o $(OBJDIR)/heart_config.o
+	$(LD) $(LDFLAGS) -o $@ $(OBJDIR)/heart.o $(OBJDIR)/heart_config.o \
+		$(ERTS_INTERNAL_LIBS)
 
 $(OBJDIR)/heart_config.o: $(VXETC)/heart_config.c
 	$(CC) $(CFLAGS) -o $@ -c $(VXETC)/heart_config.c
@@ -467,7 +468,7 @@ else
 
 $(BINDIR)/heart@REDACTED@: $(OBJDIR)/heart.o $(ENTRY_OBJ)
 	$(LD) $(LDFLAGS) $(ENTRY_LDFLAGS) -o $@ $(OBJDIR)/heart.o \
-		$(ENTRY_OBJ) $(WINDSOCK)
+		$(ENTRY_OBJ) $(WINDSOCK) $(ERTS_INTERNAL_LIBS)
 
 $(OBJDIR)/heart.o: heart.c
 	$(CC) $(CFLAGS) -o $@ -c heart.c
diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c
index 778b356..88b9f0a 100644
--- a/erts/etc/common/heart.c
+++ b/erts/etc/common/heart.c
@@ -163,6 +163,12 @@ struct msg {
 #define  CLEAR_CMD    5
 #define  GET_CMD      6
 #define  HEART_CMD    7
+#define  SET_TIMEOUT  8	/* request: payload is the new timeout as ASCII digits */
+#define  GET_TIMEOUT  9	/* request: answered with HEART_TIMEOUT */
+#define  HEART_TIMEOUT 10	/* reply carrying heart_beat_timeout */
+#define  SET_BOOT_DELAY    11	/* request: ?SET_DELAY in heart.erl */
+#define  GET_BOOT_DELAY    12	/* request: ?GET_DELAY in heart.erl */
+#define  HEART_BOOT_DELAY  13	/* reply carrying heart_beat_boot_delay */
 
 
 /*  Maybe interesting to change */
@@ -207,6 +213,7 @@ static int message_loop(int,int);
 static void do_terminate(int);
 static int notify_ack(int);
 static int heart_cmd_reply(int, char *);
+static int heart_val_reply(int, int, int);
 static int write_message(int, struct msg *);
 static int read_message(int, struct msg *);
 static int read_skip(int, char *, int, int);
@@ -506,6 +513,38 @@ message_loop(erlin_fd, erlout_fd)
 				    free_env_val(env);
 				}
 			        break;
+			case SET_TIMEOUT:
+				/* override the HEART_BEAT_TIMEOUT value */
+			        {
+			        char buf[10] = {0};  /* zero-filled: always NUL-terminated */
+			        int n = tlen-MSG_HDR_PLUS_OP_SIZE;  /* bytes to copy */
+			        if (n >= (int)sizeof(buf)) n = sizeof(buf)-1;  /* no overrun */
+			        if (n > 0) memcpy(buf, &(mp->fill[0]), n);
+			        heart_beat_timeout = atoi(buf);
+			        notify_ack(erlout_fd);
+			        }
+			        break;
+			case GET_TIMEOUT:
+				/* send back timeout value */
+			        heart_val_reply(erlout_fd, HEART_TIMEOUT,
+				       heart_beat_timeout);
+			        break;
+			case SET_BOOT_DELAY:
+				/* override the HEART_BEAT_BOOT_DELAY value */
+			        {
+			        char buf[10] = {0};  /* zero-filled: always NUL-terminated */
+			        int n = tlen-MSG_HDR_PLUS_OP_SIZE;  /* bytes to copy */
+			        if (n >= (int)sizeof(buf)) n = sizeof(buf)-1;  /* no overrun */
+			        if (n > 0) memcpy(buf, &(mp->fill[0]), n);
+			        heart_beat_boot_delay = atoi(buf);
+			        notify_ack(erlout_fd);
+			        }
+			        break;
+			case GET_BOOT_DELAY:
+				/* send back delay value */
+			        heart_val_reply(erlout_fd, HEART_BOOT_DELAY,
+				       heart_beat_boot_delay);
+			        break;
 			default:
 				/* ignore all other messages */
 				break;
@@ -744,6 +783,23 @@ heart_cmd_reply(int fd, char *s)
 
 
 /*
+ * Sends message `op' whose payload is `val' as decimal ASCII text;
+ * returns whatever write_message() returns.
+ */
+static int
+heart_val_reply(int fd, int op, int val)
+{
+  struct msg m;
+
+  erts_snprintf(m.fill, sizeof(m.fill), "%d", val);
+  m.op = op;
+  m.len = htons(strlen(m.fill) + 1);	/* digits plus the op byte */
+
+  return write_message(fd, &m);
+}
+
+
+/*
  *  write_message
  *
  *  Writes a message to a blocking file descriptor. Returns the total
diff --git a/lib/kernel/src/heart.erl b/lib/kernel/src/heart.erl
index e78acfc..3bf7325 100644
--- a/lib/kernel/src/heart.erl
+++ b/lib/kernel/src/heart.erl
@@ -29,7 +29,8 @@
 %%%
 %%% It recognizes the flag '-heart'
 %%%--------------------------------------------------------------------
--export([start/0, init/2, set_cmd/1, clear_cmd/0, get_cmd/0, cycle/0]).
+-export([start/0, init/2, set_cmd/1, clear_cmd/0, get_cmd/0, cycle/0,
+        set_timeout/1, get_timeout/0, set_boot_delay/1, get_boot_delay/0]).
 
 -define(START_ACK, 1).
 -define(HEART_BEAT, 2).
@@ -38,6 +39,12 @@
 -define(CLEAR_CMD, 5).
 -define(GET_CMD, 6).
 -define(HEART_CMD, 7).
+-define(SET_TIMEOUT, 8).          % SET_TIMEOUT in heart.c
+-define(GET_TIMEOUT, 9).          % GET_TIMEOUT in heart.c
+-define(HEART_TIMEOUT, 10).       % reply to ?GET_TIMEOUT
+-define(SET_DELAY, 11).           % SET_BOOT_DELAY in heart.c
+-define(GET_DELAY, 12).           % GET_BOOT_DELAY in heart.c
+-define(HEART_BOOT_DELAY, 13).    % reply to ?GET_DELAY
 
 -define(TIMEOUT, 5000).
 -define(CYCLE_TIMEOUT, 10000).
@@ -103,6 +110,29 @@ clear_cmd() ->
     heart ! {self(), clear_cmd},
     wait().
 
+-spec set_timeout(integer()) -> 'ok' | {'error', {'bad_timeout', integer()}}.
+%% Values outside 11..65535 are answered with {error, {bad_timeout, Timeout}}.
+set_timeout(Timeout) ->
+    heart ! {self(), set_timeout, Timeout},
+    wait().
+
+-spec get_timeout() -> {'ok', integer()}.
+%% The heart process replies with {ok, Timeout} as reported by the heart port.
+get_timeout() ->
+    heart ! {self(), get_timeout},
+    wait().
+
+-spec set_boot_delay(integer()) -> 'ok' | {'error', {'bad_boot_delay', integer()}}.
+%% Values outside 11..65535 are answered with {error, {bad_boot_delay, Delay}}.
+set_boot_delay(Delay) ->
+    heart ! {self(), set_boot_delay, Delay},
+    wait().
+
+-spec get_boot_delay() -> {'ok', integer()}.
+%% The heart process replies with {ok, Delay} as reported by the heart port.
+get_boot_delay() ->
+    heart ! {self(), get_boot_delay},
+    wait().
 
 %%% Should be used solely by the release handler!!!!!!!
 -spec cycle() -> 'ok' | {'error', term()}.
@@ -190,6 +220,28 @@ loop(Parent, Port, Cmd) ->
 	{From, get_cmd} ->
 	    From ! {heart, get_heart_cmd(Port)},
 	    loop(Parent, Port, Cmd);
+	{From, set_timeout, NewTimeout} when is_integer(NewTimeout), NewTimeout > 10, NewTimeout =< 65535 ->
+	    send_heart_timeout(Port, NewTimeout),	% a float would crash integer_to_list/1
+	    wait_ack(Port),
+	    From ! {heart, ok},
+	    loop(Parent, Port, Cmd);
+	{From, set_timeout, NewTimeout} ->	% anything else is rejected, not crashed on
+	    From ! {heart, {error, {bad_timeout, NewTimeout}}},
+	    loop(Parent, Port, Cmd);
+	{From, get_timeout} ->
+	    From ! {heart, get_heart_timeout(Port)},
+	    loop(Parent, Port, Cmd);
+	{From, set_boot_delay, NewDelay} when is_integer(NewDelay), NewDelay > 10, NewDelay =< 65535 ->
+	    send_heart_boot_delay(Port, NewDelay),	% a float would crash integer_to_list/1
+	    wait_ack(Port),
+	    From ! {heart, ok},
+	    loop(Parent, Port, Cmd);
+	{From, set_boot_delay, NewDelay} ->	% anything else is rejected, not crashed on
+	    From ! {heart, {error, {bad_boot_delay, NewDelay}}},
+	    loop(Parent, Port, Cmd);
+	{From, get_boot_delay} ->
+	    From ! {heart, get_heart_boot_delay(Port)},
+	    loop(Parent, Port, Cmd);
 	{From, cycle} ->
 	    %% Calls back to loop
 	    do_cycle_port_program(From, Parent, Port, Cmd);  
@@ -245,7 +297,7 @@ do_cycle_port_program(Caller, Parent, Port, Cmd) ->
     end.
     
 
-%% "Beates" the heart once.
+%% "Beats" the heart once.
 send_heart_beat(Port) -> Port ! {self(), {command, [?HEART_BEAT]}}.
 
 %% Set a new HEART_COMMAND.
@@ -261,6 +313,28 @@ get_heart_cmd(Port) ->
 	    {ok, Cmd}
     end.
 
+%% Set a new HEART_BEAT_TIMEOUT (sent to the port as decimal text).
+send_heart_timeout(Port, Timeout) ->
+    Port ! {self(), {command, [?SET_TIMEOUT|integer_to_list(Timeout)]}}.
+
+get_heart_timeout(Port) ->    % asks the port and returns {ok, Timeout}
+    Port ! {self(), {command, [?GET_TIMEOUT]}},
+    receive    % blocks until the port answers; there is no 'after' clause
+	{Port, {data, [?HEART_TIMEOUT | Timeout]}} ->
+	    {ok, list_to_integer(Timeout)}
+    end.
+
+%% Set a new HEART_BEAT_BOOT_DELAY (sent to the port as decimal text).
+send_heart_boot_delay(Port, Delay) ->
+    Port ! {self(), {command, [?SET_DELAY|integer_to_list(Delay)]}}.
+
+get_heart_boot_delay(Port) ->    % asks the port and returns {ok, Delay}
+    Port ! {self(), {command, [?GET_DELAY]}},
+    receive    % blocks until the port answers; there is no 'after' clause
+	{Port, {data, [?HEART_BOOT_DELAY | Delay]}} ->
+	    {ok, list_to_integer(Delay)}
+    end.
+
 %% Sends shutdown command to the port.
 send_shutdown(Port) -> Port ! {self(), {command, [?SHUT_DOWN]}}.
 



More information about the erlang-questions mailing list