aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org> 2018-05-20 10:53:30 +0300
committer Sergey Poznyakoff <gray@gnu.org> 2018-05-20 10:53:30 +0300
commit ddb46c6aa42ada061e51c635c0230e4dc8eab881 (patch)
tree b003ae6af354f553207981b4fc281e8f9e19c60e /src
parent ed8389beadb7cf1f8d95fe7addbc9ff2783f4d07 (diff)
download genrc-ddb46c6aa42ada061e51c635c0230e4dc8eab881.tar.gz
genrc-ddb46c6aa42ada061e51c635c0230e4dc8eab881.tar.bz2
Sentinel mode: restart the program on certain conditions
* Makefile.am: Create the ChangeLog file from git log.
* configure.ac: Request git2chg
* src/com_start.c: Use sigaction instead of signal.
* src/genrc.8: Document new options.
* src/genrc.c: New options --restart-on-exit and --restart-on-signal.
* src/genrc.h (str_to_sig, str_to_int): New prototypes.
(add_restart_condition): New prototype.
* src/sentinel.c (restart_on, add_restart_condition):
(check_failure_rate): New functions.
(wait_loop): Return if restart is requested.
(sentinel): Restart the program if needed.
Diffstat (limited to 'src')
-rw-r--r-- src/com_start.c 12
-rw-r--r-- src/genrc.8 58
-rw-r--r-- src/genrc.c 85
-rw-r--r-- src/genrc.h 10
-rw-r--r-- src/sentinel.c 200
5 files changed, 319 insertions, 46 deletions
diff --git a/src/com_start.c b/src/com_start.c
index 5744e39..3a9dffc 100644
--- a/src/com_start.c
+++ b/src/com_start.c
@@ -45,5 +45,9 @@ timedwaitpid(pid_t pid, int *status)
int rc = -1;
- SIGHANDLER oldsig;
-
- oldsig = signal(SIGCHLD, sigchld);
+ struct sigaction act, oldact;
+
+ act.sa_handler = sigchld;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGCHLD, &act, &oldact);
+
gettimeofday(&stoptime, NULL);
@@ -75,3 +79,3 @@ timedwaitpid(pid_t pid, int *status)
}
- signal(SIGCHLD, oldsig);
+ sigaction(SIGCHLD, &oldact, NULL);
if (rc) {
diff --git a/src/genrc.8 b/src/genrc.8
index 00522ee..959a00e 100644
--- a/src/genrc.8
+++ b/src/genrc.8
@@ -15,3 +15,3 @@
.\" along with genrc. If not, see <http://www.gnu.org/licenses/>.
-.TH GENRC 8 "May 17, 2018" "GENRC" "Genrc User Manual"
+.TH GENRC 8 "May 20, 2018" "GENRC" "Genrc User Manual"
.SH NAME
@@ -38,2 +38,4 @@ genrc \- generic system initialization script helper
[\fB\-\-program=\fIPROGRAM\fR]\
+ [\fB\-\-restart\-on\-exit=\fR[\fB!\fR]\fISTATUS\fR[\fB,\fISTATUS\fR...]]\
+ [\fB\-\-restart\-on\-signal=\fR[\fB!\fR]\fISIG\fR[\fB,\fISIG\fR...]]\
[\fB\-\-sentinel\fR]\
@@ -103,2 +105,26 @@ terminates. Unless the \fB\-\-pid\-from\fR option is given,
\fB\-\-pid\-from=FILE:\fIFILENAME\fR will be assumed.
+.PP
+In sentinel mode, it is possible to restart the program if it
+terminates with a specific exit code or on a specific signal. This is
+controlled by the \fB\-\-restart\-on\-exit\fR and
+\fB\-\-restart\-on\-signal\fR options. Use this feature to ensure the
+service provided by the program won't get terminated because of
+hitting a bug or encountering an unforeseen external condition. For
+example, the following two options will ensure that the program is
+restarted unless it exits with status 0 or is terminated by the
+SIGTERM or SIGQUIT signal:
+.EX
+--restart-on-exit='!0' --restart-on-signal='!TERM,QUIT'
+.EE
+.PP
+If restarts are requested, \fBgenrc\fR will control how often it has
+to restart the program using the same algorithm as
+.BR init (8).
+Namely, if the program is restarted more than 10 times within two
+minutes, \fBgenrc\fR will disable subsequent restarts for the next
+5 minutes. If the \fB\-\-create\-pidfile\fR option was used, the
+PID of the controlling \fBgenrc\fR process will be stored in the
+file during that interval. If the \fBSIGHUP\fR signal is delivered
+during the sleep interval, the sleep will be broken prematurely and
+the program restarted again.
.SS status
@@ -190,5 +216,33 @@ Where to look for PIDs of the running programs.
.TP
+\fB\-\-restart\-on\-exit=\fR[\fB!\fR]\fISTATUS\fR[\fB,\fISTATUS\fR...]
+This option takes effect when used together with
+\fB\-\-sentinel\fR. If the program terminates with one of status
+codes listed as the argument to this option, it will be immediately
+restarted. The exclamation mark at the start of the list inverts the
+set, e.g. \fB\-\-restart\-on\-exit='!0,1'\fR means restart unless the
+program exit code is 0 or 1. Note the use of quotation to prevent the
+\fB!\fR from being interpreted by the shell.
+.TP
+\fB\-\-restart\-on\-signal=\fR[\fB!\fR]\fISIG\fR[\fB,\fISIG\fR...]
+This option takes effect when used together with
+\fB\-\-sentinel\fR. If the program terminates due to receiving one of
+the signals from this list, it will be immediately restarted. Each
+\fISIG\fR is either a signal number, or a signal name, as listed in
+.BR signal (7).
+The \fBSIG\fR prefix can be omitted from the signal name. Names are
+case-insensitive. Thus, \fB1\fR, \fBHUP\fR, \fBSIGHUP\fR, and
+\fBsighup\fR all stand for the same signal.
+.sp
+The exclamation mark at the start of the list complements the signal
+set, so that e.g. \fB\-\-restart\-on\-signal='!TERM,QUIT,INT'\fR will
+restart the program unless it terminates on one of the listed signals.
+.TP
\fB\-\-sentinel\fR
\fIPROGRAM\fR runs in foreground; disconnect from the controlling
-terminal, run it and act as a sentinel.
+terminal, start it and run in background until it terminates. The
+program's stdout and stderr are sent to the syslog facility
+\fBdaemon\fR, priorities \fBinfo\fR and \fBerr\fR, respectively.
+
+See the options \fB\-\-restart\-on\-exit\fR and
+\fB\-\-restart\-on\-signal\fR for details on how to restart the program.
.TP
diff --git a/src/genrc.c b/src/genrc.c
index ae3070d..9052987 100644
--- a/src/genrc.c
+++ b/src/genrc.c
@@ -27,3 +27,5 @@ enum {
OPT_SIGNAL_STOP,
- OPT_CREATE_PIDFILE
+ OPT_CREATE_PIDFILE,
+ OPT_RESTART_ON_EXIT,
+ OPT_RESTART_ON_SIGNAL,
};
@@ -31,18 +33,20 @@ enum {
struct option longopts[] = {
- { "help", no_argument, 0, 'h' },
- { "usage", no_argument, 0, OPT_USAGE },
- { "command", required_argument, 0, 'c' },
- { "program", required_argument, 0, 'p' },
- { "pid-from", required_argument, 0, 'P' },
- { "pidfile", required_argument, 0, 'F' },
- { "timeout", required_argument, 0, 't' },
- { "signal-reload", required_argument, 0, OPT_SIGNAL_RELOAD },
- { "no-reload", no_argument, 0, OPT_NO_RELOAD },
- { "signal-stop", required_argument, 0, OPT_SIGNAL_STOP },
- { "sentinel", no_argument, 0, 'S' },
- { "create-pidfile", required_argument, 0, OPT_CREATE_PIDFILE },
- { "version", no_argument, 0, OPT_VERSION },
- { "verbose", no_argument, 0, 'v' },
- { "user", required_argument, 0, 'u' },
- { "group", required_argument, 0, 'g' },
+ { "help", no_argument, 0, 'h' },
+ { "usage", no_argument, 0, OPT_USAGE },
+ { "command", required_argument, 0, 'c' },
+ { "program", required_argument, 0, 'p' },
+ { "pid-from", required_argument, 0, 'P' },
+ { "pidfile", required_argument, 0, 'F' },
+ { "timeout", required_argument, 0, 't' },
+ { "signal-reload", required_argument, 0, OPT_SIGNAL_RELOAD },
+ { "no-reload", no_argument, 0, OPT_NO_RELOAD },
+ { "signal-stop", required_argument, 0, OPT_SIGNAL_STOP },
+ { "sentinel", no_argument, 0, 'S' },
+ { "create-pidfile", required_argument, 0, OPT_CREATE_PIDFILE },
+ { "version", no_argument, 0, OPT_VERSION },
+ { "verbose", no_argument, 0, 'v' },
+ { "user", required_argument, 0, 'u' },
+ { "group", required_argument, 0, 'g' },
+ { "restart-on-exit", required_argument, 0, OPT_RESTART_ON_EXIT },
+ { "restart-on-signal", required_argument, 0, OPT_RESTART_ON_SIGNAL },
{ NULL }
@@ -129,12 +133,18 @@ is_numeric_str(char const *s)
int
-sig_name_to_str(char const *s)
+str_to_int(char const *s)
+{
+ char *end;
+ unsigned long n;
+ errno = 0;
+ n = strtoul(s, &end, 10);
+ if (errno || *end || n > UINT_MAX)
+ return -1;
+ return n;
+}
+
+int
+str_to_sig(char const *s)
{
if (is_numeric_str(s)) {
- char *end;
- unsigned long n;
- errno = 0;
- n = strtoul(s, &end, 10);
- if (errno || *end || n > UINT_MAX)
- return -1;
- return n;
+ return str_to_int(s);
} else {
@@ -185,4 +195,2 @@ char const *help_msg[] = {
" terminate",
- " --sentinel PROGRAM runs in foreground; disconnect from the",
- " controlling terminal, run it and act as a sentinel",
" -P, --pid-from=SOURCE where to look for PIDs of the running programs",
@@ -196,2 +204,13 @@ char const *help_msg[] = {
"",
+ "Sentinel mode:",
+ "",
+ " --sentinel PROGRAM runs in foreground; disconnect from the",
+ " controlling terminal, run it and act as a sentinel",
+ " --restart-on-exit=[!]CODE[,...]",
+ " restart the program if it exits with one of the",
+ " listed status codes",
+ " --restart-on-signal=[!]SIG[,...]",
+ " restart the program if it terminates on one of the",
+ " listed signals",
+ "",
"Informational options:",
@@ -274,2 +293,4 @@ char const *usage_msg[] = {
"[--program=PROGRAM]",
+ "[--restart-on-exit=[!]CODE[,...]]",
+ "[--restart-on-signal=[!]SIG[,...]]",
"[--sentinel]",
@@ -424,2 +445,8 @@ main(int argc, char **argv)
break;
+ case OPT_RESTART_ON_EXIT:
+ add_restart_condition(RESTART_ON_EXIT, optarg);
+ break;
+ case OPT_RESTART_ON_SIGNAL:
+ add_restart_condition(RESTART_ON_SIGNAL, optarg);
+ break;
case OPT_NO_RELOAD:
@@ -452,3 +479,3 @@ main(int argc, char **argv)
else if ((p = getenv("GENRC_SIGNAL_RELOAD")) != NULL) {
- genrc_signal_reload = sig_name_to_str(p);
+ genrc_signal_reload = str_to_sig(p);
if (genrc_signal_reload == -1)
@@ -460,3 +487,3 @@ main(int argc, char **argv)
if ((p = getenv("GENRC_SIGNAL_STOP")) != NULL) {
- genrc_signal_stop = sig_name_to_str(p);
+ genrc_signal_stop = str_to_sig(p);
if (genrc_signal_stop <= 0)
diff --git a/src/genrc.h b/src/genrc.h
index 9842016..c6ee57b 100644
--- a/src/genrc.h
+++ b/src/genrc.h
@@ -65,2 +65,5 @@ int pid_is_running(pid_t pid);
void runas(void);
+int str_to_sig(char const *);
+int str_to_int(char const *);
+
@@ -111,3 +114,10 @@ int match_pcre(PROCSCANBUF buf, char const *arg);
+enum {
+ RESTART_ON_EXIT,
+ RESTART_ON_SIGNAL
+};
+void add_restart_condition(int type, char const *arg);
+
+
struct genrc_pid_closure {
diff --git a/src/sentinel.c b/src/sentinel.c
index 59b89cc..33d3e06 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -10,2 +10,3 @@ There is NO WARRANTY, to the extent permitted by law.
#include <syslog.h>
+#include <time.h>
@@ -33,2 +34,9 @@ write_pid_file(pid_t pid)
+static void
+unlink_pid_file(void)
+{
+ if (genrc_create_pidfile)
+ unlink(genrc_create_pidfile);
+}
+
#define LOGBUFSIZE 1024
@@ -70,2 +78,88 @@ log_buffer_read(int fd, struct log_buffer *lb)
+struct restart_cond {
+ struct restart_cond *next;
+ int type;
+ int negate;
+ int numc;
+ int numv[1];
+};
+
+struct restart_cond *restart_head, *restart_tail;
+
+static int
+restart_on(int type, int num)
+{
+ struct restart_cond *cond;
+
+ for (cond = restart_head; cond; cond = cond->next) {
+ if (cond->type == type) {
+ int result = cond->negate;
+ int i;
+ for (i = 0; i < cond->numc; i++) {
+ if (cond->numv[i] == num) {
+ result = !result;
+ break;
+ }
+ }
+ if (result)
+ return 1;
+ }
+ }
+ return 0;
+}
+
+typedef int (*RESTART_STON)(char const *);
+
+static RESTART_STON restart_ston[] = { str_to_int, str_to_sig };
+static char const *restart_what[] = { "exit status", "signal" };
+
+void
+add_restart_condition(int type, char const *arg)
+{
+ struct wordsplit ws;
+ size_t i;
+ int negate = 0;
+ struct restart_cond *cond;
+ RESTART_STON ston = restart_ston[type];
+
+ if (arg[0] == '!') {
+ negate = 1;
+ arg++;
+ }
+
+ ws.ws_delim = ",";
+ ws.ws_error = genrc_error;
+ if (wordsplit(arg, &ws,
+ WRDSF_NOCMD
+ | WRDSF_NOVAR
+ | WRDSF_DELIM
+ | WRDSF_ENOMEMABRT
+ | WRDSF_SHOWERR
+ | WRDSF_ERROR))
+ exit(1);
+
+ if (ws.ws_wordc == 0)
+ usage_error("empty restart condition");
+
+ cond = xmalloc(sizeof(*cond)
+ + (ws.ws_wordc - 1) * sizeof(cond->numv[0]));
+ cond->next = NULL;
+ cond->type = type;
+ cond->negate = negate;
+ cond->numc = ws.ws_wordc;
+ for (i = 0; i < ws.ws_wordc; i++) {
+ int n = ston(ws.ws_wordv[i]);
+ if (n == -1)
+ usage_error("bad %s: %s", restart_what[type],
+ ws.ws_wordv[i]);
+ cond->numv[i] = n;
+ }
+
+ if (restart_tail)
+ restart_tail->next = cond;
+ else
+ restart_head = cond;
+ restart_tail = cond;
+}
+
void
@@ -76,3 +170,3 @@ wait_loop(pid_t child, int out, int err)
struct log_buffer obuf, ebuf;
-
+
openlog(genrc_program, LOG_PID, LOG_DAEMON);
@@ -85,10 +179,12 @@ wait_loop(pid_t child, int out, int err)
if (waitpid(child, &status, WNOHANG) == child) {
- if (genrc_create_pidfile)
- unlink(genrc_create_pidfile);
+ write_pid_file(getpid());
if (WIFEXITED(status)) {
+ int code = WEXITSTATUS(status);
syslog(LOG_INFO, "%s exited with status %d",
- genrc_program, WEXITSTATUS(status));
- _exit(WEXITSTATUS(status));
+ genrc_program, code);
+ if (restart_on(RESTART_ON_EXIT, code))
+ return;
} else if (WIFSIGNALED(status)) {
char const *coremsg = "";
+ int sig = WTERMSIG(status);
#ifdef WCOREDUMP
@@ -98,3 +194,5 @@ wait_loop(pid_t child, int out, int err)
syslog(LOG_INFO, "%s terminated on signal %d%s",
- genrc_program, WTERMSIG(status), coremsg);
+ genrc_program, sig, coremsg);
+ if (restart_on(RESTART_ON_SIGNAL, sig))
+ return;
} else if (WIFSTOPPED(status)) {
@@ -130,3 +228,4 @@ wait_loop(pid_t child, int out, int err)
}
- _exit(1);
+ unlink_pid_file();
+ _exit(0);
}
@@ -180,3 +279,67 @@ start_command(int p[])
}
+
+/* Restart rate control */
+static int volatile hup_received;
+
+static void
+sighup(int sig)
+{
+ hup_received++;
+}
+
+/* Consider the number of restarts during this interval */
+#define TESTTIME 2*60
+/* Stop respawning and go to sleep if it exceeds this number */
+#define MAXSPAWN 10
+/* Sleep that many seconds, then retry */
+#define SLEEPTIME 5*60
+
+struct ratectl {
+ time_t start_time; /* Start of the test interval */
+ unsigned failcount; /* Number of restarts done so far */
+};
+static void
+check_failure_rate(struct ratectl *rate)
+{
+ time_t now;
+ struct timeval start, stop, ttw;
+
+ time(&now);
+ if (rate->start_time + TESTTIME > now)
+ rate->failcount++;
+ else {
+ rate->failcount = 0;
+ rate->start_time = now;
+ }
+
+ if (rate->failcount > MAXSPAWN) {
+ syslog(LOG_NOTICE,
+ "%s respawning too fast; disabled for %d minutes",
+ genrc_program, SLEEPTIME / 60);
+
+ gettimeofday(&stop, NULL);
+ stop.tv_sec += SLEEPTIME;
+ while (1) {
+ gettimeofday(&start, NULL);
+ if (timercmp(&start, &stop, >=))
+ break;
+ timersub(&stop, &start, &ttw);
+ if (select(0, NULL, NULL, NULL, &ttw) < 0) {
+ if (errno == EINTR) {
+ if (hup_received) {
+ hup_received = 0;
+ break;
+ }
+ } else {
+ system_error(errno, "select");
+ break;
+ }
+ }
+ }
+
+ rate->failcount = 0;
+ }
+}
+
int
@@ -186,2 +349,4 @@ sentinel(void)
int p[2];
+ struct ratectl ctl;
+ struct sigaction act;
@@ -207,6 +372,19 @@ sentinel(void)
/* Grand-child */
- pid = start_command(p);
- if (pid == -1)
- _exit(127);
- wait_loop(pid, p[0], p[1]);
+ act.sa_handler = sighup;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGHUP, &act, NULL);
+
+ ctl.start_time = 0;
+ ctl.failcount = 0;
+ while (1) {
+ pid = start_command(p);
+ if (pid == -1)
+ _exit(127);
+ if (pid == 0)
+ break;
+ wait_loop(pid, p[0], p[1]);
+ check_failure_rate(&ctl);
+ syslog(LOG_INFO, "restarting %s", genrc_program);
+ }
_exit(1);

Return to:

Send suggestions and report system problems to the System administrator.