summary | refs | log | tree | commit | diff | about
author: Sergey Poznyakoff <gray@gnu.org> 2018-05-20 07:53:30 (GMT)
committer: Sergey Poznyakoff <gray@gnu.org> 2018-05-20 07:53:30 (GMT)
commit: ddb46c6aa42ada061e51c635c0230e4dc8eab881 (patch) (side-by-side diff)
tree: b003ae6af354f553207981b4fc281e8f9e19c60e
parent: ed8389beadb7cf1f8d95fe7addbc9ff2783f4d07 (diff)
download: genrc-ddb46c6aa42ada061e51c635c0230e4dc8eab881.tar.gz
download: genrc-ddb46c6aa42ada061e51c635c0230e4dc8eab881.tar.bz2
Sentinel mode: restart the program on certain conditions
* Makefile.am: Create the ChangeLog file from git log.
* configure.ac: Request git2chg
* src/com_start.c: Use sigaction instead of signal.
* src/genrc.8: Document new options.
* src/genrc.c: New options --restart-on-exit and --restart-on-signal.
* src/genrc.h (str_to_sig, str_to_int): New prototypes.
(add_restart_condition): New prototype.
* src/sentinel.c (restart_on, add_restart_condition):
(check_failure_rate): New functions.
(wait_loop): Return if restart is requested.
(sentinel): Restart the program if needed.
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r--  Makefile.am       11
-rw-r--r--  configure.ac       4
-rw-r--r--  src/com_start.c   12
-rw-r--r--  src/genrc.8       58
-rw-r--r--  src/genrc.c       85
-rw-r--r--  src/genrc.h       10
-rw-r--r--  src/sentinel.c   200
7 files changed, 332 insertions, 48 deletions
diff --git a/Makefile.am b/Makefile.am
index 31e9e5f..dac3cb3 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1 +1,12 @@
SUBDIRS = grecs src
+dist: ChangeLog
+.PHONY: ChangeLog
+ChangeLog:
+ $(AM_V_GEN)if test -d .git; then \
+ git log --pretty='format:%ct %an <%ae>%n%n%s%n%n%b%n' | \
+ awk -f $(top_srcdir)/@GRECS_SUBDIR@/build-aux/git2chg.awk \
+ > ChangeLog.tmp; \
+ cmp ChangeLog ChangeLog.tmp > /dev/null 2>&1 || \
+ mv ChangeLog.tmp ChangeLog; \
+ rm -f ChangeLog.tmp; \
+ fi
diff --git a/configure.ac b/configure.ac
index 5d36092..a568649 100644
--- a/configure.ac
+++ b/configure.ac
@@ -15,7 +15,7 @@
# along with genrc. If not, see <http://www.gnu.org/licenses/>.
AC_PREREQ([2.69])
-AC_INIT([genrc], [1.0], [gray@gnu.org])
+AC_INIT([genrc], [1.0.90], [gray@gnu.org])
AC_CONFIG_SRCDIR([src/genrc.c])
AC_CONFIG_HEADERS([config.h])
AM_INIT_AUTOMAKE([1.11 foreign silent-rules])
@@ -37,7 +37,7 @@ AC_CHECK_HEADERS([getopt.h pcre.h])
# Checks for library functions.
AC_CHECK_FUNCS([getdtablesize])
-GRECS_SETUP(grecs, [all-parsers])
+GRECS_SETUP(grecs, [all-parsers git2chg])
AM_CONDITIONAL([COND_PCRE],
[test "$ac_cv_header_pcre_h" = yes && test "$ac_cv_lib_pcre_main" = yes])
diff --git a/src/com_start.c b/src/com_start.c
index 5744e39..3a9dffc 100644
--- a/src/com_start.c
+++ b/src/com_start.c
@@ -43,9 +43,13 @@ timedwaitpid(pid_t pid, int *status)
{
struct timeval now, stoptime, ttw;
int rc = -1;
- SIGHANDLER oldsig;
-
- oldsig = signal(SIGCHLD, sigchld);
+ struct sigaction act, oldact;
+
+ act.sa_handler = sigchld;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGCHLD, &act, &oldact);
+
gettimeofday(&stoptime, NULL);
stoptime.tv_sec += genrc_timeout;
while (1) {
@@ -73,7 +77,7 @@ timedwaitpid(pid_t pid, int *status)
}
}
- signal(SIGCHLD, oldsig);
+ sigaction(SIGCHLD, &oldact, NULL);
if (rc) {
kill(pid, SIGKILL);
}
diff --git a/src/genrc.8 b/src/genrc.8
index 00522ee..959a00e 100644
--- a/src/genrc.8
+++ b/src/genrc.8
@@ -13,7 +13,7 @@
.\"
.\" You should have received a copy of the GNU General Public License
.\" along with genrc. If not, see <http://www.gnu.org/licenses/>.
-.TH GENRC 8 "May 17, 2018" "GENRC" "Genrc User Manual"
+.TH GENRC 8 "May 20, 2018" "GENRC" "Genrc User Manual"
.SH NAME
genrc \- generic system initialization script helper
.SH SYNOPSIS
@@ -36,6 +36,8 @@ genrc \- generic system initialization script helper
[\fB\-\-pid\-from=\fISOURCE\fR]\
[\fB\-\-pidfile=\fIPIDFILE\fR]\
[\fB\-\-program=\fIPROGRAM\fR]\
+ [\fB\-\-restart\-on\-exit=\fR[\fB!\fR]\fISTATUS\fR[\fB,\fISTATUS\fR...]]\
+ [\fB\-\-restart\-on\-signal=\fR[\fB!\fR]\fISIG\fR[\fB,\fISIG\fR...]]\
[\fB\-\-sentinel\fR]\
[\fB\-\-signal\-reload=\fISIG\fR]\
[\fB\-\-signal\-stop=\fISIG\fR]\
@@ -101,6 +103,30 @@ If the \fB\-\-create\-pidfile=\fIFILENAME\fR option is given together with
in \fIFILE\fR. The file will be unlinked after the subsidiary command
terminates. Unless the \fB\-\-pid\-from\fR option is given,
\fB\-\-pid\-from=FILE:\fIFILENAME\fR will be assumed.
+.PP
+In sentinel mode, it is possible to restart the program if it
+terminates with a specific exit code or on a specific signal. This is
+controlled by the \fB\-\-restart\-on\-exit\fR and
+\fB\-\-restart\-on\-signal\fR options. Use this feature to ensure the
+service provided by the program won't get terminated because of
+hitting a bug or encountering an unforeseen external condition. For
+example, the following two options will ensure that the program will
+be terminated only if it exits with status 0 or it is terminated by
+SIGTERM or SIGQUIT signal:
+.EX
+--restart-on-exit='!0' --restart-on-signal='!TERM,QUIT'
+.EE
+.PP
+If restarts are requested, \fBgenrc\fR will control how often it has
+to restart the program using the same algorithm as
+.B init (8).
+Namely, if the program is restarted more than 10 times within two
+minutes, \fBgenrc\fR will disable subsequent restarts for the next
+5 minutes. If the \fB\-\-create\-pidfile\fR option was used, the
+PID of the controlling \fBgenrc\fR process will be stored in the
+file during that interval. If the \fBSIGHUP\fR signal is delivered
+during the sleep interval, the sleep will be broken prematurely and
+the program restarted again.
.SS status
In \fBstatus\fR mode \fBgenrc\fR verifies if the \fICOMMAND\fR is
already running and outputs its status on the standard output. To this
@@ -188,9 +214,37 @@ Name of the program to run.
\fB\-P\fR, \fB\-\-pid\-from=\fISOURCE\fR
Where to look for PIDs of the running programs.
.TP
+\fB\-\-restart\-on\-exit=\fR[\fB!\fR]\fISTATUS\fR[\fB,\fISTATUS\fR...]
+This option takes effect when used together with
+\fB\-\-sentinel\fR. If the program terminates with one of status
+codes listed as the argument to this option, it will be immediately
+restarted. The exclamation mark at the start of the list inverts the
+set, e.g. \fB\-\-restart\-on\-exit='!0,1'\fR means restart unless the
+program exit code is 0 or 1. Note the use of quotation to prevent the
+\fB!\fR from being interpreted by the shell.
+.TP
+\fB\-\-restart\-on\-signal=\fR[\fB!\fR]\fISIG\fR[\fB,\fISIG\fR...]
+This option takes effect when used together with
+\fB\-\-sentinel\fR. If the program terminates due to receiving one of
+the signals from this list, it will be immediately restarted. Each
+\fISIG\fR is either a signal number, or a signal name, as listed in
+.BR signal (7).
+The \fBSIG\fR prefix can be omitted from the signal name. Names are
+case-insensitive. Thus, \fB1\fR, \fBHUP\fR, \fBSIGHUP\fR, and
+\fBsighup\fR all stand for the same signal.
+.sp
+The exclamation mark at the start of the list complements the signal
+set, so that e.g. \fB\-\-restart\-on\-signal='!TERM,QUIT,INT'\fR will
+restart the program unless it terminates on one of the listed signals.
+.TP
\fB\-\-sentinel\fR
\fIPROGRAM\fR runs in foreground; disconnect from the controlling
-terminal, run it and act as a sentinel.
+terminal, start it and run in background until it terminates. The
+program's stdout and stderr are sent to the syslog facility
+\fBdaemon\fR, priorities \fBinfo\fR and \fBerr\fR, correspondingly.
+
+See the options \fB\-\-restart\-on\-exit\fR and
+\fB\-\-restart\-on\-signal\fR for details on how to restart the program.
.TP
\fB\-\-signal\-reload=\fISIG\fR
Signal to send on reload (default: \fBSIGHUP\fR). Setting it to 0 is
diff --git a/src/genrc.c b/src/genrc.c
index ae3070d..9052987 100644
--- a/src/genrc.c
+++ b/src/genrc.c
@@ -25,26 +25,30 @@ enum {
OPT_SIGNAL_RELOAD,
OPT_NO_RELOAD,
OPT_SIGNAL_STOP,
- OPT_CREATE_PIDFILE
+ OPT_CREATE_PIDFILE,
+ OPT_RESTART_ON_EXIT,
+ OPT_RESTART_ON_SIGNAL,
};
struct option longopts[] = {
- { "help", no_argument, 0, 'h' },
- { "usage", no_argument, 0, OPT_USAGE },
- { "command", required_argument, 0, 'c' },
- { "program", required_argument, 0, 'p' },
- { "pid-from", required_argument, 0, 'P' },
- { "pidfile", required_argument, 0, 'F' },
- { "timeout", required_argument, 0, 't' },
- { "signal-reload", required_argument, 0, OPT_SIGNAL_RELOAD },
- { "no-reload", no_argument, 0, OPT_NO_RELOAD },
- { "signal-stop", required_argument, 0, OPT_SIGNAL_STOP },
- { "sentinel", no_argument, 0, 'S' },
- { "create-pidfile", required_argument, 0, OPT_CREATE_PIDFILE },
- { "version", no_argument, 0, OPT_VERSION },
- { "verbose", no_argument, 0, 'v' },
- { "user", required_argument, 0, 'u' },
- { "group", required_argument, 0, 'g' },
+ { "help", no_argument, 0, 'h' },
+ { "usage", no_argument, 0, OPT_USAGE },
+ { "command", required_argument, 0, 'c' },
+ { "program", required_argument, 0, 'p' },
+ { "pid-from", required_argument, 0, 'P' },
+ { "pidfile", required_argument, 0, 'F' },
+ { "timeout", required_argument, 0, 't' },
+ { "signal-reload", required_argument, 0, OPT_SIGNAL_RELOAD },
+ { "no-reload", no_argument, 0, OPT_NO_RELOAD },
+ { "signal-stop", required_argument, 0, OPT_SIGNAL_STOP },
+ { "sentinel", no_argument, 0, 'S' },
+ { "create-pidfile", required_argument, 0, OPT_CREATE_PIDFILE },
+ { "version", no_argument, 0, OPT_VERSION },
+ { "verbose", no_argument, 0, 'v' },
+ { "user", required_argument, 0, 'u' },
+ { "group", required_argument, 0, 'g' },
+ { "restart-on-exit", required_argument, 0, OPT_RESTART_ON_EXIT },
+ { "restart-on-signal", required_argument, 0, OPT_RESTART_ON_SIGNAL },
{ NULL }
};
char shortopts[] = "c:hF:g:P:p:St:u:v";
@@ -127,16 +131,22 @@ is_numeric_str(char const *s)
}
int
-sig_name_to_str(char const *s)
+str_to_int(char const *s)
+{
+ char *end;
+ unsigned long n;
+ errno = 0;
+ n = strtoul(s, &end, 10);
+ if (errno || *end || n > UINT_MAX)
+ return -1;
+ return n;
+}
+
+int
+str_to_sig(char const *s)
{
if (is_numeric_str(s)) {
- char *end;
- unsigned long n;
- errno = 0;
- n = strtoul(s, &end, 10);
- if (errno || *end || n > UINT_MAX)
- return -1;
- return n;
+ return str_to_int(s);
} else {
struct sigdefn *sd;
@@ -183,8 +193,6 @@ char const *help_msg[] = {
"",
" -t, --timeout=SECONDS time to wait for the program to start up or",
" terminate",
- " --sentinel PROGRAM runs in foreground; disconnect from the",
- " controlling terminal, run it and act as a sentinel",
" -P, --pid-from=SOURCE where to look for PIDs of the running programs",
" -F, --pidfile=NAME name of the PID file",
" (same as --pid-from=FILE:NAME)",
@@ -194,6 +202,17 @@ char const *help_msg[] = {
" --signal-stop=SIG signal to send in order to terminate the program",
" (default: SIGTERM)",
"",
+ "Sentinel mode:",
+ "",
+ " --sentinel PROGRAM runs in foreground; disconnect from the",
+ " controlling terminal, run it and act as a sentinel",
+ " --restart-on-exit=[!]CODE[,...]",
+ " restart the program if it exits with one of the",
+ " listed status codes",
+ " --restart-on-signal=[!]SIG[,...]",
+ " restart the program if it terminates on one of the",
+ " listed signals",
+ "",
"Informational options:",
"",
" -h, --help display this help list",
@@ -272,6 +291,8 @@ char const *usage_msg[] = {
"[--pid-from=SOURCE]",
"[--pidfile=PIDFILE]",
"[--program=PROGRAM]",
+ "[--restart-on-exit=[!]CODE[,...]]",
+ "[--restart-on-signal=[!]SIG[,...]]",
"[--sentinel]",
"[--signal-reload=SIG]",
"[--signal-stop=SIG]",
@@ -422,6 +443,12 @@ main(int argc, char **argv)
case 'S':
setenv("GENRC_SENTINEL", "1", 1);
break;
+ case OPT_RESTART_ON_EXIT:
+ add_restart_condition(RESTART_ON_EXIT, optarg);
+ break;
+ case OPT_RESTART_ON_SIGNAL:
+ add_restart_condition(RESTART_ON_SIGNAL, optarg);
+ break;
case OPT_NO_RELOAD:
no_reload = 1;
break;
@@ -450,7 +477,7 @@ main(int argc, char **argv)
if (no_reload)
genrc_no_reload = 1;
else if ((p = getenv("GENRC_SIGNAL_RELOAD")) != NULL) {
- genrc_signal_reload = sig_name_to_str(p);
+ genrc_signal_reload = str_to_sig(p);
if (genrc_signal_reload == -1)
usage_error("%s: invalid signal number", p);
else if (genrc_signal_reload == 0)
@@ -458,7 +485,7 @@ main(int argc, char **argv)
}
if ((p = getenv("GENRC_SIGNAL_STOP")) != NULL) {
- genrc_signal_stop = sig_name_to_str(p);
+ genrc_signal_stop = str_to_sig(p);
if (genrc_signal_stop <= 0)
usage_error("%s: invalid signal number", p);
}
diff --git a/src/genrc.h b/src/genrc.h
index 9842016..c6ee57b 100644
--- a/src/genrc.h
+++ b/src/genrc.h
@@ -63,6 +63,9 @@ pid_t strtopid(char const *str);
int pid_is_running(pid_t pid);
void runas(void);
+int str_to_sig(char const *);
+int str_to_int(char const *);
+
enum {
MATCH_REGEX, /* extended POSIX regexp match (default) */
@@ -109,7 +112,14 @@ void match_pcre_free(PROCSCANBUF buf);
int match_pcre(PROCSCANBUF buf, char const *arg);
+enum {
+ RESTART_ON_EXIT,
+ RESTART_ON_SIGNAL
+};
+void add_restart_condition(int type, char const *arg);
+
+
struct genrc_pid_closure {
char const *name;
int (*pid)(struct genrc_pid_closure *, PIDLIST *);
diff --git a/src/sentinel.c b/src/sentinel.c
index 59b89cc..33d3e06 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -8,6 +8,7 @@ There is NO WARRANTY, to the extent permitted by law.
#include <unistd.h>
#include <fcntl.h>
#include <syslog.h>
+#include <time.h>
static void
xpipe(int p[2])
@@ -31,6 +32,13 @@ write_pid_file(pid_t pid)
}
}
+static void
+unlink_pid_file(void)
+{
+ if (genrc_create_pidfile)
+ unlink(genrc_create_pidfile);
+}
+
#define LOGBUFSIZE 1024
struct log_buffer {
@@ -68,13 +76,99 @@ log_buffer_read(int fd, struct log_buffer *lb)
}
}
+struct restart_cond {
+ struct restart_cond *next;
+ int type;
+ int negate;
+ int numc;
+ int numv[1];
+};
+
+struct restart_cond *restart_head, *restart_tail;
+
+static int
+restart_on(int type, int num)
+{
+ struct restart_cond *cond;
+
+ for (cond = restart_head; cond; cond = cond->next) {
+ if (cond->type == type) {
+ int result = cond->negate;
+ int i;
+ for (i = 0; i < cond->numc; i++) {
+ if (cond->numv[i] == num) {
+ result = !result;
+ break;
+ }
+ }
+ if (result)
+ return 1;
+ }
+ }
+ return 0;
+}
+
+typedef int (*RESTART_STON)(char const *);
+
+static RESTART_STON restart_ston[] = { str_to_int, str_to_sig };
+static char const *restart_what[] = { "exit status", "signal" };
+
+void
+add_restart_condition(int type, char const *arg)
+{
+ struct wordsplit ws;
+ size_t i;
+ int negate = 0;
+ struct restart_cond *cond;
+ RESTART_STON ston = restart_ston[type];
+
+ if (arg[0] == '!') {
+ negate = 1;
+ arg++;
+ }
+
+ ws.ws_delim = ",";
+ ws.ws_error = genrc_error;
+ if (wordsplit(arg, &ws,
+ WRDSF_NOCMD
+ | WRDSF_NOVAR
+ | WRDSF_DELIM
+ | WRDSF_ENOMEMABRT
+ | WRDSF_SHOWERR
+ | WRDSF_ERROR))
+ exit(1);
+
+ if (ws.ws_wordc == 0)
+ usage_error("empty restart condition");
+
+ cond = xmalloc(sizeof(*cond)
+ + (ws.ws_wordc - 1) * sizeof(cond->numv[0]));
+ cond->next = NULL;
+ cond->type = type;
+ cond->negate = negate;
+ cond->numc = ws.ws_wordc;
+ for (i = 0; i < ws.ws_wordc; i++) {
+ int n = ston(ws.ws_wordv[i]);
+ if (n == -1)
+ usage_error("bad %s: %s", restart_what[type],
+ ws.ws_wordv[i]);
+ cond->numv[i] = n;
+ }
+
+ if (restart_tail)
+ restart_tail->next = cond;
+ else
+ restart_head = cond;
+ restart_tail = cond;
+}
+
void
wait_loop(pid_t child, int out, int err)
{
fd_set rdset;
int nfd = (out > err ? out : err) + 1;
struct log_buffer obuf, ebuf;
-
+
openlog(genrc_program, LOG_PID, LOG_DAEMON);
log_buffer_init(&obuf, LOG_INFO);
log_buffer_init(&ebuf, LOG_ERR);
@@ -83,20 +177,24 @@ wait_loop(pid_t child, int out, int err)
int rc, status;
if (waitpid(child, &status, WNOHANG) == child) {
- if (genrc_create_pidfile)
- unlink(genrc_create_pidfile);
+ write_pid_file(getpid());
if (WIFEXITED(status)) {
+ int code = WEXITSTATUS(status);
syslog(LOG_INFO, "%s exited with status %d",
- genrc_program, WEXITSTATUS(status));
- _exit(WEXITSTATUS(status));
+ genrc_program, code);
+ if (restart_on(RESTART_ON_EXIT, code))
+ return;
} else if (WIFSIGNALED(status)) {
char const *coremsg = "";
+ int sig = WTERMSIG(status);
#ifdef WCOREDUMP
if (WCOREDUMP(status))
coremsg = " (core dumped)";
#endif
syslog(LOG_INFO, "%s terminated on signal %d%s",
- genrc_program, WTERMSIG(status), coremsg);
+ genrc_program, sig, coremsg);
+ if (restart_on(RESTART_ON_SIGNAL, sig))
+ return;
} else if (WIFSTOPPED(status)) {
syslog(LOG_INFO, "%s stopped on signal %d",
genrc_program, WSTOPSIG(status));
@@ -128,7 +226,8 @@ wait_loop(pid_t child, int out, int err)
log_buffer_read(err, &ebuf);
}
}
- _exit(1);
+ unlink_pid_file();
+ _exit(0);
}
pid_t
@@ -178,12 +277,78 @@ start_command(int p[])
p[1] = errpipe[0];
return pid;
}
+
+/* Restart rate control */
+static int volatile hup_received;
+
+static void
+sighup(int sig)
+{
+ hup_received++;
+}
+
+/* Consider the number of restarts during this interval */
+#define TESTTIME 2*60
+/* Stop respawning and go to sleep if it exceeds this number */
+#define MAXSPAWN 10
+/* Sleep that much seconds, then retry */
+#define SLEEPTIME 5*60
+
+struct ratectl {
+ time_t start_time; /* Start of the test interval */
+ unsigned failcount; /* Number of restarts done so far */
+};
+static void
+check_failure_rate(struct ratectl *rate)
+{
+ time_t now;
+ struct timeval start, stop, ttw;
+
+ time(&now);
+ if (rate->start_time + TESTTIME > now)
+ rate->failcount++;
+ else {
+ rate->failcount = 0;
+ rate->start_time = now;
+ }
+
+ if (rate->failcount > MAXSPAWN) {
+ syslog(LOG_NOTICE,
+ "%s respawning too fast; disabled for %d minutes",
+ genrc_program, SLEEPTIME / 60);
+
+ gettimeofday(&stop, NULL);
+ stop.tv_sec += SLEEPTIME;
+ while (1) {
+ gettimeofday(&start, NULL);
+ if (timercmp(&start, &stop, >=))
+ break;
+ timersub(&stop, &start, &ttw);
+ if (select(0, NULL, NULL, NULL, &ttw) < 0) {
+ if (errno == EINTR) {
+ if (hup_received) {
+ hup_received = 0;
+ break;
+ }
+ } else {
+ system_error(errno, "select");
+ break;
+ }
+ }
+ }
+
+ rate->failcount = 0;
+ }
+}
+
int
sentinel(void)
{
pid_t pid;
int p[2];
+ struct ratectl ctl;
+ struct sigaction act;
/* Detach from the controlling terminal */
pid = fork();
@@ -205,10 +370,23 @@ sentinel(void)
_exit(0);
/* Grand-child */
- pid = start_command(p);
- if (pid == -1)
- _exit(127);
- wait_loop(pid, p[0], p[1]);
+ act.sa_handler = sighup;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGHUP, &act, NULL);
+
+ ctl.start_time = 0;
+ ctl.failcount = 0;
+ while (1) {
+ pid = start_command(p);
+ if (pid == -1)
+ _exit(127);
+ if (pid == 0)
+ break;
+ wait_loop(pid, p[0], p[1]);
+ check_failure_rate(&ctl);
+ syslog(LOG_INFO, "restarting %s", genrc_program);
+ }
_exit(1);
}

Return to:

Send suggestions and report system problems to the System administrator.