diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-05-20 10:53:30 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-05-20 10:53:30 +0300 |
commit | ddb46c6aa42ada061e51c635c0230e4dc8eab881 (patch) | |
tree | b003ae6af354f553207981b4fc281e8f9e19c60e /src | |
parent | ed8389beadb7cf1f8d95fe7addbc9ff2783f4d07 (diff) | |
download | genrc-ddb46c6aa42ada061e51c635c0230e4dc8eab881.tar.gz genrc-ddb46c6aa42ada061e51c635c0230e4dc8eab881.tar.bz2 |
Sentinel mode: restart the program on certain conditions
* Makefile.am: Create the ChangeLog file from git log.
* configure.ac: Request git2chg.
* src/com_start.c: Use sigaction instead of signal.
* src/genrc.8: Document new options.
* src/genrc.c: New options --restart-on-exit and --restart-on-signal.
* src/genrc.h (str_to_sig, str_to_int): New prototypes.
(add_restart_condition): New prototype.
* src/sentinel.c (restart_on, add_restart_condition):
(check_failure_rate): New functions.
(wait_loop): Return if restart is requested.
(sentinel): Restart the program if needed.
Diffstat (limited to 'src')
-rw-r--r-- | src/com_start.c | 12 | ||||
-rw-r--r-- | src/genrc.8 | 58 | ||||
-rw-r--r-- | src/genrc.c | 85 | ||||
-rw-r--r-- | src/genrc.h | 10 | ||||
-rw-r--r-- | src/sentinel.c | 200 |
5 files changed, 319 insertions, 46 deletions
diff --git a/src/com_start.c b/src/com_start.c index 5744e39..3a9dffc 100644 --- a/src/com_start.c +++ b/src/com_start.c @@ -45,5 +45,9 @@ timedwaitpid(pid_t pid, int *status) int rc = -1; - SIGHANDLER oldsig; - - oldsig = signal(SIGCHLD, sigchld); + struct sigaction act, oldact; + + act.sa_handler = sigchld; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + sigaction(SIGCHLD, &act, &oldact); + gettimeofday(&stoptime, NULL); @@ -75,3 +79,3 @@ timedwaitpid(pid_t pid, int *status) } - signal(SIGCHLD, oldsig); + sigaction(SIGCHLD, &oldact, NULL); if (rc) { diff --git a/src/genrc.8 b/src/genrc.8 index 00522ee..959a00e 100644 --- a/src/genrc.8 +++ b/src/genrc.8 @@ -15,3 +15,3 @@ .\" along with genrc. If not, see <http://www.gnu.org/licenses/>. -.TH GENRC 8 "May 17, 2018" "GENRC" "Genrc User Manual" +.TH GENRC 8 "May 20, 2018" "GENRC" "Genrc User Manual" .SH NAME @@ -38,2 +38,4 @@ genrc \- generic system initialization script helper [\fB\-\-program=\fIPROGRAM\fR]\ + [\fB\-\-restart\-on\-exit=\fR[\fB!\fR]\fISTATUS\fR[\fB,\fISTATUS\fR...]]\ + [\fB\-\-restart\-on\-signal=\fR[\fB!\fR]\fISIG\fR[\fB,\fISIG\fR...]]\ [\fB\-\-sentinel\fR]\ @@ -103,2 +105,26 @@ terminates. Unless the \fB\-\-pid\-from\fR option is given, \fB\-\-pid\-from=FILE:\fIFILENAME\fR will be assumed. +.PP +In sentinel mode, it is possible to restart the program if it +terminates with a specific exit code or on a specific signal. This is +controlled by the \fB\-\-restart\-on\-exit\fR and +\fB\-\-restart\-on\-signal\fR options. Use this feature to ensure the +service provided by the program won't get terminated because of +hitting a bug or encountering an unforeseen external condition. 
For +example, the following two options will ensure that the program will +be terminated only if it exits with status 0 or it is terminated by +SIGTERM or SIGQUIT signal: +.EX +--restart-on-exit='!0' --restart-on-signal='!TERM,QUIT' +.EE +.PP +If restarts are requested, \fBgenrc\fR will control how often it has +to restart the program using the same algorithm as +.B init (8). +Namely, if the program is restarted more than 10 times within two +minutes, \fBgenrc\fR will disable subsequent restarts for the next +5 minutes. If the \fB\-\-create\-pidfile\fR option was used, the +PID of the controlling \fBgenrc\fR process will be stored in the +file during that interval. If the \fBSIGHUP\fR signal is delivered +during the sleep interval, the sleep will be broken prematurely and +the program restarted again. .SS status @@ -190,5 +216,33 @@ Where to look for PIDs of the running programs. .TP +\fB\-\-restart\-on\-exit=\fR[\fB!\fR]\fISTATUS\fR[\fB,\fISTATUS\fR...] +This option takes effect when used together with +\fB\-\-sentinel\fR. If the program terminates with one of status +codes listed as the argument to this option, it will be immediately +restarted. The exclamation mark at the start of the list inverts the +set, e.g. \fB\-\-restart\-on\-exit='!0,1'\fR means restart unless the +program exit code is 0 or 1. Note the use of quotation to prevent the +\fB!\fR from being interpreted by the shell. +.TP +\fB\-\-restart\-on\-signal=\fR[\fB!\fR]\fISIG\fR[\fB,\fISIG\fR...] +This option takes effect when used together with +\fB\-\-sentinel\fR. If the program terminates due to receiving one of +the signals from this list, it will be immediately restarted. Each +\fISIG\fR is either a signal number, or a signal name, as listed in +.BR signal (7). +The \fBSIG\fR prefix can be omitted from the signal name. Names are +case-insensitive. Thus, \fB1\fR, \fBHUP\fR, \fBSIGHUP\fR, and +\fBsighup\fR all stand for the same signal. 
+.sp +The exclamation mark at the start of the list complements the signal +set, so that e.g. \fB\-\-restart\-on\-signal='!TERM,QUIT,INT'\fR will +restart the program unless it terminates on one of the listed signals. +.TP \fB\-\-sentinel\fR \fIPROGRAM\fR runs in foreground; disconnect from the controlling -terminal, run it and act as a sentinel. +terminal, start it and run in background until it terminates. The +program's stdout and stderr are sent to the syslog facility +\fBdaemon\fR, priorities \fBinfo\fR and \fBerr\fR, correspondingly. + +See the options \fB\-\-restart\-on\-exit\fR and +\fB\-\-restart\-on\-signal\fR for details on how to restart the program. .TP diff --git a/src/genrc.c b/src/genrc.c index ae3070d..9052987 100644 --- a/src/genrc.c +++ b/src/genrc.c @@ -27,3 +27,5 @@ enum { OPT_SIGNAL_STOP, - OPT_CREATE_PIDFILE + OPT_CREATE_PIDFILE, + OPT_RESTART_ON_EXIT, + OPT_RESTART_ON_SIGNAL, }; @@ -31,18 +33,20 @@ enum { struct option longopts[] = { - { "help", no_argument, 0, 'h' }, - { "usage", no_argument, 0, OPT_USAGE }, - { "command", required_argument, 0, 'c' }, - { "program", required_argument, 0, 'p' }, - { "pid-from", required_argument, 0, 'P' }, - { "pidfile", required_argument, 0, 'F' }, - { "timeout", required_argument, 0, 't' }, - { "signal-reload", required_argument, 0, OPT_SIGNAL_RELOAD }, - { "no-reload", no_argument, 0, OPT_NO_RELOAD }, - { "signal-stop", required_argument, 0, OPT_SIGNAL_STOP }, - { "sentinel", no_argument, 0, 'S' }, - { "create-pidfile", required_argument, 0, OPT_CREATE_PIDFILE }, - { "version", no_argument, 0, OPT_VERSION }, - { "verbose", no_argument, 0, 'v' }, - { "user", required_argument, 0, 'u' }, - { "group", required_argument, 0, 'g' }, + { "help", no_argument, 0, 'h' }, + { "usage", no_argument, 0, OPT_USAGE }, + { "command", required_argument, 0, 'c' }, + { "program", required_argument, 0, 'p' }, + { "pid-from", required_argument, 0, 'P' }, + { "pidfile", required_argument, 0, 'F' }, + { "timeout", 
required_argument, 0, 't' }, + { "signal-reload", required_argument, 0, OPT_SIGNAL_RELOAD }, + { "no-reload", no_argument, 0, OPT_NO_RELOAD }, + { "signal-stop", required_argument, 0, OPT_SIGNAL_STOP }, + { "sentinel", no_argument, 0, 'S' }, + { "create-pidfile", required_argument, 0, OPT_CREATE_PIDFILE }, + { "version", no_argument, 0, OPT_VERSION }, + { "verbose", no_argument, 0, 'v' }, + { "user", required_argument, 0, 'u' }, + { "group", required_argument, 0, 'g' }, + { "restart-on-exit", required_argument, 0, OPT_RESTART_ON_EXIT }, + { "restart-on-signal", required_argument, 0, OPT_RESTART_ON_SIGNAL }, { NULL } @@ -129,12 +133,18 @@ is_numeric_str(char const *s) int -sig_name_to_str(char const *s) +str_to_int(char const *s) +{ + char *end; + unsigned long n; + errno = 0; + n = strtoul(s, &end, 10); + if (errno || *end || n > UINT_MAX) + return -1; + return n; +} + +int +str_to_sig(char const *s) { if (is_numeric_str(s)) { - char *end; - unsigned long n; - errno = 0; - n = strtoul(s, &end, 10); - if (errno || *end || n > UINT_MAX) - return -1; - return n; + return str_to_int(s); } else { @@ -185,4 +195,2 @@ char const *help_msg[] = { " terminate", - " --sentinel PROGRAM runs in foreground; disconnect from the", - " controlling terminal, run it and act as a sentinel", " -P, --pid-from=SOURCE where to look for PIDs of the running programs", @@ -196,2 +204,13 @@ char const *help_msg[] = { "", + "Sentinel mode:", + "", + " --sentinel PROGRAM runs in foreground; disconnect from the", + " controlling terminal, run it and act as a sentinel", + " --restart-on-exit=[!]CODE[,...]", + " restart the program if it exits with one of the", + " listed status codes", + " --restart-on-signal=[!]SIG[,...]", + " restart the program if it terminates on one of the", + " listed signals", + "", "Informational options:", @@ -274,2 +293,4 @@ char const *usage_msg[] = { "[--program=PROGRAM]", + "[--restart-on-exit=[!]CODE[,...]]", + "[--restart-on-signal=[!]SIG[,...]]", "[--sentinel]", 
@@ -424,2 +445,8 @@ main(int argc, char **argv) break; + case OPT_RESTART_ON_EXIT: + add_restart_condition(RESTART_ON_EXIT, optarg); + break; + case OPT_RESTART_ON_SIGNAL: + add_restart_condition(RESTART_ON_SIGNAL, optarg); + break; case OPT_NO_RELOAD: @@ -452,3 +479,3 @@ main(int argc, char **argv) else if ((p = getenv("GENRC_SIGNAL_RELOAD")) != NULL) { - genrc_signal_reload = sig_name_to_str(p); + genrc_signal_reload = str_to_sig(p); if (genrc_signal_reload == -1) @@ -460,3 +487,3 @@ main(int argc, char **argv) if ((p = getenv("GENRC_SIGNAL_STOP")) != NULL) { - genrc_signal_stop = sig_name_to_str(p); + genrc_signal_stop = str_to_sig(p); if (genrc_signal_stop <= 0) diff --git a/src/genrc.h b/src/genrc.h index 9842016..c6ee57b 100644 --- a/src/genrc.h +++ b/src/genrc.h @@ -65,2 +65,5 @@ int pid_is_running(pid_t pid); void runas(void); +int str_to_sig(char const *); +int str_to_int(char const *); + @@ -111,3 +114,10 @@ int match_pcre(PROCSCANBUF buf, char const *arg); +enum { + RESTART_ON_EXIT, + RESTART_ON_SIGNAL +}; +void add_restart_condition(int type, char const *arg); + + struct genrc_pid_closure { diff --git a/src/sentinel.c b/src/sentinel.c index 59b89cc..33d3e06 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -10,2 +10,3 @@ There is NO WARRANTY, to the extent permitted by law. 
#include <syslog.h> +#include <time.h> @@ -33,2 +34,9 @@ write_pid_file(pid_t pid) +static void +unlink_pid_file(void) +{ + if (genrc_create_pidfile) + unlink(genrc_create_pidfile); +} + #define LOGBUFSIZE 1024 @@ -70,2 +78,88 @@ log_buffer_read(int fd, struct log_buffer *lb) +struct restart_cond { + struct restart_cond *next; + int type; + int negate; + int numc; + int numv[1]; +}; + +struct restart_cond *restart_head, *restart_tail; + +static int +restart_on(int type, int num) +{ + struct restart_cond *cond; + + for (cond = restart_head; cond; cond = cond->next) { + if (cond->type == type) { + int result = cond->negate; + int i; + for (i = 0; i < cond->numc; i++) { + if (cond->numv[i] == num) { + result = !result; + break; + } + } + if (result) + return 1; + } + } + return 0; +} + +typedef int (*RESTART_STON)(char const *); + +static RESTART_STON restart_ston[] = { str_to_int, str_to_sig }; +static char const *restart_what[] = { "exit status", "signal" }; + +void +add_restart_condition(int type, char const *arg) +{ + struct wordsplit ws; + size_t i; + int negate = 0; + struct restart_cond *cond; + RESTART_STON ston = restart_ston[type]; + + if (arg[0] == '!') { + negate = 1; + arg++; + } + + ws.ws_delim = ","; + ws.ws_error = genrc_error; + if (wordsplit(arg, &ws, + WRDSF_NOCMD + | WRDSF_NOVAR + | WRDSF_DELIM + | WRDSF_ENOMEMABRT + | WRDSF_SHOWERR + | WRDSF_ERROR)) + exit(1); + + if (ws.ws_wordc == 0) + usage_error("empty restart condition"); + + cond = xmalloc(sizeof(*cond) + + (ws.ws_wordc - 1) * sizeof(cond->numv[0])); + cond->next = NULL; + cond->type = type; + cond->negate = negate; + cond->numc = ws.ws_wordc; + for (i = 0; i < ws.ws_wordc; i++) { + int n = ston(ws.ws_wordv[i]); + if (n == -1) + usage_error("bad %s: %s", restart_what[type], + ws.ws_wordv[i]); + cond->numv[i] = n; + } + + if (restart_tail) + restart_tail->next = cond; + else + restart_head = cond; + restart_tail = cond; +} + void @@ -76,3 +170,3 @@ wait_loop(pid_t child, int out, int err) 
struct log_buffer obuf, ebuf; - + openlog(genrc_program, LOG_PID, LOG_DAEMON); @@ -85,10 +179,12 @@ wait_loop(pid_t child, int out, int err) if (waitpid(child, &status, WNOHANG) == child) { - if (genrc_create_pidfile) - unlink(genrc_create_pidfile); + write_pid_file(getpid()); if (WIFEXITED(status)) { + int code = WEXITSTATUS(status); syslog(LOG_INFO, "%s exited with status %d", - genrc_program, WEXITSTATUS(status)); - _exit(WEXITSTATUS(status)); + genrc_program, code); + if (restart_on(RESTART_ON_EXIT, code)) + return; } else if (WIFSIGNALED(status)) { char const *coremsg = ""; + int sig = WTERMSIG(status); #ifdef WCOREDUMP @@ -98,3 +194,5 @@ wait_loop(pid_t child, int out, int err) syslog(LOG_INFO, "%s terminated on signal %d%s", - genrc_program, WTERMSIG(status), coremsg); + genrc_program, sig, coremsg); + if (restart_on(RESTART_ON_SIGNAL, sig)) + return; } else if (WIFSTOPPED(status)) { @@ -130,3 +228,4 @@ wait_loop(pid_t child, int out, int err) } - _exit(1); + unlink_pid_file(); + _exit(0); } @@ -180,3 +279,67 @@ start_command(int p[]) } + +/* Restart rate control */ +static int volatile hup_received; + +static void +sighup(int sig) +{ + hup_received++; +} + +/* Consider the number of restarts during this interval */ +#define TESTTIME 2*60 +/* Stop respawning and go to sleep if it exceeds this number */ +#define MAXSPAWN 10 +/* Sleep that much seconds, then retry */ +#define SLEEPTIME 5*60 + +struct ratectl { + time_t start_time; /* Start of the test interval */ + unsigned failcount; /* Number of restarts done so far */ +}; +static void +check_failure_rate(struct ratectl *rate) +{ + time_t now; + struct timeval start, stop, ttw; + + time(&now); + if (rate->start_time + TESTTIME > now) + rate->failcount++; + else { + rate->failcount = 0; + rate->start_time = now; + } + + if (rate->failcount > MAXSPAWN) { + syslog(LOG_NOTICE, + "%s respawning too fast; disabled for %d minutes", + genrc_program, SLEEPTIME / 60); + + gettimeofday(&stop, NULL); + stop.tv_sec += 
SLEEPTIME; + while (1) { + gettimeofday(&start, NULL); + if (timercmp(&start, &stop, >=)) + break; + timersub(&stop, &start, &ttw); + if (select(0, NULL, NULL, NULL, &ttw) < 0) { + if (errno == EINTR) { + if (hup_received) { + hup_received = 0; + break; + } + } else { + system_error(errno, "select"); + break; + } + } + } + + rate->failcount = 0; + } +} + int @@ -186,2 +349,4 @@ sentinel(void) int p[2]; + struct ratectl ctl; + struct sigaction act; @@ -207,6 +372,19 @@ sentinel(void) /* Grand-child */ - pid = start_command(p); - if (pid == -1) - _exit(127); - wait_loop(pid, p[0], p[1]); + act.sa_handler = sighup; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + sigaction(SIGHUP, &act, NULL); + + ctl.start_time = 0; + ctl.failcount = 0; + while (1) { + pid = start_command(p); + if (pid == -1) + _exit(127); + if (pid == 0) + break; + wait_loop(pid, p[0], p[1]); + check_failure_rate(&ctl); + syslog(LOG_INFO, "restarting %s", genrc_program); + } _exit(1); |