diff options
-rw-r--r-- | Makefile.am | 11 | ||||
-rw-r--r-- | configure.ac | 4 | ||||
-rw-r--r-- | src/com_start.c | 12 | ||||
-rw-r--r-- | src/genrc.8 | 58 | ||||
-rw-r--r-- | src/genrc.c | 85 | ||||
-rw-r--r-- | src/genrc.h | 10 | ||||
-rw-r--r-- | src/sentinel.c | 200 |
7 files changed, 332 insertions, 48 deletions
diff --git a/Makefile.am b/Makefile.am index 31e9e5f..dac3cb3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1 +1,12 @@ SUBDIRS = grecs src +dist: ChangeLog +.PHONY: ChangeLog +ChangeLog: + $(AM_V_GEN)if test -d .git; then \ + git log --pretty='format:%ct %an <%ae>%n%n%s%n%n%b%n' | \ + awk -f $(top_srcdir)/@GRECS_SUBDIR@/build-aux/git2chg.awk \ + > ChangeLog.tmp; \ + cmp ChangeLog ChangeLog.tmp > /dev/null 2>&1 || \ + mv ChangeLog.tmp ChangeLog; \ + rm -f ChangeLog.tmp; \ + fi diff --git a/configure.ac b/configure.ac index 5d36092..a568649 100644 --- a/configure.ac +++ b/configure.ac @@ -15,7 +15,7 @@ # along with genrc. If not, see <http://www.gnu.org/licenses/>. AC_PREREQ([2.69]) -AC_INIT([genrc], [1.0], [gray@gnu.org]) +AC_INIT([genrc], [1.0.90], [gray@gnu.org]) AC_CONFIG_SRCDIR([src/genrc.c]) AC_CONFIG_HEADERS([config.h]) AM_INIT_AUTOMAKE([1.11 foreign silent-rules]) @@ -37,7 +37,7 @@ AC_CHECK_HEADERS([getopt.h pcre.h]) # Checks for library functions. AC_CHECK_FUNCS([getdtablesize]) -GRECS_SETUP(grecs, [all-parsers]) +GRECS_SETUP(grecs, [all-parsers git2chg]) AM_CONDITIONAL([COND_PCRE], [test "$ac_cv_header_pcre_h" = yes && test "$ac_cv_lib_pcre_main" = yes]) diff --git a/src/com_start.c b/src/com_start.c index 5744e39..3a9dffc 100644 --- a/src/com_start.c +++ b/src/com_start.c @@ -43,9 +43,13 @@ timedwaitpid(pid_t pid, int *status) { struct timeval now, stoptime, ttw; int rc = -1; - SIGHANDLER oldsig; - - oldsig = signal(SIGCHLD, sigchld); + struct sigaction act, oldact; + + act.sa_handler = sigchld; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + sigaction(SIGCHLD, &act, &oldact); + gettimeofday(&stoptime, NULL); stoptime.tv_sec += genrc_timeout; while (1) { @@ -73,7 +77,7 @@ timedwaitpid(pid_t pid, int *status) } } - signal(SIGCHLD, oldsig); + sigaction(SIGCHLD, &oldact, NULL); if (rc) { kill(pid, SIGKILL); } diff --git a/src/genrc.8 b/src/genrc.8 index 00522ee..959a00e 100644 --- a/src/genrc.8 +++ b/src/genrc.8 @@ -13,7 +13,7 @@ .\" .\" You should have 
received a copy of the GNU General Public License .\" along with genrc. If not, see <http://www.gnu.org/licenses/>. -.TH GENRC 8 "May 17, 2018" "GENRC" "Genrc User Manual" +.TH GENRC 8 "May 20, 2018" "GENRC" "Genrc User Manual" .SH NAME genrc \- generic system initialization script helper .SH SYNOPSIS @@ -36,6 +36,8 @@ genrc \- generic system initialization script helper [\fB\-\-pid\-from=\fISOURCE\fR]\ [\fB\-\-pidfile=\fIPIDFILE\fR]\ [\fB\-\-program=\fIPROGRAM\fR]\ + [\fB\-\-restart\-on\-exit=\fR[\fB!\fR]\fISTATUS\fR[\fB,\fISTATUS\fR...]]\ + [\fB\-\-restart\-on\-signal=\fR[\fB!\fR]\fISIG\fR[\fB,\fISIG\fR...]]\ [\fB\-\-sentinel\fR]\ [\fB\-\-signal\-reload=\fISIG\fR]\ [\fB\-\-signal\-stop=\fISIG\fR]\ @@ -101,6 +103,30 @@ If the \fB\-\-create\-pidfile=\fIFILENAME\fR option is given together with in \fIFILE\fR. The file will be unlinked after the subsidiary command terminates. Unless the \fB\-\-pid\-from\fR option is given, \fB\-\-pid\-from=FILE:\fIFILENAME\fR will be assumed. +.PP +In sentinel mode, it is possible to restart the program if it +terminates with a specific exit code or on a specific signal. This is +controlled by the \fB\-\-restart\-on\-exit\fR and +\fB\-\-restart\-on\-signal\fR options. Use this feature to ensure the +service provided by the program won't get terminated because of +hitting a bug or encountering an unforeseen external condition. For +example, the following two options ensure that the program is +restarted unless it exits with status 0 or is terminated by the +SIGTERM or SIGQUIT signal: +.EX +\-\-restart\-on\-exit='!0' \-\-restart\-on\-signal='!TERM,QUIT' +.EE +.PP +If restarts are requested, \fBgenrc\fR will control how often it has +to restart the program using the same algorithm as +.BR init (8). +Namely, if the program is restarted more than 10 times within two +minutes, \fBgenrc\fR will disable subsequent restarts for the next +5 minutes.
If the \fB\-\-create\-pidfile\fR option was used, the +PID of the controlling \fBgenrc\fR process will be stored in the +file during that interval. If the \fBSIGHUP\fR signal is delivered +during the sleep interval, the sleep will be interrupted and +the program will be restarted. .SS status In \fBstatus\fR mode \fBgenrc\fR verifies if the \fICOMMAND\fR is already running and outputs its status on the standard output. To this @@ -188,9 +214,37 @@ Name of the program to run. \fB\-P\fR, \fB\-\-pid\-from=\fISOURCE\fR Where to look for PIDs of the running programs. .TP +\fB\-\-restart\-on\-exit=\fR[\fB!\fR]\fISTATUS\fR[\fB,\fISTATUS\fR...] +This option takes effect when used together with +\fB\-\-sentinel\fR. If the program terminates with one of the status +codes listed in the argument to this option, it will be immediately +restarted. The exclamation mark at the start of the list inverts the +set, e.g. \fB\-\-restart\-on\-exit='!0,1'\fR means restart unless the +program exit code is 0 or 1. Note the use of quoting to prevent the +\fB!\fR from being interpreted by the shell. +.TP +\fB\-\-restart\-on\-signal=\fR[\fB!\fR]\fISIG\fR[\fB,\fISIG\fR...] +This option takes effect when used together with +\fB\-\-sentinel\fR. If the program terminates due to receiving one of +the signals from this list, it will be immediately restarted. Each +\fISIG\fR is either a signal number or a signal name, as listed in +.BR signal (7). +The \fBSIG\fR prefix can be omitted from the signal name. Names are +case-insensitive. Thus, \fB1\fR, \fBHUP\fR, \fBSIGHUP\fR, and +\fBsighup\fR all stand for the same signal. +.sp +The exclamation mark at the start of the list complements the signal +set, so that e.g. \fB\-\-restart\-on\-signal='!TERM,QUIT,INT'\fR will +restart the program unless it terminates on one of the listed signals. +.TP \fB\-\-sentinel\fR \fIPROGRAM\fR runs in foreground; disconnect from the controlling -terminal, run it and act as a sentinel.
+terminal, start it and run in background until it terminates. The +program's stdout and stderr are sent to the syslog facility +\fBdaemon\fR, priorities \fBinfo\fR and \fBerr\fR, respectively. +.sp +See the options \fB\-\-restart\-on\-exit\fR and +\fB\-\-restart\-on\-signal\fR for details on how to restart the program. .TP \fB\-\-signal\-reload=\fISIG\fR Signal to send on reload (default: \fBSIGHUP\fR). Setting it to 0 is diff --git a/src/genrc.c b/src/genrc.c index ae3070d..9052987 100644 --- a/src/genrc.c +++ b/src/genrc.c @@ -25,26 +25,30 @@ enum { OPT_SIGNAL_RELOAD, OPT_NO_RELOAD, OPT_SIGNAL_STOP, - OPT_CREATE_PIDFILE + OPT_CREATE_PIDFILE, + OPT_RESTART_ON_EXIT, + OPT_RESTART_ON_SIGNAL, }; struct option longopts[] = { - { "help", no_argument, 0, 'h' }, - { "usage", no_argument, 0, OPT_USAGE }, - { "command", required_argument, 0, 'c' }, - { "program", required_argument, 0, 'p' }, - { "pid-from", required_argument, 0, 'P' }, - { "pidfile", required_argument, 0, 'F' }, - { "timeout", required_argument, 0, 't' }, - { "signal-reload", required_argument, 0, OPT_SIGNAL_RELOAD }, - { "no-reload", no_argument, 0, OPT_NO_RELOAD }, - { "signal-stop", required_argument, 0, OPT_SIGNAL_STOP }, - { "sentinel", no_argument, 0, 'S' }, - { "create-pidfile", required_argument, 0, OPT_CREATE_PIDFILE }, - { "version", no_argument, 0, OPT_VERSION }, - { "verbose", no_argument, 0, 'v' }, - { "user", required_argument, 0, 'u' }, - { "group", required_argument, 0, 'g' }, + { "help", no_argument, 0, 'h' }, + { "usage", no_argument, 0, OPT_USAGE }, + { "command", required_argument, 0, 'c' }, + { "program", required_argument, 0, 'p' }, + { "pid-from", required_argument, 0, 'P' }, + { "pidfile", required_argument, 0, 'F' }, + { "timeout", required_argument, 0, 't' }, + { "signal-reload", required_argument, 0, OPT_SIGNAL_RELOAD }, + { "no-reload", no_argument, 0, OPT_NO_RELOAD }, + { "signal-stop", required_argument, 0, OPT_SIGNAL_STOP }, + { "sentinel", no_argument, 0, 'S' }, + {
"create-pidfile", required_argument, 0, OPT_CREATE_PIDFILE }, + { "version", no_argument, 0, OPT_VERSION }, + { "verbose", no_argument, 0, 'v' }, + { "user", required_argument, 0, 'u' }, + { "group", required_argument, 0, 'g' }, + { "restart-on-exit", required_argument, 0, OPT_RESTART_ON_EXIT }, + { "restart-on-signal", required_argument, 0, OPT_RESTART_ON_SIGNAL }, { NULL } }; char shortopts[] = "c:hF:g:P:p:St:u:v"; @@ -127,16 +131,22 @@ is_numeric_str(char const *s) } int -sig_name_to_str(char const *s) +str_to_int(char const *s) +{ + char *end; + unsigned long n; + errno = 0; + n = strtoul(s, &end, 10); + if (errno || *end || n > UINT_MAX) + return -1; + return n; +} + +int +str_to_sig(char const *s) { if (is_numeric_str(s)) { - char *end; - unsigned long n; - errno = 0; - n = strtoul(s, &end, 10); - if (errno || *end || n > UINT_MAX) - return -1; - return n; + return str_to_int(s); } else { struct sigdefn *sd; @@ -183,8 +193,6 @@ char const *help_msg[] = { "", " -t, --timeout=SECONDS time to wait for the program to start up or", " terminate", - " --sentinel PROGRAM runs in foreground; disconnect from the", - " controlling terminal, run it and act as a sentinel", " -P, --pid-from=SOURCE where to look for PIDs of the running programs", " -F, --pidfile=NAME name of the PID file", " (same as --pid-from=FILE:NAME)", @@ -194,6 +202,17 @@ char const *help_msg[] = { " --signal-stop=SIG signal to send in order to terminate the program", " (default: SIGTERM)", "", + "Sentinel mode:", + "", + " --sentinel PROGRAM runs in foreground; disconnect from the", + " controlling terminal, run it and act as a sentinel", + " --restart-on-exit=[!]CODE[,...]", + " restart the program if it exits with one of the", + " listed status codes", + " --restart-on-signal=[!]SIG[,...]", + " restart the program if it terminates on one of the", + " listed signals", + "", "Informational options:", "", " -h, --help display this help list", @@ -272,6 +291,8 @@ char const *usage_msg[] = { 
"[--pid-from=SOURCE]", "[--pidfile=PIDFILE]", "[--program=PROGRAM]", + "[--restart-on-exit=[!]CODE[,...]]", + "[--restart-on-signal=[!]SIG[,...]]", "[--sentinel]", "[--signal-reload=SIG]", "[--signal-stop=SIG]", @@ -422,6 +443,12 @@ main(int argc, char **argv) case 'S': setenv("GENRC_SENTINEL", "1", 1); break; + case OPT_RESTART_ON_EXIT: + add_restart_condition(RESTART_ON_EXIT, optarg); + break; + case OPT_RESTART_ON_SIGNAL: + add_restart_condition(RESTART_ON_SIGNAL, optarg); + break; case OPT_NO_RELOAD: no_reload = 1; break; @@ -450,7 +477,7 @@ main(int argc, char **argv) if (no_reload) genrc_no_reload = 1; else if ((p = getenv("GENRC_SIGNAL_RELOAD")) != NULL) { - genrc_signal_reload = sig_name_to_str(p); + genrc_signal_reload = str_to_sig(p); if (genrc_signal_reload == -1) usage_error("%s: invalid signal number", p); else if (genrc_signal_reload == 0) @@ -458,7 +485,7 @@ main(int argc, char **argv) } if ((p = getenv("GENRC_SIGNAL_STOP")) != NULL) { - genrc_signal_stop = sig_name_to_str(p); + genrc_signal_stop = str_to_sig(p); if (genrc_signal_stop <= 0) usage_error("%s: invalid signal number", p); } diff --git a/src/genrc.h b/src/genrc.h index 9842016..c6ee57b 100644 --- a/src/genrc.h +++ b/src/genrc.h @@ -63,6 +63,9 @@ pid_t strtopid(char const *str); int pid_is_running(pid_t pid); void runas(void); +int str_to_sig(char const *); +int str_to_int(char const *); + enum { MATCH_REGEX, /* extended POSIX regexp match (default) */ @@ -109,7 +112,14 @@ void match_pcre_free(PROCSCANBUF buf); int match_pcre(PROCSCANBUF buf, char const *arg); +enum { + RESTART_ON_EXIT, + RESTART_ON_SIGNAL +}; +void add_restart_condition(int type, char const *arg); + + struct genrc_pid_closure { char const *name; int (*pid)(struct genrc_pid_closure *, PIDLIST *); diff --git a/src/sentinel.c b/src/sentinel.c index 59b89cc..33d3e06 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -8,6 +8,7 @@ There is NO WARRANTY, to the extent permitted by law. 
#include <unistd.h> #include <fcntl.h> #include <syslog.h> +#include <time.h> static void xpipe(int p[2]) @@ -31,6 +32,13 @@ write_pid_file(pid_t pid) } } +static void +unlink_pid_file(void) +{ + if (genrc_create_pidfile) + unlink(genrc_create_pidfile); +} + #define LOGBUFSIZE 1024 struct log_buffer { @@ -68,13 +76,99 @@ log_buffer_read(int fd, struct log_buffer *lb) } } +struct restart_cond { + struct restart_cond *next; + int type; + int negate; + int numc; + int numv[1]; +}; + +struct restart_cond *restart_head, *restart_tail; + +static int +restart_on(int type, int num) +{ + struct restart_cond *cond; + + for (cond = restart_head; cond; cond = cond->next) { + if (cond->type == type) { + int result = cond->negate; + int i; + for (i = 0; i < cond->numc; i++) { + if (cond->numv[i] == num) { + result = !result; + break; + } + } + if (result) + return 1; + } + } + return 0; +} + +typedef int (*RESTART_STON)(char const *); + +static RESTART_STON restart_ston[] = { str_to_int, str_to_sig }; +static char const *restart_what[] = { "exit status", "signal" }; + +void +add_restart_condition(int type, char const *arg) +{ + struct wordsplit ws; + size_t i; + int negate = 0; + struct restart_cond *cond; + RESTART_STON ston = restart_ston[type]; + + if (arg[0] == '!') { + negate = 1; + arg++; + } + + ws.ws_delim = ","; + ws.ws_error = genrc_error; + if (wordsplit(arg, &ws, + WRDSF_NOCMD + | WRDSF_NOVAR + | WRDSF_DELIM + | WRDSF_ENOMEMABRT + | WRDSF_SHOWERR + | WRDSF_ERROR)) + exit(1); + + if (ws.ws_wordc == 0) + usage_error("empty restart condition"); + + cond = xmalloc(sizeof(*cond) + + (ws.ws_wordc - 1) * sizeof(cond->numv[0])); + cond->next = NULL; + cond->type = type; + cond->negate = negate; + cond->numc = ws.ws_wordc; + for (i = 0; i < ws.ws_wordc; i++) { + int n = ston(ws.ws_wordv[i]); + if (n == -1) + usage_error("bad %s: %s", restart_what[type], + ws.ws_wordv[i]); + cond->numv[i] = n; + } + + if (restart_tail) + restart_tail->next = cond; + else + restart_head = cond; 
+ restart_tail = cond; +} + void wait_loop(pid_t child, int out, int err) { fd_set rdset; int nfd = (out > err ? out : err) + 1; struct log_buffer obuf, ebuf; - + openlog(genrc_program, LOG_PID, LOG_DAEMON); log_buffer_init(&obuf, LOG_INFO); log_buffer_init(&ebuf, LOG_ERR); @@ -83,20 +177,24 @@ wait_loop(pid_t child, int out, int err) int rc, status; if (waitpid(child, &status, WNOHANG) == child) { - if (genrc_create_pidfile) - unlink(genrc_create_pidfile); + write_pid_file(getpid()); if (WIFEXITED(status)) { + int code = WEXITSTATUS(status); syslog(LOG_INFO, "%s exited with status %d", - genrc_program, WEXITSTATUS(status)); - _exit(WEXITSTATUS(status)); + genrc_program, code); + if (restart_on(RESTART_ON_EXIT, code)) + return; } else if (WIFSIGNALED(status)) { char const *coremsg = ""; + int sig = WTERMSIG(status); #ifdef WCOREDUMP if (WCOREDUMP(status)) coremsg = " (core dumped)"; #endif syslog(LOG_INFO, "%s terminated on signal %d%s", - genrc_program, WTERMSIG(status), coremsg); + genrc_program, sig, coremsg); + if (restart_on(RESTART_ON_SIGNAL, sig)) + return; } else if (WIFSTOPPED(status)) { syslog(LOG_INFO, "%s stopped on signal %d", genrc_program, WSTOPSIG(status)); @@ -128,7 +226,8 @@ wait_loop(pid_t child, int out, int err) log_buffer_read(err, &ebuf); } } - _exit(1); + unlink_pid_file(); + _exit(0); } pid_t @@ -178,12 +277,78 @@ start_command(int p[]) p[1] = errpipe[0]; return pid; } + +/* Restart rate control */ +static int volatile hup_received; + +static void +sighup(int sig) +{ + hup_received++; +} + +/* Consider the number of restarts during this interval */ +#define TESTTIME 2*60 +/* Stop respawning and go to sleep if it exceeds this number */ +#define MAXSPAWN 10 +/* Sleep that much seconds, then retry */ +#define SLEEPTIME 5*60 + +struct ratectl { + time_t start_time; /* Start of the test interval */ + unsigned failcount; /* Number of restarts done so far */ +}; +static void +check_failure_rate(struct ratectl *rate) +{ + time_t now; + struct 
timeval start, stop, ttw; + + time(&now); + if (rate->start_time + TESTTIME > now) + rate->failcount++; + else { + rate->failcount = 0; + rate->start_time = now; + } + + if (rate->failcount > MAXSPAWN) { + syslog(LOG_NOTICE, + "%s respawning too fast; disabled for %d minutes", + genrc_program, SLEEPTIME / 60); + + gettimeofday(&stop, NULL); + stop.tv_sec += SLEEPTIME; + while (1) { + gettimeofday(&start, NULL); + if (timercmp(&start, &stop, >=)) + break; + timersub(&stop, &start, &ttw); + if (select(0, NULL, NULL, NULL, &ttw) < 0) { + if (errno == EINTR) { + if (hup_received) { + hup_received = 0; + break; + } + } else { + system_error(errno, "select"); + break; + } + } + } + + rate->failcount = 0; + } +} + int sentinel(void) { pid_t pid; int p[2]; + struct ratectl ctl; + struct sigaction act; /* Detach from the controlling terminal */ pid = fork(); @@ -205,10 +370,23 @@ sentinel(void) _exit(0); /* Grand-child */ - pid = start_command(p); - if (pid == -1) - _exit(127); - wait_loop(pid, p[0], p[1]); + act.sa_handler = sighup; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + sigaction(SIGHUP, &act, NULL); + + ctl.start_time = 0; + ctl.failcount = 0; + while (1) { + pid = start_command(p); + if (pid == -1) + _exit(127); + if (pid == 0) + break; + wait_loop(pid, p[0], p[1]); + check_failure_rate(&ctl); + syslog(LOG_INFO, "restarting %s", genrc_program); + } _exit(1); } |