diff options
Diffstat (limited to 'src/sentinel.c')
-rw-r--r-- | src/sentinel.c | 200 |
1 files changed, 189 insertions, 11 deletions
diff --git a/src/sentinel.c b/src/sentinel.c
index 59b89cc..33d3e06 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -5,12 +5,13 @@
    This is free software: you are free to change and redistribute it.
    There is NO WARRANTY, to the extent permitted by law. */
 #include "genrc.h"
 #include <unistd.h>
 #include <fcntl.h>
 #include <syslog.h>
+#include <time.h>
 
 static void
 xpipe(int p[2])
 {
 	if (pipe(p)) {
 		system_error(errno, "pipe");
@@ -28,12 +29,19 @@ write_pid_file(pid_t pid)
 		fp = fopen(genrc_create_pidfile, "w");
 		fprintf(fp, "%lu\n", (unsigned long)pid);
 		fclose(fp);
 	}
 }
 
+static void
+unlink_pid_file(void)
+{
+	if (genrc_create_pidfile)
+		unlink(genrc_create_pidfile);
+}
+
 #define LOGBUFSIZE 1024
 
 struct log_buffer {
 	char buf[LOGBUFSIZE];
 	size_t pos;
 	int prio;
@@ -65,41 +73,131 @@ log_buffer_read(int fd, struct log_buffer *lb)
 			lb->pos = 0;
 		} else
 			lb->pos++;
 	}
 }
 
+struct restart_cond {
+	struct restart_cond *next;
+	int type;
+	int negate;
+	int numc;
+	int numv[1];
+};
+
+struct restart_cond *restart_head, *restart_tail;
+
+static int
+restart_on(int type, int num)
+{
+	struct restart_cond *cond;
+
+	for (cond = restart_head; cond; cond = cond->next) {
+		if (cond->type == type) {
+			int result = cond->negate;
+			int i;
+			for (i = 0; i < cond->numc; i++) {
+				if (cond->numv[i] == num) {
+					result = !result;
+					break;
+				}
+			}
+			if (result)
+				return 1;
+		}
+	}
+	return 0;
+}
+
+typedef int (*RESTART_STON)(char const *);
+
+static RESTART_STON restart_ston[] = { str_to_int, str_to_sig };
+static char const *restart_what[] = { "exit status", "signal" };
+
+void
+add_restart_condition(int type, char const *arg)
+{
+	struct wordsplit ws;
+	size_t i;
+	int negate = 0;
+	struct restart_cond *cond;
+	RESTART_STON ston = restart_ston[type];
+
+	if (arg[0] == '!') {
+		negate = 1;
+		arg++;
+	}
+
+	ws.ws_delim = ",";
+	ws.ws_error = genrc_error;
+	if (wordsplit(arg, &ws,
+		      WRDSF_NOCMD
+		      | WRDSF_NOVAR
+		      | WRDSF_DELIM
+		      | WRDSF_ENOMEMABRT
+		      | WRDSF_SHOWERR
+		      | WRDSF_ERROR))
+		exit(1);
+
+	if (ws.ws_wordc == 0)
+		usage_error("empty restart condition");
+
+	cond = xmalloc(sizeof(*cond)
+		       + (ws.ws_wordc - 1) * sizeof(cond->numv[0]));
+	cond->next = NULL;
+	cond->type = type;
+	cond->negate = negate;
+	cond->numc = ws.ws_wordc;
+	for (i = 0; i < ws.ws_wordc; i++) {
+		int n = ston(ws.ws_wordv[i]);
+		if (n == -1)
+			usage_error("bad %s: %s", restart_what[type],
+				    ws.ws_wordv[i]);
+		cond->numv[i] = n;
+	}
+
+	if (restart_tail)
+		restart_tail->next = cond;
+	else
+		restart_head = cond;
+	restart_tail = cond;
+}
+
 void
 wait_loop(pid_t child, int out, int err)
 {
 	fd_set rdset;
 	int nfd = (out > err ? out : err) + 1;
 	struct log_buffer obuf, ebuf;
-	
+
 	openlog(genrc_program, LOG_PID, LOG_DAEMON);
 	log_buffer_init(&obuf, LOG_INFO);
 	log_buffer_init(&ebuf, LOG_ERR);
 
 	while (1) {
 		int rc, status;
 
 		if (waitpid(child, &status, WNOHANG) == child) {
-			if (genrc_create_pidfile)
-				unlink(genrc_create_pidfile);
+			write_pid_file(getpid());
 			if (WIFEXITED(status)) {
+				int code = WEXITSTATUS(status);
 				syslog(LOG_INFO, "%s exited with status %d",
-				       genrc_program, WEXITSTATUS(status));
-				_exit(WEXITSTATUS(status));
+				       genrc_program, code);
+				if (restart_on(RESTART_ON_EXIT, code))
+					return;
 			} else if (WIFSIGNALED(status)) {
 				char const *coremsg = "";
+				int sig = WTERMSIG(status);
 #ifdef WCOREDUMP
 				if (WCOREDUMP(status))
 					coremsg = " (core dumped)";
 #endif
 				syslog(LOG_INFO, "%s terminated on signal %d%s",
-				       genrc_program, WTERMSIG(status), coremsg);
+				       genrc_program, sig, coremsg);
+				if (restart_on(RESTART_ON_SIGNAL, sig))
+					return;
 			} else if (WIFSTOPPED(status)) {
 				syslog(LOG_INFO, "%s stopped on signal %d",
 				       genrc_program, WSTOPSIG(status));
 			} else {
 				syslog(LOG_INFO, "%s terminated; status %d",
 				       genrc_program, rc);
@@ -125,13 +223,14 @@ wait_loop(pid_t child, int out, int err)
 			log_buffer_read(out, &obuf);
 		}
 		if (FD_ISSET(err, &rdset)) {
 			log_buffer_read(err, &ebuf);
 		}
 	}
-	_exit(1);
+	unlink_pid_file();
+	_exit(0);
 }
 
 pid_t
 start_command(int p[])
 {
 	int errpipe[2], outpipe[2];
@@ -175,18 +274,84 @@ start_command(int p[])
 	close(errpipe[1]);
 
 	p[0] = outpipe[0];
 	p[1] = errpipe[0];
 	return pid;
 }
+
+/* Restart rate control */
+static int volatile hup_received;
+
+static void
+sighup(int sig)
+{
+	hup_received++;
+}
+
+/* Consider the number of restarts during this interval */
+#define TESTTIME 2*60
+/* Stop respawning and go to sleep if it exceeds this number */
+#define MAXSPAWN 10
+/* Sleep that much seconds, then retry */
+#define SLEEPTIME 5*60
+
+struct ratectl {
+	time_t start_time;   /* Start of the test interval */
+	unsigned failcount;  /* Number of restarts done so far */
+};
+static void
+check_failure_rate(struct ratectl *rate)
+{
+	time_t now;
+	struct timeval start, stop, ttw;
+
+	time(&now);
+	if (rate->start_time + TESTTIME > now)
+		rate->failcount++;
+	else {
+		rate->failcount = 0;
+		rate->start_time = now;
+	}
+
+	if (rate->failcount > MAXSPAWN) {
+		syslog(LOG_NOTICE,
+		       "%s respawning too fast; disabled for %d minutes",
+		       genrc_program, SLEEPTIME / 60);
+
+		gettimeofday(&stop, NULL);
+		stop.tv_sec += SLEEPTIME;
+		while (1) {
+			gettimeofday(&start, NULL);
+			if (timercmp(&start, &stop, >=))
+				break;
+			timersub(&stop, &start, &ttw);
+			if (select(0, NULL, NULL, NULL, &ttw) < 0) {
+				if (errno == EINTR) {
+					if (hup_received) {
+						hup_received = 0;
+						break;
+					}
+				} else {
+					system_error(errno, "select");
+					break;
+				}
+			}
+		}
+
+		rate->failcount = 0;
+	}
+}
+
 
 int
 sentinel(void)
 {
 	pid_t pid;
 	int p[2];
+	struct ratectl ctl;
+	struct sigaction act;
 
 	/* Detach from the controlling terminal */
 	pid = fork();
 	if (pid == -1) {
 		system_error(errno, "fork");
 		return -1;
@@ -202,14 +367,27 @@ sentinel(void)
 		exit(1);
 	}
 	if (pid)
 		_exit(0);
 
 	/* Grand-child */
-	pid = start_command(p);
-	if (pid == -1)
-		_exit(127);
-	wait_loop(pid, p[0], p[1]);
+	act.sa_handler = sighup;
+	act.sa_flags = 0;
+	sigemptyset(&act.sa_mask);
+	sigaction(SIGHUP, &act, NULL);
+
+	ctl.start_time = 0;
+	ctl.failcount = 0;
+	while (1) {
+		pid = start_command(p);
+		if (pid == -1)
+			_exit(127);
+		if (pid == 0)
+			break;
+		wait_loop(pid, p[0], p[1]);
+		check_failure_rate(&ctl);
+		syslog(LOG_INFO, "restarting %s", genrc_program);
+	}
 	_exit(1);
 }
 
 