summary | refs | log | tree | commit | diff | about
path: root/src/sentinel.c
Side-by-side diff
Diffstat (limited to 'src/sentinel.c') (more/less context) (ignore whitespace changes)
-rw-r--r-- src/sentinel.c 200
1 files changed, 189 insertions, 11 deletions
diff --git a/src/sentinel.c b/src/sentinel.c
index 59b89cc..33d3e06 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -8,6 +8,7 @@ There is NO WARRANTY, to the extent permitted by law.
#include <unistd.h>
#include <fcntl.h>
#include <syslog.h>
+#include <time.h>
static void
xpipe(int p[2])
@@ -31,6 +32,13 @@ write_pid_file(pid_t pid)
}
}
+static void
+unlink_pid_file(void)
+{ /* Remove the file named by genrc_create_pidfile, if that name is set. */
+ if (genrc_create_pidfile) /* skip when no name is set (null pointer) */
+ unlink(genrc_create_pidfile); /* return value of unlink is not checked */
+}
+
#define LOGBUFSIZE 1024
struct log_buffer {
@@ -68,13 +76,99 @@ log_buffer_read(int fd, struct log_buffer *lb)
}
}
+struct restart_cond { /* One restart condition; nodes are chained into a singly linked list. */
+ struct restart_cond *next; /* following node; NULL for the last one */
+ int type; /* condition kind; matched by restart_on() and used to index restart_ston[]/restart_what[] */
+ int negate; /* 1 if the argument began with '!'; inverts the match in restart_on() */
+ int numc; /* number of entries stored in numv */
+ int numv[1]; /* values to match; the node is allocated with room for numc entries */
+};
+
+struct restart_cond *restart_head, *restart_tail; /* first and last node of the condition list */
+
+static int
+restart_on(int type, int num)
+{ /* Return 1 if some condition of kind TYPE in the list is satisfied by NUM, 0 otherwise. */
+ struct restart_cond *cond;
+
+ for (cond = restart_head; cond; cond = cond->next) {
+ if (cond->type == type) { /* conditions of other kinds are ignored */
+ int result = cond->negate; /* value to report when NUM is not listed */
+ int i;
+ for (i = 0; i < cond->numc; i++) {
+ if (cond->numv[i] == num) {
+ result = !result; /* NUM is listed: plain condition matches, negated one does not */
+ break;
+ }
+ }
+ if (result) /* the first satisfied condition decides */
+ return 1;
+ }
+ }
+ return 0;
+}
+
+typedef int (*RESTART_STON)(char const *); /* function turning a string into an int */
+
+static RESTART_STON restart_ston[] = { str_to_int, str_to_sig }; /* converters, indexed by condition type */
+static char const *restart_what[] = { "exit status", "signal" }; /* names for error messages, same indexing */
+
+void
+add_restart_condition(int type, char const *arg)
+{ /* Parse ARG (optional leading '!', then a comma-delimited list) and append a condition of kind TYPE to the list. */
+ struct wordsplit ws;
+ size_t i;
+ int negate = 0;
+ struct restart_cond *cond;
+ RESTART_STON ston = restart_ston[type]; /* NOTE(review): type indexes the table without a range check */
+
+ if (arg[0] == '!') { /* a leading '!' negates the condition */
+ negate = 1;
+ arg++;
+ }
+
+ ws.ws_delim = ","; /* list items are separated by commas */
+ ws.ws_error = genrc_error;
+ if (wordsplit(arg, &ws,
+ WRDSF_NOCMD
+ | WRDSF_NOVAR
+ | WRDSF_DELIM
+ | WRDSF_ENOMEMABRT
+ | WRDSF_SHOWERR
+ | WRDSF_ERROR))
+ exit(1);
+
+ if (ws.ws_wordc == 0)
+ usage_error("empty restart condition");
+
+ cond = xmalloc(sizeof(*cond) /* the struct already holds one numv slot, hence "- 1" on the next line */
+ + (ws.ws_wordc - 1) * sizeof(cond->numv[0]));
+ cond->next = NULL;
+ cond->type = type;
+ cond->negate = negate;
+ cond->numc = ws.ws_wordc;
+ for (i = 0; i < ws.ws_wordc; i++) {
+ int n = ston(ws.ws_wordv[i]);
+ if (n == -1) /* -1 from the converter is treated as a bad item */
+ usage_error("bad %s: %s", restart_what[type],
+ ws.ws_wordv[i]);
+ cond->numv[i] = n;
+ }
+ /* NOTE(review): no wordsplit_free(&ws) call is visible in this function; confirm whether ws should be released */
+ if (restart_tail) /* append to a non-empty list */
+ restart_tail->next = cond;
+ else /* first condition: it becomes the head as well */
+ restart_head = cond;
+ restart_tail = cond;
+}
+
void
wait_loop(pid_t child, int out, int err)
{
fd_set rdset;
int nfd = (out > err ? out : err) + 1;
struct log_buffer obuf, ebuf;
-
+
openlog(genrc_program, LOG_PID, LOG_DAEMON);
log_buffer_init(&obuf, LOG_INFO);
log_buffer_init(&ebuf, LOG_ERR);
@@ -83,20 +177,24 @@ wait_loop(pid_t child, int out, int err)
int rc, status;
if (waitpid(child, &status, WNOHANG) == child) {
- if (genrc_create_pidfile)
- unlink(genrc_create_pidfile);
+ write_pid_file(getpid());
if (WIFEXITED(status)) {
+ int code = WEXITSTATUS(status);
syslog(LOG_INFO, "%s exited with status %d",
- genrc_program, WEXITSTATUS(status));
- _exit(WEXITSTATUS(status));
+ genrc_program, code);
+ if (restart_on(RESTART_ON_EXIT, code))
+ return;
} else if (WIFSIGNALED(status)) {
char const *coremsg = "";
+ int sig = WTERMSIG(status);
#ifdef WCOREDUMP
if (WCOREDUMP(status))
coremsg = " (core dumped)";
#endif
syslog(LOG_INFO, "%s terminated on signal %d%s",
- genrc_program, WTERMSIG(status), coremsg);
+ genrc_program, sig, coremsg);
+ if (restart_on(RESTART_ON_SIGNAL, sig))
+ return;
} else if (WIFSTOPPED(status)) {
syslog(LOG_INFO, "%s stopped on signal %d",
genrc_program, WSTOPSIG(status));
@@ -128,7 +226,8 @@ wait_loop(pid_t child, int out, int err)
log_buffer_read(err, &ebuf);
}
}
- _exit(1);
+ unlink_pid_file();
+ _exit(0);
}
pid_t
@@ -178,12 +277,78 @@ start_command(int p[])
p[1] = errpipe[0];
return pid;
}
+
+/* Restart rate control */
+static int volatile hup_received; /* bumped by sighup(); tested and cleared in check_failure_rate() */
+
+static void
+sighup(int sig)
+{ /* Signal handler (installed for SIGHUP in sentinel()): only counts the signal; SIG is unused. */
+ hup_received++;
+}
+
+/* Consider the number of restarts during this interval (seconds) */
+#define TESTTIME 2*60 /* NOTE(review): expansion is unparenthesized; the uses visible here are unaffected */
+/* Stop respawning and go to sleep if the restart count exceeds this number */
+#define MAXSPAWN 10
+/* Sleep that many seconds, then retry */
+#define SLEEPTIME 5*60 /* NOTE(review): unparenthesized; "SLEEPTIME / 60" expands to 5*60 / 60, which still yields 5 */
+
+struct ratectl { /* Restart-rate bookkeeping updated by check_failure_rate() */
+ time_t start_time; /* Start of the test interval */
+ unsigned failcount; /* Number of restarts counted in the current test interval */
+};
+static void
+check_failure_rate(struct ratectl *rate)
+{ /* Count one restart; once more than MAXSPAWN fall within TESTTIME, pause for SLEEPTIME seconds or until SIGHUP. */
+ time_t now;
+ struct timeval start, stop, ttw;
+
+ time(&now);
+ if (rate->start_time + TESTTIME > now) /* still inside the current test interval */
+ rate->failcount++;
+ else { /* interval is over: begin a new one at the current time */
+ rate->failcount = 0;
+ rate->start_time = now;
+ }
+
+ if (rate->failcount > MAXSPAWN) {
+ syslog(LOG_NOTICE,
+ "%s respawning too fast; disabled for %d minutes",
+ genrc_program, SLEEPTIME / 60);
+
+ gettimeofday(&stop, NULL);
+ stop.tv_sec += SLEEPTIME; /* stop = absolute time at which the pause ends */
+ while (1) {
+ gettimeofday(&start, NULL);
+ if (timercmp(&start, &stop, >=)) /* pause fully elapsed */
+ break;
+ timersub(&stop, &start, &ttw); /* ttw = time still left to wait */
+ if (select(0, NULL, NULL, NULL, &ttw) < 0) { /* select with no descriptors is used as an interruptible sleep */
+ if (errno == EINTR) {
+ if (hup_received) { /* hup_received set by sighup(): end the pause early; otherwise keep waiting */
+ hup_received = 0;
+ break;
+ }
+ } else {
+ system_error(errno, "select");
+ break;
+ }
+ }
+ }
+
+ rate->failcount = 0; /* start counting afresh after the pause */
+ }
+}
+
int
sentinel(void)
{
pid_t pid;
int p[2];
+ struct ratectl ctl;
+ struct sigaction act;
/* Detach from the controlling terminal */
pid = fork();
@@ -205,10 +370,23 @@ sentinel(void)
_exit(0);
/* Grand-child */
- pid = start_command(p);
- if (pid == -1)
- _exit(127);
- wait_loop(pid, p[0], p[1]);
+ act.sa_handler = sighup;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGHUP, &act, NULL);
+
+ ctl.start_time = 0;
+ ctl.failcount = 0;
+ while (1) {
+ pid = start_command(p);
+ if (pid == -1)
+ _exit(127);
+ if (pid == 0)
+ break;
+ wait_loop(pid, p[0], p[1]);
+ check_failure_rate(&ctl);
+ syslog(LOG_INFO, "restarting %s", genrc_program);
+ }
_exit(1);
}

Return to:

Send suggestions and report system problems to the System administrator.