/* This file is part of genrc
   Copyright (C) 2018-2020 Sergey Poznyakoff
   License GPLv3+: GNU GPL version 3 or later
   This is free software: you are free to change and redistribute it.
   There is NO WARRANTY, to the extent permitted by law.
*/
#include "genrc.h"
/*
 * NOTE(review): the names of the two system headers originally included
 * here were lost.  The list below covers the system interfaces this file
 * uses directly; confirm against the upstream source.
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <syslog.h>
#include <time.h>
#include <unistd.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>

/* Create a pipe in P.  On failure, report the error and exit with status 1. */
static void
xpipe(int p[2])
{
	if (pipe(p)) {
		system_error(errno, "pipe");
		exit(1);
	}
}

/* Return the greater of A and B. */
static inline int
max(int a, int b)
{
	return a > b ? a : b;
}

/*
 * If a pidfile name is configured (genrc_create_pidfile is not NULL),
 * overwrite that file with PID followed by a newline.  Failures are
 * reported via syslog and are otherwise not fatal.
 */
static void
write_pid_file(pid_t pid)
{
	FILE *fp;

	if (!genrc_create_pidfile)
		return;

	fp = fopen(genrc_create_pidfile, "w");
	if (!fp) {
		syslog(LOG_ERR, "can't open pidfile %s: %m",
		       genrc_create_pidfile);
		return;
	}
	fprintf(fp, "%lu\n", (unsigned long)pid);
	/* fclose flushes the stream, so write errors surface here. */
	if (fclose(fp))
		syslog(LOG_ERR, "error writing pidfile %s: %m",
		       genrc_create_pidfile);
}

/* Remove the pidfile, if one is configured. */
static void
unlink_pid_file(void)
{
	if (genrc_create_pidfile)
		unlink(genrc_create_pidfile);
}

#define LOGBUFSIZE 1024

/* Line accumulator for one logged stream. */
struct log_buffer {
	char buf[LOGBUFSIZE];   /* Collected characters */
	size_t pos;             /* Number of characters collected so far */
	int prio;               /* Syslog priority for this stream */
};

/* Initialize LB as an empty buffer logging at priority PRIO. */
static void
log_buffer_init(struct log_buffer *lb, int prio)
{
	lb->buf[sizeof(lb->buf) - 1] = 0;
	lb->pos = 0;
	lb->prio = prio;
}

/* Send the text collected in LB to syslog and empty the buffer. */
static void
log_buffer_flush(struct log_buffer *lb)
{
	lb->buf[lb->pos] = 0;
	syslog(lb->prio, "%s", lb->buf);
	lb->pos = 0;
}

/*
 * Read one character from FD into LB.  The collected text is logged
 * when a newline arrives or when the buffer becomes full.
 *
 * Returns 0 on success and -1 on read error or end of file.  On end of
 * file, a pending unterminated line is logged before returning.
 */
static int
log_buffer_read(int fd, struct log_buffer *lb)
{
	char c;
	ssize_t rc = read(fd, &c, 1);

	if (rc == -1) {
		syslog(LOG_ERR, "read: %m");
		return -1;
	}
	if (rc != 1) {
		if (lb->pos > 0)
			log_buffer_flush(lb);
		return -1;
	}

	if (c == '\n') {
		log_buffer_flush(lb);
	} else {
		lb->buf[lb->pos++] = c;
		/*
		 * Always keep the last byte free for the terminating NUL:
		 * flush as soon as only that byte is left.
		 */
		if (lb->pos == sizeof(lb->buf) - 1)
			log_buffer_flush(lb);
	}
	return 0;
}

/* A restart condition: a (possibly negated) set of numbers of one type. */
struct restart_cond {
	struct restart_cond *next;  /* Next condition in the list */
	int type;                   /* Index into restart_ston/restart_what */
	int negate;                 /* Match if the number is NOT listed */
	int numc;                   /* Number of elements in numv */
	int numv[];                 /* Exit codes or signal numbers */
};

struct restart_cond *restart_head, *restart_tail;

/*
 * Return 1 if at least one registered condition of the given TYPE
 * matches NUM, and 0 otherwise.  A condition matches if NUM is in its
 * list, or, for a negated condition, if NUM is not in its list.
 */
static int
restart_on(int type, int num)
{
	struct restart_cond *cond;

	for (cond = restart_head; cond; cond = cond->next) {
		int result;
		int i;

		if (cond->type != type)
			continue;

		result = cond->negate;
		for (i = 0; i < cond->numc; i++) {
			if (cond->numv[i] == num) {
				result = !result;
				break;
			}
		}
		if (result)
			return 1;
	}
	return 0;
}

typedef int (*RESTART_STON)(char const *);

/* String-to-number converters, indexed by condition type. */
static RESTART_STON restart_ston[] = {
	str_to_int,
	str_to_sig
};

/* Descriptions used in diagnostics, indexed by condition type. */
static char const *restart_what[] = {
	"exit status",
	"signal"
};

/*
 * Parse ARG, a comma-separated list of numbers optionally prefixed with
 * '!' (negation), and append it to the list of restart conditions of
 * the given TYPE.  TYPE selects the converter from restart_ston; it is
 * not range-checked here.
 *
 * Exits if wordsplit fails.  Calls usage_error on an empty list or on an
 * element the converter rejects (signalled by a return of -1).
 */
void
add_restart_condition(int type, char const *arg)
{
	struct wordsplit ws;
	size_t i;
	int negate = 0;
	struct restart_cond *cond;
	RESTART_STON ston = restart_ston[type];

	if (arg[0] == '!') {
		negate = 1;
		arg++;
	}

	ws.ws_delim = ",";
	ws.ws_error = genrc_error;
	if (wordsplit(arg, &ws,
		      WRDSF_NOCMD
		      | WRDSF_NOVAR
		      | WRDSF_DELIM
		      | WRDSF_ENOMEMABRT
		      | WRDSF_SHOWERR
		      | WRDSF_ERROR))
		exit(1);

	if (ws.ws_wordc == 0)
		usage_error("empty restart condition");

	cond = xmalloc(sizeof(*cond) + ws.ws_wordc * sizeof(cond->numv[0]));
	cond->next = NULL;
	cond->type = type;
	cond->negate = negate;
	cond->numc = ws.ws_wordc;
	for (i = 0; i < ws.ws_wordc; i++) {
		int n = ston(ws.ws_wordv[i]);
		if (n == -1)
			usage_error("bad %s: %s",
				    restart_what[type], ws.ws_wordv[i]);
		cond->numv[i] = n;
	}
	/* The words have been converted; release the split result. */
	wordsplit_free(&ws);

	if (restart_tail)
		restart_tail->next = cond;
	else
		restart_head = cond;
	restart_tail = cond;
}

/*
 * Fork the logger process.  In the parent, return the value of fork()
 * (the logger PID, or -1 on failure).  The logger itself never returns:
 * it copies lines read from OUT to syslog at LOG_INFO and lines read
 * from ERR at LOG_ERR, until both descriptors have been closed after
 * end of file or a read error, then exits with status 0.
 *
 * Note: sentinel() creates both pipes before calling this function and
 * the write ends are not closed here.
 */
static pid_t
start_logger(int out, int err)
{
	struct log_buffer obuf, ebuf;
	pid_t pid = fork();

	if (pid != 0)
		return pid;

	mf_proctitle_format("logger [%s]", genrc_command);

	/* Actual logger starts here: */
	genrc_openlog();
	log_buffer_init(&obuf, LOG_INFO);
	log_buffer_init(&ebuf, LOG_ERR);

	/* A descriptor value of -1 marks a stream that has been closed. */
	while (out != -1 || err != -1) {
		fd_set rdset;
		int rc;

		FD_ZERO(&rdset);
		if (out != -1)
			FD_SET(out, &rdset);
		if (err != -1)
			FD_SET(err, &rdset);

		rc = select(max(out, err) + 1, &rdset, NULL, NULL, NULL);
		if (rc == -1) {
			if (errno == EINTR)
				continue;
			syslog(LOG_CRIT, "logger: select: %m");
			exit(1);
		}

		/* FD_ISSET must not be used on a closed (-1) descriptor. */
		if (out != -1 && FD_ISSET(out, &rdset)) {
			if (log_buffer_read(out, &obuf)) {
				close(out);
				out = -1;
			}
		}
		if (err != -1 && FD_ISSET(err, &rdset)) {
			if (log_buffer_read(err, &ebuf)) {
				close(err);
				err = -1;
			}
		}
	}
	exit(0);
}

/* Indices to the array of PIDs */
enum {
	PID_LOGGER,  /* Slot for logger process PID. */
	PID_CHILD    /* Slot for child process PID. */
};

/*
 * Wait for one of the subprocesses to terminate.
 * If child terminates, check if its termination condition (status code
 * or signal number) matches declared restart conditions.
 * If logger terminates with abnormal status, terminate the child and
 * restart it unconditionally.
 *
 * The function returns if the child needs to be restarted.  Otherwise,
 * it exits.
 */
void
wait_loop(pid_t cpid[])
{
	int restart = 0;

	while (1) {
		int status;
		pid_t pid;

		pid = waitpid((pid_t)-1, &status, 0);
		if (pid == -1) {
			/*
			 * STATUS is not valid here, and -1 must not be
			 * mistaken for an emptied logger slot.
			 */
			if (errno == EINTR)
				continue;
			syslog(LOG_CRIT, "waitpid: %m");
			break;
		}

		if (pid == cpid[PID_CHILD]) {
			write_pid_file(getpid());
			if (WIFEXITED(status)) {
				int code = WEXITSTATUS(status);
				syslog(LOG_INFO, "%s exited with status %d",
				       genrc_program, code);
				if (!restart)
					restart = restart_on(RESTART_ON_EXIT,
							     code);
			} else if (WIFSIGNALED(status)) {
				char const *coremsg = "";
				int sig = WTERMSIG(status);
#ifdef WCOREDUMP
				if (WCOREDUMP(status))
					coremsg = " (core dumped)";
#endif
				syslog(LOG_INFO, "%s terminated on signal %d%s",
				       genrc_program, sig, coremsg);
				if (!restart)
					restart = restart_on(RESTART_ON_SIGNAL,
							     sig);
			} else if (WIFSTOPPED(status)) {
				syslog(LOG_INFO, "%s stopped on signal %d",
				       genrc_program, WSTOPSIG(status));
			} else {
				syslog(LOG_INFO, "%s terminated; status %d",
				       genrc_program, status);
			}

			if (cpid[PID_LOGGER] != -1)
				kill(cpid[PID_LOGGER], SIGKILL);
			if (restart)
				return;
			break;
		}

		if (pid == cpid[PID_LOGGER]) {
			syslog(LOG_CRIT, "logger exited");
			cpid[PID_LOGGER] = -1;
			if (WIFEXITED(status)) {
				if (WEXITSTATUS(status) == 0) {
					/* Wait for the child to exit. */
					continue;
				}
			}
			// FIXME
			syslog(LOG_CRIT, "stopping %lu and restarting",
			       (unsigned long) cpid[PID_CHILD]);
			kill(cpid[PID_CHILD], SIGTERM);
			restart = 1;
			continue;
		}
	}
	unlink_pid_file();
	_exit(0);
}

/*
 * Fork a process to run the command, passing OUT and ERR to spawn() as
 * a two-element descriptor array.  Both the parent and the new process
 * call setsid().  The parent records the new PID in the pidfile.
 *
 * Returns the PID of the new process, or -1 if fork() failed.
 */
pid_t
start_command(int out, int err)
{
	pid_t pid;

	pid = fork();
	if (pid == -1) {
		/* Report before any other call can change errno. */
		system_error(errno, "fork");
		return -1;
	}
	setsid();
	if (pid == 0) {
		int p[2];
		p[0] = out;
		p[1] = err;
		/* NOTE(review): spawn() presumably does not return. */
		spawn(p);
	}
	write_pid_file(pid);
	return pid;
}

/* Restart rate control */

/* Set by the SIGHUP handler; cuts short the pause in check_failure_rate. */
static volatile sig_atomic_t hup_received;

/* SIGHUP handler: only record that the signal has arrived. */
static void
sighup(int sig)
{
	(void) sig;
	hup_received = 1;
}

/* Consider the number of restarts during this interval */
#define TESTTIME (2*60)
/* Stop respawning and go to sleep if it exceeds this number */
#define MAXSPAWN 10
/* Sleep that much seconds, then retry */
#define SLEEPTIME (5*60)

struct ratectl {
	time_t start_time;   /* Start of the test interval */
	unsigned failcount;  /* Number of restarts done so far */
};

/*
 * Account for one more restart in RATE.  If more than MAXSPAWN restarts
 * occurred within TESTTIME seconds, log a notice and pause for SLEEPTIME
 * seconds.  The pause ends early if SIGHUP arrives or select() fails
 * with an error other than EINTR.
 */
static void
check_failure_rate(struct ratectl *rate)
{
	time_t now;
	struct timeval start, stop, ttw;

	time(&now);
	if (rate->start_time + TESTTIME > now)
		rate->failcount++;
	else {
		rate->failcount = 0;
		rate->start_time = now;
	}

	if (rate->failcount > MAXSPAWN) {
		syslog(LOG_NOTICE,
		       "%s respawning too fast; disabled for %d minutes",
		       genrc_program, SLEEPTIME / 60);

		/* Only a SIGHUP that arrives from now on ends the pause. */
		hup_received = 0;
		gettimeofday(&stop, NULL);
		stop.tv_sec += SLEEPTIME;
		while (!hup_received) {
			gettimeofday(&start, NULL);
			if (timercmp(&start, &stop, >=))
				break;
			timersub(&stop, &start, &ttw);
			if (select(0, NULL, NULL, NULL, &ttw) < 0
			    && errno != EINTR) {
				system_error(errno, "select");
				break;
			}
		}
		hup_received = 0;
		rate->failcount = 0;
	}
}

/*
 * Start the sentinel.  Forks twice, calling setsid() in between.  The
 * original caller gets 0 back, or -1 if the first fork() failed.  The
 * grand-child never returns: it installs the SIGHUP handler and then
 * repeatedly starts a logger and the command, waits for them in
 * wait_loop() and applies restart rate control before starting them
 * again.
 */
int
sentinel(void)
{
	pid_t pid;
	struct ratectl ctl;
	struct sigaction act;

	/* Detach from the controlling terminal */
	pid = fork();
	if (pid == -1) {
		system_error(errno, "fork");
		return -1;
	}
	if (pid) /* master */
		return 0;

	/* Run as a session leader */
	setsid();

	pid = fork();
	if (pid == -1) {
		system_error(errno, "fork");
		exit(1);
	}
	if (pid)
		_exit(0);

	/* Grand-child */
	mf_proctitle_format("sentinel [%s]", genrc_command);

	act.sa_handler = sighup;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);
	sigaction(SIGHUP, &act, NULL);

	ctl.start_time = 0;
	ctl.failcount = 0;

	while (1) {
		int errpipe[2], outpipe[2];
		pid_t cpid[2];

		xpipe(errpipe);
		xpipe(outpipe);

		cpid[PID_LOGGER] = start_logger(outpipe[0], errpipe[0]);
		if (cpid[PID_LOGGER] == -1)
			_exit(127);

		cpid[PID_CHILD] = start_command(outpipe[1], errpipe[1]);
		if (cpid[PID_CHILD] == -1)
			_exit(127);

		/* Both subprocesses have their own copies of the pipes. */
		close(outpipe[0]);
		close(outpipe[1]);
		close(errpipe[0]);
		close(errpipe[1]);

		wait_loop(cpid);
		check_failure_rate(&ctl);
		syslog(LOG_INFO, "restarting %s", genrc_program);
	}
	_exit(1);
}