summary refs log tree commit diff about
author Sergey Poznyakoff <gray@gnu.org> 2018-05-20 07:53:30 (GMT)
committer Sergey Poznyakoff <gray@gnu.org> 2018-05-20 07:53:30 (GMT)
commit ddb46c6aa42ada061e51c635c0230e4dc8eab881 (patch) (side-by-side diff)
tree b003ae6af354f553207981b4fc281e8f9e19c60e
parent ed8389beadb7cf1f8d95fe7addbc9ff2783f4d07 (diff)
download genrc-ddb46c6aa42ada061e51c635c0230e4dc8eab881.tar.gz
genrc-ddb46c6aa42ada061e51c635c0230e4dc8eab881.tar.bz2
Sentinel mode: restart the program on certain conditions
* Makefile.am: Create the ChangeLog file from git log.
* configure.ac: Request git2chg.
* src/com_start.c: Use sigaction instead of signal.
* src/genrc.8: Document new options.
* src/genrc.c: New options --restart-on-exit and --restart-on-signal.
* src/genrc.h (str_to_sig, str_to_int): New prototypes.
(add_restart_condition): New prototype.
* src/sentinel.c (restart_on, add_restart_condition, check_failure_rate): New functions.
(wait_loop): Return if restart is requested.
(sentinel): Restart the program if needed.
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- Makefile.am 11
-rw-r--r-- configure.ac 4
-rw-r--r-- src/com_start.c 12
-rw-r--r-- src/genrc.8 58
-rw-r--r-- src/genrc.c 85
-rw-r--r-- src/genrc.h 10
-rw-r--r-- src/sentinel.c 200
7 files changed, 332 insertions, 48 deletions
diff --git a/Makefile.am b/Makefile.am
index 31e9e5f..dac3cb3 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1 +1,12 @@
SUBDIRS = grecs src
+dist: ChangeLog
+.PHONY: ChangeLog
+ChangeLog:
+ $(AM_V_GEN)if test -d .git; then \
+ git log --pretty='format:%ct %an <%ae>%n%n%s%n%n%b%n' | \
+ awk -f $(top_srcdir)/@GRECS_SUBDIR@/build-aux/git2chg.awk \
+ > ChangeLog.tmp; \
+ cmp ChangeLog ChangeLog.tmp > /dev/null 2>&1 || \
+ mv ChangeLog.tmp ChangeLog; \
+ rm -f ChangeLog.tmp; \
+ fi
diff --git a/configure.ac b/configure.ac
index 5d36092..a568649 100644
--- a/configure.ac
+++ b/configure.ac
@@ -12,13 +12,13 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with genrc. If not, see <http://www.gnu.org/licenses/>.
AC_PREREQ([2.69])
-AC_INIT([genrc], [1.0], [gray@gnu.org])
+AC_INIT([genrc], [1.0.90], [gray@gnu.org])
AC_CONFIG_SRCDIR([src/genrc.c])
AC_CONFIG_HEADERS([config.h])
AM_INIT_AUTOMAKE([1.11 foreign silent-rules])
AC_CONFIG_MACRO_DIR(grecs/am)
# Enable silent rules by default
@@ -34,13 +34,13 @@ AC_CHECK_LIB(pcre, main)
# Checks for header files.
AC_CHECK_HEADERS([getopt.h pcre.h])
# Checks for library functions.
AC_CHECK_FUNCS([getdtablesize])
-GRECS_SETUP(grecs, [all-parsers])
+GRECS_SETUP(grecs, [all-parsers git2chg])
AM_CONDITIONAL([COND_PCRE],
[test "$ac_cv_header_pcre_h" = yes && test "$ac_cv_lib_pcre_main" = yes])
if test -f /proc/$$/cmdline; then
DEFAULT_PID_SOURCE='"PROC"'
diff --git a/src/com_start.c b/src/com_start.c
index 5744e39..3a9dffc 100644
--- a/src/com_start.c
+++ b/src/com_start.c
@@ -40,15 +40,19 @@ sigchld(int sig)
int
timedwaitpid(pid_t pid, int *status)
{
struct timeval now, stoptime, ttw;
int rc = -1;
- SIGHANDLER oldsig;
-
- oldsig = signal(SIGCHLD, sigchld);
+ struct sigaction act, oldact;
+
+ act.sa_handler = sigchld;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGCHLD, &act, &oldact);
+
gettimeofday(&stoptime, NULL);
stoptime.tv_sec += genrc_timeout;
while (1) {
pid_t p;
p = waitpid(pid, status, WNOHANG);
@@ -70,13 +74,13 @@ timedwaitpid(pid_t pid, int *status)
system_error(errno, "select");
break;
}
}
}
- signal(SIGCHLD, oldsig);
+ sigaction(SIGCHLD, &oldact, NULL);
if (rc) {
kill(pid, SIGKILL);
}
return rc;
}
diff --git a/src/genrc.8 b/src/genrc.8
index 00522ee..959a00e 100644
--- a/src/genrc.8
+++ b/src/genrc.8
@@ -10,13 +10,13 @@
.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
.\" GNU General Public License for more details.
.\"
.\" You should have received a copy of the GNU General Public License
.\" along with genrc. If not, see <http://www.gnu.org/licenses/>.
-.TH GENRC 8 "May 17, 2018" "GENRC" "Genrc User Manual"
+.TH GENRC 8 "May 20, 2018" "GENRC" "Genrc User Manual"
.SH NAME
genrc \- generic system initialization script helper
.SH SYNOPSIS
.nh
.na
\fBgenrc\fR\
@@ -33,12 +33,14 @@ genrc \- generic system initialization script helper
[\fB\-\-group=\fIGROUP\fR[,\fIGROUP\fR...]]\
[\fB\-\-help\fR]\
[\fB\-\-no\-reload\fR]\
[\fB\-\-pid\-from=\fISOURCE\fR]\
[\fB\-\-pidfile=\fIPIDFILE\fR]\
[\fB\-\-program=\fIPROGRAM\fR]\
+ [\fB\-\-restart\-on\-exit=\fR[\fB!\fR]\fISTATUS\fR[\fB,\fISTATUS\fR...]]\
+ [\fB\-\-restart\-on\-signal=\fR[\fB!\fR]\fISIG\fR[\fB,\fISIG\fR...]]\
[\fB\-\-sentinel\fR]\
[\fB\-\-signal\-reload=\fISIG\fR]\
[\fB\-\-signal\-stop=\fISIG\fR]\
[\fB\-\-timeout=\fISECONDS\fR]\
[\fB\-\-user=\fIUSER\fR]\
[\fB\-\-usage\fR]\
@@ -98,12 +100,36 @@ with the priority \fBinfo\fR and the error with the priority
.PP
If the \fB\-\-create\-pidfile=\fIFILENAME\fR option is given together with
\fB\-\-sentinel\fR, the PID of the subsidiary command will be stored
in \fIFILE\fR. The file will be unlinked after the subsidiary command
terminates. Unless the \fB\-\-pid\-from\fR option is given,
\fB\-\-pid\-from=FILE:\fIFILENAME\fR will be assumed.
+.PP
+In sentinel mode, it is possible to restart the program if it
+terminates with a specific exit code or on a specific signal. This is
+controlled by the \fB\-\-restart\-on\-exit\fR and
+\fB\-\-restart\-on\-signal\fR options. Use this feature to ensure the
+service provided by the program won't get terminated because of
+hitting a bug or encountering an unforeseen external condition. For
+example, the following two options will ensure that the program is
+restarted unless it exits with status 0 or is terminated by the
+SIGTERM or SIGQUIT signal:
+.EX
+--restart-on-exit='!0' --restart-on-signal='!TERM,QUIT'
+.EE
+.PP
+If restarts are requested, \fBgenrc\fR will control how often it has
+to restart the program using the same algorithm as
+.BR init (8).
+Namely, if the program is restarted more than 10 times within two
+minutes, \fBgenrc\fR will disable subsequent restarts for the next
+5 minutes. If the \fB\-\-create\-pidfile\fR option was used, the
+PID of the controlling \fBgenrc\fR process will be stored in the
+file during that interval. If the \fBSIGHUP\fR signal is delivered
+during the sleep interval, the sleep will be broken prematurely and
+the program restarted again.
.SS status
In \fBstatus\fR mode \fBgenrc\fR verifies if the \fICOMMAND\fR is
already running and outputs its status on the standard output. To this
effect, it uses an abstraction called \fIPID source\fR, which allows
it to determine the PID of the program by its name or command line.
.PP
@@ -185,15 +211,43 @@ Makes \fBreload\fR equivalent to \fBrestart\fR.
\fB\-p\fR, \fB\-\-program=\fIPROGRAM\fR
Name of the program to run.
.TP
\fB\-P\fR, \fB\-\-pid\-from=\fISOURCE\fR
Where to look for PIDs of the running programs.
.TP
+\fB\-\-restart\-on\-exit=\fR[\fB!\fR]\fISTATUS\fR[\fB,\fISTATUS\fR...]
+This option takes effect when used together with
+\fB\-\-sentinel\fR. If the program terminates with one of the status
+codes listed as the argument to this option, it will be immediately
+restarted. The exclamation mark at the start of the list inverts the
+set, e.g. \fB\-\-restart\-on\-exit='!0,1'\fR means restart unless the
+program exit code is 0 or 1. Note the use of quotation to prevent the
+\fB!\fR from being interpreted by the shell.
+.TP
+\fB\-\-restart\-on\-signal=\fR[\fB!\fR]\fISIG\fR[\fB,\fISIG\fR...]
+This option takes effect when used together with
+\fB\-\-sentinel\fR. If the program terminates due to receiving one of
+the signals from this list, it will be immediately restarted. Each
+\fISIG\fR is either a signal number, or a signal name, as listed in
+.BR signal (7).
+The \fBSIG\fR prefix can be omitted from the signal name. Names are
+case-insensitive. Thus, \fB1\fR, \fBHUP\fR, \fBSIGHUP\fR, and
+\fBsighup\fR all stand for the same signal.
+.sp
+The exclamation mark at the start of the list complements the signal
+set, so that e.g. \fB\-\-restart\-on\-signal='!TERM,QUIT,INT'\fR will
+restart the program unless it terminates on one of the listed signals.
+.TP
\fB\-\-sentinel\fR
\fIPROGRAM\fR runs in foreground; disconnect from the controlling
-terminal, run it and act as a sentinel.
+terminal, start it and run in background until it terminates. The
+program's stdout and stderr are sent to the syslog facility
+\fBdaemon\fR, priorities \fBinfo\fR and \fBerr\fR, correspondingly.
+
+See the options \fB\-\-restart\-on\-exit\fR and
+\fB\-\-restart\-on\-signal\fR for details on how to restart the program.
.TP
\fB\-\-signal\-reload=\fISIG\fR
Signal to send on reload (default: \fBSIGHUP\fR). Setting it to 0 is
equivalent to \fB\-\-no\-reload\fR.
.TP
\fB\-\-signal\-stop=\fISIG\fR
diff --git a/src/genrc.c b/src/genrc.c
index ae3070d..9052987 100644
--- a/src/genrc.c
+++ b/src/genrc.c
@@ -22,32 +22,36 @@ int genrc_verbose;
enum {
OPT_USAGE = 256,
OPT_VERSION,
OPT_SIGNAL_RELOAD,
OPT_NO_RELOAD,
OPT_SIGNAL_STOP,
- OPT_CREATE_PIDFILE
+ OPT_CREATE_PIDFILE,
+ OPT_RESTART_ON_EXIT,
+ OPT_RESTART_ON_SIGNAL,
};
struct option longopts[] = {
- { "help", no_argument, 0, 'h' },
- { "usage", no_argument, 0, OPT_USAGE },
- { "command", required_argument, 0, 'c' },
- { "program", required_argument, 0, 'p' },
- { "pid-from", required_argument, 0, 'P' },
- { "pidfile", required_argument, 0, 'F' },
- { "timeout", required_argument, 0, 't' },
- { "signal-reload", required_argument, 0, OPT_SIGNAL_RELOAD },
- { "no-reload", no_argument, 0, OPT_NO_RELOAD },
- { "signal-stop", required_argument, 0, OPT_SIGNAL_STOP },
- { "sentinel", no_argument, 0, 'S' },
- { "create-pidfile", required_argument, 0, OPT_CREATE_PIDFILE },
- { "version", no_argument, 0, OPT_VERSION },
- { "verbose", no_argument, 0, 'v' },
- { "user", required_argument, 0, 'u' },
- { "group", required_argument, 0, 'g' },
+ { "help", no_argument, 0, 'h' },
+ { "usage", no_argument, 0, OPT_USAGE },
+ { "command", required_argument, 0, 'c' },
+ { "program", required_argument, 0, 'p' },
+ { "pid-from", required_argument, 0, 'P' },
+ { "pidfile", required_argument, 0, 'F' },
+ { "timeout", required_argument, 0, 't' },
+ { "signal-reload", required_argument, 0, OPT_SIGNAL_RELOAD },
+ { "no-reload", no_argument, 0, OPT_NO_RELOAD },
+ { "signal-stop", required_argument, 0, OPT_SIGNAL_STOP },
+ { "sentinel", no_argument, 0, 'S' },
+ { "create-pidfile", required_argument, 0, OPT_CREATE_PIDFILE },
+ { "version", no_argument, 0, OPT_VERSION },
+ { "verbose", no_argument, 0, 'v' },
+ { "user", required_argument, 0, 'u' },
+ { "group", required_argument, 0, 'g' },
+ { "restart-on-exit", required_argument, 0, OPT_RESTART_ON_EXIT },
+ { "restart-on-signal", required_argument, 0, OPT_RESTART_ON_SIGNAL },
{ NULL }
};
char shortopts[] = "c:hF:g:P:p:St:u:v";
struct sigdefn {
char const *sig_name;
@@ -124,22 +128,28 @@ is_numeric_str(char const *s)
s++;
}
return 1;
}
int
-sig_name_to_str(char const *s)
+str_to_int(char const *s)
+{
+ char *end;
+ unsigned long n;
+ errno = 0;
+ n = strtoul(s, &end, 10);
+ if (errno || *end || n > UINT_MAX)
+ return -1;
+ return n;
+}
+
+int
+str_to_sig(char const *s)
{
if (is_numeric_str(s)) {
- char *end;
- unsigned long n;
- errno = 0;
- n = strtoul(s, &end, 10);
- if (errno || *end || n > UINT_MAX)
- return -1;
- return n;
+ return str_to_int(s);
} else {
struct sigdefn *sd;
for (sd = sigdefn; sd->sig_name; sd++) {
if (s[0] == 's' || s[0] == 'S') {
if (strcasecmp(sd->sig_name, s) == 0)
@@ -180,23 +190,32 @@ char const *help_msg[] = {
" run with this group(s) privileges",
"",
"Additional configuration:",
"",
" -t, --timeout=SECONDS time to wait for the program to start up or",
" terminate",
- " --sentinel PROGRAM runs in foreground; disconnect from the",
- " controlling terminal, run it and act as a sentinel",
" -P, --pid-from=SOURCE where to look for PIDs of the running programs",
" -F, --pidfile=NAME name of the PID file",
" (same as --pid-from=FILE:NAME)",
" --signal-reload=SIG signal to send on reload (default: SIGHUP)",
" setting to 0 is equivalent to --no-reload",
" --no-reload makes reload equivalent to restart",
" --signal-stop=SIG signal to send in order to terminate the program",
" (default: SIGTERM)",
"",
+ "Sentinel mode:",
+ "",
+ " --sentinel PROGRAM runs in foreground; disconnect from the",
+ " controlling terminal, run it and act as a sentinel",
+ " --restart-on-exit=[!]CODE[,...]",
+ " restart the program if it exits with one of the",
+ " listed status codes",
+ " --restart-on-signal=[!]SIG[,...]",
+ " restart the program if it terminates on one of the",
+ " listed signals",
+ "",
"Informational options:",
"",
" -h, --help display this help list",
" --usage display short usage information",
" --version display program version and exist",
"",
@@ -269,12 +288,14 @@ char const *usage_msg[] = {
"[--group GROUP[,GROUP...]]",
"[--help]",
"[--no-reload]",
"[--pid-from=SOURCE]",
"[--pidfile=PIDFILE]",
"[--program=PROGRAM]",
+ "[--restart-on-exit=[!]CODE[,...]]",
+ "[--restart-on-signal=[!]SIG[,...]]",
"[--sentinel]",
"[--signal-reload=SIG]",
"[--signal-stop=SIG]",
"[--timeout=SECONDS]",
"[--usage]",
"[--user=USER]",
@@ -419,12 +440,18 @@ main(int argc, char **argv)
case 't':
setenv("GENRC_TIMEOUT", optarg, 1);
break;
case 'S':
setenv("GENRC_SENTINEL", "1", 1);
break;
+ case OPT_RESTART_ON_EXIT:
+ add_restart_condition(RESTART_ON_EXIT, optarg);
+ break;
+ case OPT_RESTART_ON_SIGNAL:
+ add_restart_condition(RESTART_ON_SIGNAL, optarg);
+ break;
case OPT_NO_RELOAD:
no_reload = 1;
break;
case OPT_SIGNAL_RELOAD:
setenv("GENRC_SIGNAL_RELOAD", optarg, 1);
break;
@@ -447,21 +474,21 @@ main(int argc, char **argv)
if ((p = getenv("GENRC_PROGRAM")) != NULL)
genrc_program = p;
if (no_reload)
genrc_no_reload = 1;
else if ((p = getenv("GENRC_SIGNAL_RELOAD")) != NULL) {
- genrc_signal_reload = sig_name_to_str(p);
+ genrc_signal_reload = str_to_sig(p);
if (genrc_signal_reload == -1)
usage_error("%s: invalid signal number", p);
else if (genrc_signal_reload == 0)
genrc_no_reload = 1;
}
if ((p = getenv("GENRC_SIGNAL_STOP")) != NULL) {
- genrc_signal_stop = sig_name_to_str(p);
+ genrc_signal_stop = str_to_sig(p);
if (genrc_signal_stop <= 0)
usage_error("%s: invalid signal number", p);
}
if ((p = getenv("GENRC_TIMEOUT")) != NULL) {
char *end;
diff --git a/src/genrc.h b/src/genrc.h
index 9842016..c6ee57b 100644
--- a/src/genrc.h
+++ b/src/genrc.h
@@ -60,12 +60,15 @@ void pidlist_kill(PIDLIST *plist, int sig);
pid_t strtopid(char const *str);
int pid_is_running(pid_t pid);
void runas(void);
+int str_to_sig(char const *);
+int str_to_int(char const *);
+
enum {
MATCH_REGEX, /* extended POSIX regexp match (default) */
MATCH_PCRE, /* PCRE match (not implemented) */
MATCH_GLOB, /* glob pattern match */
MATCH_EXACT, /* exact match */
@@ -106,13 +109,20 @@ int match_regex(PROCSCANBUF buf, char const *arg);
void match_pcre_init(PROCSCANBUF buf, char const *pattern);
void match_pcre_free(PROCSCANBUF buf);
int match_pcre(PROCSCANBUF buf, char const *arg);
+enum {
+ RESTART_ON_EXIT,
+ RESTART_ON_SIGNAL
+};
+void add_restart_condition(int type, char const *arg);
+
+
struct genrc_pid_closure {
char const *name;
int (*pid)(struct genrc_pid_closure *, PIDLIST *);
};
typedef struct genrc_pid_closure GENRC_PID_CLOSURE;
diff --git a/src/sentinel.c b/src/sentinel.c
index 59b89cc..33d3e06 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -5,12 +5,13 @@ This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
*/
#include "genrc.h"
#include <unistd.h>
#include <fcntl.h>
#include <syslog.h>
+#include <time.h>
static void
xpipe(int p[2])
{
if (pipe(p)) {
system_error(errno, "pipe");
@@ -28,12 +29,19 @@ write_pid_file(pid_t pid)
fp = fopen(genrc_create_pidfile, "w");
fprintf(fp, "%lu\n", (unsigned long)pid);
fclose(fp);
}
}
+static void
+unlink_pid_file(void)
+{
+ if (genrc_create_pidfile)
+ unlink(genrc_create_pidfile);
+}
+
#define LOGBUFSIZE 1024
struct log_buffer {
char buf[LOGBUFSIZE];
size_t pos;
int prio;
@@ -65,41 +73,131 @@ log_buffer_read(int fd, struct log_buffer *lb)
lb->pos = 0;
} else
lb->pos++;
}
}
+struct restart_cond {
+ struct restart_cond *next;
+ int type;
+ int negate;
+ int numc;
+ int numv[1];
+};
+
+struct restart_cond *restart_head, *restart_tail;
+
+static int
+restart_on(int type, int num)
+{
+ struct restart_cond *cond;
+
+ for (cond = restart_head; cond; cond = cond->next) {
+ if (cond->type == type) {
+ int result = cond->negate;
+ int i;
+ for (i = 0; i < cond->numc; i++) {
+ if (cond->numv[i] == num) {
+ result = !result;
+ break;
+ }
+ }
+ if (result)
+ return 1;
+ }
+ }
+ return 0;
+}
+
+typedef int (*RESTART_STON)(char const *);
+
+static RESTART_STON restart_ston[] = { str_to_int, str_to_sig };
+static char const *restart_what[] = { "exit status", "signal" };
+
+void
+add_restart_condition(int type, char const *arg)
+{
+ struct wordsplit ws;
+ size_t i;
+ int negate = 0;
+ struct restart_cond *cond;
+ RESTART_STON ston = restart_ston[type];
+
+ if (arg[0] == '!') {
+ negate = 1;
+ arg++;
+ }
+
+ ws.ws_delim = ",";
+ ws.ws_error = genrc_error;
+ if (wordsplit(arg, &ws,
+ WRDSF_NOCMD
+ | WRDSF_NOVAR
+ | WRDSF_DELIM
+ | WRDSF_ENOMEMABRT
+ | WRDSF_SHOWERR
+ | WRDSF_ERROR))
+ exit(1);
+
+ if (ws.ws_wordc == 0)
+ usage_error("empty restart condition");
+
+ cond = xmalloc(sizeof(*cond)
+ + (ws.ws_wordc - 1) * sizeof(cond->numv[0]));
+ cond->next = NULL;
+ cond->type = type;
+ cond->negate = negate;
+ cond->numc = ws.ws_wordc;
+ for (i = 0; i < ws.ws_wordc; i++) {
+ int n = ston(ws.ws_wordv[i]);
+ if (n == -1)
+ usage_error("bad %s: %s", restart_what[type],
+ ws.ws_wordv[i]);
+ cond->numv[i] = n;
+ }
+
+ if (restart_tail)
+ restart_tail->next = cond;
+ else
+ restart_head = cond;
+ restart_tail = cond;
+}
+
void
wait_loop(pid_t child, int out, int err)
{
fd_set rdset;
int nfd = (out > err ? out : err) + 1;
struct log_buffer obuf, ebuf;
-
+
openlog(genrc_program, LOG_PID, LOG_DAEMON);
log_buffer_init(&obuf, LOG_INFO);
log_buffer_init(&ebuf, LOG_ERR);
while (1) {
int rc, status;
if (waitpid(child, &status, WNOHANG) == child) {
- if (genrc_create_pidfile)
- unlink(genrc_create_pidfile);
+ write_pid_file(getpid());
if (WIFEXITED(status)) {
+ int code = WEXITSTATUS(status);
syslog(LOG_INFO, "%s exited with status %d",
- genrc_program, WEXITSTATUS(status));
- _exit(WEXITSTATUS(status));
+ genrc_program, code);
+ if (restart_on(RESTART_ON_EXIT, code))
+ return;
} else if (WIFSIGNALED(status)) {
char const *coremsg = "";
+ int sig = WTERMSIG(status);
#ifdef WCOREDUMP
if (WCOREDUMP(status))
coremsg = " (core dumped)";
#endif
syslog(LOG_INFO, "%s terminated on signal %d%s",
- genrc_program, WTERMSIG(status), coremsg);
+ genrc_program, sig, coremsg);
+ if (restart_on(RESTART_ON_SIGNAL, sig))
+ return;
} else if (WIFSTOPPED(status)) {
syslog(LOG_INFO, "%s stopped on signal %d",
genrc_program, WSTOPSIG(status));
} else {
syslog(LOG_INFO, "%s terminated; status %d",
genrc_program, rc);
@@ -125,13 +223,14 @@ wait_loop(pid_t child, int out, int err)
log_buffer_read(out, &obuf);
}
if (FD_ISSET(err, &rdset)) {
log_buffer_read(err, &ebuf);
}
}
- _exit(1);
+ unlink_pid_file();
+ _exit(0);
}
pid_t
start_command(int p[])
{
int errpipe[2], outpipe[2];
@@ -175,18 +274,84 @@ start_command(int p[])
close(errpipe[1]);
p[0] = outpipe[0];
p[1] = errpipe[0];
return pid;
}
+
+/* Restart rate control */
+static int volatile hup_received;
+
+static void
+sighup(int sig)
+{
+ hup_received++;
+}
+
+/* Consider the number of restarts during this interval */
+#define TESTTIME 2*60
+/* Stop respawning and go to sleep if it exceeds this number */
+#define MAXSPAWN 10
+/* Sleep that many seconds, then retry */
+#define SLEEPTIME 5*60
+
+struct ratectl {
+ time_t start_time; /* Start of the test interval */
+ unsigned failcount; /* Number of restarts done so far */
+};
+static void
+check_failure_rate(struct ratectl *rate)
+{
+ time_t now;
+ struct timeval start, stop, ttw;
+
+ time(&now);
+ if (rate->start_time + TESTTIME > now)
+ rate->failcount++;
+ else {
+ rate->failcount = 0;
+ rate->start_time = now;
+ }
+
+ if (rate->failcount > MAXSPAWN) {
+ syslog(LOG_NOTICE,
+ "%s respawning too fast; disabled for %d minutes",
+ genrc_program, SLEEPTIME / 60);
+
+ gettimeofday(&stop, NULL);
+ stop.tv_sec += SLEEPTIME;
+ while (1) {
+ gettimeofday(&start, NULL);
+ if (timercmp(&start, &stop, >=))
+ break;
+ timersub(&stop, &start, &ttw);
+ if (select(0, NULL, NULL, NULL, &ttw) < 0) {
+ if (errno == EINTR) {
+ if (hup_received) {
+ hup_received = 0;
+ break;
+ }
+ } else {
+ system_error(errno, "select");
+ break;
+ }
+ }
+ }
+
+ rate->failcount = 0;
+ }
+}
+
int
sentinel(void)
{
pid_t pid;
int p[2];
+ struct ratectl ctl;
+ struct sigaction act;
/* Detach from the controlling terminal */
pid = fork();
if (pid == -1) {
system_error(errno, "fork");
return -1;
@@ -202,14 +367,27 @@ sentinel(void)
exit(1);
}
if (pid)
_exit(0);
/* Grand-child */
- pid = start_command(p);
- if (pid == -1)
- _exit(127);
- wait_loop(pid, p[0], p[1]);
+ act.sa_handler = sighup;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGHUP, &act, NULL);
+
+ ctl.start_time = 0;
+ ctl.failcount = 0;
+ while (1) {
+ pid = start_command(p);
+ if (pid == -1)
+ _exit(127);
+ if (pid == 0)
+ break;
+ wait_loop(pid, p[0], p[1]);
+ check_failure_rate(&ctl);
+ syslog(LOG_INFO, "restarting %s", genrc_program);
+ }
_exit(1);
}

Return to:

Send suggestions and report system problems to the System administrator.