summaryrefslogtreecommitdiffabout
author Sergey Poznyakoff <gray@gnu.org> 2019-08-14 06:53:57 (GMT)
committer Sergey Poznyakoff <gray@gnu.org> 2019-08-14 06:53:57 (GMT)
commit 3713593a82aabc7a0daaccf65b8405749c58d9c5 (patch) (side-by-side diff)
tree 1c0f0422fcc3c360a157631c9d8b04deab21e8b7
parent 0b879bfdb75fa698e0f33b9c39671dc4af7dbaa6 (diff)
download tallyman-3713593a82aabc7a0daaccf65b8405749c58d9c5.tar.gz
tallyman-3713593a82aabc7a0daaccf65b8405749c58d9c5.tar.bz2
Improve error diagnostics
* runcap: Upgrade. This version correctly captures output lines not terminated with a newline character.
* src/tallyman.1: Update.
* src/tallyman.c (main): Print error diagnostics if unable to run the program or if it exits with an improper status.
Diffstat (more/less context) (ignore whitespace changes)
m--------- runcap 0
-rw-r--r-- src/tallyman.1 39
-rw-r--r-- src/tallyman.c 15
3 files changed, 49 insertions, 5 deletions
diff --git a/runcap b/runcap
-Subproject 048800a78f64808116bb9b943837062a67f586a
+Subproject 1309ae35b1ff7ed26a3ccc2fcdc6bd13bb494ce
diff --git a/src/tallyman.1 b/src/tallyman.1
index ce1d09d..57036e8 100644
--- a/src/tallyman.1
+++ b/src/tallyman.1
@@ -1,4 +1,4 @@
-.TH TALLYMAN 1 "August 13, 2019" "TALLYMAN" "Tallyman User Reference"
+.TH TALLYMAN 1 "August 14, 2019" "TALLYMAN" "Tallyman User Reference"
.SH NAME
tallyman \- health state collector for docker containers
.SH SYNOPSIS
@@ -37,11 +37,44 @@ The program must be configured to run periodically via the
statement in the
.BR Dockerfile .
.PP
+It is assumed that each container is responsible for a certain
+.IR service .
+Each service is assigned a name. Multiple containers can run
+the same service (for example you can have several database
+containers).
+.PP
+Containers are configured to run
+.B tallyman
+as their
+.B HEALTHCHECK
+command. The
+.I SRVID
+argument supplies the name of the service the container is responsible
+for. The \fICOMMAND\fR and optional additional arguments supply the
+name of the actual health-checking program and its command line
+arguments.
+.B Tallyman
+will run this command, collect its standard error and standard output,
+pack them along with the program exit code in a JSON packet, and send
+this packet to the predefined address using an HTTP POST request. It will
+then exit with the same code as the health-checking program it ran. To
+the container, the effect of running
+.B tallyman
+is the same as if it ran the health-checking program itself: exit
+code, standard error and standard output are all preserved. On the
+other hand, they are copied to the collector listening on the
+predefined address outside the container.
+.PP
The data collector program
.BR stevedore (8)
must be listening at \fIHOST:PORT\fR. See its manual for
-details. Container default gateway is the default \fIHOST\fR.
-Default port is 8990.
+details.
+.PP
+In the absence of the
+.B \-s
+option,
+.B tallyman
+will send statistics to the container's default gateway, port 8990.
.SH OPTIONS
.TP
\fB\-d\fR, \fB\-\-debug\fR
diff --git a/src/tallyman.c b/src/tallyman.c
index df40fa2..c35cc9e 100644
--- a/src/tallyman.c
+++ b/src/tallyman.c
@@ -298,15 +298,25 @@ main(int argc, char **argv)
status = EX_FAILURE;
if (runcap(&rc, rcflags)) {
+ char const *emsg = strerror(errno);
json_object_set(obj, "status", json_new_bool(0));
json_object_set(obj, "error", json_new_bool(1));
- json_object_set(obj, "message",
- json_new_string(strerror(errno)));
+ json_object_set(obj, "message", json_new_string(emsg));
+ error("failed to run %s: %s", argv[0], emsg);
} else {
if (WIFEXITED(rc.rc_status)) {
status = WEXITSTATUS(rc.rc_status);
json_object_set(obj, "status",
json_new_bool(status == 0));
+ if (status
+ && rc.rc_cap[RUNCAP_STDERR].sc_nlines == 0) {
+ if (status == 127)
+ error("failed to run %s: %s", argv[0],
+ "program not found");
+ else
+ error("command %s exited with status %d",
+ argv[0], status);
+ }
} else {
char *msg = 0;
size_t siz = 0;
@@ -327,6 +337,7 @@ main(int argc, char **argv)
json_object_set(obj, "error", json_new_bool(1));
json_object_set(obj, "message", json_new_string(msg));
+ error("%s: %s", argv[0], msg);
free(msg);
}

Return to:

Send suggestions and report system problems to the System administrator.