diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2019-08-14 09:53:57 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2019-08-14 09:53:57 +0300 |
commit | 3713593a82aabc7a0daaccf65b8405749c58d9c5 (patch) | |
tree | 1c0f0422fcc3c360a157631c9d8b04deab21e8b7 | |
parent | 0b879bfdb75fa698e0f33b9c39671dc4af7dbaa6 (diff) | |
download | tallyman-3713593a82aabc7a0daaccf65b8405749c58d9c5.tar.gz tallyman-3713593a82aabc7a0daaccf65b8405749c58d9c5.tar.bz2 |
Improve error diagnostics
* runcap: Upgrade. This version correctly captures output lines
not terminated with a newline character.
* src/tallyman.1: Update.
* src/tallyman.c (main): Print error diagnostics if unable to run
the program or if it exits with an improper status.
m--------- | runcap | 0 | ||||
-rw-r--r-- | src/tallyman.1 | 39 | ||||
-rw-r--r-- | src/tallyman.c | 15 |
3 files changed, 49 insertions, 5 deletions
diff --git a/runcap b/runcap | |||
Subproject 048800a78f64808116bb9b943837062a67f586a | Subproject 1309ae35b1ff7ed26a3ccc2fcdc6bd13bb494ce | ||
diff --git a/src/tallyman.1 b/src/tallyman.1 index ce1d09d..57036e8 100644 --- a/src/tallyman.1 +++ b/src/tallyman.1 | |||
@@ -1,4 +1,4 @@ | |||
1 | .TH TALLYMAN 1 "August 13, 2019" "TALLYMAN" "Tallyman User Reference" | 1 | .TH TALLYMAN 1 "August 14, 2019" "TALLYMAN" "Tallyman User Reference" |
2 | .SH NAME | 2 | .SH NAME |
3 | tallyman \- health state collector for docker containers | 3 | tallyman \- health state collector for docker containers |
4 | .SH SYNOPSIS | 4 | .SH SYNOPSIS |
@@ -37,11 +37,44 @@ The program must be configured to run periodically via the | |||
37 | statement in the | 37 | statement in the |
38 | .BR Dockerfile . | 38 | .BR Dockerfile . |
39 | .PP | 39 | .PP |
40 | It is supposed that each container is responsible for certain | ||
41 | .IR service . | ||
42 | Each service is assigned a name. Multiple containers can run | ||
43 | the same service (for example you can have several database | ||
44 | containers). | ||
45 | .PP | ||
46 | Containers are configured to run | ||
47 | .B tallyman | ||
48 | as their | ||
49 | .B HEALTHCHECK | ||
50 | command. The | ||
51 | .I SRVID | ||
52 | argument supplies the name of the service the container is responsible | ||
53 | for. The \fICOMMAND\fR and optional additional arguments supply the | ||
54 | name of the actual health-checking program and its command line | ||
55 | arguments. | ||
56 | .B Tallyman | ||
57 | will run this command, collect its standard error and standard output, | ||
58 | pack them along with the program exit code in a JSON packet, and send | ||
59 | this packet to the predefined address using an HTTP POST request. It will | ||
60 | then exit with the same code as the health-checking program it ran. To | ||
61 | the container, the effect of running | ||
62 | .B tallyman | ||
63 | is the same as if it ran the health-checking program itself: error | ||
64 | code, standard error and standard output are all preserved. On the | ||
65 | other hand, they are copied to the collector listening on the | ||
66 | predefined address outside the container. | ||
67 | .PP | ||
40 | The data collector program | 68 | The data collector program |
41 | .BR stevedore (8) | 69 | .BR stevedore (8) |
42 | must be listening at \fIHOST:PORT\fR. See its manual for | 70 | must be listening at \fIHOST:PORT\fR. See its manual for |
43 | details. Container default gateway is the default \fIHOST\fR. | 71 | details. |
44 | Default port is 8990. | 72 | .PP |
73 | In the absence of the | ||
74 | .B \-s | ||
75 | option, | ||
76 | .B tallyman | ||
77 | will send statistics to the container's default gateway, port 8990. | ||
45 | .SH OPTIONS | 78 | .SH OPTIONS |
46 | .TP | 79 | .TP |
47 | \fB\-d\fR, \fB\-\-debug\fR | 80 | \fB\-d\fR, \fB\-\-debug\fR |
diff --git a/src/tallyman.c b/src/tallyman.c index df40fa2..c35cc9e 100644 --- a/src/tallyman.c +++ b/src/tallyman.c | |||
@@ -298,15 +298,25 @@ main(int argc, char **argv) | |||
298 | 298 | ||
299 | status = EX_FAILURE; | 299 | status = EX_FAILURE; |
300 | if (runcap(&rc, rcflags)) { | 300 | if (runcap(&rc, rcflags)) { |
301 | char const *emsg = strerror(errno); | ||
301 | json_object_set(obj, "status", json_new_bool(0)); | 302 | json_object_set(obj, "status", json_new_bool(0)); |
302 | json_object_set(obj, "error", json_new_bool(1)); | 303 | json_object_set(obj, "error", json_new_bool(1)); |
303 | json_object_set(obj, "message", | 304 | json_object_set(obj, "message", json_new_string(emsg)); |
304 | json_new_string(strerror(errno))); | 305 | error("failed to run %s: %s", argv[0], emsg); |
305 | } else { | 306 | } else { |
306 | if (WIFEXITED(rc.rc_status)) { | 307 | if (WIFEXITED(rc.rc_status)) { |
307 | status = WEXITSTATUS(rc.rc_status); | 308 | status = WEXITSTATUS(rc.rc_status); |
308 | json_object_set(obj, "status", | 309 | json_object_set(obj, "status", |
309 | json_new_bool(status == 0)); | 310 | json_new_bool(status == 0)); |
311 | if (status | ||
312 | && rc.rc_cap[RUNCAP_STDERR].sc_nlines == 0) { | ||
313 | if (status == 127) | ||
314 | error("failed to run %s: %s", argv[0], | ||
315 | "program not found"); | ||
316 | else | ||
317 | error("command %s exited with status %d", | ||
318 | argv[0], status); | ||
319 | } | ||
310 | } else { | 320 | } else { |
311 | char *msg = 0; | 321 | char *msg = 0; |
312 | size_t siz = 0; | 322 | size_t siz = 0; |
@@ -327,6 +337,7 @@ main(int argc, char **argv) | |||
327 | json_object_set(obj, "error", json_new_bool(1)); | 337 | json_object_set(obj, "error", json_new_bool(1)); |
328 | json_object_set(obj, "message", json_new_string(msg)); | 338 | json_object_set(obj, "message", json_new_string(msg)); |
329 | 339 | ||
340 | error("%s: %s", argv[0], msg); | ||
330 | free(msg); | 341 | free(msg); |
331 | } | 342 | } |
332 | 343 | ||