diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2020-02-13 11:40:55 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2020-02-13 11:40:55 +0200 |
commit | ab4bab1eb5cfaa9c7e51c14667d300f491009b80 (patch) | |
tree | 1a5aeedffef0c35f7fdfd86f65ccdc4f0fc9fad1 | |
parent | de8778ee1a6139cc1c117c06058d7004cae01d50 (diff) | |
download | ping903-ab4bab1eb5cfaa9c7e51c14667d300f491009b80.tar.gz ping903-ab4bab1eb5cfaa9c7e51c14667d300f491009b80.tar.bz2 |
Implement nagios check mode in ping903q
-rw-r--r-- | doc/ping903q.1 | 74 | ||||
-rw-r--r-- | src/ping903q.c | 295 |
2 files changed, 253 insertions, 116 deletions
diff --git a/doc/ping903q.1 b/doc/ping903q.1 index 3a2a91a..6e024d7 100644 --- a/doc/ping903q.1 +++ b/doc/ping903q.1 @@ -13,14 +13,22 @@ .\" .\" You should have received a copy of the GNU General Public License .\" along with Ping903. If not, see <http://www.gnu.org/licenses/>. -.TH PING903Q 8 "February 12, 2020" "PING903Q" "User Commands" +.TH PING903Q 8 "February 13, 2020" "PING903Q" "User Commands" .SH NAME ping903q \- ping903 query tool .SH SYNOPSIS \fBping903q\fR\ [\fB\-hVv\fR]\ - [\fB\-c \fIFILE\fR]\ + [\fB\-f \fIFILE\fR]\ [\fIIP\fR] + +or + +\fBping903q\fR\ + [\fB\-f \fIFILE\fR]\ + \fB\-H \fIHOST\fR\ + \fB\-c \fIRTA\fB,\fIPCT\fB%\fR\ + \fB\-w \fIRTA\fB,\fIPCT\fB%\fR .SH DESCRIPTION Queries monitoring statistics from the \fBping903\fR daemon. When used with a single argument (\fIIP\fR), displays information about @@ -41,40 +49,34 @@ exists, it will obtain the socket address from the \fBlisten\fR statement. See .BR ping903.conf (5), for detailed description of the configuration file. +.SS Nagios check mode +When the \fB\-H\fR, \fB\-c\fR, and \fB\-w\fR options are used, the +program enters \fINagios check mode\fR. In this mode its output +complies with the requirements for external \fBNagios\fR check +programs. .SH EXIT CODE When called with one argument, the program exits with code 0 (success) -if the IP is alive and 1 otherwise. +if the IP is alive and 2 otherwise. .PP When called without arguments, the program exits with code 0 if all -monitored IP addresses are alive, 1 if none of them is reachable and 2 +monitored IP addresses are alive, 2 if none of them is reachable and 1 if some of them are. .PP -Other possible exit codes are: -.TP -.B 64 -Command line usage error. -.TP -.B 65 -Data format error. The returned JSON is malformed. -.TP -.B 69 -Service unavailable. This means some error occurred, which will be -diagnosed on the standard error. -.TP -.B 70 -Internal software error. +Exit codes in Nagios check mode: .TP -.B 71 -System error (e.g. 
error reading from socket or the like). +.B 0 +Success. .TP -.B 76 -Protocol error when talking to the daemon. +.B 1 +Warning condition. .TP -.B 78 -Error in configuration file. +.B 2 +Critical condition. +.PP +If any error is encountered, \fBping903q\fR exits with status \fB3\fR. .SH OPTIONS .TP -\fB\-c \fIFILE\fR +\fB\-f \fIFILE\fR Read configuration from \fIFILE\fR instead of from the default .BR /etc/ping903.conf . .TP @@ -86,8 +88,28 @@ Print program version, copyright information, and exit. .TP .B \-v Turn on verbose output. +.SS Options valid in Nagios check mode +The presence of any of these options switches \fBping903q\fR to Nagios +check mode. For this mode to succeed, all three options must be specified. +.TP +\fB\-H \fIHOST\fR +Sets the host name or IP address to query. +.TP +\fB\-c \fIRTA\fB,\fIPCT\fB%\fR +Sets the critical threshold value. \fIRTA\fR is the round-trip +average and \fIPCT\fR is the packet loss percentage. The +critical condition is entered if either the returned round-trip +average becomes greater than or equal to \fIRTA\fR or the lost packet +percentage becomes greater than or equal to \fIPCT\fR. Note that both +parts must be present and must be valid floating-point numbers. Note +also that the use of the percent sign is mandatory. +.TP +\fB\-w \fIRTA\fB,\fIPCT\fB%\fR +Sets the warning threshold value. See above for the discussion of the +arguments. .SH SEE ALSO -.BR ping903 (8). +.BR ping903 (8), +.BR Nagios <https://www.nagios.org/>. 
.SH COPYRIGHT Copyright \(co 2020 Sergey Poznyakoff .br diff --git a/src/ping903q.c b/src/ping903q.c index e709f64..ba5caa1 100644 --- a/src/ping903q.c +++ b/src/ping903q.c @@ -1,8 +1,10 @@ #include <config.h> #include <stdio.h> #include <stdlib.h> -#include <sysexits.h> -#include <sys/types.h> /* See NOTES */ +#include <syslog.h> +#include <limits.h> +#include <math.h> +#include <sys/types.h> #include <sys/socket.h> #include <netdb.h> #include <errno.h> @@ -18,11 +20,31 @@ int verbose; char const http_version[] = "HTTP/1.1"; enum { - EX_P903_ALIVE = 0, - EX_P903_NOTALIVE = 1, - EX_P903_MIXED = 2 + EX_NAGIOS_OK = 0, + EX_NAGIOS_WARNING = 1, + EX_NAGIOS_CRITICAL = 2, + EX_NAGIOS_UNKNOWN = 3, + EX_NAGIOS_DEPENDENT = 4 }; +static char const *status_str[] = { + "OK", + "WARNING", + "CRITICAL", + "UNKNOWN", + "DEPENDENT" +}; + +static void +abend(char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vlogger(LOG_CRIT, fmt, ap); + va_end(ap); + exit(EX_NAGIOS_UNKNOWN); +} + static char * read_listen(char **ret_service) { @@ -38,11 +60,9 @@ read_listen(char **ret_service) if (!fp) { if (errno == ENOENT) return NULL; - else { - fatal("can't open %s: %s", + else + abend("can't open %s: %s", config_file, strerror(errno)); - exit(EX_CONFIG); - } } while (fgets(buf, sizeof(buf), fp)) { @@ -113,26 +133,18 @@ http_connect(char *node, char *service) hints.ai_socktype = SOCK_STREAM; hints.ai_protocol = IPPROTO_TCP; rc = getaddrinfo(node, service, &hints, &res); - if (rc) { - fatal("%s: %s", node, gai_strerror(rc)); - exit(EX_UNAVAILABLE); - } + if (rc) + abend("%s: %s", node, gai_strerror(rc)); fd = socket(res->ai_family, res->ai_socktype, 0); - if (fd == -1) { - fatal("socket: %s", strerror(errno)); - exit(EX_UNAVAILABLE); - } - if (connect(fd, (struct sockaddr *)res->ai_addr, res->ai_addrlen)) { - fatal("failed to connect to %s:%s: %s", + if (fd == -1) + abend("socket: %s", strerror(errno)); + if (connect(fd, (struct sockaddr *)res->ai_addr, res->ai_addrlen)) + abend("failed to 
connect to %s:%s: %s", node, service, strerror(errno)); - exit(EX_UNAVAILABLE); - } http = fdopen(fd, "w+"); - if (http == NULL) { - fatal("failed to open socket: %s", strerror(errno)); - exit(EX_UNAVAILABLE); - } + if (http == NULL) + abend("failed to open socket: %s", strerror(errno)); freeaddrinfo(res); } @@ -180,14 +192,11 @@ http_readline(struct http_buf *hbuf) hbuf->base = e2nrealloc(hbuf->base, &hbuf->size, 1); } if (fgets(hbuf->base + hbuf->len, hbuf->size, http) == NULL) { - if (feof(http)) { - fatal("connection closed prematurely"); - exit(EX_PROTOCOL); - } else { - fatal("error reading from socket: %s", + if (feof(http)) + abend("connection closed prematurely"); + else + abend("error reading from socket: %s", strerror(errno)); - exit(EX_OSERR); - } } hbuf->len += strlen(hbuf->base + hbuf->len); if (hbuf->base[hbuf->len-1] == '\n') { @@ -294,19 +303,16 @@ http_recv(struct http_resp *resp) char *res[3]; if (strsplit3(hbuf.base, res, 0)) { - fatal("malformed HTTP response"); - exit(EX_PROTOCOL); + abend("malformed HTTP response"); } if (strcmp(res[0], http_version)) { - fatal("unsupported HTTP version: %s", res[0]); - exit(EX_PROTOCOL); + abend("unsupported HTTP version: %s", res[0]); } free(res[0]); resp->code = atoi(res[1]); if (resp->code <= 0 || resp->code > 559) { - fatal("bad response code: %s", res[1]); - exit(EX_PROTOCOL); + abend("bad response code: %s", res[1]); } free(res[1]); resp->reason = res[2]; @@ -331,8 +337,7 @@ http_recv(struct http_resp *resp) char *p; unsigned long len = strtoul(hval, &p, 10); if (*p) { - fatal("malformed content-length"); - exit(EX_PROTOCOL); + abend("malformed content-length"); } resp->content = emalloc(len + 1); resp->content_length = len; @@ -341,13 +346,11 @@ http_recv(struct http_resp *resp) while (len) { ssize_t n = fread(p, 1, len, http); if (n == -1) { - fatal("socket read error: %s", + abend("socket read error: %s", strerror(errno)); - exit(EX_OSERR); } if (n == 0) { - error("warning: short read from socket"); 
- exit(EX_OSERR); + abend("short read from socket"); } len -= n; p += n; @@ -395,18 +398,16 @@ ejson_get(struct json_value *obj, char *name, int type) { struct json_value *jv; if (json_object_get(obj, name, &jv)) { - fatal("no \"%s\" member in the response", name); - exit(EX_DATAERR); + abend("no \"%s\" member in the response", name); } if (jv->type != type) { - fatal("\"%s\" member has wrong type", name); - exit(EX_DATAERR); + abend("\"%s\" member has wrong type", name); } return jv; } static int -print_host_status(struct json_value *obj) +print_host_status(struct json_value *obj, void *unused) { struct json_value *jv; int alive; @@ -418,7 +419,7 @@ print_host_status(struct json_value *obj) jv = ejson_get(obj, "status", json_bool); if (!jv->v.b) { printf("no response from %s\n", name); - return EX_P903_NOTALIVE; + return EX_NAGIOS_CRITICAL; } alive = ejson_get(obj, "alive", json_bool)->v.b; @@ -442,11 +443,12 @@ print_host_status(struct json_value *obj) ejson_get(obj, "tmax", json_number)->v.n, ejson_get(obj, "stddev", json_number)->v.n); } - return alive ? EX_P903_ALIVE : EX_P903_NOTALIVE; + return alive ? 
EX_NAGIOS_OK : EX_NAGIOS_CRITICAL; } static void -query_host(char const *name) +query_host(char const *name, int (*report)(struct json_value *, void *), + void *report_data) { int rc; struct http_resp resp; @@ -456,33 +458,28 @@ query_host(char const *name) char *p; ssize_t n = snprintf(url, sizeof(url), "/host/%s", name); if (n < 0 || n == sizeof(url)) { - fatal("bad host name or IP"); - exit(EX_USAGE); + abend("bad host name or IP"); } http_query("GET", url, NULL); http_resp_init(&resp); http_recv(&resp); if (resp.code != 200) { - fatal("%s", resp.reason); - exit(EX_UNAVAILABLE); + abend("%s", resp.reason); } hval = http_resp_get_header(&resp, "content-type"); if (!hval || strcmp(hval, "application/json")) { - fatal("missing or unsupported content type"); - exit(EX_UNAVAILABLE); + abend("missing or unsupported content type"); } rc = json_parse_string(resp.content, &obj, &p); if (rc != JSON_E_NOERR) { - fatal("%s near %s", json_strerror(rc), p); - exit(EX_DATAERR); + abend("%s near %s", json_strerror(rc), p); } if (obj->type != json_object) { - fatal("returned entity has wrong type"); - exit(EX_DATAERR); + abend("returned entity has wrong type"); } - exit(print_host_status(obj)); + exit(report(obj, report_data)); } static void @@ -501,53 +498,134 @@ query_all(void) http_resp_init(&resp); http_recv(&resp); if (resp.code != 200) { - fatal("%s", resp.reason); - exit(EX_UNAVAILABLE); + abend("%s", resp.reason); } hval = http_resp_get_header(&resp, "content-type"); if (!hval || strcmp(hval, "application/json")) { - fatal("missing or unsupported content type"); - exit(EX_UNAVAILABLE); + abend("missing or unsupported content type"); } rc = json_parse_string(resp.content, &obj, &p); if (rc != JSON_E_NOERR) { - fatal("%s near %s", json_strerror(rc), p); - exit(EX_DATAERR); + abend("%s near %s", json_strerror(rc), p); } if (obj->type != json_array) { - fatal("returned entity has wrong type"); - exit(EX_DATAERR); + abend("returned entity has wrong type"); } len = 
json_array_length(obj); for (i = 0; i < len; i++) { struct json_value *jv; if (json_array_get(obj, i, &jv)) { - fatal("can't get element %lu", (unsigned long) i); - exit(EX_SOFTWARE); + abend("can't get element %lu", (unsigned long) i); } if (jv->type == json_object) - count[print_host_status(jv)]++; + count[print_host_status(jv, NULL) != EX_NAGIOS_OK]++; } if (count[1] == 0) - exit(EX_P903_ALIVE); + exit(EX_NAGIOS_OK); if (count[0] == 0) - exit(EX_P903_NOTALIVE); - exit(EX_P903_MIXED); + exit(EX_NAGIOS_CRITICAL); + exit(EX_NAGIOS_WARNING); +} + +struct nagios_threshold { + double round_trip; + double loss_pct; +}; + +struct nagios_check_data { + struct nagios_threshold wth; + struct nagios_threshold cth; +}; + +static void +parse_nagios_threshold(char const *val, struct nagios_threshold *tp) +{ + char *p; + double d; + + d = strtod(val, &p); + if (d <= 0 || d == HUGE_VAL) { + abend("invalid threshold: %s", val); + } + tp->round_trip = d; + if (*p != ',') { + abend("threshold missing percentage: %s", val); + } + d = strtod(p + 1, &p); + if (d < 0 || (d == 0 && errno) || d > 100 || *p != '%') { + abend("invalid threshold: %s", val); + } + tp->loss_pct = d; +} + +static void +print_perfdata(double rta, double pl, struct nagios_check_data *cd) +{ + printf("|rta=%.6fms;%.6f;%.6f;%.6f pl=%.6f%%;%.6f;%.6f;%d\n", + rta, cd->wth.round_trip, cd->cth.round_trip, 0.0, + pl, cd->wth.loss_pct, cd->cth.loss_pct, 0); +} + +static inline int newstatus(int old, int new) { + return old > new ? 
old : new; +} + +static int +nagios_check(struct json_value *obj, void *data) +{ + struct nagios_check_data *cd = data; + struct json_value *jv; + char const *name; + int status; + double rta, loss; + + jv = ejson_get(obj, "name", json_string); + name = jv->v.s; + + jv = ejson_get(obj, "status", json_bool); + if (!jv->v.b) { + printf("PING CRITICAL - Packet loss = 100%"); + print_perfdata(cd->cth.round_trip, 100.0, cd); + exit(EX_NAGIOS_CRITICAL); + } + + status = EX_NAGIOS_OK; + rta = ejson_get(obj, "avg", json_number)->v.n; + if (rta >= cd->cth.round_trip) + status = newstatus(status, EX_NAGIOS_CRITICAL); + if (rta >= cd->wth.round_trip) + status = newstatus(status, EX_NAGIOS_WARNING); + + loss = ejson_get(obj, "loss", json_number)->v.n; + if (loss >= cd->cth.loss_pct) + status = newstatus(status, EX_NAGIOS_CRITICAL); + if (loss >= cd->wth.loss_pct) + status = newstatus(status, EX_NAGIOS_WARNING); + printf("PING %s Packet loss = %d%%, RTA = %.2f ms", + status_str[status], (int)loss, rta); + print_perfdata(rta, loss, cd); + exit(status); } void usage(void) { - printf("Usage: %s [-hVv] [-c FILE]\n", progname); + printf("Usage: %s [-hVv] [-f FILE] [HOST]\n", progname); + printf(" or: %s -H HOST -c RTA,PCT%% -w RTA,PCT%%\n", progname); printf("Query ping903 daemon.\n"); printf("\n"); printf("Options:\n\n"); - printf(" -c FILE read configuration from FILE\n"); + printf(" -f FILE read configuration from FILE\n"); printf(" -h print this help test\n"); printf(" -V print program version and exit\n"); printf(" -v additional verbosity\n"); + printf("\nNagios check mode:\n\n"); + printf(" -H HOST query statistics for this host\n"); + printf(" -c RTA,PCT%% set critical threshold\n"); + printf(" -w RTA,PCT%% set warning threshold\n"); + printf("\n(all three must be given in this mode)\n"); printf("\n"); printf("Report bugs to <%s>.\n", PACKAGE_BUGREPORT); printf("%s home page: <%s>\n", PACKAGE_NAME, PACKAGE_URL); @@ -565,8 +643,14 @@ main(int argc, char **argv) { int c; char 
*p, *node, *service; - + int nagios_check_mode = 0; + char const *host = NULL; + char *c_opt = NULL; + char *w_opt = NULL; + struct nagios_check_data chkdata; + set_progname(argv[0]); + if (argc == 2) { if (strcmp(argv[1], "--help") == 0) { usage(); @@ -577,11 +661,19 @@ main(int argc, char **argv) exit(0); } } - while ((c = getopt(argc, argv, "c:hVv")) != EOF) { + while ((c = getopt(argc, argv, "c:f:H:hVvw:")) != EOF) { switch (c) { case 'c': + c_opt = optarg; + nagios_check_mode = 1; + break; + case 'f': config_file = optarg; break; + case 'H': + host = optarg; + nagios_check_mode = 1; + break; case 'h': usage(); exit(0); @@ -591,13 +683,32 @@ main(int argc, char **argv) case 'v': verbose++; break; + case 'w': + w_opt = optarg; + nagios_check_mode = 1; + break; default: - exit(EX_USAGE); + exit(EX_NAGIOS_UNKNOWN); } } argc -= optind; argv += optind; + if (nagios_check_mode) { + if (argc != 0) { + abend("bad number of arguments"); + } + if (!host) + abend("hostname missing; use -H option"); + if (!c_opt) + abend("critical threshold missing; use -c option"); + parse_nagios_threshold(c_opt, &chkdata.cth); + + if (!w_opt) + abend("warning threshold missing; use -w option"); + parse_nagios_threshold(w_opt, &chkdata.wth); + } + p = node = read_listen(&service); if (!node || !node[0]) node = DEFAULT_ADDRESS; @@ -606,13 +717,17 @@ main(int argc, char **argv) http_connect(node, service); free(p); - switch (argc) { - case 0: - query_all(); - case 1: - query_host(argv[0]); - default: - fatal("bad number of arguments"); + if (nagios_check_mode) + query_host(host, nagios_check, &chkdata); + else { + switch (argc) { + case 0: + query_all(); + case 1: + query_host(argv[0], print_host_status, NULL); + default: + abend("bad number of arguments"); + } } - exit(EX_USAGE); + exit(EX_NAGIOS_UNKNOWN); } |