diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2020-02-19 10:49:11 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2020-02-19 10:49:11 +0200 |
commit | 998bea5d31b60f39c54f716063b44a5713fbc1c0 (patch) | |
tree | 12d8a50f05fbe2a0631dab0e9eedbb4828b54103 | |
parent | f6be3cd55490eab72be3e497877b5de5758d9911 (diff) | |
download | ping903-998bea5d31b60f39c54f716063b44a5713fbc1c0.tar.gz ping903-998bea5d31b60f39c54f716063b44a5713fbc1c0.tar.bz2 |
Fix handling of missing replies.
* src/ping903.h (hostaddr): New member recv_tv.
* src/ping903q.c (print_host_status): Gracefully handle absence of
the "stddev" attribute.
Include IP address (or host name) in the nagios check output.
* src/pinger.c: Get rid of the send queue. Send echo requests in
batches each ping_interval. Correctly handle missing replies.
-rw-r--r-- | src/ping903.h | 1 | ||||
-rw-r--r-- | src/ping903q.c | 26 | ||||
-rw-r--r-- | src/pinger.c | 238 |
3 files changed, 122 insertions, 143 deletions
diff --git a/src/ping903.h b/src/ping903.h index 5d8dd5b..52ed6c1 100644 --- a/src/ping903.h +++ b/src/ping903.h @@ -96,6 +96,7 @@ typedef struct hostaddr { /* Current ping statistics */ struct timeval start_tv; struct timeval xmit_tv; + struct timeval recv_tv; unsigned long xmit_count; unsigned long recv_count; double tmin; /* minimum round trip time */ diff --git a/src/ping903q.c b/src/ping903q.c index 53358b4..9e7a350 100644 --- a/src/ping903q.c +++ b/src/ping903q.c @@ -436,17 +436,18 @@ print_host_status(struct json_value *obj, void *unused) json_number)->v.n; double stop_ts = ejson_get(obj, "stop-timestamp", json_number)->v.n; - + printf("--- %s ping statistics ---\n", name); printf("%lu packets transmitted, %lu received, %d%% packet loss, time %.0fms\n", (unsigned long) xmit, (unsigned long) recv, - (int)(100 * (xmit - recv) / recv), + (int)(100 * (xmit - recv) / xmit), (stop_ts - start_ts) * 1000); - printf("rtt min/avg/max/mdev = %.3f/%.3f/%.3f/%.3f ms\n", - ejson_get(obj, "tmin", json_number)->v.n, - ejson_get(obj, "avg", json_number)->v.n, - ejson_get(obj, "tmax", json_number)->v.n, - ejson_get(obj, "stddev", json_number)->v.n); + if (!json_object_get(obj, "stddev", &jv)) + printf("rtt min/avg/max/mdev = %.3f/%.3f/%.3f/%.3f ms\n", + ejson_get(obj, "tmin", json_number)->v.n, + ejson_get(obj, "avg", json_number)->v.n, + ejson_get(obj, "tmax", json_number)->v.n, + ejson_get(obj, "stddev", json_number)->v.n); } return alive ? 
EX_NAGIOS_OK : EX_NAGIOS_CRITICAL; } @@ -536,6 +537,9 @@ match_host(char const *name) size_t i, len; n = snprintf(url, sizeof(url), "/match/%s", name); + if (n == -1 || n == sizeof(url)) { + abend("url buffer overflow"); + } http_query("GET", url, std_headers); http_resp_init(&resp); http_recv(&resp); @@ -677,11 +681,11 @@ nagios_check(struct json_value *obj, void *data) jv = ejson_get(obj, "status", json_string); if (strcmp(jv->v.s, "init") == 0) { - printf("PING UNKNOWN - waiting for data to arrive\n"); + printf("PING %s UNKNOWN - waiting for data to arrive\n", name); exit(EX_NAGIOS_UNKNOWN); } if (strcmp(jv->v.s, "invalid") == 0) { - printf("PING CRITICAL - Packet loss = 100%%"); + printf("PING %s CRITICAL - Packet loss = 100%%", name); print_perfdata(cd->cth.round_trip, 100.0, cd); exit(EX_NAGIOS_CRITICAL); } @@ -698,8 +702,8 @@ nagios_check(struct json_value *obj, void *data) status = newstatus(status, EX_NAGIOS_CRITICAL); if (loss >= cd->wth.loss_pct) status = newstatus(status, EX_NAGIOS_WARNING); - printf("PING %s Packet loss = %d%%, RTA = %.2f ms", - status_str[status], (int)loss, rta); + printf("PING %s %s Packet loss = %d%%, RTA = %.2f ms", + name, status_str[status], (int)loss, rta); print_perfdata(rta, loss, cd); exit(status); } diff --git a/src/pinger.c b/src/pinger.c index aef88d0..2aff036 100644 --- a/src/pinger.c +++ b/src/pinger.c @@ -47,10 +47,9 @@ size_t hostaddr_max; static int ping_fd; -static HOSTADDR *sendq_head, *sendq_tail; static pthread_mutex_t sendq_mutex = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t sendq_update_cond = PTHREAD_COND_INITIALIZER; -static pthread_cond_t sendq_empty_cond = PTHREAD_COND_INITIALIZER; +static pthread_cond_t sendq_cond = PTHREAD_COND_INITIALIZER; +static int send_p; #define ICMP_HEADER_LEN (offsetof(struct icmp, icmp_data)) #define PING_HEADER_LEN (ICMP_HEADER_LEN+sizeof(struct timeval)) @@ -132,69 +131,6 @@ p903_init(void) if (!seqidx) emalloc_die(); } - -static int -sendq_wait_empty(struct timeval *tv) 
-{ - int ret = 0; - struct timeval t1, t2; - pthread_mutex_lock(&sendq_mutex); - gettimeofday(&t1, NULL); - while (sendq_head) { - ret = 1; - pthread_cond_wait(&sendq_empty_cond, &sendq_mutex); - } - gettimeofday(&t2, NULL); - pthread_mutex_unlock(&sendq_mutex); - if (ret) - timersub(&t2, &t1, tv); - return ret; -} - -static void -sendq_enqueue(HOSTADDR *addr, int delay) -{ - struct timeval tv; - - pthread_mutex_lock(&sendq_mutex); - - gettimeofday(&tv, NULL); - tv.tv_sec += delay; - addr->xmit_tv = tv; - addr->next = NULL; - if (sendq_tail) - sendq_tail->next = addr; - else - sendq_head = addr; - sendq_tail = addr; - pthread_cond_broadcast(&sendq_update_cond); - pthread_mutex_unlock(&sendq_mutex); -} - -static HOSTADDR * -sendq_dequeue(void) -{ - HOSTADDR *host; - struct timespec ts; - - pthread_mutex_lock(&sendq_mutex); - while (!sendq_head) { - pthread_cond_wait(&sendq_update_cond, &sendq_mutex); - } - host = sendq_head; - sendq_head = host->next; - if (sendq_head == NULL) - sendq_tail = NULL; - host->next = NULL; - if (!sendq_head) - pthread_cond_broadcast(&sendq_empty_cond); - pthread_mutex_unlock(&sendq_mutex); - - ts.tv_sec = host->xmit_tv.tv_sec; - ts.tv_nsec = host->xmit_tv.tv_usec * 1000; - clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL); - return host; -} static unsigned short icmp_cksum(unsigned char * addr, int len) @@ -291,7 +227,7 @@ send_echo(HOSTADDR *host, unsigned char *ping_buffer) seqno = seqno_alloc(host, &host->xmit_tv); if (seqno == -1) { - sendq_enqueue(host, 0); + //FIXME return; } @@ -301,9 +237,6 @@ send_echo(HOSTADDR *host, unsigned char *ping_buffer) host->addr, host->addrlen); if (n < 0) { error("%s: sendto: %s", host->name, strerror(errno)); - if (errno == EINTR || errno == ENOBUFS) { - sendq_enqueue(host, 0); - } } else { if (host->xmit_count == 0) host->start_tv = host->xmit_tv; @@ -314,28 +247,54 @@ send_echo(HOSTADDR *host, unsigned char *ping_buffer) } } +static void +start_probe(void) +{ + 
pthread_mutex_lock(&sendq_mutex); + send_p = 1; + pthread_cond_broadcast(&sendq_cond); + pthread_mutex_unlock(&sendq_mutex); +} + +static void host_stat_commit(HOSTADDR *host); + void * p903_sender(void *p) { + size_t i; unsigned char *ping_buffer; struct pollfd pfd; int n; + unsigned send_count = 0; pfd.fd = ping_fd; pfd.events = POLLOUT; ping_buffer = emalloc(sizeof(struct icmp) + data_length); + pthread_mutex_lock(&sendq_mutex); while (1) { - n = poll(&pfd, 1, 0); - if (n == 1) { - HOSTADDR *host = sendq_dequeue(); - send_echo(host, ping_buffer); - } else if (n == -1) { - fatal("poll: %s", strerror(errno)); - exit(1); - } + if (!send_p) { + while (!send_p) + pthread_cond_wait(&sendq_cond, &sendq_mutex); + send_count = 0; + } + for (i = 0; i < hostaddr_count; i++) { + n = poll(&pfd, 1, 0); + if (n == 1) { + send_echo(&hostaddr[i], ping_buffer); + } else if (n == -1) { + fatal("poll: %s", strerror(errno)); + exit(1); + } + } + send_count++; + if (send_count == ping_count) + send_p = 0; + else + sleep(ping_interval); } + pthread_mutex_unlock(&sendq_mutex); return NULL; } @@ -363,30 +322,50 @@ nsqrt(double a, double prec) return x1; } +static inline int +timeval_is_null(struct timeval const *tv) +{ + return tv->tv_sec == 0 && tv->tv_usec == 0; +} + static void -host_stat(HOSTADDR *host, struct timeval const *tv) -{ - memcpy(&host->stat_last.start_tv, &host->start_tv, sizeof(host->stat_last.start_tv)); - memcpy(&host->stat_last.stop_tv, tv, sizeof(host->stat_last.stop_tv)); - host->stat_last.xmit_count = host->xmit_count; - host->stat_last.recv_count = host->recv_count; - host->stat_last.tmin = host->tmin; - host->stat_last.tmax = host->tmax; +hostaddr_extract_stat(HOSTADDR *host, struct host_stat *st) +{ + memcpy(&st->start_tv, &host->start_tv, + sizeof(host->stat_last.start_tv)); + memcpy(&st->stop_tv, + timeval_is_null(&host->recv_tv) + ? 
&host->xmit_tv : &host->recv_tv, + sizeof(host->stat_last.stop_tv)); + st->xmit_count = host->xmit_count; + st->recv_count = host->recv_count; + st->tmin = host->tmin; + st->tmax = host->tmax; if (host->recv_count > 0) { double total = host->recv_count; //FIXME: repeat count? double avg = host->tsum / total; double vari = host->tsumsq / total - avg * avg; - host->stat_last.avg = avg; - host->stat_last.stddev = nsqrt(vari, 0.0005); + st->avg = avg; + st->stddev = nsqrt(vari, 0.0005); + } +} + +static void +host_stat_commit(HOSTADDR *host) +{ + if (host->stat_last.status != HOST_STAT_VALID) { + hostaddr_extract_stat(host, &host->stat_last); + host->stat_last.status = HOST_STAT_VALID; } - host->stat_last.status = HOST_STAT_VALID; } /* Reset runtime statistics counters */ static void host_reset(HOSTADDR *host) { + if (host->xmit_count > 0) + host_stat_commit(host); host->xmit_count = 0; host->recv_count = 0; host->tmin = 999999999.0; @@ -417,14 +396,16 @@ int get_host_stat(HOSTADDR *host, struct json_value **retval) { struct json_value *obj, *v; - struct host_stat const *st = &host->stat_last; - double ts; + struct host_stat const *st; + struct host_stat stbuf; static char const *host_stat_status[] = { "init", "valid", "pending", "invalid" }; + int validity; + double ts; if (!(obj = json_new_object())) goto err; @@ -432,21 +413,41 @@ get_host_stat(HOSTADDR *host, struct json_value **retval) if (!(v = json_new_string(host->name)) || json_object_set(obj, "name", v)) goto err; - if (!(v = json_new_bool(host_stat_is_valid(st))) + + if (host_stat_is_valid(&host->stat_last)) { + st = &host->stat_last; + validity = 1; + } else if (host->stat_last.status == HOST_STAT_INIT + && host->xmit_count > ping_tolerance) { + hostaddr_extract_stat(host, &stbuf); + stbuf.status = HOST_STAT_PENDING; + st = &stbuf; + validity = 1; + } else { + st = NULL; + validity = 0; + } + + if (!(v = json_new_bool(validity)) || json_object_set(obj, "validity", v)) goto err; - if (!(v = 
json_new_string(host_stat_status[st->status])) + + if (!(v = json_new_string(host_stat_status[host->stat_last.status])) || json_object_set(obj, "status", v)) goto err; ts = timeval_to_double(&host->xmit_tv); if (!(v = json_new_number(ts)) || json_object_set(obj, "xmit-timestamp", v)) goto err; - - if (host_stat_is_valid(st)) { + + if (st) { int is_alive = st->xmit_count - st->recv_count < ping_tolerance; + if (!(v = json_new_string(host_stat_status[st->status])) + || json_object_set(obj, "status", v)) + goto err; + ts = timeval_to_double(&st->start_tv); if (!(v = json_new_number(ts)) || json_object_set(obj, "start-timestamp", v)) @@ -484,6 +485,10 @@ get_host_stat(HOSTADDR *host, struct json_value **retval) || json_object_set(obj, "stddev", v)) goto err; } + } else { + if (!(v = json_new_string(host_stat_status[host->stat_last.status])) + || json_object_set(obj, "status", v)) + goto err; } *retval = obj; @@ -685,16 +690,16 @@ p903_receiver(void *p) host = hostaddr_from_seqno(icmp->icmp_seq); if (host) { - struct timeval tv_now, tv_orig, tv_diff, *tp; + struct timeval tv_orig, tv_diff, *tp; double rtt; - gettimeofday (&tv_now, NULL); + gettimeofday (&host->recv_tv, NULL); tp = (struct timeval *) icmp->icmp_data; /* Avoid unaligned data: */ memcpy(&tv_orig, tp, sizeof (tv_orig)); - timersub(&tv_now, &tv_orig, &tv_diff); + timersub(&host->recv_tv, &tv_orig, &tv_diff); rtt = timeval_to_double(&tv_diff) * 1000.0; host->tsum += rtt; @@ -708,12 +713,8 @@ p903_receiver(void *p) if (verbose) log_echo((struct sockaddr *)&addr, addrlen, icmp, ip, n, rtt); - - if (host->recv_count < ping_count) { - sendq_enqueue(host, ping_interval); - } else { - host_stat(host, &tv_now); - } + if (host->recv_count == ping_count) + host_stat_commit(host); } } } @@ -723,39 +724,12 @@ p903_receiver(void *p) void * p903_scheduler(void *p) { - int update_round = 0; - double d; - while (1) { size_t i; - struct timeval tv; - /* Reset all statistics */ for (i = 0; i < hostaddr_count; i++) 
host_reset(hostaddr + i); - - /* Wait for pending probes to finish */ - if (sendq_wait_empty(&tv)) { - if (update_round == 0) { - info("scheduler: " - "starting probe interval correction"); - d = 0; - } - d += timeval_to_double(&tv); - update_round++; - info("scheduler: average: %.3fs", d / update_round); - if (update_round == UPDATE_MAX) { - d /= update_round; - probe_interval += 3*d/2; - info("scheduler: " - "finished probe interval correction: %lu", - probe_interval); - update_round = 0; - } - } - - for (i = 0; i < hostaddr_count; i++) - sendq_enqueue(hostaddr + i, 0); + start_probe(); sleep(probe_interval); } } |