aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- doc/ping903.8 11
-rw-r--r-- src/pinger.c 137
2 files changed, 114 insertions, 34 deletions
diff --git a/doc/ping903.8 b/doc/ping903.8
index bde1f51..9349fca 100644
--- a/doc/ping903.8
+++ b/doc/ping903.8
@@ -13,7 +13,7 @@
.\"
.\" You should have received a copy of the GNU General Public License
.\" along with Ping903. If not, see <http://www.gnu.org/licenses/>.
-.TH PING903 8 "March 10, 2020" "PING903" "System Administration"
+.TH PING903 8 "March 11, 2020" "PING903" "System Administration"
.SH NAME
ping903 \- high-performance ICMP monitoring daemon
.SH SYNOPSIS
@@ -139,7 +139,14 @@ Don't start supervisor process.
Print program version, copyright information, and exit.
.TP
.B \-v
-Turn on additional logging.
+Turn on additional logging. This option can be given several times to
+request more verbose output. If a single \fB\-v\fR option is given, the
+program prints at the end of each probe the total number of echo
+requests sent and replies received. Two options (\fB\-vv\fR) enable
+additional diagnostics of invalid echo replies. Three options enable
+logging of each received echo reply, and four options enable verbose
+logging of each echo request sent. Notice that three or more
+\fB\-v\fR options can produce a huge amount of logs.
.SH BUGS
Only IPv4 is currently supported.
.SH SEE ALSO
diff --git a/src/pinger.c b/src/pinger.c
index 8bb1090..53fedfa 100644
--- a/src/pinger.c
+++ b/src/pinger.c
@@ -39,9 +39,13 @@
#include "json.h"
#include "defs.h"
+/* Time in seconds between two subsequent probes. */
unsigned long probe_interval = 60;
+/* Time between two subsequent echo requests within the same probe. */
unsigned long ping_interval = 1;
+/* Number of echo requests per probe */
unsigned long ping_count = 10;
+/* Number of unanswered echo requests after which the host is declared dead. */
unsigned long ping_tolerance = 3;
/* Initial value for the tmin member of struct hostping */
@@ -236,7 +240,6 @@ typedef enum update_type {
static pthread_mutex_t update_mutex = PTHREAD_MUTEX_INITIALIZER;
static int check_host(char const *name);
static int update_add(UPDATE_TYPE t, void *data);
-static void update_commit(void);
void
pinger_setup(void)
@@ -965,8 +968,10 @@ static pthread_mutex_t sendq_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t sendq_cond = PTHREAD_COND_INITIALIZER;
static int send_p;
-static unsigned xmit_total;
-static unsigned recv_total;
+static unsigned long probe_num; /* Ordinal number of the current probe. */
+/* Totals for the current probe: */
+static unsigned xmit_total; /* Number of requests transmitted. */
+static unsigned recv_total; /* Number of replies received. */
#define ICMP_HEADER_LEN (offsetof(struct icmp, icmp_data))
#define PING_DATALEN (64 - ICMP_HEADER_LEN)
@@ -974,25 +979,39 @@ static unsigned recv_total;
size_t data_length = PING_DATALEN;
static unsigned char *data_buffer;
+/* Ping identifier (for the icmp_id member of struct icmp) */
static int ping_ident;
+/* Constants for sequence number database management */
+
+/* Max. time in seconds after which a sequence database entry can be reused. */
#define MAX_PING_TIMEOUT 10
+
enum {
- MAX_SEQNO = USHRT_MAX,
- MOD_SEQNO = MAX_SEQNO + 1
+ MAX_SEQNO = USHRT_MAX, /* Max. value for the sequence number */
+ MOD_SEQNO = MAX_SEQNO + 1 /* Modulus for computing next sequence
+ number. */
};
+/* Sequence number index entry. */
struct seqidx {
- HOSTPING *host;
- struct timeval tv;
- int ping_num;
+ HOSTPING *host; /* Associated host. */
+ struct timeval tv; /* Time the echo was sent. */
+ unsigned long probe_num; /* Number of the probe within which the
+ echo was sent. */
+ int ping_num; /* Number of echo request within the probe. */
};
-static struct seqidx *seqidx;
-static unsigned short next_seqno;
+static struct seqidx *seqidx; /* Sequence number database. */
+static unsigned short next_seqno; /* Next sequence number. */
+/* Protects seqidx against simultaneous access. */
static pthread_mutex_t seqno_mutex = PTHREAD_MUTEX_INITIALIZER;
+/* Allocate and return the sequence number for the given host and
+ * transmission time.
+ * Return negative value if the number cannot be allocated.
+ */
static int
seqno_alloc(HOSTPING *host, struct timeval *tv)
{
@@ -1001,6 +1020,7 @@ seqno_alloc(HOSTPING *host, struct timeval *tv)
if (tv->tv_sec - seqidx[n].tv.tv_sec > MAX_PING_TIMEOUT) {
memcpy(&seqidx[n].tv, tv, sizeof(*tv));
seqidx[n].host = host;
+ seqidx[n].probe_num = probe_num;
seqidx[n].ping_num = host->xmit_count;
next_seqno = (n + 1) % MOD_SEQNO;
return n;
@@ -1011,6 +1031,76 @@ seqno_alloc(HOSTPING *host, struct timeval *tv)
return -1;
}
+/* Check the validity of the echo reply SEQ (an index into seqidx).
+ * Return 0 if the reply is valid, -1 otherwise.  Only case 6 below is
+ * valid; a duplicate reply (case 4) also increments host->dup_count.
+ * This function is called when both the seqidx array and the HOSTPING
+ * structure associated with SEQ are locked.
+ */
+static int
+check_reply(int seq)
+{
+	int n = seqidx[seq].ping_num;
+	HOSTPING *host = seqidx[seq].host;
+
+	if (seqidx[seq].probe_num != probe_num) {
+		/* Case 1.
+		 * A latecomer reply, which arrived after its probe round
+		 * was committed (see hostping_commit).
+		 */
+		if (verbose > 1)
+			info("%s: reply for discarded echo request #%d, "
+			     "seqno %d; probe_num=%lu, current=%lu",
+			     host->name, n, seq, seqidx[seq].probe_num,
+			     probe_num);
+	} else if (host->xmit_count == 0) {
+		/* Case 2.
+		 * A reply came while no echo requests were transmitted yet.
+		 */
+		info("%s: stray reply #%d, seqno %d; probe_num=%lu",
+		     host->name, n, seq, probe_num);
+	} else if (n >= 0 && n < ping_count) {
+		if (n > host->xmit_count) {
+			/* Case 3.
+			 * Similar to 2, except that some echoes were sent.
+			 */
+			error("%s: phantom reply #%d, seqno %d; xmit_count=%lu",
+			      host->name, n, seq, host->xmit_count);
+		} else if (++host->nreply[n] > 1) {
+			/* Case 4.
+			 * Duplicate reply.
+			 */
+			host->dup_count++;
+			info("%s: duplicate reply for echo #%d, seqno %d",
+			     host->name, n, seq);
+		} else if (host->recv_count == host->xmit_count) {
+			/* Case 5.
+			 * Similar to 2 and 3.
+			 * Each echo request was replied to, and yet another
+			 * reply arrived, which is not a duplicate.
+			 */
+			error("%s: unexpected reply #%d, seqno %d; "
+			      "xmit_count=recv_count=%lu",
+			      host->name, n, seq, host->xmit_count);
+		} else {
+			/* Case 6.  A valid reply; cases 3-5 yield -1. */
+			return 0;
+		}
+	} else {
+		/* Case 7.
+		 * A reply with impossible echo request number.
+		 * This one should not happen indeed.
+		 */
+		error("%s: reply for unregistered echo #%d, seqno %d",
+		      host->name, n, seq);
+	}
+	return -1;
+}
+
+/* Given an echo sequence number, return the locked HOSTPING structure
+ * associated with this echo request, or NULL if the reply is
+ * invalid.
+ */
static HOSTPING *
hostping_from_seqno(int seq)
{
@@ -1019,29 +1109,11 @@ hostping_from_seqno(int seq)
pthread_mutex_lock(&seqno_mutex);
host = seqidx[seq].host;
if (host) {
- int n;
- HOSTPING *orig = host;
-
hostping_lock(host);
- n = seqidx[seq].ping_num;
- if (n >= 0 && n < ping_count) {
- if (++host->nreply[n] > 1) {
- host->dup_count++;
- info("%s: duplicate reply for echo #%d, seqno %d",
- host->name, n, seq);
- host = NULL;
- } else if (host->recv_count == host->xmit_count) {
- error("%s: unexpected reply #%d, seqno %d",
- host->name, n, seq);
- host = NULL;
- }
- } else {
- error("%s: duplicate reply for unregistered echo #%d, seqno %d",
- host->name, n, seq);
+ if (check_reply(seq)) {
+ hostping_unlock(host);
host = NULL;
}
- if (!host)
- hostping_unlock(orig);
} else
fatal("no host found for sequence number %d", seq);
pthread_mutex_unlock(&seqno_mutex);
@@ -1192,7 +1264,7 @@ send_echo(HOSTPING *host, unsigned char *ping_buffer)
data_length - sizeof(host->xmit_tv));
buflen = ICMP_HEADER_LEN + data_length;
- if (verbose > 2)
+ if (verbose > 3)
info("sending %zu bytes to %s, icmp_seq=%d",
buflen, host->name,
seqno);
@@ -1378,7 +1450,7 @@ p903_receiver(void *p)
host->recv_count++;
- if (verbose > 1)
+ if (verbose > 2)
log_echo((struct sockaddr *)&addr, addrlen,
icmp, ip, n, rtt);
if (host->recv_count == ping_count)
@@ -1480,6 +1552,7 @@ p903_scheduler(void *p)
/* Commit updates */
p903_update_commit();
+ probe_num++;
send_p = 1;
pthread_cond_broadcast(&sendq_cond);
pthread_mutex_unlock(&sendq_mutex);

Return to:

Send suggestions and report system problems to the System administrator.