aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/ping903.811
-rw-r--r--src/pinger.c137
2 files changed, 114 insertions, 34 deletions
diff --git a/doc/ping903.8 b/doc/ping903.8
index bde1f51..9349fca 100644
--- a/doc/ping903.8
+++ b/doc/ping903.8
@@ -13,7 +13,7 @@
13.\" 13.\"
14.\" You should have received a copy of the GNU General Public License 14.\" You should have received a copy of the GNU General Public License
15.\" along with Ping903. If not, see <http://www.gnu.org/licenses/>. 15.\" along with Ping903. If not, see <http://www.gnu.org/licenses/>.
16.TH PING903 8 "March 10, 2020" "PING903" "System Administration" 16.TH PING903 8 "March 11, 2020" "PING903" "System Administration"
17.SH NAME 17.SH NAME
18ping903 \- high-performance ICMP monitoring daemon 18ping903 \- high-performance ICMP monitoring daemon
19.SH SYNOPSIS 19.SH SYNOPSIS
@@ -139,7 +139,14 @@ Don't start supervisor process.
139Print program version, copyright information, and exit. 139Print program version, copyright information, and exit.
140.TP 140.TP
141.B \-v 141.B \-v
142Turn on additional logging. 142Turn on additional logging. This option can be given several times to
143request more verbose output. If given a single \fB\-v\fR option, the
144program prints at the end of each probe the total number of echo
145requests sent and replies received. Two options (\fB\-vv\fR) enable
146additional diagnostics of invalid echo replies. Three options enable
147logging of each received echo reply, and four options enable verbose
148logging of each echo request sent. Notice that three or more
149\fB\-v\fR options can produce a huge amount of logs.
143.SH BUGS 150.SH BUGS
144Only IPv4 is currently supported. 151Only IPv4 is currently supported.
145.SH SEE ALSO 152.SH SEE ALSO
diff --git a/src/pinger.c b/src/pinger.c
index 8bb1090..53fedfa 100644
--- a/src/pinger.c
+++ b/src/pinger.c
@@ -39,9 +39,13 @@
39#include "json.h" 39#include "json.h"
40#include "defs.h" 40#include "defs.h"
41 41
42/* Time in seconds between two subsequent probes. */
42unsigned long probe_interval = 60; 43unsigned long probe_interval = 60;
44/* Time between two subsequent echo requests within the same probe. */
43unsigned long ping_interval = 1; 45unsigned long ping_interval = 1;
46/* Number of echo requests per probe */
44unsigned long ping_count = 10; 47unsigned long ping_count = 10;
48/* Number of unanswered echo requests after which the host is declared dead. */
45unsigned long ping_tolerance = 3; 49unsigned long ping_tolerance = 3;
46 50
47/* Initial value for the tmin member of struct hostping */ 51/* Initial value for the tmin member of struct hostping */
@@ -236,7 +240,6 @@ typedef enum update_type {
236static pthread_mutex_t update_mutex = PTHREAD_MUTEX_INITIALIZER; 240static pthread_mutex_t update_mutex = PTHREAD_MUTEX_INITIALIZER;
237static int check_host(char const *name); 241static int check_host(char const *name);
238static int update_add(UPDATE_TYPE t, void *data); 242static int update_add(UPDATE_TYPE t, void *data);
239static void update_commit(void);
240 243
241void 244void
242pinger_setup(void) 245pinger_setup(void)
@@ -965,8 +968,10 @@ static pthread_mutex_t sendq_mutex = PTHREAD_MUTEX_INITIALIZER;
965static pthread_cond_t sendq_cond = PTHREAD_COND_INITIALIZER; 968static pthread_cond_t sendq_cond = PTHREAD_COND_INITIALIZER;
966static int send_p; 969static int send_p;
967 970
968static unsigned xmit_total; 971static unsigned long probe_num; /* Ordinal number of the current probe. */
969static unsigned recv_total; 972/* Totals for the current probe: */
973static unsigned xmit_total; /* Number of requests transmitted. */
974static unsigned recv_total; /* Number of replies received. */
970 975
971#define ICMP_HEADER_LEN (offsetof(struct icmp, icmp_data)) 976#define ICMP_HEADER_LEN (offsetof(struct icmp, icmp_data))
972#define PING_DATALEN (64 - ICMP_HEADER_LEN) 977#define PING_DATALEN (64 - ICMP_HEADER_LEN)
@@ -974,25 +979,39 @@ static unsigned recv_total;
974size_t data_length = PING_DATALEN; 979size_t data_length = PING_DATALEN;
975static unsigned char *data_buffer; 980static unsigned char *data_buffer;
976 981
982/* Ping identifier (for the icmp_id member of struct icmp) */
977static int ping_ident; 983static int ping_ident;
978 984
985/* Constants for sequence number database management */
986
987/* Max. time in seconds after which a sequence database entry can be reused. */
979#define MAX_PING_TIMEOUT 10 988#define MAX_PING_TIMEOUT 10
989
980enum { 990enum {
981 MAX_SEQNO = USHRT_MAX, 991 MAX_SEQNO = USHRT_MAX, /* Max. value for the sequence number */
982 MOD_SEQNO = MAX_SEQNO + 1 992 MOD_SEQNO = MAX_SEQNO + 1 /* Modulus for computing next sequence
993 number. */
983}; 994};
984 995
996/* Sequence number index entry. */
985struct seqidx { 997struct seqidx {
986 HOSTPING *host; 998 HOSTPING *host; /* Associated host. */
987 struct timeval tv; 999 struct timeval tv; /* Time the echo was sent. */
988 int ping_num; 1000 unsigned long probe_num; /* Number of the probe within which the
1001 echo was sent. */
1002 int ping_num; /* Number of echo request within the probe. */
989}; 1003};
990 1004
991static struct seqidx *seqidx; 1005static struct seqidx *seqidx; /* Sequence number database. */
992static unsigned short next_seqno; 1006static unsigned short next_seqno; /* Next sequence number. */
993 1007
1008/* Protect simultaneous access to seqidx. */
994static pthread_mutex_t seqno_mutex = PTHREAD_MUTEX_INITIALIZER; 1009static pthread_mutex_t seqno_mutex = PTHREAD_MUTEX_INITIALIZER;
995 1010
1011/* Allocate and return the sequence number for the given host and
1012 * transmission time.
1013 * Return a negative value if the number cannot be allocated.
1014 */
996static int 1015static int
997seqno_alloc(HOSTPING *host, struct timeval *tv) 1016seqno_alloc(HOSTPING *host, struct timeval *tv)
998{ 1017{
@@ -1001,6 +1020,7 @@ seqno_alloc(HOSTPING *host, struct timeval *tv)
1001 if (tv->tv_sec - seqidx[n].tv.tv_sec > MAX_PING_TIMEOUT) { 1020 if (tv->tv_sec - seqidx[n].tv.tv_sec > MAX_PING_TIMEOUT) {
1002 memcpy(&seqidx[n].tv, tv, sizeof(*tv)); 1021 memcpy(&seqidx[n].tv, tv, sizeof(*tv));
1003 seqidx[n].host = host; 1022 seqidx[n].host = host;
1023 seqidx[n].probe_num = probe_num;
1004 seqidx[n].ping_num = host->xmit_count; 1024 seqidx[n].ping_num = host->xmit_count;
1005 next_seqno = (n + 1) % MOD_SEQNO; 1025 next_seqno = (n + 1) % MOD_SEQNO;
1006 return n; 1026 return n;
@@ -1011,6 +1031,76 @@ seqno_alloc(HOSTPING *host, struct timeval *tv)
1011 return -1; 1031 return -1;
1012} 1032}
1013 1033
1034/* Check the validity of the echo reply SEQ. Return 0 if the reply is
1035 * valid, -1 otherwise.
1036 * This function is called when both the seqidx array and the HOSTPING
1037 * structure associated with SEQ are locked.
1038 */
1039static int
1040check_reply(int seq)
1041{
1042 int n = seqidx[seq].ping_num;
1043 HOSTPING *host = seqidx[seq].host;
1044
1045 if (seqidx[seq].probe_num != probe_num) {
1046 /* Case 1.
1047 * A latecomer reply, which arrived after its probe round
1048 * was committed (see hostping_commit).
1049 */
1050 if (verbose > 1)
1051 info("%s: reply for discarded echo request #%d, "
1052 "seqno %d; probe_num=%lu, current=%lu",
1053 host->name, n, seq, seqidx[seq].probe_num,
1054 probe_num);
1055 } else if (host->xmit_count == 0) {
1056 /* Case 2.
1057 * A reply came while no echo requests were transmitted yet.
1058 */
1059 info("%s: stray reply #%d, seqno %d; probe_num=%lu",
1060 host->name, n, seq, probe_num);
1061 } else if (n >= 0 && n < ping_count) {
1062 if (n > host->xmit_count) {
1063 /* Case 3.
1064 * Similar to 2, except that some echoes were sent.
1065 */
1066 error("%s: phantom reply #%d, seqno %d; xmit_count=%lu",
1067 host->name, n, seq, host->xmit_count);
1068 } else if (++host->nreply[n] > 1) {
1069 /* Case 4.
1070 * Duplicate reply.
1071 */
1072 host->dup_count++;
1073 info("%s: duplicate reply for echo #%d, seqno %d",
1074 host->name, n, seq);
1075 } else if (host->recv_count == host->xmit_count) {
1076 /* Case 5.
1077 * Similar to 2 and 3.
1078 * Each echo request was replied to, and yet another
1079 * reply arrived, which is not a duplicate.
1080 */
1081 error("%s: unexpected reply #%d, seqno %d; "
1082 "xmit_count=recv_count=%lu",
1083 host->name, n, seq, host->xmit_count);
1084 }
1085 /* Case 6.
1086 * This is a valid reply.
1087 */
1088 return 0;
1089 } else {
1090 /* Case 7.
1091 * A reply with impossible echo request number.
1092 * This one should not happen indeed.
1093 */
1094 error("%s: reply for unregistered echo #%d, seqno %d",
1095 host->name, n, seq);
1096 }
1097 return -1;
1098}
1099
1100/* Given an echo sequence number, return the locked HOSTPING structure
1101 * associated with this echo request, or NULL if the reply is
1102 * invalid.
1103 */
1014static HOSTPING * 1104static HOSTPING *
1015hostping_from_seqno(int seq) 1105hostping_from_seqno(int seq)
1016{ 1106{
@@ -1019,29 +1109,11 @@ hostping_from_seqno(int seq)
1019 pthread_mutex_lock(&seqno_mutex); 1109 pthread_mutex_lock(&seqno_mutex);
1020 host = seqidx[seq].host; 1110 host = seqidx[seq].host;
1021 if (host) { 1111 if (host) {
1022 int n;
1023 HOSTPING *orig = host;
1024
1025 hostping_lock(host); 1112 hostping_lock(host);
1026 n = seqidx[seq].ping_num; 1113 if (check_reply(seq)) {
1027 if (n >= 0 && n < ping_count) { 1114 hostping_unlock(host);
1028 if (++host->nreply[n] > 1) {
1029 host->dup_count++;
1030 info("%s: duplicate reply for echo #%d, seqno %d",
1031 host->name, n, seq);
1032 host = NULL;
1033 } else if (host->recv_cou