aboutsummaryrefslogtreecommitdiff
path: root/src/pinger.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/pinger.c')
-rw-r--r--src/pinger.c137
1 files changed, 105 insertions, 32 deletions
diff --git a/src/pinger.c b/src/pinger.c
index 8bb1090..53fedfa 100644
--- a/src/pinger.c
+++ b/src/pinger.c
@@ -39,9 +39,13 @@
39#include "json.h" 39#include "json.h"
40#include "defs.h" 40#include "defs.h"
41 41
42/* Time in seconds between two subsequent probes. */
42unsigned long probe_interval = 60; 43unsigned long probe_interval = 60;
44/* Time between two subsequent echo requests within the same probe. */
43unsigned long ping_interval = 1; 45unsigned long ping_interval = 1;
46/* Number of echo requests per probe. */
44unsigned long ping_count = 10; 47unsigned long ping_count = 10;
48/* Number of unanswered echo requests after which the host is declared dead. */
45unsigned long ping_tolerance = 3; 49unsigned long ping_tolerance = 3;
46 50
47/* Initial value for the tmin member of struct hostping */ 51/* Initial value for the tmin member of struct hostping */
@@ -236,7 +240,6 @@ typedef enum update_type {
236static pthread_mutex_t update_mutex = PTHREAD_MUTEX_INITIALIZER; 240static pthread_mutex_t update_mutex = PTHREAD_MUTEX_INITIALIZER;
237static int check_host(char const *name); 241static int check_host(char const *name);
238static int update_add(UPDATE_TYPE t, void *data); 242static int update_add(UPDATE_TYPE t, void *data);
239static void update_commit(void);
240 243
241void 244void
242pinger_setup(void) 245pinger_setup(void)
@@ -965,8 +968,10 @@ static pthread_mutex_t sendq_mutex = PTHREAD_MUTEX_INITIALIZER;
965static pthread_cond_t sendq_cond = PTHREAD_COND_INITIALIZER; 968static pthread_cond_t sendq_cond = PTHREAD_COND_INITIALIZER;
966static int send_p; 969static int send_p;
967 970
968static unsigned xmit_total; 971static unsigned long probe_num; /* Ordinal number of the current probe. */
969static unsigned recv_total; 972/* Totals for the current probe: */
973static unsigned xmit_total; /* Number of requests transmitted. */
974static unsigned recv_total; /* Number of replies received. */
970 975
971#define ICMP_HEADER_LEN (offsetof(struct icmp, icmp_data)) 976#define ICMP_HEADER_LEN (offsetof(struct icmp, icmp_data))
972#define PING_DATALEN (64 - ICMP_HEADER_LEN) 977#define PING_DATALEN (64 - ICMP_HEADER_LEN)
@@ -974,25 +979,39 @@ static unsigned recv_total;
974size_t data_length = PING_DATALEN; 979size_t data_length = PING_DATALEN;
975static unsigned char *data_buffer; 980static unsigned char *data_buffer;
976 981
982/* Ping identifier (for the icmp_id member of struct icmp) */
977static int ping_ident; 983static int ping_ident;
978 984
985/* Constants for sequence number database management */
986
987/* Max. time in seconds after which a sequence database entry can be reused. */
979#define MAX_PING_TIMEOUT 10 988#define MAX_PING_TIMEOUT 10
989
980enum { 990enum {
981 MAX_SEQNO = USHRT_MAX, 991 MAX_SEQNO = USHRT_MAX, /* Max. value for the sequence number */
982 MOD_SEQNO = MAX_SEQNO + 1 992 MOD_SEQNO = MAX_SEQNO + 1 /* Modulus for computing next sequence
993 number. */
983}; 994};
984 995
996/* Sequence number index entry. */
985struct seqidx { 997struct seqidx {
986 HOSTPING *host; 998 HOSTPING *host; /* Associated host. */
987 struct timeval tv; 999 struct timeval tv; /* Time the echo was sent. */
988 int ping_num; 1000 unsigned long probe_num; /* Number of the probe within which the
1001 echo was sent. */
1002 int ping_num; /* Number of echo request within the probe. */
989}; 1003};
990 1004
991static struct seqidx *seqidx; 1005static struct seqidx *seqidx; /* Sequence number database. */
992static unsigned short next_seqno; 1006static unsigned short next_seqno; /* Next sequence number. */
993 1007
1008/* Protects seqidx against simultaneous access. */
994static pthread_mutex_t seqno_mutex = PTHREAD_MUTEX_INITIALIZER; 1009static pthread_mutex_t seqno_mutex = PTHREAD_MUTEX_INITIALIZER;
995 1010
1011/* Allocate and return the sequence number for the given host and
1012 * transmission time.
1013 * Return a negative value if the number cannot be allocated.
1014 */
996static int 1015static int
997seqno_alloc(HOSTPING *host, struct timeval *tv) 1016seqno_alloc(HOSTPING *host, struct timeval *tv)
998{ 1017{
@@ -1001,6 +1020,7 @@ seqno_alloc(HOSTPING *host, struct timeval *tv)
1001 if (tv->tv_sec - seqidx[n].tv.tv_sec > MAX_PING_TIMEOUT) { 1020 if (tv->tv_sec - seqidx[n].tv.tv_sec > MAX_PING_TIMEOUT) {
1002 memcpy(&seqidx[n].tv, tv, sizeof(*tv)); 1021 memcpy(&seqidx[n].tv, tv, sizeof(*tv));
1003 seqidx[n].host = host; 1022 seqidx[n].host = host;
1023 seqidx[n].probe_num = probe_num;
1004 seqidx[n].ping_num = host->xmit_count; 1024 seqidx[n].ping_num = host->xmit_count;
1005 next_seqno = (n + 1) % MOD_SEQNO; 1025 next_seqno = (n + 1) % MOD_SEQNO;
1006 return n; 1026 return n;
@@ -1011,6 +1031,76 @@ seqno_alloc(HOSTPING *host, struct timeval *tv)
1011 return -1; 1031 return -1;
1012} 1032}
1013 1033
1034/* Check the validity of the echo reply SEQ. Return 0 if the reply is
1035 * valid, -1 otherwise.
1036 * This function is called when both the seqidx array and the HOSTPING
1037 * structure associated with SEQ are locked.
1038 */
1039static int
1040check_reply(int seq)
1041{
1042 int n = seqidx[seq].ping_num;
1043 HOSTPING *host = seqidx[seq].host;
1044
1045 if (seqidx[seq].probe_num != probe_num) {
1046 /* Case 1.
1047 * A latecomer reply, which arrived after its probe round
1048 * was committed (see hostping_commit).
1049 */
1050 if (verbose > 1)
1051 info("%s: reply for discarded echo request #%d, "
1052 "seqno %d; probe_num=%lu, current=%lu",
1053 host->name, n, seq, seqidx[seq].probe_num,
1054 probe_num);
1055 } else if (host->xmit_count == 0) {
1056 /* Case 2.
1057 * A reply came while no echo requests were transmitted yet.
1058 */
1059 info("%s: stray reply #%d, seqno %d; probe_num=%lu",
1060 host->name, n, seq, probe_num);
1061 } else if (n >= 0 && n < ping_count) {
1062 if (n > host->xmit_count) {
1063 /* Case 3.
1064 * Similar to 2, except that some echoes were sent.
1065 */
1066 error("%s: phantom reply #%d, seqno %d; xmit_count=%lu",
1067 host->name, n, seq, host->xmit_count);
1068 } else if (++host->nreply[n] > 1) {
1069 /* Case 4.
1070 * Duplicate reply.
1071 */
1072 host->dup_count++;
1073 info("%s: duplicate reply for echo #%d, seqno %d",
1074 host->name, n, seq);
1075 } else if (host->recv_count == host->xmit_count) {
1076 /* Case 5.
1077 * Similar to 2 and 3.
1078 * Each echo request was replied to, and yet another
1079 * reply arrived, which is not a duplicate.
1080 */
1081 error("%s: unexpected reply #%d, seqno %d; "
1082 "xmit_count=recv_count=%lu",
1083 host->name, n, seq, host->xmit_count);
1084 }
1085 /* Case 6.
1086 * This is a valid reply.
1087 */
1088 return 0;
1089 } else {
1090 /* Case 7.
1091 * A reply with impossible echo request number.
1092 * This one should not happen indeed.
1093 */
1094 error("%s: reply for unregistered echo #%d, seqno %d",
1095 host->name, n, seq);
1096 }
1097 return -1;
1098}
1099
1100/* Given an echo sequence number, return the locked HOSTPING structure
1101 * associated with this echo request, or NULL if the reply is
1102 * invalid.
1103 */
1014static HOSTPING * 1104static HOSTPING *
1015hostping_from_seqno(int seq) 1105hostping_from_seqno(int seq)
1016{ 1106{
@@ -1019,29 +1109,11 @@ hostping_from_seqno(int seq)
1019 pthread_mutex_lock(&seqno_mutex); 1109 pthread_mutex_lock(&seqno_mutex);
1020 host = seqidx[seq].host; 1110 host = seqidx[seq].host;
1021 if (host) { 1111 if (host) {
1022 int n;
1023 HOSTPING *orig = host;
1024
1025 hostping_lock(host); 1112 hostping_lock(host);
1026 n = seqidx[seq].ping_num; 1113 if (check_reply(seq)) {
1027 if (n >= 0 && n < ping_count) { 1114 hostping_unlock(host);
1028 if (++host->nreply[n] > 1) {
1029 host->dup_count++;
1030 info("%s: duplicate reply for echo #%d, seqno %d",
1031 host->name, n, seq);
1032 host = NULL;
1033 } else if (host->recv_count == host->xmit_count) {
1034 error("%s: unexpected reply #%d, seqno %d",
1035 host->name, n, seq);
1036 host = NULL;
1037 }
1038 } else {
1039 error("%s: duplicate reply for unregistered echo #%d, seqno %d",
1040 host->name, n, seq);
1041 host = NULL; 1115 host = NULL;
1042 } 1116 }
1043 if (!host)
1044 hostping_unlock(orig);
1045 } else 1117 } else
1046 fatal("no host found for sequence number %d", seq); 1118 fatal("no host found for sequence number %d", seq);
1047 pthread_mutex_unlock(&seqno_mutex); 1119 pthread_mutex_unlock(&seqno_mutex);
@@ -1192,7 +1264,7 @@ send_echo(HOSTPING *host, unsigned char *ping_buffer)
1192 data_length - sizeof(host->xmit_tv)); 1264 data_length - sizeof(host->xmit_tv));
1193 buflen = ICMP_HEADER_LEN + data_length; 1265 buflen = ICMP_HEADER_LEN + data_length;
1194 1266
1195 if (verbose > 2) 1267 if (verbose > 3)
1196 info("sending %zu bytes to %s, icmp_seq=%d", 1268 info("sending %zu bytes to %s, icmp_seq=%d",
1197 buflen, host->name, 1269 buflen, host->name,
1198 seqno); 1270 seqno);
@@ -1378,7 +1450,7 @@ p903_receiver(void *p)
1378 1450
1379 host->recv_count++; 1451 host->recv_count++;