diff options
Diffstat (limited to 'src/pinger.c')
-rw-r--r-- | src/pinger.c | 137 |
1 files changed, 105 insertions, 32 deletions
diff --git a/src/pinger.c b/src/pinger.c index 8bb1090..53fedfa 100644 --- a/src/pinger.c +++ b/src/pinger.c | |||
@@ -39,9 +39,13 @@ | |||
39 | #include "json.h" | 39 | #include "json.h" |
40 | #include "defs.h" | 40 | #include "defs.h" |
41 | 41 | ||
42 | /* Time in seconds between two subsequent probes. */ | ||
42 | unsigned long probe_interval = 60; | 43 | unsigned long probe_interval = 60; |
44 | /* Time between two subsequent echo requests within the same probe. */ | ||
43 | unsigned long ping_interval = 1; | 45 | unsigned long ping_interval = 1; |
46 | /* Number of echo requests per probe */ | ||
44 | unsigned long ping_count = 10; | 47 | unsigned long ping_count = 10; |
48 | /* Number of unanswered echo requests after which the host is declared dead. */ | ||
45 | unsigned long ping_tolerance = 3; | 49 | unsigned long ping_tolerance = 3; |
46 | 50 | ||
47 | /* Initial value for the tmin member of struct hostping */ | 51 | /* Initial value for the tmin member of struct hostping */ |
@@ -236,7 +240,6 @@ typedef enum update_type { | |||
236 | static pthread_mutex_t update_mutex = PTHREAD_MUTEX_INITIALIZER; | 240 | static pthread_mutex_t update_mutex = PTHREAD_MUTEX_INITIALIZER; |
237 | static int check_host(char const *name); | 241 | static int check_host(char const *name); |
238 | static int update_add(UPDATE_TYPE t, void *data); | 242 | static int update_add(UPDATE_TYPE t, void *data); |
239 | static void update_commit(void); | ||
240 | 243 | ||
241 | void | 244 | void |
242 | pinger_setup(void) | 245 | pinger_setup(void) |
@@ -965,8 +968,10 @@ static pthread_mutex_t sendq_mutex = PTHREAD_MUTEX_INITIALIZER; | |||
965 | static pthread_cond_t sendq_cond = PTHREAD_COND_INITIALIZER; | 968 | static pthread_cond_t sendq_cond = PTHREAD_COND_INITIALIZER; |
966 | static int send_p; | 969 | static int send_p; |
967 | 970 | ||
968 | static unsigned xmit_total; | 971 | static unsigned long probe_num; /* Ordinal number of the current probe. */ |
969 | static unsigned recv_total; | 972 | /* Totals for the current probe: */ |
973 | static unsigned xmit_total; /* Number of requests transmitted. */ | ||
974 | static unsigned recv_total; /* Number of replies received. */ | ||
970 | 975 | ||
971 | #define ICMP_HEADER_LEN (offsetof(struct icmp, icmp_data)) | 976 | #define ICMP_HEADER_LEN (offsetof(struct icmp, icmp_data)) |
972 | #define PING_DATALEN (64 - ICMP_HEADER_LEN) | 977 | #define PING_DATALEN (64 - ICMP_HEADER_LEN) |
@@ -974,25 +979,39 @@ static unsigned recv_total; | |||
974 | size_t data_length = PING_DATALEN; | 979 | size_t data_length = PING_DATALEN; |
975 | static unsigned char *data_buffer; | 980 | static unsigned char *data_buffer; |
976 | 981 | ||
982 | /* Ping identifier (for the icmp_id member of struct icmp) */ | ||
977 | static int ping_ident; | 983 | static int ping_ident; |
978 | 984 | ||
985 | /* Constants for sequence number database management */ | ||
986 | |||
987 | /* Max. time in seconds after which a sequence database entry can be reused. */ | ||
979 | #define MAX_PING_TIMEOUT 10 | 988 | #define MAX_PING_TIMEOUT 10 |
989 | |||
980 | enum { | 990 | enum { |
981 | MAX_SEQNO = USHRT_MAX, | 991 | MAX_SEQNO = USHRT_MAX, /* Max. value for the sequence number */ |
982 | MOD_SEQNO = MAX_SEQNO + 1 | 992 | MOD_SEQNO = MAX_SEQNO + 1 /* Modulus for computing next sequence |
993 | number. */ | ||
983 | }; | 994 | }; |
984 | 995 | ||
996 | /* Sequence number index entry. */ | ||
985 | struct seqidx { | 997 | struct seqidx { |
986 | HOSTPING *host; | 998 | HOSTPING *host; /* Associated host. */ |
987 | struct timeval tv; | 999 | struct timeval tv; /* Time the echo was sent. */ |
988 | int ping_num; | 1000 | unsigned long probe_num; /* Number of the probe within which the |
1001 | echo was sent. */ | ||
1002 | int ping_num; /* Number of echo request within the probe. */ | ||
989 | }; | 1003 | }; |
990 | 1004 | ||
991 | static struct seqidx *seqidx; | 1005 | static struct seqidx *seqidx; /* Sequence number database. */ |
992 | static unsigned short next_seqno; | 1006 | static unsigned short next_seqno; /* Next sequence number. */ |
993 | 1007 | ||
1008 | /* Protect simultaneous access to seqidx. */ | ||
994 | static pthread_mutex_t seqno_mutex = PTHREAD_MUTEX_INITIALIZER; | 1009 | static pthread_mutex_t seqno_mutex = PTHREAD_MUTEX_INITIALIZER; |
995 | 1010 | ||
1011 | /* Allocate and return the sequence number for the given host and | ||
1012 | * transmission time. | ||
1013 | * Return negative value if the number cannot be allocated. | ||
1014 | */ | ||
996 | static int | 1015 | static int |
997 | seqno_alloc(HOSTPING *host, struct timeval *tv) | 1016 | seqno_alloc(HOSTPING *host, struct timeval *tv) |
998 | { | 1017 | { |
@@ -1001,6 +1020,7 @@ seqno_alloc(HOSTPING *host, struct timeval *tv) | |||
1001 | if (tv->tv_sec - seqidx[n].tv.tv_sec > MAX_PING_TIMEOUT) { | 1020 | if (tv->tv_sec - seqidx[n].tv.tv_sec > MAX_PING_TIMEOUT) { |
1002 | memcpy(&seqidx[n].tv, tv, sizeof(*tv)); | 1021 | memcpy(&seqidx[n].tv, tv, sizeof(*tv)); |
1003 | seqidx[n].host = host; | 1022 | seqidx[n].host = host; |
1023 | seqidx[n].probe_num = probe_num; | ||
1004 | seqidx[n].ping_num = host->xmit_count; | 1024 | seqidx[n].ping_num = host->xmit_count; |
1005 | next_seqno = (n + 1) % MOD_SEQNO; | 1025 | next_seqno = (n + 1) % MOD_SEQNO; |
1006 | return n; | 1026 | return n; |
@@ -1011,6 +1031,76 @@ seqno_alloc(HOSTPING *host, struct timeval *tv) | |||
1011 | return -1; | 1031 | return -1; |
1012 | } | 1032 | } |
1013 | 1033 | ||
1034 | /* Check the validity of the echo reply SEQ. Return 0 if the reply is | ||
1035 | * valid, -1 otherwise. | ||
1036 | * This function is called when both the seqidx array and the HOSTPING | ||
1037 | * structure associated with SEQ are locked. | ||
1038 | */ | ||
1039 | static int | ||
1040 | check_reply(int seq) | ||
1041 | { | ||
1042 | int n = seqidx[seq].ping_num; | ||
1043 | HOSTPING *host = seqidx[seq].host; | ||
1044 | |||
1045 | if (seqidx[seq].probe_num != probe_num) { | ||
1046 | /* Case 1. | ||
1047 | * A latecomer reply, which arrived after its probe round | ||
1048 | * was committed (see hostping_commit). | ||
1049 | */ | ||
1050 | if (verbose > 1) | ||
1051 | info("%s: reply for discarded echo request #%d, " | ||
1052 | "seqno %d; probe_num=%lu, current=%lu", | ||
1053 | host->name, n, seq, seqidx[seq].probe_num, | ||
1054 | probe_num); | ||
1055 | } else if (host->xmit_count == 0) { | ||
1056 | /* Case 2. | ||
1057 | * A reply came while no echo requests were transmitted yet. | ||
1058 | */ | ||
1059 | info("%s: stray reply #%d, seqno %d; probe_num=%lu", | ||
1060 | host->name, n, seq, probe_num); | ||
1061 | } else if (n >= 0 && n < ping_count) { | ||
1062 | if (n > host->xmit_count) { | ||
1063 | /* Case 3. | ||
1064 | * Similar to 2, except that some echoes were sent. | ||
1065 | */ | ||
1066 | error("%s: phantom reply #%d, seqno %d; xmit_count=%lu", | ||
1067 | host->name, n, seq, host->xmit_count); | ||
1068 | } else if (++host->nreply[n] > 1) { | ||
1069 | /* Case 4. | ||
1070 | * Duplicate reply. | ||
1071 | */ | ||
1072 | host->dup_count++; | ||
1073 | info("%s: duplicate reply for echo #%d, seqno %d", | ||
1074 | host->name, n, seq); | ||
1075 | } else if (host->recv_count == host->xmit_count) { | ||
1076 | /* Case 5. | ||
1077 | * Similar to 2 and 3. | ||
1078 | * Each echo request was replied to, and yet another | ||
1079 | * reply arrived, which is not a duplicate. | ||
1080 | */ | ||
1081 | error("%s: unexpected reply #%d, seqno %d; " | ||
1082 | "xmit_count=recv_count=%lu", | ||
1083 | host->name, n, seq, host->xmit_count); | ||
1084 | } | ||
1085 | /* Case 6. | ||
1086 | * This is a valid reply. | ||
1087 | */ | ||
1088 | return 0; | ||
1089 | } else { | ||
1090 | /* Case 7. | ||
1091 | * A reply with impossible echo request number. | ||
1092 | * This one should not happen indeed. | ||
1093 | */ | ||
1094 | error("%s: reply for unregistered echo #%d, seqno %d", | ||
1095 | host->name, n, seq); | ||
1096 | } | ||
1097 | return -1; | ||
1098 | } | ||
1099 | |||
1100 | /* Given an echo sequence number, return the locked HOSTPING structure | ||
1101 | * associated with this echo request, or NULL if the reply is | ||
1102 | * invalid. | ||
1103 | */ | ||
1014 | static HOSTPING * | 1104 | static HOSTPING * |
1015 | hostping_from_seqno(int seq) | 1105 | hostping_from_seqno(int seq) |
1016 | { | 1106 | { |
@@ -1019,29 +1109,11 @@ hostping_from_seqno(int seq) | |||
1019 | pthread_mutex_lock(&seqno_mutex); | 1109 | pthread_mutex_lock(&seqno_mutex); |
1020 | host = seqidx[seq].host; | 1110 | host = seqidx[seq].host; |
1021 | if (host) { | 1111 | if (host) { |
1022 | int n; | ||
1023 | HOSTPING *orig = host; | ||
1024 | |||
1025 | hostping_lock(host); | 1112 | hostping_lock(host); |
1026 | n = seqidx[seq].ping_num; | 1113 | if (check_reply(seq)) { |
1027 | if (n >= 0 && n < ping_count) { | 1114 | hostping_unlock(host); |
1028 | if (++host->nreply[n] > 1) { | ||
1029 | host->dup_count++; | ||
1030 | info("%s: duplicate reply for echo #%d, seqno %d", | ||
1031 | host->name, n, seq); | ||
1032 | host = NULL; | ||
1033 | } else if (host->recv_count == host->xmit_count) { | ||
1034 | error("%s: unexpected reply #%d, seqno %d", | ||
1035 | host->name, n, seq); | ||
1036 | host = NULL; | ||
1037 | } | ||
1038 | } else { | ||
1039 | error("%s: duplicate reply for unregistered echo #%d, seqno %d", | ||
1040 | host->name, n, seq); | ||
1041 | host = NULL; | 1115 | host = NULL; |
1042 | } | 1116 | } |
1043 | if (!host) | ||
1044 | hostping_unlock(orig); | ||
1045 | } else | 1117 | } else |
1046 | fatal("no host found for sequence number %d", seq); | 1118 | fatal("no host found for sequence number %d", seq); |
1047 | pthread_mutex_unlock(&seqno_mutex); | 1119 | pthread_mutex_unlock(&seqno_mutex); |
@@ -1192,7 +1264,7 @@ send_echo(HOSTPING *host, unsigned char *ping_buffer) | |||
1192 | data_length - sizeof(host->xmit_tv)); | 1264 | data_length - sizeof(host->xmit_tv)); |
1193 | buflen = ICMP_HEADER_LEN + data_length; | 1265 | buflen = ICMP_HEADER_LEN + data_length; |
1194 | 1266 | ||
1195 | if (verbose > 2) | 1267 | if (verbose > 3) |
1196 | info("sending %zu bytes to %s, icmp_seq=%d", | 1268 | info("sending %zu bytes to %s, icmp_seq=%d", |
1197 | buflen, host->name, | 1269 | buflen, host->name, |
1198 | seqno); | 1270 | seqno); |
@@ -1378,7 +1450,7 @@ p903_receiver(void *p) | |||
1378 | 1450 | ||
1379 | host->recv_count++; | 1451 | host->recv_count++; |