aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org.ua> 2014-10-16 16:57:56 +0300
committer Sergey Poznyakoff <gray@gnu.org.ua> 2014-10-16 17:01:24 +0300
commit 4005897a75d88355c6da8513bf7a4a13c301c97a (patch)
tree e28fb6995ee2f995ef74cd44a09d5fb07419b495
parent b51b2856515a92fad170b91f9788798e288420ef (diff)
download dnstools-4005897a75d88355c6da8513bf7a4a13c301c97a.tar.gz
dnstools-4005897a75d88355c6da8513bf7a4a13c301c97a.tar.bz2
whoseip: fix caching algorithm
* whoseip/Whoseip/DB.pm: Fix caching algorithm, improve debugging.
Close all open databases before terminating.
* whoseip/whoseip.pl: Implement ${source} and ${item} macro variables.
Document new options and variables.
-rw-r--r-- whoseip/Whoseip/DB.pm 224
-rw-r--r-- whoseip/whoseip.pl 33
2 files changed, 197 insertions, 60 deletions
diff --git a/whoseip/Whoseip/DB.pm b/whoseip/Whoseip/DB.pm
index 0abc953..f6b08f0 100644
--- a/whoseip/Whoseip/DB.pm
+++ b/whoseip/Whoseip/DB.pm
@@ -162,4 +162,23 @@ use constant LEAF_IDX => 256;
+sub pagetypestr {
+ my $t = shift;
+ return "index" if ($t == IPDB_PAGE_INDEX);
+ return "leaf" if ($t == IPDB_PAGE_LEAF);
+ return $t;
+}
+
sub systell { sysseek($_[0], 0, SEEK_CUR) }
+my @ipdb_open_files;
+
+sub ipdb_close_all {
+ foreach my $file (@ipdb_open_files) {
+ ipdb_close($file) if defined($file->{fd});
+ }
+}
+
+END {
+ ipdb_close_all();
+}
+
=pod
@@ -223,3 +242,3 @@ sub ipdb_open {
print STDERR "ROOTIDX $tab[$i]=$tab[$i+1]\n"
- if $_{debug};
+ if $_{debug} > 1;
}
@@ -243,3 +262,3 @@ sub ipdb_open {
$ipdbfile{debug} = $_{debug};
- if ($ipdbfile{debug}) {
+ if ($ipdbfile{debug} > 1) {
my $ug = new Data::UUID;
@@ -247,2 +266,3 @@ sub ipdb_open {
}
+ push @ipdb_open_files, \%ipdbfile;
return \%ipdbfile;
@@ -328,3 +348,3 @@ sub ipdb_locker {
print STDERR "ROOTIDX $tab[$i]=$tab[$i+1]\n"
- if $_{debug};
+ if $_{debug} > 1;
}
@@ -349,3 +369,3 @@ sub ipdb_save_page($$) {
join(',', @{$page->{tab}})."\n"
- if $dbf->{debug};
+ if $dbf->{debug} > 1;
$ret = syswrite($dbf->{fd}, pack('LL[257].',
@@ -355,30 +375,54 @@ sub ipdb_save_page($$) {
$dbf->{pagesize});
+ croak "$dbf->{file}: write error at $page->{off}: $ret: $!"
+ unless ($ret == $dbf->{pagesize});
+
+ delete $page->{dirty};
} elsif ($page->{type} == IPDB_PAGE_LEAF) {
- print STDERR "saving leaf page $page->{off}\n"
- if $dbf->{debug};
- my @a;
- my $size = length(pack('LLL',0,0,0));
- my $i = 0;
- foreach my $ent (@{$page->{tab}}) {
- my $x = pack('LLLa2L/a', @{$ent}[0 .. 3],freeze($ent->[4]));
- my $l = length($x);
- if ($size + $l > $dbf->{pagesize}) {
- my $p = ipdb_alloc_page($dbf, IPDB_PAGE_LEAF);
- $page->{next} = $p->{off};
- $p->{tab} = @{$page->{tab}}[$i .. $#{$page->{tab}}];
- $p->{dirty} = 1;
- splice @{$page->{tab}}, $i;
- last;
+ my $nextpage;
+ do {
+ print STDERR "saving leaf page $page->{off}\n"
+ if $dbf->{debug} > 1;
+ my $size = length(pack('LLL',0,0,0));
+ my $i = 0;
+ my $a;
+ foreach my $ent (@{$page->{tab}}) {
+ my $fdata = eval { freeze($ent->[4]) };
+ if ($@) {
+ print STDERR "failed to freeze data for " .
+ inet_ntoa(pack('N', $ent->[0])) . "/" .
+ inet_ntoa(pack('N', $ent->[1])) . ":".
+ $ent->[3] ."\n";
+ exit;
+ }
+ my $x = pack('LLLa2L/a', @{$ent}[0 .. 3],$fdata);
+ my $l = length($x);
+ if ($size + $l > $dbf->{pagesize}) {
+ print STDERR "SPLIT at $i: $size + $l, rest ".
+ ($#{$page->{tab}}-$i+1)."\n"
+ if $dbf->{debug} > 1;
+ $nextpage = ipdb_alloc_page($dbf, IPDB_PAGE_LEAF,
+ nocache => 1);
+ $page->{next} = $nextpage->{off};
+ @{$nextpage->{tab}} = @{$page->{tab}}[$i .. $#{$page->{tab}}];
+ $nextpage->{dirty} = 1;
+ splice @{$page->{tab}}, $i;
+ last;
+ }
+ $size += $l;
+ $a .= $x;
+ ++$i;
}
- $size += $l;
- push @a, $x;
- } continue {
- ++$i;
- }
- $ret = syswrite($dbf->{fd},
- pack('LLLa*@'.$dbf->{pagesize},
- $page->{type},
- $#a + 1,
- $page->{next},
- @a));
+ $ret = syswrite($dbf->{fd},
+ pack('LLLa'.length($a).'@'.$dbf->{pagesize},
+ $page->{type},
+ $i,
+ $page->{next},
+ $a));
+ croak "$dbf->{file}: write error at $page->{off}: $ret: $!"
+ unless ($ret == $dbf->{pagesize});
+ delete $page->{dirty};
+ $page = $nextpage;
+ $nextpage = undef;
+ } while (defined($page));
+
} else {
@@ -386,6 +430,2 @@ sub ipdb_save_page($$) {
}
- croak "$dbf->{file}: write error at $page->{off}: $ret: $!"
- unless ($ret == $dbf->{pagesize});
-
- delete $page->{dirty};
}
@@ -416,5 +456,36 @@ sub ipdb_cache_put($$) {
$dbf->{pagecache}{lru_newest} = $page;
+ $dbf->{pagecache}{lru_oldest} = $page
+ unless defined $dbf->{pagecache}{lru_oldest};
$dbf->{pagecache}{$page->{off}} = $page;
+ dump_lru($dbf, "put $page->{off}");
}
+sub dump_lru {
+ my ($dbf,$pfx) = @_;
+
+ return unless $dbf->{debug} > 2;
+
+ my $x = $dbf->{pagecache}{lru_oldest};
+ print STDERR "DUMP $pfx\n";
+ print STDERR "KEYS: ".join(',', sort keys %{$dbf->{pagecache}})."\n";
+ while (defined($x)) {
+ print STDERR "==> $x->{off} (".pagetypestr($x->{type}).",";
+
+ if (defined($x->{lru_newer})) {
+ print STDERR $x->{lru_newer}{off};
+ } else {
+ print STDERR "NIL";
+ }
+ print STDERR ",";
+ if (defined($x->{lru_older})) {
+ print STDERR $x->{lru_older}{off};
+ } else {
+ print STDERR "NIL";
+ }
+ print STDERR ")\n";
+ $x = $x->{lru_newer};
+ }
+ print STDERR "END\n";
+}
+
sub ipdb_cache_get($$) {
@@ -423,15 +494,28 @@ sub ipdb_cache_get($$) {
if (defined($dbf->{pagecache}{$off})) {
+ print STDERR "$off found in cache\n" if $dbf->{debug};
$page = $dbf->{pagecache}{$off};
- # promote the page
- if (defined($page->{lru_older})) {
- $page->{lru_older}{lru_newer} = $page->{lru_newer};
- } else {
- # It was the oldest page
- $dbf->{pagecache}{lru_oldest} = $page->{lru_newer};
- }
if (defined($page->{lru_newer})) {
+ print STDERR "promoting $page->{off}\n" if $dbf->{debug} > 2;
+ # promote the page
+ if (defined($page->{lru_older})) {
+ $page->{lru_older}{lru_newer} = $page->{lru_newer};
+ } else {
+ # It was the oldest page
+ $dbf->{pagecache}{lru_oldest} = $page->{lru_newer};
+ }
+
$page->{lru_newer}{lru_older} = $page->{lru_older};
- }
- $dbf->{pagecache}{lru_newest} = $page;
+
+ $page->{lru_older} = $dbf->{pagecache}{lru_newest};
+ $dbf->{pagecache}{lru_newest}{lru_newer} = $page;
+
+ $dbf->{pagecache}{lru_newest} = $page;
+
+ $page->{lru_newer} = undef;
+ $dbf->{pagecache}{lru_oldest} = $page
+ unless defined $dbf->{pagecache}{lru_oldest};
+ dump_lru($dbf, "after promoting $page->{off}");
+ }
} else {
+ print STDERR "$off NOT found in cache\n" if $dbf->{debug};
$page = ipdb_get_page($dbf, $off);
@@ -489,2 +573,3 @@ sub ipdb_close($) {
close $dbf->{fd};
+ delete $dbf->{fd};
}
@@ -509,3 +594,3 @@ sub ipdb_get_page($$) {
if ($ret{type} == IPDB_PAGE_INDEX) {
- print STDERR "found index page at $off\n" if $dbf->{debug};
+ print STDERR "found index page at $off\n" if $dbf->{debug} > 3;
my ($x, @a) = unpack('LL257', $s);
@@ -516,6 +601,14 @@ sub ipdb_get_page($$) {
print STDERR "found leaf page at $off, has $nent entries\n"
- if $dbf->{debug};
+ if $dbf->{debug} > 3;
my ($x1, $x2, $x3, @a) = unpack("LLL(LLLa2L/a)$nent", $s);
- for (my $i = 0; $i < $nent; $i += 5) {
- push @{$ret{tab}}, [ @a[$i .. $i+3], thaw $a[$i+4] ];
+ for (my $i = 0; $i < $nent; $i++) {
+ my $href = thaw $a[$i*5 + 4];
+ if ($dbf->{debug} > 3) {
+ print STDERR "[$i] = ".join(' ', @a[$i*5 .. $i*5 + 3]).'; (';
+ while (my ($k,$v) = each %{$href}) {
+ print STDERR "$k => $v, ";
+ }
+ print STDERR ")\n";
+ }
+ push @{$ret{tab}}, [ @a[$i*5 .. $i*5 + 3], $href ];
}
@@ -531,3 +624,4 @@ sub ipdb_alloc_page($$) {
my %page;
-
+ local %_ = @_;
+
$page{type} = $type;
@@ -540,4 +634,8 @@ sub ipdb_alloc_page($$) {
}
+
+ print STDERR "new ".pagetypestr($type)." page at $page{off}\n"
+ if $dbf->{debug};
+
$page{dirty} = 1;
- ipdb_cache_put($dbf, \%page);
+ ipdb_cache_put($dbf, \%page) unless ($_{nocache});
++$dbf->{modified};
@@ -553,3 +651,3 @@ sub ipdb_get_root_page($$) {
print STDERR "root page for $nbits: created at $p->{off}\n"
- if $dbf->{debug};
+ if $dbf->{debug} > 2;
$dbf->{rootidx}{$nbits} = $p->{off};
@@ -558,3 +656,3 @@ sub ipdb_get_root_page($$) {
print STDERR "root page for $nbits: $dbf->{rootidx}{$nbits}\n"
- if $dbf->{debug};
+ if $dbf->{debug} > 2;
$p = ipdb_cache_get($dbf, $dbf->{rootidx}{$nbits});
@@ -616,3 +714,3 @@ sub ipdb_lookup_unlocked($$) {
join(',', @{$page->{tab}})."\n"
- if $dbf->{debug};
+ if $dbf->{debug} > 1;
print STDERR "ipdb_lookup: octet ${n}=$ipo[$n], off=$page->{tab}[$ipo[$n]]\n"
@@ -629,8 +727,14 @@ sub ipdb_lookup_unlocked($$) {
foreach my $r (@{$page->{tab}}) {
+ print STDERR "ipdb_lookup: compare ($ipn & $r->[1]) == $r->[0]\n"
+ if $dbf->{debug};
if (($ipn & $r->[1]) == $r->[0]) {
# FIXME: check timestamp
- return ( ( country => $r->[3],
- network => inet_ntoa(pack('N', $r->[0])),
- netmask => inet_ntoa(pack('N', $r->[1])) ),
- %{$r->[4]});
+ print STDERR "ipdb_lookup: MATCH $r->[3]\n"
+ if $dbf->{debug};
+ my %res = ( country => $r->[3],
+ network => inet_ntoa(pack('N', $r->[0])),
+ netmask => inet_ntoa(pack('N', $r->[1])) );
+ @res{keys %{$r->[4]}} = values %{$r->[4]}
+ if (defined($r->[4]) and ref($r->[4]) eq 'HASH');
+ return %res;
}
@@ -691,2 +795,4 @@ sub ipdb_insert_unlocked {
+ print STDERR "inserting $cidr $country\n" if $dbf->{debug};
+
my $n = int($masklen / 8);
@@ -696,2 +802,4 @@ sub ipdb_insert_unlocked {
if ($page->{tab}[$ipo[$i]]) {
+ print STDERR "ipdb_insert: octet ${i}=$ipo[$i], off=$page->{tab}[$ipo[$i]]\n"
+ if $dbf->{debug};
$page = ipdb_cache_get($dbf, $page->{tab}[$ipo[$i]]);
@@ -706,2 +814,4 @@ sub ipdb_insert_unlocked {
if ($page->{tab}[LEAF_IDX]) {
+ print STDERR "ipdb_insert: loading leaf page from $page->{tab}[LEAF_IDX]\n"
+ if $dbf->{debug};
$page = ipdb_cache_get($dbf, $page->{tab}[LEAF_IDX]);
@@ -710,2 +820,4 @@ sub ipdb_insert_unlocked {
} else {
+ print STDERR "ipdb_insert: creating leaf page\n"
+ if $dbf->{debug};
my $p = ipdb_alloc_page($dbf, IPDB_PAGE_LEAF);
diff --git a/whoseip/whoseip.pl b/whoseip/whoseip.pl
index 9ca899e..d64280e 100644
--- a/whoseip/whoseip.pl
+++ b/whoseip/whoseip.pl
@@ -460,2 +460,4 @@ sub serve {
$res{count} = range2count($res{range});
+ $res{term} = $term;
+ $res{source} = 'CACHE';
return %res;
@@ -493,2 +495,3 @@ sub serve {
}
+ $res{source} = 'QUERY';
$res{term} = $term;
@@ -760,6 +763,6 @@ if ($fastcgi) {
}
+ my $n = 1;
while (<>) {
chomp;
- %res = serve($_);
- format_out($output_format, %res);
+ format_out($output_format, serve($_), item => $n++);
last if $single_query;
@@ -767,4 +770,5 @@ if ($fastcgi) {
} else {
+ my $n = 1;
foreach my $term (@ARGV) {
- format_out($output_format, serve($term));
+ format_out($output_format, serve($term), item => $n++);
}
@@ -783,3 +787,3 @@ whoseip - return information about IP address
B<whoiseip>
-[B<-dh>]
+[B<-dhN>]
[B<-F> I<FILE>]
@@ -787,2 +791,3 @@ B<whoiseip>
[B<-i> I<FILE>]
+[B<--cache-file=>I<FILE>]
[B<--debug>]
@@ -796,2 +801,3 @@ B<whoiseip>
[B<--ip-list=>I<FILE>]
+[B<--no-cache>]
[B<--single-query>]
@@ -920,2 +926,6 @@ output formats.
+=item B<--cache-file=>I<FILE>
+
+Cache retrieved data in file I<FILE>.
+
=item B<-D>, B<--dump=>I<FILE>
@@ -974,2 +984,6 @@ Without this option, B<whoseip> uses the built-in list of servers.
+=item B<-N>, B<--no-cache>
+
+Disable caching (this is the default).
+
=item B<--single-query>
@@ -1054,2 +1068,7 @@ it is B<OK>, this macro is not defined.
+=item B<${item}>
+
+Ordinal number of the request being served. Not defined in B<CGI> and
+B<FastCGI> modes.
+
=item B<${term}>
@@ -1074,2 +1093,8 @@ ISO 3166-1 code of the country where IP address is located.
+=item B<${source}>
+
+Where the information was obtained from. B<QUERY>, if it was retrieved
+from a remote B<whois> server and B<CACHE>, if it was read from the
+cache database.
+
=back

Return to:

Send suggestions and report system problems to the System administrator.