summaryrefslogtreecommitdiffabout
authorSergey Poznyakoff <gray@gnu.org.ua>2014-10-16 13:57:56 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua>2014-10-16 14:01:24 (GMT)
commit4005897a75d88355c6da8513bf7a4a13c301c97a (patch) (side-by-side diff)
treee28fb6995ee2f995ef74cd44a09d5fb07419b495
parentb51b2856515a92fad170b91f9788798e288420ef (diff)
downloaddnstools-4005897a75d88355c6da8513bf7a4a13c301c97a.tar.gz
dnstools-4005897a75d88355c6da8513bf7a4a13c301c97a.tar.bz2
whoseip: fix caching algorithm
* whoseip/Whoseip/DB.pm: Fix caching algorithm, improve debugging. Close all open databases before terminating. * whoseip/whoseip.pl: Implement ${source} and ${item} macro variables. Document new options and variables.
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r--whoseip/Whoseip/DB.pm224
-rw-r--r--whoseip/whoseip.pl33
2 files changed, 197 insertions, 60 deletions
diff --git a/whoseip/Whoseip/DB.pm b/whoseip/Whoseip/DB.pm
index 0abc953..f6b08f0 100644
--- a/whoseip/Whoseip/DB.pm
+++ b/whoseip/Whoseip/DB.pm
@@ -160,8 +160,27 @@ use constant IPDB_PAGE_LEAF => 2;
use constant LEAF_IDX => 256;
+sub pagetypestr {
+ my $t = shift;
+ return "index" if ($t == IPDB_PAGE_INDEX);
+ return "leaf" if ($t == IPDB_PAGE_LEAF);
+ return $t;
+}
+
sub systell { sysseek($_[0], 0, SEEK_CUR) }
+my @ipdb_open_files;
+
+sub ipdb_close_all {
+ foreach my $file (@ipdb_open_files) {
+ ipdb_close($file) if defined($file->{fd});
+ }
+}
+
+END {
+ ipdb_close_all();
+}
+
=pod
=head2 B<I<$dbf> = Whoseip::DB::ipdb_open(I<$filename>>[B<,> I<options>]B<);>
@@ -221,7 +240,7 @@ sub ipdb_open {
for (my $i = 0; $i < $count; $i += 2) {
$ipdbfile{rootidx}->{$tab[$i]} = $tab[$i+1];
print STDERR "ROOTIDX $tab[$i]=$tab[$i+1]\n"
- if $_{debug};
+ if $_{debug} > 1;
}
} else {
open($fd, "+>", $filename)
@@ -241,10 +260,11 @@ sub ipdb_open {
$ipdbfile{maxpagecache} =
defined($_{maxpagecache}) ? $_{maxpagecache} : 16;
$ipdbfile{debug} = $_{debug};
- if ($ipdbfile{debug}) {
+ if ($ipdbfile{debug} > 1) {
my $ug = new Data::UUID;
print STDERR "file $filename, UUID ".$ug->to_string($ipdbfile{uuid})."\n";
}
+ push @ipdb_open_files, \%ipdbfile;
return \%ipdbfile;
}
@@ -326,7 +346,7 @@ sub ipdb_locker {
for (my $i = 0; $i < $count; $i += 2) {
$dbf->{rootidx}{$tab[$i]} = $tab[$i+1];
print STDERR "ROOTIDX $tab[$i]=$tab[$i+1]\n"
- if $_{debug};
+ if $_{debug} > 1;
}
# Invalidate the cache
@@ -347,47 +367,67 @@ sub ipdb_save_page($$) {
if ($page->{type} == IPDB_PAGE_INDEX) {
print STDERR "saving index page $page->{off}: ".
join(',', @{$page->{tab}})."\n"
- if $dbf->{debug};
+ if $dbf->{debug} > 1;
$ret = syswrite($dbf->{fd}, pack('LL[257].',
$page->{type},
@{$page->{tab}},
$dbf->{pagesize}),
$dbf->{pagesize});
+ croak "$dbf->{file}: write error at $page->{off}: $ret: $!"
+ unless ($ret == $dbf->{pagesize});
+
+ delete $page->{dirty};
} elsif ($page->{type} == IPDB_PAGE_LEAF) {
- print STDERR "saving leaf page $page->{off}\n"
- if $dbf->{debug};
- my @a;
- my $size = length(pack('LLL',0,0,0));
- my $i = 0;
- foreach my $ent (@{$page->{tab}}) {
- my $x = pack('LLLa2L/a', @{$ent}[0 .. 3],freeze($ent->[4]));
- my $l = length($x);
- if ($size + $l > $dbf->{pagesize}) {
- my $p = ipdb_alloc_page($dbf, IPDB_PAGE_LEAF);
- $page->{next} = $p->{off};
- $p->{tab} = @{$page->{tab}}[$i .. $#{$page->{tab}}];
- $p->{dirty} = 1;
- splice @{$page->{tab}}, $i;
- last;
+ my $nextpage;
+ do {
+ print STDERR "saving leaf page $page->{off}\n"
+ if $dbf->{debug} > 1;
+ my $size = length(pack('LLL',0,0,0));
+ my $i = 0;
+ my $a;
+ foreach my $ent (@{$page->{tab}}) {
+ my $fdata = eval { freeze($ent->[4]) };
+ if ($@) {
+ print STDERR "failed to freeze data for " .
+ inet_ntoa(pack('N', $ent->[0])) . "/" .
+ inet_ntoa(pack('N', $ent->[1])) . ":".
+ $ent->[3] ."\n";
+ exit;
+ }
+ my $x = pack('LLLa2L/a', @{$ent}[0 .. 3],$fdata);
+ my $l = length($x);
+ if ($size + $l > $dbf->{pagesize}) {
+ print STDERR "SPLIT at $i: $size + $l, rest ".
+ ($#{$page->{tab}}-$i+1)."\n"
+ if $dbf->{debug} > 1;
+ $nextpage = ipdb_alloc_page($dbf, IPDB_PAGE_LEAF,
+ nocache => 1);
+ $page->{next} = $nextpage->{off};
+ @{$nextpage->{tab}} = @{$page->{tab}}[$i .. $#{$page->{tab}}];
+ $nextpage->{dirty} = 1;
+ splice @{$page->{tab}}, $i;
+ last;
+ }
+ $size += $l;
+ $a .= $x;
+ ++$i;
}
- $size += $l;
- push @a, $x;
- } continue {
- ++$i;
- }
- $ret = syswrite($dbf->{fd},
- pack('LLLa*@'.$dbf->{pagesize},
- $page->{type},
- $#a + 1,
- $page->{next},
- @a));
+ $ret = syswrite($dbf->{fd},
+ pack('LLLa'.length($a).'@'.$dbf->{pagesize},
+ $page->{type},
+ $i,
+ $page->{next},
+ $a));
+ croak "$dbf->{file}: write error at $page->{off}: $ret: $!"
+ unless ($ret == $dbf->{pagesize});
+ delete $page->{dirty};
+ $page = $nextpage;
+ $nextpage = undef;
+ } while (defined($page));
+
} else {
croak "unrecognized page type ($page->{type})";
}
- croak "$dbf->{file}: write error at $page->{off}: $ret: $!"
- unless ($ret == $dbf->{pagesize});
-
- delete $page->{dirty};
}
sub ipdb_cache_invalidate($) {
@@ -414,26 +454,70 @@ sub ipdb_cache_put($$) {
$page->{lru_newer} = undef;
$page->{lru_older} = $n;
$dbf->{pagecache}{lru_newest} = $page;
+ $dbf->{pagecache}{lru_oldest} = $page
+ unless defined $dbf->{pagecache}{lru_oldest};
$dbf->{pagecache}{$page->{off}} = $page;
+ dump_lru($dbf, "put $page->{off}");
}
+sub dump_lru {
+ my ($dbf,$pfx) = @_;
+
+ return unless $dbf->{debug} > 2;
+
+ my $x = $dbf->{pagecache}{lru_oldest};
+ print STDERR "DUMP $pfx\n";
+ print STDERR "KEYS: ".join(',', sort keys %{$dbf->{pagecache}})."\n";
+ while (defined($x)) {
+ print STDERR "==> $x->{off} (".pagetypestr($x->{type}).",";
+
+ if (defined($x->{lru_newer})) {
+ print STDERR $x->{lru_newer}{off};
+ } else {
+ print STDERR "NIL";
+ }
+ print STDERR ",";
+ if (defined($x->{lru_older})) {
+ print STDERR $x->{lru_older}{off};
+ } else {
+ print STDERR "NIL";
+ }
+ print STDERR ")\n";
+ $x = $x->{lru_newer};
+ }
+ print STDERR "END\n";
+}
+
sub ipdb_cache_get($$) {
my ($dbf,$off) = @_;
my $page;
if (defined($dbf->{pagecache}{$off})) {
+ print STDERR "$off found in cache\n" if $dbf->{debug};
$page = $dbf->{pagecache}{$off};
- # promote the page
- if (defined($page->{lru_older})) {
- $page->{lru_older}{lru_newer} = $page->{lru_newer};
- } else {
- # It was the oldest page
- $dbf->{pagecache}{lru_oldest} = $page->{lru_newer};
- }
if (defined($page->{lru_newer})) {
+ print STDERR "promoting $page->{off}\n" if $dbf->{debug} > 2;
+ # promote the page
+ if (defined($page->{lru_older})) {
+ $page->{lru_older}{lru_newer} = $page->{lru_newer};
+ } else {
+ # It was the oldest page
+ $dbf->{pagecache}{lru_oldest} = $page->{lru_newer};
+ }
+
$page->{lru_newer}{lru_older} = $page->{lru_older};
- }
- $dbf->{pagecache}{lru_newest} = $page;
+
+ $page->{lru_older} = $dbf->{pagecache}{lru_newest};
+ $dbf->{pagecache}{lru_newest}{lru_newer} = $page;
+
+ $dbf->{pagecache}{lru_newest} = $page;
+
+ $page->{lru_newer} = undef;
+ $dbf->{pagecache}{lru_oldest} = $page
+ unless defined $dbf->{pagecache}{lru_oldest};
+ dump_lru($dbf, "after promoting $page->{off}");
+ }
} else {
+ print STDERR "$off NOT found in cache\n" if $dbf->{debug};
$page = ipdb_get_page($dbf, $off);
ipdb_cache_put($dbf, $page);
}
@@ -487,6 +571,7 @@ sub ipdb_close($) {
ipdb_sync($dbf);
ipdb_locker($dbf, lock => LOCK_UN);
close $dbf->{fd};
+ delete $dbf->{fd};
}
sub ipdb_get_page($$) {
@@ -507,17 +592,25 @@ sub ipdb_get_page($$) {
$ret{type} = unpack('L', $s);
$ret{off} = $off;
if ($ret{type} == IPDB_PAGE_INDEX) {
- print STDERR "found index page at $off\n" if $dbf->{debug};
+ print STDERR "found index page at $off\n" if $dbf->{debug} > 3;
my ($x, @a) = unpack('LL257', $s);
$ret{tab} = \@a;
} elsif ($ret{type} == IPDB_PAGE_LEAF) {
(my $x, my $nent, $ret{next}) =
unpack('LLL', $s);
print STDERR "found leaf page at $off, has $nent entries\n"
- if $dbf->{debug};
+ if $dbf->{debug} > 3;
my ($x1, $x2, $x3, @a) = unpack("LLL(LLLa2L/a)$nent", $s);
- for (my $i = 0; $i < $nent; $i += 5) {
- push @{$ret{tab}}, [ @a[$i .. $i+3], thaw $a[$i+4] ];
+ for (my $i = 0; $i < $nent; $i++) {
+ my $href = thaw $a[$i*5 + 4];
+ if ($dbf->{debug} > 3) {
+ print STDERR "[$i] = ".join(' ', @a[$i*5 .. $i*5 + 3]).'; (';
+ while (my ($k,$v) = each %{$href}) {
+ print STDERR "$k => $v, ";
+ }
+ print STDERR ")\n";
+ }
+ push @{$ret{tab}}, [ @a[$i*5 .. $i*5 + 3], $href ];
}
} else {
croak "$dbf->{filename}: invalid page type at offset $off";
@@ -529,7 +622,8 @@ sub ipdb_get_page($$) {
sub ipdb_alloc_page($$) {
my ($dbf,$type) = @_;
my %page;
-
+ local %_ = @_;
+
$page{type} = $type;
$page{off} = $dbf->{numpages}++ * $dbf->{pagesize} + 512;
if ($type == IPDB_PAGE_INDEX) {
@@ -538,8 +632,12 @@ sub ipdb_alloc_page($$) {
$page{next} = 0;
$#{$page{tab}} = -1;
}
+
+ print STDERR "new ".pagetypestr($type)." page at $page{off}\n"
+ if $dbf->{debug};
+
$page{dirty} = 1;
- ipdb_cache_put($dbf, \%page);
+ ipdb_cache_put($dbf, \%page) unless ($_{nocache});
++$dbf->{modified};
return \%page;
}
@@ -551,12 +649,12 @@ sub ipdb_get_root_page($$) {
if (!defined($dbf->{rootidx}{$nbits})) {
$p = ipdb_alloc_page($dbf, IPDB_PAGE_INDEX);
print STDERR "root page for $nbits: created at $p->{off}\n"
- if $dbf->{debug};
+ if $dbf->{debug} > 2;
$dbf->{rootidx}{$nbits} = $p->{off};
$dbf->{modified}++;
} else {
print STDERR "root page for $nbits: $dbf->{rootidx}{$nbits}\n"
- if $dbf->{debug};
+ if $dbf->{debug} > 2;
$p = ipdb_cache_get($dbf, $dbf->{rootidx}{$nbits});
}
return $p;
@@ -614,7 +712,7 @@ sub ipdb_lookup_unlocked($$) {
if ($page->{type} == IPDB_PAGE_INDEX) {
print STDERR "index page $page->{off}: ".
join(',', @{$page->{tab}})."\n"
- if $dbf->{debug};
+ if $dbf->{debug} > 1;
print STDERR "ipdb_lookup: octet ${n}=$ipo[$n], off=$page->{tab}[$ipo[$n]]\n"
if $dbf->{debug};
if ($page->{tab}[$ipo[$n]]) {
@@ -627,12 +725,18 @@ sub ipdb_lookup_unlocked($$) {
}
foreach my $r (@{$page->{tab}}) {
+ print STDERR "ipdb_lookup: compare ($ipn & $r->[1]) == $r->[0]\n"
+ if $dbf->{debug};
if (($ipn & $r->[1]) == $r->[0]) {
# FIXME: check timestamp
- return ( ( country => $r->[3],
- network => inet_ntoa(pack('N', $r->[0])),
- netmask => inet_ntoa(pack('N', $r->[1])) ),
- %{$r->[4]});
+ print STDERR "ipdb_lookup: MATCH $r->[3]\n"
+ if $dbf->{debug};
+ my %res = ( country => $r->[3],
+ network => inet_ntoa(pack('N', $r->[0])),
+ netmask => inet_ntoa(pack('N', $r->[1])) );
+ @res{keys %{$r->[4]}} = values %{$r->[4]}
+ if (defined($r->[4]) and ref($r->[4]) eq 'HASH');
+ return %res;
}
}
return undef if (!$page->{next});
@@ -689,11 +793,15 @@ sub ipdb_insert_unlocked {
return 0;
}
+ print STDERR "inserting $cidr $country\n" if $dbf->{debug};
+
my $n = int($masklen / 8);
my $page = ipdb_get_root_page($dbf, $nbits);
for (my $i = 0; $i < $n; $i++) {
if ($page->{tab}[$ipo[$i]]) {
+ print STDERR "ipdb_insert: octet ${i}=$ipo[$i], off=$page->{tab}[$ipo[$i]]\n"
+ if $dbf->{debug};
$page = ipdb_cache_get($dbf, $page->{tab}[$ipo[$i]]);
} else {
my $p = ipdb_alloc_page($dbf, IPDB_PAGE_INDEX);
@@ -704,10 +812,14 @@ sub ipdb_insert_unlocked {
}
if ($page->{tab}[LEAF_IDX]) {
+ print STDERR "ipdb_insert: loading leaf page from $page->{tab}[LEAF_IDX]\n"
+ if $dbf->{debug};
$page = ipdb_cache_get($dbf, $page->{tab}[LEAF_IDX]);
croak "$dbf->{filename}: index page found where leaf was expected"
unless $page->{type} == IPDB_PAGE_LEAF;
} else {
+ print STDERR "ipdb_insert: creating leaf page\n"
+ if $dbf->{debug};
my $p = ipdb_alloc_page($dbf, IPDB_PAGE_LEAF);
$page->{tab}[LEAF_IDX] = $p->{off};
$page->{dirty} = 1;
diff --git a/whoseip/whoseip.pl b/whoseip/whoseip.pl
index 9ca899e..d64280e 100644
--- a/whoseip/whoseip.pl
+++ b/whoseip/whoseip.pl
@@ -458,6 +458,8 @@ sub serve {
}
$res{range} = cidr_to_range($res{cidr});
$res{count} = range2count($res{range});
+ $res{term} = $term;
+ $res{source} = 'CACHE';
return %res;
}
}
@@ -491,6 +493,7 @@ sub serve {
$res{status} = 'BAD';
$res{diag} = 'invalid input';
}
+ $res{source} = 'QUERY';
$res{term} = $term;
return %res;
}
@@ -758,15 +761,16 @@ if ($fastcgi) {
local $/ = CRLF;
$delim = "$CR$LF";
}
+ my $n = 1;
while (<>) {
chomp;
- %res = serve($_);
- format_out($output_format, %res);
+ format_out($output_format, serve($_), item => $n++);
last if $single_query;
}
} else {
+ my $n = 1;
foreach my $term (@ARGV) {
- format_out($output_format, serve($term));
+ format_out($output_format, serve($term), item => $n++);
}
}
}
@@ -781,10 +785,11 @@ whoseip - return information about IP address
=head1 SYNOPSIS
B<whoiseip>
-[B<-dh>]
+[B<-dhN>]
[B<-F> I<FILE>]
[B<-D> I<FILE>]
[B<-i> I<FILE>]
+[B<--cache-file=>I<FILE>]
[B<--debug>]
[B<--define-format=>I<NAME>B<=>I<TEXT>]
[B<--dump=>I<FILE>]
@@ -794,6 +799,7 @@ B<whoiseip>
[B<--formfile=>I<FILE>]
[B<--help>]
[B<--ip-list=>I<FILE>]
+[B<--no-cache>]
[B<--single-query>]
[B<--usage>]
[I<IPADDR>...]
@@ -918,6 +924,10 @@ output formats.
=over 4
+=item B<--cache-file=>I<FILE>
+
+Cache retrieved data in file I<FILE>.
+
=item B<-D>, B<--dump=>I<FILE>
Dump the program to I<FILE>. This is normally done to update the
@@ -972,6 +982,10 @@ Comments are introduced with a B<#> sign. Empty lines are ignored.
Without this option, B<whoseip> uses the built-in list of servers.
+=item B<-N>, B<--no-cache>
+
+Disable caching (this is the default).
+
=item B<--single-query>
This option is valid only in B<inetd mode>. It instructs B<whoseip> to
@@ -1052,6 +1066,11 @@ B<NO>, if it was not, and B<BAD>, if the input was invalid.
Contains explanatory text if B<${status}> is B<NO> or B<BAD>. If
it is B<OK>, this macro is not defined.
+=item B<${item}>
+
+Ordinal number of the request being served. Not defined in B<CGI> and
+B<FastCGI> modes.
+
=item B<${term}>
The input IP address.
@@ -1072,6 +1091,12 @@ Number of IP addresses in the network.
ISO 3166-1 code of the country where IP address is located.
+=item B<${source}>
+
+Where the information was obtained from. B<QUERY>, if it was retrieved
+from a remote B<whois> server and B<CACHE>, if it was read from the
+cache database.
+
=back
If a macro is not defined, the corresponding reference expands to

Return to:

Send suggestions and report system problems to the System administrator.