From 4005897a75d88355c6da8513bf7a4a13c301c97a Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Thu, 16 Oct 2014 16:57:56 +0300 Subject: whoseip: fix caching algorithm * whoseip/Whoseip/DB.pm: Fix caching algorithm, improve debugging. Close all open databases before terminating. * whoseip/whoseip.pl: Implement ${source} and ${item} macro variables. Document new options and variables. --- whoseip/Whoseip/DB.pm | 224 +++++++++++++++++++++++++++++++++++++------------- whoseip/whoseip.pl | 33 +++++++- 2 files changed, 197 insertions(+), 60 deletions(-) diff --git a/whoseip/Whoseip/DB.pm b/whoseip/Whoseip/DB.pm index 0abc953..f6b08f0 100644 --- a/whoseip/Whoseip/DB.pm +++ b/whoseip/Whoseip/DB.pm @@ -160,8 +160,27 @@ use constant IPDB_PAGE_LEAF => 2; use constant LEAF_IDX => 256; +sub pagetypestr { + my $t = shift; + return "index" if ($t == IPDB_PAGE_INDEX); + return "leaf" if ($t == IPDB_PAGE_LEAF); + return $t; +} + sub systell { sysseek($_[0], 0, SEEK_CUR) } +my @ipdb_open_files; + +sub ipdb_close_all { + foreach my $file (@ipdb_open_files) { + ipdb_close($file) if defined($file->{fd}); + } +} + +END { + ipdb_close_all(); +} + =pod =head2 B = Whoseip::DB::ipdb_open(I<$filename>>[B<,> I]B<);> @@ -221,7 +240,7 @@ sub ipdb_open { for (my $i = 0; $i < $count; $i += 2) { $ipdbfile{rootidx}->{$tab[$i]} = $tab[$i+1]; print STDERR "ROOTIDX $tab[$i]=$tab[$i+1]\n" - if $_{debug}; + if $_{debug} > 1; } } else { open($fd, "+>", $filename) @@ -241,10 +260,11 @@ sub ipdb_open { $ipdbfile{maxpagecache} = defined($_{maxpagecache}) ? $_{maxpagecache} : 16; $ipdbfile{debug} = $_{debug}; - if ($ipdbfile{debug}) { + if ($ipdbfile{debug} > 1) { my $ug = new Data::UUID; print STDERR "file $filename, UUID ".$ug->to_string($ipdbfile{uuid})."\n"; } + push @ipdb_open_files, \%ipdbfile; return \%ipdbfile; } @@ -326,7 +346,7 @@ sub ipdb_locker { for (my $i = 0; $i < $count; $i += 2) { $dbf->{rootidx}{$tab[$i]} = $tab[$i+1]; print STDERR "ROOTIDX $tab[$i]=$tab[$i+1]\n" - if $_{debug}; + if $_{debug} > 1; } # Invalidate the cache @@ -347,47 +367,67 @@ sub ipdb_save_page($$) { if ($page->{type} == IPDB_PAGE_INDEX) { print STDERR "saving index page $page->{off}: ". join(',', @{$page->{tab}})."\n" - if $dbf->{debug}; + if $dbf->{debug} > 1; $ret = syswrite($dbf->{fd}, pack('LL[257].', $page->{type}, @{$page->{tab}}, $dbf->{pagesize}), $dbf->{pagesize}); + croak "$dbf->{file}: write error at $page->{off}: $ret: $!" + unless ($ret == $dbf->{pagesize}); + + delete $page->{dirty}; } elsif ($page->{type} == IPDB_PAGE_LEAF) { - print STDERR "saving leaf page $page->{off}\n" - if $dbf->{debug}; - my @a; - my $size = length(pack('LLL',0,0,0)); - my $i = 0; - foreach my $ent (@{$page->{tab}}) { - my $x = pack('LLLa2L/a', @{$ent}[0 .. 3],freeze($ent->[4])); - my $l = length($x); - if ($size + $l > $dbf->{pagesize}) { - my $p = ipdb_alloc_page($dbf, IPDB_PAGE_LEAF); - $page->{next} = $p->{off}; - $p->{tab} = @{$page->{tab}}[$i .. $#{$page->{tab}}]; - $p->{dirty} = 1; - splice @{$page->{tab}}, $i; - last; + my $nextpage; + do { + print STDERR "saving leaf page $page->{off}\n" + if $dbf->{debug} > 1; + my $size = length(pack('LLL',0,0,0)); + my $i = 0; + my $a; + foreach my $ent (@{$page->{tab}}) { + my $fdata = eval { freeze($ent->[4]) }; + if ($@) { + print STDERR "failed to freeze data for " . + inet_ntoa(pack('N', $ent->[0])) . "/" . + inet_ntoa(pack('N', $ent->[1])) . ":". + $ent->[3] ."\n"; + exit; + } + my $x = pack('LLLa2L/a', @{$ent}[0 .. 3],$fdata); + my $l = length($x); + if ($size + $l > $dbf->{pagesize}) { + print STDERR "SPLIT at $i: $size + $l, rest ". + ($#{$page->{tab}}-$i+1)."\n" + if $dbf->{debug} > 1; + $nextpage = ipdb_alloc_page($dbf, IPDB_PAGE_LEAF, + nocache => 1); + $page->{next} = $nextpage->{off}; + @{$nextpage->{tab}} = @{$page->{tab}}[$i .. $#{$page->{tab}}]; + $nextpage->{dirty} = 1; + splice @{$page->{tab}}, $i; + last; + } + $size += $l; + $a .= $x; + ++$i; } - $size += $l; - push @a, $x; - } continue { - ++$i; - } - $ret = syswrite($dbf->{fd}, - pack('LLLa*@'.$dbf->{pagesize}, - $page->{type}, - $#a + 1, - $page->{next}, - @a)); + $ret = syswrite($dbf->{fd}, + pack('LLLa'.length($a).'@'.$dbf->{pagesize}, + $page->{type}, + $i, + $page->{next}, + $a)); + croak "$dbf->{file}: write error at $page->{off}: $ret: $!" + unless ($ret == $dbf->{pagesize}); + delete $page->{dirty}; + $page = $nextpage; + $nextpage = undef; + } while (defined($page)); + } else { croak "unrecognized page type ($page->{type})"; } - croak "$dbf->{file}: write error at $page->{off}: $ret: $!" - unless ($ret == $dbf->{pagesize}); - - delete $page->{dirty}; } sub ipdb_cache_invalidate($) { @@ -414,26 +454,70 @@ sub ipdb_cache_put($$) { $page->{lru_newer} = undef; $page->{lru_older} = $n; $dbf->{pagecache}{lru_newest} = $page; + $dbf->{pagecache}{lru_oldest} = $page + unless defined $dbf->{pagecache}{lru_oldest}; $dbf->{pagecache}{$page->{off}} = $page; + dump_lru($dbf, "put $page->{off}"); } +sub dump_lru { + my ($dbf,$pfx) = @_; + + return unless $dbf->{debug} > 2; + + my $x = $dbf->{pagecache}{lru_oldest}; + print STDERR "DUMP $pfx\n"; + print STDERR "KEYS: ".join(',', sort keys %{$dbf->{pagecache}})."\n"; + while (defined($x)) { + print STDERR "==> $x->{off} (".pagetypestr($x->{type}).","; + + if (defined($x->{lru_newer})) { + print STDERR $x->{lru_newer}{off}; + } else { + print STDERR "NIL"; + } + print STDERR ","; + if (defined($x->{lru_older})) { + print STDERR $x->{lru_older}{off}; + } else { + print STDERR "NIL"; + } + print STDERR ")\n"; + $x = $x->{lru_newer}; + } + print STDERR "END\n"; +} + sub ipdb_cache_get($$) { my ($dbf,$off) = @_; my $page; if (defined($dbf->{pagecache}{$off})) { + print STDERR "$off found in cache\n" if $dbf->{debug}; $page = $dbf->{pagecache}{$off}; - # promote the page - if (defined($page->{lru_older})) { - $page->{lru_older}{lru_newer} = $page->{lru_newer}; - } else { - # It was the oldest page - $dbf->{pagecache}{lru_oldest} = $page->{lru_newer}; - } if (defined($page->{lru_newer})) { + print STDERR "promoting $page->{off}\n" if $dbf->{debug} > 2; + # promote the page + if (defined($page->{lru_older})) { + $page->{lru_older}{lru_newer} = $page->{lru_newer}; + } else { + # It was the oldest page + $dbf->{pagecache}{lru_oldest} = $page->{lru_newer}; + } + $page->{lru_newer}{lru_older} = $page->{lru_older}; - } - $dbf->{pagecache}{lru_newest} = $page; + + $page->{lru_older} = $dbf->{pagecache}{lru_newest}; + $dbf->{pagecache}{lru_newest}{lru_newer} = $page; + + $dbf->{pagecache}{lru_newest} = $page; + + $page->{lru_newer} = undef; + $dbf->{pagecache}{lru_oldest} = $page + unless defined $dbf->{pagecache}{lru_oldest}; + dump_lru($dbf, "after promoting $page->{off}"); + } } else { + print STDERR "$off NOT found in cache\n" if $dbf->{debug}; $page = ipdb_get_page($dbf, $off); ipdb_cache_put($dbf, $page); } @@ -487,6 +571,7 @@ sub ipdb_close($) { ipdb_sync($dbf); ipdb_locker($dbf, lock => LOCK_UN); close $dbf->{fd}; + delete $dbf->{fd}; } sub ipdb_get_page($$) { @@ -507,17 +592,25 @@ sub ipdb_get_page($$) { $ret{type} = unpack('L', $s); $ret{off} = $off; if ($ret{type} == IPDB_PAGE_INDEX) { - print STDERR "found index page at $off\n" if $dbf->{debug}; + print STDERR "found index page at $off\n" if $dbf->{debug} > 3; my ($x, @a) = unpack('LL257', $s); $ret{tab} = \@a; } elsif ($ret{type} == IPDB_PAGE_LEAF) { (my $x, my $nent, $ret{next}) = unpack('LLL', $s); print STDERR "found leaf page at $off, has $nent entries\n" - if $dbf->{debug}; + if $dbf->{debug} > 3; my ($x1, $x2, $x3, @a) = unpack("LLL(LLLa2L/a)$nent", $s); - for (my $i = 0; $i < $nent; $i += 5) { - push @{$ret{tab}}, [ @a[$i .. $i+3], thaw $a[$i+4] ]; + for (my $i = 0; $i < $nent; $i++) { + my $href = thaw $a[$i*5 + 4]; + if ($dbf->{debug} > 3) { + print STDERR "[$i] = ".join(' ', @a[$i*5 .. $i*5 + 3]).'; ('; + while (my ($k,$v) = each %{$href}) { + print STDERR "$k => $v, "; + } + print STDERR ")\n"; + } + push @{$ret{tab}}, [ @a[$i*5 .. $i*5 + 3], $href ]; } } else { croak "$dbf->{filename}: invalid page type at offset $off"; @@ -529,7 +622,8 @@ sub ipdb_get_page($$) { sub ipdb_alloc_page($$) { my ($dbf,$type) = @_; my %page; - + local %_ = @_; + $page{type} = $type; $page{off} = $dbf->{numpages}++ * $dbf->{pagesize} + 512; if ($type == IPDB_PAGE_INDEX) { @@ -538,8 +632,12 @@ sub ipdb_alloc_page($$) { $page{next} = 0; $#{$page{tab}} = -1; } + + print STDERR "new ".pagetypestr($type)." page at $page{off}\n" + if $dbf->{debug}; + $page{dirty} = 1; - ipdb_cache_put($dbf, \%page); + ipdb_cache_put($dbf, \%page) unless ($_{nocache}); ++$dbf->{modified}; return \%page; } @@ -551,12 +649,12 @@ sub ipdb_get_root_page($$) { if (!defined($dbf->{rootidx}{$nbits})) { $p = ipdb_alloc_page($dbf, IPDB_PAGE_INDEX); print STDERR "root page for $nbits: created at $p->{off}\n" - if $dbf->{debug}; + if $dbf->{debug} > 2; $dbf->{rootidx}{$nbits} = $p->{off}; $dbf->{modified}++; } else { print STDERR "root page for $nbits: $dbf->{rootidx}{$nbits}\n" - if $dbf->{debug}; + if $dbf->{debug} > 2; $p = ipdb_cache_get($dbf, $dbf->{rootidx}{$nbits}); } return $p; @@ -614,7 +712,7 @@ sub ipdb_lookup_unlocked($$) { if ($page->{type} == IPDB_PAGE_INDEX) { print STDERR "index page $page->{off}: ". join(',', @{$page->{tab}})."\n" - if $dbf->{debug}; + if $dbf->{debug} > 1; print STDERR "ipdb_lookup: octet ${n}=$ipo[$n], off=$page->{tab}[$ipo[$n]]\n" if $dbf->{debug}; if ($page->{tab}[$ipo[$n]]) { @@ -627,12 +725,18 @@ sub ipdb_lookup_unlocked($$) { } foreach my $r (@{$page->{tab}}) { + print STDERR "ipdb_lookup: compare ($ipn & $r->[1]) == $r->[0]\n" + if $dbf->{debug}; if (($ipn & $r->[1]) == $r->[0]) { # FIXME: check timestamp - return ( ( country => $r->[3], - network => inet_ntoa(pack('N', $r->[0])), - netmask => inet_ntoa(pack('N', $r->[1])) ), - %{$r->[4]}); + print STDERR "ipdb_lookup: MATCH $r->[3]\n" + if $dbf->{debug}; + my %res = ( country => $r->[3], + network => inet_ntoa(pack('N', $r->[0])), + netmask => inet_ntoa(pack('N', $r->[1])) ); + @res{keys %{$r->[4]}} = values %{$r->[4]} + if (defined($r->[4]) and ref($r->[4]) eq 'HASH'); + return %res; } } return undef if (!$page->{next}); @@ -689,11 +793,15 @@ sub ipdb_insert_unlocked { return 0; } + print STDERR "inserting $cidr $country\n" if $dbf->{debug}; + my $n = int($masklen / 8); my $page = ipdb_get_root_page($dbf, $nbits); for (my $i = 0; $i < $n; $i++) { if ($page->{tab}[$ipo[$i]]) { + print STDERR "ipdb_insert: octet ${i}=$ipo[$i], off=$page->{tab}[$ipo[$i]]\n" + if $dbf->{debug}; $page = ipdb_cache_get($dbf, $page->{tab}[$ipo[$i]]); } else { my $p = ipdb_alloc_page($dbf, IPDB_PAGE_INDEX); @@ -704,10 +812,14 @@ sub ipdb_insert_unlocked { } if ($page->{tab}[LEAF_IDX]) { + print STDERR "ipdb_insert: loading leaf page from $page->{tab}[LEAF_IDX]\n" + if $dbf->{debug}; $page = ipdb_cache_get($dbf, $page->{tab}[LEAF_IDX]); croak "$dbf->{filename}: index page found where leaf was expected" unless $page->{type} == IPDB_PAGE_LEAF; } else { + print STDERR "ipdb_insert: creating leaf page\n" + if $dbf->{debug}; my $p = ipdb_alloc_page($dbf, IPDB_PAGE_LEAF); $page->{tab}[LEAF_IDX] = $p->{off}; $page->{dirty} = 1; diff --git a/whoseip/whoseip.pl b/whoseip/whoseip.pl index 9ca899e..d64280e 100644 --- a/whoseip/whoseip.pl +++ b/whoseip/whoseip.pl @@ -458,6 +458,8 @@ sub serve { } $res{range} = cidr_to_range($res{cidr}); $res{count} = range2count($res{range}); + $res{term} = $term; + $res{source} = 'CACHE'; return %res; } } @@ -491,6 +493,7 @@ sub serve { $res{status} = 'BAD'; $res{diag} = 'invalid input'; } + $res{source} = 'QUERY'; $res{term} = $term; return %res; } @@ -758,15 +761,16 @@ if ($fastcgi) { local $/ = CRLF; $delim = "$CR$LF"; } + my $n = 1; while (<>) { chomp; - %res = serve($_); - format_out($output_format, %res); + format_out($output_format, serve($_), item => $n++); last if $single_query; } } else { + my $n = 1; foreach my $term (@ARGV) { - format_out($output_format, serve($term)); + format_out($output_format, serve($term), item => $n++); } } } @@ -781,10 +785,11 @@ whoseip - return information about IP address =head1 SYNOPSIS B -[B<-dh>] +[B<-dhN>] [B<-F> I] [B<-D> I] [B<-i> I] +[B<--cache-file=>I] [B<--debug>] [B<--define-format=>IB<=>I] [B<--dump=>I] @@ -794,6 +799,7 @@ B [B<--formfile=>I] [B<--help>] [B<--ip-list=>I] +[B<--no-cache>] [B<--single-query>] [B<--usage>] [I...] @@ -918,6 +924,10 @@ output formats. =over 4 +=item B<--cache-file=>I + +Cache retrieved data in file I. + =item B<-D>, B<--dump=>I Dump the program to I. This is normally done to update the @@ -972,6 +982,10 @@ Comments are introduced with a B<#> sign. Empty lines are ignored. Without this option, B uses the built-in list of servers. +=item B<-N>, B<--no-cache> + +Disable caching (this is the default). + =item B<--single-query> This option is valid only in B. It instructs B to @@ -1052,6 +1066,11 @@ B, if it was not, and B, if the input was invalid. Contains explanatory text if B<${status}> is B or B. If it is B, this macro is not defined. +=item B<${item}> + +Ordinal number of the request being served. Not defined in B and +B modes. + =item B<${term}> The input IP address. @@ -1072,6 +1091,12 @@ Number of IP addresses in the network. ISO 3166-1 code of the country where IP address is located. +=item B<${source}> + +Where the information was obtained from. B, if it was retrieved +from a remote B server and B, if it was read from the +cache database. + =back If a macro is not defined, the corresponding reference expands to -- cgit v1.2.1