diff options
-rw-r--r-- | bootstrap.pl | 3 | ||||
-rw-r--r-- | whoseip/GNUmakefile | 1 | ||||
-rw-r--r-- | whoseip/MANIFEST | 1 | ||||
-rw-r--r-- | whoseip/Makefile.PL | 37 | ||||
-rw-r--r-- | whoseip/Whoseip/DB.pm | 550 | ||||
-rw-r--r-- | whoseip/whoseip.pl | 37 |
6 files changed, 626 insertions, 3 deletions
diff --git a/bootstrap.pl b/bootstrap.pl index b9dc20f..b210751 100644 --- a/bootstrap.pl +++ b/bootstrap.pl @@ -61,7 +61,7 @@ my $dir; if (defined($file)) { print "$modname installed at $file\n"; if (-f $file) { - $file =~ s/\.pm$//; + $file =~ s#/[^/]+\.pm$##; if (-d $file) { $dir = $file; } else { @@ -75,6 +75,7 @@ if (defined($file)) { } $file = "$dir/$incdir/AutoInstall.pm"; +$file = "$dir/AutoInstall.pm" unless (-f $file); -f $file or die "$file not found"; chdir $topdir or die "Can't change to $topdir: $!"; diff --git a/whoseip/GNUmakefile b/whoseip/GNUmakefile index 878d763..1b4f6af 100644 --- a/whoseip/GNUmakefile +++ b/whoseip/GNUmakefile @@ -6,3 +6,4 @@ whoseip: whoseip.pl ip_del_list chmod +x whoseip dist: whoseip + diff --git a/whoseip/MANIFEST b/whoseip/MANIFEST index 1e84842..276d654 100644 --- a/whoseip/MANIFEST +++ b/whoseip/MANIFEST @@ -2,3 +2,4 @@ MANIFEST Makefile.PL inc/ExtUtils/AutoInstall.pm whoseip +Whoseip/DB.pm diff --git a/whoseip/Makefile.PL b/whoseip/Makefile.PL new file mode 100644 index 0000000..9d1bb4d --- /dev/null +++ b/whoseip/Makefile.PL @@ -0,0 +1,37 @@ +# -*- perl -*- +# Copyright (C) 2014 Sergey Poznyakoff <gray@gnu.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +use lib 'inc'; +use ExtUtils::AutoInstall ( + -core => [ + 'Getopt::Long' => 2.34, + 'IO::Socket' => 1.34, + 'Pod::Usage' => 1.51, + 'Pod::Man' => 2.25, + 'Net::CIDR' => 0.14 + ] +); + +WriteMakefile( + 'NAME' => 'whoseip', + 'AUTHOR' => 'Sergey Poznyakoff <gray@gnu.org>', + 'ABSTRACT' => 'Identifies IP addresses', + 'FIRST_MAKEFILE' => 'Makefile', + 'VERSION' => '1.00', + 'EXE_FILES' => [ 'whoseip' ], + 'PM' => { 'Whoseip/DB.pm' => '$(INST_LIBDIR)/Whoseip/DB.pm' } +); + diff --git a/whoseip/Whoseip/DB.pm b/whoseip/Whoseip/DB.pm new file mode 100644 index 0000000..b7cba34 --- /dev/null +++ b/whoseip/Whoseip/DB.pm @@ -0,0 +1,550 @@ +# -*- perl -*- +# Copyright (C) 2014 Sergey Poznyakoff <gray@gnu.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +package Whoseip::DB; + +use strict; +use Fcntl qw(SEEK_SET SEEK_CUR); +use Socket qw(inet_ntoa); + +require Exporter; +our @ISA = qw(Exporter); + +our %EXPORT_TAGS = ( 'all' => [ qw(ipdb_open ipdb_lookup ipdb_insert ipdb_close) ] ); + +our @EXPORT_OK = ( qw(ipdb_open ipdb_lookup ipdb_insert ipdb_close) ); + +our @EXPORT = qw(); + +our $VERSION = "0.1"; + +1; + +=pod + +=head1 NAME + +Whoseip::DB - WhoseIP cache database + +=head1 SYNOPSIS + +use Whoseip::DB; +use Whoseip::DB qw(:all); + +$dbf = Whoseip::DB::ipdb_open($filename[, + pagesize => $psize, + cachesize => $csize]); + +$ref = Whoseip::DB::ipdb_lookup($ip); +print $ref->{country} if defined($ref); + +Whoseip::DB::ipdb_insert($dbf, "192.0.2.0/24", 'US'); + +Whoseip::DB::ipdb_close($dbf); + +=head1 DESCRIPTION + +The B<Whoseip::DB> package provides functions for creating and accessing +a B<whoseip>(1) cache database. This database is analogous to a GeoIP +database, except that it keeps more information. + +The database is kept in a single file and consists of B<pages> of the +same size. The file begins with a 512-byte header block of the following +structure: + +B<Offset> B<Size> B<Description> + 0 8 "WHOSEIP\0" + 8 2 major version + 10 2 minor version + 12 4 page size + 16 4 number of allocated pages + 20 4 number of entries in root index table + 24 488 root index table + +The first three fields serve to identify the file format and its version. +At the time of this writing, major and minor versions are B<1>.B<0>. + +B<Page size> defines the size of the file page. It defaults to 1280 +bytes. + +Pages are of two types: B<index pages>, that serve to navigate through +the file, and B<leaf pages>, that keep actual data. The B<root index +table>, located at the end of the file header keeps offsets of the +initial index page for IP addresses of different sizes kept in the +database. Each entry in this table consists of two 32-bit words: the +first one keeps the length of the IP address in bits (e.g. 32 for IPv4 +and 128 for IPv6), and the second one keeps the offset of the first +index table for entries of that size. The table can accomodate at most +122 entries, which is more than enough for the purpose. + +An B<index page> contains a table of offsets of the next page to look up +(whether index or leaf) and is indexed by the octet value (0 -- 255). +An extra slot keeps the offset of the leaf page. The overall structure +of an index page is as follows: + +B<Offset> B<Size> B<Description> + 0 4 Index page type: B<1> + 4 4 OFF[0] + 8 4 OFF[1] + . . . + . . . + . . . + 1024 4 OFF[255] + 1028 4 OFF[LEAF] + +When looking up for an IP address, index pages are descended starting +from the root page. The octets of the IP address in host order are +iterated from left to right. Each subsequent octet is used to select +offset of the next page from the current index page. If the corresponding +offset is zero the last entry (B<OFF[LEAF]>) is used. This process stops +when the B<leaf page> is encountered. + +A B<leaf page> contains a table of B<CIDR>s and their descriptions. Its +structure is as follows: + +B<Offset> B<Size> B<Description> + 0 4 Leaf page type: B<2> + 4 4 Number of entries in the table + 8 4 Offset of the continuation page (0 if none) + + 12 4 Entry 0: network address + 16 4 Entry 0: network mask + 20 4 Entry 0: timestamp + 24 2 Entry 0: ISO 3166-1 country code + + . . . + . . . + . . . + + 12+N*14 4 Entry N: network address + 16+N*14 4 Entry N: network mask + 20+N*14 4 Entry N: timestamp + 24+N*14 2 Entry N: ISO 3166-1 country code + +When a leaf page is encountered, the IP address in question is compared +with each entry in turn using the usual procedure (B<AND>ing with the network +mask and comparing the result with the network address). Search stops when +a matching entry is found. Very large tables can span several leaf pages: +if no entry matches in the current page, the search continues at the +continuation page whose offset is indicated by the third field. If that +field is 0, the search returns failure. + +=cut + +my $dbsign = 'WHOSEIP'; +my $vmajor = 1; +my $vminor = 0; + +use constant IPDB_PAGE_INDEX => 1; +use constant IPDB_PAGE_LEAF => 2; + +sub systell { sysseek($_[0], 0, SEEK_CUR) } + +=pod + +=head2 B<I<$dbf> = Whoseip::DB::ipdb_open(I<$filename>>[B<,> I<options>]B<);> + +Opens the database file I<$filename> and returns a descriptor to be used for +searches in that file. I<options> is a hash that can contain the following +keys: + +=over 4 + +=item B<pagesize> + +Page size for the file. This option is honoured only when creating the +file. It cannot be less than 1032 bytes. Default is 1280 bytes. + +=item B<cachesize> + +Maximum number of pages to keep in a B<LRU> cache. Defaults to 16. + +=back + +=cut +sub ipdb_open { + my $filename = shift; + local %_ = @_; + my %ipdbfile; + my $fd; + if (-e $filename) { + my $mode; + if (-w $filename) { + $ipdbfile{mode} = "+<"; + } else { + $ipdbfile{mode} = "<"; + } + open($fd, $ipdbfile{mode}, $filename) + or die "can't open $filename: $!"; + binmode $fd; + die "$filename is not a valid IP cache file" + unless sysread($fd, my $s, 512) == 512; + my ($sign,$maj,$min,$size,$np,$count,@tab) = + unpack('Z8 S S L L L L*', $s); + die "$filename is not a valid IP cache file" + unless $sign eq $dbsign; + die "$filename is of wrong version ($maj.$min, expected $vmajor.$vminor)" + unless ($maj == $vmajor and $min == $vminor); + die "$filename: page size too small ($size)" if ($size < 1032); + + $ipdbfile{pagesize} = $size; + $ipdbfile{numpages} = $np; + + for (my $i = 0; $i < $count; $i += 2) { + $ipdbfile{rootidx}->{$tab[$i]} = $tab[$i+1]; + } + } else { + open($fd, "+>", $filename) + or die "can't open $filename: $!"; + binmode $fd; + $ipdbfile{pagesize} = defined($_{pagesize}) ? $_{pagesize} : 1280; + $ipdbfile{numpages} = 0; + syswrite($fd, pack('Z8 S S L L L @512', $dbsign, $vmajor, $vminor, + $ipdbfile{pagesize}, 0, 0)); + } + $ipdbfile{filename} = $filename; + $ipdbfile{fd} = $fd; + $ipdbfile{maxpagecache} = + defined($_{maxpagecache}) ? $_{maxpagecache} : 16; + return \%ipdbfile; +} + +sub ipdb_save_page($$) { + my ($dbf, $page) = @_; + + if (sysseek($dbf->{fd}, $page->{off}, SEEK_SET) != $page->{off}) { + die "$dbf->{filename}: can't seek: $!"; + } + + my $ret; + if ($page->{type} == IPDB_PAGE_INDEX) { + $ret = syswrite($dbf->{fd}, pack('LL[257].', + $page->{type}, + @{$page->{tab}}, + $dbf->{pagesize}), + $dbf->{pagesize}); + } elsif ($page->{type} == IPDB_PAGE_LEAF) { + my @a; + foreach my $ent (@{$page->{tab}}) { + push @a, @{$ent}; + } + $ret = syswrite($dbf->{fd}, + pack('LLL(LLLa2)*@'."$dbf->{pagesize}", $page->{type}, + $#{$page->{tab}} + 1, + $page->{next}, + @a)); + } else { + die "BOO!"; + } + die "$dbf->{file}: write error at $page->{off}: $ret: $!" + unless ($ret == $dbf->{pagesize}); + + delete $page->{dirty}; +} + +sub ipdb_cache_put($$) { + my ($dbf,$page) = @_; + if (keys(%{$dbf->{pagecache}}) >= $dbf->{maxpagecache}) { + my $prev = $dbf->{pagecache}{lru_oldest}{lru_newer}; + if ($dbf->{pagecache}{lru_oldest}{dirty}) { + ipdb_save_page($dbf, $dbf->{pagecache}{lru_oldest}); + } + delete $dbf->{pagecache}{$dbf->{pagecache}{lru_oldest}{off}}; + $dbf->{pagecache}{lru_oldest} = $prev; + delete $prev->{lru_older}; + } + my $n = $dbf->{pagecache}{lru_newest}; + if (defined($n)) { + $n->{lru_newer} = $page; + } + $page->{lru_newer} = undef; + $page->{lru_older} = $n; + $dbf->{pagecache}{lru_newest} = $page; + $dbf->{pagecache}{$page->{off}} = $page; +} + +sub ipdb_cache_get($$) { + my ($dbf,$off) = @_; + my $page; + if (defined($dbf->{pagecache}{$off})) { + $page = $dbf->{pagecache}{$off}; + # promote the page + if (defined($page->{lru_older})) { + $page->{lru_older}{lru_newer} = $page->{lru_newer}; + } else { + # It was the oldest page + $dbf->{pagecache}{lru_oldest} = $page->{lru_newer}; + } + if (defined($page->{lru_newer})) { + $page->{lru_newer}{lru_older} = $page->{lru_older}; + } + $dbf->{pagecache}{lru_newest} = $page; + } else { + $page = ipdb_get_page($dbf, $off); + ipdb_cache_put($dbf, $page); + } + return $page; +} + +=pod + +=head2 B<Whoseip::DB::ipdb_close(I<$dbf>);> + +Close the database. I<$dbf> is the handle returned from the +previous call to B<ipdb_open>. + +=cut +sub ipdb_close($) { + my $dbf = shift; + if ($dbf->{modified}) { + die "$dbf->{filename}: can't seek: $!" + if (sysseek($dbf->{fd}, 0, SEEK_SET) != 0); + my $n = syswrite($dbf->{fd}, + pack('Z8 S S L L L (LL)* @512', + $dbsign, $vmajor, $vminor, + $dbf->{pagesize}, $dbf->{numpages}, + keys(%{$dbf->{rootidx}}), + map { $_, $dbf->{rootidx}{$_} } + keys %{$dbf->{rootidx}})); + die "$dbf->{filename}: write error at header: $n: $!" + unless ($n == 512); + } + while (my ($off, $page) = each %{$dbf->{pagecache}}) { + ipdb_save_page($dbf, $page) if $page->{dirty}; + } + close $dbf->{fd}; +} + +sub ipdb_get_page($$) { + my ($dbf,$off) = @_; + my %ret; + + if (sysseek($dbf->{fd}, $off, SEEK_SET) != $off) { + die "$dbf->{filename}: can't seek: $!"; + } + + my $n = sysread($dbf->{fd}, my $s, $dbf->{pagesize}); + unless (defined($n)) { + die "$dbf->{filename}: can't read page: $!"; + } elsif ($n != $dbf->{pagesize}) { + die "$dbf->{filename}: short read ($n < $dbf->{pagesize})"; + } + + $ret{type} = unpack('L', $s); + $ret{off} = $off; + if ($ret{type} == IPDB_PAGE_INDEX) { + my ($x, @a) = unpack('LL257', $s); + $ret{tab} = \@a; + } elsif ($ret{type} == IPDB_PAGE_LEAF) { + (my $x, my $nent, $ret{next}, my @a) = + unpack('LLL(LLLa2)*', $s); + for (my $i = 0; $i < $nent; $i += 4) { + push @{$ret{tab}}, [ $a[$i], $a[$i+1], $a[$i+2], $a[$i+3] ]; + } +# print "$nent\n"; +# print join(', ', @{$ret{tab}})."\n"; + } else { + die "$dbf->{filename}: invalid page type at offset $off"; + } + + return \%ret; +} + +sub ipdb_alloc_page($$) { + my ($dbf,$type) = @_; + my %page; + + $page{type} = $type; + $page{off} = $dbf->{numpages}++ * $dbf->{pagesize} + 512; + if ($type == IPDB_PAGE_INDEX) { + $#{$page{tab}} = 256; + } else { + $page{next} = 0; + $#{$page{tab}} = -1; + } + $page{dirty} = 1; + ipdb_cache_put($dbf, \%page); + ++$dbf->{modified}; + return \%page; +} + +sub ipdb_get_root_page($$) { + my ($dbf,$nbits) = @_; + my $p; + + if (!defined($dbf->{rootidx}{$nbits})) { + $p = ipdb_alloc_page($dbf, IPDB_PAGE_INDEX); + $dbf->{rootidx}{$nbits} = $p->{off}; + $dbf->{modified}++; + } else { + $p = ipdb_cache_get($dbf, $dbf->{rootidx}{$nbits}); + } + return $p; +} + +### FIXME: Declared in whoseip.pl +my $ipv4rx = '\d{1,3}((\.\d{1,3}){3})'; + +### + + +=pod + +=head2 $res = B<Whoseip::DB::ipdb_lookup(I<$dbf>, I<$ip>);> + +Look up IP address I<$ip> in the database identified by I<$dbf> (a handle +returned by the previous call to B<ipdb_open>. If found, B<$ref> is a +reference to a hash that contains the following keys: + +=over 4 + +=item B<country> + +ISO 3166-1 country code + +=item B<network> + +Network address in a dotted-quad form. + +=item B<netmask> + +Network mask in a dotted-quad form. + +=back + +If not found, the function returns B<undef>. + +=cut +sub ipdb_lookup($$) { + my ($dbf,$ipstr) = @_; + my @ipo; + my $ipn; + my $nbits; + + if ($ipstr =~ /^$ipv4rx$/) { + @ipo = split(/\./, $ipstr); + $ipn = ($ipo[0] << 24) + ($ipo[1] << 16) + ($ipo[2] << 8) + $ipo[3]; + $nbits = 32; + } else { + # FIXME: diagnostics + return undef; + } + + my $page = ipdb_get_root_page($dbf, $nbits); + my $n = 0; + while (1) { + if ($page->{type} == IPDB_PAGE_INDEX) { + if ($page->{tab}[$ipo[$n]]) { + $page = ipdb_cache_get($dbf, $page->{tab}[$ipo[$n]]); + ++$n; + next; + } + return undef if (!$page->{tab}[256]); + $page = ipdb_cache_get($dbf, $page->{tab}[256]); + } + + foreach my $r (@{$page->{tab}}) { + if (($ipn & $r->[1]) == $r->[0]) { + # FIXME: check timestamp + + return ( country => $r->[3], + network => inet_ntoa(pack('N', $r->[0])), + netmask => inet_ntoa(pack('N', $r->[1])) ); + } + } + return undef if (!$page->{next}); + $page = ipdb_cache_get($dbf, $page->{next}); + } +} + +=pod + +=head2 $res = B<Whoseip::DB::ipdb_insert(I<$dbf>, I<$cidr>, I<$country>);> + +Inserts into the database I<$cidr> and the corresponding country code I<$country>. + +Currently, I<$cidr> must be in the form B<I<Net-address>/I<Netmask-length>>. + +=cut +sub ipdb_insert($$$) { + my ($dbf, $cidr, $country) = @_; + my @ipo; + my $ipn; + my $masklen; + my $netmask; + my $nbits; + + return 0 if ($dbf->{mode} eq '<'); + + if ($cidr =~ m#^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d+)$#) { + @ipo = ( $1, $2, $3, $4 ); + $ipn = ($1 << 24) + ($2 << 16) + ($3 << 8) + $4; + $masklen = $5; + $netmask = (0xffffffff ^ (0xffffffff >> $masklen)); + $nbits = 32; + } else { + # FIXME: error message + die "boo $cidr"; + return 0; + } + + my $n = int($masklen / 8); + + my $page = ipdb_get_root_page($dbf, $nbits); + for (my $i = 0; $i < $n; $i++) { + if ($page->{tab}[$ipo[$i]]) { + $page = ipdb_cache_get($dbf, $page->{tab}[$ipo[$i]]); + } else { + my $p = ipdb_alloc_page($dbf, IPDB_PAGE_INDEX); + $page->{tab}[$ipo[$i]] = $p->{off}; + $page->{dirty} = 1; + $page = $p; + } + } + + if ($page->{tab}[256]) { + $page = ipdb_cache_get($dbf, $page->{tab}[256]); + die "$dbf->{filename}: index page found where leaf was expected" + unless $page->{type} == IPDB_PAGE_LEAF; + } else { + my $p = ipdb_alloc_page($dbf, IPDB_PAGE_LEAF); + $page->{tab}[256] = $p->{off}; + $page->{dirty} = 1; + $page = $p; + } + + my $maxent = int(($dbf->{pagesize} - 12) / 14); + + while ($#{$page->{tab}} == $maxent) { + if ($page->{next}) { + $page = ipdb_cache_get($dbf, $page->{next}); + die "$dbf->{filename}: index page found where leaf was expected" + unless $page->{type} == IPDB_PAGE_LEAF; + } else { + my $p = ipdb_alloc_page($dbf, IPDB_PAGE_LEAF); + $page->{next} = $p->{off}; + $page->{dirty} = 1; + $page = $p; + } + } + + push @{$page->{tab}}, [ $ipn, $netmask, time(), $country ]; + $page->{dirty} = 1; + + return 1; +} + + diff --git a/whoseip/whoseip.pl b/whoseip/whoseip.pl index 0dffcf5..dc512df 100644 --- a/whoseip/whoseip.pl +++ b/whoseip/whoseip.pl @@ -21,6 +21,7 @@ use Pod::Usage; use Pod::Man; use Socket qw(:DEFAULT :crlf); use Net::CIDR; +use Whoseip::DB qw(:all); use constant EX_OK => 0; use constant EX_USAGE => 64; # command line usage error @@ -40,6 +41,9 @@ my @ipv4list; my $ipv4rx = '\d{1,3}((\.\d{1,3}){3})'; my $delim = $LF; # Output delimiter +my $dbf; +my $dbfile; + my %fmtab = (unix => '${status} $?{diag}{${diag}}{${country} ${cidr} ${range} ${count}} ', cgi => 'Content-Type: text/xml @@ -404,8 +408,20 @@ sub whois($$) { sub serve { my $term = shift; my %res; - + if ($term =~ /^${ipv4rx}$/) { + if (defined($dbf)) { + %res = ipdb_lookup($dbf, $term); + if (defined($res{country})) { + $res{status} = 'OK'; + $res{cidr} = Net::CIDR::addrandmask2cidr($res{network}, + $res{netmask}); + $res{range} = join ',', Net::CIDR::cidr2range($res{cidr}); + $res{count} = range2count($res{range}); + return %res; + } + } + my $srv = findsrv($term); if (defined($srv) and $srv ne 'UNKNOWN') { while (%res = whois($term, $srv), @@ -417,6 +433,11 @@ sub serve { $res{diag} = 'IP unknown'; } else { $res{status} = 'OK'; + if (defined($dbf)) { + foreach my $cidr (split /,/, $res{cidr}) { + ipdb_insert($dbf, $cidr, $res{country}); + } + } } } else { $res{status} = 'NO'; @@ -627,9 +648,16 @@ GetOptions("h" => sub { $output_format = read_format($_[1]); } }, - "fastcgi:s" => \$fastcgi + "fastcgi:s" => \$fastcgi, + "cache-file|c:s" => \$dbfile, + "no-cache|N" => sub { $dbfile = undef; } ) or exit(EX_USAGE); +if (defined($dbfile)) { + $dbfile .= "whoseip.db" if (-d $dbfile); + $dbf = ipdb_open($dbfile); +} + if (defined($fastcgi)) { if ($fastcgi eq '') { $fastcgi = 1; @@ -693,6 +721,11 @@ if ($fastcgi) { } } } + +if (defined($dbf)) { + ipdb_close($dbf); +} + __END__ =head1 NAME |