#! /usr/bin/perl # Copyright (C) 2014 Sergey Poznyakoff # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . use strict; use Getopt::Long qw(:config gnu_getopt no_ignore_case); use IO::Socket; use Pod::Usage; use Pod::Man; use Socket qw(:DEFAULT :crlf inet_ntoa); use Net::CIDR; use Whoseip::DB qw(:all); use constant EX_OK => 0; use constant EX_USAGE => 64; # command line usage error use constant EX_DATAERR => 65; # data format error use constant EX_NOINPUT => 66; # cannot open input file use constant EX_SOFTWARE => 70; # internal software error (not used yet) use constant EX_OSFILE => 72; # critical OS file missing use constant EX_CANTCREAT => 73; # can't create (user) output file my $progname; # This script name; ($progname = $0) =~ s/.*\///; my $progdescr = "Identifies IP addresses"; my $debug; my @ipv4list; my $ipv4rx = '\d{1,3}((\.\d{1,3}){3})'; my $delim = $LF; # Output delimiter my $dbf; my $dbfile; my %dbopt; my %fmtab = (unix => '${status} $?{diag}{${diag}}{${country} ${cidr} ${range} ${count}} ', cgi => 'Content-Type: text/xml ${status} $?{diag}{${diag}}{${country} ${cidr} ${range} ${count}} $?{term}{${term}} ' ); sub error { my $msg = shift; local %_ = @_; print STDERR "$progname: " if defined($progname); print STDERR "$_{prefix}: " if defined($_{prefix}); print STDERR "$msg\n" } sub debug { my $l = shift; error(join(' ',@_), prefix => 'DEBUG') if $debug >= $l; } sub abend { my $code = shift; print STDERR "$progname: " if defined($progname); print STDERR "@_\n"; exit $code; } sub read_config_file($) { my $config_file = shift; print STDERR "reading $config_file\n" if ($debug); open(my $fd, "<", $config_file) or die("cannot open $config_file: $!"); while (<$fd>) { chomp; s/\s+$//; if (/\\$/) { chop; $_ .= <$fd>; redo; } s/^\s+//; s/\s+=\s+/=/; s/#.*//; next if ($_ eq ""); unshift(@ARGV, "--$_"); } close $fd; } sub read_ipv4list { my $file = shift; open(my $fd, "<", $file) or abend(EX_NOINPUT, "can't open $file for reading: $!"); my $line = 0; @ipv4list = (); while (<$fd>) { ++$line; chomp; s/#.*//; s/^\s+//; s/\s+$//; next if ($_ eq ""); unless (/^([\d\.]+)\/(\d+)\s+([\w\.]+)$/) { error("$file:$line: malformed line"); next; } my $srv = $3; next if $srv eq 'UNKNOWN'; my $msk = $2; my $ip = str2ipv4($1); $srv = "whois.$srv.net" unless ($srv =~ /\./); push @ipv4list, [ $ip, (0xffffffff^(0xffffffff>>$msk)), $srv ]; } close $fd; } sub str2ipv4 { my $ipstr = shift; my @ip = split(/\./, $ipstr); return ($ip[0] << 24) + ($ip[1] << 16) + ($ip[2] << 8) + $ip[3]; } sub range2count { my $count = 0; foreach my $arg (@_) { my @a = split /-/, shift; next unless $#a == 1; $count += str2ipv4($a[1]) - str2ipv4($a[0]) + 1; } return $count; } sub cidr_to_range { my @a; @a = sort { $a->[0] <=> $b->[0] } map { map { [ map { str2ipv4($_) } split(/-/, $_, 2) ] } Net::CIDR::cidr2range($_) } split /,/, shift; for (my $i = $#a; $i > 0; $i--) { if ($a[$i]->[0] == $a[$i-1]->[1] + 1) { $a[$i-1]->[1] = $a[$i]->[1]; splice @a, $i, 1; } } return join ',', map { inet_ntoa(pack('N', $_->[0])) . '-' . inet_ntoa(pack('N', $_->[1])) } @a; } # ############ # ARIN # ############ sub arin_fmt { my $q = shift; return "n + $q"; } sub arin_decode { my ($input, $ref) = @_; return if ($input =~ /^#/ || $input eq ''); if ($input =~ /^NetRange:\s+(.+)/) { my $r = $1; $r =~ s/\s+//g; my $n = range2count($r); if (!defined($ref->{count}) || $ref->{count} > $n) { $ref->{range} = $r; $ref->{cidr} = join ',', Net::CIDR::range2cidr($r); $ref->{count} = $n; delete $ref->{country} } } elsif ($input =~ /^Country:\s+(.+)/ && !defined($ref->{country})) { $ref->{country} = $1; } } # ############ # RIPE # ############ use constant RIPE_INIT => 0; use constant RIPE_TEXT => 1; use constant RIPE_IGNR => 2; sub ripe_fmt { # From the RIPE Database FAQ: # # Q: Why did I receive an Error 201: Access Denied? # # * You (or your application) performed too many queries that # returned contact information (e.g. person or role objects) from the # RIPE Database. There is a daily limit on the amount of personal # data returned as described in the Acceptable Use Policy. # # * Even if you queried for other types of objects, the associated # contact information is returned by default. To avoid this situation # please use the "-r" flag to prevent any associated contact # information from being returned. my $q = shift; return "-r $q"; } sub ripe_decode { my ($input, $ref) = @_; error("WHOIS($ref->{server}:$ref->{port}): $1") if ($input =~ /^%ERROR:(.+)/); return if ($input =~ /^%/); if ($ref->{state} == RIPE_INIT) { if ($input eq '') { return; } else { $ref->{state} = RIPE_TEXT; } } if ($ref->{state} == RIPE_TEXT) { if ($input =~ /^inetnum:\s+(.+)/) { my $r = $1; $r =~ s/\s+//g; $ref->{range} = $r; $ref->{count} = range2count($r); $ref->{cidr} = join ',', Net::CIDR::range2cidr($r); } elsif ($input =~ /^country:\s+(.+)/) { $ref->{country} = $1; } elsif ($input =~ /^netname:\s+(.+)-TRANSFERRED.*/) { # A kludge to handle networks transferred to another RIR # E.g.: netname: AFRINIC-NET-TRANSFERRED-20050223 my $s = $1; $s =~ s/-/./g; $ref->{referto} = "whois.$s:43"; } elsif ($input eq '') { $ref->{state} = RIPE_IGNR; } } } # ############ # LACNIC # ############ sub lacnic_decode { my ($input, $ref) = @_; return if ($input =~ /^%/); if ($ref->{state} == RIPE_INIT) { if ($input eq '') { return; } else { $ref->{state} = RIPE_TEXT; } } if ($ref->{state} == RIPE_TEXT) { if ($input =~ /^inetnum:\s+(.+)/) { my $cidr = $1; if ($cidr =~ m#^(\d{1,3})/(\d+)#) { $cidr = "$1.0.0.0/$2"; } elsif ($cidr =~ m#^(\d{1,3}\.\d{1,3})/(\d+)#) { $cidr = "$1.0.0/$2"; } elsif ($cidr =~ m#^(\d{1,3}\.\d{1,3}\.\d{1,3})/(\d+)#) { $cidr = "$1.0/$2"; } $ref->{cidr} = $cidr; $ref->{range} = cidr_to_range($cidr); $ref->{count} = range2count($ref->{range}); } elsif ($input =~ /^country:\s+(.+)/) { $ref->{country} = $1; } elsif ($input eq '') { $ref->{state} = RIPE_IGNR; } } } # ################### # rwhois.gin.ntt.net # ################### sub ntt_decode { my ($input, $ref) = @_; if ($input =~ /^\s+(${ipv4rx}\s*-\s*${ipv4rx})/) { my $r = $1; $r =~ s/\s+//g; my $c = range2count($r); if (!defined($ref->{count}) || $ref->{count} > $c) { $ref->{count} = $c; $ref->{range} = $r; $ref->{cidr} = join ',', Net::CIDR::range2cidr($r); $ref->{country} = 'US'; } } } # ############ # TWNIC # ############ sub twnic_decode { my ($input, $ref) = @_; if ($input =~ /^\s+Netblock:\s+(.+)/) { my $r = $1; $r =~ s/\s+//g; $ref->{range} = $r; $ref->{count} = range2count($r); $ref->{cidr} = join ',', Net::CIDR::range2cidr($r); $ref->{country} = 'TW'; } } ################### # whois.nic.ad.jp ################### sub nic_ad_jp_fmt { my $q = shift; return "NET $q/e"; } sub nic_ad_jp_decode { my ($input, $ref) = @_; if ($input =~ /^a\.\s+\[Network Number\]\s+(.+)/) { $ref->{cidr} = $1; $ref->{range} = cidr_to_range($ref->{cidr}); $ref->{count} = range2count($ref->{range}); $ref->{country} = 'JP'; } } ################### # whois.nic.or.kr ################### sub nic_or_kr_decode { my ($input, $ref) = @_; if ($input =~ /^IPv4 Address\s*:\s+(${ipv4rx}\s*-\s*${ipv4rx})/) { my $r = $1; $r =~ s/\s+//g; my $c = range2count($r); if (!defined($ref->{count}) || $ref->{count} > $c) { $ref->{count} = $c; $ref->{range} = $r; $ref->{cidr} = join ',', Net::CIDR::range2cidr($r); $ref->{country} = 'KR'; } } } sub nobistech_decode { my ($input, $ref) = @_; if ($input =~ /network:IP-Network:(.+)/) { $ref->{cidr} = $1; $ref->{range} = cidr_to_range($1); $ref->{count} = range2count($ref->{range}); } elsif ($input =~ /network:Country-Code:(.+)/) { $ref->{country} = $1; } } # ####################################################################### # Server table # ####################################################################### my %srvtab = ( 'whois.arin.net' => { q => \&arin_fmt, d => \&arin_decode }, 'whois.lacnic.net' => { d => \&lacnic_decode }, 'whois.ripe.net' => { q => \&ripe_fmt, d => \&ripe_decode }, 'rwhois.gin.ntt.net' => { d => \&ntt_decode }, 'whois.twnic.net' => { d => \&twnic_decode }, 'whois.nic.ad.jp' => { q => \&nic_ad_jp_fmt, d => \&nic_ad_jp_decode }, 'whois.nic.br' => { d => \&lacnic_decode }, 'whois.nic.or.kr' => { d => \&nic_or_kr_decode }, 'rwhois.nobistech.net' => { d => \&nobistech_decode } ); sub format_query { my ($srv, $term) = @_; if (defined($srvtab{$srv}{q})) { return &{$srvtab{$srv}{q}}($term); } else { return $term; } } sub findsrv { my $ip = str2ipv4(shift); foreach my $r (@ipv4list) { debug(3, "findsrv: $ip $r->[0]/$r->[1]"); return $r->[2] if ($ip & $r->[1]) == $r->[0]; } return undef; } sub whois($$) { my $ip = shift; my $server = shift; my $port = 43; if ($server =~ /(.+):(.+)/) { $server = $1; $port = $2; } debug(1,"querying $ip from $server:$port"); my $sock = new IO::Socket::INET (PeerAddr => $server, PeerPort => $port, Proto => 'tcp'); my $expiration = undef; my @collect; unless ($sock) { error("could not connect to $server:$port: $!"); return undef; } print $sock format_query($server, $ip)."\n"; my $decode; if (defined($srvtab{$server}{d})) { $decode = $srvtab{$server}{d}; } else { $decode = \&ripe_decode; } local $/ = LF; my %res = (server => $server, port => $port, term => $ip); while (<$sock>) { s/\s*$CR?$LF$//; debug(4, "RECV: $_"); if (/%% referto: whois -h (\S+) -p (\S+)/) { $res{referto} = "$1:$2"; debug(1, "found reference to $res{referto}"); } elsif (m#ReferralServer: r?whois://(.+)#) { $res{referto} = $1; $res{referto} =~ s#/$##; debug(1, "found reference to $res{referto}"); } else { &{$decode}($_, \%res); } } close $sock; return %res; } sub serve { my $term = shift; my %res; if ($term =~ /^${ipv4rx}$/) { if (defined($dbf)) { eval { %res = ipdb_lookup($dbf, $term); }; if ($@) { error("cache lookup failure: $@"); } elsif (defined($res{country})) { $res{status} = 'OK'; unless (defined($res{cidr})) { $res{cidr} = Net::CIDR::addrandmask2cidr($res{network}, $res{netmask}); } $res{range} = cidr_to_range($res{cidr}); $res{count} = range2count($res{range}); $res{term} = $term; $res{source} = 'CACHE'; return %res; } } my $srv = findsrv($term); if (defined($srv) && $srv ne 'UNKNOWN') { my %prev; while (%res = whois($term, $srv), && defined($res{referto})) { %prev = %res if $res{status} = 'OK'; $srv = $res{referto}; } %res = %prev if (!defined($res{country}) && defined($prev{country})); if (!defined($res{country})) { $res{status} = 'NO'; $res{diag} = 'IP unknown'; } else { $res{status} = 'OK'; if (defined($dbf)) { foreach my $cidr (split /,/, $res{cidr}) { eval { ipdb_insert($dbf, $cidr, uc $res{country}, { cidr => $res{cidr}, server => $res{server}, port => $res{port} }); }; if ($@) { error("can't cache $cidr: $@"); } } } } } else { $res{status} = 'NO'; $res{diag} = 'whois server unknown'; } } else { $res{status} = 'BAD'; $res{diag} = 'invalid input'; } $res{source} = 'QUERY'; $res{package} = 'whoseip'; $res{version} = $Whoseip::DB::VERSION; $res{term} = $term; return %res; } # ####################################################################### # Create a copy of this program with ipv4list embedded # ####################################################################### sub whoseip_dump { my ($opt,$file) = @_; open(my $ifd, "<", $0) or abend(EX_NOINPUT, "can't open $0 for reading"); open(my $ofd, ">", $file) or abend(EX_CANTCREAT, "can't open $file for writing"); my $zapto; my $line = 0; while (<$ifd>) { ++$line; if (defined($zapto)) { $zapto = undef if /$zapto/; next; } if (/^my \@ipv4list\s*(.*)/) { my $tail = $1; if ($tail =~ /^=\s*\(/) { $zapto = '^\);$'; } elsif ($tail !~ /^;/) { error("$file:$line: unrecognized @ipv4list initializer"); print $ofd $_; next; } print $ofd "my \@ipv4list = (\n"; foreach my $x (@ipv4list) { print $ofd "[ $x->[0], $x->[1], '$x->[2]' ],\n"; } print $ofd ");\n"; } else { print $ofd $_; } } close $ifd; close $ofd; exit 0; } # ####################################################################### # Output functions # ####################################################################### sub read_format { my $file = shift; open(my $fd, "<", $file) or die "can't open $file for reading"; my $res; while (<$fd>) { chomp; if (/\\$/) { chop; $_ .= <$fd>; redo; } next if /^#/; $res .= "$_\n"; } close $fd; return $res; } sub getsegm { my $sref = shift; my $s = ${$sref}; my $level = 0; my $res; while ($s =~ /(.*?[{}])(.*)/s) { $res .= $1; $s = $2; if ($res =~ /[\$\?l]\{$/s) { if ($s =~ /(\w+\})(.*)/s) { $res .= $1; $s = $2; } } elsif ($res =~ /{$/) { ++$level; } elsif ($res =~ /}$/) { last if (--$level == 0); } } ${$sref} = $s; $res =~ s/^\{//s; $res =~ s/\}$//s; return $res; } sub expandout { my $s = shift; my %esctab = (a => "\a", b => "\b", e => "\e", f => "\f", n => "\n", r => "\r", t => "\t", v => "\v"); $s =~ s/\$l{(\w+)\}/length($_{$1})/sgex; $s =~ s/\$\{(\w+)\}/$_{$1}/sgex; $s =~ s/\\([\\abefnrtv])/$esctab{$1}/sgex; print $s; } sub print_result { my $fmt = shift; local %_ = @_; while ($fmt =~ /(.*?)\$\?\{(\w+)\}(.*)/s) { expandout($1); my $v = $2; $fmt = $3; my $t = getsegm(\$fmt); my $f; $f = getsegm(\$fmt) if ($fmt =~ /^\{/); if (defined($_{$v})) { print_result($t, @_); } elsif (defined($f)) { print_result($f, @_); } } expandout($fmt); } sub docgi { my ($fmt, $env) = @_; my $term; my %res; if ($env->{QUERY_STRING} =~ /^$ipv4rx$/) { $term = $env->{QUERY_STRING}; } else { my %q = map { /(.+?)=(.*)/ ? ($1 => $2) : ($1 => 1); } split(/\&/, $env->{QUERY_STRING}); if (defined($q{fmt})) { if (defined($fmtab{$q{fmt}})) { if ($fmtab{$q{fmt}} =~ /^Content-Type:/) { $fmt = $fmtab{$q{fmt}}; } else { %res = (status => 'BAD', diag => 'invalid format') } } else { %res = (status => 'BAD', diag => 'format undefined'); } } $term = $q{ip} if defined($q{ip}); } unless (defined($res{status})) { if (defined($term)) { %res = serve($term); } else { %res = (status => 'BAD', diag => 'search term invalid or missing'); } } print_result($fmt, %res); } # ####################################################################### # Main # ####################################################################### my $output_format; my $fastcgi; my $single_query; my $dbexport; my $dbimport; if (defined($ENV{WHOSEIP_CONF})) { read_config_file($ENV{WHOSEIP_CONF}); } elsif (-r "/etc/whoseip.conf") { read_config_file("/etc/whoseip.conf"); } GetOptions("h" => sub { pod2usage(-message => "$progname: $progdescr", -exitstatus => EX_OK); }, "help" => sub { pod2usage(-exitstatus => EX_OK, -verbose => 2); }, "usage" => sub { pod2usage(-exitstatus => EX_OK, -verbose => 0); }, "debug|d+" => \$debug, "ip-list|i=s" => sub { read_ipv4list($_[1]); }, "dump|D=s" => \&whoseip_dump, "define-format=s" => sub { my @a = split /\s*=\s*/, $_[1], 2; $fmtab{$a[0]} = $a[1]; }, "format|f=s" => \$output_format, "format-file|formfile|F=s" => sub { if ($_[1] =~ /=/) { my @a = split /\s*=\s*/, $_[1], 2; $fmtab{$a[0]} = read_format($a[1]); } else { $output_format = read_format($_[1]); } }, "fastcgi:s" => \$fastcgi, "cache-file|c:s" => \$dbfile, "no-cache|N" => sub { $dbfile = undef; }, "single-query" => \$single_query, "cache-ttl|ttl|t=n" => sub { $dbopt{ttl} => $_[1]; }, "cache-mode=s" => sub { $dbopt{mode} = $_[1]; }, "export" => \$dbexport, "import" => \$dbimport, ) or exit(EX_USAGE); if (defined($dbfile)) { $dbfile .= "/whoseip.db" if (-d $dbfile); $dbopt{debug} = $debug; eval { $dbf = ipdb_open($dbfile, %dbopt); }; if ($@) { error("can't open cache file $dbfile: $@"); } } if (defined($dbexport)) { abend(EX_USAGE, "--export requires --cache-file") unless defined($dbf); abend(EX_USAGE, "too many arguments") if ($#ARGV > 0); my $fd; if ($#ARGV == 0) { open($fd, '>', $ARGV[0]) or abend(EX_CANTCREAT, "can't open $ARGV[0] for writing: $!"); } ipdb_export($dbf, $fd); ipdb_close($dbf); exit(EX_OK); } if (defined($dbimport)) { abend(EX_USAGE, "--import requires --cache-file") unless defined($dbf); abend(EX_USAGE, "too many arguments") if ($#ARGV > 0); my $fd; if ($#ARGV == 0) { open($fd, '<', $ARGV[0]) or abend(EX_NOINPUT, "can't open $ARGV[0] for reading: $!"); } ipdb_import($dbf, $fd); ipdb_close($dbf); exit(EX_OK); } if (defined($fastcgi)) { if ($fastcgi eq '') { $fastcgi = 1; } else { my @suf = split /\s+/, $fastcgi; $fastcgi = undef; foreach my $s (@suf) { if ($0 =~ /$s$/) { $fastcgi = 1; last; } } } } else { $fastcgi = $0 =~ /\.fcgi$/; } if (defined($output_format) && $output_format =~ /@(.+)/) { abend(EX_USAGE, "format $1 not defined") unless defined $fmtab{$1}; $output_format = $fmtab{$1}; } if ($fastcgi) { eval { require FCGI; 1; } or do { my $msg = $@; if ($debug) { abend(EX_OSFILE, "can't load CGI::Fast: $@"); } else { abend(EX_OSFILE, "can't load CGI::Fast"); } }; $output_format = $fmtab{cgi} unless defined($output_format); my $req = FCGI::Request(); while ($req->Accept() >= 0) { docgi($output_format, $req->GetEnvironment()); } } elsif ($ENV{GATEWAY_INTERFACE} =~ m#CGI/\d+\.\d+#) { $output_format = $fmtab{cgi} unless defined($output_format); docgi($output_format, \%ENV); } else { my $term; my %res; ipdb_locker($dbf, lock => 'shared') if (defined($dbf)); $output_format = $fmtab{unix} unless defined($output_format); if ($#ARGV == -1) { unless (-t *STDIN) { local $/ = CRLF; $delim = "$CR$LF"; } my $n = 1; while (<>) { chomp; print_result($output_format, serve($_), item => $n++); last if $single_query; } } else { my $n = 1; foreach my $term (@ARGV) { print_result($output_format, serve($term), item => $n++); } } } ipdb_close($dbf) if defined($dbf); __END__ =head1 NAME whoseip - return information about IP address =head1 SYNOPSIS B [B<-dhN>] [B<-F> I] [B<-D> I] [B<-i> I] [B<--cache-file=>I] [B<--debug>] [B<--define-format=>IB<=>I] [B<--dump=>I] [B<--export>] [B<--fastcgi=>[I]] [B<--format=>I] [B<--format-file=>[IB<=>]I] [B<--formfile=>I] [B<--help>] [B<--import>] [B<--ip-list=>I] [B<--no-cache>] [B<--single-query>] [B<--usage>] [I...] =head1 DESCRIPTION For each IP address, B returns the country it is located in (a ISO 3166-1 code), the network it belongs to and the number of addresses in the network. The program can operate in several modes: as a standalone command line tool, or as a B or B process. If the program name ends in B<.fcgi> the B mode is enabled. This mode is also enabled if the command line option B<--fastcgi> is given without arguments, or if the program name ends in one of the suffixes supplied in the argument to this option (a whitespace-separated list). In this mode, the IP address to look for is taken from the B parameter B. Additional parameter B can be used to supply the name of the desired output format. Its value must be either a name of one of the built-in formats, or must be defined using the B<--define-format> option (see below). As a shortcut, the invocation command line containing an IP alone is also recognized. Otherwise, when one or more IP addresses are given in the command line, B prints the data for each of them on the standard output. This is B mode. If called without arguments, the program checks if the environment variable B is defined and contains B> (where I is the version number). If so, it assumes B mode. In this mode the command line is parsed the same way as in B mode. If B is not set, the program reads IP addresses from input (one per line) and prints replies for each of them. This is B. To summarize: =over 4 =item 1. Start it from the command line with one or more IPs given as arguments, if you wish to get info about these IPs. =item 2. Add it to B if you want to query it remotely as a service, e.g.: whois stream tcp nowait nobody /usr/bin/whoseip =item 3. Copy it to your B directory to use it with a B server as a B. =item 4. Link it to B to use it as a B application (or use the B<--fastcgi> option). =back Output formats are configurable and depend on the mode B runs in. In command line and inetd modes, the default output format is: =over 4 B I I I I =back where I is country code, I is network block in CIDR notation, I is network block as a range of IP addresses, and I is number of IP address in the network block. If the specified IP address is not found, the reply is =over 4 B I =back where I is a human-readable explanatory message. If the input is invalid, the reply is: =over 4 B I =back In B and B modes, the output is represented as XML, as shown in the example below: OK US 192.0.2.0/24 192.0.2.0-192.0.2.255 255 192.0.2.10 The following example illustrates the reply if the IP is not found: NO IP unknown 43.0.0.1 See the section B below for a discussion on how to customize output formats. =head2 Caching To minimize number of queries to external B servers, it is recommended to use a cache database. It is enabled by using the B<--cache-file=I> option (or B configuration file statement). A B