#! /usr/bin/perl

use strict;
use Getopt::Long qw(:config gnu_getopt no_ignore_case);
use Pod::Usage;
use Pod::Man;
use POSIX qw(strftime mktime);
use RRDs;

# Global vars
my $sys_config_file = "/etc/netplot.conf"; # System-wide configuration file, tried last.
my $descr = "";    # Text appended to the script name in the --help message.
my $script;        # This script name.
# Verbosity per debugging category.  debug() logs a message when the level
# stored here is greater than or equal to the level of the message.
my %debug_level = ( 'GENERAL' => 0, 'PARSE' => 0, 'GRAPH' => 0 );
my %rrds;          # RRD files known so far, keyed by IP address.
my $rrddir = ".";  # Directory where RRD files reside (--rrd-dir).
my $rrdstep = 300; # Passed as --step when an RRD file is created; the
                   # heartbeat of its data sources is twice this value.
my $rrdmaxval = "U"; # Maximum-value field of the data sources created in rrd_open.
my %inputfiles;    # Flow files to process: timestamp => file name.
my $flow_report_program = "flow-report"; # Program that produces the traffic reports.

# Options:
my $debug;          # Debug mode indicator (only read by read_config_file).
my $logfile;        # Name of the logfile.
my $dry_run;        # Dry-run mode.
my $help;           # Show help and exit.
my $man;            # Show man and exit.
my $overwrite = 0;  # Overwrite existing files.
my $graphdir = "."; # Directory where graph files reside.
my $recursive;      # Recursively scan subdirectories.
my $noupdate;       # Don't update rrd files.
my $nograph;        # Don't redraw graphs.
my $imgformat = "PNG"; # Create graphs of this type.
my $flow_report_cfg; # Configuration file passed to the report program with -s.
my @graph_size;     # (width, height) of the graphs; left empty unless --graph-size is given.

my $layout = "mirror";          # Graph layout: "mirror" or "traditional".
my $mirror_top = "incoming";    # Graph drawn above the time axis in mirror layout.
# Colors of each graph: element 0 is used for the line (LINE1), element 1
# for the filled area (AREA).  An undefined element disables that part.
my @incoming_colors = ( "336600", "32CD32"  );
my @outgoing_colors = ( "0033CC", "4169E1" );
my @order;                      # Graphs to draw ("incoming"/"outgoing"), in drawing order.

# Return codes:
#  0 - OK, nothing changed
#  [1 - Not used]
#  2 - General error
#  3 - Usage error

#############

# Until loginit() runs, everything logged through LOG goes to standard error.
open(LOG, ">&STDERR");

# logit(TEXT...)
# Write a message to the LOG handle.  The arguments are joined with single
# spaces and prefixed with the script name.  When a log file is configured
# the line is additionally prefixed with a timestamp.
# Returns the result of the underlying print.
sub logit {
    my $text = "$script: @_\n";

    if ($logfile) {
	# Only the timestamp goes through strftime.  Feeding the message
	# itself to strftime would expand any '%' it contains (file names,
	# rrdtool format strings) as a conversion specification.
	$text = strftime("%b %d %H:%M:%S ", localtime) . $text;
    }
    print LOG $text;
}

# loginit()
# (Re)open the LOG handle once the options are known.  If a log file is
# configured and either does not exist yet or is writable, messages are
# appended to it; otherwise LOG is duplicated from standard error.
# Returns the result of the open that finally succeeded or failed.
sub loginit {
    close LOG;
    if ($logfile and (!-e $logfile or -w $logfile)) {
	print STDERR "$script: logging to $logfile\n";
	# Three-argument open: the file name is never parsed for a mode.
	return 1 if open(LOG, ">>", $logfile);
	# Do not leave LOG closed, or every later message would be lost
	# silently: report the problem and fall back to standard error.
	print STDERR "$script: cannot open $logfile: $!\n";
    }
    return open(LOG, ">&STDERR");
}

# logdone()
# Hook called when logging is finished: from abend() and at the end of the
# script.  It currently does nothing.
sub logdone {
}

# abend(MESSAGE)
# Abnormal termination: log MESSAGE, leave an "ABEND" trace for the GENERAL
# debugging category, finish logging and exit with code 2 (general error).
# Never returns.
sub abend($) {
    my ($reason) = @_;

    logit($reason);
    debug('GENERAL', 1, "ABEND");
    logdone();
    exit(2);
}

# debug(CATEGORY, LEVEL, TEXT...)
# Log TEXT (joined with spaces) as a debugging message, provided that the
# verbosity configured for CATEGORY in %debug_level is at least LEVEL.
sub debug {
    my ($category, $level, @text) = @_;

    logit("DEBUG[$category]: @text")
	if ($debug_level{$category} >= $level);
}

# read_config_file(FILE)
# Read the configuration FILE and prepend its settings to @ARGV as long
# options, so that they are parsed by GetOptions before (and can be
# overridden by) the real command line arguments.
#
# Each non-empty line is "option" or "option = value"; whitespace around
# the first equals sign is optional.  Everything from a '#' to the end of
# line is a comment.  The options are inserted in the order they appear in
# the file, so a later line overrides an earlier one.
#
# Dies if FILE cannot be opened.
sub read_config_file($) {
    my $config_file = shift;
    my @options;

    print STDERR "reading $config_file\n" if ($debug);
    open(my $fh, "<", $config_file)
	or die("cannot open $config_file: $!");
    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$fh>) {
	chomp($line);
	# Strip the comment first, so that the whitespace preceding it is
	# removed by the trimming below.
	$line =~ s/#.*//;
	$line =~ s/^\s+//;
	$line =~ s/\s+$//;
	next if ($line eq "");
	$line =~ s/\s*=\s*/=/;
	push(@options, "--$line");
    }
    close($fh);
    # A single unshift preserves the order of the lines in the file.
    unshift(@ARGV, @options);
}

# nametots(NAME)
# Extract the timestamp from a flow file name of the form
#
#     ft-vNN.YYYY-MM-DD.HHMMSS+ZZZZ   (or ...-ZZZZ)
#
# Any leading directory part of NAME is ignored.  Returns the time in
# seconds since the Epoch, or undef if the name does not have this form
# or does not denote a valid date.
#
# The result is computed from the UTC offset recorded in the name.  It
# therefore does not depend on the time zone of the machine running the
# script, and the two files that share a wall-clock time when daylight
# saving time ends get distinct timestamps.
sub nametots {
    my $name = shift;
    $name =~ s/.*\///;
    return undef
	unless ($name =~ /ft-v[0-9]+\.([0-9]{4})-([0-9]{2})-([0-9]{2})\.([0-9]{2})([0-9]{2})([0-9]{2})([-+])([0-9]{2})([0-9]{2})/);
    my ($year, $mon, $mday, $hour, $min, $sec, $sign, $zone_h, $zone_m) =
	($1, $2, $3, $4, $5, $6, $7, $8, $9);

    require Time::Local;
    # timegm croaks on impossible dates (month 13, day 32, ...).
    my $wallclock = do {
	local $@;
	eval { Time::Local::timegm($sec, $min, $hour, $mday, $mon - 1, $year) };
    };
    return undef unless (defined($wallclock));

    my $offset = ($zone_h * 60 + $zone_m) * 60;
    $offset = -$offset if ($sign eq '-');
    return $wallclock - $offset;
}

# rrd_open(IP, START)
# Make sure the RRD file for IP is available and register it in %rrds.
# The file is created (with START as its start time) when it is not
# readable or when --overwrite is in effect; nothing is written in dry-run
# mode.  Terminates the script if rrdtool reports an error.
sub rrd_open {
    my ($ip, $start) = @_;
    my $filename = "$rrddir/$ip.rrd";
    my $heartbeat = 2 * $rrdstep;

    if ($overwrite || !-r $filename) {
	debug('GENERAL',2,"creating $filename");
	unless ($dry_run) {
	    # One ABSOLUTE data source per traffic direction.
	    my @sources;
	    foreach my $direction ("in", "out") {
		push(@sources,
		     "DS:${direction}:ABSOLUTE:${heartbeat}:0:${rrdmaxval}");
	    }
	    # Four averaging archives; dograph draws one graph from each.
	    my @archives = map { "RRA:AVERAGE:0.5:$_" }
			       ("1:600", "6:700", "24:775", "288:797");
	    RRDs::create($filename, "--start", $start, "--step", $rrdstep,
			 @sources, @archives);
	}

	my $err=RRDs::error;
	abend("cannot create $filename: $err") if $err;
    } else {
	debug('GENERAL',2,"using existing $filename");
    }
    $rrds{$ip} = $filename;
}

# rrd_add(IP, TIMESTAMP, HASHREF)
# Store one sample for IP.  HASHREF holds the counters under the keys "in"
# and "out"; a missing counter is stored as 0.  The RRD file is opened (and
# created if necessary) on first use.  Nothing is written in dry-run mode.
# Terminates the script if rrdtool reports an error.
sub rrd_add {
    my ($ip, $timestamp, $hashref) = @_;

    rrd_open($ip,$timestamp-$rrdstep) unless (defined($rrds{$ip}));
    unless ($dry_run) {
	my $in = defined($hashref->{"in"}) ? $hashref->{"in"} : 0;
	my $out = defined($hashref->{"out"}) ? $hashref->{"out"} : 0;
	RRDs::update($rrds{$ip}, "--template", "in:out",
		     join(":", $timestamp, $in, $out));
    }
    my $err=RRDs::error;
    abend("cannot update $rrds{$ip}: $err") if $err;
}

# collect_report(FILENAME, DS, HASHREF)
# Run the flow report program on the flow file FILENAME, asking for the
# report "DS-summary" (DS is "in" or "out"), and store the result in
# HASHREF so that $hashref->{IP}{DS} is the value reported for IP.
#
# The report is expected to consist of "IP,VALUE" rows; text following a
# '#' and empty rows are ignored.
#
# Terminates the script if the report configuration file is not set or the
# report program cannot be started.  A failure of the report program itself
# is logged.
sub collect_report {
    my ($filename, $ds, $hashref) = @_;

    abend("flow report configuration file is not set; use --flow-report-cfg")
	unless (defined($flow_report_cfg));

    debug('PARSE',2,"begin collecting $ds from $filename");

    # Fork explicitly and exec the program from an argument list.  No shell
    # is involved, so blanks and metacharacters in file names are harmless.
    my $pid = open(my $fd, "-|");
    abend("failed to generate report $ds-summary for $filename")
	unless (defined($pid));
    if ($pid == 0) {
	# Child: the flow file becomes the standard input of the program.
	if (open(STDIN, "<", $filename)) {
	    exec { $flow_report_program } $flow_report_program,
		"-s", $flow_report_cfg, "-S", "$ds-summary";
	}
	print STDERR "$script: cannot run $flow_report_program on $filename: $!\n";
	# Leave without running the END blocks and destructors of the parent.
	POSIX::_exit(127);
    }

    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$fd>) {
	chomp($line);
	$line =~ s/#.*//;
	next if ($line eq "");
	my ($ip,$octets) = split(/,/, $line);
	next unless (defined($ip) and $ip ne "");
	debug('PARSE',3,"$ip $octets");
	$hashref->{$ip}{$ds} = $octets;
    }
    debug('PARSE',2,"done collecting $ds");
    # close waits for the child; a false result means that reading the
    # pipe failed or that the program did not exit successfully.
    unless (close($fd)) {
	logit("report $ds-summary for $filename failed: " .
	      ($! ? "$!" : "$flow_report_program returned wait status $?"));
    }
}
    
# file_to_ds(TIMESTAMP, FILENAME)
# Process one flow file: collect the incoming and the outgoing report and
# store, for every IP address seen in either of them, one sample with the
# given TIMESTAMP.
sub file_to_ds {
    my ($timestamp,$filename) = @_;
    my %traffic;

    debug('PARSE',1,"parsing file $filename, timestamp $timestamp");
    foreach my $ds ("in", "out") {
	collect_report($filename, $ds, \%traffic);
    }
    foreach my $addr (keys(%traffic)) {
	rrd_add($addr, $timestamp, $traffic{$addr});
    }
    debug('PARSE',1,"done parsing $filename");
}

# scandir(DIR)
# Register the flow files found in DIR in %inputfiles, keyed by the
# timestamp extracted from their names.  Regular files without a
# recognizable timestamp are reported and ignored.  Subdirectories are
# scanned too if --recursive is in effect.
# Terminates the script if DIR cannot be opened.
sub scandir {
    my $dir = shift;

    debug('GENERAL',1,"scanning directory $dir");
    opendir(my $dh, $dir)
	or abend("cannot open directory $dir: $!");
    while (defined(my $entry = readdir($dh))) {
	next if ($entry eq "." or $entry eq "..");
	my $path = "$dir/$entry";
	unless (-f $path) {
	    scandir($path) if (-d $path && $recursive);
	    next;
	}
	my $timestamp = nametots($path);
	unless (defined($timestamp)) {
	    logit("ignoring file $path: unable to extract timestamp");
	    next;
	}
	$inputfiles{$timestamp} = $path;
    }
    closedir($dh);
}

# collect_ips_from_rrds()
# Rebuild %rrds from the contents of the RRD directory: every regular file
# named IP.rrd is registered under the key IP, with the path of the file
# as the value (the same form rrd_open stores).
# Terminates the script if the directory cannot be opened.
sub collect_ips_from_rrds {
    %rrds = ();
    debug('GENERAL',1,"scanning rrd directory $rrddir");
    opendir(my $dh, $rrddir)
	or abend("cannot open directory $rrddir: $!");
    while (defined(my $ent = readdir($dh))) {
	next if ($ent eq "." or $ent eq "..");
	next unless (-f "$rrddir/$ent");
	# Only files with the .rrd suffix count; the stem is the IP address.
	(my $ip = $ent) =~ s/\.rrd$// or next;
	$rrds{$ip} = "$rrddir/$ent";
    }
    # Directory handles are closed with closedir; close() fails on them.
    closedir($dh);
}

# addgraph(args,legend,var,varname,colors)
# Append to the array referenced by ARGS the rrdtool graph elements that
# draw one graph:
#   - an AREA of VAR, if COLORS->[1] is defined;
#   - a LINE1 of VAR, if COLORS->[0] is defined;
#   - three GPRINT elements (maximum, average and last value of VARNAME).
# LEGEND is attached to the first element actually drawn.
sub addgraph {
    my ($args,$legend,$var,$varname,$colors) = @_;
    my $line_color = $colors->[0];
    my $area_color = $colors->[1];
    my $label = ":$legend";

    if (defined($area_color)) {
	push(@$args, "AREA:${var}#${area_color}${label}");
	# The legend is shown once; the line drawn next gets none.
	$label = "";
    }

    if (defined($line_color)) {
	push(@$args, "LINE1:${var}#${line_color}${label}");
    }

    my @summary = ( [ "MAX",     "  Max\\: %5.1lf %s" ],
		    [ "AVERAGE", " Avg\\: %5.1lf %S" ],
		    [ "LAST",    " Current\\: %5.1lf %Sbytes/sec\\n" ] );
    push(@$args,
	 map { join(":", "GPRINT", $varname, $_->[0], $_->[1]) } @summary);

}

# Names of the graphs drawn for every RRD file.  dograph() pairs the Nth
# name with the Nth archive (rra[N]) of the file and uses the name as a
# component of the image file name.
my @graphs = ("day", "week", "month", "year");

# dograph(IP)
# (Re)create the graphs for IP: one image per entry of @graphs, named
# GRAPHDIR/IP.NAME.FORMAT.  The Nth graph covers the time span of the Nth
# archive of the RRD file and uses the resolution of that archive.
#
# The layout, the drawing order and the colors are taken from the global
# settings.  No image is written in dry-run mode.  Terminates the script if
# the RRD file cannot be examined, if a setting is invalid, or if rrdtool
# fails to create a graph.
sub dograph {
    my $ip = shift;
    my $rrdfile = "$rrddir/$ip.rrd";
    my @rrdargs;
    my $in;
    my $out;

    my $info = RRDs::info($rrdfile);
    my $infoerr = RRDs::error;
    # Without this check a failed RRDs::info surfaces as an obscure
    # "undefined value as a HASH reference" error below.
    abend("cannot read $rrdfile: " .
	  ($infoerr ? $infoerr : "no information returned"))
	if ($infoerr or ref($info) ne 'HASH');

    foreach my $i (0 .. $#graphs) {
	my $gt = $graphs[$i];
	my $pdp_per_row = $$info{"rra[$i].pdp_per_row"};
	my $rows = $$info{"rra[$i].rows"};
	# A file that lacks this archive would yield a zero step and an
	# empty time span.
	unless ($pdp_per_row and $rows and $$info{'step'}) {
	    logit("$rrdfile: no archive available for the $gt graph; skipped");
	    next;
	}

	my $imgfile = "$graphdir/$ip.$gt.".lc($imgformat);
	my $step = $pdp_per_row * $$info{'step'};
	my $timespan = $rows * $step;
	debug('GRAPH',2,"creating $imgfile");
	# The option and its value are separate arguments; when passed as
	# one string the label starts with a blank.
	@rrdargs = ($imgfile,
		    "--imgformat", uc($imgformat), "--end", "now",
		    "--start", "now-$timespan",
		    "--step", $step,
		    "--vertical-label", "bytes/sec");
	if ($#graph_size == 1) {
	    push(@rrdargs, "--width", $graph_size[0],
		 "--height", $graph_size[1]);
	}
	push(@rrdargs,
	     "DEF:in=$rrdfile:in:AVERAGE",
	     "DEF:out=$rrdfile:out:AVERAGE");

	if ($layout eq "mirror") {
	    # The graph below the time axis is drawn from the negated
	    # values ("rev"); the figures printed in the legend still come
	    # from the original data source.
	    if ($mirror_top eq "incoming") {
		$in = "in";
		$out = "rev";
		push(@rrdargs, "CDEF:rev=out,-1,*");
	    } elsif ($mirror_top eq "outgoing") {
		$in = "rev";
		$out = "out";
		push(@rrdargs, "CDEF:rev=in,-1,*");
	    } else {
		abend("invalid mirror-top: $mirror_top");
	    }
	    push(@rrdargs, "HRULE:0#000000");
	} else {
	    $in = "in";
	    $out = "out";
	}

	foreach my $x (@order) {
	    if ($x eq "incoming") {
		addgraph(\@rrdargs, "Incoming",$in,"in",\@incoming_colors);
	    } elsif ($x eq "outgoing") {
		addgraph(\@rrdargs, "Outgoing",$out,"out",\@outgoing_colors);
	    } else {
		abend("invalid order component: $x");
	    }
	}
	debug('GRAPH',3,"args: ".join(' ', @rrdargs));
	RRDs::graph(@rrdargs) unless ($dry_run);
	my $err=RRDs::error;
	abend("cannot create graph for $ip: $err") if $err;
    }
}

# getcolors(SPEC)
# Parse a color specification of the form LINE[:AREA].  Returns a list of
# exactly two elements: the line color and the area color.  A part that is
# omitted, empty or given as "-" is returned as undef.
# Terminates the script if SPEC is empty or has more than two parts.
sub getcolors {
    my $spec = shift;
    my @ret = defined($spec) ? split(/:/, $spec) : ();

    # Report the specification itself; $_ holds unrelated data here.
    abend("invalid colors specification: " . (defined($spec) ? $spec : ""))
	if ($#ret == -1 || $#ret > 1);
    $ret[1] = undef if ($#ret == 0);
    foreach my $color (@ret) {
	$color = undef
	    if (defined($color) and ($color eq "" or $color eq "-"));
    }

    return @ret;
}
	
# Spellings of the graph names accepted by the --mirror-top and --order
# options, mapped to the canonical names used by the rest of the script.
my %comp = ( 'in' => 'incoming',
	     'incoming' => 'incoming',
	     'out' => 'outgoing',
	     'outgoing' => 'outgoing' );
    
###########
# The script name, without the directory part, prefixes every message.
($script = $0) =~ s/.*\///;

my $home;

# getpwuid is not available on every platform, hence the eval.
eval {
    my @ar = getpwuid($<);
    $home = $ar[7];
};
# Fall back to the environment if the password database has no answer.
$home = $ENV{'HOME'} unless (defined($home) and $home ne "");

# Read the first configuration file available: the one named by
# NETPLOT_CONF, the per-user one, or the system-wide one.  Without a home
# directory the per-user file is skipped, instead of being looked up in
# the root directory.
if ($ENV{'NETPLOT_CONF'}) {
    read_config_file($ENV{'NETPLOT_CONF'});
} elsif (defined($home) and $home ne "" and -e "$home/.netplot.conf") {
    read_config_file("$home/.netplot.conf");
} elsif (-e "$sys_config_file") {
    read_config_file("$sys_config_file");
}

# Parse the options.  The settings read from the configuration file have
# been prepended to @ARGV, so they are handled here as well.  A usage error
# terminates the script with code 3.
GetOptions("help|h" => \$help,
	   "man" => \$man,
	   "dry-run|n" => \$dry_run,
	   # --debug[=SPEC[,SPEC...]], where SPEC is LEVEL (applies to all
	   # categories) or CATEGORY=LEVEL (":" is accepted in place of "=").
	   # The specs are applied from left to right.  A missing argument
	   # stands for level 1; an explicit 0 really means 0.
           "debug|d:s" => sub {
	       my $spec = $_[1];
	       $spec = "1" if (!defined($spec) or $spec eq "");
	       foreach my $item (split(/,/, $spec)) {
		   if ($item =~ /^[0-9]+$/) {
		       foreach my $key (keys %debug_level) {
			   $debug_level{$key} = $item;
		       }
		       next;
		   }
		   my @s = split(/[:=]/, $item, 2);
		   $s[0] = uc($s[0]);
		   # The manual used to call this category GRAPHS.
		   $s[0] = 'GRAPH' if ($s[0] eq 'GRAPHS');
		   abend("no such category: $s[0]")
		       unless (defined($debug_level{$s[0]}));
		   my $level = ($#s == 1) ? $s[1] : 1;
		   abend("invalid debug level: $level")
		       unless ($level =~ /^[0-9]+$/);
		   $debug_level{$s[0]} = $level;
	       }
	   },
           "log-file|l=s" => \$logfile,
	   "overwrite" => \$overwrite,
	   "rrd-dir|o=s" => \$rrddir,
	   "graph-dir|g=s" => \$graphdir,
	   "no-update" => \$noupdate,
	   "no-graph" => \$nograph,
	   # The option takes a mandatory value (PNG, SVG, ...).
	   "imgformat=s" => \$imgformat,
	   "recursive|r" => \$recursive,
	   # Only one option may own the short name -s.  It stays with the
	   # configuration file, which is what -s means to the report
	   # program itself.
	   "flow-report-cfg|s=s" => \$flow_report_cfg,
           "flow-report-program=s" => \$flow_report_program,
	   "graph-size:s" => sub {
	       @graph_size = split(/[xX]/, $_[1]);
	       abend("bad size specification: $_[1]") unless ($#graph_size == 1);
	   },
	   "layout:s" => sub {
	       if ($_[1] eq "traditional" or $_[1] eq "mirror") {
		   $layout = $_[1];
	       } else {
		   abend("unrecognized layout: $_[1]");
	       }
	   },
	   "mirror-top:s" => sub {
	       if (defined($comp{$_[1]})) {
		   $mirror_top = $comp{$_[1]};
	       } else {
		   abend("unrecognized argument to mirror-top");
	       }
	   },
	   "order:s" => sub {
	       @order = split(/,/, $_[1]);
	       abend("invalid number of components in the order")
		   if ($#order == -1 or $#order > 1);
	       @order = map {
		   abend("unrecognized argument to order: $_")
		       if (!defined($comp{$_}));
		   $comp{$_}; } @order;
	   },
	   "incoming-graph:s" => sub {
	       @incoming_colors = getcolors($_[1]);
	   },
	   "outgoing-graph:s" => sub {
	       @outgoing_colors = getcolors($_[1]);
	   }
    ) or exit(3);

pod2usage(-message => "$script: $descr", -exitstatus => 0) if $help;
pod2usage(-exitstatus => 0, -verbose => 2) if $man;

# Select the drawing order.  In the mirror layout it is always incoming,
# then outgoing.  In the traditional layout an explicit --order is kept;
# otherwise the only filled graph, if there is one, is drawn first.
if ($layout ne "traditional") {
    @order = ( "incoming", "outgoing" );
} elsif (!@order) {
    my $incoming_filled = defined($incoming_colors[1]);
    my $outgoing_filled = defined($outgoing_colors[1]);

    if ($outgoing_filled and !$incoming_filled) {
	@order = ( "outgoing", "incoming" );
    } else {
	logit("warning: both graphs are filled and traditional layout is used; one of them will obscure the other")
	    if ($outgoing_filled and $incoming_filled);
	@order = ( "incoming", "outgoing" );
    }
}

# Main part: switch to the configured log, feed the flow files into the
# RRD files (unless --no-update), then redraw the graphs (unless
# --no-graph).
loginit();
debug('GENERAL', 1, "startup");
debug('GENERAL', 2, "args: ".join(' ', @ARGV));

if (!$noupdate) {
    unless (@ARGV) {
	print STDERR "$script: no input files\n";
	exit(3);
    }

    # Collect the input files; directories are scanned for flow files.
    foreach my $path (@ARGV) {
	if (-d $path) {
	    scandir($path);
	    next;
	}
	unless (-f $path) {
	    logit("$path is neither file or directory; ignored");
	    next;
	}
	my $timestamp = nametots($path);
	unless (defined($timestamp)) {
	    logit("ignoring file $path: unable to extract timestamp");
	    next;
	}
	$inputfiles{$timestamp} = $path;
    }

    # Process the files in chronological order.
    my @chronological = sort { $a <=> $b } keys(%inputfiles);
    foreach my $timestamp (@chronological) {
	file_to_ds($timestamp, $inputfiles{$timestamp});
    }
}

if (!$nograph) {
    collect_ips_from_rrds();
    debug('GRAPH',1,"recreating graphs");
    foreach my $addr (keys(%rrds)) {
	dograph($addr);
    }
}

debug('GENERAL', 1, "shutdown");
logdone();

###########

__END__
=head1 NAME

netplot - graphical processor for NetFlow exports

=head1 SYNOPSIS

netplot [I<options>] [I<FILE>...]

=head1 DESCRIPTION

B<Netplot> extracts data from the flow files listed as its arguments,
stores it in B<RRD> files and converts it into graphs.

The utility can be used together with B<flow-capture>(1) to produce the graphs
in real time, e.g.:
    
     flow-capture -N -3 -n 288 -p /var/run/flow-capture.pid \
                  -w /var/lib/netflow/ \
                  -R /usr/bin/netplot 127.0.0.1/127.0.0.1/2055    

=head1 OPTIONS

=over 4

=item B<--overwrite>

Overwrite existing B<RRD> files.  Normally, B<netplot> adds data to the
existing B<RRD> files.  This option forces it to re-create them.
    
=item B<--rrd-dir>=I<DIR>, B<-o> I<DIR>

Name of the directory where B<RRD> files reside.  If not set, current
working directory is assumed.

=item B<--graph-dir>=I<DIR>, B<-g> I<DIR>

Name of the directory where to store graph files.  If not set, current
working directory is assumed.

=item B<--no-update>

Do not update B<RRD> files, only recreate the graphs.

=item B<--no-graph>

Do not update graphs, update only B<RRD> files.

=item B<--imgformat>=B<PNG>|B<SVG>|B<EPS>|B<PDF>

Image format for the generated graphs.

=item B<--recursive>, B<-r>

Recursively descend into directories.
    
=item B<--flow-report-cfg>=I<FILE>

Name of the netflow report configuration, suitable for B<flow-report>(1).
The report must contain two definitions: B<in-summary> and B<out-summary>,
for incoming and outgoing traffic, correspondingly.  See the section
B<NETFLOW REPORT CONFIGURATION>, for a detailed discussion.
    
=item B<--flow-report-program>=I<FILE>

Path to B<flow-report> binary.

=back
    
Graph layout and colors:
    
=over 4

=item B<--graph-size>=I<WIDTH>xI<HEIGHT>

Sets the size of the graph pictures to create.

=item B<--layout>=B<traditional>|B<mirror>

Selects the graph layout.  The B<traditional> layout represents both
incoming and outgoing graphs in the same coordinate plane.  The B<mirror>
layout represents them in two adjacent coordinate planes, which share the
time axis and have their traffic axes pointing in opposite directions.
The B<mirror> layout is the default.
    
=item B<--mirror-top>=B<incoming>|B<outgoing>

If the B<mirror> layout is used, this option selects which of the graphs
should occupy the upper coordinate plane.  The default is B<incoming>.

The argument can be abbreviated: B<in> instead of B<incoming>, or
B<out> instead of B<outgoing>.

=item B<--order>=I<ORDER>

Specifies the order in which graphs should be drawn.  The argument is
a comma-separated list of graph names: B<incoming> (or B<in>) and
B<outgoing> (or B<out>).  It is OK if it consists of only one graph
name: in this case the other graph will not be drawn.

The order of drawing makes sense only for traditional layout.  B<Netplot>
selects the default order so that the graph that has its area filled is
drawn first.  It will issue a warning if both graphs are filled.  Use this
option to override the default order, or to suppress this warning.
    
=item B<--incoming-graph>=I<LINE>[:I<AREA>]

Selects colors to use for incoming graph.  Each color should be a 6-digit
RGB specification (e.g. B<7f7f7f> for grey color).  The I<LINE> parameter
specifies the color for the graph line, and I<AREA> supplies the filling
color for the area below the graph.  Any of them can be omitted, or given
as B<-> (dash).  For example, B<--incoming-graph=ff0000> means that the
incoming graph should be drawn as a red line, without filling its area.
    
=item B<--outgoing-graph>=I<LINE>[:I<AREA>]

Selects colors to use for outgoing graph.  See above for the description
of parameters.    

Defaults are:

    --incoming-graph=336600:32CD32 --outgoing-graph=0033CC:4169E1

which gives the green area for incoming graph and blue area for outgoing
graph.  Note, that if you prefer the traditional layout, it is wise to
disable area filling for one of the graphs.  For example, the options
below produce graphs in an old MRTG-like fashion:
    
    --layout=traditional --outgoing-graph=ff0000 
    
=back

Options controlling log and debug output:
    
=over 4

=item B<--log-file>=I<FILE>, B<-l> I<FILE>

Write diagnostic output to I<FILE>, instead of standard error.

=item B<--debug>[=I<spec>[,I<spec>...]], B<-d>[I<spec>[,I<spec>...]]

Set debugging level.  I<Spec> is either B<level>, or B<category>=B<level>,
where B<category> is a debugging category name and B<level> is a decimal
verbosity level.  Valid categories are: C<GENERAL>, C<PARSE>, and C<GRAPH>.
The category C<GENERAL> prints information about the program as a whole,
C<PARSE> produces a trace of the flow report parser and C<GRAPH> displays
information about creation of the graphs.

The option B<--debug=>I<N>, where I<N> is a decimal number, configures all
debugging categories to have the verbosity I<N>.  If the B<=>I<N> part
is omitted, B<1> is assumed.
    
=item B<--dry-run>, B<-n>

Do almost everything, except creating/updating the output files.

=back

Informational options:

=over 4

=item B<--help>, B<-h>

Shows a terse help summary and exits.

=item B<--man>

Prints the manual page and exits.

=back

=head1 NETFLOW REPORT CONFIGURATION

The netflow report configuration file must contain two definitions:
B<in-summary> and B<out-summary>, for incoming and outgoing traffic,
correspondingly.  Each definition must produce output describing the traffic
for each specific IP address on a separate row, consisting of the IP address
and the number of bytes transferred during the sample interval, separated
by a comma.  For example:

     include-filter /etc/netflow/net.flt

     stat-report traffic-out
         type ip-source-address
         filter net-out
         output
              fields -flows,-packets,-duration
  
     stat-report traffic-in
         type ip-destination-address
         filter net-in
         output
              fields -flows,-packets,-duration
 
     stat-definition in-summary
         report traffic-in

     stat-definition out-summary
         report traffic-out

The contents of B</etc/netflow/net.flt> is, generally speaking, beyond
the scope of this discussion.  As a simplest example, suppose you run
a LAN with addresses 192.168.1.1 -- 192.168.1.254.  Then, the corresponding
filter file producing traffic summary for each host in this network would
look like:

     filter-primitive net
         type ip-address-mask 
         permit 192.168.1.0 255.255.255.0
         default deny

     filter-definition net-in
         match ip-destination-address net
  
     filter-definition net-out
         match ip-source-address net

=head1 CONFIGURATION

The program reads its configuration from one of the following locations:

=over 4

=item B<a.> The file name given by C<NETPLOT_CONF> environment variable (if set)

=item B<b.> B<~>/.netplot.conf

=item B<c.> /etc/netplot.conf

=back

The first existing file from this list is used.  It is an error if the
B<$NETPLOT_CONF> variable is set, but points to a file that does not exist.
It is not an error if the variable is not set and neither of the two
remaining files exists.  It is, however, an error if any of these files
exists, but is not readable.

The configuration file uses a usual UNIX configuration format.  Empty
lines and UNIX comments are ignored.  Each non-empty line is either an
option name, or option assignment, i.e. B<opt>=B<val>, with any amount of
optional whitespace around the equals sign.  Valid option names are
the same as the long command line options, but without the leading B<-->.
For example:

    rrd-dir   = /var/log/trafsum/rrd
    graph-dir = /var/log/trafsum/img
    flow-report-cfg = /etc/netflow/lan.cfg
    debug = 5 
    log-file = /var/log/netplot.log
    graph-size = 600x110

=head1 ENVIRONMENT

=over 4

=item NETPLOT_CONF

The name of the configuration file to read, instead of the default
F</etc/netplot.conf>.

=back
    
=head1 AUTHOR

Sergey Poznyakoff <gray@gnu.org>

=head1 COPYRIGHT

Copyright (C) 2012-2014 Sergey Poznyakoff

License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>

This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.

=cut