diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | bottom.html | 3 | ||||
-rwxr-xr-x | mansrv | 520 | ||||
-rw-r--r-- | mansrv.conf | 16 | ||||
-rw-r--r-- | top.html | 10 |
5 files changed, 551 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9859261
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.emacs.desktop
+*~
diff --git a/bottom.html b/bottom.html
new file mode 100644
index 0000000..9943ff0
--- /dev/null
+++ b/bottom.html
@@ -0,0 +1,3 @@
+</div>
+</body>
+</html>
@@ -0,0 +1,520 @@
#! /usr/bin/perl
# Manpage CGI server.
# Copyright (C) 2013 Sergey Poznyakoff
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

use strict;
use warnings;
use File::Basename;
use sigtrap;
use Sys::Syslog;

my $server = "mansrv";
my $version = "1.0";
my $cf = "/etc/mansrv.conf";

# Settings that may be overridden from the configuration file (see %conftab).
our $docdir;
our $manref = (defined($ENV{'SCRIPT_NAME'}) ? $ENV{'SCRIPT_NAME'} : "") . "?";
our $htmltop;
our $htmlbot;
our $errpage;
our $includepath;
our $cachedir = "/tmp/mansrv";
our $incfilesuf;

my @deps = ( $0 );       # Files the cached page depends upon.
my @grohtml_opts;        # Extra "-P option" arguments for groff.

my $tempcachefile;       # Cache file being written; removed on failure.
my $mansep;              # Separator between section and name in links.
my $header_sent = 0;     # True once the CGI response header was printed.
my $tmpl_section = "";   # HTML-escaped value substituted for @SECTION@.
my $tmpl_title = "";     # HTML-escaped value substituted for @TITLE@.

# html_escape(STRING)
# Returns STRING with the characters &, <, > and " replaced by HTML entities.
sub html_escape {
    my $s = shift;
    $s =~ s/&/&amp;/g;
    $s =~ s/</&lt;/g;
    $s =~ s/>/&gt;/g;
    $s =~ s/"/&quot;/g;
    return $s;
}

# shell_quote(STRING)
# Returns STRING wrapped in single quotes so that /bin/sh sees it as exactly
# one word.
sub shell_quote {
    my $s = shift;
    $s =~ s/'/'\\''/g;
    return "'$s'";
}

# cleanup_tempfile()
# Removes the partially written cache file, if there is one.
sub cleanup_tempfile {
    unlink($tempcachefile) if defined($tempcachefile) && -e $tempcachefile;
}

# syserror(MESSAGE)
# Logs MESSAGE via syslog, prints a terse error page and exits with status 1.
# If no response header was sent yet, a 500 header is printed first, so the
# web server does not see a body without a header.
sub syserror {
    my $s = shift;
    # The message is passed as an argument to "%s": it can contain file
    # names or "$!" text, and must not be taken for a format string.
    syslog("LOG_ERR", "%s", $s);
    cleanup_tempfile();
    unless ($header_sent) {
        print "Status: 500 Internal Server Error\n";
        print "Content-Type: text/html\n";
        print "\n";
        $header_sent = 1;
    }
    print "<h1>ABEND</h1>\n"; # Let them guess...
    exit 1;
}

# Maps configuration keywords to the variables they set.
my %conftab = (
    'docdir'      => \$docdir,
    'manref'      => \$manref,
    'htmltop'     => \$htmltop,
    'htmlbot'     => \$htmlbot,
    'errpage'     => \$errpage,
    'cachedir'    => \$cachedir,
    'includepath' => \$includepath,
    'incfilesuf'  => \$incfilesuf
);

# read_config(FILE)
# Reads "variable = value" settings from FILE into the variables listed in
# %conftab.  Unknown variables and malformed lines are logged; if any were
# seen, the program terminates via syserror.
sub read_config {
    my $config_file = shift;
    my $err = 0;

    open(my $fd, "<", $config_file)
        or syserror("cannot open $config_file: $!");
    while (my $text = <$fd>) {
        chomp $text;
        # Strip the comment first, so that blanks preceding it are trimmed
        # together with the rest of the trailing whitespace.
        $text =~ s/#.*//;
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;
        next if $text eq "";
        # The keyword cannot contain "=", so that "a=b=c" is split at the
        # first equals sign.
        if ($text =~ /^([^\s=]+)\s*=\s*(.+)$/) {
            my ($var, $val) = ($1, $2);
            if (exists($conftab{$var})) {
                ${$conftab{$var}} = $val;
            } else {
                syslog("LOG_ERR", "%s",
                       "$config_file:$.: unknown variable: '$var'");
                ++$err;
            }
        } else {
            syslog("LOG_ERR", "%s", "$config_file:$.: syntax error");
            ++$err;
        }
    }
    close($fd);
    syserror("errors in config file") if ($err);
}

# sighan()
# Signal handler: removes the unfinished cache file and terminates.
sub sighan {
    cleanup_tempfile();
    exit 1;
}

# build_manpath()
# Prepends to MANPATH the "man" subdirectory of each project directory found
# in $docdir, in lexicographical order.  Does nothing if $docdir is not set
# or cannot be read.
sub build_manpath {
    return unless defined $docdir;
    opendir(my $d, $docdir) or return;
    my $dirs = join ':',
               sort map { "$docdir/$_/man" }
               grep { !/^\./ && -d "$docdir/$_" && -d "$docdir/$_/man" }
               readdir($d);
    closedir($d);
    if ($dirs) {
        my $oldpath = defined($ENV{'MANPATH'}) ? $ENV{'MANPATH'} : "";
        $ENV{'MANPATH'} = "$dirs:$oldpath";
    }
}

# interpret_file(HANDLE, FILE)
# Copies the template FILE to HANDLE, expanding the @TITLE@, @SECTION@,
# @SERVER@ and @VERSION@ placeholders.  The title and the section come from
# the request and are therefore inserted HTML-escaped.
sub interpret_file {
    my ($ofd, $file) = @_;

    syserror("template file is not configured") unless defined $file;
    open(my $ifd, "<", $file) or syserror("cannot open $file: $!");
    while (my $line = <$ifd>) {
        $line =~ s/\@TITLE\@/$tmpl_title/g;
        $line =~ s/\@SECTION\@/$tmpl_section/g;
        $line =~ s/\@SERVER\@/$server/g;
        $line =~ s/\@VERSION\@/$version/g;
        print $ofd $line;
    }
    close($ifd);
}

# interpret(HANDLE, FILE...)
# Runs interpret_file on each FILE in turn.
sub interpret {
    my $fd = shift;
    foreach my $file (@_) {
        interpret_file($fd, $file);
    }
}

# addopts(FILE)
# Reads grohtml options from FILE (one per line; empty lines and # comments
# are ignored) and registers FILE as a dependency of the cached page.
# Silently returns if FILE cannot be opened.
sub addopts {
    my $file = shift;

    open(my $fd, "<", $file) or return;
    while (my $opt = <$fd>) {
        chomp $opt;
        $opt =~ s/#.*//;
        $opt =~ s/^\s+//;
        $opt =~ s/\s+$//;
        next if $opt eq "";
        # Each line is handed to groff as a single quoted word, so the
        # content of a per-project file cannot inject shell syntax.
        push @grohtml_opts, "-P " . shell_quote($opt);
    }
    close($fd);
    push @deps, $file;
}

# checkdeps(FILE)
# Returns 1 if FILE is at least as new as every file in @deps, 0 otherwise.
# Dependencies that are unset or cannot be stat'ed are ignored.
sub checkdeps {
    my $ts = (stat(shift))[9];
    return 0 unless defined $ts;
    foreach my $depfile (@deps) {
        next unless defined $depfile;
        my $mtime = (stat($depfile))[9];
        next unless defined $mtime;
        return 0 if ($ts < $mtime);
    }
    return 1;
}

# ensure_dir(DIR [, MODE])
# Creates DIR unless it already exists.  Terminates via syserror on failure.
sub ensure_dir {
    my ($dir, $mode) = @_;

    return if -d $dir;
    my $ok = defined($mode) ? mkdir($dir, $mode) : mkdir($dir);
    my $errstr = "$!";
    # A concurrent request may have created the directory in the meantime.
    syserror("mkdir $dir: $errstr") unless $ok || -d $dir;
}

# find_manpage(SECTION, NAME)
# Returns the pathname reported by "man -w SECTION NAME", or nothing if the
# page is not found or the arguments are not acceptable.
#
# Both arguments come from the request.  SECTION is later used as a part of
# the cache directory name, hence it is restricted to alphanumerics.  NAME
# must not contain slashes (man would treat it as a file name) and must not
# look like an option.  man is run without the help of the shell.
sub find_manpage {
    my ($sect, $name) = @_;

    return unless $sect =~ /\A[0-9A-Za-z]+\z/;
    return unless $name =~ m{\A[^-/\s\0][^/\s\0]*\z};

    my $pid = open(my $fh, "-|");
    syserror("cannot fork: $!") unless defined $pid;
    if ($pid == 0) {
        # Child: discard diagnostics and run man.
        open(STDERR, ">", "/dev/null");
        exec("man", "-w", $sect, $name) or exit 127;
    }
    my $path = <$fh>;
    close($fh);
    return unless defined $path;
    chomp $path;
    return $path if length $path;
    return;
}

# build_groff_command(FILE)
# Returns the shell command that converts the manpage FILE to HTML.
# Gzipped pages are decompressed on the fly.
sub build_groff_command {
    my $file = shift;
    my $cmd = "groff -Thtml -man ";

    if ($includepath) {
        $cmd .= join(' ', map { "-I " . shell_quote($_) }
                          split(/:/, $includepath)) . " ";
    }
    if (@grohtml_opts) {
        $cmd .= join(' ', @grohtml_opts) . " ";
    }

    if ($file =~ /\.gz$/) {
        return "gzip -d -c " . shell_quote($file) . " | $cmd -";
    }
    return $cmd . shell_quote($file);
}

# link_manrefs(LINE)
# Returns LINE with each "<b>name</b>(N)" manpage reference turned into a
# hyperlink built from $manref and $mansep.
sub link_manrefs {
    my $line = shift;

    $line =~ s{<b>([^<>]+)</b>\(([0-9n])\)}{
        my ($text, $sect) = ($1, $2);
        my $href = $text;
        # Minus signs in the name become plain hyphens in the link target.
        # Both the "&minus;" entity and a literal UTF-8 encoded U+2212 are
        # accepted.  NOTE(review): which of the two grohtml actually emits
        # should be confirmed.
        $href =~ s/(?:&minus;|\xE2\x88\x92)/-/g;
        '<b><a href="' . $manref . $sect . $mansep . $href . '">'
            . $text . '</a></b>(' . $sect . ')';
    }ge;
    return $line;
}

# generate_cache(FILE, CACHEFILE)
# Formats the manpage FILE and stores the resulting page in CACHEFILE.
# The page is written to a temporary file first and renamed into place when
# complete, so that readers never see a partial page.
sub generate_cache {
    my ($file, $cachefile) = @_;

    # This must be the file-scoped variable: the signal handler and
    # syserror use it to remove the unfinished file.
    $tempcachefile = "$cachefile.$$";
    $SIG{$_} = \&sighan for qw(HUP INT QUIT PIPE TERM);
    open(my $fd, ">", $tempcachefile)
        or syserror("opening $tempcachefile: $!");

    my $groffcmd = build_groff_command($file);
    open(my $p, "-|", $groffcmd) or syserror("running \"$groffcmd\": $!");

    interpret($fd, $htmltop);

    # State map:
    #  0 - before <body>
    #  1 - between <body> and </body>
    #  [ 2 - after </body> ]
    my $state = 0;
    while (my $line = <$p>) {
        if ($state == 0) {
            if ($line =~ /<body>(.*)/) {
                print $fd "$1\n";
                $state = 1;
            }
            next;
        }

        if ($line =~ /(.*)<\/body>/) {
            print $fd "$1\n";
            last;
        }

        print $fd link_manrefs($line);
    }

    # The exit status is not examined: the loop above stops reading at
    # </body>, so groff can be terminated by SIGPIPE.
    close($p);

    interpret($fd, $htmlbot);

    close($fd) or syserror("writing $tempcachefile: $!");
    rename($tempcachefile, $cachefile) or
        syserror("failed to rename $tempcachefile to $cachefile: $!");
    $tempcachefile = undef;
}

#############################################################################

my $script; # This script name.
($script = $0) =~ s/.*\///;
openlog($script, "ndelay,pid", "daemon");

if ($ENV{'MANSRV_CONF'}) {
    $cf = $ENV{'MANSRV_CONF'};
}
read_config($cf);
push @deps, $cf;
if ($manref =~ /\?$/) {
    $mansep = '+';
} elsif ($manref =~ /\/$/) {
    $mansep = '/';
} else {
    $manref .= "?";
    $mansep = '+';
}

# Set up environment
build_manpath();
$ENV{'MANCGI'} = 'WEBDOC';

if (@ARGV != 2) {
    print "Location: http://man.gnu.org.ua\n";
    print "\n";
    exit 0;
}

$tmpl_section = html_escape($ARGV[0]);
$tmpl_title = html_escape($ARGV[1]);

# Do not keep output in the buffer across the fork in find_manpage.
$| = 1;

my $file = find_manpage($ARGV[0], $ARGV[1]);

unless ($file) {
    print "Status: 404 Not Found\n";
    print "Content-Type: text/html\n";
    print "\n"; # Body begins
    $header_sent = 1;
    interpret(\*STDOUT, $htmltop, $errpage, $htmlbot);
    exit 0;
}
push @deps, $file;

# Split the name and determine the root mandir
my ($manfile, $mandir) = fileparse($file);
# Strip the trailing section directory (man1, man3p, mann, ...).  Only the
# last path component is examined.
$mandir =~ s{/man[0-9n][^/]*/*\z}{};

ensure_dir($cachedir, 0755);
# $ARGV[0] was verified by find_manpage to be purely alphanumeric.
$cachedir .= "/cat$ARGV[0]";
ensure_dir($cachedir);
my $cachefile = "$cachedir/$manfile";

# Check if it has an include file
if (defined($docdir) && $incfilesuf) {
    (my $root = $docdir) =~ s{/+\z}{};
    # Literal prefix comparison: $docdir is not a regular expression.
    if (index($file, "$root/") == 0) {
        my $incfile = "$file$incfilesuf";
        if (-f $incfile) {
            push @deps, $incfile;
            $ENV{'INCFILE'} = $incfile;
        }
    }
}

# Override top and bottom files, if necessary.
$htmltop = "$mandir/top.html" if (-R "$mandir/top.html");
$htmlbot = "$mandir/bottom.html" if (-R "$mandir/bottom.html");

push @deps, $htmltop, $htmlbot;

# Process grohtml options
addopts("$mandir/grohtml.opt") if (-R "$mandir/grohtml.opt");

unless (-e $cachefile && checkdeps($cachefile)) {
    generate_cache($file, $cachefile);
}

open(my $cache, "<", $cachefile)
    or syserror("opening $cachefile for reading: $!");

# Begin output
print "Content-Type: text/html\n";
print "\n"; # Body begins
$header_sent = 1;

while (my $line = <$cache>) {
    print $line;
}
close($cache);

__END__
=head1 NAME

mansrv - manpage server

=head1 SYNOPSIS

B<mansrv> I<SECTION> I<NAME>

=head1 DESCRIPTION

This CGI script searches for the manpage I<NAME> in the section I<SECTION>
and displays it as HTML. It uses B<grohtml>(1) to create initial
translation and preprocesses its output, replacing references to other
manpages by appropriate hyperlinks and fixing some other minor
inconsistencies.

The program is designed to help display on-line the documentation in
manpage formats for multiple software projects without the need to
install these manpages somewhere in the system B<MANPATH>. This is
necessary for software forge sites that host a number of projects.

To this effect, the following directory structure is assumed. Each
software project keeps its documentation files in a separate directory,
located in a common B<document root> directory. If the project's directory
contains a directory named B<man>, its full pathname is prepended to
the system B<MANPATH>. The prepended directories are sorted in
lexicographical order.

When a manpage is requested, it is looked up in the B<MANPATH> using
B<man -w>. If found, it is piped through B<groff -man -Thtml> to
obtain initial translation. Each project can customize the call
to B<groff> by placing the file named B<grohtml.opt> in its B<man>
subdirectory. This file should list the valid B<grohtml>(1) options,
one option per line. Empty lines and comments (beginning with B<#>)
are ignored.

I<SECTION> must consist of letters and digits only. I<NAME> must not
contain slashes or whitespace and must not begin with a dash. A request
that does not satisfy these conditions is handled as if the manpage
were not found.

The HTML prologue (anything up to and including the B<< <body> >> tag)
and epilogue (starting from the B<< </body> >> closing tag) are
removed and replaced by the content of B<HTML top> and B<HTML
bottom> files which are supplied in the configuration file (see
the B<htmltop> and B<htmlbot> settings below). A project can
override either or both of these files by placing files named
B<top.html> and B<bottom.html> in its B<man> subdirectory.

Before inclusion, both files are subject to variable expansion,
during which any occurrence of B<@>I<variable>B<@> is replaced
with the value of the I<variable>. See the section B<TEMPLATE
SUBSTITUTIONS> below for the list of valid variable names and
their values.

If the source man page is located in one of the hosted projects'
B<man> subdirectories (as opposed to the system default B<MANPATH>),
B<mansrv> also looks for an optional B<include file>, which has the
same pathname as the source page plus the predefined filename suffix
(see the B<incfilesuf> configuration setting below). If such file
exists, its full pathname is assigned to the B<INCFILE> environment
variable. This makes it possible to customize man pages for online
display, by using the following B<groff> construct:

 .if !"\V[INCFILE]"" .so \V[INCFILE]

The produced HTML output is stored in the cache directory (as
specified by the B<cachedir> configuration setting). Subsequent
calls to B<mansrv> with the same arguments will retrieve the
already generated page from the cache, instead of regenerating
it each time.

The cached page, however, is invalidated and its regeneration
is triggered if any of the files it depends upon is newer than
the cached copy. The list of dependencies includes (apart from
the B<mansrv> program itself and its configuration file) the
manpage source, both HTML top and bottom files, the B<grohtml.opt>
file and the include file (if any).

If the requested manpage is not found, the error page is returned
with the HTTP status B<404>. A fatal error that occurs before the
response header has been sent is reported with the status B<500>.

=head1 CONFIGURATION

The program reads its configuration from the file named in the
environment variable B<MANSRV_CONF>. If not set, the default
file name B</etc/mansrv.conf> is used.

The configuration file has the usual UNIX configuration format. Empty
lines and comments (beginning with B<#>) are ignored. Each non-empty
line must have the form B<variable>=B<value>, with any amount of optional
whitespace around the equals sign. Valid variable names are:

=over 4

=item B<docdir>

Document root directory. This is a directory under which B<mansrv> looks
for additional man hierarchies.

=item B<manref>

The stem part for references to other manpages. By default it is deduced
from the B<SCRIPT_NAME> environment variable.

If the value of this variable ends with B<?>, section number and manpage
name in the resulting reference will be separated by B<+> sign. If its value
ends with B</> the two arguments will be separated by B</>. Otherwise,
B<?> will be appended to B<manref> and B<+> separator assumed.

=item B<cachedir>

Cache directory. The default is B</tmp/mansrv>.

=item B<htmltop>

Path to the HTML top template file. This file must contain at least
the basic HTML prologue part, between B<< <html> >> and B<< <body> >> tags,
inclusive. See B<TEMPLATE SUBSTITUTIONS> below for details on its
processing.

=item B<htmlbot>

Path to the HTML bottom template file. It must contain at least
the closing B<< </body> >> and B<< </html> >> tags.

=item B<errpage>

Path to the template file for the error page. This page is displayed
when the requested manpage was not found.

=item B<incfilesuf>

Include file suffix. If defined, and the manpage file to be displayed is
located under B<docdir>, B<mansrv> will look for the file named
I<MANFILE>B<incfilesuf>, where I<MANFILE> is the manpage file name.
If this file exists, its full pathname will be assigned to the environment
variable B<INCFILE>.

=item B<includepath>

Include path for B<groff> (list of directories separated with colons).

=back

=head1 TEMPLATE SUBSTITUTIONS

While interpreting the contents of the files B<htmltop>, B<htmlbot> and
B<errpage>, the following character sequences are removed and replaced with
the corresponding expansions:

=over 4

=item B<@SERVER@>

Canonical name of the program (B<mansrv>).

=item B<@VERSION@>

Version of B<mansrv>.

=item B<@TITLE@>

Manpage title, as given in the request. HTML special characters
are replaced with the corresponding entities.

=item B<@SECTION@>

Requested manpage section. HTML special characters are replaced
with the corresponding entities.

=back

=head1 LOGGING

The program logs all fatal errors to the B<syslog>(3) facility
B<daemon>.

=head1 ENVIRONMENT VARIABLES

The following environment variables affect the behavior of B<mansrv>:

=over 4

=item B<MANSRV_CONF>

The pathname of the configuration file to use instead of the default
one.

=back

Before invoking B<groff>, the following environment variables are
defined:

=over 4

=item B<MANCGI>

Contains the string B<WEBDOC>.

=item B<INCFILE>

Defined if the include file is found (see the description of the
configuration variable B<incfilesuf>).

=back

=head1 FILES

=over 4

=item B</etc/mansrv.conf>

Default configuration file.

=back

=head1 SEE ALSO

B<groff>(1), B<grohtml>(1).

=head1 AUTHOR

Sergey Poznyakoff <gray@gnu.org>

=cut

diff --git a/mansrv.conf b/mansrv.conf
new file mode 100644
index 0000000..17b2b0b
--- /dev/null
+++ b/mansrv.conf
@@ -0,0 +1,16 @@
+## Documentation directory
+docdir = .
+## URL base to use instead of $ENV{'SCRIPT_NAME'}
+manref = http://man.gnu.org.ua/manpage/?
+## HTML top file.
+htmltop = top.html
+## HTML bottom file.
+htmlbot = bottom.html
+## Error page
+errpage = error.html
+## Cache directory
+cachedir = /tmp/mansrv
+## Include file suffix
+incfilesuf = .inc
+## Additional include path (colon-separated)
+#includepath =
diff --git a/top.html b/top.html
new file mode 100644
index 0000000..768cefe
--- /dev/null
+++ b/top.html
@@ -0,0 +1,10 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+"http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<meta name="generator" content="@SERVER@ @VERSION@">
+<meta http-equiv="Content-Type" content="text/html; charset=US-ASCII">
+<title>@TITLE@</title>
+</head>
+<body>
+<div id="content"> |