#! /usr/bin/perl
# Manpage CGI server.
# Copyright (C) 2013 Sergey Poznyakoff
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
use strict;
use File::Basename;
use sigtrap;
use Sys::Syslog;
use Safe;
# Program identity; both values are made available to templates.
my $package_name = 'mansrv';
our $VERSION = '1.1';

# Default configuration file name.
my $cf = '/etc/mansrv.conf';

# Settings that the configuration file may assign (see %conftab).
our $docdir;
our $manref = "$ENV{'SCRIPT_NAME'}?";
our ($htmltop, $htmlbot, $errpage);
our $includepath;
our $cachedir = '/tmp/mansrv';
our $incfilesuf;

# Files the cached page depends on; the script itself is always one of them.
my @deps = ($0);

# Extra options for groff, collected by addopts().
my @grohtml_opts;

# Name of the cache file being written, for cleanup in sighan().
my $tempcachefile;

# Separator placed between section and title in generated references.
my $mansep;

# Decompression command for each recognized file name suffix.
my %decomp = (
    gz  => 'gzip -d -c',
    bz2 => 'bzip2 -d -c',
    z   => 'compress -d -c',
    Z   => 'compress -d -c',
);
# syserror($message)
#
# Report a fatal error: log $message to syslog with LOG_ERR priority, print
# a deliberately uninformative notice to standard output and terminate the
# script with exit status 1.  Does not return.
sub syserror {
    my ($message) = @_;
    # syslog() treats its second argument as a sprintf-style format, so the
    # text is passed as an argument of "%s"; otherwise a '%' in a file name
    # or system error string would be interpreted as a conversion.
    syslog("LOG_ERR", "%s", $message);
    # NOTE(review): this literal may have lost HTML markup - confirm.
    print "\nABEND\n\n"; # Let them guess...
    exit 1;
}
# Maps each keyword accepted in the configuration file to a reference to
# the variable that receives its value.
my %conftab = (
    cachedir    => \$cachedir,
    docdir      => \$docdir,
    errpage     => \$errpage,
    htmlbot     => \$htmlbot,
    htmltop     => \$htmltop,
    incfilesuf  => \$incfilesuf,
    includepath => \$includepath,
    manref      => \$manref,
);
# read_config($config_file)
#
# Parse the configuration file.  Empty lines and comments (from '#' to the
# end of line) are ignored; every other line must read NAME = VALUE, where
# NAME is a key of %conftab.  The value is stored through the reference
# found there.  Each offending line is logged to syslog; if the file cannot
# be opened, or any line was rejected, syserror() terminates the script.
sub read_config($) {
    my $config_file = shift;
    my $err = 0;
    open(my $fd, "<", $config_file)
        or syserror("cannot open $config_file: $!");
    my $line = 0;
    while (my $text = <$fd>) {
        ++$line;
        chomp $text;
        # Drop the comment before trimming, so that blanks separating a
        # value from a trailing comment do not end up in the value.
        $text =~ s/#.*//;
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;
        next if ($text eq "");
        # The name stops at the first '=' so that values which contain '='
        # themselves (e.g. a manref with a query string) parse correctly.
        if ($text =~ /^([^\s=]+)\s*=\s*(.+)$/) {
            my ($name, $value) = ($1, $2);
            if (exists($conftab{$name})) {
                ${$conftab{$name}} = $value;
            } else {
                # Nothing is printed here: the CGI header has not been
                # sent yet, and stray output would corrupt the response.
                syslog("LOG_ERR", "%s:%d: unknown variable: '%s'",
                       $config_file, $line, $name);
                ++$err;
            }
        } else {
            syslog("LOG_ERR", "%s:%d: syntax error", $config_file, $line);
            ++$err;
        }
    }
    close($fd);
    syserror("errors in config file") if ($err);
}
# Signal handler: remove the partially written cache file, if there is one.
# Reads the file-scope variable $tempcachefile.
sub sighan {
    # The original called the non-existent function "unink", which would
    # have died at run time instead of removing the file.
    unlink($tempcachefile)
        if defined($tempcachefile) && -e $tempcachefile;
}
# Prepend to MANPATH the "man" subdirectory of every non-hidden directory
# found in $docdir, in sorted order.  Does nothing if $docdir cannot be
# read or no such subdirectory exists.
sub build_manpath() {
    opendir(my $dh, $docdir) or return;
    my @mandirs;
    foreach my $entry (readdir($dh)) {
        next if $entry =~ /^\./;
        next unless -d "$docdir/$entry";
        my $candidate = "$docdir/$entry/man";
        push @mandirs, $candidate if -d $candidate;
    }
    closedir($dh);
    my $dirs = join(':', sort @mandirs);
    $ENV{'MANPATH'} = "$dirs:$ENV{'MANPATH'}" if ($dirs);
}
# expand_template($comp, $code, $file, $line)
#
# Evaluate $code with the reval method of $comp and return the result.
# If the result is undefined, log an error that names $file, $line and the
# expression, and return an empty string instead.
sub expand_template {
    my ($comp, $code, $file, $line) = @_;
    my $value = $comp->reval($code);
    return $value if defined($value);
    syslog("LOG_ERR", "%s:%d: error expanding template expression %s",
           $file, $line, $code);
    return '';
}
# interpret_file($ofd, $file)
#
# Copy the template $file to the handle $ofd line by line, replacing every
# {% EXPR %} construct with the value of EXPR as computed by
# expand_template() in a Safe compartment.  The compartment is given a copy
# of %ENV and the variables $TITLE, $SECTION, $PACKAGE, $VERSION and
# $SERVER.  Calls syserror() if the file cannot be opened.
sub interpret_file($$) {
    my ($ofd, $file) = @_;
    # Three-argument open with an undefined name would silently open an
    # anonymous temporary file, so an unset template is rejected up front.
    syserror("cannot open template: file name is not set")
        unless defined($file);
    # Three-argument form: the name is never parsed for mode characters.
    open(my $ifd, "<", $file) or syserror("cannot open $file: $!");
    my $s = Safe->new('Root');
    %{$s->varglob('ENV')} = %ENV;
    ${$s->varglob('TITLE')} = $ARGV[1];
    ${$s->varglob('SECTION')} = $ARGV[0];
    ${$s->varglob('PACKAGE')} = $package_name;
    ${$s->varglob('VERSION')} = $VERSION;
    ${$s->varglob('SERVER')} = "$ENV{REQUEST_SCHEME}://$ENV{SERVER_NAME}";
    while (my $text = <$ifd>) {
        chomp $text;
        # Non-greedy and global, so that several expressions on one line
        # are expanded separately rather than being treated as a single
        # expression stretching from the first {% to the last %}.
        $text =~ s/\{%(.*?)%\}/expand_template($s, $1, $file, $.)/ge;
        print $ofd "$text\n";
    }
    close($ifd);
}
# interpret($fd, @files)
#
# Run interpret_file() on each of the listed files in turn, sending the
# output to $fd.
sub interpret {
    my ($out, @templates) = @_;
    while (@templates) {
        my $template = shift(@templates);
        interpret_file($out, $template);
    }
}
# addopts($file)
#
# Read options from $file, one per line, and append each of them, prefixed
# with "-P", to @grohtml_opts.  Empty lines and comments (from '#' to the
# end of line) are ignored.  On success the file is added to @deps.
# Returns silently if the file cannot be opened.
sub addopts($) {
    my $file = shift;
    # Three-argument form: the name is never parsed for mode characters.
    open(my $fd, "<", $file) or return;
    while (my $opt = <$fd>) {
        chomp $opt;
        # Drop the comment before trimming, so that blanks in front of a
        # trailing comment do not become part of the option.
        $opt =~ s/#.*//;
        $opt =~ s/^\s+//;
        $opt =~ s/\s+$//;
        next if ($opt eq "");
        push @grohtml_opts, "-P $opt";
    }
    close($fd);
    push @deps, $file;
}
# checkdeps($file)
#
# Compare the modification time of $file with that of every entry in
# @deps.  Returns 0 if any entry is newer than $file, 1 otherwise.
sub checkdeps($) {
    my ($target) = @_;
    my $ts = (stat($target))[9];
    my $newer = grep { $ts < (stat($_))[9] } @deps;
    return $newer ? 0 : 1;
}
# #############################################################################
openlog(basename($0), "ndelay,pid", "daemon");

# A configuration file named in the environment replaces the default one.
$cf = $ENV{'MANSRV_CONF'} if ($ENV{'MANSRV_CONF'});
read_config($cf);
push @deps, $cf;

# Choose the separator between section and title: '/' when manref ends
# with a slash, '+' otherwise.  In the latter case manref must end with a
# question mark, which is appended if missing.
if ($manref =~ /\/$/) {
    $mansep = '/';
} else {
    $manref .= "?" unless ($manref =~ /\?$/);
    $mansep = '+';
}
# Set up environment
build_manpath();

# Request scheme: the X-Forwarded-Proto header wins when it is present,
# otherwise the scheme follows the HTTPS variable.
my $proto = exists($ENV{HTTP_X_FORWARDED_PROTO})
              ? $ENV{HTTP_X_FORWARDED_PROTO}
              : ($ENV{HTTPS} eq 'on' ? 'https' : 'http');
$ENV{REQUEST_SCHEME} = $proto;
$ENV{'MANCGI'} = 'WEBDOC';

# Exactly two arguments are expected; any other request is redirected to
# the server root.
unless (@ARGV == 2) {
    print "Location: $ENV{REQUEST_SCHEME}://$ENV{SERVER_NAME}\n", "\n";
    exit 0;
}
# Begin output
print "Content-Type: text/html\n";
print "\n"; # Body begins

# Locate the manpage source.  Both arguments come from the request, so they
# are never handed to a shell; man is executed directly instead.
# The section must be alphanumeric because it also becomes part of the
# cache directory name.  The title may not start with '-' (it would be
# taken for an option) nor contain '/' (man would treat it as a file name).
# A request failing these checks is handled as "page not found".
my $file;
if ($ARGV[0] =~ /\A[0-9A-Za-z]+\z/ && $ARGV[1] =~ m{\A[^-/\0][^/\0]*\z}) {
    my $pid = open(my $manfd, "-|");
    syserror("cannot fork: $!") unless defined($pid);
    if ($pid == 0) {
        # Child process: discard diagnostics and run man.
        open(STDERR, ">", "/dev/null");
        exec("man", "-w", $ARGV[0], $ARGV[1]) or exit(127);
    }
    $file = do { local $/; <$manfd> };
    close($manfd);
}
unless ($file) {
    # Not found: show the error page between the top and bottom templates.
    open(my $f, ">&", \*STDOUT) or syserror("cannot duplicate stdout: $!");
    interpret($f, $htmltop, $errpage, $htmlbot);
    exit 0;
}
chomp $file;
push @deps, $file;

# Split the name and determine the root mandir
my ($manfile, $mandir) = fileparse($file);
$mandir =~ s/\/man[1-8n]//;
$manfile =~ s/^(.*\.[1-8n])\..*/$1/;
$manfile .= ".html";

# Create the cache directory and its per-section subdirectory.  A failed
# mkdir is not an error if the directory exists afterwards: a concurrent
# request may have created it in the meantime.
if (! -d $cachedir && ! mkdir($cachedir, 0755)) {
    my $reason = "$!";
    syserror("mkdir $cachedir: $reason") unless (-d $cachedir);
}
$cachedir .= "/cat$ARGV[0]";
if (! -d $cachedir && ! mkdir($cachedir)) {
    my $reason = "$!";
    syserror("mkdir $cachedir: $reason") unless (-d $cachedir);
}
my $cachefile = "$cachedir/$manfile";
push @deps, $cachefile;
# Check if it has an include file.  Include files are honored only for
# pages located under $docdir.  The directory name is matched literally
# (\Q...\E) and must be followed by a slash, so that an unset docdir or a
# sibling directory sharing the same prefix does not qualify.
if ($incfilesuf && defined($docdir) && $docdir ne '') {
    (my $docroot = $docdir) =~ s{/+\z}{};
    if ($file =~ m{\A\Q$docroot\E/}) {
        my $incfile = "$file$incfilesuf";
        if (-f $incfile) {
            push @deps, $incfile;
            $ENV{'INCFILE'} = $incfile;
        }
    }
}

# Override top and bottom files, if necessary.
$htmltop = "$mandir/top.html" if (-R "$mandir/top.html");
$htmlbot = "$mandir/bottom.html" if (-R "$mandir/bottom.html");
push @deps, $htmltop, $htmlbot;

# Process grohtml options
if (-R "$mandir/grohtml.opt") {
    addopts("$mandir/grohtml.opt");
    push @deps, "$mandir/grohtml.opt";
}
# (Re)generate the cached page unless it exists and is newer than all of
# its dependencies.
unless (-e $cachefile && checkdeps($cachefile)) {
    # Assign the file-scope variable (no "my" here): sighan() reads it to
    # remove the partial file, and a new lexical would hide it from there.
    $tempcachefile = "$cachefile.$$";
    open(my $fd, ">", $tempcachefile)
        or syserror("opening $tempcachefile: $!");
    foreach my $signame (qw(HUP INT QUIT PIPE TERM)) {
        $SIG{$signame} = \&sighan;
    }

    # The command below is run by the shell, so the file name is enclosed
    # in single quotes, with embedded single quotes escaped.
    (my $quoted = $file) =~ s/'/'\\''/g;
    $quoted = "'$quoted'";

    my $groffcmd = "groff -Thtml -man ";
    if ($includepath) {
        $groffcmd .= join(' ', map { "-I $_" } split(/:/, $includepath)) . " ";
    }
    if ($#grohtml_opts >= 0) {
        $groffcmd .= join(' ', @grohtml_opts) . " ";
    }
    # Compressed pages are decompressed on the fly and fed to groff on its
    # standard input.
    if ($file =~ /.*\.([^.]+)$/ && exists($decomp{$1})) {
        $groffcmd = "$decomp{$1} $quoted | $groffcmd -";
    } else {
        $groffcmd .= $quoted;
    }
    open(my $p, "-|", $groffcmd) or syserror("running \"$groffcmd\": $!");

    interpret($fd, $htmltop);
    # State map:
    #   0 - before <body>
    #   1 - between <body> and </body>
    # Everything up to and including <body>, and everything starting from
    # </body>, is dropped; the templates supply those parts.
    my $state = 0;
    while (my $html = <$p>) {
        if ($state == 0) {
            if ($html =~ /<body>(.*)/) {
                print $fd "$1\n";
                $state = 1;
            }
            next;
        }
        if ($html =~ /(.*)<\/body>/) {
            print $fd "$1\n";
            last;
        }
        # Turn each manpage reference, i.e. a bold name followed by a
        # section in parentheses, into a hyperlink built from $manref.
        if ($html =~ /<b>[^<>]+<\/b>\([0-9n]\)/) {
            while ($html =~ /(.*?)<b>([^<>]+)<\/b>\(([0-9n])\)(.*)/) {
                my ($before, $pref, $sect, $rest) = ($1, $2, $3, $4);
                print $fd $before;
                # A minus sign in the name (as an entity or as the UTF-8
                # encoded character) stands for a plain hyphen.
                (my $href = $pref) =~ s/&minus;|\xE2\x88\x92/-/g;
                print $fd '<a href="' . $manref . $sect . $mansep . $href
                        . '"><b>' . $pref . '</b>(' . $sect . ')</a>';
                $html = $rest;
            }
            print $fd "$html\n";
        } else {
            print $fd $html;
        }
    }
    # The status of this close is not checked: reading stops at </body>,
    # so groff may well be terminated by SIGPIPE.
    close($p);

    interpret($fd, $htmlbot);
    # Buffered write errors surface at close; do not cache a truncated page.
    unless (close($fd)) {
        my $reason = "$!";
        unlink($tempcachefile);
        syserror("closing $tempcachefile: $reason");
    }
    rename($tempcachefile, $cachefile) or
        syserror("failed to rename $tempcachefile to $cachefile: $!");
}
# Send the cached page to the client.  The loop must read from the handle:
# a "while" with an empty condition never terminates.
open(my $cachefd, "<", $cachefile)
    or syserror("opening $cachefile for reading: $!");
while (my $chunk = <$cachefd>) {
    print $chunk;
}
close($cachefd);
__END__
=head1 NAME
mansrv - manpage server
=head1 SYNOPSIS
B<mansrv> I<SECTION> I<TITLE>
=head1 DESCRIPTION
This CGI script searches for the manpage I<TITLE> in the section I<SECTION>
and displays it as HTML. It uses B<groff>(1) to create the initial
translation and postprocesses its output, replacing references to other
manpages by appropriate hyperlinks and fixing some other minor
inconsistencies.
The program is designed to help display on-line the documentation in
manpage format for multiple software projects without the need to
install these manpages somewhere in the system B<MANPATH>. This is
necessary for software forge sites that host a number of projects.
To this effect, the following directory structure is assumed. Each
software project keeps its documentation files in a separate directory,
located in the common B<docdir> directory. If the project's directory
contains a directory named B<man>, its full pathname is prepended to
the system B<MANPATH>. The prepended directories are sorted in
lexicographical order.
When a manpage is requested, it is looked up in the B<MANPATH> using
B<man -w>. If found, it is piped through B<groff -Thtml -man> to
obtain the initial translation. Each project can customize the call
to B<groff> by placing the file named B<grohtml.opt> in its B<man>
subdirectory. This file should list the valid B<grohtml>(1) options,
one option per line. Empty lines and comments (beginning with B<#>)
are ignored.
The HTML prologue (anything up to and including the B<< <body> >> tag)
and epilogue (starting from the B<< </body> >> closing tag) are
removed and replaced by the content of the HTML top and bottom files,
which are supplied in the configuration file (see
the B<htmltop> and B<htmlbot> settings below). A project can
override any one or both of these files by placing the files
B<top.html> and B<bottom.html> in its B<man> subdirectory.
Before inclusion, both files are subject to template expansion,
during which any text between B<{%> and B<%}> is evaluated as a Perl
expression and replaced with its value. See the section
B<TEMPLATE SUBSTITUTIONS> below for the list of variables available to
these expressions.
If the source man page is located in one of the hosted projects'
B<man> subdirectories (as opposed to the system default B<MANPATH>),
B<mansrv> also looks for an optional I<include file>, which has the
same pathname as the source page plus the predefined filename suffix
(see the B<incfilesuf> configuration setting below). If such a file
exists, its full pathname is assigned to the B<INCFILE> environment
variable. This allows customizing man pages for online display,
by using the following B<groff> construct:
.if !"\V[INCFILE]"" .so \V[INCFILE]
The produced HTML output is stored in the cache directory (as
specified by the B<cachedir> configuration setting). Subsequent
calls to B<mansrv> with the same arguments will retrieve the
already generated page from the cache, instead of regenerating
it each time.
The cached page, however, is invalidated and its regeneration
is triggered if any of the files it depends upon is newer than
the cached copy. The list of dependencies includes (apart from
the B<mansrv> program itself and its configuration file), both
HTML top and bottom files, the B<grohtml.opt> file and the include
file (if any).
=head1 CONFIGURATION
The program reads its configuration from the file named in the
environment variable B<MANSRV_CONF>. If not set, the default
file name B</etc/mansrv.conf> is used.
The configuration file has the usual UNIX configuration format. Empty
lines and comments (beginning with B<#>) are ignored. Each non-empty
line must have the form I<NAME>B<=>I<VALUE>, with any amount of optional
whitespace around the equals sign. Valid variable names are:
=over 4
=item B<docdir>
Document root directory. This is a directory under which B<mansrv> looks
for additional man hierarchies.
=item B<manref>
The stem part for references to other manpages. By default it is deduced
from the B<SCRIPT_NAME> environment variable.
If the value of this variable ends with B<?>, section number and manpage
name in the resulting reference will be separated by B<+> sign. If its value
ends with B</> the two arguments will be separated by B</>. Otherwise,
B<?> will be appended to B<manref> and B<+> separator assumed.
=item B<cachedir>
Cache directory. The default is B</tmp/mansrv>.
=item B<htmltop>
Path to the HTML top template file. This file must contain at least
the basic HTML prologue part, between B<< <html> >> and B<< <body> >> tags,
inclusive. See B<TEMPLATE SUBSTITUTIONS> below for details on its
processing.
=item B<htmlbot>
Path to the HTML bottom template file. It must contain at least
the closing B<< </body> >> and B<< </html> >> tags.
=item B<errpage>
Path to the template file for the error page. This page is displayed
when the requested manpage was not found.
=item B<incfilesuf>
Include file suffix. If defined, and the manpage file to be displayed is
located under B<docdir>, B<mansrv> will look for the file named
I<FILE>B<incfilesuf>, where I<FILE> is the manpage file name.
If this file exists, its full pathname will be assigned to the environment
variable B<INCFILE>.
=item B<includepath>
Include path for B<groff> (list of directories separated with colons).
=back
=head1 TEMPLATE SUBSTITUTIONS
While interpreting the contents of the files B<htmltop>, B<htmlbot> and
B<errpage>, the material between B<{%> and B<%}> is evaluated as a Perl
expression. It can make references to the following variables:
=over 4
=item B<$PACKAGE>
Canonical name of the program (B<mansrv>).
=item B<$VERSION>
Version of B<mansrv>.
=item B<%ENV>
Trimmed environment from the master process.
=item B<$SERVER>
Base server URL.
=item B<$TITLE>
Manpage title.
=item B<$SECTION>
Requested manpage section.
=back
=head1 LOGGING
The program logs all fatal errors to the B<syslog>(3) facility
B<daemon>.
=head1 ENVIRONMENT VARIABLES
The following environment variables affect the behavior of B<mansrv>:
=over 4
=item MANSRV_CONF
The pathname of the configuration file to use instead of the default
one.
=back
Before invoking B<groff>, the following environment variables are
defined:
=over 4
=item B<MANCGI>
Contains the string B<WEBDOC>.
=item B<INCFILE>
Defined if the include file is found (see the description of the
configuration variable B<incfilesuf>).
=back
=head1 FILES
=over 4
=item B</etc/mansrv.conf>
Default configuration file.
=back
=head1 SEE ALSO
B<man>(1), B<groff>(1).
=head1 AUTHOR
Sergey Poznyakoff