#! /local/bin/perl
##---------------------------------------------------------------------------##
##  File:
##      man.cgi
##  Author:
##      Earl Hood       ehood@convex.com
##  Description:
##	man.cgi is a CGI program for viewing Unix manpages.  The
##	program man2html,
##	<URL:http://www.oac.uci.edu/indiv/ehood/man2html.html>,
##	is used to convert the output from man(1) to html.
##
##	If man.cgi is invoked with no input data, it will output a
##	form for the user to select a manpage to view.
##	man.cgi can handle POST and GET methods.
##
##	The code section "Configureable Globals" is designed to
##	allow you to modify man.cgi to work with your particular
##	system configuration.
##---------------------------------------------------------------------------##
##  Copyright (C) 1995  Earl Hood, ehood@convex.com
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##  
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##  
##  You should have received a copy of the GNU General Public License
##  along with this program; if not, write to the Free Software
##  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##---------------------------------------------------------------------------##

########################################################################
##	Configureable Globals
##
##	Change the value of these variables to reflect your
##	system configuration.
##
##  English name for program
$ProgName	= "Manpage Viewer";

##  man programs
$ManPrg		= '/usr/bin/man';
$CatPrg		= '/usr/bin/cat';
$PcatPrg	= '/usr/bin/pcat';

##  nroff program
$NroffPrg	= '/usr/bin/nroff';

##  Flag if the -cgiurl option should be used
$DoCgiurl	= 1;

##  System specific arguments to man2html:
##	HP  => "-leftm 1 -topm 8"
##	Sun => "-sun"
##  See man2html documentation for more information.
#ConvArgs	= "-leftm 1 -topm 8";
$ConvArgs	= "-sun";

##  Keyword search processing arguments for man2html.  Normally,
##  '-k' is adequate.  However, if on a Solaris system, the
##  '-solaris' option should be specified with '-k'.  See
##  man2html documentation on information about the '-solaris' option.
#$KeyWArgs	= "-k";			# Normal
$KeyWArgs	= "-k -solaris";	# For Solaris

##  Possible manual sections.  This array is used to determine the
##  the choices available in an option menu.
@Sections	= (
    '1', '1M', '1N', '2', '2p', '3', '3C', '3F', '3X', '4', '5', '6', '7', '8', '9',
);

##  Form method.  The value is either 'GET' or 'POST'.  'GET' is
##  recommended since the URL sent by the client also contains
##  the argument information.  This allows a client's "Reload" function
##  to reprocess a currently viewed manpage.
$FormMethod	= 'GET';

##  Argument separator for CGI URL links.  As clients become more
##  SGML conformant, the simple use of '&' conflicts with
##  SGML syntax.  You can set this variable to control what is
##  used as the separator.  Possibilities:
##	&amp;
##	&#38;
##
$ArgSep		= '&';

##  MANPATH for man
$MANPATH = $manpath = "/proj/i4memsy/man:/proj/MEMSOS/USR/catman";
$ENV{'MANPATH'}	= $manpath;

($MEMSYSCRIPTS = $ENV{'SCRIPT_FILENAME'}) =~ s/\/man\.cgi//;
$SYS5MANPATH = "/proj/MEMSOS/USR/catman";

##	End Configureable Globals section
########################################################################

########################################################################
##	Globals
########################################################################
($PROG = $0)	=~ s/.*\///;		# Name of program
%FORM		= ();			# Array to hold form contents
$Error		= '';			# Error string
$Man2html	= "$MEMSYSCRIPTS/man2html";   # man2html program
$MINDEX		= "$MEMSYSCRIPTS/mindex";
$TMP            = '/tmp';

########################################################################
##	Main block
{
    #	Set unbuffered I/O.  Prevents buffering problems with
    #	"system()" calls.
    select((select(STDOUT), $| = 1)[0]);

    #	Print content-type header
    &printouttype("text/html");

    #	Print man form if called w/no arguments
    if (&noarg())
    {
	&printhead($ProgName);
	&printform();
	&printend();
	exit 0;
    }

    #	If reached here, there is input to process
    &error("CGI input error") unless &parseinput();
    &doit();
    exit 0;
}
########################################################################
##	Subroutines
########################################################################
#-----------------------------------------------------------------------
#	printform outputs the man selection form to the client.
#
sub printform {
    local($topic, $section, $secnum);
    
    $topic = $FORM{'topic'};
    $section = $FORM{'section'};
    $section = "all" unless ($section);
    $section =~ tr/[A-Z]/[a-z]/;

    print STDOUT <<EndOfForm;
<hr>
<form method="$FormMethod" action="$PROG">
<table border=0 width=100%>
<tr>
<td colspan=3 align=center>The following form allows you to view manpages available on the 
MEMSOS system. Please fill out the following fields and select 'Submit'
to view a manpage.</td>
</tr>
<tr><td><br></td></tr>
<tr>
<td align=center>Section: <select name=section> 
EndOfForm

    printf STDOUT "<option value=\"all\" %s>All Sections</option>\n", ($section eq "all") ? "selected" : "";
    printf STDOUT "<option value=\"keyword\" %s>Keyword Search</option>\n", ($section eq "keyword") ? "selected" : "";

    #	Print out possible section choices
    foreach $secnum (@Sections) {
	printf STDOUT "<option value=\"$secnum\" %s>Section $secnum</option>\n", ($section eq $secnum) ? "selected" : "";
    }

    printf STDOUT "</select></td>\n<td align=center>Topic: <input type=\"TEXT\" name=\"topic\" %s></td>\n", "value=\"$topic\"";

print STDOUT <<EndOfForm;
<td align=center><input align="right" type="SUBMIT" value="Submit"></td>
</p>
</tr>
</table>
</form>
<hr>
EndOfForm
}

#-----------------------------------------------------------------------
# build_manexec - compose the correct string for open command
#
sub build_manexec
{
    local($file) = @_;
    local($manexec, $type);

    $manexec = "$CatPrg $file";
    $type = `file $file`;
    print "File type: $type<BR>\n" if ($debug);
    $manexec = "$PcatPrg $file" if ($type =~ /packed/);

    system("$manexec > $TMP/man.out");
    $type = `grep '^\.SH' $TMP/man.out`;
    unlink("$TMP/man.out");

    $manexec .= " | $NroffPrg -man" if ($type =~ /SH/);

    print STDOUT "manexec: $manexec<BR>\n" if ($debug);
    return("$manexec");
}


#-----------------------------------------------------------------------
#	get man output
#
sub do_manexec
{
    local($manexec) = @_;
    local($manout, $sep);

    #   Execute man...
    if ($manexec && open(MANPRG, "$manexec 2>/dev/null |")) {
	$sep = $/;
	$/ = 0777;
	$manout = <MANPRG>;
	close(MANPRG);
	$/ = $sep;
    }
    else
    {
	&error("Unable to execute '$manexec'");
    }
    return($manout);
}


#-----------------------------------------------------------------------
#	get man output
#
sub get_manout
{
    local($section, $topic, $manpage, $memsy) = @_;
    local($manexec, $manout, @files, $file, $header, $body, $str, $href);
    local(%mindex, $manual, $path, $sec, @data);

    # 1. A specific man page is desired and we have the path
    if ($manpage)
    {
	$manexec = &build_manexec($manpage);
	$manout = &do_manexec($manexec);
	return($manout);
    }

    # 2. Man page may be a MEMSOS man page
    if (!$memsy)
    {
	$manexec = "$ManPrg";
	if ($section =~ /keyword/) {
	    $manexec .= " -k $topic";
	} else {
	    &error("No topic entered")  unless $topic;
	    $manexec .= " -s $section"  if $section !~ /all/;
	    $manexec .= " $topic";
	}
	$manout = &do_manexec($manexec);
	return($manout);
    }
	
    # 3. Man page was not found in MEMSOS pages -> is a SysV page
    #    ($memsy && !$manpage)

    # Read mindex database if we have one
    if ( -f "$MINDEX" && open(MINDEX, "$MINDEX"))
    {
	print STDOUT "Reading MINDEX database:<BR>\n" if ($debug);
	while (<MINDEX>)
	{
	    ($name, $sec, @data) = split(',', $_);
	    next if ($topic ne $name);
	    next if ($section && $section ne "all" && $section ne $sec);
	    print "$name($sec): @data<BR>\n" if ($debug);
	    $mindex{"$name,$sec"} = join(',', @data);
	}
	close(MINDEX);
	printf "%d entries read<BR><HR>\n", int(%mindex) if ($debug);
    }
    # nop, use a simple echo command
    else
    {
	$section =~ tr/[A-Z]/[a-z]/;
	$section = "\*" if ($section eq "all");
	@files = split('\s+', `echo $SYS5MANPATH/*/man$section*/$topic* 2>/dev/null`);
	# build mindex substitute
	foreach $file (@files)
	{
	    $file =~ s/$SYS5MANPATH\///;
	    ($path, $sec, $manual) = split('/', $file);
	    $path = "$path/$sec";
	    $sec =~ s/man//;
	    $manual =~ s/\.([1-9][a-z]*|z)//g;
	    $mindex{"$manual,$sec"} = "$path,$manual,no information available";
	}
    }
    # Some debug blurb
    if ($debug)
    {
	print "FILES found: <BR>\n";
	foreach $manual (sort(keys(%mindex)))
	{
	    print "$manual,$mindex{$manual}<BR>\n";
	}
	print "<HR>\n";
    }
    # Shortcut: only one manual found
    if (int(%mindex) == 1)
    {
	@data = keys(%mindex);
	$manual = $mindex{$data[0]};
	print "Entry: $manual<BR>\n" if ($debug);
	($path, $manual, $descr) = split(',', $manual);
	$file = "$SYS5MANPATH/$path/$manual\*";
	$manexec = &build_manexec($file);
	print "Shortcut: manexec=`$manexec'<BR><HR>\n" if ($debug);
	$manout = &do_manexec($manexec);
	return($manout);
    }
    # Oops, nothing found
    if (int(%mindex) == 0)
    {
	&printhead($ProgName);
	print STDOUT "<HR><B>Nothing found for $topic</B>\n";
	&printform;
	&printend;
	exit(0);
    }
    # More than one entry found
    &printhead("Man: $topic", "Multiple entries found for: $topic");
    foreach $file (sort(keys(%mindex)))
    {
	($name, $sec) = split(',', $file);
	($path, $manual, $descr) = split(',', $mindex{$file});
	$href = "$PROG?file=$file&topic=$topic";
	$header = $header . "<STRONG><A HREF=\"$href\">$name($sec)</A></STRONG> - $descr<BR>\n";
    }
    print STDOUT "$header\n<HR>\n";
    &printend;
    exit;
}
#-----------------------------------------------------------------------
#	build htmlexec
#
sub build_htmlexec
{
    local($section, $topic, $manpath) = @_;
    local($htmlexec);

    $htmlexec = "$Man2html";
    if ($section =~ /keyword/) {
	$htmlexec .= qq| $KeyWArgs -title "Keyword search: \\"$topic\\""|;
    } else {
	&error("No topic entered")  unless $topic;
	$htmlexec .= qq| $ConvArgs -title "$topic($section)"|;
    }
    #	Check if doing man xref detection
    if ($DoCgiurl) {
	$htmlexec .= q| -cgiurl '| .
                     $PROG .
		     q|?section=${section}${subsection}| .
		     $ArgSep .
		     q|topic=${title}| .
                     $ArgSep .
                     q|manpath=${manpath}'| .
		     " -manpath $manpath";
    }
    print STDOUT "HtmlExec: $htmlexec<BR>\n" if ($debug);
    return($htmlexec);
}
#-----------------------------------------------------------------------
#	doit does the conversion
#
sub doit {
    local($section, $topic, $manexec, $htmlexec, $manout, $manpath, $memsy, $file);
    $manout = '';

    # Get section and topic from input
    $debug = $FORM{'debug'};
    $topic = $FORM{'topic'};

    # The section part may be empty, then use 'all'
    $section = $FORM{'section'};
    $section = "all" unless ($section);
    $section =~ tr/[A-Z]/[a-z]/;

    # This is special: file is set in cases there you have a manual with the
    # same name in the same section, but in different parent sections.
    # e.g.: .../catman/a_man/man1/intro.z and .../catman/u_man/man1/intro.z
    $file = $FORM{'file'};

    $manpath = $FORM{'manpath'};
    $manpath = $MANPATH unless ($manpath);
    &error("Questionable characters in topic \'$topic\'")  if ($topic && &isquestionable($topic));

    #   Set MANPATH
    $ENV{'MANPATH'} = $manpath;
    
    $memsy = 0;
    for ($memsy = 0; $memsy <= 1; $memsy++)
    {
	#   Get manual
	$manout = &get_manout($section, $topic, $file, $memsy);
	#   Error handling
	if ($manout =~ /^\s*$/ || $manout =~ /^no (manual )*entry/i)
	{
	    if ($memsy == 1)
	    {
		# Not found so far, try MEMSYMAN
		&error("Nothing found for $topic");
	    }
	}
	else
	{
	    last;
	}
    }

    #   Determine command arguments for man and man2html
    $htmlexec = &build_htmlexec($section, $topic, $manpath);

    #   No errors, so feed man2html
    if (open(MAN2HTML, "| $htmlexec 2>/dev/null")) {
	print MAN2HTML $manout;
	close(MAN2HTML);
    } else {
	&error("Unable to execute '$htmlexec'");
    }
}
########################################################################
##	Generic subroutines for CGI use
########################################################################
#-----------------------------------------------------------------------
#	noarg returns true if no arguments were passed to script.
#
sub noarg {
    $ENV{"REQUEST_METHOD"} eq "GET" && $ENV{"QUERY_STRING"} =~ /^\s*$/;
}
#-----------------------------------------------------------------------
#	parseinput converts the input data into the %FORM array
#
sub parseinput {
    local($method) = ($ENV{"REQUEST_METHOD"});
    local($data);
    if ($method eq "GET") {
	$data = $ENV{"QUERY_STRING"};
    } elsif ($method eq "POST") {
	read(STDIN, $data, $ENV{"CONTENT_LENGTH"});
    } else {
	$Error = "Unrecgonized request method : $method";
	return 0;
    }
    local(@pairs, $name, $value);
    if ($data ne '') {
	@pairs = split(/&/, $data);
	foreach (@pairs) {
	    ($name, $value) = split(/=/);
	    $name = &expandstr($name);
	    $value = &expandstr($value);
	    $FORM{$name} = $value;
	}
    }
    1;
}
#-----------------------------------------------------------------------
#	printouttype prints out specified content-type header back
#	to client
#
sub printouttype {
    local($type) = shift;
    print STDOUT "Content-type: $type\r\n\r\n";
}

#-----------------------------------------------------------------------
#	printhead outputs html prematter
#
sub printhead {
    local($title, $h1) = @_;
    $h1 = $title  unless $h1;

    print STDOUT `/proj/i4/bin/mkhtml +body BGCOLOR=\"WhiteSmoke\" +title "$title" -flag -onlyheader +person tsthiel ++arrows +home "http://www4.informatik.uni-erlangen.de/IMMD-IV/Projects/MEMSY/index.html" +up "http://www4.informatik.uni-erlangen.de/IMMD-IV/Projects/MEMSY/pguide.html"`;
    print STDOUT <<ENDOFHEAD;
<H1>$h1</H1>
ENDOFHEAD
}

#-----------------------------------------------------------------------
#	printend outputs html postmatter
#
sub printend {
    print STDOUT <<ENDOFEND;
</BODY>
</HTML>
ENDOFEND
}

#-----------------------------------------------------------------------
#	error prints an error out to the client.
#
sub error {
    local($str) = &htmlize(shift);
    &printhead("$ProgName Error");
    $str .= ":"  if $Error && $str;
    $str .= " $Error";
    print STDOUT "<p>$str</p>";
    &printend();
    exit 0;
}

#-----------------------------------------------------------------------
#	htmlize translates special characters to enitity refs.
#
sub htmlize {
    local($str) = shift;
    $str =~ s/&/\&amp;/g;
    $str =~ s/</\&lt;/g;
    $str =~ s/>/\&gt;/g;
    $str;
}
#-----------------------------------------------------------------------
#	expandstr translates hex codes to characters
#
sub expandstr {
    local($str) = shift;
    $str =~ tr/+/ /;
    $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/ge;
    $str;
}
#-----------------------------------------------------------------------
#	isquestionable determines if $str contains questionable
#	characters if $str is used in a subshell invocation.
#
sub isquestionable {
    local($str) = shift;
    $str !~ /^[a-zA-Z0-9_\.\:\-+ \t\/@%]+$/;
}
########################################################################
