Cover V07, I05
Article
Listing 1

may98.tar


Listing 1: man.sh

#!/bin/ksh
####################################################################
# Copyright (C) 1998 Steven G. Isaacson, All rights reserved.
####################################################################
# man.sh - web-enable man pages.  This script should go in your web
# server's $SERVER_ROOT/cgi-bin directory.

# Choose the awk interpreter used by the rest of the script.  Plain
# awk is the fallback; gawk replaces it when installed, and nawk
# replaces either of them when installed (the last match wins).
AWK=awk
if whence gawk > /dev/null
then
    AWK=gawk
fi
if whence nawk > /dev/null
then
    AWK=nawk
fi

# Scratch files: $tmpfile holds the cleaned-up man page text and
# $tmpfile2 the list of lines matching the search string.
#
# They are created with mktemp rather than a predictable
# /tmp/man.<pid> name, so another user cannot pre-create or symlink
# the path.  The EXIT trap removes them on every way out of the
# script, including the early error exits.  If mktemp fails we exit
# before any CGI header has been written, so the web server reports
# the failure itself.
tmpfile=
tmpfile2=
trap 'rm -f "$tmpfile" "$tmpfile2"' EXIT
trap 'exit 1' HUP INT TERM

tmpfile=$(mktemp "${TMPDIR:-/tmp}/man.XXXXXX") || exit 1
tmpfile2=$(mktemp "${TMPDIR:-/tmp}/man2.XXXXXX") || exit 1

####################################################################
# standard html header stuff
####################################################################

# CGI response header followed by the mandatory blank line.  printf is
# used because whether echo expands "\n" differs between shells.
printf 'Content-type: text/html\n\n'

####################################################################
# parse the query string
####################################################################
# Pull the two form fields this script understands, "srch" and "man",
# out of the request and store them in the shell variables of the
# same names.
#
# Example.  If "2 chown" is entered into the form, with no search
# criteria, then QUERY_STRING looks like this:
#     srch=&man=2+chown
#
# and we end up with:
#     srch=""
#     man="2 chown"
#
# The request text comes straight from the network, so it is treated
# purely as data: it is never handed to eval, and no shell variable
# other than srch and man can be set from it.

#-------------------------------------------------------------------
cgi_param() {
#-------------------------------------------------------------------
# Print the decoded value of one form field.
#   $1 - field name to look for
#   $2 - application/x-www-form-urlencoded text (name=value&name=value)
# Prints nothing if the field is absent.  If the field appears more
# than once, the last occurrence wins.  Decoding turns "+" into a
# space, then %XX into the corresponding byte; carriage returns are
# dropped.  LC_ALL=C makes awk produce single bytes from %c.  AWK is
# normally set near the top of the script; plain awk is the fallback.

    printf '%s\n' "$2" |
    LC_ALL=C "${AWK:-awk}" -F'&' -v want="$1" '
    function hexval(c) {
        return index("0123456789ABCDEF", toupper(c)) - 1
    }
    function urldecode(s,    out, code) {
        gsub(/\+/, " ", s)
        out = ""
        while (match(s, /%[0-9A-Fa-f][0-9A-Fa-f]/)) {
            code = 16 * hexval(substr(s, RSTART + 1, 1))
            code = code + hexval(substr(s, RSTART + 2, 1))
            out = out substr(s, 1, RSTART - 1)
            if (code > 0)       # a NUL byte cannot live in a shell variable
                out = out sprintf("%c", code)
            s = substr(s, RSTART + 3)
        }
        return out s
    }
    {
        for (i = 1; i <= NF; i++) {
            field = $i
            gsub(/\r/, "", field)
            eq = index(field, "=")
            if (eq == 0) {
                name = field
                raw = ""
            } else {
                name = substr(field, 1, eq - 1)
                raw = substr(field, eq + 1)
            }
            if (name == want) {
                value = urldecode(raw)
                found = 1
            }
        }
    }
    END {
        if (found)
            printf("%s", value)
    }'
}

# A POST request carries the form data on stdin, a GET request in
# QUERY_STRING.  stdin can only be read once, so capture it first.
if test "$REQUEST_METHOD" = POST
then
    cgi_query=$(cat -)
else
    cgi_query=$QUERY_STRING
fi

man=$(cgi_param man "$cgi_query")
srch=$(cgi_param srch "$cgi_query")

#-------------------------------------------------------------------
escape() {
#-------------------------------------------------------------------
# Escape characters for HTML output.
#
# Filter: reads text on stdin and writes it to stdout with the three
# characters that are special in HTML text replaced by their standard
# entities (& becomes &amp;  < becomes &lt;  > becomes &gt;).
# The ampersand is handled first so that the ampersands introduced by
# the other two replacements are not escaped a second time.  No
# placeholder tokens are used, so text that happens to contain
# something like __LT__ passes through untouched.

    sed -e 's/&/\&amp;/g' \
        -e 's/</\&lt;/g' \
        -e 's/>/\&gt;/g'
}

#-------------------------------------------------------------------
display_form()
#-------------------------------------------------------------------
# Write the query form to stdout: a "srch" text field for the search
# string, a "man" text field for the page to show, and submit/reset
# buttons.  The form submits back to /cgi-bin/man.sh with GET.
{
    printf '%s\n' \
        '<h2><form method="GET" action="/cgi-bin/man.sh">' \
        'Search string <input name="srch" type=text size=20>' \
        'man page <input name="man" type="text" size="15">' \
        '<input type="submit"> <input type="reset">' \
        '</form></h2>'
}

####################################################################
# Start our HTML document
####################################################################

# $man comes from the request, so it is passed through escape before
# being placed in the page; otherwise a crafted request could inject
# markup or script into the response.
man_html=$(printf '%s\n' "$man" | escape)

# Title, page heading, and the opening <pre> that the rest of the
# output is written into.  <body> is opened before the heading so
# that the heading is inside it.
printf '%s\n' "<html><head><title>man $man_html</title></head>
<body>
<h1 align=center>man <font color=\"#FF0080\">
<tt>$man_html</tt></font><br></h1>
<pre>"

# Nothing was asked for: report it, offer the form again, and stop.
if [ -z "$man" ]
then
    echo "Error: no man request specified"
    display_form
    echo "</body></html>"
    exit 1
fi

# Without a man program on the PATH there is nothing we can show.
# Report the PATH being searched to help whoever installs the script.
# It is printed quoted so that it appears exactly as set, without
# word splitting or filename expansion.
whence man > /dev/null || {
    echo "<h2>Error: man program not found.</h2>"
    echo "Here is the \$PATH"
    printf '%s\n' "$PATH"
    echo "</body></html>"
    exit 1
}

####################################################################
#                                 man
####################################################################

# The man command on some platforms wants an explicit option in front
# of the section number.  section_flag is the URL-encoded text placed
# before the section in generated links; it is empty where no option
# is needed.
if test "$(uname -s)" = SunOS
then
    section_flag="-s+"          # plus-sign is a space in URL.
else
    section_flag=""
fi

# Run man for the request in $man and leave the cleaned-up page text
# in $tmpfile.  An empty $tmpfile means there was nothing to show.

#-------------------------------------------------------------------
man_request_ok() {
#-------------------------------------------------------------------
# Succeed only if the request in $1 is safe to expand unquoted on the
# man command line.  The request comes from the network, so:
#   - only letters, digits, space and the characters _ . + : - are
#     accepted, which rules out shell wildcards and anything else
#     the shell could act on;
#   - the only word allowed to start with "-" is the section option
#     -s that this script itself puts into its links, so a request
#     cannot pass arbitrary options to man.

    case "$1" in
    "" | *[!A-Za-z0-9_.+:" "-]*)
        return 1 ;;
    esac

    for man_word in $1
    do
        case "$man_word" in
        -s) ;;
        -*) return 1 ;;
        esac
    done
    return 0
}

#-------------------------------------------------------------------
strip_overstrikes() {
#-------------------------------------------------------------------
# Filter: remove nroff overstrike formatting, i.e. every
# "character, backspace" pair, leaving only the final character.
# printf generates the backspace so that no raw control character
# has to be stored in this file.

    sed -e "s/.$(printf '\b')//g"
}

#-------------------------------------------------------------------
squeeze_blank_lines() {
#-------------------------------------------------------------------
# Filter: drop page-number lines and shrink down runs of blank lines.
# (See also more -s.)  An empty line is printed only if the previous
# two lines kept were not both empty, so at most two empty lines in a
# row survive and empty lines at the very top are dropped.
# "next" is used to skip a line; "continue" is only valid in a loop.
# The line is compared with "" so that a line reading just 0 is not
# mistaken for a blank one.

    "${AWK:-awk}" 'BEGIN {i=1}
    /   - [0-9]* -   / {next} # skip page lines
    {
        if ($0 != "" || !(last_line[1] == "" && last_line[2] == ""))
            print

        last_line[i++]=$0
        if (i == 3) i=1
    }'
}

if man_request_ok "$man"
then
    # Set TERM=dumb because this tells some man programs to strip out
    # the formatting characters.  But that doesn't always work, so
    # strip_overstrikes also forces it manually.  $man is deliberately
    # unquoted: "2 chown" must reach man as two arguments.
    TERM=dumb man $man |
    strip_overstrikes |
    squeeze_blank_lines > "$tmpfile"
else
    # Rejected request: behave as if man had produced nothing.
    cat /dev/null > "$tmpfile"
fi

# An empty $tmpfile means man had nothing for this request: say so,
# offer the form again, remove the scratch files and stop.  The
# request text is escaped before it is echoed back into the page, and
# printf is used so that backslashes in it are printed as they are.
test -s "$tmpfile" || {
    printf '%s\n' "<h2>No man entry found for $(printf '%s\n' "$man" | escape)</h2>"
    display_form
    echo "</body></html>"
    rm -f "$tmpfile" "$tmpfile2"
    exit 1
}

# We got something from the man program.

# Now, if search criteria were entered, build an index of the matching
# lines.  Each entry links to an anchor named after the HTML-escaped
# text of the line; the anchors themselves are written when the page
# body is produced.
if test "$srch"
then

    # The search string comes from the request, so escape it before
    # showing it in the page.
    srch_html=$(printf '%s\n' "$srch" | escape)

    # $srch is used by awk as a regular expression and is matched
    # against the escaped text of each line.
    escape < "$tmpfile" |
    $AWK -v x="$srch" '$0 ~ x {
        printf("<li><a href=\"#%s\">%s</a></li>\n", $0, $0)
    }' > "$tmpfile2"

    # Was anything found?
    if test -s "$tmpfile2"
    then
        # Target of the "Top of File" links.  The anchor is closed so
        # that it does not swallow the index that follows.
        echo "<a name=TOPOFFILE></a>"
        echo "<hr><ul>"
        printf '%s\n' "<h2 align=center>lines containing: \"$srch_html\"</h2>"
        cat "$tmpfile2"
        echo "</ul><br>"
    else
        printf '%s\n' "<h2>No lines found with: \"$srch_html\"</h2>"
    fi
    rm -f "$tmpfile2"
    echo "<hr>"
fi

# Write the man page saved in $tmpfile as the body of the HTML page.
#
# escape makes the text safe for HTML.  The awk program then treats
# each line as follows:
#
# 1) Every reference to another man page, i.e. a name followed by a
#    section in parentheses such as grep(1), is turned into a link
#    back to this script.  The link asks for "<section> <name>",
#    preceded by $section_flag on platforms whose man needs an option
#    in front of the section.  match() finds the references one after
#    another, so no marker characters have to be inserted into the
#    text first.
#
# 2) If a search string was given and the line matches it, the line
#    is preceded by an anchor, shown in color, and followed by a
#    "Top of File" link.  Both the match and the anchor name use the
#    escaped line as it was BEFORE the links of step 1 were added,
#    which is the same text the search index is built from, so the
#    index entries and the anchors agree.
#
# 3) Otherwise, section headings are highlighted.  Section headings
#    are thought to be any line starting with two capital letters,
#    optionally preceded by one space, such as NAME, SYNOPSIS,
#    SEE ALSO, etc.  A line that begins with a link from step 1 no
#    longer starts with capitals and is left alone.
#
# "next" is used to move on to the following line; "continue" is only
# valid inside a loop.

escape < "$tmpfile" |

"$AWK" -v section_flag="$section_flag" -v x="$srch" '
BEGIN { print "<pre>" }
{
    # Step 1: build "linked", a copy of the line with man page
    # references replaced by links.
    rest = $0
    linked = ""
    while (match(rest, /[-_.a-zA-Z0-9]+\([a-zA-Z0-9]+\)/)) {
        ref = substr(rest, RSTART, RLENGTH)
        lp = index(ref, "(")
        name = substr(ref, 1, lp - 1)
        section = substr(ref, lp + 1, length(ref) - lp - 1)
        link = sprintf("<a href=\"/cgi-bin/man.sh?man=%s%s+%s\">%s(%s)</a>",
                       section_flag, section, name, name, section)
        linked = linked substr(rest, 1, RSTART - 1) link
        rest = substr(rest, RSTART + RLENGTH)
    }
    linked = linked rest

    # Step 2: lines matching the search string.  An empty search
    # string means no search was requested.
    if (x != "" && $0 ~ x) {
        printf("<a name=\"%s\"></a><br>", $0)
        printf("<font color=\"#FF0080\">%s</font>", linked)
        printf("  <a href=#TOPOFFILE><small>Top of File</small></a>\n")
        next
    }

    # Step 3: section headings; allow zero or one leading space.
    if (linked ~ /^ ?[A-Z][A-Z]/)
        print "<strong>" linked "</strong>"
    else
        print linked
}'

# We're done.
echo "</pre></body></html>"

# Cleanup
rm -f "$tmpfile" "$tmpfile2"

exit 0
# End of File