summaryrefslogtreecommitdiff
path: root/doc/man2html.pl
diff options
context:
space:
mode:
authorStef Walter <stef@memberwebs.com>2006-04-05 20:51:48 +0000
committerStef Walter <stef@memberwebs.com>2006-04-05 20:51:48 +0000
commiteac1602858639b0653a8e15ff191ed9b9f64c440 (patch)
tree90349495c80410153c88cd73d4a0beef6b41997c /doc/man2html.pl
parent0be20dc6ea13149dc2083ac2add78441be8733f3 (diff)
Cleanup and initial open source release
Diffstat (limited to 'doc/man2html.pl')
-rwxr-xr-xdoc/man2html.pl230
1 files changed, 230 insertions, 0 deletions
diff --git a/doc/man2html.pl b/doc/man2html.pl
new file mode 100755
index 0000000..21e3ab5
--- /dev/null
+++ b/doc/man2html.pl
@@ -0,0 +1,230 @@
+#!/usr/bin/perl
+
+# TODO: We need to make this more resilient
+# currently expects args without enforcing
+
+$FIL = $NAM = $SEC = @ARGV[0];
+
+$NAM =~ s/^([^.]+)\..+$/$1/;
+$SEC =~ s/^.+\.([^.]+)$/$1/;
+
+$command = "groff";
+@args = split(" ", "-Tascii -mdoc $FIL");
+
+$enable_include_links = 0;
+
+man($NAM, $SEC);
+
+sub man {
+ local($name, $section) = @_;
+ local($_, $title, $head, *MAN);
+ local($html_name, $html_section, $prefix);
+ local(@manargs);
+ local($query) = $name;
+
+ # $section =~ s/^([0-9ln]).*$/$1/;
+ $section =~ tr/A-Z/a-z/;
+
+ $prefix = "Man ";
+ if ($alttitle) {
+ $prefix = "";
+ $title = &encode_title($alttitle);
+ $head = &encode_data($alttitle);
+ } elsif ($section) {
+ $title = &encode_title("${name}($section)");
+ $head = &encode_data("${name}($section)");
+ } else {
+ $title = &encode_title("${name}");
+ $head = &encode_data("${name}");
+ }
+
+ print &html_header("$title");
+ print "<H1>Man Page: ${title}</H1>";
+ print "<PRE>\n";
+
+ $html_name = &encode_data($name);
+ $html_section = &encode_data($section);
+
+ #print Dumper($sectionpath);
+ #print "yy $section yy $manpath\n";
+ if ($name =~ /^\s*$/) {
+ print "Empty input, no man page given.\n";
+ return;
+ }
+
+ if (index($name, '*') != -1) {
+ print "Invalid character input '*': $name\n";
+ return;
+ }
+
+ if ($section !~ /^[0-9ln]\w*$/ && $section ne '') {
+ print "Sorry, section `$section' is not valid\n";
+ return;
+ }
+
+ if (!$section) {
+ if ($sectionpath->{$manpath}) {
+ $section = "-S " . $sectionpath->{$manpath}{'path'};
+ } else {
+ $section = '';
+ }
+ } else {
+ if ($sectionpath->{$manpath}{$section}) {
+ $section = "-S " . $sectionpath->{$manpath}{$section};
+ } else {
+ $section = "-S $section";
+ }
+ }
+
+ # print "X $command{'man'} @manargs -- x $name x\n";
+ &proc(*MAN, $command, @args) ||
+ &mydie ("$0: open of $command{'man'} command failed: $!\n");
+ if (eof(MAN)) {
+ # print "X $command{'man'} @manargs -- x $name x\n";
+ print "Sorry, no data found for `$html_name" .
+ ($html_section ? "($html_section)": '') . "'.\n";
+ return;
+ }
+
+ local($space) = 1;
+ local(@sect);
+ local($i, $j);
+ while(<MAN>) {
+ # remove tailing white space
+ if (/^\s+$/) {
+ next if $space;
+ $space = 1;
+ } else {
+ $space = 0;
+ }
+
+ $_ = &encode_data($_);
+ if($enable_include_links &&
+ m,(<B>)?\#include(</B>)?\s+(<B>)?\&lt\;(.*\.h)\&gt\;(</B>)?,) {
+ $match = $4; ($regexp = $match) =~ s/\./\\\./;
+ s,$regexp,\<A HREF=\"$BASE/usr/include/$match\"\>$match\</A\>,;
+ }
+ /^\s/ && # skip headers
+ s,((<[IB]>)?[\w\_\.\-]+\s*(</[IB]>)?\s*\(([1-9ln][a-zA-Z]*)\)),&mlnk($1),oige;
+
+ # detect E-Mail Addreses in manpages
+ if (/\@/) {
+ s/([a-z0-9_\-\.]+\@[a-z0-9\-\.]+\.[a-z]+)/<A HREF="mailto:$1">$1<\/A>/gi;
+ }
+
+ # detect URLs in manpages
+ if (m%tp://%) {
+ s,((ftp|http)://[^\s<>\)]+),<A HREF="$1">$1</A>,gi;
+ }
+
+ if (/^<B>\S+/ && m%^<B>([^<]+)%) {
+ $i = $1; $j = &encode_url($i);
+ s%^<B>([^<]+)</B>%<B>$i</B>%;
+ push(@sect, $1);
+ }
+ print;
+ }
+ close(MAN);
+
+ print "<H6>Copyright, N. Nielsen&nbsp;&nbsp;&nbsp;[ <a href='./'>back</a> | <a href='../../'>home</a> ]</h6>";
+ print "</BODY>\n";
+ print "</HTML>\n";
+
+ # Sleep 0.35 seconds to avoid DoS attacs
+ select undef, undef, undef, 0.35;
+}
+
+# encode unknown data for use in <TITLE>...</TITILE>
+sub encode_title {
+ # like encode_url but less strict (I couldn't find docs on this)
+ local($_) = @_;
+ s/([\000-\031\%\&\<\>\177-\377])/sprintf('%%%02x',ord($1))/eg;
+ $_;
+}
+
+# encode unknown data for use in a URL <A HREF="...">
+sub encode_url {
+ local($_) = @_;
+ # rfc1738 says that ";"|"/"|"?"|":"|"@"|"&"|"=" may be reserved.
+ # And % is the escape character so we escape it along with
+ # single-quote('), double-quote("), grave accent(`), less than(<),
+ # greater than(>), and non-US-ASCII characters (binary data),
+ # and white space. Whew.
+ s/([\000-\032\;\/\?\:\@\&\=\%\'\"\`\<\>\177-\377 ])/sprintf('%%%02x',ord($1))/eg;
+ s/%20/+/g;
+ $_;
+}
+# encode unknown data for use inside markup attributes <MARKUP ATTR="...">
+sub encode_attribute {
+ # rfc1738 says to use entity references here
+ local($_) = @_;
+ s/([\000-\031\"\'\`\%\&\<\>\177-\377])/sprintf('\&#%03d;',ord($1))/eg;
+ $_;
+}
+# encode unknown text data for using as HTML,
+# treats ^H as overstrike ala nroff.
+sub encode_data {
+ local($_) = @_;
+ local($str);
+
+ # Escape &, < and >
+ s,\010[><&],,g;
+ s/\&/\&amp\;/g;
+ s/\</\&lt\;/g;
+ s/\>/\&gt\;/g;
+
+ s,((_\010.)+),($str = $1) =~ s/.\010//g; "<I>$str</I>";,ge;
+ s,(.\010)+,$1,g;
+
+ if (!s,((.\010.)+\s+(.\010.)+),($str = $1) =~ s/.\010//g; "<B>$str</B>";,ge) {
+ s,((.\010.)+),($str = $1) =~ s/.\010//g; "<B>$str</B>";,ge;
+ }
+
+ s,.\010,,g;
+
+ $_;
+}
+
+sub html_header {
+ return qq{<HTML>
+<HEAD>
+<TITLE>$_[0]</TITLE>
+<link rev="made" href="mailto:wosch\@FreeBSD.ORG">
+<META name="robots" content="nofollow">
+<meta content="text/html; charset=iso-8859-1" http-equiv="Content-Type">
+<link rel="stylesheet" type="text/css" href="/nielsen/style.css">
+</HEAD>
+<BODY BGCOLOR="#FFFFFF" TEXT="#000000">\n\n};
+}
+
+sub mlnk {
+ local($matched) = @_;
+ return qq{<U>$matched</U>};
+}
+
+sub proc {
+ local(*FH, $prog, @args) = @_;
+ local($pid) = open(FH, "-|");
+ return undef unless defined($pid);
+ if ($pid == 0) {
+ exec $prog, @args;
+ &mydie("exec $prog failed\n");
+ }
+ 1;
+}
+
+# CGI script must die with error status 0
+sub mydie {
+ local($message) = @_;
+ print &html_header("Error");
+ print $message;
+
+print qq{
+<p>
+<A HREF="$BASE">Index Page and Help</A>
+</BODY>
+</HTML>
+};
+
+ exit(0);
+}