From d84dd680e0e0929406e7a70348689adbbacb3707 Mon Sep 17 00:00:00 2001 From: Stef Walter Date: Wed, 10 May 2006 22:02:48 +0000 Subject: Add HTML generation for man pages --- doc/man2html.pl | 230 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 230 insertions(+) create mode 100755 doc/man2html.pl (limited to 'doc/man2html.pl') diff --git a/doc/man2html.pl b/doc/man2html.pl new file mode 100755 index 0000000..21e3ab5 --- /dev/null +++ b/doc/man2html.pl @@ -0,0 +1,230 @@ +#!/usr/bin/perl + +# TODO: We need to make this more resilient +# currently expects args without enforcing + +$FIL = $NAM = $SEC = @ARGV[0]; + +$NAM =~ s/^([^.]+)\..+$/$1/; +$SEC =~ s/^.+\.([^.]+)$/$1/; + +$command = "groff"; +@args = split(" ", "-Tascii -mdoc $FIL"); + +$enable_include_links = 0; + +man($NAM, $SEC); + +sub man { + local($name, $section) = @_; + local($_, $title, $head, *MAN); + local($html_name, $html_section, $prefix); + local(@manargs); + local($query) = $name; + + # $section =~ s/^([0-9ln]).*$/$1/; + $section =~ tr/A-Z/a-z/; + + $prefix = "Man "; + if ($alttitle) { + $prefix = ""; + $title = &encode_title($alttitle); + $head = &encode_data($alttitle); + } elsif ($section) { + $title = &encode_title("${name}($section)"); + $head = &encode_data("${name}($section)"); + } else { + $title = &encode_title("${name}"); + $head = &encode_data("${name}"); + } + + print &html_header("$title"); + print "

Man Page: ${title}

"; + print "
\n";
+
+    $html_name = &encode_data($name);
+    $html_section = &encode_data($section);
+
+    #print Dumper($sectionpath);
+    #print "yy $section yy $manpath\n";
+    if ($name =~ /^\s*$/) {
+        print "Empty input, no man page given.\n";
+        return;
+    }
+
+    if (index($name, '*') != -1) {
+        print "Invalid character input '*': $name\n";
+        return;
+    }
+
+    if ($section !~ /^[0-9ln]\w*$/ && $section ne '') {
+        print "Sorry, section `$section' is not valid\n";
+        return;
+    }
+
+    if (!$section) {
+        if ($sectionpath->{$manpath}) {
+            $section = "-S " . $sectionpath->{$manpath}{'path'};
+        } else {
+            $section =  '';
+        }
+    } else {
+        if ($sectionpath->{$manpath}{$section}) {
+            $section = "-S " . $sectionpath->{$manpath}{$section};
+        } else {
+            $section = "-S $section";
+        }
+    }
+
+    # print "X $command{'man'} @manargs -- x $name x\n";
+    &proc(*MAN, $command, @args) ||
+        &mydie ("$0: open of $command{'man'} command failed: $!\n");
+    if (eof(MAN)) {
+        # print "X $command{'man'} @manargs -- x $name x\n";
+        print "Sorry, no data found for `$html_name" .
+                ($html_section ? "($html_section)": '') . "'.\n";
+        return;
+    }
+
+    local($space) = 1;
+    local(@sect);
+    local($i, $j);
+    while() {
+        # remove tailing white space
+        if (/^\s+$/) {
+            next if $space;
+            $space = 1;
+        } else {
+            $space = 0;
+        }
+
+        $_ = &encode_data($_);
+        if($enable_include_links &&
+           m,()?\#include()?\s+()?\<\;(.*\.h)\>\;()?,) {
+            $match = $4; ($regexp = $match) =~ s/\./\\\./;
+            s,$regexp,\$match\,;
+        }
+        /^\s/ &&                         # skip headers
+            s,((<[IB]>)?[\w\_\.\-]+\s*()?\s*\(([1-9ln][a-zA-Z]*)\)),&mlnk($1),oige;
+
+        # detect E-Mail Addreses in manpages
+        if (/\@/) {
+            s/([a-z0-9_\-\.]+\@[a-z0-9\-\.]+\.[a-z]+)/$1<\/A>/gi;
+        }
+
+        # detect URLs in manpages
+        if (m%tp://%) {
+            s,((ftp|http)://[^\s<>\)]+),$1,gi;
+        }
+
+        if (/^\S+/ && m%^([^<]+)%) {
+            $i = $1; $j = &encode_url($i);
+            s%^([^<]+)%$i%;
+            push(@sect, $1);
+        }
+        print;
+    }
+    close(MAN);
+
+    print "
Copyright, N. Nielsen   [ back | home ]
"; + print "\n"; + print "\n"; + + # Sleep 0.35 seconds to avoid DoS attacs + select undef, undef, undef, 0.35; +} + +# encode unknown data for use in ...</TITILE> +sub encode_title { + # like encode_url but less strict (I couldn't find docs on this) + local($_) = @_; + s/([\000-\031\%\&\<\>\177-\377])/sprintf('%%%02x',ord($1))/eg; + $_; +} + +# encode unknown data for use in a URL <A HREF="..."> +sub encode_url { + local($_) = @_; + # rfc1738 says that ";"|"/"|"?"|":"|"@"|"&"|"=" may be reserved. + # And % is the escape character so we escape it along with + # single-quote('), double-quote("), grave accent(`), less than(<), + # greater than(>), and non-US-ASCII characters (binary data), + # and white space. Whew. + s/([\000-\032\;\/\?\:\@\&\=\%\'\"\`\<\>\177-\377 ])/sprintf('%%%02x',ord($1))/eg; + s/%20/+/g; + $_; +} +# encode unknown data for use inside markup attributes <MARKUP ATTR="..."> +sub encode_attribute { + # rfc1738 says to use entity references here + local($_) = @_; + s/([\000-\031\"\'\`\%\&\<\>\177-\377])/sprintf('\&#%03d;',ord($1))/eg; + $_; +} +# encode unknown text data for using as HTML, +# treats ^H as overstrike ala nroff. +sub encode_data { + local($_) = @_; + local($str); + + # Escape &, < and > + s,\010[><&],,g; + s/\&/\&\;/g; + s/\</\<\;/g; + s/\>/\>\;/g; + + s,((_\010.)+),($str = $1) =~ s/.\010//g; "<I>$str</I>";,ge; + s,(.\010)+,$1,g; + + if (!s,((.\010.)+\s+(.\010.)+),($str = $1) =~ s/.\010//g; "<B>$str</B>";,ge) { + s,((.\010.)+),($str = $1) =~ s/.\010//g; "<B>$str</B>";,ge; + } + + s,.\010,,g; + + $_; +} + +sub html_header { + return qq{<HTML> +<HEAD> +<TITLE>$_[0] + + + + + +\n\n}; +} + +sub mlnk { + local($matched) = @_; + return qq{$matched}; +} + +sub proc { + local(*FH, $prog, @args) = @_; + local($pid) = open(FH, "-|"); + return undef unless defined($pid); + if ($pid == 0) { + exec $prog, @args; + &mydie("exec $prog failed\n"); + } + 1; +} + +# CGI script must die with error status 0 +sub mydie { + local($message) = @_; + print &html_header("Error"); + print $message; + +print qq{ +

+Index Page and Help + + +}; + + exit(0); +} -- cgit v1.2.3