#!/usr/bin/env python
# man2tidyhtml - a wrapper around manServer 1.07 and 1.08
#
# Copyright (C) 2009 Sebastian Pipping <sebastian@pipping.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.

# Program identity. VERSION is reported by --version, and all three values
# are inserted into the "Generated by" note of the produced HTML.
PACKAGE = 'man2tidyhtml'
VERSION = '1.1'
WEBSITE = 'http://git.goodpoint.de/?p=man2tidyhtml.git;a=summary'

import subprocess
import re
import sys
from optparse import OptionParser


# Command line interface: one optional flag plus one or two positional
# file names (input man page, optional output HTML file).
USAGE = "Usage: %prog [options] page.n [page.n.html]"
parser = OptionParser(usage=USAGE, version='%%prog %s' % VERSION)
parser.add_option(
    '--skip-tidy',
    action='store_true',
    dest='skip_tidy',
    default=False,
    help='skip post-processing by HTML Tidy',
)
# opts carries the flags, args the remaining positional arguments
opts, args = parser.parse_args()


# Process file list: exactly one or two positional arguments are accepted
len_args = len(args)
if len_args not in (1, 2):
    # No file at all, or too many of them: show usage and give up
    # TODO Stdin to stdout (for the case of no arguments)
    parser.print_help()
    sys.exit(1)

# The first argument is always the man page to convert
input_filename = args[0]
# A single argument means "file to stdout"; a second one names the output file
output_filename = None
if len_args == 2:
    output_filename = args[1]


# Run manServer on the input file and capture the HTML it prints to stdout
COMMAND = "manServer"
try:
    process = subprocess.Popen([COMMAND, input_filename],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
except OSError:
    # Raised when the executable cannot be started (e.g. not installed)
    sys.stderr.write('Could not execute command "%s"\n\n' % (COMMAND))
    sys.exit(1)

# communicate() drains stdout AND stderr and then waits for the child.
# Reading only stdout while stderr is also a pipe can block forever once
# the child has filled the unread stderr pipe. The stderr text itself is
# discarded.
document, _ = process.communicate()

# A lone output line containing "Failed to open" is treated as manServer's
# report that it could not read the input file. "Lone line" means there is
# no newline anywhere except possibly as the very last character.
failed_to_open = ('\n' not in document[:-1]) and \
        (document.find("Failed to open") != -1)
if failed_to_open:
    sys.stderr.write('Could not open file "%s"\n\n' % (input_filename))
    sys.exit(1)


# Post-process HTML
#
# The substitutions below run in a fixed order on the manServer output.

# Empty "<A name=>" anchors in front of an <H3> heading; group 3 is the
# upper-case heading text that becomes the anchor name
h3_fix = re.compile('(<A name=)(>[\n\t ]+<H3>([A-Z]+)</H3>)')
# Bold words that are not directly followed by a "(digits)" section suffix
bold_word_anchors = re.compile(r'<B>([A-Za-z_-]+)</B>(?!\(\d+\))')
# Centering attributes and <CENTER> tags
center_kill = re.compile(' align=center|</?CENTER>')
# The "Generated by manServer ..." link; group 1 is the whole link
extend_generator_note = re.compile(
        'Generated by (<A HREF="[^"]+">manServer [^<]+</A>)')

# 1. Swap the colored <BODY> tag for a stylesheet link and a plain <BODY>
colored_body_tag = ('<BODY bgcolor=#F0F0F0 text=#000000 '
                    'link=#0000ff vlink=#C000C0 alink=#ff0000>')
stylesheet_and_body = (
        '<LINK MEDIA="screen" HREF="manpage-screen.css" '
        'TYPE="text/css" REL="stylesheet">\n'
        '</HEAD>\n'
        '<BODY>')
document = document.replace(colored_body_tag, stylesheet_and_body)

# 2. Give the empty heading anchors a quoted name taken from the heading
document = h3_fix.sub(r'\1"\3"\2', document)

# 3. Wrap bold words in a named anchor
document = bold_word_anchors.sub(r'<B><A NAME="\1">\1</A></B>', document)

# 4. Drop the "CONTENTS" heading together with the blockquote boundary
contents_heading = (
        '</BLOCKQUOTE>\n'
        '<A name=contents></A><H3>CONTENTS</H3></A>\n'
        '<BLOCKQUOTE>\n')
document = document.replace(contents_heading, '')

# 5. Remove all centering
document = center_kill.sub('', document)

# 6. Remove the "Manual Reference Pages" prefix
document = document.replace('<I>Manual Reference Pages &nbsp;-&nbsp;</I>', '')

# 7. Credit this wrapper in front of the original manServer generator link
generator_note = 'Generated by <A HREF="%s">%s %s</A> through \\1' % \
        (WEBSITE, PACKAGE, VERSION)
document = extend_generator_note.sub(generator_note, document)


# Run HTML Tidy (unless --skip-tidy was given)
if not opts.skip_tidy:
    # The tidy module is imported only when needed so that --skip-tidy
    # works without it being installed
    try:
        import tidy
    except ImportError:
        sys.stderr.write(
                'Could not import module "tidy"\n'
                '\n'
                'Please make sure you have the TidyLib Python Wrapper\n'
                '(http://utidylib.berlios.de/) installed and its location\n'
                'somewhere in PYTHONPATH.\n'
                '\n'
                'Gentoo:  # sudo emerge -av dev-python/utidylib\n'
                '\n')
        sys.exit(1)

    # Keyword options handed to tidy.parseString()
    options = {
        'output_xhtml': True,
        'add_xml_decl': True,
        'indent': 'auto',
        'drop_font_tags': True,
        'tidy_mark': True,
    }
    tidied = tidy.parseString(document, **options)
    # str() turns the parse result back into the document text
    document = str(tidied)


# Emit the result: into the named file if one was given, otherwise to stdout
if output_filename:
    # 'with' closes the file even when write() raises
    with open(output_filename, "w") as f:
        f.write(document)
else:
    # On stdout the document is followed by one newline (the file gets none).
    # Plain write() calls are used so this parses under Python 2 and 3 alike.
    sys.stdout.write(document)
    sys.stdout.write('\n')
