#! /usr/bin/env python

"""
mw2html - Mediawiki to static HTML

I use this to create a personal website from a local mediawiki
installation. No search functionality. Hacks the Monobook skin and
the produced HTML.

Connelly Barnes 2005. Public domain.
"""

__version__ = ''

import re
import sys
import getopt
import random
import urllib
import urllib2
import textwrap
import urlparse
import os
import os.path
import errno
import sha

# Fall back to the `sets` module on interpreters without a built-in `set`.
try:
    set
except NameError:
    from sets import Set as set

# `htmldata` is a hard requirement: without it, print a hint and exit.
try:
    import htmldata
except ImportError:
    print('Requires Python htmldata module:')
    print(' http://oregonstate.edu/~barnesc/htmldata/')
    sys.exit()

MOVE_HREF = 'movehref'
MADE_BY_COMMENT = ''
INDEX_HTML = 'index.html'
url_filename_cache = {}
wrote_file_set = set()

MONOBOOK_SKIN = 'monobook'  # Constant identifier for Monobook.


class Config:
    """
    Instances contain all options passed at the command line.

    Every constructor argument is stored on the instance under the same
    name.  `outdir` is always converted to an absolute path; `sidebar`
    and `footer` are converted to absolute paths only when they are not
    None.

    Options consumed by post_html_transform() in this file:
      hack_skin       -- when true, the skin-specific HTML hacks are applied.
      skin            -- skin identifier; only MONOBOOK_SKIN is recognised.
      move_href       -- when true, fix_move_href_tags() is applied.
      remove_history  -- when true, html_remove_image_history() is applied.

    The remaining options (flatten, lower, index, clean, made_by,
    overwrite, remove_png, ...) are only stored here.
    """

    def __init__(self, rooturl, outdir,
                 flatten=True, lower=True, index=None, clean=True,
                 sidebar=None, hack_skin=True,
                 made_by=True, overwrite=False, footer=None,
                 skin=MONOBOOK_SKIN, move_href=True,
                 remove_png=True, remove_history=True):
        self.rooturl = rooturl
        self.outdir = os.path.abspath(outdir)
        self.flatten = flatten
        self.lower = lower
        self.index = index
        self.clean = clean
        self.sidebar = sidebar
        self.hack_skin = hack_skin
        self.made_by = made_by
        self.overwrite = overwrite
        self.footer = footer
        self.skin = skin
        self.move_href = move_href
        # Optional file paths are normalised only when they were supplied.
        if self.sidebar is not None:
            self.sidebar = os.path.abspath(self.sidebar)
        if self.footer is not None:
            self.footer = os.path.abspath(self.footer)
        self.remove_png = remove_png
        self.remove_history = remove_history


def post_filename_transform(filename, config):
    """
    User-customizable filename transform.

    Here filename is the full filename in the output directory.
    Returns modified full filename.  The default implementation
    returns `filename` unchanged and does not read `config`.
    """
    return filename


# NOTE(review): the rest of this original line is the opening of
# monobook_fix_html_sidebar().  The HTML string literals it contained are
# missing from this copy, so the statement cannot be restored.  The damaged
# text is kept verbatim on the next line; it must be repaired from a clean
# copy of the file.
# def monobook_fix_html_sidebar(doc, config): """ Sets sidebar for Mediawiki 1.4beta6 Monobook HTML output. """ if config.made_by: doc = doc.replace(')[\s\S]+?' + r'()', r'\1
' + SIDEBAR_ID + r'\2', doc) pre_sidebar = """
""" post_sidebar = """
""" sidebar_content = '' if config.sidebar != None: f = open(config.sidebar, 'rU') sidebar_content = f.read() f.close() sidebar_content = pre_sidebar + sidebar_content + post_sidebar doc = doc.replace(SIDEBAR_ID, sidebar_content) doc = re.sub( r'
[\s\S]+?(', r'', doc) doc = re.sub(r'

Image links

[\s\S]+?', r'', doc) return doc def post_html_transform(doc, url, config): """ User-customizable HTML transform. Given an HTML document (with URLs already rewritten), returns modified HTML document. """ if config.hack_skin: if config.skin == MONOBOOK_SKIN: doc = monobook_fix_html_sidebar(doc, config) doc = monobook_hack_skin_html(doc, config) else: raise ValueError('unknown skin') if config.move_href: doc = fix_move_href_tags(doc, config) if config.remove_history: doc = html_remove_image_history(doc, config) return doc def monobook_hack_skin_html(doc, config): """ Hacks Monobook HTML output: use CSS ids for hacked skin. See monobook_hack_skin_css. """ doc = doc.replace('
', '
') doc = doc.replace('