#! /usr/bin/python

import re, sys, os, string

def addtag(matchobj):
	"""Return group 1 of `matchobj` rewritten as a self-closing tag.

	Intended as a replacement callback for re.sub(): group 1 must hold the
	complete tag text.  Text that already ends in "/>" is returned
	unchanged; otherwise its final character is dropped and "/>" is
	appended in its place.
	"""
	tag = matchobj.group(1)
	return tag if tag.endswith('/>') else tag[:-1] + '/>'

def lowertag(matchobj):
	"""Return group 1 of `matchobj` followed by group 2 in lower case.

	Intended as a replacement callback for re.sub()/re.subn().  Only
	group 2 is lowercased; group 1 is copied through unchanged.
	"""
	# The str.lower() method works on Python 2 and 3 alike; the old
	# string.lower() module function no longer exists in Python 3.
	return matchobj.group(1) + matchobj.group(2).lower()

def xmlizeTag(text, tag):
	"""Return `text` with every opening `tag` element made self-closing.

	Matches "<tag>" as well as "<tag", one whitespace character and
	everything up to the next ">".  Because the name must be followed by
	whitespace or ">", longer element names that merely start with `tag`
	are left alone.  Each match is rewritten by addtag().

	Note: a literal ">" inside an attribute value ends the match early.
	"""
	# re.escape() keeps `tag` a literal even if it contains regex
	# metacharacters.  Matching ignores case because DocBook SGML element
	# names are case-insensitive, so "<ColSpec ...>" must be closed too.
	pat = re.compile(r"(<%s(?:\s[^>]*)?>)" % re.escape(tag), re.IGNORECASE)
	return pat.sub(addtag, text)
	
# Convert the KDE DocBook SGML file named by the first command-line argument
# to DocBook XML and write the result to standard output.
if len(sys.argv) < 2:
	sys.exit("usage: %s docbook-file" % sys.argv[0])

# Read the whole document up front; the context manager closes the handle.
with open(sys.argv[1]) as docbookfile:
	text = docbookfile.read()

# Replacement for the DOCTYPE line: XML declaration, XML public identifier
# and an explicit system identifier (\1 is the document element name).
xml_doctype = (r'<?xml version="1.0" ?><!DOCTYPE \1 PUBLIC '
	r'"-//KDE//DTD DocBook XML V4.1-Based Variant V1.0//EN" "dtd/kdex.dtd"')

# Normalized form of the "ents" parameter-entity declaration (without the
# closing ">"), including its system identifier.
ents_decl = ('<!ENTITY % ents PUBLIC "-//KDE//ENTITIES Documentation V2.0//EN"'
	' "entities/kde-prologue.entities"')

# Ordered (pattern, replacement) pairs.  The order matters: the "ents"
# declaration is first normalized and only then removed.
rewrites = (
	# Both the V4.1- and the V3.1-based SGML DOCTYPEs become the V4.1 XML one.
	(r'<!DOCTYPE\s*(\S+)\s*PUBLIC\s*"-//KDE//DTD DocBook V4.1-Based Variant V1.0//EN"',
		xml_doctype),
	(r'<!DOCTYPE\s*(\S+)\s*PUBLIC\s*"-//KDE//DTD DocBook V3.1-Based Variant V1.0//EN"',
		xml_doctype),
	# A "--" inside an ENTITY declaration: close the declaration there and
	# open a regular "<!--" comment instead.
	(r'<!ENTITY([^>-]*)--', r'<!ENTITY\1> <!--'),
	# first normalize it - before we remove it
	(r'<!ENTITY\s*%\s*ents\s*PUBLIC\s*"-//KDE//ENTITIES Documentation V2.0//EN"',
		ents_decl),
	(r'<!ENTITY\s*%\s*ents\s*PUBLIC\s*"-//KDE//ENTITIES Documentation V1.0//EN"',
		ents_decl),
	# Any single-character major version of the application entities is
	# rewritten to V2.0 and given a system identifier.
	(r'<!ENTITY\s*%\s*ents\s*PUBLIC\s*"-//KDE//ENTITIES Application-Variable Entities V.\.0//EN"',
		'<!ENTITY % ents PUBLIC "-//KDE//ENTITIES Application-Variable Entities V2.0//EN"'
		' "entities/kde-prologue.entities"'),
	# remove it: the normalized declaration and the reference that uses it
	(ents_decl + r'[^>]*>', ''),
	(r'%ents;', ''),
	# Replace an "<![ %addindex; [ ... ]]>" marked section by its content.
	(r'<!\[ %addindex; \[\s*([^\]]*)\s*\]\]>', r'\1'),
)
for pattern, replacement in rewrites:
	text = re.sub(pattern, replacement, text)

# Elements that carry no content must be written as self-closing tags in XML.
for tag in ("imagedata", "colspec", "spanspec", "anchor", "xref", "area",
		"glossseealso", "footnoteref", "glosssee"):
	text = xmlizeTag(text, tag)

# Lowercase the names of opening and closing tags.  Each pass lowercases only
# the last capital letter left in a tag name, so repeat until nothing changes.
pat = re.compile(r"(</?\w*)([A-Z])")
while True:
	text, n = pat.subn(lowertag, text)
	if not n:
		break

# The parenthesized form is valid under both Python 2 and Python 3.
print(text)
