Admin/website/build/pypager.py
changeset 16233 e634d33deb86
child 16240 95cc0e8f8a17
equal deleted inserted replaced
16232:8a12e11d222b 16233:e634d33deb86
       
     1 #!/usr/bin/env python
       
     2 # -*- coding: Latin-1 -*-
       
     3 
       
     4 __author__ = 'Florian Haftmann, florian.haftmann@informatik.tu-muenchen.de'
       
     5 __revision__ = '$Id$'
       
     6 
       
     7 # generic imports
       
     8 import sys
       
     9 import os
       
    10 from os import path
       
    11 import posixpath
       
    12 import codecs
       
    13 import shlex
       
    14 import optparse
       
    15 import time
       
    16 
       
    17 # xml imports
       
    18 from xml.sax.saxutils import escape
       
    19 from xml.sax.saxutils import quoteattr
       
    20 from xml.sax import make_parser as makeParser
       
    21 from xml.sax.handler import ContentHandler
       
    22 from xml.sax.handler import EntityResolver
       
    23 from xml.sax.xmlreader import AttributesImpl as Attributes
       
    24 from xml.sax import SAXException
       
    25 from xml.sax import SAXParseException
       
    26 
       
    27 nbsp = unichr(160)
       
    28 
       
    29 # global configuration
       
    30 outputEncoding = 'UTF-8'
       
    31 
       
    32 # implement your own functions for PIs here
       
    33 class Functions:
       
    34 
       
    35     def __init__(self, pc, valdict, modtime, encodingMeta):
       
    36 
       
    37         self._pc = pc
       
    38         self._valdict = valdict
       
    39         self._modtime = modtime
       
    40         self._encodingMeta = encodingMeta
       
    41 
       
    42     def getPc(self):
       
    43 
       
    44         return self._pc
       
    45 
       
    46     def value(self, handler, **args):
       
    47 
       
    48         value = self._valdict[args[u"key"]]
       
    49         handler.characters(value)
       
    50 
       
    51     def title(self, handler, **args):
       
    52 
       
    53         handler.characters(handler._title)
       
    54 
       
    55     def contentType(self, handler, **args):
       
    56 
       
    57         encoding = self._encodingMeta or handler._encoding
       
    58         attr = {
       
    59             u"http-equiv": u"Content-Type",
       
    60             u"content": u"text/html; charset=%s" % encoding
       
    61         }
       
    62         handler.startElement(u"meta", attr)
       
    63         handler.endElement(u"meta")
       
    64 
       
    65     def currentDate(self, handler, **args):
       
    66 
       
    67         handler.characters(unicode(time.strftime('%Y-%m-%d %H:%M:%S')))
       
    68 
       
    69     def modificationDate(self, handler, **args):
       
    70 
       
    71         handler.characters(unicode(time.strftime('%Y-%m-%d %H:%M:%S',
       
    72             time.localtime(self._modtime))))
       
    73 
       
    74     def relativeRoot(self, handler, **args):
       
    75 
       
    76         href = args[u"href"].encode("latin-1")
       
    77         handler.characters(self._pc.relDstPathOf('//'+href))
       
    78 
       
    79     def include(self, handler, **args):
       
    80 
       
    81         filename = args[u"file"].encode("latin-1")
       
    82         filename = self._pc.absSrcPathOf(filename)
       
    83         self._modtime = max(self._modtime, os.stat(filename).st_mtime)
       
    84         istream = open(filename, "r")
       
    85         parseWithER(istream, handler)
       
    86         istream.close()
       
    87 
       
    88     def navitem(self, handler, **args):
       
    89 
       
    90         target = args[u"target"].encode("latin-1")
       
    91         target = self._pc.relDstPathOf(target)
       
    92         if self._pc.isSrc(target):
       
    93             wrapTagname = u"strong"
       
    94         else:
       
    95             wrapTagname = u"span"
       
    96         title = args[u"title"]
       
    97         attr = {}
       
    98         handler.startElement(u"li", attr)
       
    99         handler.startElement(wrapTagname, {})
       
   100         handler.startElement(u"a", {
       
   101             u"href": unicode(target, 'latin-1')
       
   102         })
       
   103         handler.characters(title)
       
   104         handler.endElement(u"a")
       
   105         handler.endElement(wrapTagname)
       
   106         handler.endElement(u"li")
       
   107 
       
   108     def downloadCells(self, handler, **args):
       
   109 
       
   110         target = args[u"target"].encode("latin-1")
       
   111         targetReal = self._pc.absDstPathOf(target)
       
   112         title = args.get(u"title", unicode(posixpath.split(target)[0], 'latin-1'))
       
   113         size = os.stat(targetReal).st_size
       
   114         handler.startElement(u"td", {})
       
   115         handler.startElement(u"a", {
       
   116             u"href": target
       
   117         })
       
   118         handler.characters(title)
       
   119         handler.endElement(u"a")
       
   120         handler.endElement(u"td")
       
   121         handler.startElement(u"td", {})
       
   122         handler.characters(u"%i%sKB" % (size / 1024, unichr(160)))
       
   123         handler.endElement(u"td")
       
   124 
       
   125     def cvs(self, handler, **args):
       
   126 
       
   127         pass
       
   128 
       
   129 # a notion of paths
       
   130 class PathCalculator:
       
   131 
       
   132     def __init__(self, srcLoc, srcRoot, dstRoot):
       
   133 
       
   134         self._src = path.normpath(path.abspath(srcLoc))
       
   135         srcPath, srcName = path.split(self._src)
       
   136         self._srcRoot = path.normpath(path.abspath(srcRoot))
       
   137         self._dstRoot = path.normpath(path.abspath(dstRoot))
       
   138         self._relRoot = ""
       
   139         relLocChain = []
       
   140         diffRoot = srcPath
       
   141         while diffRoot != self._srcRoot:
       
   142             self._relRoot = path.join(self._relRoot, os.pardir)
       
   143             diffRoot, chainPiece = path.split(diffRoot)
       
   144             relLocChain.insert(0, chainPiece)
       
   145         self._relRoot = self._relRoot and self._relRoot + '/'
       
   146         self._relLoc = relLocChain and path.join(*relLocChain) or ""
       
   147 
       
   148     def isSrc(self, loc):
       
   149 
       
   150         return self.absSrcPathOf(loc) == self._src
       
   151 
       
   152     def relRootPath(self):
       
   153 
       
   154         return self._relRoot
       
   155 
       
   156     def absSrcPathOf(self, loc):
       
   157 
       
   158         if loc.startswith("//"):
       
   159             return path.normpath(path.abspath(loc[2:]))
       
   160         else:
       
   161             return path.normpath(path.abspath(path.join(self._relLoc, loc)))
       
   162 
       
   163     def absDstPathOf(self, loc):
       
   164 
       
   165         if loc.startswith("//"):
       
   166             return path.join(self._dstRoot, loc[2:])
       
   167         else:
       
   168             return path.join(self._dstRoot, self._relLoc, loc)
       
   169 
       
   170     def relSrcPathOf(self, loc):
       
   171 
       
   172         loc = self.absSrcPathOf(loc)
       
   173         loc = self.stripCommonPrefix(loc, self._srcRoot)
       
   174         loc = self.stripCommonPrefix(loc, self._relLoc)
       
   175         return loc
       
   176 
       
   177     def relDstPathOf(self, loc):
       
   178 
       
   179         loc = self.absDstPathOf(loc)
       
   180         loc = self.stripCommonPrefix(loc, self._dstRoot)
       
   181         loc = self.stripCommonPrefix(loc, self._relLoc)
       
   182         return loc
       
   183 
       
   184     def stripCommonPrefix(self, loc, prefix):
       
   185 
       
   186         common = self.commonPrefix((loc, prefix))
       
   187         if common:
       
   188             loc = loc[len(common):]
       
   189             if loc and loc[0] == '/':
       
   190                 loc = loc[1:]
       
   191         return loc
       
   192 
       
   193     def commonPrefix(self, locs):
       
   194 
       
   195         common = path.commonprefix(locs)
       
   196         # commonprefix bugs
       
   197         if [ loc for loc in locs if len(loc) != common ] and \
       
   198             [ loc for loc in locs if len(common) < len(loc) and loc[len(common)] != path.sep ]:
       
   199                 common = path.split(common)[0]
       
   200         if common and common[-1] == path.sep:
       
   201             common = common[:-1]
       
   202 
       
   203         return common or ""
       
   204 
       
   205 # the XML transformer
       
   206 class TransformerHandler(ContentHandler, EntityResolver):
       
   207 
       
   208     def __init__(self, out, encoding, dtd, func):
       
   209 
       
   210         ContentHandler.__init__(self)
       
   211         #~ EntityResolver.__init__(self)
       
   212         self._out = codecs.getwriter(encoding)(out)
       
   213         self._ns_contexts = [{}] # contains uri -> prefix dicts
       
   214         self._current_context = self._ns_contexts[-1]
       
   215         self._undeclared_ns_maps = []
       
   216         self._encoding = encoding
       
   217         self._lastStart = False
       
   218         self._func = func
       
   219         self._characterBuffer = {}
       
   220         self._currentXPath = []
       
   221         self._title = None
       
   222         self._init = False
       
   223         self._dtd = dtd
       
   224 
       
   225     def closeLastStart(self):
       
   226 
       
   227         if self._lastStart:
       
   228             self._out.write(u'>')
       
   229             self._lastStart = False
       
   230 
       
   231     def flushCharacterBuffer(self):
       
   232 
       
   233         self._out.write(escape(u"".join(self._characterBuffer)))
       
   234         self._characterBuffer = []
       
   235 
       
   236     def transformAbsPath(self, attrs, attrname):
       
   237 
       
   238         pathval = attrs.get(attrname, None)
       
   239         if pathval and pathval.startswith(u"//"):
       
   240             attrs = dict(attrs)
       
   241             pathRel = self._func.getPc().relDstPathOf(pathval)
       
   242             pathDst = self._func.getPc().absDstPathOf(pathval)
       
   243             if not path.exists(pathDst):
       
   244                 raise Exception("Path does not exist: %s" % pathDst)
       
   245             attrs[attrname] = pathRel
       
   246             return attrs
       
   247         else:
       
   248             return attrs
       
   249 
       
   250     def startDocument(self):
       
   251 
       
   252         if not self._init:
       
   253             if self._encoding.upper() != 'UTF-8':
       
   254                 self._out.write(u'<?xml version="1.0" encoding="%s"?>\n' %
       
   255                                 self._encoding)
       
   256             else:
       
   257                 self._out.write(u'<?xml version="1.0"?>\n')
       
   258             self._init = True
       
   259 
       
   260     def startPrefixMapping(self, prefix, uri):
       
   261 
       
   262         self._ns_contexts.append(self._current_context.copy())
       
   263         self._current_context[uri] = prefix
       
   264         self._undeclared_ns_maps.append((prefix, uri))
       
   265 
       
   266     def endPrefixMapping(self, prefix):
       
   267 
       
   268         self._current_context = self._ns_contexts[-1]
       
   269         del self._ns_contexts[-1]
       
   270 
       
   271     def startElement(self, name, attrs):
       
   272 
       
   273         if name == u"dummy:wrapper":
       
   274             return
       
   275         self.closeLastStart()
       
   276         self.flushCharacterBuffer()
       
   277         self._out.write(u'<' + name)
       
   278         # this list is not exhaustive
       
   279         for tagname, attrname in ((u"a", u"href"), (u"img", u"src"), (u"link", u"href")):
       
   280             if name == tagname:
       
   281                 attrs = self.transformAbsPath(attrs, attrname)
       
   282         for (name, value) in attrs.items():
       
   283             self._out.write(u' %s=%s' % (name, quoteattr(value)))
       
   284         self._currentXPath.append(name)
       
   285         self._lastStart = True
       
   286 
       
   287     def endElement(self, name):
       
   288 
       
   289         if name == u"dummy:wrapper":
       
   290             return
       
   291         elif name == u'title':
       
   292             self._title = u"".join(self._characterBuffer)
       
   293         self.flushCharacterBuffer()
       
   294         if self._lastStart:
       
   295             self._out.write(u'/>')
       
   296             self._lastStart = False
       
   297         else:
       
   298             self._out.write('</%s>' % name)
       
   299         self._currentXPath.pop()
       
   300 
       
   301     def startElementNS(self, name, qname, attrs):
       
   302 
       
   303         self.closeLastStart()
       
   304         self.flushCharacterBuffer()
       
   305         if name[0] is None:
       
   306             # if the name was not namespace-scoped, use the unqualified part
       
   307             name = name[1]
       
   308         else:
       
   309             # else try to restore the original prefix from the namespace
       
   310             name = self._current_context[name[0]] + u":" + name[1]
       
   311         self._out.write(u'<' + name)
       
   312 
       
   313         for pair in self._undeclared_ns_maps:
       
   314             self._out.write(u' xmlns:%s="%s"' % pair)
       
   315         self._undeclared_ns_maps = []
       
   316 
       
   317         for (name, value) in attrs.items():
       
   318             name = self._current_context[name[0]] + ":" + name[1]
       
   319             self._out.write(' %s=%s' % (name, quoteattr(value)))
       
   320         self._out.write('>')
       
   321         self._currentXPath.append(name)
       
   322 
       
   323     def endElementNS(self, name, qname):
       
   324 
       
   325         self.flushCharacterBuffer()
       
   326         if name[0] is None:
       
   327             name = name[1]
       
   328         else:
       
   329             name = self._current_context[name[0]] + u":" + name[1]
       
   330         if self._lastStart:
       
   331             self._out.write(u'/>')
       
   332             self._lastStart = False
       
   333         else:
       
   334             self._out.write(u'</%s>' % name)
       
   335         self._currentXPath.pop()
       
   336 
       
   337     def characters(self, content):
       
   338 
       
   339         self.closeLastStart()
       
   340         self._characterBuffer.append(content)
       
   341 
       
   342     def ignorableWhitespace(self, content):
       
   343 
       
   344         self.closeLastStart()
       
   345         self.flushCharacterBuffer()
       
   346         self._out.write(content)
       
   347 
       
   348     def resolveEntity(self, publicId, systemId):
       
   349 
       
   350         loc, name = posixpath.split(systemId)
       
   351         if loc == u"http://www.w3.org/TR/xhtml1/DTD" or loc == u"":
       
   352             systemId = path.join(self._dtd, name)
       
   353         return EntityResolver.resolveEntity(self, publicId, systemId)
       
   354 
       
   355     def processingInstruction(self, target, data):
       
   356 
       
   357         self.closeLastStart()
       
   358         self.flushCharacterBuffer()
       
   359         func = getattr(self._func, target)
       
   360         args = {}
       
   361         for keyval in shlex.split(data.encode("utf-8")):
       
   362             key, val = keyval.split("=", 1)
       
   363             args[key] = val
       
   364         func(self, **args)
       
   365 
       
   366 def parseWithER(istream, handler):
       
   367 
       
   368     parser = makeParser()
       
   369     parser.setContentHandler(handler)
       
   370     parser.setEntityResolver(handler)
       
   371     parser.parse(istream)
       
   372 
       
   373 def main():
       
   374 
       
   375     # parse command line
       
   376     cmdlineparser = optparse.OptionParser(
       
   377         usage = '%prog [options] [key=value]* src [dst]',
       
   378         conflict_handler = "error",
       
   379         description = '''Leightweight HTML page generation tool''',
       
   380         add_help_option = True,
       
   381     )
       
   382     cmdlineparser.add_option("-s", "--srcroot",
       
   383         action="store", dest="srcroot",
       
   384         type="string", default=".",
       
   385         help="source tree root", metavar='location')
       
   386     cmdlineparser.add_option("-d", "--dstroot",
       
   387         action="store", dest="dstroot",
       
   388         type="string", default=".",
       
   389         help="destination tree root", metavar='location')
       
   390     cmdlineparser.add_option("-t", "--dtd",
       
   391         action="store", dest="dtd",
       
   392         type="string", default=".",
       
   393         help="local mirror of XHTML DTDs", metavar='location')
       
   394     cmdlineparser.add_option("-m", "--encodinghtml",
       
   395         action="store", dest="encodinghtml",
       
   396         type="string", default="",
       
   397         help="force value of html content encoding meta ", metavar='encoding')
       
   398 
       
   399 
       
   400     options, args = cmdlineparser.parse_args(sys.argv[1:])
       
   401 
       
   402     # check source
       
   403     if len(args) < 1:
       
   404         cmdlineparser.error("Exactly one soure file must be given")
       
   405 
       
   406     # read arguments
       
   407     valdict = {}
       
   408     if len(args) == 1:
       
   409         src = args[0]
       
   410         dst = None
       
   411     else:
       
   412         if "=" in args[-2]:
       
   413             src = args[-1]
       
   414             dst = None
       
   415             vallist = args[:-1]
       
   416         else:
       
   417             src = args[-2]
       
   418             dst = args[-1]
       
   419             if dst == "-":
       
   420                 dst = None
       
   421             vallist = args[:-2]
       
   422         for keyval in vallist:
       
   423             key, val = keyval.split("=", 1)
       
   424             valdict[unicode(key, 'latin-1')] = unicode(val, 'latin-1')
       
   425 
       
   426     # path calculator
       
   427     pc = PathCalculator(src, options.srcroot, options.dstroot)
       
   428 
       
   429     # function space
       
   430     modtime = os.stat(src).st_mtime
       
   431     func = Functions(pc, valdict, modtime, options.encodinghtml)
       
   432 
       
   433     # allocate file handles
       
   434     istream = open(src, 'r')
       
   435     if dst is not None:
       
   436         ostream = open(dst, 'wb')
       
   437     else:
       
   438         ostream = sys.stdout
       
   439 
       
   440     # process file
       
   441     transformer = TransformerHandler(ostream, outputEncoding, options.dtd, func)
       
   442     parseWithER(istream, transformer)
       
   443 
       
   444     # close handles
       
   445     ostream.close()
       
   446     istream.close()
       
   447 
       
   448 if __name__ == '__main__':
       
   449     main()
       
   450 
       
   451 __todo__ = '''
       
   452 '''