|
1 #!/usr/bin/env python |
|
2 # -*- coding: Latin-1 -*- |
|
3 |
|
# Module metadata: original author and contact address.
__author__ = 'Florian Haftmann, florian.haftmann@informatik.tu-muenchen.de'
# Revision marker; '$Id$' is presumably a keyword placeholder that the
# version control system expands on checkout -- verify against the repository.
__revision__ = '$Id$'
|
6 |
|
7 # generic imports |
|
8 import sys |
|
9 import os |
|
10 from os import path |
|
11 import posixpath |
|
12 import codecs |
|
13 import shlex |
|
14 import optparse |
|
15 import time |
|
16 |
|
17 # xml imports |
|
18 from xml.sax.saxutils import escape |
|
19 from xml.sax.saxutils import quoteattr |
|
20 from xml.sax import make_parser as makeParser |
|
21 from xml.sax.handler import ContentHandler |
|
22 from xml.sax.handler import EntityResolver |
|
23 from xml.sax.xmlreader import AttributesImpl as Attributes |
|
24 from xml.sax import SAXException |
|
25 from xml.sax import SAXParseException |
|
26 |
|
# The character with code point 160 (U+00A0, NO-BREAK SPACE).
nbsp = unichr(160)

# global configuration
# Encoding of the generated document; main() hands it to TransformerHandler,
# which uses it both for the output writer and for the XML declaration.
outputEncoding = 'UTF-8'
|
31 |
|
32 # implement your own functions for PIs here |
|
class Functions:
    """Callbacks that implement the processing instructions of a page.

    TransformerHandler.processingInstruction looks up the method whose name
    equals the PI target and calls it as ``method(handler, **args)`` where
    ``args`` holds the ``key=value`` pairs of the PI data.  Every callback
    produces its output by firing SAX events on ``handler``.

    This is Python 2 code: PI argument values are converted between unicode
    and Latin-1 byte strings wherever file system paths are involved.
    """

    def __init__(self, pc, valdict, modtime, encodingMeta):
        """Store the collaborators used by the callbacks.

        pc           -- PathCalculator of the page being generated
        valdict      -- dictionary of values retrievable via ``value``
        modtime      -- modification time (seconds since the epoch) of the
                        source; raised by ``include`` for newer includes
        encodingMeta -- charset to announce in ``contentType``; a false
                        value means "use the encoding of the handler"
        """
        self._pc = pc
        self._valdict = valdict
        self._modtime = modtime
        self._encodingMeta = encodingMeta

    def getPc(self):
        """Return the PathCalculator given at construction time."""
        return self._pc

    def value(self, handler, **args):
        """Emit the entry of the value dictionary named by ``key``.

        Raises KeyError if ``key`` is missing or unknown.
        """
        value = self._valdict[args[u"key"]]
        handler.characters(value)

    def title(self, handler, **args):
        """Emit the page title recorded by the handler (``handler._title``)."""
        handler.characters(handler._title)

    def contentType(self, handler, **args):
        """Emit an empty ``meta`` element announcing the content type."""
        # An explicitly forced charset wins over the handler's encoding.
        encoding = self._encodingMeta or handler._encoding
        attr = {
            u"http-equiv": u"Content-Type",
            u"content": u"text/html; charset=%s" % encoding
        }
        handler.startElement(u"meta", attr)
        handler.endElement(u"meta")

    def currentDate(self, handler, **args):
        """Emit the current local time as ``YYYY-MM-DD HH:MM:SS``."""
        handler.characters(unicode(time.strftime('%Y-%m-%d %H:%M:%S')))

    def modificationDate(self, handler, **args):
        """Emit the stored modification time as ``YYYY-MM-DD HH:MM:SS``."""
        handler.characters(unicode(time.strftime('%Y-%m-%d %H:%M:%S',
            time.localtime(self._modtime))))

    def relativeRoot(self, handler, **args):
        """Emit the destination-relative form of the root-anchored ``href``."""
        href = args[u"href"].encode("latin-1")
        handler.characters(self._pc.relDstPathOf('//' + href))

    def include(self, handler, **args):
        """Parse the file named by ``file`` into the running event stream.

        The stored modification time is raised to that of the included file
        if the latter is newer.
        """
        filename = args[u"file"].encode("latin-1")
        filename = self._pc.absSrcPathOf(filename)
        self._modtime = max(self._modtime, os.stat(filename).st_mtime)
        istream = open(filename, "r")
        try:
            parseWithER(istream, handler)
        finally:
            # Release the handle even if the included file fails to parse.
            istream.close()

    def navitem(self, handler, **args):
        """Emit a navigation entry ``<li><X><a href=...>title</a></X></li>``.

        ``X`` is ``strong`` if ``target`` denotes the page being generated
        and ``span`` otherwise.  Requires the arguments ``target`` and
        ``title``.
        """
        target = args[u"target"].encode("latin-1")
        target = self._pc.relDstPathOf(target)
        if self._pc.isSrc(target):
            wrapTagname = u"strong"
        else:
            wrapTagname = u"span"
        title = args[u"title"]
        handler.startElement(u"li", {})
        handler.startElement(wrapTagname, {})
        handler.startElement(u"a", {
            # same result as unicode(target, 'latin-1') for a byte string
            u"href": target.decode("latin-1")
        })
        handler.characters(title)
        handler.endElement(u"a")
        handler.endElement(wrapTagname)
        handler.endElement(u"li")

    def downloadCells(self, handler, **args):
        """Emit two table cells: a download link and the file size in KB.

        ``target`` names the file relative to the destination tree; it must
        exist there because its size is read with os.stat.  ``title`` is the
        link text and defaults to the file name of ``target``.
        """
        target = args[u"target"].encode("latin-1")
        targetReal = self._pc.absDstPathOf(target)
        if u"title" in args:
            title = args[u"title"]
        else:
            # Use the file name ([1]); the directory part ([0]) would give a
            # meaningless or even empty link text.
            title = posixpath.split(target)[1].decode("latin-1")
        size = os.stat(targetReal).st_size
        handler.startElement(u"td", {})
        handler.startElement(u"a", {
            # unicode like in navitem, so non-ASCII names survive quoting
            u"href": target.decode("latin-1")
        })
        handler.characters(title)
        handler.endElement(u"a")
        handler.endElement(u"td")
        handler.startElement(u"td", {})
        # whole kilobytes, separated from the unit by a no-break space
        handler.characters(u"%i%sKB" % (size // 1024, u"\xa0"))
        handler.endElement(u"td")

    def cvs(self, handler, **args):
        """Accept ``cvs`` processing instructions and emit nothing."""
        pass
|
128 |
|
129 # a notion of paths |
|
class PathCalculator:
    """Translate locations between the source tree and the destination tree.

    A location starting with ``//`` is anchored at the tree root; any other
    location is taken relative to the directory of the page being generated.
    """

    def __init__(self, srcLoc, srcRoot, dstRoot):
        """Set up the calculator for one source page.

        srcLoc  -- location of the source file
        srcRoot -- root directory of the source tree
        dstRoot -- root directory of the destination tree

        Raises ValueError if the source file does not live below srcRoot.
        """
        self._src = path.normpath(path.abspath(srcLoc))
        srcPath = path.split(self._src)[0]
        self._srcRoot = path.normpath(path.abspath(srcRoot))
        self._dstRoot = path.normpath(path.abspath(dstRoot))

        # Climb from the directory of the source file up to the source root,
        # collecting one ".." per level (the way back to the root) and the
        # directory names passed (the way from the root down to the page).
        relRoot = ""
        relLocChain = []
        diffRoot = srcPath
        while diffRoot != self._srcRoot:
            parent, chainPiece = path.split(diffRoot)
            if parent == diffRoot:
                # Reached the file system root without meeting the source
                # root; path.split would return the same path forever.
                raise ValueError(
                    "Source %s is not located below source root %s"
                    % (self._src, self._srcRoot))
            relRoot = path.join(relRoot, os.pardir)
            relLocChain.insert(0, chainPiece)
            diffRoot = parent

        if relRoot:
            relRoot = relRoot + '/'
        self._relRoot = relRoot
        if relLocChain:
            self._relLoc = path.join(*relLocChain)
        else:
            self._relLoc = ""

    def isSrc(self, loc):
        """Tell whether ``loc`` denotes the source file of this calculator."""
        return self.absSrcPathOf(loc) == self._src

    def relRootPath(self):
        """Return the path from the page's directory back to the tree root.

        The result is empty for a page in the root directory and otherwise a
        chain of parent directory references ending in ``/``.
        """
        return self._relRoot

    def absSrcPathOf(self, loc):
        """Return the absolute, normalized source path of ``loc``.

        Both kinds of location are resolved with path.abspath, i.e. against
        the current working directory.
        """
        if loc.startswith("//"):
            return path.normpath(path.abspath(loc[2:]))
        else:
            return path.normpath(path.abspath(path.join(self._relLoc, loc)))

    def absDstPathOf(self, loc):
        """Return the path of ``loc`` inside the destination tree."""
        if loc.startswith("//"):
            return path.join(self._dstRoot, loc[2:])
        else:
            return path.join(self._dstRoot, self._relLoc, loc)

    def relSrcPathOf(self, loc):
        """Return the source path of ``loc`` with the source root and the
        page's own directory stripped from its front."""
        loc = self.absSrcPathOf(loc)
        loc = self.stripCommonPrefix(loc, self._srcRoot)
        loc = self.stripCommonPrefix(loc, self._relLoc)
        return loc

    def relDstPathOf(self, loc):
        """Return the destination path of ``loc`` with the destination root
        and the page's own directory stripped from its front."""
        loc = self.absDstPathOf(loc)
        loc = self.stripCommonPrefix(loc, self._dstRoot)
        loc = self.stripCommonPrefix(loc, self._relLoc)
        return loc

    def stripCommonPrefix(self, loc, prefix):
        """Return ``loc`` without the leading directories shared with
        ``prefix`` and without a leading ``/``."""
        common = self.commonPrefix((loc, prefix))
        if common:
            loc = loc[len(common):]
        # NOTE(review): the listing this block was recovered from had lost
        # its indentation; the separator is stripped unconditionally here --
        # confirm against the original file.
        if loc and loc[0] == '/':
            loc = loc[1:]
        return loc

    def commonPrefix(self, locs):
        """Return the longest directory prefix common to all ``locs``.

        The result consists of whole path components only and carries no
        trailing separator; it is empty if nothing is shared.
        """
        common = path.commonprefix(locs)
        # path.commonprefix compares character by character, so the result
        # may end in the middle of a component ("/foo/ba" for "/foo/bar" and
        # "/foo/baz").  That is the case when some location continues behind
        # the prefix with anything but a separator.
        cutsComponent = [
            loc for loc in locs
            if len(common) < len(loc) and loc[len(common)] != path.sep
        ]
        if cutsComponent:
            common = path.split(common)[0]
        if common and common[-1] == path.sep:
            common = common[:-1]

        return common or ""
|
204 |
|
205 # the XML transformer |
|
class TransformerHandler(ContentHandler, EntityResolver):
    """SAX handler that writes the parsed document back out as XML.

    On the way it
      * rewrites root-anchored (``//...``) link targets of selected
        elements into paths valid in the destination tree,
      * executes processing instructions by calling the equally named
        method of the function object given at construction time,
      * records the text of the ``title`` element,
      * redirects requests for the XHTML DTDs to a local directory.

    Start tags are left open until the next event arrives, so elements
    without content are written in the short form ``<name/>``.
    """

    def __init__(self, out, encoding, dtd, func):
        """Create a handler.

        out      -- byte stream receiving the output
        encoding -- name of the output encoding
        dtd      -- directory holding local copies of the XHTML DTDs
        func     -- object providing the processing instruction callbacks
                    and ``getPc()``
        """
        ContentHandler.__init__(self)
        self._out = codecs.getwriter(encoding)(out)
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        self._undeclared_ns_maps = []
        self._encoding = encoding
        self._lastStart = False    # True while a start tag is still open
        self._func = func
        # Pending character data.  Must be a list: characters() appends to
        # it, also before the first flush has happened.
        self._characterBuffer = []
        self._currentXPath = []    # names of the currently open elements
        self._title = None
        self._init = False         # True once the XML declaration is out
        self._dtd = dtd

    def closeLastStart(self):
        """Terminate a start tag that is still open."""
        if self._lastStart:
            self._out.write(u'>')
            self._lastStart = False

    def flushCharacterBuffer(self):
        """Write the pending character data, escaped, and forget it."""
        self._out.write(escape(u"".join(self._characterBuffer)))
        self._characterBuffer = []

    def transformAbsPath(self, attrs, attrname):
        """Return ``attrs`` with a root-anchored path in ``attrname`` made
        relative.

        The attributes are returned untouched unless the value of
        ``attrname`` starts with ``//``; otherwise a modified copy is
        returned.  Raises Exception if the file addressed does not exist in
        the destination tree.
        """
        pathval = attrs.get(attrname, None)
        if pathval and pathval.startswith(u"//"):
            attrs = dict(attrs)
            pathRel = self._func.getPc().relDstPathOf(pathval)
            pathDst = self._func.getPc().absDstPathOf(pathval)
            if not path.exists(pathDst):
                raise Exception("Path does not exist: %s" % pathDst)
            attrs[attrname] = pathRel
            return attrs
        else:
            return attrs

    def startDocument(self):
        """Write the XML declaration, once.

        Documents parsed into the same handler later on (see the
        ``include`` processing instruction) do not repeat it.
        """
        if not self._init:
            if self._encoding.upper() != 'UTF-8':
                self._out.write(u'<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)
            else:
                self._out.write(u'<?xml version="1.0"?>\n')
            self._init = True

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next start tag."""
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Leave the scope of the most recent namespace declaration."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Open element ``name``; the tag stays open for a possible ``/>``.

        The artificial element ``dummy:wrapper`` is dropped silently.
        """
        if name == u"dummy:wrapper":
            return
        self.closeLastStart()
        self.flushCharacterBuffer()
        self._out.write(u'<' + name)
        # this list is not exhaustive
        for tagname, attrname in ((u"a", u"href"), (u"img", u"src"), (u"link", u"href")):
            if name == tagname:
                attrs = self.transformAbsPath(attrs, attrname)
        # Separate loop variables: rebinding ``name`` here would push an
        # attribute name instead of the element name below.
        for (attrName, attrValue) in attrs.items():
            self._out.write(u' %s=%s' % (attrName, quoteattr(attrValue)))
        self._currentXPath.append(name)
        self._lastStart = True

    def endElement(self, name):
        """Close element ``name``; remember the text of ``title``."""
        if name == u"dummy:wrapper":
            return
        elif name == u'title':
            self._title = u"".join(self._characterBuffer)
        self.flushCharacterBuffer()
        if self._lastStart:
            self._out.write(u'/>')
            self._lastStart = False
        else:
            self._out.write('</%s>' % name)
        self._currentXPath.pop()

    def _qname(self, name):
        """Build the written name for a ``(uri, localname)`` pair.

        Names without namespace and names in the default namespace (prefix
        None or empty) are written without prefix.
        """
        if name[0] is None:
            return name[1]
        prefix = self._current_context[name[0]]
        if not prefix:
            return name[1]
        return prefix + u":" + name[1]

    def startElementNS(self, name, qname, attrs):
        """Namespace aware counterpart of startElement.

        The start tag is completed at once; pending namespace declarations
        are attached to it.
        """
        self.closeLastStart()
        self.flushCharacterBuffer()
        tagName = self._qname(name)
        self._out.write(u'<' + tagName)

        for (prefix, uri) in self._undeclared_ns_maps:
            if prefix:
                self._out.write(u' xmlns:%s="%s"' % (prefix, uri))
            else:
                # declaration of the default namespace
                self._out.write(u' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attrName, attrValue) in attrs.items():
            self._out.write(u' %s=%s' % (self._qname(attrName),
                quoteattr(attrValue)))
        self._out.write(u'>')
        self._currentXPath.append(tagName)

    def endElementNS(self, name, qname):
        """Namespace aware counterpart of endElement."""
        self.flushCharacterBuffer()
        tagName = self._qname(name)
        if self._lastStart:
            self._out.write(u'/>')
            self._lastStart = False
        else:
            self._out.write(u'</%s>' % tagName)
        self._currentXPath.pop()

    def characters(self, content):
        """Buffer character data; it is written with the next flush."""
        self.closeLastStart()
        self._characterBuffer.append(content)

    def ignorableWhitespace(self, content):
        """Copy ignorable whitespace to the output verbatim."""
        self.closeLastStart()
        self.flushCharacterBuffer()
        self._out.write(content)

    def resolveEntity(self, publicId, systemId):
        """Redirect XHTML DTD requests to the local DTD directory.

        System identifiers below ``http://www.w3.org/TR/xhtml1/DTD`` and
        identifiers without a directory part are mapped to the file of the
        same name in the local directory; all others pass unchanged.
        """
        loc, name = posixpath.split(systemId)
        if loc == u"http://www.w3.org/TR/xhtml1/DTD" or loc == u"":
            systemId = path.join(self._dtd, name)
        return EntityResolver.resolveEntity(self, publicId, systemId)

    def processingInstruction(self, target, data):
        """Execute a processing instruction.

        ``target`` selects the method of the function object; ``data`` is
        split like a shell command line into ``key=value`` words which are
        passed as keyword arguments.  Raises AttributeError for an unknown
        target and ValueError for a word without ``=``.
        """
        self.closeLastStart()
        self.flushCharacterBuffer()
        func = getattr(self._func, target)
        args = {}
        for keyval in shlex.split(data.encode("utf-8")):
            if "=" not in keyval:
                raise ValueError(
                    "Malformed argument %r in processing instruction %r"
                    % (keyval, target))
            key, val = keyval.split("=", 1)
            args[key] = val
        func(self, **args)
|
365 |
|
def parseWithER(istream, handler):
    """Run a fresh SAX parser over ``istream``.

    ``handler`` is registered twice: as receiver of the content events and
    as resolver of external entities.
    """
    saxReader = makeParser()
    for register in (saxReader.setContentHandler, saxReader.setEntityResolver):
        register(handler)
    saxReader.parse(istream)
|
372 |
|
def main():
    """Command line entry point: transform one source page.

    Positional arguments are ``[key=value]* src [dst]``.  If the argument
    before the last one contains ``=``, the last argument is the source and
    the output goes to standard output; otherwise the last two arguments
    are source and destination, where a destination of ``-`` also means
    standard output.  The key/value pairs become available to the page
    through the ``value`` processing instruction.
    """
    # parse command line
    cmdlineparser = optparse.OptionParser(
        usage = '%prog [options] [key=value]* src [dst]',
        conflict_handler = "error",
        description = '''Leightweight HTML page generation tool''',
        add_help_option = True,
    )
    cmdlineparser.add_option("-s", "--srcroot",
        action="store", dest="srcroot",
        type="string", default=".",
        help="source tree root", metavar='location')
    cmdlineparser.add_option("-d", "--dstroot",
        action="store", dest="dstroot",
        type="string", default=".",
        help="destination tree root", metavar='location')
    cmdlineparser.add_option("-t", "--dtd",
        action="store", dest="dtd",
        type="string", default=".",
        help="local mirror of XHTML DTDs", metavar='location')
    cmdlineparser.add_option("-m", "--encodinghtml",
        action="store", dest="encodinghtml",
        type="string", default="",
        help="force value of html content encoding meta ", metavar='encoding')

    options, args = cmdlineparser.parse_args(sys.argv[1:])

    # check source
    if len(args) < 1:
        cmdlineparser.error("Exactly one soure file must be given")

    # read arguments
    valdict = {}
    vallist = []    # stays empty if the source is the only argument
    if len(args) == 1:
        src = args[0]
        dst = None
    else:
        if "=" in args[-2]:
            src = args[-1]
            dst = None
            vallist = args[:-1]
        else:
            src = args[-2]
            dst = args[-1]
            if dst == "-":
                dst = None
            vallist = args[:-2]
    for keyval in vallist:
        if "=" not in keyval:
            # report as a usage error instead of an unpacking traceback
            cmdlineparser.error("Malformed key=value argument: %s" % keyval)
        key, val = keyval.split("=", 1)
        valdict[unicode(key, 'latin-1')] = unicode(val, 'latin-1')

    # path calculator
    pc = PathCalculator(src, options.srcroot, options.dstroot)

    # function space
    modtime = os.stat(src).st_mtime
    func = Functions(pc, valdict, modtime, options.encodinghtml)

    # allocate file handles; they are released even if processing fails
    istream = open(src, 'r')
    try:
        if dst is not None:
            ostream = open(dst, 'wb')
        else:
            ostream = sys.stdout
        try:
            # process file
            transformer = TransformerHandler(ostream, outputEncoding,
                options.dtd, func)
            parseWithER(istream, transformer)
        finally:
            if dst is not None:
                ostream.close()
            else:
                # standard output is not ours to close
                ostream.flush()
    finally:
        istream.close()
|
447 |
|
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

# List of open tasks; currently it holds nothing but a line break.
__todo__ = '''
'''