156 handler.endElement(u"td") |
156 handler.endElement(u"td") |
157 handler.startElement(u"td", {}) |
157 handler.startElement(u"td", {}) |
158 handler.characters(u"%i%sKB" % (size / 1024, unichr(160))) |
158 handler.characters(u"%i%sKB" % (size / 1024, unichr(160))) |
159 handler.endElement(u"td") |
159 handler.endElement(u"td") |
160 |
160 |
|
def mirror(self, handler, **args):
    """<?mirror prefix="..." title="..."?> - emits a link that switches to a mirror.

    prefix is the base root url of the mirror location and title is the
    visible link text. Both are mandatory; a missing one raises KeyError.
    The link target is prefix joined (POSIX style) with the value returned
    by self._pc.relLocOfThis().
    """
    # Look up both attributes first so a missing one fails before any output.
    mirrorRoot, linkText = args[u"prefix"], args[u"title"]
    target = posixpath.join(mirrorRoot, self._pc.relLocOfThis())

    handler.startElement(u"a", {u"href": target})
    handler.characters(linkText)
    handler.endElement(u"a")
|
172 |
def getPc(self):
    """Return the object stored in self._pc.

    NOTE(review): presumably a PathCalculator instance - confirm against
    the code that assigns self._pc.
    """
    pathCalc = self._pc
    return pathCalc
164 |
176 |
165 # a notion of paths |
177 # a notion of paths |
166 class PathCalculator: |
178 class PathCalculator: |
167 |
179 |
168 def __init__(self, srcLoc, srcRoot, dstRoot): |
180 def __init__(self, srcLoc, srcRoot, dstRoot): |
169 |
181 |
170 self._src = path.normpath(path.abspath(srcLoc)) |
182 self._src = path.normpath(path.abspath(srcLoc)) |
171 srcPath, srcName = path.split(self._src) |
183 srcPath, self._srcName = path.split(self._src) |
172 self._srcRoot = path.normpath(path.abspath(srcRoot)) |
184 self._srcRoot = path.normpath(path.abspath(srcRoot)) |
173 self._dstRoot = path.normpath(path.abspath(dstRoot)) |
185 self._dstRoot = path.normpath(path.abspath(dstRoot)) |
174 self._relRoot = "" |
186 self._relRoot = "" |
175 relLocChain = [] |
187 relLocChain = [] |
176 diffRoot = srcPath |
188 diffRoot = srcPath |
215 loc = self.absDstPathOf(loc) |
227 loc = self.absDstPathOf(loc) |
216 loc = self.stripCommonPrefix(loc, self._dstRoot) |
228 loc = self.stripCommonPrefix(loc, self._dstRoot) |
217 loc = self.stripCommonPrefix(loc, self._relLoc) |
229 loc = self.stripCommonPrefix(loc, self._relLoc) |
218 return loc |
230 return loc |
219 |
231 |
|
def relLocOfThis(self):
    """Return self._relLoc joined with self._srcName using POSIX separators.

    self._srcName is the file-name component split off the source path in
    the constructor.
    """
    parts = (self._relLoc, self._srcName)
    return posixpath.join(*parts)
|
235 |
220 def stripCommonPrefix(self, loc, prefix): |
236 def stripCommonPrefix(self, loc, prefix): |
221 |
237 |
222 common = self.commonPrefix((loc, prefix)) |
238 common = self.commonPrefix((loc, prefix)) |
223 if common: |
239 if common: |
224 loc = loc[len(common):] |
240 loc = loc[len(common):] |
239 return common or "" |
255 return common or "" |
240 |
256 |
241 # the XML transformer |
257 # the XML transformer |
242 class TransformerHandler(ContentHandler, EntityResolver): |
258 class TransformerHandler(ContentHandler, EntityResolver): |
243 |
259 |
244 def __init__(self, out, encoding, dtd, func): |
260 def __init__(self, out, encoding, dtd, func, spamprotect): |
245 |
261 |
246 ContentHandler.__init__(self) |
262 ContentHandler.__init__(self) |
247 #~ EntityResolver.__init__(self) |
263 #~ EntityResolver.__init__(self) |
248 self._out = codecs.getwriter(encoding)(out) |
264 self._out = codecs.getwriter(encoding)(out) |
249 self._ns_contexts = [{}] # contains uri -> prefix dicts |
265 self._ns_contexts = [{}] # contains uri -> prefix dicts |
250 self._current_context = self._ns_contexts[-1] |
266 self._current_context = self._ns_contexts[-1] |
251 self._undeclared_ns_maps = [] |
267 self._undeclared_ns_maps = [] |
252 self._encoding = encoding |
268 self._encoding = encoding |
253 self._lastStart = False |
269 self._lastStart = False |
254 self._func = func |
270 self._func = func |
|
271 self._spamprotect = spamprotect |
255 self._characterBuffer = {} |
272 self._characterBuffer = {} |
256 self._currentXPath = [] |
273 self._currentXPath = [] |
257 self._title = None |
274 self._title = None |
258 self._init = False |
275 self._init = False |
259 self._dtd = dtd |
276 self._dtd = dtd |
264 self._out.write(u'>') |
281 self._out.write(u'>') |
265 self._lastStart = False |
282 self._lastStart = False |
266 |
283 |
def flushCharacterBuffer(self):
    """Write the buffered character data to the output and empty the buffer.

    The buffered chunks are concatenated and XML-escaped, then every "@" is
    rewritten as the numeric character reference "&#64;" so that e-mail
    addresses do not appear literally in the generated markup.
    """
    text = escape(u"".join(self._characterBuffer))
    # The substitution has to happen after escape(); done earlier, the "&"
    # of the character reference would itself be escaped to "&amp;".
    # NOTE(review): this is applied unconditionally, as before - confirm
    # whether it should depend on the spamprotect option.
    self._out.write(text.replace(u"@", u"&#64;"))
    self._characterBuffer = []
271 |
288 |
272 def transformAbsPath(self, attrs, attrname): |
289 def transformAbsPath(self, attrs, attrname): |
273 |
290 |
274 pathval = attrs.get(attrname, None) |
291 pathval = attrs.get(attrname, None) |
313 self._out.write(u'<' + name) |
330 self._out.write(u'<' + name) |
314 # this list is not exhaustive |
331 # this list is not exhaustive |
315 for tagname, attrname in ((u"a", u"href"), (u"img", u"src"), (u"link", u"href")): |
332 for tagname, attrname in ((u"a", u"href"), (u"img", u"src"), (u"link", u"href")): |
316 if name == tagname: |
333 if name == tagname: |
317 attrs = self.transformAbsPath(attrs, attrname) |
334 attrs = self.transformAbsPath(attrs, attrname) |
318 for (name, value) in attrs.items(): |
335 if self.spamprotect and name = u"a": |
319 self._out.write(u' %s=%s' % (name, quoteattr(value))) |
336 value = attrs.get(u"href") |
320 self._currentXPath.append(name) |
337 if value and value.startswith(u"mailto:"): |
|
338 attrs = dict(attrs) |
|
339 attrs[u"href"] = "".join([ ("&#%i;" % ord(c)) for c in value ]) |
|
340 for (key, value) in attrs.items(): |
|
341 self._out.write(u' %s=%s' % (key, quoteattr(value))) |
|
342 self._currentXPath.append(key) |
321 self._lastStart = True |
343 self._lastStart = True |
322 |
344 |
323 def endElement(self, name): |
345 def endElement(self, name): |
324 |
346 |
325 if name == u"dummy:wrapper": |
347 if name == u"dummy:wrapper": |
428 type="string", default=".", |
450 type="string", default=".", |
429 help="local mirror of XHTML DTDs", metavar='location') |
451 help="local mirror of XHTML DTDs", metavar='location') |
430 cmdlineparser.add_option("-m", "--encodinghtml", |
452 cmdlineparser.add_option("-m", "--encodinghtml", |
431 action="store", dest="encodinghtml", |
453 action="store", dest="encodinghtml", |
432 type="string", default="", |
454 type="string", default="", |
433 help="force value of html content encoding meta ", metavar='encoding') |
455 help="force value of html content encoding meta tag", metavar='encoding') |
434 |
456 cmdlineparser.add_option("-s", "--spamprotect", |
|
457 action="store_true", dest="spamprotect", |
|
458 help="rewrite mailto-links using entities") |
435 |
459 |
436 options, args = cmdlineparser.parse_args(sys.argv[1:]) |
460 options, args = cmdlineparser.parse_args(sys.argv[1:]) |
437 |
461 |
438 # check source |
462 # check source |
439 if len(args) < 1: |
463 if len(args) < 1: |
472 ostream = open(dst, 'wb') |
496 ostream = open(dst, 'wb') |
473 else: |
497 else: |
474 ostream = sys.stdout |
498 ostream = sys.stdout |
475 |
499 |
476 # process file |
500 # process file |
477 transformer = TransformerHandler(ostream, outputEncoding, options.dtd, func) |
501 transformer = TransformerHandler(ostream, outputEncoding, options.dtd, func, options.spamprotect) |
478 parseWithER(istream, transformer) |
502 parseWithER(istream, transformer) |
479 |
503 |
480 # close handles |
504 # close handles |
481 ostream.close() |
505 ostream.close() |
482 istream.close() |
506 istream.close() |