# HG changeset patch # User haftmann # Date 1146579593 -7200 # Node ID fc4c6458d5698eb016c5bbcdd4e0016e9aec6b64 # Parent dae447f2b0b4442df5d6bd6e901a174bce49ab9c added obfuscation for mails diff -r dae447f2b0b4 -r fc4c6458d569 Admin/website/build/make_dep.bash --- a/Admin/website/build/make_dep.bash Tue May 02 14:27:49 2006 +0200 +++ b/Admin/website/build/make_dep.bash Tue May 02 16:19:53 2006 +0200 @@ -49,10 +49,11 @@ echo ' -$(TIDYCMD) $@' >> "$DEP_FILE" echo ' chmod $(TARGET_UMASK_FILE) $@' >> "$DEP_FILE" echo ' chgrp $(TARGET_GROUP) $@' >> "$DEP_FILE" - allhtml="$allhtml$outputfile "; \ + allhtml="$allhtml$outputfile " echo >> "$DEP_FILE" -done; \ +done echo "DEP_ALLHTML=$allhtml" >> "$DEP_FILE" echo >> "$DEP_FILE" echo 'allsite: $(DEP_ALLHTML) $(DEP_ALLSTATIC)' >> "$DEP_FILE" +echo ' $(PYTHON) build/obfusmail.py --dtd="dtd/" --dstroot="$(OUTPUTROOT)" --dstdir="img"' "$allhtml" >> "$DEP_FILE" echo ".PHONY: allsite" >> "$DEP_FILE" diff -r dae447f2b0b4 -r fc4c6458d569 Admin/website/build/obfusmail.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Admin/website/build/obfusmail.py Tue May 02 16:19:53 2006 +0200 @@ -0,0 +1,168 @@ +#!/usr/bin/env python +# -*- coding: Latin-1 -*- + +""" + Obfucatings mail adresses +""" + +__author__ = 'Florian Haftmann, florian.haftmann@informatik.tu-muenchen.de' +__revision__ = '$Id$' + +import sys +import os +from os import path +import posixpath +import optparse +from cStringIO import StringIO + +from xml.sax.saxutils import escape +from xml.sax.saxutils import quoteattr + +from xhtmlparse import TransformerHandler, parseWithER + +# global configuration +outputEncoding = 'UTF-8' + +class FindHandler(TransformerHandler): + + class DevZero(object): + + def write(self, s): + + pass + + def __init__(self, dtd, mails): + + super(FindHandler, self).__init__(self.DevZero(), outputEncoding, dtd) + self.pending_mail = None + self.mails = mails + + def startElement(self, name, attrs): + + if name == u'a': + href = attrs.get(u'href', u'') + if href.startswith(u'mailto:'): + self.pending_mail = href[7:] + super(FindHandler, self).startElement(name, attrs) + + def endElement(self, name): + + if name == u'a': + if self.pending_mail is not None: + if self.currentContent() != self.pending_mail: + raise Exception("Inconsistent mail address: '%s' vs. '%s'" % (self.currentContent(), self.pending_mail)) + self.mails[self.pending_mail] = True + self.pending_mail = None + super(FindHandler, self).endElement(name) + + def processingInstruction(self, target, data): + + pass + +class ReplaceHandler(TransformerHandler): + + def __init__(self, out, dtd, mails): + + super(ReplaceHandler, self).__init__(out, outputEncoding, dtd) + self.pending_mail = None + self.mails = mails + + def startElement(self, name, attrs): + + if name == u'a': + href = attrs.get(u'href', u'') + if href.startswith(u'mailto:'): + self.pending_mail = href[7:] + return + + super(ReplaceHandler, self).startElement(name, attrs) + + def endElement(self, name): + + if name == u'a': + if self.pending_mail is not None: + self.flushCharacterBuffer() + self._out.write(self.mails[self.pending_mail]) + self.pending_mail = None + return + + super(ReplaceHandler, self).endElement(name) + + def characters(self, content): + + if self.pending_mail is None: + super(ReplaceHandler, self).characters(content) + + def processingInstruction(self, target, data): + + pass + +def obfuscate(mailaddr, dsturl, dstfile): + + def mk_line(s): + return u"document.write('%s');" % s.replace("'", "\\'") + def mk_script(s): + return u'' % s + + name, host = mailaddr.split("@", 2) + imgname = (name + "_" + host).replace(".", "_"). replace("?", "_") + ".png" + imgfile = path.join(dstfile, imgname) + os.system("convert label:'%s' '%s'" % (mailaddr, imgfile)) + mailsimple = u"{%s} AT [%s]" % (name, host) + imgurl = posixpath.join(dsturl, imgname) + mailscript = u" ".join(map(mk_line, [''])); + mailimg = '%s' % (quoteattr(imgurl), quoteattr(mailsimple)) + + return (mk_script(mailscript) + mailimg + mk_script(mk_line(""))) + +def main(): + + # parse command line + cmdlineparser = optparse.OptionParser( + usage = '%prog [options] htmlfiles*', + conflict_handler = "error", + description = '''Protecting mail adresses in html files by obfuscating''', + add_help_option = True, + ) + cmdlineparser.add_option("-d", "--dstroot", + action="store", dest="dstroot", + type="string", default=".", + help="root destination of generated images", metavar='location') + cmdlineparser.add_option("-D", "--dstdir", + action="store", dest="dstdir", + type="string", default=".", + help="root destination of generated images", metavar='location') + cmdlineparser.add_option("-t", "--dtd", + action="store", dest="dtd", + type="string", default=".", + help="local mirror of XHTML DTDs", metavar='location') + + options, filenames = cmdlineparser.parse_args(sys.argv[1:]) + + # find mails + mails = {} + for filename in filenames: + istream = open(filename, 'r') + findhandler = FindHandler(options.dtd, mails) + parseWithER(istream, findhandler) + istream.close() + + # transform mails + mails_subst = {} + for mail in mails.keys(): + mails_subst[mail] = obfuscate(mail, options.dstdir, path.join(options.dstroot, options.dstdir)) + + # transform pages + for filename in filenames: + istream = StringIO(open(filename, 'r').read()) + ostream = open(filename, 'wb') + replacehandler = ReplaceHandler(ostream, options.dtd, mails_subst) + parseWithER(istream, replacehandler) + ostream.close() + istream.close() + +if __name__ == '__main__': + main() + +__todo__ = ''' +''' diff -r dae447f2b0b4 -r fc4c6458d569 Admin/website/build/project.mak --- a/Admin/website/build/project.mak Tue May 02 14:27:49 2006 +0200 +++ b/Admin/website/build/project.mak Tue May 02 16:19:53 2006 +0200 @@ -26,10 +26,10 @@ $(COPY) -vRud $ prefix dicts - self._current_context = self._ns_contexts[-1] - self._undeclared_ns_maps = [] - self._encoding = encoding - self._lastStart = False + super(FunctionsHandler, self).__init__(out, encoding, dtd) self._func = func - self._characterBuffer = {} - self._currentXPath = [] self._title = None - self._init = False - self._dtd = dtd - - def closeLastStart(self): - - if self._lastStart: - self._out.write(u'>') - self._lastStart = False - - def flushCharacterBuffer(self): - - content = escape(u"".join(self._characterBuffer)) - self._out.write(content) - self._characterBuffer = [] def transformAbsPath(self, attrs, attrname): @@ -300,42 +268,15 @@ else: return attrs - def startDocument(self): - - if not self._init: - if self._encoding.upper() != 'UTF-8': - self._out.write(u'\n' % - self._encoding) - else: - self._out.write(u'\n') - self._init = True - - def startPrefixMapping(self, prefix, uri): - - self._ns_contexts.append(self._current_context.copy()) - self._current_context[uri] = prefix - self._undeclared_ns_maps.append((prefix, uri)) - - def endPrefixMapping(self, prefix): - - self._current_context = self._ns_contexts[-1] - del self._ns_contexts[-1] - def startElement(self, name, attrs): if name == u"dummy:wrapper": return - self.closeLastStart() - self.flushCharacterBuffer() - self._out.write(u'<' + name) # this list is not exhaustive for tagname, attrname in ((u"a", u"href"), (u"img", u"src"), (u"link", u"href")): if name == tagname: attrs = self.transformAbsPath(attrs, attrname) - for (key, value) in attrs.items(): - self._out.write(u' %s=%s' % (key, quoteattr(value))) - self._currentXPath.append(name) - self._lastStart = True + super(FunctionsHandler, self).startElement(name, attrs) def endElement(self, name): @@ -343,67 +284,7 @@ return elif name == u'title': self._title = u"".join(self._characterBuffer) - self.flushCharacterBuffer() - if self._lastStart: - self._out.write(u'/>') - self._lastStart = False - else: - self._out.write('' % name) - self._currentXPath.pop() - - def startElementNS(self, name, qname, attrs): - - self.closeLastStart() - self.flushCharacterBuffer() - if name[0] is None: - # if the name was not namespace-scoped, use the unqualified part - name = name[1] - else: - # else try to restore the original prefix from the namespace - name = self._current_context[name[0]] + u":" + name[1] - self._out.write(u'<' + name) - - for pair in self._undeclared_ns_maps: - self._out.write(u' xmlns:%s="%s"' % pair) - self._undeclared_ns_maps = [] - - for (name, value) in attrs.items(): - name = self._current_context[name[0]] + ":" + name[1] - self._out.write(' %s=%s' % (name, quoteattr(value))) - self._out.write('>') - self._currentXPath.append(name) - - def endElementNS(self, name, qname): - - self.flushCharacterBuffer() - if name[0] is None: - name = name[1] - else: - name = self._current_context[name[0]] + u":" + name[1] - if self._lastStart: - self._out.write(u'/>') - self._lastStart = False - else: - self._out.write(u'' % name) - self._currentXPath.pop() - - def characters(self, content): - - self.closeLastStart() - self._characterBuffer.append(content) - - def ignorableWhitespace(self, content): - - self.closeLastStart() - self.flushCharacterBuffer() - self._out.write(content) - - def resolveEntity(self, publicId, systemId): - - loc, name = posixpath.split(systemId) - if loc == u"http://www.w3.org/TR/xhtml1/DTD" or loc == u"": - systemId = path.abspath(path.join(self._dtd, name)) - return EntityResolver.resolveEntity(self, publicId, systemId) + super(FunctionsHandler, self).endElement(name) def processingInstruction(self, target, data): @@ -416,12 +297,6 @@ args[key] = val func(self, **args) -def parseWithER(istream, handler): - - parser = makeParser() - parser.setContentHandler(handler) - parser.setEntityResolver(handler) - parser.parse(istream) def main(): @@ -491,7 +366,7 @@ # process file try: - transformer = TransformerHandler(ostream, outputEncoding, options.dtd, func) + transformer = FunctionsHandler(ostream, outputEncoding, options.dtd, func) parseWithER(istream, transformer) except Exception: if dst is not None: diff -r dae447f2b0b4 -r fc4c6458d569 Admin/website/build/xhtmlparse.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Admin/website/build/xhtmlparse.py Tue May 02 16:19:53 2006 +0200 @@ -0,0 +1,163 @@ +#!/usr/bin/env python +# -*- coding: Latin-1 -*- + +""" + Common services for parsing xhtml. +""" + +__all__ = ['TransformerHandler'] + +__author__ = 'Florian Haftmann, florian.haftmann@informatik.tu-muenchen.de' +__revision__ = '$Id$' + +from os import path +import codecs +import posixpath +from xml.sax.saxutils import escape +from xml.sax.saxutils import quoteattr +from xml.sax import make_parser as makeParser +from xml.sax.handler import ContentHandler +from xml.sax.handler import EntityResolver +from xml.sax.xmlreader import AttributesImpl as Attributes +from xml.sax import SAXException +from xml.sax import SAXParseException + +nbsp = unichr(160) + +class TransformerHandler(object, ContentHandler, EntityResolver): + + def __init__(self, out, encoding, dtd): + + ContentHandler.__init__(self) + self._out = codecs.getwriter(encoding)(out) + self._encoding = encoding + self._dtd = dtd + self._ns_contexts = [{}] # contains uri -> prefix dicts + self._current_context = self._ns_contexts[-1] + self._undeclared_ns_maps = [] + self._characterBuffer = {} + self._lastStart = False + self._currentXPath = [] + self._init = False + + def closeLastStart(self): + + if self._lastStart: + self._out.write(u'>') + self._lastStart = False + + def currentContent(self): + + return u"".join(self._characterBuffer) + + def flushCharacterBuffer(self): + + content = escape(self.currentContent()) + self._out.write(content) + self._characterBuffer = [] + + def startDocument(self): + + if not self._init: + if self._encoding.upper() != 'UTF-8': + self._out.write(u'\n' % + self._encoding) + else: + self._out.write(u'\n') + self._init = True + + def startPrefixMapping(self, prefix, uri): + + self._ns_contexts.append(self._current_context.copy()) + self._current_context[uri] = prefix + self._undeclared_ns_maps.append((prefix, uri)) + + def endPrefixMapping(self, prefix): + + self._current_context = self._ns_contexts[-1] + del self._ns_contexts[-1] + + def startElement(self, name, attrs): + + self.closeLastStart() + self.flushCharacterBuffer() + self._out.write(u'<' + name) + for (key, value) in attrs.items(): + self._out.write(u' %s=%s' % (key, quoteattr(value))) + self._currentXPath.append(name) + self._lastStart = True + + def endElement(self, name): + + self.flushCharacterBuffer() + if self._lastStart: + self._out.write(u'/>') + self._lastStart = False + else: + self._out.write('' % name) + self._currentXPath.pop() + + def startElementNS(self, name, qname, attrs): + + self.closeLastStart() + self.flushCharacterBuffer() + if name[0] is None: + # if the name was not namespace-scoped, use the unqualified part + name = name[1] + else: + # else try to restore the original prefix from the namespace + name = self._current_context[name[0]] + u":" + name[1] + self._out.write(u'<' + name) + + for pair in self._undeclared_ns_maps: + self._out.write(u' xmlns:%s="%s"' % pair) + self._undeclared_ns_maps = [] + + for (name, value) in attrs.items(): + name = self._current_context[name[0]] + ":" + name[1] + self._out.write(' %s=%s' % (name, quoteattr(value))) + self._out.write('>') + self._currentXPath.append(name) + + def endElementNS(self, name, qname): + + self.flushCharacterBuffer() + if name[0] is None: + name = name[1] + else: + name = self._current_context[name[0]] + u":" + name[1] + if self._lastStart: + self._out.write(u'/>') + self._lastStart = False + else: + self._out.write(u'' % name) + self._currentXPath.pop() + + def characters(self, content): + + self.closeLastStart() + self._characterBuffer.append(content) + + def ignorableWhitespace(self, content): + + self.closeLastStart() + self.flushCharacterBuffer() + self._out.write(content) + + def resolveEntity(self, publicId, systemId): + + loc, name = posixpath.split(systemId) + if loc == u"http://www.w3.org/TR/xhtml1/DTD" or loc == u"": + systemId = path.abspath(path.join(self._dtd, name)) + return EntityResolver.resolveEntity(self, publicId, systemId) + + def processingInstruction(self, target, data): + + raise Exception("no handler defined for processing instructions") + +def parseWithER(istream, handler): + + parser = makeParser() + parser.setContentHandler(handler) + parser.setEntityResolver(handler) + parser.parse(istream) diff -r dae447f2b0b4 -r fc4c6458d569 Admin/website/community.html --- a/Admin/website/community.html Tue May 02 14:27:49 2006 +0200 +++ b/Admin/website/community.html Tue May 02 16:19:53 2006 +0200 @@ -32,9 +32,10 @@

You may use the mailing list isabelle-users@cl.cam.ac.uk and its archive to discuss - problems and results. To subscribe, - contact our robot.

+ problems and results. + To subscribe, contact our robot: + Cl-isabelle-users-request@lists.cam.ac.uk?subject=subscribe. +

Contributing theorems

diff -r dae447f2b0b4 -r fc4c6458d569 Admin/website/documentation.html --- a/Admin/website/documentation.html Tue May 02 14:27:49 2006 +0200 +++ b/Admin/website/documentation.html Tue May 02 16:19:53 2006 +0200 @@ -34,9 +34,10 @@

You may use the mailing list isabelle-users@cl.cam.ac.uk and its archive to discuss - problems and results. To subscribe, - contact our robot.

+ problems and results. + To subscribe, contact our robot: + Cl-isabelle-users-request@lists.cam.ac.uk?subject=subscribe. +

Please consult the FAQ for answers to frequent problems.

diff -r dae447f2b0b4 -r fc4c6458d569 Admin/website/index.html --- a/Admin/website/index.html Tue May 02 14:27:49 2006 +0200 +++ b/Admin/website/index.html Tue May 02 16:19:53 2006 +0200 @@ -72,9 +72,9 @@ Use the mailing list isabelle-users@cl.cam.ac.uk and its archive to -discuss problems and results. To subscribe, - contact our robot. +discuss problems and results. + To subscribe, contact our robot: + Cl-isabelle-users-request@lists.cam.ac.uk?subject=subscribe.