added obfuscation for mails
authorhaftmann
Tue, 02 May 2006 16:19:53 +0200
changeset 19533 fc4c6458d569
parent 19532 dae447f2b0b4
child 19534 1724ec4032c5
added obfuscation for mails
Admin/website/build/make_dep.bash
Admin/website/build/obfusmail.py
Admin/website/build/project.mak
Admin/website/build/pypager.py
Admin/website/build/xhtmlparse.py
Admin/website/community.html
Admin/website/documentation.html
Admin/website/index.html
--- a/Admin/website/build/make_dep.bash	Tue May 02 14:27:49 2006 +0200
+++ b/Admin/website/build/make_dep.bash	Tue May 02 16:19:53 2006 +0200
@@ -49,10 +49,11 @@
     echo '	-$(TIDYCMD) $@' >> "$DEP_FILE"
     echo '	chmod $(TARGET_UMASK_FILE) $@' >> "$DEP_FILE"
     echo '	chgrp $(TARGET_GROUP) $@' >> "$DEP_FILE"
-    allhtml="$allhtml$outputfile "; \
+    allhtml="$allhtml$outputfile "
     echo >> "$DEP_FILE"
-done; \
+done
 echo "DEP_ALLHTML=$allhtml" >> "$DEP_FILE"
 echo >> "$DEP_FILE"
 echo 'allsite: $(DEP_ALLHTML) $(DEP_ALLSTATIC)' >> "$DEP_FILE"
+echo '	$(PYTHON) build/obfusmail.py --dtd="dtd/" --dstroot="$(OUTPUTROOT)" --dstdir="img"' "$allhtml" >> "$DEP_FILE"
 echo ".PHONY: allsite" >> "$DEP_FILE"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Admin/website/build/obfusmail.py	Tue May 02 16:19:53 2006 +0200
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+# -*- coding: Latin-1 -*-
+
+"""
+    Obfucatings mail adresses
+"""
+
+__author__ = 'Florian Haftmann, florian.haftmann@informatik.tu-muenchen.de'
+__revision__ = '$Id$'
+
+import sys
+import os
+from os import path
+import posixpath
+import optparse
+from cStringIO import StringIO
+
+from xml.sax.saxutils import escape
+from xml.sax.saxutils import quoteattr
+
+from xhtmlparse import TransformerHandler, parseWithER
+
+# global configuration
+outputEncoding = 'UTF-8'
+
+class FindHandler(TransformerHandler):
+
+    class DevZero(object):
+
+        def write(self, s):
+
+            pass
+
+    def __init__(self, dtd, mails):
+
+        super(FindHandler, self).__init__(self.DevZero(), outputEncoding, dtd)
+        self.pending_mail = None
+        self.mails = mails
+
+    def startElement(self, name, attrs):
+
+        if name == u'a':
+            href = attrs.get(u'href', u'')
+            if href.startswith(u'mailto:'):
+                self.pending_mail = href[7:]
+        super(FindHandler, self).startElement(name, attrs)
+
+    def endElement(self, name):
+
+        if name == u'a':
+            if self.pending_mail is not None:
+                if self.currentContent() != self.pending_mail:
+                    raise Exception("Inconsistent mail address: '%s' vs. '%s'" % (self.currentContent(), self.pending_mail))
+                self.mails[self.pending_mail] = True
+                self.pending_mail = None
+        super(FindHandler, self).endElement(name)
+
+    def processingInstruction(self, target, data):
+
+        pass
+
+class ReplaceHandler(TransformerHandler):
+
+    def __init__(self, out, dtd, mails):
+
+        super(ReplaceHandler, self).__init__(out, outputEncoding, dtd)
+        self.pending_mail = None
+        self.mails = mails
+
+    def startElement(self, name, attrs):
+
+        if name == u'a':
+            href = attrs.get(u'href', u'')
+            if href.startswith(u'mailto:'):
+                self.pending_mail = href[7:]
+                return
+
+        super(ReplaceHandler, self).startElement(name, attrs)
+
+    def endElement(self, name):
+
+        if name == u'a':
+            if self.pending_mail is not None:
+                self.flushCharacterBuffer()
+                self._out.write(self.mails[self.pending_mail])
+                self.pending_mail = None
+                return
+
+        super(ReplaceHandler, self).endElement(name)
+
+    def characters(self, content):
+
+        if self.pending_mail is None:
+            super(ReplaceHandler, self).characters(content)
+
+    def processingInstruction(self, target, data):
+
+        pass
+
+def obfuscate(mailaddr, dsturl, dstfile):
+
+    def mk_line(s):
+        return u"document.write('%s');" % s.replace("'", "\\'")
+    def mk_script(s):
+        return u'<script type="text/javascript">%s</script>' % s
+
+    name, host = mailaddr.split("@", 2)
+    imgname = (name + "_" + host).replace(".", "_"). replace("?", "_") + ".png"
+    imgfile = path.join(dstfile, imgname)
+    os.system("convert label:'%s' '%s'" % (mailaddr, imgfile))
+    mailsimple = u"{%s} AT [%s]" % (name, host)
+    imgurl = posixpath.join(dsturl, imgname)
+    mailscript = u" ".join(map(mk_line, ['<a href="', "mailto:", name, "@", host, '">']));
+    mailimg = '<img src=%s style="vertical-align:middle" alt=%s />' % (quoteattr(imgurl), quoteattr(mailsimple))
+
+    return (mk_script(mailscript) + mailimg + mk_script(mk_line("</a>")))
+
+def main():
+
+    # parse command line
+    cmdlineparser = optparse.OptionParser(
+        usage = '%prog [options] htmlfiles*',
+        conflict_handler = "error",
+        description = '''Protecting mail adresses in html files by obfuscating''',
+        add_help_option = True,
+    )
+    cmdlineparser.add_option("-d", "--dstroot",
+        action="store", dest="dstroot",
+        type="string", default=".",
+        help="root destination of generated images", metavar='location')
+    cmdlineparser.add_option("-D", "--dstdir",
+        action="store", dest="dstdir",
+        type="string", default=".",
+        help="root destination of generated images", metavar='location')
+    cmdlineparser.add_option("-t", "--dtd",
+        action="store", dest="dtd",
+        type="string", default=".",
+        help="local mirror of XHTML DTDs", metavar='location')
+
+    options, filenames = cmdlineparser.parse_args(sys.argv[1:])
+
+    # find mails
+    mails = {}
+    for filename in filenames:
+        istream = open(filename, 'r')
+        findhandler = FindHandler(options.dtd, mails)
+        parseWithER(istream, findhandler)
+        istream.close()
+
+    # transform mails
+    mails_subst = {}
+    for mail in mails.keys():
+        mails_subst[mail] = obfuscate(mail, options.dstdir, path.join(options.dstroot, options.dstdir))
+
+    # transform pages
+    for filename in filenames:
+        istream = StringIO(open(filename, 'r').read())
+        ostream = open(filename, 'wb')
+        replacehandler = ReplaceHandler(ostream, options.dtd, mails_subst)
+        parseWithER(istream, replacehandler)
+        ostream.close()
+        istream.close()
+
+if __name__ == '__main__':
+    main()
+
+__todo__ = '''
+'''
--- a/Admin/website/build/project.mak	Tue May 02 14:27:49 2006 +0200
+++ b/Admin/website/build/project.mak	Tue May 02 16:19:53 2006 +0200
@@ -26,10 +26,10 @@
 	$(COPY) -vRud $</[^w]* $@
 	-chgrp -hR $(TARGET_GROUP) $@
 	-chmod -R u+w,g-w,o-w $@
-	-[ ! -e $@/Isabelle ] && ln -s $(ISABELLE_DIST)/$(DISTNAME) $@/Isabelle
+	rm -f $@/Isabelle && ln -s $(ISABELLE_DIST)/$(DISTNAME) $@/Isabelle
 	-chgrp -h $(TARGET_GROUP) $@/Isabelle
 	-chmod u+w,g-w,o-w $@/Isabelle
-	( cd $(OUTPUTROOT); [ ! -e dist ] && ln -s $(OUTPUTDIST_REL) dist || true)
+	( cd $(OUTPUTROOT) && rm -f dist && ln -s $(OUTPUTDIST_REL) dist)
 
 else
 
@@ -38,10 +38,10 @@
 	$(RSYNC) -v --exclude='/website/' -rlt --delete --delete-after $</ $@
 	-chgrp -hR $(TARGET_GROUP) $@
 	-chmod -R u+w,g-w,o-w $@
-	-[ ! -e $@/Isabelle ] && ln -s $(ISABELLE_DIST)/$(DISTNAME) $@/Isabelle
+	rm -f $@/Isabelle &&  ln -s $(ISABELLE_DIST)/$(DISTNAME) $@/Isabelle
 	-chgrp -h $(TARGET_GROUP) $@/Isabelle
 	-chmod u+w,g-w,o-w $@/Isabelle
-	( cd $(OUTPUTROOT); [ ! -e dist ] && ln -s $(OUTPUTDIST_REL) dist || true)
+	( cd $(OUTPUTROOT) && rm -f dist && ln -s $(OUTPUTDIST_REL) dist)
 
 SYNC_ALWAYS:
 
--- a/Admin/website/build/pypager.py	Tue May 02 14:27:49 2006 +0200
+++ b/Admin/website/build/pypager.py	Tue May 02 16:19:53 2006 +0200
@@ -13,20 +13,12 @@
 import os
 from os import path
 import posixpath
-import codecs
 import shlex
 import optparse
 import time
 
-# xml imports
-from xml.sax.saxutils import escape
-from xml.sax.saxutils import quoteattr
-from xml.sax import make_parser as makeParser
-from xml.sax.handler import ContentHandler
-from xml.sax.handler import EntityResolver
-from xml.sax.xmlreader import AttributesImpl as Attributes
-from xml.sax import SAXException
-from xml.sax import SAXParseException
+# xhtml parsing
+from xhtmlparse import TransformerHandler, parseWithER
 
 nbsp = unichr(160)
 
@@ -34,7 +26,7 @@
 outputEncoding = 'UTF-8'
 
 # implement your own functions for PIs here
-class Functions:
+class Functions(object):
 
     def __init__(self, pc, valdict, modtime, encodingMeta):
 
@@ -254,37 +246,13 @@
 
         return common or ""
 
-# the XML transformer
-class TransformerHandler(ContentHandler, EntityResolver):
+class FunctionsHandler(TransformerHandler):
 
     def __init__(self, out, encoding, dtd, func):
 
-        ContentHandler.__init__(self)
-        #~ EntityResolver.__init__(self)
-        self._out = codecs.getwriter(encoding)(out)
-        self._ns_contexts = [{}] # contains uri -> prefix dicts
-        self._current_context = self._ns_contexts[-1]
-        self._undeclared_ns_maps = []
-        self._encoding = encoding
-        self._lastStart = False
+        super(FunctionsHandler, self).__init__(out, encoding, dtd)
         self._func = func
-        self._characterBuffer = {}
-        self._currentXPath = []
         self._title = None
-        self._init = False
-        self._dtd = dtd
-
-    def closeLastStart(self):
-
-        if self._lastStart:
-            self._out.write(u'>')
-            self._lastStart = False
-
-    def flushCharacterBuffer(self):
-
-        content = escape(u"".join(self._characterBuffer))
-        self._out.write(content)
-        self._characterBuffer = []
 
     def transformAbsPath(self, attrs, attrname):
 
@@ -300,42 +268,15 @@
         else:
             return attrs
 
-    def startDocument(self):
-
-        if not self._init:
-            if self._encoding.upper() != 'UTF-8':
-                self._out.write(u'<?xml version="1.0" encoding="%s"?>\n' %
-                                self._encoding)
-            else:
-                self._out.write(u'<?xml version="1.0"?>\n')
-            self._init = True
-
-    def startPrefixMapping(self, prefix, uri):
-
-        self._ns_contexts.append(self._current_context.copy())
-        self._current_context[uri] = prefix
-        self._undeclared_ns_maps.append((prefix, uri))
-
-    def endPrefixMapping(self, prefix):
-
-        self._current_context = self._ns_contexts[-1]
-        del self._ns_contexts[-1]
-
     def startElement(self, name, attrs):
 
         if name == u"dummy:wrapper":
             return
-        self.closeLastStart()
-        self.flushCharacterBuffer()
-        self._out.write(u'<' + name)
         # this list is not exhaustive
         for tagname, attrname in ((u"a", u"href"), (u"img", u"src"), (u"link", u"href")):
             if name == tagname:
                 attrs = self.transformAbsPath(attrs, attrname)
-        for (key, value) in attrs.items():
-            self._out.write(u' %s=%s' % (key, quoteattr(value)))
-        self._currentXPath.append(name)
-        self._lastStart = True
+        super(FunctionsHandler, self).startElement(name, attrs)
 
     def endElement(self, name):
 
@@ -343,67 +284,7 @@
             return
         elif name == u'title':
             self._title = u"".join(self._characterBuffer)
-        self.flushCharacterBuffer()
-        if self._lastStart:
-            self._out.write(u'/>')
-            self._lastStart = False
-        else:
-            self._out.write('</%s>' % name)
-        self._currentXPath.pop()
-
-    def startElementNS(self, name, qname, attrs):
-
-        self.closeLastStart()
-        self.flushCharacterBuffer()
-        if name[0] is None:
-            # if the name was not namespace-scoped, use the unqualified part
-            name = name[1]
-        else:
-            # else try to restore the original prefix from the namespace
-            name = self._current_context[name[0]] + u":" + name[1]
-        self._out.write(u'<' + name)
-
-        for pair in self._undeclared_ns_maps:
-            self._out.write(u' xmlns:%s="%s"' % pair)
-        self._undeclared_ns_maps = []
-
-        for (name, value) in attrs.items():
-            name = self._current_context[name[0]] + ":" + name[1]
-            self._out.write(' %s=%s' % (name, quoteattr(value)))
-        self._out.write('>')
-        self._currentXPath.append(name)
-
-    def endElementNS(self, name, qname):
-
-        self.flushCharacterBuffer()
-        if name[0] is None:
-            name = name[1]
-        else:
-            name = self._current_context[name[0]] + u":" + name[1]
-        if self._lastStart:
-            self._out.write(u'/>')
-            self._lastStart = False
-        else:
-            self._out.write(u'</%s>' % name)
-        self._currentXPath.pop()
-
-    def characters(self, content):
-
-        self.closeLastStart()
-        self._characterBuffer.append(content)
-
-    def ignorableWhitespace(self, content):
-
-        self.closeLastStart()
-        self.flushCharacterBuffer()
-        self._out.write(content)
-
-    def resolveEntity(self, publicId, systemId):
-
-        loc, name = posixpath.split(systemId)
-        if loc == u"http://www.w3.org/TR/xhtml1/DTD" or loc == u"":
-            systemId = path.abspath(path.join(self._dtd, name))
-        return EntityResolver.resolveEntity(self, publicId, systemId)
+        super(FunctionsHandler, self).endElement(name)
 
     def processingInstruction(self, target, data):
 
@@ -416,12 +297,6 @@
             args[key] = val
         func(self, **args)
 
-def parseWithER(istream, handler):
-
-    parser = makeParser()
-    parser.setContentHandler(handler)
-    parser.setEntityResolver(handler)
-    parser.parse(istream)
 
 def main():
 
@@ -491,7 +366,7 @@
 
     # process file
     try:
-        transformer = TransformerHandler(ostream, outputEncoding, options.dtd, func)
+        transformer = FunctionsHandler(ostream, outputEncoding, options.dtd, func)
         parseWithER(istream, transformer)
     except Exception:
         if dst is not None:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Admin/website/build/xhtmlparse.py	Tue May 02 16:19:53 2006 +0200
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+# -*- coding: Latin-1 -*-
+
+"""
+    Common services for parsing xhtml.
+"""
+
+__all__ = ['TransformerHandler']
+
+__author__ = 'Florian Haftmann, florian.haftmann@informatik.tu-muenchen.de'
+__revision__ = '$Id$'
+
+from os import path
+import codecs
+import posixpath
+from xml.sax.saxutils import escape
+from xml.sax.saxutils import quoteattr
+from xml.sax import make_parser as makeParser
+from xml.sax.handler import ContentHandler
+from xml.sax.handler import EntityResolver
+from xml.sax.xmlreader import AttributesImpl as Attributes
+from xml.sax import SAXException
+from xml.sax import SAXParseException
+
+nbsp = unichr(160)
+
+class TransformerHandler(object, ContentHandler, EntityResolver):
+
+    def __init__(self, out, encoding, dtd):
+
+        ContentHandler.__init__(self)
+        self._out = codecs.getwriter(encoding)(out)
+        self._encoding = encoding
+        self._dtd = dtd
+        self._ns_contexts = [{}] # contains uri -> prefix dicts
+        self._current_context = self._ns_contexts[-1]
+        self._undeclared_ns_maps = []
+        self._characterBuffer = {}
+        self._lastStart = False
+        self._currentXPath = []
+        self._init = False
+
+    def closeLastStart(self):
+
+        if self._lastStart:
+            self._out.write(u'>')
+            self._lastStart = False
+
+    def currentContent(self):
+
+        return u"".join(self._characterBuffer)
+
+    def flushCharacterBuffer(self):
+
+        content = escape(self.currentContent())
+        self._out.write(content)
+        self._characterBuffer = []
+
+    def startDocument(self):
+
+        if not self._init:
+            if self._encoding.upper() != 'UTF-8':
+                self._out.write(u'<?xml version="1.0" encoding="%s"?>\n' %
+                                self._encoding)
+            else:
+                self._out.write(u'<?xml version="1.0"?>\n')
+            self._init = True
+
+    def startPrefixMapping(self, prefix, uri):
+
+        self._ns_contexts.append(self._current_context.copy())
+        self._current_context[uri] = prefix
+        self._undeclared_ns_maps.append((prefix, uri))
+
+    def endPrefixMapping(self, prefix):
+
+        self._current_context = self._ns_contexts[-1]
+        del self._ns_contexts[-1]
+
+    def startElement(self, name, attrs):
+
+        self.closeLastStart()
+        self.flushCharacterBuffer()
+        self._out.write(u'<' + name)
+        for (key, value) in attrs.items():
+            self._out.write(u' %s=%s' % (key, quoteattr(value)))
+        self._currentXPath.append(name)
+        self._lastStart = True
+
+    def endElement(self, name):
+
+        self.flushCharacterBuffer()
+        if self._lastStart:
+            self._out.write(u'/>')
+            self._lastStart = False
+        else:
+            self._out.write('</%s>' % name)
+        self._currentXPath.pop()
+
+    def startElementNS(self, name, qname, attrs):
+
+        self.closeLastStart()
+        self.flushCharacterBuffer()
+        if name[0] is None:
+            # if the name was not namespace-scoped, use the unqualified part
+            name = name[1]
+        else:
+            # else try to restore the original prefix from the namespace
+            name = self._current_context[name[0]] + u":" + name[1]
+        self._out.write(u'<' + name)
+
+        for pair in self._undeclared_ns_maps:
+            self._out.write(u' xmlns:%s="%s"' % pair)
+        self._undeclared_ns_maps = []
+
+        for (name, value) in attrs.items():
+            name = self._current_context[name[0]] + ":" + name[1]
+            self._out.write(' %s=%s' % (name, quoteattr(value)))
+        self._out.write('>')
+        self._currentXPath.append(name)
+
+    def endElementNS(self, name, qname):
+
+        self.flushCharacterBuffer()
+        if name[0] is None:
+            name = name[1]
+        else:
+            name = self._current_context[name[0]] + u":" + name[1]
+        if self._lastStart:
+            self._out.write(u'/>')
+            self._lastStart = False
+        else:
+            self._out.write(u'</%s>' % name)
+        self._currentXPath.pop()
+
+    def characters(self, content):
+
+        self.closeLastStart()
+        self._characterBuffer.append(content)
+
+    def ignorableWhitespace(self, content):
+
+        self.closeLastStart()
+        self.flushCharacterBuffer()
+        self._out.write(content)
+
+    def resolveEntity(self, publicId, systemId):
+
+        loc, name = posixpath.split(systemId)
+        if loc == u"http://www.w3.org/TR/xhtml1/DTD" or loc == u"":
+            systemId = path.abspath(path.join(self._dtd, name))
+        return EntityResolver.resolveEntity(self, publicId, systemId)
+
+    def processingInstruction(self, target, data):
+
+        raise Exception("no handler defined for processing instructions")
+
+def parseWithER(istream, handler):
+
+    parser = makeParser()
+    parser.setContentHandler(handler)
+    parser.setEntityResolver(handler)
+    parser.parse(istream)
--- a/Admin/website/community.html	Tue May 02 14:27:49 2006 +0200
+++ b/Admin/website/community.html	Tue May 02 16:19:53 2006 +0200
@@ -32,9 +32,10 @@
       <p>You may use the mailing list <a href=
           "mailto:isabelle-users@cl.cam.ac.uk">isabelle-users@cl.cam.ac.uk</a> and its
           <a href="https://lists.cam.ac.uk/pipermail/cl-isabelle-users/index.html">archive</a> to discuss
-          problems and results. To subscribe, <a href=
-          "mailto:Cl-isabelle-users-request@lists.cam.ac.uk?subject=subscribe">
-          contact our robot</a>.</p>
+          problems and results.
+          To subscribe, contact our robot:
+          <a href="mailto:Cl-isabelle-users-request@lists.cam.ac.uk?subject=subscribe">Cl-isabelle-users-request@lists.cam.ac.uk?subject=subscribe</a>.
+    </p>
 
       <h2>Contributing theorems</h2>
 
--- a/Admin/website/documentation.html	Tue May 02 14:27:49 2006 +0200
+++ b/Admin/website/documentation.html	Tue May 02 16:19:53 2006 +0200
@@ -34,9 +34,10 @@
           <p>You may use the mailing list <a href=
           "mailto:isabelle-users@cl.cam.ac.uk">isabelle-users@cl.cam.ac.uk</a> and its
           <a href="https://lists.cam.ac.uk/pipermail/cl-isabelle-users/index.html">archive</a> to discuss
-          problems and results. To subscribe, <a href=
-          "mailto:Cl-isabelle-users-request@lists.cam.ac.uk?subject=subscribe">
-          contact our robot</a>.</p>
+          problems and results.
+                    To subscribe, contact our robot:
+          <a href="mailto:Cl-isabelle-users-request@lists.cam.ac.uk?subject=subscribe">Cl-isabelle-users-request@lists.cam.ac.uk?subject=subscribe</a>.
+            </p>
         <p>Please consult the <a href="http://isabelle.in.tum.de/faq.html">FAQ</a> for answers to frequent
         problems.</p>
 
--- a/Admin/website/index.html	Tue May 02 14:27:49 2006 +0200
+++ b/Admin/website/index.html	Tue May 02 16:19:53 2006 +0200
@@ -72,9 +72,9 @@
 Use the mailing list <a href=
           "mailto:isabelle-users@cl.cam.ac.uk">isabelle-users@cl.cam.ac.uk</a> and its
           <a href="https://lists.cam.ac.uk/pipermail/cl-isabelle-users/index.html">archive</a> to
-discuss problems and results.  To subscribe, <a href=
-          "mailto:Cl-isabelle-users-request@lists.cam.ac.uk?subject=subscribe">
-          contact our robot</a>.
+discuss problems and results.
+      To subscribe, contact our robot:
+      <a href="mailto:Cl-isabelle-users-request@lists.cam.ac.uk?subject=subscribe">Cl-isabelle-users-request@lists.cam.ac.uk?subject=subscribe</a>.
 </p>
 
     </div>