Admin/website/build/obfusmail.py
author haftmann
Wed, 03 May 2006 17:41:28 +0200
changeset 19554 bc0bef4a124e
parent 19552 273d2c9866fd
child 19555 7938d8e0c52d
permissions -rw-r--r--
added world map
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
19533
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     1
#!/usr/bin/env python
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     2
# -*- coding: Latin-1 -*-
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     3
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     4
"""
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     5
    Obfuscating mail addresses
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     6
"""
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     7
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     8
__author__ = 'Florian Haftmann, florian.haftmann@informatik.tu-muenchen.de'
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     9
__revision__ = '$Id$'
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    10
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    11
import sys
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    12
import os
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    13
from os import path
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    14
import posixpath
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    15
import optparse
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    16
from cStringIO import StringIO
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    17
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    18
from xml.sax.saxutils import escape
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    19
from xml.sax.saxutils import quoteattr
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    20
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    21
from xhtmlparse import TransformerHandler, parseWithER
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    22
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    23
# global configuration: fallback encoding used when rewriting a page that
# does not declare a charset of its own
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    24
outputEncoding = 'UTF-8'
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    25
19554
bc0bef4a124e added world map
haftmann
parents: 19552
diff changeset
    26
def split_mail(mail):

    """
        Split the target of a mailto link into its components.

        mail -- the link target without the leading 'mailto:', e.g.
            'user@example.org' or 'user@example.org?subject=Hello'

        Returns ((name, host), arg); arg is everything following the
        first '?' of the target, or None if the target contains no '?'.

        Raises ValueError if the address part does not contain exactly
        one '@'.
    """

    # Split at the first '?' only: the query part may itself contain
    # further '?' characters, which must not make the query disappear.
    mail_arg = mail.split("?", 1)
    if len(mail_arg) == 2:
        mail, arg = mail_arg
    else:
        arg = None

    pieces = mail.split("@")
    if len(pieces) != 2:
        raise ValueError("expected exactly one '@' in mail address: %r" % (mail,))
    name, host = pieces

    return ((name, host), arg)
bc0bef4a124e added world map
haftmann
parents: 19552
diff changeset
    37
19533
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    38
class FindHandler(TransformerHandler):

    """
        Handler for the first pass over a page: it records every mailto
        link and the charset announced by the page.  The output stream
        handed to the base class discards everything written to it.
    """

    class DevZero(object):

        """
            Minimal writable object which ignores all data.
        """

        def write(self, s):

            pass

    def __init__(self, dtd, filename, mails, encs):

        """
            dtd -- passed through to TransformerHandler
            filename -- name of the page being parsed; used as key
            mails -- dict, gets an entry (filename, mail) -> True for
                each mailto link found
            encs -- dict, gets an entry filename -> charset if the page
                declares one
        """

        sink = self.DevZero()
        super(FindHandler, self).__init__(sink, 'UTF-8', dtd)
        self.pending_mail = None
        self.encs = encs
        self.mails = mails
        self.filename = filename

    def startElement(self, name, attrs):

        # remember the target of a mailto link until its end tag arrives
        if name == u'a':
            target = attrs.get(u'href', u'')
            if target.startswith(u'mailto:'):
                self.pending_mail = target[len(u'mailto:'):]

        super(FindHandler, self).startElement(name, attrs)

        # pick up the charset from <meta http-equiv="content-type" ...>
        if name != u'meta':
            return
        if attrs.get(u'http-equiv', u'').lower() != u'content-type':
            return
        declared = attrs.get(u'content', u'')
        prefix = u'text/html; charset='
        if declared.startswith(prefix):
            self.encs[self.filename] = declared[len(prefix):]

    def endElement(self, name):

        if name == u'a' and self.pending_mail is not None:
            baremail = "%s@%s" % split_mail(self.pending_mail)[0]
            # currentContent() comes from the base class; presumably the
            # text seen inside the link -- verify against xhtmlparse.
            # The link text has to be the plain address itself.
            if self.currentContent() != baremail:
                raise Exception("In '%s', inconsistent mail address: '%s' vs. '%s'" % (self.filename, self.currentContent(), baremail))
            key = (self.filename, self.pending_mail)
            self.mails[key] = True
            self.pending_mail = None

        super(FindHandler, self).endElement(name)

    def processingInstruction(self, target, data):

        # processing instructions are not forwarded to the base class
        pass
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    80
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    81
class ReplaceHandler(TransformerHandler):

    """
        Handler for the second pass over a page: each mailto link
        (start tag, text and end tag) is left out and the prepared
        replacement markup is written in its place.
    """

    def __init__(self, out, dtd, filename, encoding, mails):

        """
            out, encoding, dtd -- passed through to TransformerHandler
            filename -- name of the page being rewritten; used as key
            mails -- dict (filename, mail) -> replacement markup
        """

        super(ReplaceHandler, self).__init__(out, encoding, dtd)
        self.mails = mails
        self.filename = filename
        self.pending_mail = None

    def startElement(self, name, attrs):

        target = u''
        if name == u'a':
            target = attrs.get(u'href', u'')

        if target.startswith(u'mailto:'):
            # the start tag of a mailto link is not forwarded
            self.pending_mail = target[len(u'mailto:'):]
            return

        super(ReplaceHandler, self).startElement(name, attrs)

    def endElement(self, name):

        if name != u'a' or self.pending_mail is None:
            super(ReplaceHandler, self).endElement(name)
            return

        # closing a mailto link: write the replacement, skip the end tag
        self.flushCharacterBuffer()
        key = (self.filename, self.pending_mail)
        self._out.write(self.mails[key])
        self.pending_mail = None

    def characters(self, content):

        # text inside a mailto link is dropped
        if self.pending_mail is not None:
            return
        super(ReplaceHandler, self).characters(content)

    def processingInstruction(self, target, data):

        # processing instructions are not forwarded to the base class
        pass
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
   119
19552
273d2c9866fd improvments in mail obfuscator
haftmann
parents: 19533
diff changeset
   120
def obfuscate(mailaddr, htmlfile):

    """
        Produce the replacement markup for one mailto link.

        The bare address is rendered into a PNG image by the external
        'convert' program; the image is placed in the directory of
        htmlfile.  The returned markup consists of a script writing the
        opening <a> tag piecewise, an <img> element showing the address
        (with a disguised alt text), and a script writing the closing
        </a> tag.

        mailaddr -- target of the mailto link without 'mailto:'
        htmlfile -- path of the page containing the link

        Raises Exception if 'convert' exits with a non-zero status.
    """

    # function-level import keeps this block self-contained
    import subprocess

    def mk_line(s):
        # Backslashes must be escaped before quotes, otherwise a
        # backslash in s would alter the JavaScript string literal.
        quoted = s.replace("\\", "\\\\").replace("'", "\\'")
        return u"document.write('%s');" % quoted

    def mk_script(s):
        return u'<script type="text/javascript">/*<![CDATA[*/%s/*]]>*/</script>' % s

    def cmd(argv):
        # The program is started directly with an argument list instead
        # of through a shell, so quotes or other shell metacharacters in
        # an address or path cannot change the command.
        print("[shell cmd] %s" % " ".join(argv))
        n = subprocess.call(argv)
        if n != 0:
            raise Exception("shell cmd error: %s" % n)

    ((name, host), arg) = split_mail(mailaddr)
    baremail = "%s@%s" % (name, host)
    imgname = (name + "_" + host).replace(".", "_") + ".png"
    imgfile = path.join(path.split(htmlfile)[0], imgname)
    cmd(["convert", "label:%s" % baremail, imgfile])

    # The link is written in small pieces so that the complete address
    # never appears literally in the page source.
    link_parts = ['<a href="', "mailto:", name, "@", host]
    if arg is not None:
        mailsimple = u"{%s} AT [%s] WITH (%s)" % (name, host, arg)
        link_parts.extend(["?", arg])
    else:
        mailsimple = u"{%s} AT [%s]" % (name, host)
    link_parts.append('">')
    mailscript = u" ".join(map(mk_line, link_parts))

    mailimg = '<img src=%s style="vertical-align:middle" alt=%s />' % (quoteattr(imgname), quoteattr(mailsimple))

    return (mk_script(mailscript) + mailimg + mk_script(mk_line("</a>")))
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
   146
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
   147
def main():

    """
        Command line entry point.

        Works in three steps on the html files given as arguments:
        find all mailto links (and the charset each page declares),
        build the replacement markup for each link, and rewrite in
        place every page in which at least one link was found.
    """

    # parse command line
    cmdlineparser = optparse.OptionParser(
        usage = '%prog [options] htmlfiles*',
        conflict_handler = "error",
        description = '''Protecting mail adresses in html files by obfuscating''',
        add_help_option = True,
    )
    cmdlineparser.add_option("-t", "--dtd",
        action="store", dest="dtd",
        type="string", default=".",
        help="local mirror of XHTML DTDs", metavar='location')

    options, filenames = cmdlineparser.parse_args(sys.argv[1:])

    # find mails
    mails = {}
    encs = {}
    for filename in filenames:
        istream = open(filename, 'r')
        try:
            findhandler = FindHandler(options.dtd, filename, mails, encs)
            parseWithER(istream, findhandler)
        finally:
            # close the page even if parsing fails
            istream.close()

    # transform mails
    mails_subst = {}
    pages = {}  # used as a set: pages containing at least one mail
    for filename, mail in mails:
        pages[filename] = True
        mails_subst[(filename, mail)] = obfuscate(mail, filename)

    # transform pages
    for filename in pages:
        # The page is rewritten in place, so its content is read
        # completely (and the handle closed) before it is reopened
        # for writing.
        source = open(filename, 'r')
        try:
            istream = StringIO(source.read())
        finally:
            source.close()
        encoding = encs.get(filename, outputEncoding)
        ostream = open(filename, 'wb')
        try:
            print("writing %s with %s" % (filename, encoding))
            replacehandler = ReplaceHandler(ostream, options.dtd, filename, encoding, mails_subst)
            parseWithER(istream, replacehandler)
        finally:
            ostream.close()
            istream.close()
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
   188
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
   189
# run the obfuscator only when executed as a script
if __name__ == "__main__":
    main()

# list of open tasks; holds no entries at present
__todo__ = '''
'''