Admin/website/build/obfusmail.py
author haftmann
Tue, 09 May 2006 09:18:05 +0200
changeset 19595 2042422ac7d8
parent 19556 a3951e34269f
child 19827 e9e9be6111bb
permissions -rw-r--r--
improved chmod/chgrp handling
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
19533
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     1
#!/usr/bin/env python
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     2
# -*- coding: Latin-1 -*-
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     3
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     4
"""
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     5
    Obfuscating mail addresses
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     6
"""
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     7
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     8
__author__ = 'Florian Haftmann, florian.haftmann@informatik.tu-muenchen.de'
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
     9
__revision__ = '$Id$'
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    10
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    11
import sys
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    12
import os
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    13
from os import path
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    14
import posixpath
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    15
import optparse
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    16
from cStringIO import StringIO
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    17
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    18
from xml.sax.saxutils import escape
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    19
from xml.sax.saxutils import quoteattr
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    20
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    21
from xhtmlparse import TransformerHandler, parseWithER
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    22
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    23
# global configuration
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    24
outputEncoding = 'UTF-8'
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    25
19554
bc0bef4a124e added world map
haftmann
parents: 19552
diff changeset
    26
def split_mail(mail):
    """Split a mailto target into its address parts and optional query.

    `mail` is the text following ``mailto:``, for example ``name@host`` or
    ``name@host?subject=...``.

    Returns ``((name, host), arg)``; `arg` is everything after the first
    ``?`` and is ``None`` when `mail` contains no ``?``.

    Raises ValueError when the address part does not contain exactly one
    ``@``.
    """
    # Split on the first "?" only: a "?" inside the query must stay part of
    # the argument instead of making the whole argument disappear.
    parts = mail.split("?", 1)
    if len(parts) == 2:
        address, arg = parts
    else:
        address, arg = parts[0], None

    pieces = address.split("@")
    if len(pieces) != 2:
        raise ValueError("Malformed mail address: '%s'" % address)
    name, host = pieces

    return ((name, host), arg)
bc0bef4a124e added world map
haftmann
parents: 19552
diff changeset
    37
19533
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    38
class FindHandler(TransformerHandler):
    """First pass over a page: collect the mailto: targets of its anchors.

    All output is sent to a discarding sink.  Every mailto target seen on an
    ``a`` element is recorded as a key of `mails`, and `enc` is updated from
    a ``meta http-equiv="content-type"`` element when one declares a charset.
    """

    class DevZero(object):
        """Write-only sink that throws away whatever is written to it."""

        def write(self, s):
            pass

    def __init__(self, dtd, mails, enc):
        """Set up the handler.

        dtd   -- passed through to TransformerHandler
        mails -- dict that receives one key per mailto target found
        enc   -- initial encoding, kept unless the page declares another
        """
        sink = self.DevZero()
        super(FindHandler, self).__init__(sink, 'UTF-8', dtd)
        self.pending_mail = None
        self.enc = enc
        self.mails = mails

    def startElement(self, name, attrs):
        """Remember a mailto target and pick up a declared charset."""
        mailto = u'mailto:'
        if name == u'a':
            target = attrs.get(u'href', u'')
            if target.startswith(mailto):
                # keep everything after the scheme, including any ?query
                self.pending_mail = target[len(mailto):]

        super(FindHandler, self).startElement(name, attrs)

        charset_prefix = u'text/html; charset='
        if name == u'meta' and attrs.get(u'http-equiv', u'').lower() == u'content-type':
            declared = attrs.get(u'content', u'')
            if declared.startswith(charset_prefix):
                self.enc = declared[len(charset_prefix):]

    def endElement(self, name):
        """Record the pending mailto target once its anchor is closed.

        Raises Exception when the value of currentContent() differs from the
        bare ``name@host`` address of the link.
        """
        if name == u'a' and self.pending_mail is not None:
            expected = "%s@%s" % split_mail(self.pending_mail)[0]
            # currentContent() comes from TransformerHandler; presumably the
            # text of the element being closed -- confirm against xhtmlparse
            if self.currentContent() != expected:
                raise Exception("Inconsistent mail address: '%s' vs. '%s'" % (self.currentContent(), expected))
            self.mails[self.pending_mail] = True
            self.pending_mail = None
        super(FindHandler, self).endElement(name)

    def processingInstruction(self, target, data):
        """Ignore processing instructions."""
        pass
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    79
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
    80
class ReplaceHandler(TransformerHandler):
    """Second pass over a page: swap mailto anchors for obfuscated markup.

    `mails` maps each mailto target (the href without ``mailto:``) to the
    replacement text that is written in place of the whole anchor.
    """

    def __init__(self, out, dtd, encoding, mails):
        """Set up the handler.

        out, encoding, dtd -- passed through to TransformerHandler
        mails              -- dict: mailto target -> replacement markup
        """
        super(ReplaceHandler, self).__init__(out, encoding, dtd)
        self.mails = mails
        self.pending_mail = None

    def startElement(self, name, attrs):
        """Swallow the start tag of a mailto anchor; forward all others."""
        mailto = u'mailto:'
        if name == u'a':
            target = attrs.get(u'href', u'')
            if target.startswith(mailto):
                self.pending_mail = target[len(mailto):]
                return

        super(ReplaceHandler, self).startElement(name, attrs)

    def endElement(self, name):
        """Write the replacement when a mailto anchor closes."""
        closing_mail_anchor = name == u'a' and self.pending_mail is not None
        if not closing_mail_anchor:
            super(ReplaceHandler, self).endElement(name)
            return

        self.flushCharacterBuffer()
        # _lastStart and _out belong to TransformerHandler; resetting
        # _lastStart presumably tells it no start tag is pending -- confirm
        self._lastStart = False
        self._out.write(self.mails[self.pending_mail])
        self.pending_mail = None

    def characters(self, content):
        """Drop character data while inside a mailto anchor."""
        if self.pending_mail is not None:
            return
        super(ReplaceHandler, self).characters(content)

    def processingInstruction(self, target, data):
        """Ignore processing instructions."""
        pass
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
   118
19552
273d2c9866fd improvments in mail obfuscator
haftmann
parents: 19533
diff changeset
   119
def obfuscate(mailaddr, htmlfile):
    """Render a mail address as an image and build its replacement markup.

    mailaddr -- mailto target without the ``mailto:`` scheme, optionally
                followed by ``?query``
    htmlfile -- page containing the address; the image is written into the
                same directory and gets the page's mode and group

    Returns markup consisting of a script that writes the opening ``<a>``
    tag piecewise via ``document.write``, an ``<img>`` showing the address,
    and a script that writes the closing ``</a>``.

    Raises Exception when the ``convert`` program exits with a non-zero
    status; ValueError from split_mail for a malformed address.
    """
    # local import: keeps this block self-contained without touching the
    # file's import section
    import subprocess

    def mk_line(s):
        # Escape backslashes before quotes so the JavaScript string literal
        # cannot be terminated or altered by the inserted text
        escaped = s.replace("\\", "\\\\").replace("'", "\\'")
        return u"document.write('%s');" % escaped

    def mk_script(s):
        return u'<script type="text/javascript">/*<![CDATA[*/%s/*]]>*/</script>' % s

    def cmd(args):
        # Run the program directly from an argument list; no shell is
        # involved, so quotes in the address cannot inject commands
        print("[shell cmd] %s" % " ".join(args))
        n = subprocess.call(args)
        if n != 0:
            raise Exception("shell cmd error: %s" % n)

    ((name, host), arg) = split_mail(mailaddr)
    baremail = "%s@%s" % (name, host)
    imgname = (name + "_" + host).replace(".", "_") + ".png"
    imgfile = path.join(path.split(htmlfile)[0], imgname)

    # One stat call supplies both the mode and the group of the page
    page_stat = os.stat(htmlfile)
    cmd(["convert", "label:%s" % baremail, imgfile])
    os.chmod(imgfile, page_stat.st_mode)
    os.chown(imgfile, -1, page_stat.st_gid)  # -1 leaves the owner unchanged

    # The link is emitted in small pieces so the full address never appears
    # as one contiguous string in the page source
    pieces = ['<a href="', "mailto:", name, "@", host]
    if arg is not None:
        mailsimple = u"{%s} AT [%s] WITH (%s)" % (name, host, arg)
        pieces.extend(["?", arg])
    else:
        mailsimple = u"{%s} AT [%s]" % (name, host)
    pieces.append('">')
    mailscript = u" ".join(map(mk_line, pieces))

    mailimg = '<img src=%s style="vertical-align:middle" alt=%s />' % (quoteattr(imgname), quoteattr(mailsimple))

    result = (mk_script(mailscript) + mailimg + mk_script(mk_line("</a>")))
    return result
19533
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
   150
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
   151
def main():
    """Command line entry point: obfuscate all mail links of one html file.

    The file is parsed once to collect mailto targets and the declared
    encoding.  If any targets were found, an image and replacement markup
    are produced for each and the file is rewritten in place.

    Exits with a usage error unless exactly one htmlfile is given.
    """

    # parse command line
    cmdlineparser = optparse.OptionParser(
        usage = '%prog [options] htmlfile',
        conflict_handler = "error",
        description = '''Protecting mail adresses in html files by obfuscating''',
        add_help_option = True,
    )
    cmdlineparser.add_option("-t", "--dtd",
        action="store", dest="dtd",
        type="string", default=".",
        help="local mirror of XHTML DTDs", metavar='location')

    options, args = cmdlineparser.parse_args(sys.argv[1:])
    if len(args) != 1:
        # report a proper usage error instead of failing on tuple unpacking
        cmdlineparser.error("exactly one htmlfile argument is required")
    filename = args[0]

    # find mails
    mails = {}
    istream = open(filename, 'r')
    try:
        findhandler = FindHandler(options.dtd, mails, outputEncoding)
        parseWithER(istream, findhandler)
    finally:
        istream.close()
    enc = findhandler.enc

    if not mails:
        return

    # transform mails
    mails_subst = {}
    for mail in mails:
        mails_subst[mail] = obfuscate(mail, filename)

    # transform pages
    # Read the whole page into memory first: the same file is truncated
    # when it is reopened for writing below.
    source = open(filename, 'r')
    try:
        istream = StringIO(source.read())
    finally:
        source.close()
    ostream = open(filename, 'wb')
    try:
        print("writing %s with %s" % (filename, enc))
        replacehandler = ReplaceHandler(ostream, options.dtd, enc, mails_subst)
        parseWithER(istream, replacehandler)
    finally:
        ostream.close()
        istream.close()
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
   190
fc4c6458d569 added obfuscation for mails
haftmann
parents:
diff changeset
   191
# Run the obfuscator only when this file is executed as a script.
if __name__ == "__main__":
    main()

# Placeholder for outstanding work; currently holds a single newline.
__todo__ = "\n"