19533
|
1 |
#!/usr/bin/env python
|
|
2 |
# -*- coding: Latin-1 -*-
|
|
3 |
|
|
4 |
"""
|
|
5 |
Obfuscates mail addresses in HTML files
|
|
6 |
"""
|
|
7 |
|
|
8 |
# Module metadata.
# NOTE(review): '$Id$' looks like an unexpanded version-control keyword
# placeholder -- confirm before relying on its value.
__author__ = 'Florian Haftmann, florian.haftmann@informatik.tu-muenchen.de'
__revision__ = '$Id$'
|
|
10 |
|
|
11 |
import sys
|
|
12 |
import os
|
|
13 |
from os import path
|
|
14 |
import posixpath
|
|
15 |
import optparse
|
|
16 |
from cStringIO import StringIO
|
|
17 |
|
|
18 |
from xml.sax.saxutils import escape
|
|
19 |
from xml.sax.saxutils import quoteattr
|
|
20 |
|
|
21 |
from xhtmlparse import TransformerHandler, parseWithER
|
|
22 |
|
|
23 |
# global configuration
# Fallback encoding passed to ReplaceHandler by main() for every file in which
# FindHandler recorded no charset from a <meta http-equiv="content-type">.
outputEncoding = 'UTF-8'
|
|
25 |
|
19554
|
26 |
def split_mail(mail):
    """Split the target of a ``mailto:`` link into address parts and query.

    ``mail`` is the text that follows ``mailto:``, for example
    ``user@host`` or ``user@host?subject=Hi``.

    Returns ``((name, host), arg)``.  ``arg`` is everything after the first
    ``?`` (possibly the empty string), or ``None`` when ``mail`` contains no
    ``?`` at all.

    Raises ``ValueError`` unless the address part contains exactly one ``@``.
    """
    # Split on the first "?" only.  maxsplit counts splits, not pieces, so
    # split("?", 2) yielded three pieces for a query that itself contained
    # a "?", and the whole query was then silently discarded.
    pieces = mail.split("?", 1)
    if len(pieces) == 2:
        address, arg = pieces
    else:
        address, arg = pieces[0], None

    # Exactly one "@" is required; anything else is rejected explicitly
    # instead of relying on a tuple-unpacking failure.
    parts = address.split("@")
    if len(parts) != 2:
        raise ValueError("malformed mail address: %r" % (address,))
    name, host = parts

    return ((name, host), arg)
|
|
37 |
|
19533
|
38 |
class FindHandler(TransformerHandler):
    """First pass over a page: collect ``mailto:`` links and the charset.

    Nothing useful is written; the handler is constructed with a
    ``DevZero`` instance in the position where ReplaceHandler passes its
    output stream.  Results are recorded in the ``mails`` and ``encs``
    dictionaries supplied by the caller.
    """

    class DevZero(object):
        """Object with a ``write`` method that discards its argument."""

        def write(self, s):
            pass

    def __init__(self, dtd, filename, mails, encs):
        """Set up the handler for one file.

        dtd      -- forwarded unchanged to the base class
        filename -- name of the file being parsed; used as dictionary key
        mails    -- dict receiving ``(filename, mailto target) -> True``
        encs     -- dict receiving ``filename -> charset`` taken from a
                    ``<meta http-equiv="content-type">`` element
        """
        sink = self.DevZero()
        super(FindHandler, self).__init__(sink, 'UTF-8', dtd)
        self.pending_mail = None
        self.encs = encs
        self.mails = mails
        self.filename = filename

    def startElement(self, name, attrs):
        """Remember a ``mailto:`` target and pick up the declared charset."""
        if name == u'a':
            mailto = u'mailto:'
            target = attrs.get(u'href', u'')
            if target.startswith(mailto):
                # Keep everything after the scheme, query part included.
                self.pending_mail = target[len(mailto):]

        super(FindHandler, self).startElement(name, attrs)

        if name != u'meta':
            return
        if attrs.get(u'http-equiv', u'').lower() != u'content-type':
            return
        prefix = u'text/html; charset='
        declared = attrs.get(u'content', u'')
        if declared.startswith(prefix):
            self.encs[self.filename] = declared[len(prefix):]

    def endElement(self, name):
        """Register the pending mail address when its anchor closes.

        Raises ``Exception`` when ``currentContent()`` (presumably the text
        of the anchor -- defined by the base class) differs from the bare
        ``name@host`` form of the link target.
        """
        if name == u'a' and self.pending_mail is not None:
            expected = "%s@%s" % split_mail(self.pending_mail)[0]
            if self.currentContent() != expected:
                raise Exception("In '%s', inconsistent mail address: '%s' vs. '%s'" % (self.filename, self.currentContent(), expected))
            self.mails[(self.filename, self.pending_mail)] = True
            self.pending_mail = None
        super(FindHandler, self).endElement(name)

    def processingInstruction(self, target, data):
        """Do nothing; processing instructions are not forwarded."""
        pass
|
|
80 |
|
|
81 |
class ReplaceHandler(TransformerHandler):
    """Second pass over a page: swap each ``mailto:`` anchor for its
    prepared replacement text.

    The opening tag, the character data and the closing tag of such an
    anchor are withheld from the base class; the replacement is written to
    ``self._out`` when the anchor closes.
    """

    def __init__(self, out, dtd, filename, encoding, mails):
        """Set up the handler for one file.

        out      -- output stream, forwarded to the base class
        dtd      -- forwarded unchanged to the base class
        filename -- name of the file being rewritten; used as lookup key
        encoding -- output encoding, forwarded to the base class
        mails    -- dict mapping ``(filename, mailto target)`` to the text
                    that replaces the anchor
        """
        super(ReplaceHandler, self).__init__(out, encoding, dtd)
        self.mails = mails
        self.pending_mail = None
        self.filename = filename

    def startElement(self, name, attrs):
        """Swallow the start tag of a ``mailto:`` anchor; forward the rest."""
        if name == u'a':
            mailto = u'mailto:'
            target = attrs.get(u'href', u'')
            if target.startswith(mailto):
                self.pending_mail = target[len(mailto):]
                return

        super(ReplaceHandler, self).startElement(name, attrs)

    def endElement(self, name):
        """Emit the replacement when a ``mailto:`` anchor closes."""
        if name != u'a' or self.pending_mail is None:
            super(ReplaceHandler, self).endElement(name)
            return

        self.flushCharacterBuffer()
        replacement = self.mails[(self.filename, self.pending_mail)]
        self._out.write(replacement)
        self.pending_mail = None

    def characters(self, content):
        """Forward character data unless a ``mailto:`` anchor is open."""
        if self.pending_mail is not None:
            return
        super(ReplaceHandler, self).characters(content)

    def processingInstruction(self, target, data):
        """Do nothing; processing instructions are not forwarded."""
        pass
|
|
119 |
|
19552
|
120 |
def obfuscate(mailaddr, htmlfile):
    """Render a mail address as an image and build its HTML replacement.

    mailaddr -- target of a ``mailto:`` link (``name@host`` with an optional
                ``?query``), as accepted by ``split_mail``
    htmlfile -- path of the page containing the link; the image is written
                into the same directory

    Runs the external ``convert`` program to create ``<name>_<host>.png``
    (dots replaced by underscores) showing the bare address.

    Returns an HTML fragment: a script that writes the opening
    ``<a href="mailto:...">`` tag piecewise via ``document.write``, an
    ``<img>`` referring to the generated picture, and a script that writes
    the closing ``</a>``.

    Raises ``Exception`` when ``convert`` exits with a non-zero status,
    ``OSError`` when it cannot be started, and whatever ``split_mail``
    raises for a malformed address.
    """
    import subprocess

    def mk_line(s):
        # Escape backslashes before quotes so that the text stays a valid
        # single-quoted JavaScript string literal.
        quoted = s.replace("\\", "\\\\").replace("'", "\\'")
        return u"document.write('%s');" % quoted

    def mk_script(s):
        return u'<script type="text/javascript">/*<![CDATA[*/%s/*]]>*/</script>' % s

    def render_label(label, target):
        # The command is logged in its traditional shell-like form but
        # executed as an argument vector: no shell is involved, so quotes
        # or metacharacters in the address or path cannot alter it.
        print("[shell cmd] %s" % ("convert label:'%s' '%s'" % (label, target)))
        status = subprocess.call(["convert", "label:" + label, target])
        if status != 0:
            raise Exception("shell cmd error: %s" % status)

    (name, host), arg = split_mail(mailaddr)
    baremail = "%s@%s" % (name, host)
    imgname = (name + "_" + host).replace(".", "_") + ".png"
    imgfile = path.join(path.split(htmlfile)[0], imgname)
    render_label(baremail, imgfile)

    # The link is emitted in small pieces so that the address never appears
    # contiguously in the page source.
    link_parts = ['<a href="', "mailto:", name, "@", host]
    if arg is not None:
        mailsimple = u"{%s} AT [%s] WITH (%s)" % (name, host, arg)
        link_parts.extend(["?", arg])
    else:
        mailsimple = u"{%s} AT [%s]" % (name, host)
    link_parts.append('">')

    mailscript = u" ".join([mk_line(part) for part in link_parts])
    mailimg = '<img src=%s style="vertical-align:middle" alt=%s />' % (quoteattr(imgname), quoteattr(mailsimple))

    return mk_script(mailscript) + mailimg + mk_script(mk_line("</a>"))
|
|
146 |
|
|
147 |
def main():
    """Command-line entry point: obfuscate mail links in the given pages.

    Works in three steps:

    1. parse every file named on the command line with FindHandler to
       collect its ``mailto:`` links and declared charset;
    2. call ``obfuscate`` once per (file, address) pair to create the image
       and the replacement markup;
    3. rewrite, in place, every file that contained at least one link,
       using ReplaceHandler and the file's declared charset (falling back
       to ``outputEncoding``).

    All file handles are released even when parsing fails.
    """
    # parse command line
    cmdlineparser = optparse.OptionParser(
        usage='%prog [options] htmlfiles*',
        conflict_handler="error",
        description='''Protecting mail adresses in html files by obfuscating''',
        add_help_option=True,
    )
    cmdlineparser.add_option("-t", "--dtd",
        action="store", dest="dtd",
        type="string", default=".",
        help="local mirror of XHTML DTDs", metavar='location')

    options, filenames = cmdlineparser.parse_args(sys.argv[1:])

    # find mails
    mails = {}
    encs = {}
    for filename in filenames:
        istream = open(filename, 'r')
        try:
            findhandler = FindHandler(options.dtd, filename, mails, encs)
            parseWithER(istream, findhandler)
        finally:
            istream.close()

    # transform mails
    mails_subst = {}
    # Only files that actually contain mail links get rewritten; a dict is
    # used as a set so each such file is processed once.
    files_with_mails = {}
    for filename, mail in mails:
        files_with_mails[filename] = True
        mails_subst[(filename, mail)] = obfuscate(mail, filename)

    # transform pages
    for filename in files_with_mails:
        # Read the whole page into memory first: the file is reopened for
        # writing (and thereby truncated) before it is parsed.
        source = open(filename, 'r')
        try:
            istream = StringIO(source.read())
        finally:
            source.close()

        ostream = open(filename, 'wb')
        try:
            encoding = encs.get(filename, outputEncoding)
            print("writing %s with %s" % (filename, encoding))
            replacehandler = ReplaceHandler(ostream, options.dtd, filename, encoding, mails_subst)
            parseWithER(istream, replacehandler)
        finally:
            ostream.close()
            istream.close()
|
|
188 |
|
|
189 |
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

# Placeholder for open work items; currently empty.
__todo__ = '''
'''
|