19533
|
1 |
#!/usr/bin/env python
|
|
2 |
# -*- coding: Latin-1 -*-
|
|
3 |
|
|
4 |
"""
|
|
5 |
Obfuscates mail addresses
|
|
6 |
"""
|
|
7 |
|
|
8 |
__author__ = 'Florian Haftmann, florian.haftmann@informatik.tu-muenchen.de'
|
|
9 |
__revision__ = '$Id$'
|
|
10 |
|
|
11 |
import sys
|
|
12 |
import os
|
|
13 |
from os import path
|
|
14 |
import posixpath
|
|
15 |
import optparse
|
|
16 |
from cStringIO import StringIO
|
|
17 |
|
|
18 |
from xml.sax.saxutils import escape
|
|
19 |
from xml.sax.saxutils import quoteattr
|
|
20 |
|
|
21 |
from xhtmlparse import TransformerHandler, parseWithER
|
|
22 |
|
|
23 |
# global configuration
|
|
24 |
outputEncoding = 'UTF-8'
|
|
25 |
|
19554
|
26 |
def split_mail(mail):
    """Split the target of a ``mailto:`` link into its components.

    ``mail`` is the text following ``mailto:``, i.e. ``name@host`` with an
    optional ``?query`` suffix (for example ``?subject=Hello``).

    Returns ``((name, host), arg)`` where ``arg`` is the query string
    without the leading ``?``, or ``None`` when no ``?`` is present.

    Raises ``ValueError`` when the address part does not contain exactly
    one ``@``.
    """
    # Split on the FIRST "?" only: a query may itself contain "?", and
    # splitting further would make the query look absent and drop it.
    parts = mail.split("?", 1)
    address = parts[0]
    if len(parts) == 2:
        arg = parts[1]
    else:
        arg = None

    pieces = address.split("@")
    if len(pieces) != 2:
        raise ValueError(
            "malformed mail address (expected exactly one '@'): %r" % (mail,))
    name, host = pieces

    return ((name, host), arg)
|
|
37 |
|
19533
|
38 |
class FindHandler(TransformerHandler):
    """First parsing pass: collect the ``mailto:`` targets used in a page.

    Every ``<a href="mailto:...">`` whose element text equals the bare
    ``name@host`` address is recorded as a key in ``mails``.  The handler
    also picks up the character encoding announced by a
    ``<meta http-equiv="content-type">`` element and keeps it in ``enc``.
    Output produced by the base class is thrown away.
    """

    class DevZero(object):
        """Write-only sink that discards whatever is written to it."""

        def write(self, s):
            pass

    def __init__(self, dtd, mails, enc):
        """Set up the handler.

        dtd   -- passed through to the base class
        mails -- dict that receives one ``address -> True`` entry per
                 mail link found
        enc   -- encoding to report unless the page declares another one
        """
        # The base class gets a discarding sink: this pass only inspects.
        super(FindHandler, self).__init__(self.DevZero(), 'UTF-8', dtd)
        self.mails = mails
        self.enc = enc
        self.pending_mail = None

    def startElement(self, name, attrs):
        """Remember a starting mail link and watch for the charset meta tag."""
        if name == u'a':
            target = attrs.get(u'href', u'')
            if target.startswith(u'mailto:'):
                self.pending_mail = target[len(u'mailto:'):]

        super(FindHandler, self).startElement(name, attrs)

        if name != u'meta':
            return
        if attrs.get(u'http-equiv', u'').lower() != u'content-type':
            return
        declared = attrs.get(u'content', u'')
        charset_prefix = u'text/html; charset='
        if declared.startswith(charset_prefix):
            self.enc = declared[len(charset_prefix):]

    def endElement(self, name):
        """Register the pending mail link once its ``<a>`` element closes.

        Raises ``Exception`` when the link text differs from the bare
        ``name@host`` part of the ``mailto:`` target.
        """
        if name == u'a' and self.pending_mail is not None:
            expected = "%s@%s" % split_mail(self.pending_mail)[0]
            # NOTE(review): currentContent() comes from the base class;
            # presumably it yields the text of the current element -- confirm.
            if self.currentContent() != expected:
                raise Exception("Inconsistent mail address: '%s' vs. '%s'" % (self.currentContent(), expected))
            self.mails[self.pending_mail] = True
            self.pending_mail = None

        super(FindHandler, self).endElement(name)

    def processingInstruction(self, target, data):
        """Ignore processing instructions."""
        pass
|
|
79 |
|
|
80 |
class ReplaceHandler(TransformerHandler):
    """Second parsing pass: swap each mail link for its obfuscated markup.

    An ``<a href="mailto:...">`` element, including its text content, is
    suppressed; when the element closes, the replacement text stored in
    ``mails`` under the link target is written to the output instead.
    All other events are forwarded to the base class.
    """

    def __init__(self, out, dtd, encoding, mails):
        """Set up the handler.

        out      -- output stream, passed through to the base class
        dtd      -- passed through to the base class
        encoding -- passed through to the base class
        mails    -- dict mapping a ``mailto:`` target to its replacement
        """
        super(ReplaceHandler, self).__init__(out, encoding, dtd)
        self.pending_mail = None
        self.mails = mails

    def startElement(self, name, attrs):
        """Swallow the start tag of a mail link; forward everything else."""
        if name == u'a':
            target = attrs.get(u'href', u'')
            if target.startswith(u'mailto:'):
                # Remember which replacement to emit when the link closes.
                self.pending_mail = target[len(u'mailto:'):]
                return

        super(ReplaceHandler, self).startElement(name, attrs)

    def endElement(self, name):
        """Emit the replacement when a mail link closes; forward otherwise."""
        if name != u'a' or self.pending_mail is None:
            super(ReplaceHandler, self).endElement(name)
            return

        self.flushCharacterBuffer()
        # NOTE(review): _lastStart is base-class state; presumably cleared so
        # the serializer does not treat a start tag as still open -- confirm.
        self._lastStart = False
        self._out.write(self.mails[self.pending_mail])
        self.pending_mail = None

    def characters(self, content):
        """Forward character data unless it lies inside a mail link."""
        if self.pending_mail is not None:
            return
        super(ReplaceHandler, self).characters(content)

    def processingInstruction(self, target, data):
        """Ignore processing instructions."""
        pass
|
|
118 |
|
19552
|
119 |
def obfuscate(mailaddr, htmlfile):
    """Build the obfuscated replacement markup for one mail link.

    mailaddr -- target of a ``mailto:`` link (``name@host``, optionally
                followed by ``?query``)
    htmlfile -- path of the page containing the link; the generated image
                is written next to it

    Side effects: runs the external ``convert`` program to render the bare
    address into ``<name>_<host>.png`` (dots replaced by underscores) in
    the directory of ``htmlfile``, then tries to give the image the mode
    and group of ``htmlfile`` (failures of these two steps are ignored).

    Returns a string consisting of a script that writes the opening
    ``<a href="mailto:...">`` tag piecewise, an ``<img>`` referencing the
    generated image, and a script that writes the closing ``</a>``.

    Raises ``Exception`` when ``convert`` cannot be started or exits with a
    non-zero status.
    """
    # Local import so this function does not depend on the file's import block.
    import subprocess

    def mk_line(s):
        # One JavaScript statement emitting ``s``; single quotes are escaped
        # because ``s`` sits inside a single-quoted JavaScript string.
        return u"document.write('%s');" % s.replace("'", "\\'")

    def mk_script(s):
        return u'<script type="text/javascript">/*<![CDATA[*/%s/*]]>*/</script>' % s

    def cmd(argv):
        # The address comes from the HTML page, so it must never pass through
        # a shell: hand the arguments to the program as a list.
        print("[shell cmd] %s" % " ".join(argv))
        try:
            n = subprocess.call(argv)
        except OSError as exc:
            raise Exception("shell cmd error: %s" % exc)
        if n != 0:
            raise Exception("shell cmd error: %s" % n)

    ((name, host), arg) = split_mail(mailaddr)
    baremail = "%s@%s" % (name, host)
    imgname = (name + "_" + host).replace(".", "_") + ".png"
    imgfile = path.join(path.split(htmlfile)[0], imgname)

    # Read mode and group of the page before creating the image.
    page_stat = os.stat(htmlfile)
    mod = page_stat.st_mode
    gid = page_stat.st_gid

    cmd(["convert", "label:%s" % baremail, imgfile])

    # Best effort: the image should be as accessible as the page itself.
    try:
        os.chmod(imgfile, mod)
    except OSError:
        pass
    try:
        os.chown(imgfile, -1, gid)
    except OSError:
        pass

    if arg is not None:
        mailsimple = u"{%s} AT [%s] WITH (%s)" % (name, host, arg)
        pieces = ['<a href="', "mailto:", name, "@", host, "?", arg, '">']
    else:
        mailsimple = u"{%s} AT [%s]" % (name, host)
        pieces = ['<a href="', "mailto:", name, "@", host, '">']
    # The address is written in separate pieces so it never appears
    # contiguously in the page source.
    mailscript = u" ".join(map(mk_line, pieces))

    mailimg = '<img src=%s style="vertical-align:middle" alt=%s />' % (quoteattr(imgname), quoteattr(mailsimple))

    return mk_script(mailscript) + mailimg + mk_script(mk_line("</a>"))
|
19533
|
156 |
|
|
157 |
def main():
    """Command line entry point: obfuscate the mail links of one HTML file.

    Expects exactly one positional argument, the HTML file, plus the
    optional ``-t/--dtd`` option.  The file is parsed once with
    ``FindHandler`` to collect mail links and the page encoding; if any
    links were found, a replacement is generated for each with
    ``obfuscate`` and the file is rewritten in place through
    ``ReplaceHandler``.

    A wrong number of positional arguments is reported through the option
    parser (usage message, exit status 2).
    """
    # parse command line
    cmdlineparser = optparse.OptionParser(
        usage='%prog [options] htmlfile',
        conflict_handler="error",
        description='''Protecting mail adresses in html files by obfuscating''',
        add_help_option=True,
    )
    cmdlineparser.add_option("-t", "--dtd",
        action="store", dest="dtd",
        type="string", default=".",
        help="local mirror of XHTML DTDs", metavar='location')

    options, args = cmdlineparser.parse_args(sys.argv[1:])
    if len(args) != 1:
        # error() prints the usage text and exits; far clearer than the
        # tuple-unpacking ValueError a wrong argument count used to cause.
        cmdlineparser.error("expected exactly one htmlfile argument")
    filename = args[0]

    # find mails
    mails = {}
    istream = open(filename, 'r')
    try:
        findhandler = FindHandler(options.dtd, mails, outputEncoding)
        parseWithER(istream, findhandler)
    finally:
        istream.close()
    enc = findhandler.enc

    # NOTE(review): the original indentation is not visible in this view;
    # the rewrite step is taken to run only when mail links were found
    # (the substitution table exists only in that case) -- confirm.
    if not mails:
        return

    # transform mails
    mails_subst = {}
    for mail in mails:
        mails_subst[mail] = obfuscate(mail, filename)

    # transform pages
    # The whole page is read into memory first because the same file is
    # opened for writing (and thereby truncated) right afterwards.
    source = open(filename, 'r')
    try:
        content = source.read()
    finally:
        source.close()

    istream = StringIO(content)
    try:
        ostream = open(filename, 'wb')
        try:
            print("writing %s with %s" % (filename, enc))
            replacehandler = ReplaceHandler(ostream, options.dtd, enc, mails_subst)
            parseWithER(istream, replacehandler)
        finally:
            ostream.close()
    finally:
        istream.close()
|
|
196 |
|
|
197 |
if __name__ == '__main__':
|
|
198 |
main()
|
|
199 |
|
|
200 |
__todo__ = '''
|
|
201 |
'''
|