|
72558
|
1 |
/* Title: Pure/General/mailman.scala
|
|
|
2 |
Author: Makarius
|
|
|
3 |
|
|
|
4 |
Support for Mailman list servers.
|
|
|
5 |
*/
|
|
|
6 |
|
|
|
7 |
package isabelle
|
|
|
8 |
|
|
|
9 |
|
|
|
10 |
import java.net.URL
|
|
|
11 |
|
|
74921
|
12 |
import scala.annotation.tailrec
|
|
74906
|
13 |
import scala.util.matching.Regex
|
|
74921
|
14 |
import scala.util.matching.Regex.Match
|
|
74906
|
15 |
|
|
72558
|
16 |
|
|
|
17 |
object Mailman
|
|
|
18 |
{
|
|
74921
|
19 |
/* mailing list messages */
|
|
72559
|
20 |
|
|
74921
|
21 |
sealed case class Message(
|
|
|
22 |
name: String,
|
|
|
23 |
date: Date,
|
|
|
24 |
title: String,
|
|
|
25 |
author_name: String,
|
|
|
26 |
author_address: String,
|
|
74934
|
27 |
body: String,
|
|
|
28 |
tags: List[String])
|
|
72558
|
29 |
{
|
|
74925
|
30 |
override def toString: String = name
|
|
74921
|
31 |
def print: String =
|
|
74922
|
32 |
name + "\n" + date + "\n" + title + "\n" +
|
|
|
33 |
quote(author_name) + "\n" + "<" + author_address + ">\n"
|
|
74931
|
34 |
|
|
|
35 |
def normal_address: String = Word.lowercase(author_address)
|
|
74921
|
36 |
}
|
|
74906
|
37 |
|
|
74933
|
38 |
object Messages
|
|
74925
|
39 |
{
|
|
74933
|
40 |
def apply(msgs: List[Message]): Messages =
|
|
|
41 |
new Messages(msgs.sortBy(_.date)(Date.Ordering))
|
|
74934
|
42 |
|
|
|
43 |
def find(dir: Path): Messages =
|
|
|
44 |
{
|
|
|
45 |
val msgs =
|
|
|
46 |
for {
|
|
|
47 |
archive <- List(Isabelle_Users, Isabelle_Dev)
|
|
|
48 |
msg <- archive.find_messages(dir + Path.basic(archive.list_name))
|
|
|
49 |
} yield msg
|
|
|
50 |
Messages(msgs)
|
|
|
51 |
}
|
|
74933
|
52 |
}
|
|
|
53 |
|
|
|
54 |
class Messages private(val sorted: List[Message])
|
|
|
55 |
{
|
|
|
56 |
override def toString: String = "Messages(" + sorted.length + ")"
|
|
74925
|
57 |
|
|
74933
|
58 |
def check(): Unit =
|
|
|
59 |
{
|
|
|
60 |
val proper = sorted.filter(msg => msg.author_name.nonEmpty && msg.author_address.nonEmpty)
|
|
|
61 |
val address_name = Multi_Map.from(proper.map(msg => (msg.normal_address, msg.author_name)))
|
|
|
62 |
val name_address = Multi_Map.from(proper.map(msg => (msg.author_name, msg.normal_address)))
|
|
74925
|
63 |
|
|
74933
|
64 |
def print_dup(a: String, bs: List[String]): String =
|
|
|
65 |
" * " + a + bs.mkString("\n ", "\n ", "")
|
|
74925
|
66 |
|
|
74933
|
67 |
def print_dups(title: String, m: Multi_Map[String, String]): Unit =
|
|
|
68 |
{
|
|
|
69 |
val dups = m.iterator_list.filter(p => p._2.length > 1).toList
|
|
|
70 |
if (dups.nonEmpty) {
|
|
|
71 |
Output.writeln(cat_lines(title :: dups.map(p => print_dup(p._1, p._2))))
|
|
|
72 |
}
|
|
74925
|
73 |
}
|
|
74933
|
74 |
|
|
|
75 |
print_dups("\nduplicate names:", address_name)
|
|
|
76 |
print_dups("\nduplicate addresses:", name_address)
|
|
74925
|
77 |
|
|
74933
|
78 |
def get_name(msg: Message): Option[String] =
|
|
|
79 |
proper_string(msg.author_name) orElse address_name.get(msg.author_address)
|
|
74925
|
80 |
|
|
74933
|
81 |
val missing_name =
|
|
|
82 |
sorted.flatMap(msg =>
|
|
|
83 |
if (get_name(msg).isEmpty) Some(proper_string(msg.author_address).getOrElse(msg.name))
|
|
|
84 |
else None).toSet.toList.sorted
|
|
74925
|
85 |
|
|
74933
|
86 |
val missing_address =
|
|
|
87 |
sorted.flatMap(msg =>
|
|
|
88 |
if (msg.author_address.isEmpty) Some(proper_string(msg.author_name).getOrElse(msg.name))
|
|
|
89 |
else None).toSet.toList.sorted
|
|
74925
|
90 |
|
|
74933
|
91 |
if (missing_name.nonEmpty) {
|
|
|
92 |
Output.writeln(("missing name:" :: missing_name).mkString("\n", "\n ", ""))
|
|
|
93 |
}
|
|
74931
|
94 |
|
|
74933
|
95 |
if (missing_address.nonEmpty) {
|
|
|
96 |
Output.writeln(("missing address:" :: missing_address).mkString("\n", "\n ", ""))
|
|
|
97 |
}
|
|
74925
|
98 |
}
|
|
|
99 |
}
|
|
|
100 |
|
|
|
101 |
|
|
74921
|
102 |
/* mailing list archives */
|
|
|
103 |
|
|
|
104 |
abstract class Archive(
|
|
|
105 |
url: URL,
|
|
|
106 |
name: String = "",
|
|
|
107 |
tag: String = "")
|
|
|
108 |
{
|
|
|
109 |
def message_regex: Regex
|
|
74923
|
110 |
def message_content(name: String, lines: List[String]): Message
|
|
74921
|
111 |
|
|
74924
|
112 |
def message_match(lines: List[String], re: Regex): Option[Match] =
|
|
|
113 |
lines.flatMap(re.findFirstMatchIn(_)).headOption
|
|
|
114 |
|
|
74931
|
115 |
def make_name(str: String): String =
|
|
|
116 |
str.replaceAll("""\s+""", " ").replaceAll(" at ", "@")
|
|
74924
|
117 |
|
|
|
118 |
def make_body(lines: List[String]): String =
|
|
|
119 |
cat_lines(Library.take_suffix[String](_.isEmpty, lines)._1)
|
|
|
120 |
|
|
74921
|
121 |
private val main_url: URL =
|
|
|
122 |
Url(Library.take_suffix[Char](_ == '/', Url.trim_index(url).toString.toList)._1.mkString + "/")
|
|
|
123 |
|
|
|
124 |
private val main_html = Url.read(main_url)
|
|
|
125 |
|
|
|
126 |
val list_name: String =
|
|
|
127 |
{
|
|
|
128 |
val title =
|
|
|
129 |
"""<title>The ([^</>]*) Archives</title>""".r.findFirstMatchIn(main_html).map(_.group(1))
|
|
|
130 |
(proper_string(name) orElse title).getOrElse(error("Failed to determine mailing list name"))
|
|
|
131 |
}
|
|
72558
|
132 |
override def toString: String = list_name
|
|
|
133 |
|
|
74923
|
134 |
def full_name(href: String): String = list_name + "/" + href
|
|
|
135 |
|
|
74921
|
136 |
def list_tag: String = proper_string(tag).getOrElse(list_name)
|
|
|
137 |
|
|
|
138 |
def read_text(href: String): String = Url.read(new URL(main_url, href))
|
|
|
139 |
|
|
|
140 |
def hrefs_text: List[String] =
|
|
|
141 |
"""href="([^"]+\.txt(?:\.gz)?)"""".r.findAllMatchIn(main_html).map(_.group(1)).toList
|
|
|
142 |
|
|
|
143 |
def hrefs_msg: List[String] =
|
|
74906
|
144 |
(for {
|
|
74921
|
145 |
href <- """href="([^"]+)/date.html"""".r.findAllMatchIn(main_html).map(_.group(1))
|
|
|
146 |
html = read_text(href + "/date.html")
|
|
|
147 |
msg <- message_regex.findAllMatchIn(html).map(_.group(1))
|
|
74906
|
148 |
} yield href + "/" + msg).toList
|
|
|
149 |
|
|
|
150 |
def get(target_dir: Path, href: String, progress: Progress = new Progress): Option[Path] =
|
|
72558
|
151 |
{
|
|
|
152 |
val dir = target_dir + Path.basic(list_name)
|
|
74906
|
153 |
val path = dir + Path.explode(href)
|
|
74921
|
154 |
val url = new URL(main_url, href)
|
|
74906
|
155 |
val connection = url.openConnection
|
|
|
156 |
try {
|
|
|
157 |
val length = connection.getContentLengthLong
|
|
|
158 |
val timestamp = connection.getLastModified
|
|
|
159 |
val file = path.file
|
|
|
160 |
if (file.isFile && file.length == length && file.lastModified == timestamp) None
|
|
|
161 |
else {
|
|
|
162 |
Isabelle_System.make_directory(path.dir)
|
|
|
163 |
progress.echo("Getting " + url)
|
|
|
164 |
val bytes =
|
|
|
165 |
using(connection.getInputStream)(Bytes.read_stream(_, hint = length.toInt max 1024))
|
|
|
166 |
Bytes.write(file, bytes)
|
|
|
167 |
file.setLastModified(timestamp)
|
|
|
168 |
Some(path)
|
|
|
169 |
}
|
|
|
170 |
}
|
|
|
171 |
finally { connection.getInputStream.close() }
|
|
|
172 |
}
|
|
72558
|
173 |
|
|
74906
|
174 |
def download_text(target_dir: Path, progress: Progress = new Progress): List[Path] =
|
|
|
175 |
hrefs_text.flatMap(get(target_dir, _, progress = progress))
|
|
|
176 |
|
|
|
177 |
def download_msg(target_dir: Path, progress: Progress = new Progress): List[Path] =
|
|
|
178 |
hrefs_msg.flatMap(get(target_dir, _, progress = progress))
|
|
74907
|
179 |
|
|
|
180 |
def download(target_dir: Path, progress: Progress = new Progress): List[Path] =
|
|
|
181 |
download_text(target_dir, progress = progress) :::
|
|
|
182 |
download_msg(target_dir, progress = progress)
|
|
74921
|
183 |
|
|
74924
|
184 |
def make_title(str: String): String =
|
|
74921
|
185 |
{
|
|
|
186 |
val Trim1 = ("""\s*\Q[""" + list_tag + """]\E\s*(.*)""").r
|
|
|
187 |
val Trim2 = """(?i:(?:re|fw|fwd)\s*:\s*)(.*)""".r
|
|
|
188 |
val Trim3 = """\[\s*(.*?)\s*\]""".r
|
|
|
189 |
@tailrec def trim(s: String): String =
|
|
|
190 |
s match {
|
|
|
191 |
case Trim1(s1) => trim(s1)
|
|
|
192 |
case Trim2(s1) => trim(s1)
|
|
|
193 |
case Trim3(s1) => trim(s1)
|
|
|
194 |
case _ => s
|
|
|
195 |
}
|
|
|
196 |
trim(str)
|
|
|
197 |
}
|
|
|
198 |
|
|
|
199 |
def get_messages(): List[Message] =
|
|
|
200 |
for (href <- hrefs_msg) yield message_content(href, split_lines(read_text(href)))
|
|
|
201 |
|
|
|
202 |
def find_messages(dir: Path): List[Message] =
|
|
|
203 |
{
|
|
|
204 |
for {
|
|
|
205 |
file <- File.find_files(dir.file, file => file.getName.endsWith(".html"))
|
|
|
206 |
rel_path <- File.relative_path(dir, File.path(file))
|
|
74923
|
207 |
}
|
|
|
208 |
yield {
|
|
|
209 |
val name = full_name(rel_path.implode)
|
|
|
210 |
message_content(name, split_lines(File.read(file)))
|
|
|
211 |
}
|
|
74921
|
212 |
}
|
|
72558
|
213 |
}
|
|
72559
|
214 |
|
|
74932
|
215 |
private class Message_Chunk(bg: String = "", en: String = "")
|
|
|
216 |
{
|
|
|
217 |
def unapply(lines: List[String]): Option[List[String]] =
|
|
|
218 |
{
|
|
|
219 |
val res1 =
|
|
|
220 |
if (bg.isEmpty) Some(lines)
|
|
|
221 |
else {
|
|
|
222 |
lines.dropWhile(_ != bg) match {
|
|
|
223 |
case Nil => None
|
|
|
224 |
case _ :: rest => Some(rest)
|
|
|
225 |
}
|
|
|
226 |
}
|
|
|
227 |
if (en.isEmpty) res1
|
|
|
228 |
else {
|
|
|
229 |
res1 match {
|
|
|
230 |
case None => None
|
|
|
231 |
case Some(lines1) =>
|
|
|
232 |
val lines2 = lines1.takeWhile(_ != en)
|
|
|
233 |
if (lines1.drop(lines2.length).isEmpty) None else Some(lines2)
|
|
|
234 |
}
|
|
|
235 |
}
|
|
|
236 |
}
|
|
|
237 |
|
|
|
238 |
def get(lines: List[String]): List[String] =
|
|
|
239 |
unapply(lines) getOrElse
|
|
|
240 |
error("Missing delimiters:" +
|
|
|
241 |
(if (bg.nonEmpty) " " else "") + bg +
|
|
|
242 |
(if (en.nonEmpty) " " else "") + en)
|
|
|
243 |
}
|
|
|
244 |
|
|
72559
|
245 |
|
|
74930
|
246 |
/* isabelle-users mailing list */
|
|
72559
|
247 |
|
|
74921
|
248 |
object Isabelle_Users extends Archive(
|
|
|
249 |
Url("https://lists.cam.ac.uk/pipermail/cl-isabelle-users"),
|
|
|
250 |
name = "isabelle-users", tag = "isabelle")
|
|
|
251 |
{
|
|
|
252 |
override def message_regex: Regex = """<li><a name="\d+" href="(msg\d+\.html)">""".r
|
|
|
253 |
|
|
|
254 |
private object Head extends
|
|
74922
|
255 |
Message_Chunk(bg = "<!--X-Head-of-Message-->", en = "<!--X-Head-of-Message-End-->")
|
|
74921
|
256 |
|
|
|
257 |
private object Body extends
|
|
74922
|
258 |
Message_Chunk(bg = "<!--X-Body-of-Message-->", en = "<!--X-Body-of-Message-End-->")
|
|
|
259 |
|
|
|
260 |
private object Date_Format
|
|
|
261 |
{
|
|
|
262 |
private val Trim1 = """\w+,\s*(.*)""".r
|
|
|
263 |
private val Trim2 = """(.*?)\s*\(.*\)""".r
|
|
|
264 |
private val Format =
|
|
|
265 |
Date.Format(
|
|
|
266 |
"d MMM uuuu H:m:s Z",
|
|
|
267 |
"d MMM uuuu H:m:s z",
|
|
|
268 |
"d MMM yy H:m:s Z",
|
|
|
269 |
"d MMM yy H:m:s z")
|
|
|
270 |
def unapply(s: String): Option[Date] =
|
|
|
271 |
{
|
|
|
272 |
val s0 = s.replaceAll("""\s+""", " ")
|
|
|
273 |
val s1 =
|
|
|
274 |
s0 match {
|
|
|
275 |
case Trim1(s1) => s1
|
|
|
276 |
case _ => s0
|
|
|
277 |
}
|
|
|
278 |
val s2 =
|
|
|
279 |
s1 match {
|
|
|
280 |
case Trim2(s2) => s2
|
|
|
281 |
case _ => s1
|
|
|
282 |
}
|
|
|
283 |
Format.unapply(s2)
|
|
|
284 |
}
|
|
|
285 |
}
|
|
74921
|
286 |
|
|
74924
|
287 |
override def make_name(str: String): String =
|
|
74921
|
288 |
{
|
|
74931
|
289 |
val s = Library.perhaps_unsuffix("via Cl-isabelle-users", super.make_name(str))
|
|
74925
|
290 |
if (s == "cl-isabelle-users@lists.cam.ac.uk") "" else s
|
|
|
291 |
}
|
|
|
292 |
|
|
74924
|
293 |
object Address
|
|
|
294 |
{
|
|
|
295 |
private def anchor(s: String): String = """<a href="[^"]*">""" + s + """</a>"""
|
|
|
296 |
private def angl(s: String): String = """<""" + s + """>"""
|
|
|
297 |
private def quot(s: String): String = """"""" + s + """""""
|
|
|
298 |
private def paren(s: String): String = """\(""" + s + """\)"""
|
|
|
299 |
private val adr = """([^<>]*? at [^<>]*?)"""
|
|
|
300 |
private val any = """([^<>]*?)"""
|
|
|
301 |
private val spc = """\s*"""
|
|
|
302 |
|
|
|
303 |
private val Name1 = quot(anchor(any)).r
|
|
|
304 |
private val Name2 = quot(any).r
|
|
|
305 |
private val Name_Adr1 = (quot(anchor(any)) + spc + angl(anchor(adr))).r
|
|
|
306 |
private val Name_Adr2 = (quot(any) + spc + angl(anchor(adr))).r
|
|
|
307 |
private val Name_Adr3 = (anchor(any) + spc + angl(anchor(adr))).r
|
|
|
308 |
private val Name_Adr4 = (any + spc + angl(anchor(adr))).r
|
|
|
309 |
private val Adr_Name1 = (angl(anchor(adr)) + spc + paren(any)).r
|
|
|
310 |
private val Adr_Name2 = (anchor(adr) + spc + paren(any)).r
|
|
|
311 |
private val Adr1 = angl(anchor(adr)).r
|
|
|
312 |
private val Adr2 = anchor(adr).r
|
|
|
313 |
|
|
|
314 |
def unapply(s: String): Option[(String, String)] =
|
|
|
315 |
s match {
|
|
74931
|
316 |
case Name1(a) => Some((make_name(a), ""))
|
|
|
317 |
case Name2(a) => Some((make_name(a), ""))
|
|
|
318 |
case Name_Adr1(a, b) => Some((make_name(a), make_name(b)))
|
|
|
319 |
case Name_Adr2(a, b) => Some((make_name(a), make_name(b)))
|
|
|
320 |
case Name_Adr3(a, b) => Some((make_name(a), make_name(b)))
|
|
|
321 |
case Name_Adr4(a, b) => Some((make_name(a), make_name(b)))
|
|
|
322 |
case Adr_Name1(b, a) => Some((make_name(a), make_name(b)))
|
|
|
323 |
case Adr_Name2(b, a) => Some((make_name(a), make_name(b)))
|
|
|
324 |
case Adr1(a) => Some(("", make_name(a)))
|
|
|
325 |
case Adr2(a) => Some(("", make_name(a)))
|
|
74924
|
326 |
case _ => None
|
|
|
327 |
}
|
|
|
328 |
}
|
|
|
329 |
|
|
74923
|
330 |
override def message_content(name: String, lines: List[String]): Message =
|
|
74921
|
331 |
{
|
|
|
332 |
def err(msg: String = ""): Nothing =
|
|
74923
|
333 |
error("Malformed message: " + name + (if (msg.isEmpty) "" else "\n" + msg))
|
|
74921
|
334 |
|
|
|
335 |
val (head, body) =
|
|
74924
|
336 |
try { (Head.get(lines), make_body(Body.get(lines))) }
|
|
74921
|
337 |
catch { case ERROR(msg) => err(msg) }
|
|
72559
|
338 |
|
|
74921
|
339 |
val date =
|
|
|
340 |
message_match(head, """<li><em>Date</em>:\s*(.*?)\s*</li>""".r)
|
|
|
341 |
.map(m => HTML.input(m.group(1))) match
|
|
|
342 |
{
|
|
|
343 |
case Some(Date_Format(d)) => d
|
|
|
344 |
case Some(s) => err("Malformed Date: " + quote(s))
|
|
|
345 |
case None => err("Missing Date")
|
|
|
346 |
}
|
|
|
347 |
|
|
|
348 |
val title =
|
|
74924
|
349 |
make_title(
|
|
|
350 |
HTML.input(message_match(head, """<li><em>Subject</em>:\s*(.*)</li>""".r)
|
|
|
351 |
.getOrElse(err("Missing Subject")).group(1)))
|
|
74921
|
352 |
|
|
|
353 |
val (author_name, author_address) =
|
|
|
354 |
message_match(head, """<li><em>From</em>:\s*(.*?)\s*</li>""".r) match {
|
|
|
355 |
case None => err("Missing From")
|
|
74925
|
356 |
case Some(m) =>
|
|
74931
|
357 |
val (a0, b0) = Address.unapply(m.group(1)) getOrElse err("Malformed From")
|
|
|
358 |
val a = HTML.input(a0)
|
|
|
359 |
val b = HTML.input(b0)
|
|
74925
|
360 |
(if (a == b) "" else a, b)
|
|
74921
|
361 |
}
|
|
|
362 |
|
|
74934
|
363 |
val tags = List(list_name)
|
|
|
364 |
|
|
|
365 |
Message(name, date, title, author_name, author_address, body, tags)
|
|
74921
|
366 |
}
|
|
|
367 |
}
|
|
|
368 |
|
|
74930
|
369 |
|
|
|
370 |
/* isabelle-dev mailing list */
|
|
|
371 |
|
|
74921
|
372 |
object Isabelle_Dev extends Archive(Url("https://mailmanbroy.in.tum.de/pipermail/isabelle-dev"))
|
|
|
373 |
{
|
|
|
374 |
override def message_regex: Regex = """<LI><A HREF="(\d+\.html)">""".r
|
|
|
375 |
|
|
74922
|
376 |
private object Head extends Message_Chunk(en = "<!--beginarticle-->")
|
|
|
377 |
private object Body extends Message_Chunk(bg = "<!--beginarticle-->", en = "<!--endarticle-->")
|
|
|
378 |
|
|
|
379 |
private object Date_Format
|
|
|
380 |
{
|
|
|
381 |
val Format = Date.Format("E MMM d H:m:s z uuuu")
|
|
|
382 |
def unapply(s: String): Option[Date] = Format.unapply(s.replaceAll("""\s+""", " "))
|
|
|
383 |
}
|
|
|
384 |
|
|
74931
|
385 |
override def make_name(str: String): String =
|
|
|
386 |
{
|
|
|
387 |
val s = Library.perhaps_unsuffix("via RT", super.make_name(str))
|
|
|
388 |
if (s == "sys-admin@cl.cam.ac.uk") "" else s
|
|
|
389 |
}
|
|
|
390 |
|
|
74923
|
391 |
override def message_content(name: String, lines: List[String]): Message =
|
|
74922
|
392 |
{
|
|
|
393 |
def err(msg: String = ""): Nothing =
|
|
74923
|
394 |
error("Malformed message: " + name + (if (msg.isEmpty) "" else "\n" + msg))
|
|
74922
|
395 |
|
|
|
396 |
val (head, body) =
|
|
74924
|
397 |
try { (Head.get(lines), make_body(Body.get(lines))) }
|
|
74922
|
398 |
catch { case ERROR(msg) => err(msg) }
|
|
|
399 |
|
|
|
400 |
val date =
|
|
|
401 |
message_match(head, """\s*<I>(.*)</I>""".r).map(m => HTML.input(m.group(1))) match {
|
|
|
402 |
case Some(Date_Format(d)) => d
|
|
|
403 |
case Some(s) => err("Malformed Date: " + quote(s))
|
|
|
404 |
case None => err("Missing Date")
|
|
|
405 |
}
|
|
|
406 |
|
|
|
407 |
val (title, author_address) =
|
|
|
408 |
{
|
|
|
409 |
message_match(head, """TITLE="([^"]+)">(.*)""".r) match {
|
|
74931
|
410 |
case Some(m) => (make_title(HTML.input(m.group(1))), make_name(HTML.input(m.group(2))))
|
|
74922
|
411 |
case None => err("Missing TITLE")
|
|
|
412 |
}
|
|
|
413 |
}
|
|
|
414 |
|
|
|
415 |
val author_name =
|
|
|
416 |
message_match(head, """\s*<B>(.*)</B>""".r) match {
|
|
|
417 |
case None => err("Missing author")
|
|
74931
|
418 |
case Some(m) =>
|
|
|
419 |
val a = make_name(HTML.input(m.group(1)))
|
|
|
420 |
if (a == author_address) "" else a
|
|
74922
|
421 |
}
|
|
|
422 |
|
|
74934
|
423 |
val tags = List(list_name)
|
|
|
424 |
|
|
|
425 |
Message(name, date, title, author_name, author_address, body, tags)
|
|
74922
|
426 |
}
|
|
74921
|
427 |
}
|
|
72558
|
428 |
}
|