72558
|
1 |
/* Title: Pure/General/mailman.scala
|
|
2 |
Author: Makarius
|
|
3 |
|
74942
|
4 |
Support for Mailman list servers, notably isabelle-users and isabelle-dev.
|
72558
|
5 |
*/
|
|
6 |
|
|
7 |
package isabelle
|
|
8 |
|
|
9 |
|
|
10 |
import java.net.URL
|
|
11 |
|
74921
|
12 |
import scala.annotation.tailrec
|
74906
|
13 |
import scala.util.matching.Regex
|
74921
|
14 |
import scala.util.matching.Regex.Match
|
74906
|
15 |
|
72558
|
16 |
|
75393
|
17 |
object Mailman {
|
74921
|
18 |
/* mailing list messages */
|
72559
|
19 |
|
74935
|
20 |
def is_address(s: String): Boolean =
|
|
21 |
s.contains('@') && s.contains('.') && !s.contains(' ')
|
|
22 |
|
74936
|
23 |
private val standard_name: Map[String, String] =
|
74935
|
24 |
Map(
|
74941
|
25 |
"121171528@qq.com" -> "Guo Fan\n121171528@qq.com",
|
74937
|
26 |
"Aman Pohjola, Johannes (Data61, Kensington NSW)" -> "Johannes Aman Pohjola",
|
|
27 |
"Andrei de AraÃjo Formiga" -> "Andrei de Araujo Formiga",
|
74940
|
28 |
"Benedikt.AHRENS@unice.fr" -> "Benedikt Ahrens\nBenedikt.AHRENS@unice.fr",
|
|
29 |
"Berg, Nils Erik" -> "Nils Erik Berg",
|
74937
|
30 |
"Berger U." -> "Ulrich Berger",
|
|
31 |
"Bisping, Benjamin" -> "Benjamin Bisping",
|
74935
|
32 |
"Blanchette, J.C." -> "Jasmin Christian Blanchette",
|
|
33 |
"Buday Gergely István" -> "Gergely Buday",
|
74938
|
34 |
"CALaF1UJ9Uy0vGCu4WkBmbfuPDxG7nFm8hfeCMP+O3g7_5CQ0Bw@mail.gmail.com" -> "",
|
74937
|
35 |
"CRACIUN F." -> "Florin Craciun",
|
|
36 |
"Carsten Schuermann" -> "Carsten Schürmann",
|
74940
|
37 |
"Chris" -> "",
|
74935
|
38 |
"Christoph Lueth" -> "Christoph Lüth",
|
|
39 |
"Claude Marche" -> "Claude Marché",
|
74937
|
40 |
"Daniel StÃwe" -> "Daniel Stüwe",
|
74940
|
41 |
"Daniel.Matichuk@data61.csiro.au" -> "Daniel Matichuk\nDaniel.Matichuk@data61.csiro.au",
|
|
42 |
"Daniel.Matichuk@nicta.com.au" -> "Daniel Matichuk\nDaniel.Matichuk@nicta.com.au",
|
74937
|
43 |
"David MENTRE" -> "David MENTRÉ",
|
74940
|
44 |
"Dey, Katie" -> "Katie Dey",
|
74937
|
45 |
"Dr. Brendan Patrick Mahony" -> "Brendan Mahony",
|
|
46 |
"Farn" -> "Farn Wang",
|
74935
|
47 |
"Farquhar, Colin I" -> "Colin Farquhar",
|
74940
|
48 |
"Fernandez, Matthew" -> "Matthew Fernandez",
|
74937
|
49 |
"Filip Maric" -> "Filip Marić",
|
|
50 |
"Filip MariÄ" -> "Filip Marić",
|
|
51 |
"Fleury Mathias" -> "Mathias Fleury",
|
74940
|
52 |
"Francisco Jose CHAVES ALONSO" -> "Francisco Jose Chaves Alonso",
|
|
53 |
"Frederic Tuong (Dr)" -> "Frederic Tuong",
|
|
54 |
"Fulya" -> "Fulya Horozal",
|
74935
|
55 |
"George K." -> "George Karabotsos",
|
74940
|
56 |
"Gidon Ernst" -> "Gidon ERNST",
|
|
57 |
"Gransden, Thomas" -> "Thomas Gransden",
|
74937
|
58 |
"Hans-JÃrg Schurr" -> "Hans-Jörg Schurr",
|
|
59 |
"Henri DEBRAT" -> "Henri Debrat",
|
74935
|
60 |
"Hitoshi Ohsaki (RTA publicity chair)" -> "Hitoshi Ohsaki",
|
74940
|
61 |
"Häuselmann Rafael" -> "Rafael Häuselmann",
|
74937
|
62 |
"Isabelle" -> "",
|
74940
|
63 |
"J. Juhas (TUM)" -> "Jonatan Juhas",
|
74937
|
64 |
"Jackson, Vincent (Data61, Kensington NSW)" -> "Vincent Jackson",
|
|
65 |
"Janney, Mark-P26816" -> "Mark Janney",
|
74940
|
66 |
"Jean François Molderez" -> "Jean-François Molderez",
|
|
67 |
"Jean-Francois Molderez" -> "Jean-François Molderez",
|
|
68 |
"John R Harrison" -> "John Harrison",
|
74937
|
69 |
"Jose DivasÃn" -> "Jose Divasón",
|
|
70 |
"Julian" -> "",
|
|
71 |
"Julien" -> "",
|
|
72 |
"Klein, Gerwin (Data61, Kensington NSW)" -> "Gerwin Klein",
|
|
73 |
"Kobayashi, Hidetsune" -> "Hidetsune Kobayashi",
|
74940
|
74 |
"Kylie Williams (IND)" -> "Kylie Williams",
|
|
75 |
"Laarman, A.W." -> "A.W. Laarman",
|
74937
|
76 |
"Laurent Thery" -> "Laurent Théry",
|
74940
|
77 |
"Li, Chanjuan" -> "Li Chanjuan",
|
74937
|
78 |
"Lochbihler Andreas" -> "Andreas Lochbihler",
|
74940
|
79 |
"Luckhardt, Daniel" -> "Daniel Luckhardt",
|
74937
|
80 |
"Lutz Schroeder" -> "Lutz Schröder",
|
|
81 |
"Lutz SchrÃder" -> "Lutz Schröder",
|
74940
|
82 |
"MACKENZIE Carlin" -> "Carlin MACKENZIE",
|
74937
|
83 |
"Makarius" -> "Makarius Wenzel",
|
|
84 |
"Marco" -> "",
|
|
85 |
"Mark" -> "",
|
74940
|
86 |
"Markus Mueller-Olm" -> "Markus Müller-Olm",
|
74937
|
87 |
"Markus" -> "",
|
74940
|
88 |
"Marmsoler, Diego" -> "Diego Marmsoler",
|
74937
|
89 |
"Martin Klebermass" -> "Martin Klebermaß",
|
74941
|
90 |
"Martyn Johnson via RT" -> "",
|
|
91 |
"Mathias.Fleury@ens-rennes.fr" -> "Mathias Fleury\nmathias.fleury12@gmail.com",
|
74940
|
92 |
"Matthew" -> "",
|
74937
|
93 |
"Matthews, John R" -> "John Matthews",
|
74940
|
94 |
"McCarthy, Jim (C3ID)" -> "Jim McCarthy",
|
|
95 |
"McCue, Brian" -> "Brian McCue",
|
74937
|
96 |
"Michael FÃrber" -> "Michael Färber",
|
74940
|
97 |
"Michel" -> "",
|
|
98 |
"Miranda, Brando" -> "Brando Miranda",
|
|
99 |
"Moscato, Mariano M. \\(LARC-D320\\)\\[NATIONAL INSTITUTE OF AEROSPACE\\]" -> "Mariano M. Moscato",
|
|
100 |
"Mr Julian Fell" -> "Julian Fell",
|
|
101 |
"Mueller Peter" -> "Peter Müller",
|
|
102 |
"Munoz, Cesar Augusto (LARC-D320)" -> "Cesar A. Munoz",
|
|
103 |
"Nadel, Alexander" -> "Alexander Nadel",
|
|
104 |
"Nagashima, Yutaka" -> "Yutaka Nagashima",
|
74937
|
105 |
"Norrish, Michael (Data61, Acton)" -> "Michael Norrish",
|
74940
|
106 |
"O'Leary, John W" -> "John W O'Leary",
|
74937
|
107 |
"Omar Montano Rivas" -> "Omar Montaño Rivas",
|
|
108 |
"Omar MontaÃo Rivas" -> "Omar Montaño Rivas",
|
|
109 |
"OndÅej KunÄar" -> "Ondřej Kunčar",
|
74941
|
110 |
"PALMER Jake" -> "Jake Palmer",
|
74937
|
111 |
"PAQUI LUCIO" -> "Paqui Lucio",
|
74940
|
112 |
"Pal, Abhik" -> "Abhik Pal",
|
|
113 |
"Pasupuleti, Vijay" -> "Vijay Pasupuleti",
|
74937
|
114 |
"Peter Vincent Homeier" -> "Peter V. Homeier",
|
|
115 |
"Peter" -> "",
|
|
116 |
"Philipp Ruemmer" -> "Philipp Rümmer",
|
|
117 |
"Philipp RÃmmer" -> "Philipp Rümmer",
|
74941
|
118 |
"Piete Brooks via RT" -> "",
|
74940
|
119 |
"RTA publicity chair" -> "",
|
74937
|
120 |
"Raamsdonk, F. van" -> "Femke van Raamsdonk",
|
|
121 |
"Raul Gutierrez" -> "Raúl Gutiérrez",
|
|
122 |
"Renà Thiemann" -> "René Thiemann",
|
74940
|
123 |
"Ridgway, John V. E." -> "John V. E. Ridgway",
|
74937
|
124 |
"Roggenbach M." -> "Markus Roggenbach",
|
74940
|
125 |
"Rosu, Grigore" -> "Grigore Rosu",
|
74937
|
126 |
"Rozman, Mihaela" -> "Mihaela Rozman",
|
74940
|
127 |
"Schmaltz, J." -> "Julien Schmaltz",
|
74937
|
128 |
"Serguei A. Mokhov on behalf of PST-11" -> "Serguei A. Mokhov",
|
74940
|
129 |
"Serguei Mokhov" -> "Serguei A. Mokhov",
|
|
130 |
"Shumeiko, Igor" -> "Igor Shumeiko",
|
|
131 |
"Siek, Jeremy" -> "Jeremy Siek",
|
|
132 |
"Silvio.Ranise@loria.fr" -> "Silvio Ranise\nSilvio.Ranise@loria.fr",
|
|
133 |
"Siu, Tony" -> "Tony Siu",
|
74937
|
134 |
"Stüber, Sebastian" -> "Sebastian Stüber",
|
|
135 |
"Thiemann, Rene" -> "René Thiemann",
|
|
136 |
"Thiemann, René" -> "René Thiemann",
|
74940
|
137 |
"Thomas Arthur Leck Sewell" -> "Thomas Sewell",
|
74937
|
138 |
"Thomas Goethel" -> "Thomas Göthel",
|
74940
|
139 |
"Thomas.Sewell@data61.csiro.au" -> "Thomas Sewell\nThomas.Sewell@data61.csiro.au",
|
74941
|
140 |
"Tjark Weber via RT" -> "Tjark Weber",
|
74940
|
141 |
"Toby.Murray@data61.csiro.au" -> "Toby Murray\nToby.Murray@data61.csiro.au",
|
74935
|
142 |
"Urban, Christian" -> "Christian Urban",
|
74940
|
143 |
"Ursula Eschbach" -> "",
|
|
144 |
"Van Staden Stephan" -> "Stephan van Staden",
|
74937
|
145 |
"Viktor Kuncak" -> "Viktor Kunčak",
|
|
146 |
"Viorel Preoteasaa" -> "Viorel Preoteasa",
|
74940
|
147 |
"Wickerson, John P" -> "John Wickerson",
|
|
148 |
"Wong, Yat" -> "Yat Wong",
|
|
149 |
"YAMADA, Akihisa" -> "Akihisa Yamada",
|
74937
|
150 |
"YliÃs Falcone" -> "Yliès Falcone",
|
74940
|
151 |
"amir mohajeri" -> "Amir Mohajeri",
|
|
152 |
"aniello murano" -> "Aniello Murano",
|
|
153 |
"barzan stefania" -> "Stefania Barzan",
|
|
154 |
"benhamou" -> "Belaid Benhamou",
|
|
155 |
"charmi panchal" -> "Charmi Panchal",
|
|
156 |
"chen kun" -> "Chen Kun",
|
|
157 |
"chunhan wu" -> "Chunhan Wu",
|
|
158 |
"daniel de la concepción sáez" -> "Daniel de la Concepción Sáez",
|
|
159 |
"daniel.luckhardt@mathematik.uni-goettingen.de" -> "Logiker@gmx.net",
|
|
160 |
"david streader" -> "David Streader",
|
|
161 |
"eschbach@in.tum.de" -> "",
|
|
162 |
"f.rabe@jacobs-university.de" -> "florian.rabe@fau.de",
|
|
163 |
"florian@haftmann-online.de" -> "haftmann@in.tum.de",
|
|
164 |
"fredegar@haftmann-online.de" -> "haftmann@in.tum.de",
|
74937
|
165 |
"gallais @ ensl.org" -> "Guillaume Allais",
|
74940
|
166 |
"geng chen" -> "Geng Chen",
|
|
167 |
"henning.seidler" -> "Henning Seidler",
|
74937
|
168 |
"hkb" -> "Hidetsune Kobayashi",
|
74941
|
169 |
"jobs-pm@inf.ethz.ch" -> "",
|
74937
|
170 |
"julien@RadboudUniversity" -> "",
|
74940
|
171 |
"jun sun" -> "Jun Sun",
|
|
172 |
"jwang whu.edu.cn (jwang)" -> "jwang",
|
|
173 |
"kostas pouliasis" -> "Kostas Pouliasis",
|
|
174 |
"kristof.teichel@ptb.de" -> "Kristof Teichel\nkristof.teichel@ptb.de",
|
|
175 |
"lucas cavalcante" -> "Lucas Cavalcante",
|
|
176 |
"mahmoud abdelazim" -> "Mahmoud Abdelazim",
|
|
177 |
"manish surolia" -> "Manish Surolia",
|
74937
|
178 |
"mantel" -> "Heiko Mantel",
|
74940
|
179 |
"marco caminati" -> "Marco Caminati",
|
74941
|
180 |
"mathias.fleury@ens-rennes.fr" -> "Mathias Fleury\nmathias.fleury12@gmail.com",
|
74937
|
181 |
"merz@loria.fr" -> "stephan.merz@loria.fr",
|
74940
|
182 |
"michel levy" -> "Michel Levy",
|
|
183 |
"michel.levy2009@laposte.net" -> "Michel Levy\nmichel.levy2009@laposte.net",
|
74937
|
184 |
"nemouchi" -> "Yakoub Nemouchi",
|
74941
|
185 |
"noam neer" -> "Noam Neer",
|
|
186 |
"olfa mraihi" -> "Olfa Mraihi",
|
|
187 |
"pathsnottakenworkshop@gmail.com" -> "Leo Freitas\nleo.freitas@newcastle.ac.uk",
|
74940
|
188 |
"patrick barlatier" -> "Patrick Barlatier",
|
|
189 |
"patrick dabou" -> "Patrick Dabou",
|
|
190 |
"paul zimmermann" -> "Paul Zimmermann",
|
74937
|
191 |
"popescu2@illinois.edu" -> "Andrei Popescu",
|
74940
|
192 |
"recruiting" -> "",
|
74941
|
193 |
"recruiting@mais.informatik.tu-darmstadt.de" -> "",
|
74940
|
194 |
"roux cody" -> "Cody Roux",
|
|
195 |
"scott constable" -> "Scott Constable",
|
|
196 |
"superuser@mattweidner.com" -> "Matthew Weidner\nsuperuser@mattweidner.com",
|
|
197 |
"urban@math.lmu.de" -> "Christian Urban\nurban@math.lmu.de",
|
74937
|
198 |
"veronique.cortier@loria.fr" -> "Veronique.Cortier@loria.fr",
|
74940
|
199 |
"vikram singh" -> "Vikram Singh",
|
74937
|
200 |
"wenzelm@in.tum.de" -> "makarius@sketis.net",
|
74940
|
201 |
"werner@lix.polytechnique.fr" -> "Benjamin Werner\nwerner@lix.polytechnique.fr",
|
|
202 |
"wmansky@cs.princeton.edu" -> "William Mansky\nwmansky@cs.princeton.edu",
|
|
203 |
"y.nemouchi@ensbiotech.edu.dz" -> "Yakoub Nemouchi\ny.nemouchi@ensbiotech.edu.dz",
|
74937
|
204 |
"ÐÑÐÐÐÑÐÐÐ ÐÐÐÐÐÐÐÑÐÐÐÑ ÐÐÐÑÐÐÐ" -> "",
|
|
205 |
"∀X.Xπ - Tutorials about Proofs" -> "Bruno Woltzenlogel Paleo",
|
74935
|
206 |
).withDefault(identity)
|
|
207 |
|
74936
|
208 |
def standard_author_info(author_info: List[String]): List[String] =
|
74940
|
209 |
author_info.flatMap(s => split_lines(standard_name.getOrElse(s, s))).distinct
|
74936
|
210 |
|
74921
|
211 |
sealed case class Message(
|
|
212 |
name: String,
|
|
213 |
date: Date,
|
|
214 |
title: String,
|
74936
|
215 |
author_info: List[String],
|
74934
|
216 |
body: String,
|
75393
|
217 |
tags: List[String]
|
|
218 |
) {
|
74936
|
219 |
if (author_info.isEmpty || author_info.exists(_.isEmpty)) {
|
|
220 |
error("Bad author information in " + quote(name))
|
74935
|
221 |
}
|
|
222 |
|
74925
|
223 |
override def toString: String = name
|
74935
|
224 |
}
|
74906
|
225 |
|
75393
|
226 |
object Messages {
|
74935
|
227 |
type Graph = isabelle.Graph[String, Message]
|
|
228 |
|
75393
|
229 |
def apply(msgs: List[Message]): Messages = {
|
74936
|
230 |
def make_node(g: Graph, node: String, msg: Message): Graph =
|
|
231 |
if (g.defined(node) && Date.Ordering.compare(g.get_node(node).date, msg.date) >= 0) g
|
74935
|
232 |
else g.default_node(node, msg)
|
|
233 |
|
74936
|
234 |
def connect_nodes(g: Graph, nodes: List[String]): Graph =
|
|
235 |
nodes match {
|
|
236 |
case Nil => g
|
|
237 |
case a :: bs => bs.foldLeft(g)({ case (g1, b) => g1.add_edge(a, b).add_edge(b, a) })
|
|
238 |
}
|
|
239 |
|
74935
|
240 |
new Messages(msgs.sortBy(_.date)(Date.Ordering),
|
|
241 |
msgs.foldLeft[Graph](Graph.string)(
|
74936
|
242 |
{ case (graph, msg) =>
|
|
243 |
val nodes = msg.author_info
|
|
244 |
connect_nodes(nodes.foldLeft(graph)(make_node(_, _, msg)), nodes)
|
74935
|
245 |
}))
|
|
246 |
}
|
74934
|
247 |
|
75393
|
248 |
def find(dir: Path): Messages = {
|
74934
|
249 |
val msgs =
|
|
250 |
for {
|
|
251 |
archive <- List(Isabelle_Users, Isabelle_Dev)
|
|
252 |
msg <- archive.find_messages(dir + Path.basic(archive.list_name))
|
|
253 |
} yield msg
|
|
254 |
Messages(msgs)
|
|
255 |
}
|
74935
|
256 |
|
75393
|
257 |
sealed case class Cluster(author_info: List[String]) {
|
74936
|
258 |
val (addresses, names) = author_info.partition(is_address)
|
|
259 |
|
74935
|
260 |
val name: String =
|
|
261 |
names.headOption getOrElse {
|
|
262 |
addresses match {
|
|
263 |
case a :: _ => a.substring(0, a.indexOf('@')).replace('.', ' ')
|
|
264 |
case Nil => error("Empty cluster")
|
|
265 |
}
|
|
266 |
}
|
|
267 |
|
74940
|
268 |
val name_lowercase: String = Word.lowercase(name)
|
|
269 |
|
74935
|
270 |
def get_address: Option[String] = addresses.headOption
|
|
271 |
|
|
272 |
def unique: Boolean = names.length == 1 && addresses.length == 1
|
|
273 |
def multi: Boolean = names.length > 1 || addresses.length > 1
|
|
274 |
|
75393
|
275 |
def print: String = {
|
74935
|
276 |
val entries = names ::: (if (addresses.isEmpty) Nil else List("")) ::: addresses
|
|
277 |
entries.mkString("\n * ", "\n ", "")
|
|
278 |
}
|
|
279 |
}
|
74933
|
280 |
}
|
|
281 |
|
75393
|
282 |
class Messages private(val sorted: List[Message], val graph: Messages.Graph) {
|
74935
|
283 |
override def toString: String = "Messages(" + sorted.size + ")"
|
74925
|
284 |
|
75393
|
285 |
object Node_Ordering extends scala.math.Ordering[String] {
|
74935
|
286 |
override def compare(a: String, b: String): Int =
|
|
287 |
Date.Rev_Ordering.compare(graph.get_node(a).date, graph.get_node(b).date)
|
|
288 |
}
|
|
289 |
|
|
290 |
def get_cluster(msg: Message): Messages.Cluster =
|
74936
|
291 |
Messages.Cluster(graph.all_succs(msg.author_info).sorted.sorted(Node_Ordering))
|
74935
|
292 |
|
|
293 |
def get_name(msg: Message): String = get_cluster(msg).name
|
74925
|
294 |
|
74935
|
295 |
def get_address(msg: Message): String =
|
|
296 |
get_cluster(msg).get_address getOrElse error("No author address for " + quote(msg.name))
|
|
297 |
|
75393
|
298 |
def check(check_all: Boolean, check_multi: Boolean = false): Unit = {
|
74940
|
299 |
val clusters = sorted.map(get_cluster).distinct.sortBy(_.name_lowercase)
|
74935
|
300 |
|
74937
|
301 |
if (check_all) {
|
|
302 |
Output.writeln(cat_lines("clusters:" :: clusters.map(_.print)))
|
|
303 |
}
|
|
304 |
else {
|
|
305 |
val multi = if (check_multi) clusters.filter(_.multi) else Nil
|
|
306 |
if (multi.nonEmpty) {
|
|
307 |
Output.writeln(cat_lines("ambiguous clusters:" :: multi.map(_.print)))
|
|
308 |
}
|
74925
|
309 |
}
|
74933
|
310 |
|
74935
|
311 |
val unknown = clusters.filter(cluster => cluster.get_address.isEmpty)
|
|
312 |
if (unknown.nonEmpty) {
|
74937
|
313 |
Output.writeln(cat_lines("\nunknown mail:" :: unknown.map(_.print)))
|
74933
|
314 |
}
|
74925
|
315 |
}
|
|
316 |
}
|
|
317 |
|
|
318 |
|
74921
|
319 |
/* mailing list archives */
|
|
320 |
|
|
321 |
abstract class Archive(
|
|
322 |
url: URL,
|
|
323 |
name: String = "",
|
75393
|
324 |
tag: String = ""
|
|
325 |
) {
|
74921
|
326 |
def message_regex: Regex
|
74923
|
327 |
def message_content(name: String, lines: List[String]): Message
|
74921
|
328 |
|
74924
|
329 |
def message_match(lines: List[String], re: Regex): Option[Match] =
|
|
330 |
lines.flatMap(re.findFirstMatchIn(_)).headOption
|
|
331 |
|
75393
|
332 |
def make_name(str: String): String = {
|
74937
|
333 |
val s =
|
|
334 |
str.trim.replaceAll("""\s+""", " ").replaceAll(" at ", "@")
|
|
335 |
.replace("mailbroy.informatik.tu-muenchen.de", "in.tum.de")
|
|
336 |
.replace("informatik.tu-muenchen.de", "in.tum.de")
|
74935
|
337 |
if (s.startsWith("=") && s.endsWith("=")) "" else s
|
|
338 |
}
|
74924
|
339 |
|
|
340 |
def make_body(lines: List[String]): String =
|
|
341 |
cat_lines(Library.take_suffix[String](_.isEmpty, lines)._1)
|
|
342 |
|
74921
|
343 |
private val main_url: URL =
|
|
344 |
Url(Library.take_suffix[Char](_ == '/', Url.trim_index(url).toString.toList)._1.mkString + "/")
|
|
345 |
|
|
346 |
private val main_html = Url.read(main_url)
|
|
347 |
|
75393
|
348 |
val list_name: String = {
|
74921
|
349 |
val title =
|
|
350 |
"""<title>The ([^</>]*) Archives</title>""".r.findFirstMatchIn(main_html).map(_.group(1))
|
|
351 |
(proper_string(name) orElse title).getOrElse(error("Failed to determine mailing list name"))
|
|
352 |
}
|
72558
|
353 |
override def toString: String = list_name
|
|
354 |
|
74923
|
355 |
def full_name(href: String): String = list_name + "/" + href
|
|
356 |
|
74921
|
357 |
def list_tag: String = proper_string(tag).getOrElse(list_name)
|
|
358 |
|
|
359 |
def read_text(href: String): String = Url.read(new URL(main_url, href))
|
|
360 |
|
|
361 |
def hrefs_text: List[String] =
|
|
362 |
"""href="([^"]+\.txt(?:\.gz)?)"""".r.findAllMatchIn(main_html).map(_.group(1)).toList
|
|
363 |
|
|
364 |
def hrefs_msg: List[String] =
|
74906
|
365 |
(for {
|
74921
|
366 |
href <- """href="([^"]+)/date.html"""".r.findAllMatchIn(main_html).map(_.group(1))
|
|
367 |
html = read_text(href + "/date.html")
|
|
368 |
msg <- message_regex.findAllMatchIn(html).map(_.group(1))
|
74906
|
369 |
} yield href + "/" + msg).toList
|
|
370 |
|
75393
|
371 |
def get(target_dir: Path, href: String, progress: Progress = new Progress): Option[Path] = {
|
72558
|
372 |
val dir = target_dir + Path.basic(list_name)
|
74906
|
373 |
val path = dir + Path.explode(href)
|
74921
|
374 |
val url = new URL(main_url, href)
|
74906
|
375 |
val connection = url.openConnection
|
|
376 |
try {
|
|
377 |
val length = connection.getContentLengthLong
|
|
378 |
val timestamp = connection.getLastModified
|
|
379 |
val file = path.file
|
|
380 |
if (file.isFile && file.length == length && file.lastModified == timestamp) None
|
|
381 |
else {
|
|
382 |
Isabelle_System.make_directory(path.dir)
|
|
383 |
progress.echo("Getting " + url)
|
|
384 |
val bytes =
|
|
385 |
using(connection.getInputStream)(Bytes.read_stream(_, hint = length.toInt max 1024))
|
|
386 |
Bytes.write(file, bytes)
|
|
387 |
file.setLastModified(timestamp)
|
|
388 |
Some(path)
|
|
389 |
}
|
|
390 |
}
|
|
391 |
finally { connection.getInputStream.close() }
|
|
392 |
}
|
72558
|
393 |
|
74906
|
394 |
def download_text(target_dir: Path, progress: Progress = new Progress): List[Path] =
|
|
395 |
hrefs_text.flatMap(get(target_dir, _, progress = progress))
|
|
396 |
|
|
397 |
def download_msg(target_dir: Path, progress: Progress = new Progress): List[Path] =
|
|
398 |
hrefs_msg.flatMap(get(target_dir, _, progress = progress))
|
74907
|
399 |
|
|
400 |
def download(target_dir: Path, progress: Progress = new Progress): List[Path] =
|
|
401 |
download_text(target_dir, progress = progress) :::
|
|
402 |
download_msg(target_dir, progress = progress)
|
74921
|
403 |
|
75393
|
404 |
def make_title(str: String): String = {
|
74921
|
405 |
val Trim1 = ("""\s*\Q[""" + list_tag + """]\E\s*(.*)""").r
|
|
406 |
val Trim2 = """(?i:(?:re|fw|fwd)\s*:\s*)(.*)""".r
|
|
407 |
val Trim3 = """\[\s*(.*?)\s*\]""".r
|
|
408 |
@tailrec def trim(s: String): String =
|
|
409 |
s match {
|
|
410 |
case Trim1(s1) => trim(s1)
|
|
411 |
case Trim2(s1) => trim(s1)
|
|
412 |
case Trim3(s1) => trim(s1)
|
|
413 |
case _ => s
|
|
414 |
}
|
|
415 |
trim(str)
|
|
416 |
}
|
|
417 |
|
|
418 |
def get_messages(): List[Message] =
|
|
419 |
for (href <- hrefs_msg) yield message_content(href, split_lines(read_text(href)))
|
|
420 |
|
75393
|
421 |
def find_messages(dir: Path): List[Message] = {
|
74921
|
422 |
for {
|
|
423 |
file <- File.find_files(dir.file, file => file.getName.endsWith(".html"))
|
|
424 |
rel_path <- File.relative_path(dir, File.path(file))
|
74923
|
425 |
}
|
|
426 |
yield {
|
|
427 |
val name = full_name(rel_path.implode)
|
|
428 |
message_content(name, split_lines(File.read(file)))
|
|
429 |
}
|
74921
|
430 |
}
|
72558
|
431 |
}
|
72559
|
432 |
|
75393
|
433 |
private class Message_Chunk(bg: String = "", en: String = "") {
|
|
434 |
def unapply(lines: List[String]): Option[List[String]] = {
|
74932
|
435 |
val res1 =
|
|
436 |
if (bg.isEmpty) Some(lines)
|
|
437 |
else {
|
|
438 |
lines.dropWhile(_ != bg) match {
|
|
439 |
case Nil => None
|
|
440 |
case _ :: rest => Some(rest)
|
|
441 |
}
|
|
442 |
}
|
|
443 |
if (en.isEmpty) res1
|
|
444 |
else {
|
|
445 |
res1 match {
|
|
446 |
case None => None
|
|
447 |
case Some(lines1) =>
|
|
448 |
val lines2 = lines1.takeWhile(_ != en)
|
|
449 |
if (lines1.drop(lines2.length).isEmpty) None else Some(lines2)
|
|
450 |
}
|
|
451 |
}
|
|
452 |
}
|
|
453 |
|
|
454 |
def get(lines: List[String]): List[String] =
|
|
455 |
unapply(lines) getOrElse
|
|
456 |
error("Missing delimiters:" +
|
|
457 |
(if (bg.nonEmpty) " " else "") + bg +
|
|
458 |
(if (en.nonEmpty) " " else "") + en)
|
|
459 |
}
|
|
460 |
|
72559
|
461 |
|
74930
|
462 |
/* isabelle-users mailing list */
|
72559
|
463 |
|
74921
|
464 |
object Isabelle_Users extends Archive(
|
|
465 |
Url("https://lists.cam.ac.uk/pipermail/cl-isabelle-users"),
|
75393
|
466 |
name = "isabelle-users", tag = "isabelle"
|
|
467 |
) {
|
74921
|
468 |
override def message_regex: Regex = """<li><a name="\d+" href="(msg\d+\.html)">""".r
|
|
469 |
|
|
470 |
private object Head extends
|
74922
|
471 |
Message_Chunk(bg = "<!--X-Head-of-Message-->", en = "<!--X-Head-of-Message-End-->")
|
74921
|
472 |
|
|
473 |
private object Body extends
|
74922
|
474 |
Message_Chunk(bg = "<!--X-Body-of-Message-->", en = "<!--X-Body-of-Message-End-->")
|
|
475 |
|
75393
|
476 |
private object Date_Format {
|
74922
|
477 |
private val Trim1 = """\w+,\s*(.*)""".r
|
|
478 |
private val Trim2 = """(.*?)\s*\(.*\)""".r
|
|
479 |
private val Format =
|
|
480 |
Date.Format(
|
|
481 |
"d MMM uuuu H:m:s Z",
|
|
482 |
"d MMM uuuu H:m:s z",
|
|
483 |
"d MMM yy H:m:s Z",
|
|
484 |
"d MMM yy H:m:s z")
|
75393
|
485 |
def unapply(s: String): Option[Date] = {
|
74922
|
486 |
val s0 = s.replaceAll("""\s+""", " ")
|
|
487 |
val s1 =
|
|
488 |
s0 match {
|
|
489 |
case Trim1(s1) => s1
|
|
490 |
case _ => s0
|
|
491 |
}
|
|
492 |
val s2 =
|
|
493 |
s1 match {
|
|
494 |
case Trim2(s2) => s2
|
|
495 |
case _ => s1
|
|
496 |
}
|
|
497 |
Format.unapply(s2)
|
|
498 |
}
|
|
499 |
}
|
74921
|
500 |
|
75393
|
501 |
override def make_name(str: String): String = {
|
74935
|
502 |
val s = Library.perhaps_unsuffix(" via Cl-isabelle-users", super.make_name(str))
|
74925
|
503 |
if (s == "cl-isabelle-users@lists.cam.ac.uk") "" else s
|
|
504 |
}
|
|
505 |
|
75393
|
506 |
object Address {
|
74924
|
507 |
private def anchor(s: String): String = """<a href="[^"]*">""" + s + """</a>"""
|
|
508 |
private def angl(s: String): String = """<""" + s + """>"""
|
|
509 |
private def quot(s: String): String = """"""" + s + """""""
|
|
510 |
private def paren(s: String): String = """\(""" + s + """\)"""
|
|
511 |
private val adr = """([^<>]*? at [^<>]*?)"""
|
|
512 |
private val any = """([^<>]*?)"""
|
|
513 |
private val spc = """\s*"""
|
|
514 |
|
|
515 |
private val Name1 = quot(anchor(any)).r
|
|
516 |
private val Name2 = quot(any).r
|
|
517 |
private val Name_Adr1 = (quot(anchor(any)) + spc + angl(anchor(adr))).r
|
|
518 |
private val Name_Adr2 = (quot(any) + spc + angl(anchor(adr))).r
|
|
519 |
private val Name_Adr3 = (anchor(any) + spc + angl(anchor(adr))).r
|
|
520 |
private val Name_Adr4 = (any + spc + angl(anchor(adr))).r
|
|
521 |
private val Adr_Name1 = (angl(anchor(adr)) + spc + paren(any)).r
|
|
522 |
private val Adr_Name2 = (anchor(adr) + spc + paren(any)).r
|
|
523 |
private val Adr1 = angl(anchor(adr)).r
|
|
524 |
private val Adr2 = anchor(adr).r
|
|
525 |
|
74936
|
526 |
def parse(s: String): List[String] =
|
74924
|
527 |
s match {
|
74936
|
528 |
case Name1(a) => List(a)
|
|
529 |
case Name2(a) => List(a)
|
|
530 |
case Name_Adr1(a, b) => List(a, b)
|
|
531 |
case Name_Adr2(a, b) => List(a, b)
|
|
532 |
case Name_Adr3(a, b) => List(a, b)
|
|
533 |
case Name_Adr4(a, b) => List(a, b)
|
|
534 |
case Adr_Name1(b, a) => List(a, b)
|
|
535 |
case Adr_Name2(b, a) => List(a, b)
|
|
536 |
case Adr1(a) => List(a)
|
|
537 |
case Adr2(a) => List(a)
|
|
538 |
case _ => Nil
|
74924
|
539 |
}
|
|
540 |
}
|
|
541 |
|
75393
|
542 |
override def message_content(name: String, lines: List[String]): Message = {
|
74921
|
543 |
def err(msg: String = ""): Nothing =
|
74923
|
544 |
error("Malformed message: " + name + (if (msg.isEmpty) "" else "\n" + msg))
|
74921
|
545 |
|
|
546 |
val (head, body) =
|
74924
|
547 |
try { (Head.get(lines), make_body(Body.get(lines))) }
|
74921
|
548 |
catch { case ERROR(msg) => err(msg) }
|
72559
|
549 |
|
74921
|
550 |
val date =
|
|
551 |
message_match(head, """<li><em>Date</em>:\s*(.*?)\s*</li>""".r)
|
|
552 |
.map(m => HTML.input(m.group(1))) match
|
|
553 |
{
|
|
554 |
case Some(Date_Format(d)) => d
|
|
555 |
case Some(s) => err("Malformed Date: " + quote(s))
|
|
556 |
case None => err("Missing Date")
|
|
557 |
}
|
|
558 |
|
|
559 |
val title =
|
74924
|
560 |
make_title(
|
|
561 |
HTML.input(message_match(head, """<li><em>Subject</em>:\s*(.*)</li>""".r)
|
|
562 |
.getOrElse(err("Missing Subject")).group(1)))
|
74921
|
563 |
|
74938
|
564 |
def parse_author_info(re: Regex): List[String] =
|
|
565 |
message_match(head, re) match {
|
|
566 |
case None => Nil
|
|
567 |
case Some(m) => Address.parse(m.group(1)).map(a => HTML.input(make_name(a)))
|
|
568 |
}
|
|
569 |
|
74936
|
570 |
val author_info =
|
74938
|
571 |
(parse_author_info("""<li><em>From</em>:\s*(.*?)\s*</li>""".r) :::
|
|
572 |
parse_author_info("""<li><em>Reply-to</em>:\s*(.*?)\s*</li>""".r))
|
|
573 |
.distinct.filter(_.nonEmpty)
|
|
574 |
|
|
575 |
if (author_info.isEmpty) err("Malformed author information")
|
74921
|
576 |
|
74934
|
577 |
val tags = List(list_name)
|
|
578 |
|
74936
|
579 |
Message(name, date, title, standard_author_info(author_info), body, tags)
|
74921
|
580 |
}
|
|
581 |
}
|
|
582 |
|
74930
|
583 |
|
|
584 |
/* isabelle-dev mailing list */
|
|
585 |
|
75393
|
586 |
object Isabelle_Dev extends Archive(Url("https://mailmanbroy.in.tum.de/pipermail/isabelle-dev")) {
|
74921
|
587 |
override def message_regex: Regex = """<LI><A HREF="(\d+\.html)">""".r
|
|
588 |
|
74922
|
589 |
private object Head extends Message_Chunk(en = "<!--beginarticle-->")
|
|
590 |
private object Body extends Message_Chunk(bg = "<!--beginarticle-->", en = "<!--endarticle-->")
|
|
591 |
|
75393
|
592 |
private object Date_Format {
|
74922
|
593 |
val Format = Date.Format("E MMM d H:m:s z uuuu")
|
|
594 |
def unapply(s: String): Option[Date] = Format.unapply(s.replaceAll("""\s+""", " "))
|
|
595 |
}
|
|
596 |
|
75393
|
597 |
override def message_content(name: String, lines: List[String]): Message = {
|
74922
|
598 |
def err(msg: String = ""): Nothing =
|
74923
|
599 |
error("Malformed message: " + name + (if (msg.isEmpty) "" else "\n" + msg))
|
74922
|
600 |
|
|
601 |
val (head, body) =
|
74924
|
602 |
try { (Head.get(lines), make_body(Body.get(lines))) }
|
74922
|
603 |
catch { case ERROR(msg) => err(msg) }
|
|
604 |
|
|
605 |
val date =
|
|
606 |
message_match(head, """\s*<I>(.*)</I>""".r).map(m => HTML.input(m.group(1))) match {
|
|
607 |
case Some(Date_Format(d)) => d
|
|
608 |
case Some(s) => err("Malformed Date: " + quote(s))
|
|
609 |
case None => err("Missing Date")
|
|
610 |
}
|
|
611 |
|
75393
|
612 |
val (title, author_address) = {
|
74922
|
613 |
message_match(head, """TITLE="([^"]+)">(.*)""".r) match {
|
74931
|
614 |
case Some(m) => (make_title(HTML.input(m.group(1))), make_name(HTML.input(m.group(2))))
|
74922
|
615 |
case None => err("Missing TITLE")
|
|
616 |
}
|
|
617 |
}
|
|
618 |
|
|
619 |
val author_name =
|
|
620 |
message_match(head, """\s*<B>(.*)</B>""".r) match {
|
|
621 |
case None => err("Missing author")
|
74931
|
622 |
case Some(m) =>
|
|
623 |
val a = make_name(HTML.input(m.group(1)))
|
|
624 |
if (a == author_address) "" else a
|
74922
|
625 |
}
|
|
626 |
|
74936
|
627 |
val author_info = List(author_name, author_address)
|
74934
|
628 |
val tags = List(list_name)
|
|
629 |
|
74936
|
630 |
Message(name, date, title, standard_author_info(author_info), body, tags)
|
74922
|
631 |
}
|
74921
|
632 |
}
|
72558
|
633 |
}
|