--- a/src/Pure/General/mailman.scala Fri Dec 10 16:55:42 2021 +0100
+++ b/src/Pure/General/mailman.scala Fri Dec 10 18:56:18 2021 +0100
@@ -9,65 +9,89 @@
import java.net.URL
+import scala.util.matching.Regex
+
object Mailman
{
/* mailing list archives */
- def archive(url: URL, name: String = ""): Archive =
+ def archive(url: URL, msg_format: Msg_Format, name: String = ""): Archive =
{
- val html = Url.read(url)
+ val list_url =
+ Url(Library.take_suffix[Char](_ == '/', Url.trim_index(url).toString.toList)._1.mkString + "/")
+
+ val html = Url.read(list_url)
val title =
"""<title>The ([^</>]*) Archives</title>""".r.findFirstMatchIn(html).map(_.group(1))
val hrefs_text =
"""href="([^"]+\.txt(?:\.gz)?)"""".r.findAllMatchIn(html).map(_.group(1)).toList
- val list_url =
- Url(Library.take_suffix[Char](_ == '/', Url.trim_index(url).toString.toList)._1.mkString + "/")
val list_name =
(proper_string(name) orElse title).getOrElse(error("Failed to determine mailing list name"))
- new Archive(list_url, list_name, hrefs_text)
+ new Archive(list_url, list_name, msg_format, hrefs_text)
}
- class Archive private[Mailman](val list_url: URL, val list_name: String, hrefs_text: List[String])
+ abstract class Msg_Format
+ {
+ def regex: Regex
+ }
+
+ class Archive private[Mailman](
+ val list_url: URL,
+ val list_name: String,
+ msg_format: Msg_Format,
+ hrefs_text: List[String])
{
override def toString: String = list_name
- def download_text(target_dir: Path, progress: Progress = new Progress): List[Path] =
+ private def hrefs_msg: List[String] =
+ (for {
+ href <- """href="([^"]+)/date.html"""".r.findAllMatchIn(Url.read(list_url)).map(_.group(1))
+ html = Url.read(new URL(list_url, href + "/date.html"))
+ msg <- msg_format.regex.findAllMatchIn(html).map(_.group(1))
+ } yield href + "/" + msg).toList
+
+ def get(target_dir: Path, href: String, progress: Progress = new Progress): Option[Path] =
{
val dir = target_dir + Path.basic(list_name)
- Isabelle_System.make_directory(dir)
+ val path = dir + Path.explode(href)
+ val url = new URL(list_url, href)
+ val connection = url.openConnection
+ try {
+ val length = connection.getContentLengthLong
+ val timestamp = connection.getLastModified
+ val file = path.file
+ if (file.isFile && file.length == length && file.lastModified == timestamp) None
+ else {
+ Isabelle_System.make_directory(path.dir)
+ progress.echo("Getting " + url)
+ val bytes =
+ using(connection.getInputStream)(Bytes.read_stream(_, hint = length.toInt max 1024))
+ Bytes.write(file, bytes)
+ file.setLastModified(timestamp)
+ Some(path)
+ }
+ }
+ finally { connection.getInputStream.close() }
+ }
- hrefs_text.flatMap(name =>
- {
- val path = dir + Path.basic(name)
- val url = new URL(list_url, name)
- val connection = url.openConnection
- try {
- val length = connection.getContentLengthLong
- val timestamp = connection.getLastModified
- val file = path.file
- if (file.isFile && file.length == length && file.lastModified == timestamp) None
- else {
- progress.echo("Getting " + url)
- val bytes =
- using(connection.getInputStream)(Bytes.read_stream(_, hint = length.toInt max 1024))
- Bytes.write(file, bytes)
- file.setLastModified(timestamp)
- Some(path)
- }
- }
- finally { connection.getInputStream.close() }
- })
- }
+ def download_text(target_dir: Path, progress: Progress = new Progress): List[Path] =
+ hrefs_text.flatMap(get(target_dir, _, progress = progress))
+
+ def download_msg(target_dir: Path, progress: Progress = new Progress): List[Path] =
+ hrefs_msg.flatMap(get(target_dir, _, progress = progress))
}
/* Isabelle mailing lists */
def isabelle_users: Archive =
- archive(Url("https://lists.cam.ac.uk/pipermail/cl-isabelle-users"), name = "isabelle-users")
+ archive(Url("https://lists.cam.ac.uk/pipermail/cl-isabelle-users"), name = "isabelle-users",
+ msg_format =
+ new Msg_Format { val regex: Regex = """<li><a name="\d+" href="(msg\d+\.html)">""".r })
def isabelle_dev: Archive =
- archive(Url("https://mailmanbroy.in.tum.de/pipermail/isabelle-dev"))
+ archive(Url("https://mailmanbroy.in.tum.de/pipermail/isabelle-dev"),
+ new Msg_Format { val regex: Regex = """<LI><A HREF="(\d+\.html)">""".r })
}