# HG changeset patch # User wenzelm # Date 1639158978 -3600 # Node ID 9702913db56c636f74de513e5e5e9a127e755d96 # Parent 246e220681413a9cf2abbe3e1c48042df43443a5 more Mailman content; diff -r 246e22068141 -r 9702913db56c src/Pure/General/mailman.scala --- a/src/Pure/General/mailman.scala Fri Dec 10 16:55:42 2021 +0100 +++ b/src/Pure/General/mailman.scala Fri Dec 10 18:56:18 2021 +0100 @@ -9,65 +9,89 @@ import java.net.URL +import scala.util.matching.Regex + object Mailman { /* mailing list archives */ - def archive(url: URL, name: String = ""): Archive = + def archive(url: URL, msg_format: Msg_Format, name: String = ""): Archive = { - val html = Url.read(url) + val list_url = + Url(Library.take_suffix[Char](_ == '/', Url.trim_index(url).toString.toList)._1.mkString + "/") + + val html = Url.read(list_url) val title = """The ([^</>]*) Archives""".r.findFirstMatchIn(html).map(_.group(1)) val hrefs_text = """href="([^"]+\.txt(?:\.gz)?)"""".r.findAllMatchIn(html).map(_.group(1)).toList - val list_url = - Url(Library.take_suffix[Char](_ == '/', Url.trim_index(url).toString.toList)._1.mkString + "/") val list_name = (proper_string(name) orElse title).getOrElse(error("Failed to determine mailing list name")) - new Archive(list_url, list_name, hrefs_text) + new Archive(list_url, list_name, msg_format, hrefs_text) } - class Archive private[Mailman](val list_url: URL, val list_name: String, hrefs_text: List[String]) + abstract class Msg_Format + { + def regex: Regex + } + + class Archive private[Mailman]( + val list_url: URL, + val list_name: String, + msg_format: Msg_Format, + hrefs_text: List[String]) { override def toString: String = list_name - def download_text(target_dir: Path, progress: Progress = new Progress): List[Path] = + private def hrefs_msg: List[String] = + (for { + href <- """href="([^"]+)/date.html"""".r.findAllMatchIn(Url.read(list_url)).map(_.group(1)) + html = Url.read(new URL(list_url, href + "/date.html")) + msg <- msg_format.regex.findAllMatchIn(html).map(_.group(1)) + } yield href + "/" + msg).toList + + def get(target_dir: Path, href: String, progress: Progress = new Progress): Option[Path] = { val dir = target_dir + Path.basic(list_name) - Isabelle_System.make_directory(dir) + val path = dir + Path.explode(href) + val url = new URL(list_url, href) + val connection = url.openConnection + try { + val length = connection.getContentLengthLong + val timestamp = connection.getLastModified + val file = path.file + if (file.isFile && file.length == length && file.lastModified == timestamp) None + else { + Isabelle_System.make_directory(path.dir) + progress.echo("Getting " + url) + val bytes = + using(connection.getInputStream)(Bytes.read_stream(_, hint = length.toInt max 1024)) + Bytes.write(file, bytes) + file.setLastModified(timestamp) + Some(path) + } + } + finally { connection.getInputStream.close() } + } - hrefs_text.flatMap(name => - { - val path = dir + Path.basic(name) - val url = new URL(list_url, name) - val connection = url.openConnection - try { - val length = connection.getContentLengthLong - val timestamp = connection.getLastModified - val file = path.file - if (file.isFile && file.length == length && file.lastModified == timestamp) None - else { - progress.echo("Getting " + url) - val bytes = - using(connection.getInputStream)(Bytes.read_stream(_, hint = length.toInt max 1024)) - Bytes.write(file, bytes) - file.setLastModified(timestamp) - Some(path) - } - } - finally { connection.getInputStream.close() } - }) - } + def download_text(target_dir: Path, progress: Progress = new Progress): List[Path] = + hrefs_text.flatMap(get(target_dir, _, progress = progress)) + + def download_msg(target_dir: Path, progress: Progress = new Progress): List[Path] = + hrefs_msg.flatMap(get(target_dir, _, progress = progress)) } /* Isabelle mailing lists */ def isabelle_users: Archive = - archive(Url("https://lists.cam.ac.uk/pipermail/cl-isabelle-users"), name = "isabelle-users") + archive(Url("https://lists.cam.ac.uk/pipermail/cl-isabelle-users"), name = "isabelle-users", + msg_format = + new Msg_Format { val regex: Regex = """
  • """.r }) def isabelle_dev: Archive = - archive(Url("https://mailmanbroy.in.tum.de/pipermail/isabelle-dev")) + archive(Url("https://mailmanbroy.in.tum.de/pipermail/isabelle-dev"), + new Msg_Format { val regex: Regex = """
  • """.r }) }