support for Mailman list servers;
authorwenzelm
Sat, 07 Nov 2020 20:24:34 +0100
changeset 72558 38ebf696fd0c
parent 72557 6345cce0e576
child 72559 274c9986e55b
support for Mailman list servers;
src/Pure/General/mailman.scala
src/Pure/General/url.scala
src/Pure/build-jars
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/General/mailman.scala	Sat Nov 07 20:24:34 2020 +0100
@@ -0,0 +1,60 @@
+/*  Title:      Pure/General/mailman.scala
+    Author:     Makarius
+
+Support for Mailman list servers.
+*/
+
+package isabelle
+
+
+import java.net.URL
+
+
+object Mailman
+{
+  def archive(url: URL, name: String = ""): Archive =
+  {
+    val text = Url.read(url)
+    val hrefs = """href="([^"]+\.txt(?:\.gz)?)"""".r.findAllMatchIn(text).map(_.group(1)).toList
+    val title =
+      """<title>The ([^</>]*) Archives</title>""".r.findFirstMatchIn(text).map(_.group(1))
+    val list_name =
+      (proper_string(name) orElse title).getOrElse(error("Failed to determine mailing list name"))
+    new Archive(Url.trim_index(url), list_name, hrefs)
+  }
+
+  class Archive private[Mailman](val list_url: URL, val list_name: String, hrefs: List[String])
+  {
+    override def toString: String = list_name
+
+    def download(
+      target_dir: Path = Path.current,
+      progress: Progress = new Progress): List[Path] =
+    {
+      val dir = target_dir + Path.basic(list_name)
+      Isabelle_System.make_directory(dir)
+
+      hrefs.flatMap(name =>
+        {
+          val path = dir + Path.basic(name)
+          val url = new URL(list_url, name)
+          val connection = url.openConnection
+          try {
+            val length = connection.getContentLengthLong
+            val timestamp = connection.getLastModified
+            val file = path.file
+            if (file.isFile && file.length == length && file.lastModified == timestamp) None
+            else {
+              progress.echo("Getting " + url)
+              val bytes =
+                using(connection.getInputStream)(Bytes.read_stream(_, hint = length.toInt max 1024))
+              Bytes.write(file, bytes)
+              file.setLastModified(timestamp)
+              Some(path)
+            }
+          }
+          finally { connection.getInputStream.close }
+        })
+    }
+  }
+}
--- a/src/Pure/General/url.scala	Sat Nov 07 16:49:51 2020 +0100
+++ b/src/Pure/General/url.scala	Sat Nov 07 20:24:34 2020 +0100
@@ -47,6 +47,21 @@
     catch { case ERROR(_) => false }
 
 
+  /* trim index */
+
+  def trim_index(url: URL): URL =
+  {
+    Library.try_unprefix("/index.html", url.toString) match {
+      case Some(u) => Url(u)
+      case None =>
+        Library.try_unprefix("/index.php", url.toString) match {
+          case Some(u) => Url(u)
+          case None => url
+        }
+    }
+  }
+
+
   /* strings */
 
   def decode(s: String): String = URLDecoder.decode(s, UTF8.charset_name)
--- a/src/Pure/build-jars	Sat Nov 07 16:49:51 2020 +0100
+++ b/src/Pure/build-jars	Sat Nov 07 20:24:34 2020 +0100
@@ -66,6 +66,7 @@
   src/Pure/General/linear_set.scala
   src/Pure/General/logger.scala
   src/Pure/General/long_name.scala
+  src/Pure/General/mailman.scala
   src/Pure/General/mercurial.scala
   src/Pure/General/multi_map.scala
   src/Pure/General/output.scala