# HG changeset patch # User wenzelm # Date 1604777074 -3600 # Node ID 38ebf696fd0cb9f985e93437d2a57748ca7315db # Parent 6345cce0e576f215867fc00f2132ca6f25593e82 support for Mailman list servers; diff -r 6345cce0e576 -r 38ebf696fd0c src/Pure/General/mailman.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Pure/General/mailman.scala Sat Nov 07 20:24:34 2020 +0100 @@ -0,0 +1,60 @@ +/* Title: Pure/General/mailman.scala + Author: Makarius + +Support for Mailman list servers. +*/ + +package isabelle + + +import java.net.URL + + +object Mailman +{ + def archive(url: URL, name: String = ""): Archive = + { + val text = Url.read(url) + val hrefs = """href="([^"]+\.txt(?:\.gz)?)"""".r.findAllMatchIn(text).map(_.group(1)).toList + val title = + """The ([^</>]*) Archives""".r.findFirstMatchIn(text).map(_.group(1)) + val list_name = + (proper_string(name) orElse title).getOrElse(error("Failed to determine mailing list name")) + new Archive(Url.trim_index(url), list_name, hrefs) + } + + class Archive private[Mailman](val list_url: URL, val list_name: String, hrefs: List[String]) + { + override def toString: String = list_name + + def download( + target_dir: Path = Path.current, + progress: Progress = new Progress): List[Path] = + { + val dir = target_dir + Path.basic(list_name) + Isabelle_System.make_directory(dir) + + hrefs.flatMap(name => + { + val path = dir + Path.basic(name) + val url = new URL(list_url, name) + val connection = url.openConnection + try { + val length = connection.getContentLengthLong + val timestamp = connection.getLastModified + val file = path.file + if (file.isFile && file.length == length && file.lastModified == timestamp) None + else { + progress.echo("Getting " + url) + val bytes = + using(connection.getInputStream)(Bytes.read_stream(_, hint = length.toInt max 1024)) + Bytes.write(file, bytes) + file.setLastModified(timestamp) + Some(path) + } + } + finally { connection.getInputStream.close } + }) + } + } +} diff -r 6345cce0e576 -r 38ebf696fd0c src/Pure/General/url.scala --- a/src/Pure/General/url.scala Sat Nov 07 16:49:51 2020 +0100 +++ b/src/Pure/General/url.scala Sat Nov 07 20:24:34 2020 +0100 @@ -47,6 +47,21 @@ catch { case ERROR(_) => false } + /* trim index */ + + def trim_index(url: URL): URL = + { + Library.try_unprefix("/index.html", url.toString) match { + case Some(u) => Url(u) + case None => + Library.try_unprefix("/index.php", url.toString) match { + case Some(u) => Url(u) + case None => url + } + } + } + + /* strings */ def decode(s: String): String = URLDecoder.decode(s, UTF8.charset_name) diff -r 6345cce0e576 -r 38ebf696fd0c src/Pure/build-jars --- a/src/Pure/build-jars Sat Nov 07 16:49:51 2020 +0100 +++ b/src/Pure/build-jars Sat Nov 07 20:24:34 2020 +0100 @@ -66,6 +66,7 @@ src/Pure/General/linear_set.scala src/Pure/General/logger.scala src/Pure/General/long_name.scala + src/Pure/General/mailman.scala src/Pure/General/mercurial.scala src/Pure/General/multi_map.scala src/Pure/General/output.scala