# HG changeset patch # User wenzelm # Date 1639420752 -3600 # Node ID af954161e1cdc2801a57e7f35d629c2f07730d8b # Parent e0070487b6350278bd6f12662b47ed391c824d49 misc tuning and clarification; diff -r e0070487b635 -r af954161e1cd src/Pure/General/mailman.scala --- a/src/Pure/General/mailman.scala Mon Dec 13 19:21:24 2021 +0100 +++ b/src/Pure/General/mailman.scala Mon Dec 13 19:39:12 2021 +0100 @@ -31,10 +31,7 @@ quote(author_name) + "\n" + "<" + author_address + ">\n" } - def message_match(lines: List[String], re: Regex): Option[Match] = - lines.flatMap(re.findFirstMatchIn(_)).headOption - - class Message_Chunk(bg: String = "", en: String = "") + private class Message_Chunk(bg: String = "", en: String = "") { def unapply(lines: List[String]): Option[List[String]] = { @@ -64,45 +61,6 @@ (if (en.nonEmpty) " " else "") + en) } - object Address - { - def anchor(s: String): String = """""" + s + """""" - def angl(s: String): String = """<""" + s + """>""" - def quot(s: String): String = """"""" + s + """"""" - def paren(s: String): String = """\(""" + s + """\)""" - val adr = """([^<>]*? at [^<>]*?)""" - val any = """([^<>]*?)""" - val spc = """\s*""" - - val Name1 = quot(anchor(any)).r - val Name2 = quot(any).r - val Name_Adr1 = (quot(anchor(any)) + spc + angl(anchor(adr))).r - val Name_Adr2 = (quot(any) + spc + angl(anchor(adr))).r - val Name_Adr3 = (anchor(any) + spc + angl(anchor(adr))).r - val Name_Adr4 = (any + spc + angl(anchor(adr))).r - val Adr_Name1 = (angl(anchor(adr)) + spc + paren(any)).r - val Adr_Name2 = (anchor(adr) + spc + paren(any)).r - val Adr1 = angl(anchor(adr)).r - val Adr2 = anchor(adr).r - - def trim(s: String): String = HTML.input(s).replace(" at ", "@") - - def unapply(s: String): Option[(String, String)] = - s match { - case Name1(a) => Some((trim(a), "")) - case Name2(a) => Some((trim(a), "")) - case Name_Adr1(a, b) => Some((trim(a), trim(b))) - case Name_Adr2(a, b) => Some((trim(a), trim(b))) - case Name_Adr3(a, b) => Some((trim(a), trim(b))) - case Name_Adr4(a, b) => Some((trim(a), trim(b))) - case Adr_Name1(b, a) => Some((trim(a), trim(b))) - case Adr_Name2(b, a) => Some((trim(a), trim(b))) - case Adr1(a) => Some(("", trim(a))) - case Adr2(a) => Some(("", trim(a))) - case _ => None - } - } - /* mailing list archives */ @@ -114,6 +72,16 @@ def message_regex: Regex def message_content(name: String, lines: List[String]): Message + def message_match(lines: List[String], re: Regex): Option[Match] = + lines.flatMap(re.findFirstMatchIn(_)).headOption + + def make_name(str: String): String = str.replaceAll("""\s+""", " ") + + def make_address(s: String): String = HTML.input(s).replace(" at ", "@") + + def make_body(lines: List[String]): String = + cat_lines(Library.take_suffix[String](_.isEmpty, lines)._1) + private val main_url: URL = Url(Library.take_suffix[Char](_ == '/', Url.trim_index(url).toString.toList)._1.mkString + "/") @@ -177,7 +145,7 @@ download_text(target_dir, progress = progress) ::: download_msg(target_dir, progress = progress) - def trim_title(str: String): String = + def make_title(str: String): String = { val Trim1 = ("""\s*\Q[""" + list_tag + """]\E\s*(.*)""").r val Trim2 = """(?i:(?:re|fw|fwd)\s*:\s*)(.*)""".r @@ -192,8 +160,6 @@ trim(str) } - def trim_name(str: String): String = str.replaceAll("""\s+""", " ") - def get_messages(): List[Message] = for (href <- hrefs_msg) yield message_content(href, split_lines(read_text(href))) @@ -252,22 +218,59 @@ } } - override def trim_name(str: String): String = + override def make_name(str: String): String = { val Trim = """(.*?)\s*via\s+Cl-isabelle-users""".r - super.trim_name(str) match { + super.make_name(str) match { case Trim(s) => s case s => s } } + object Address + { + private def anchor(s: String): String = """""" + s + """""" + private def angl(s: String): String = """<""" + s + """>""" + private def quot(s: String): String = """"""" + s + """"""" + private def paren(s: String): String = """\(""" + s + """\)""" + private val adr = """([^<>]*? at [^<>]*?)""" + private val any = """([^<>]*?)""" + private val spc = """\s*""" + + private val Name1 = quot(anchor(any)).r + private val Name2 = quot(any).r + private val Name_Adr1 = (quot(anchor(any)) + spc + angl(anchor(adr))).r + private val Name_Adr2 = (quot(any) + spc + angl(anchor(adr))).r + private val Name_Adr3 = (anchor(any) + spc + angl(anchor(adr))).r + private val Name_Adr4 = (any + spc + angl(anchor(adr))).r + private val Adr_Name1 = (angl(anchor(adr)) + spc + paren(any)).r + private val Adr_Name2 = (anchor(adr) + spc + paren(any)).r + private val Adr1 = angl(anchor(adr)).r + private val Adr2 = anchor(adr).r + + def unapply(s: String): Option[(String, String)] = + s match { + case Name1(a) => Some((make_address(a), "")) + case Name2(a) => Some((make_address(a), "")) + case Name_Adr1(a, b) => Some((make_address(a), make_address(b))) + case Name_Adr2(a, b) => Some((make_address(a), make_address(b))) + case Name_Adr3(a, b) => Some((make_address(a), make_address(b))) + case Name_Adr4(a, b) => Some((make_address(a), make_address(b))) + case Adr_Name1(b, a) => Some((make_address(a), make_address(b))) + case Adr_Name2(b, a) => Some((make_address(a), make_address(b))) + case Adr1(a) => Some(("", make_address(a))) + case Adr2(a) => Some(("", make_address(a))) + case _ => None + } + } + override def message_content(name: String, lines: List[String]): Message = { def err(msg: String = ""): Nothing = error("Malformed message: " + name + (if (msg.isEmpty) "" else "\n" + msg)) val (head, body) = - try { (Head.get(lines), Body.get(lines)) } + try { (Head.get(lines), make_body(Body.get(lines))) } catch { case ERROR(msg) => err(msg) } val date = @@ -280,8 +283,9 @@ } val title = - HTML.input(message_match(head, """
  • Subject:\s*(.*)
  • """.r) - .getOrElse(err("Missing Subject")).group(1)) + make_title( + HTML.input(message_match(head, """
  • Subject:\s*(.*)
  • """.r) + .getOrElse(err("Missing Subject")).group(1))) val (author_name, author_address) = message_match(head, """
  • From:\s*(.*?)\s*
  • """.r) match { @@ -289,11 +293,7 @@ case Some(m) => Address.unapply(m.group(1)) getOrElse err("Malformed From") } - Message( - name, date, trim_title(title), - trim_name(proper_string(author_name) getOrElse author_address), - author_address, - cat_lines(body)) + Message(name, date, title, author_name, author_address, body) } } @@ -316,7 +316,7 @@ error("Malformed message: " + name + (if (msg.isEmpty) "" else "\n" + msg)) val (head, body) = - try { (Head.get(lines), Body.get(lines)) } + try { (Head.get(lines), make_body(Body.get(lines))) } catch { case ERROR(msg) => err(msg) } val date = @@ -329,7 +329,7 @@ val (title, author_address) = { message_match(head, """TITLE="([^"]+)">(.*)""".r) match { - case Some(m) => (HTML.input(m.group(1)), Address.trim(HTML.input(m.group(2)))) + case Some(m) => (make_title(HTML.input(m.group(1))), make_address(HTML.input(m.group(2)))) case None => err("Missing TITLE") } } @@ -337,14 +337,10 @@ val author_name = message_match(head, """\s*(.*)""".r) match { case None => err("Missing author") - case Some(m) => HTML.input(m.group(1)) + case Some(m) => make_name(HTML.input(m.group(1))) } - Message( - name, date, trim_title(title), - trim_name(proper_string(author_name) getOrElse author_address), - author_address, - cat_lines(body)) + Message(name, date, title, author_name, author_address, body) } } }