more data integrity: name vs. address;
authorwenzelm
Mon, 13 Dec 2021 22:12:48 +0100
changeset 74925 4bc306cb2832
parent 74924 af954161e1cd
child 74926 5cd2e6e17e6f
more data integrity: name vs. address;
src/Pure/General/date.scala
src/Pure/General/mailman.scala
--- a/src/Pure/General/date.scala	Mon Dec 13 19:39:12 2021 +0100
+++ b/src/Pure/General/date.scala	Mon Dec 13 22:12:48 2021 +0100
@@ -13,6 +13,7 @@
 import java.time.temporal.TemporalAccessor
 
 import scala.annotation.tailrec
+import scala.math.Ordering
 
 
 object Date
@@ -74,6 +75,21 @@
   }
 
 
+  /* ordering */
+
+  object Ordering extends scala.math.Ordering[Date]
+  {
+    def compare(date1: Date, date2: Date): Int =
+      date1.instant.compareTo(date2.instant)
+  }
+
+  object Rev_Ordering extends scala.math.Ordering[Date]
+  {
+    def compare(date1: Date, date2: Date): Int =
+      date2.instant.compareTo(date1.instant)
+  }
+
+
   /* date operations */
 
   def timezone_utc: ZoneId = ZoneId.of("UTC")
--- a/src/Pure/General/mailman.scala	Mon Dec 13 19:39:12 2021 +0100
+++ b/src/Pure/General/mailman.scala	Mon Dec 13 22:12:48 2021 +0100
@@ -26,6 +26,7 @@
     author_address: String,
     body: String)
   {
+    override def toString: String = name
     def print: String =
       name + "\n" + date + "\n" + title + "\n" +
       quote(author_name) + "\n" + "<" + author_address + ">\n"
@@ -62,6 +63,44 @@
   }
 
 
+  /* integrity check */
+
+  def check_messages(msgs: List[Message]): Unit =
+  {
+    val msgs_sorted = msgs.sortBy(_.date)(Date.Ordering)
+    val msgs_proper =
+      msgs_sorted.filter(msg => msg.author_name.nonEmpty && msg.author_address.nonEmpty)
+
+    val address_name = Multi_Map.from(msgs_proper.map(msg => (msg.author_address, msg.author_name)))
+    val name_address = Multi_Map.from(msgs_proper.map(msg => (msg.author_name, msg.author_address)))
+
+    def print_dup(a: String, bs: List[String]): String =
+      "  * " + a + bs.mkString("\n      ", "\n      ", "")
+
+    def print_dups(title: String, m: Multi_Map[String, String]): Unit =
+    {
+      val dups = m.iterator_list.filter(p => p._2.length > 1).toList
+      if (dups.nonEmpty) {
+        Output.writeln(cat_lines(title :: dups.map(p => print_dup(p._1, p._2))))
+      }
+    }
+
+    print_dups("duplicate names:", address_name)
+    print_dups("duplicate addresses:", name_address)
+
+    def get_name(msg: Message): Option[String] =
+      proper_string(msg.author_name) orElse address_name.get(msg.author_address)
+
+    val unnamed =
+      msgs_sorted.flatMap(msg => if (get_name(msg).isEmpty) Some(msg.author_address) else None)
+        .toSet.toList.sorted
+
+    if (unnamed.nonEmpty) {
+      Output.writeln(("unnamed:" :: unnamed).mkString("\n", "\n  ", ""))
+    }
+  }
+
+
   /* mailing list archives */
 
   abstract class Archive(
@@ -227,6 +266,12 @@
       }
     }
 
+    override def make_address(str: String): String =
+    {
+      val s = super.make_address(make_name(str))
+      if (s == "cl-isabelle-users@lists.cam.ac.uk") "" else s
+    }
+
     object Address
     {
       private def anchor(s: String): String = """<a href="[^"]*">""" + s + """</a>"""
@@ -290,7 +335,9 @@
       val (author_name, author_address) =
         message_match(head, """<li><em>From</em>:\s*(.*?)\s*</li>""".r) match {
           case None => err("Missing From")
-          case Some(m) => Address.unapply(m.group(1)) getOrElse err("Malformed From")
+          case Some(m) =>
+            val (a, b) = Address.unapply(m.group(1)) getOrElse err("Malformed From")
+            (if (a == b) "" else a, b)
         }
 
       Message(name, date, title, author_name, author_address, body)