clarified session_statistics: removed somewhat pointless per-theory statistics;
authorwenzelm
Thu, 13 Jul 2023 13:04:15 +0200
changeset 78328 007b04dc6f96
parent 78327 a235fc426523
child 78329 a041d49736f2
clarified session_statistics: removed somewhat pointless per-theory statistics;
src/Pure/Tools/profiling.scala
src/Tools/profiling.ML
--- a/src/Pure/Tools/profiling.scala	Thu Jul 13 10:36:27 2023 +0200
+++ b/src/Pure/Tools/profiling.scala	Thu Jul 13 13:04:15 2023 +0200
@@ -27,37 +27,13 @@
   }
 
 
-  /* session and theory statistics */
-
-  object Theory_Statistics {
-    def sum(name: String, theories: List[Theory_Statistics]): Theory_Statistics =
-      theories.foldLeft(Theory_Statistics(name = name))(_ + _)
+  /* session statistics */
 
-    val header: List[String] =
-      List("name", "locales", "locale_thms", "global_thms",
-        "locale_thms_percentage", "global_thms_percentage")
-  }
-
-  sealed case class Theory_Statistics(
-    name: String = "",
+  sealed case class Session_Statistics(
+    theories: Int = 0,
     locales: Int = 0,
     locale_thms: Int = 0,
-    global_thms: Int = 0
-  ) {
-    def + (other: Theory_Statistics): Theory_Statistics =
-      copy(
-        locales = other.locales + locales,
-        locale_thms = other.locale_thms + locale_thms,
-        global_thms = other.global_thms + global_thms)
-
-    def thms: Int = locale_thms + global_thms
-
-    def fields: List[Any] =
-      List(name, locales, locale_thms, global_thms,
-        percentage(locale_thms, thms), percentage(global_thms, thms))
-  }
-
-  sealed case class Session_Statistics(
+    global_thms: Int = 0,
     sizeof_thys_id: Space = Space.zero,
     sizeof_thms_id: Space = Space.zero,
     sizeof_terms: Space = Space.zero,
@@ -69,53 +45,37 @@
       (args: List[String]) =>
         { import XML.Encode._; list(string)(args) }
 
-    private val decode_theories_result: XML.Decode.T[List[Theory_Statistics]] =
-      (body: XML.Body) =>
-        { import XML.Decode._; list(pair(string, pair(int, pair(int, int))))(body) } map {
-          case (a, (b, (c, d))) =>
-            Theory_Statistics(
-              name = a,
-              locales = b,
-              locale_thms = c,
-              global_thms = d)
-        }
-
-    private val decode_session_result: XML.Decode.T[Session_Statistics] =
+    private val decode_result: XML.Decode.T[Session_Statistics] =
       (body: XML.Body) =>
         {
-          val (a, (b, (c, (d, e)))) = {
+          val (a, (b, (c, (d, (e, (f, (g, (h, i)))))))) = {
             import XML.Decode._
-            pair(long, pair(long, pair(long, pair(long, long))))(body)
+            pair(int, pair(int, pair(int, pair(int,
+              pair(long, pair(long, pair(long, pair(long, long))))))))(body)
           }
           Session_Statistics(
-            sizeof_thys_id = Space.bytes(a),
-            sizeof_thms_id = Space.bytes(b),
-            sizeof_terms = Space.bytes(c),
-            sizeof_types = Space.bytes(d),
-            sizeof_spaces = Space.bytes(e))
+            theories = a,
+            locales = b,
+            locale_thms = c,
+            global_thms = d,
+            sizeof_thys_id = Space.bytes(e),
+            sizeof_thms_id = Space.bytes(f),
+            sizeof_terms = Space.bytes(g),
+            sizeof_types = Space.bytes(h),
+            sizeof_spaces = Space.bytes(i))
         }
-    private val decode_result: XML.Decode.T[(List[Theory_Statistics], Session_Statistics)] =
-      (body: XML.Body) =>
-        { import XML.Decode._; pair(decode_theories_result, decode_session_result)(body) }
 
     def make(
       store: Store,
       session_background: Sessions.Background,
-      parent: Option[Statistics] = None,
-      anonymous_theories: Boolean = false
+      parent: Option[Statistics] = None
     ): Statistics = {
       val session_base = session_background.base
       val session_name = session_base.session_name
       val sessions_structure = session_background.sessions_structure
 
-      val theories_name = session_base.used_theories.map(p => p._1.theory)
-      val theories_index =
-        Map.from(
-          for ((name, i) <- theories_name.zipWithIndex)
-            yield name -> String.format(Locale.ROOT, "%s.%04d", session_name, i + 1))
-
-      val (theories0, session) = {
-        val args = theories_name
+      val session = {
+        val args = session_base.used_theories.map(p => p._1.theory)
         val eval_args =
           List("--eval", "use_thy " + ML_Syntax.print_string_bytes("~~/src/Tools/Profiling"))
         Isabelle_System.with_tmp_dir("profiling") { dir =>
@@ -129,11 +89,11 @@
         }
       }
 
-      val theories =
-        if (anonymous_theories) theories0.map(a => a.copy(name = theories_index(a.name)))
-        else theories0
-
-      new Statistics(parent = parent, session_name = session_name, theories = theories,
+      new Statistics(parent = parent, session = session_name,
+        theories = session.theories,
+        locales = session.locales,
+        locale_thms = session.locale_thms,
+        global_thms = session.global_thms,
         heap_size = Space.bytes(store.the_heap(session_name).file.length),
         thys_id_size = session.sizeof_thys_id,
         thms_id_size = session.sizeof_thms_id,
@@ -144,21 +104,31 @@
 
     val empty: Statistics = new Statistics()
 
-    val header: List[String] =
-      Theory_Statistics.header :::
-        List("heap_size",
-          "thys_id_size_percentage",
-          "thms_id_size_percentage",
-          "terms_size_percentage",
-          "types_size_percentage",
-          "spaces_size_percentage")
-    val header_cumulative: List[String] = header ::: header.tail.map(_ + "_cumulative")
+    val header0: List[String] =
+      List(
+        "theories",
+        "locales",
+        "locale_thms",
+        "global_thms",
+        "locale_thms_percentage",
+        "global_thms_percentage",
+        "heap_size",
+        "thys_id_size_percentage",
+        "thms_id_size_percentage",
+        "terms_size_percentage",
+        "types_size_percentage",
+        "spaces_size_percentage")
+
+    def header: List[String] = "session" :: header0 ::: header0.map(_ + "_cumulative")
   }
 
   final class Statistics private(
     val parent: Option[Statistics] = None,
-    val session_name: String = "",
-    val theories: List[Theory_Statistics] = Nil,
+    val session: String = "",
+    val theories: Int = 0,
+    val locales: Int = 0,
+    val locale_thms: Int = 0,
+    val global_thms: Int = 0,
     val heap_size: Space = Space.zero,
     val thys_id_size: Space = Space.zero,
     val thms_id_size: Space = Space.zero,
@@ -169,26 +139,36 @@
     private def size_percentage(space: Space): Percentage =
       percentage_space(space, heap_size)
 
-    def fields: List[Any] =
-      session.fields :::
-        List(heap_size.print,
-          size_percentage(thys_id_size).toString,
-          size_percentage(thms_id_size).toString,
-          size_percentage(terms_size).toString,
-          size_percentage(types_size).toString,
-          size_percentage(spaces_size).toString)
-    def fields_cumulative: List[Any] = fields ::: cumulative.fields.tail
+    private def thms_percentage(thms: Int): Percentage =
+      percentage(thms, locale_thms + global_thms)
 
-    lazy val session: Theory_Statistics =
-      Theory_Statistics.sum(session_name, theories)
+    def fields0: List[Any] =
+      List(
+        theories,
+        locales,
+        locale_thms,
+        global_thms,
+        thms_percentage(locale_thms).toString,
+        thms_percentage(global_thms).toString,
+        heap_size.print,
+        size_percentage(thys_id_size).toString,
+        size_percentage(thms_id_size).toString,
+        size_percentage(terms_size).toString,
+        size_percentage(types_size).toString,
+        size_percentage(spaces_size).toString)
+
+    def fields: List[Any] = session :: fields0 ::: cumulative.fields0
 
     lazy val cumulative: Statistics =
       parent match {
         case None => this
         case Some(other) =>
           new Statistics(parent = None,
-            session_name = session_name,
-            theories = other.cumulative.theories ::: theories,
+            session = session,
+            theories = other.cumulative.theories + theories,
+            locales = other.cumulative.locales + locales,
+            locale_thms = other.cumulative.locale_thms + locale_thms,
+            global_thms = other.cumulative.global_thms + global_thms,
             heap_size = other.cumulative.heap_size + heap_size,
             thys_id_size = other.cumulative.thys_id_size + thys_id_size,
             thms_id_size = other.cumulative.thms_id_size + thms_id_size,
@@ -197,7 +177,7 @@
             spaces_size = other.cumulative.spaces_size + spaces_size)
       }
 
-    override def toString: String = "Statistics(" + session_name + ")"
+    override def toString: String = "Statistics(" + session + ")"
   }
 
 
@@ -210,14 +190,8 @@
     ): Unit = {
       progress.echo("Output directory " + output_dir.absolute)
       Isabelle_System.make_directory(output_dir)
-
-      val sessions_records =
-        for (stats <- sessions) yield {
-          CSV.File("session_" + stats.session_name, Theory_Statistics.header,
-            stats.theories.map(thy => CSV.Record(thy.fields:_*))).write(output_dir)
-          CSV.Record(stats.fields_cumulative:_*)
-        }
-      CSV.File("all_sessions", Statistics.header_cumulative, sessions_records).write(output_dir)
+      val csv_records = for (session <- sessions) yield CSV.Record(session.fields:_*)
+      CSV.File("sessions", Statistics.header, csv_records).write(output_dir)
     }
   }
 
@@ -228,8 +202,7 @@
     dirs: List[Path] = Nil,
     select_dirs: List[Path] = Nil,
     numa_shuffling: Boolean = false,
-    max_jobs: Int = 1,
-    anonymous_theories: Boolean = false
+    max_jobs: Int = 1
   ): Results = {
     /* sessions structure */
 
@@ -286,8 +259,7 @@
         val stats =
           Statistics.make(store,
             build_results.deps.background(session_name),
-            parent = parent,
-            anonymous_theories = anonymous_theories)
+            parent = parent)
         seen += (session_name -> stats)
         stats
       }
@@ -314,7 +286,6 @@
         var dirs: List[Path] = Nil
         var session_groups: List[String] = Nil
         var max_jobs = 1
-        var anonymous_theories = false
         var options = Options.init(specs = Options.Spec.ISABELLE_BUILD_OPTIONS)
         var verbose = false
         var exclude_sessions: List[String] = Nil
@@ -332,7 +303,6 @@
     -d DIR       include session directory
     -g NAME      select session group NAME
     -j INT       maximum number of parallel jobs (default 1)
-    -n           anonymous theories: suppress details of private projects
     -o OPTION    override Isabelle system OPTION (via NAME=VAL or NAME)
     -v           verbose
     -x NAME      exclude session NAME and all descendants
@@ -348,7 +318,6 @@
           "d:" -> (arg => dirs = dirs ::: List(Path.explode(arg))),
           "g:" -> (arg => session_groups = session_groups ::: List(arg)),
           "j:" -> (arg => max_jobs = Value.Int.parse(arg)),
-          "n" -> (_ => anonymous_theories = true),
           "o:" -> (arg => options = options + arg),
           "v" -> (_ => verbose = true),
           "x:" -> (arg => exclude_sessions = exclude_sessions ::: List(arg)))
@@ -371,8 +340,7 @@
               dirs = dirs,
               select_dirs = select_dirs,
               numa_shuffling = Host.numa_check(progress, numa_shuffling),
-              max_jobs = max_jobs,
-              anonymous_theories = anonymous_theories)
+              max_jobs = max_jobs)
           }
 
         results.output(output_dir = output_dir.absolute, progress = progress)
--- a/src/Tools/profiling.ML	Thu Jul 13 10:36:27 2023 +0200
+++ b/src/Tools/profiling.ML	Thu Jul 13 13:04:15 2023 +0200
@@ -4,33 +4,33 @@
 Session profiling based on loaded ML image.
 *)
 
-type theory_statistics =
- {theory: string,
-  locales: int,
-  locale_thms: int,
-  global_thms: int};
-
-type session_statistics =
- {sizeof_thys_id: int,
-  sizeof_thms_id: int,
-  sizeof_terms: int,
-  sizeof_types: int,
-  sizeof_spaces: int};
-
-structure Profiling:
+signature PROFILING =
 sig
-  val locale_names: theory -> string list
+  val locales: theory -> string list
   val locale_thms: theory -> string -> thm list
+  val locales_thms: theory -> thm list
   val global_thms: theory -> thm list
   val all_thms: theory -> thm list
-  val statistics: string list -> theory_statistics list * session_statistics
+  type session_statistics =
+   {theories: int,
+    locales: int,
+    locale_thms: int,
+    global_thms: int,
+    sizeof_thys_id: int,
+    sizeof_thms_id: int,
+    sizeof_terms: int,
+    sizeof_types: int,
+    sizeof_spaces: int}
+  val session_statistics: string list -> session_statistics
   val main: unit -> unit
-end =
+end;
+
+structure Profiling: PROFILING =
 struct
 
 (* theory content *)
 
-fun locale_names thy =
+fun locales thy =
   let
     val parent_spaces = map Locale.locale_space (Theory.parents_of thy);
     fun add name =
@@ -44,19 +44,20 @@
   (Locale.locale_notes thy loc, []) |->
     (fold (#2 #> fold (#2 #> (fold (#1 #> fold (fn thm => proper_thm thm ? cons thm ))))));
 
+fun locales_thms thy =
+  maps (locale_thms thy) (locales thy);
+
 fun global_thms thy =
   Facts.dest_static true
     (map Global_Theory.facts_of (Theory.parents_of thy)) (Global_Theory.facts_of thy)
   |> maps #2;
 
-fun all_thms thy =
-  let val locales = locale_names thy
-  in maps (locale_thms thy) locales @ global_thms thy end;
+fun all_thms thy = locales_thms thy @ global_thms thy;
 
 
 (* session content *)
 
-fun theories_content thys =
+fun session_content thys =
   let
     val thms = maps all_thms thys;
     val thys_id = map Context.theory_id thys;
@@ -79,19 +80,20 @@
 fun sizeof_diff (a, b) = ML_Heap.sizeof a - ML_Heap.sizeof b;
 
 
-(* collect statistics *)
+(* session statistics *)
 
-fun theory_statistics thy : theory_statistics =
-  let
-    val locales = locale_names thy;
-  in
-   {theory = Context.theory_long_name thy,
-    locales = length locales,
-    locale_thms = length (maps (locale_thms thy) locales),
-    global_thms = length (global_thms thy)}
-  end;
+type session_statistics =
+ {theories: int,
+  locales: int,
+  locale_thms: int,
+  global_thms: int,
+  sizeof_thys_id: int,
+  sizeof_thms_id: int,
+  sizeof_terms: int,
+  sizeof_types: int,
+  sizeof_spaces: int};
 
-fun statistics theories =
+fun session_statistics theories : session_statistics =
   let
     val theories_member = Symtab.defined (Symtab.make_set theories) o Context.theory_long_name;
 
@@ -106,38 +108,41 @@
     val base_thys = filter theories_member all_thys;
 
     val {thys_id = all_thys_id, thms_id = all_thms_id, terms = all_terms,
-          types = all_types, spaces = all_spaces} = theories_content all_thys;
+          types = all_types, spaces = all_spaces} = session_content all_thys;
     val {thys_id = base_thys_id, thms_id = base_thms_id, terms = base_terms,
-          types = base_types, spaces = base_spaces} = theories_content base_thys;
-    val session_statistics =
-      {sizeof_thys_id = sizeof_diff (all_thys_id, base_thys_id),
-       sizeof_thms_id = sizeof_diff (all_thms_id, base_thms_id),
-       sizeof_terms = sizeof_diff (all_terms, base_terms),
-       sizeof_types = sizeof_diff (all_types, base_types),
-       sizeof_spaces = sizeof_diff (all_spaces, base_spaces)};
-  in (map theory_statistics loaded_thys, session_statistics) end;
+          types = base_types, spaces = base_spaces} = session_content base_thys;
+  in
+    {theories = length loaded_thys,
+     locales = Integer.sum (map (length o locales) loaded_thys),
+     locale_thms = Integer.sum (map (length o locales_thms) loaded_thys),
+     global_thms = Integer.sum (map (length o global_thms) loaded_thys),
+     sizeof_thys_id = sizeof_diff (all_thys_id, base_thys_id),
+     sizeof_thms_id = sizeof_diff (all_thms_id, base_thms_id),
+     sizeof_terms = sizeof_diff (all_terms, base_terms),
+     sizeof_types = sizeof_diff (all_types, base_types),
+     sizeof_spaces = sizeof_diff (all_spaces, base_spaces)}
+  end;
 
 
-(* main entry point for "isabelle process" *)
+(* main entry point for external process *)
 
 local
 
 val decode_args : string list XML.Decode.T =
   let open XML.Decode in list string end;
 
-val encode_theories_result : theory_statistics list XML.Encode.T =
-  map (fn {theory = a, locales = b, locale_thms = c, global_thms = d} => (a, (b, (c, d))))
-  #> let open XML.Encode in list (pair string (pair int (pair int int))) end;
-
-val encode_session_result : session_statistics XML.Encode.T =
-  (fn {sizeof_thys_id = a,
-       sizeof_thms_id = b,
-       sizeof_terms = c,
-       sizeof_types = d,
-       sizeof_spaces = e} => (a, (b, (c, (d, e)))))
-  #> let open XML.Encode in pair int (pair int (pair int (pair int int))) end;
-
-val encode_result = let open XML.Encode in pair encode_theories_result encode_session_result end;
+val encode_result : session_statistics XML.Encode.T =
+  (fn {theories = a,
+       locales = b,
+       locale_thms = c,
+       global_thms = d,
+       sizeof_thys_id = e,
+       sizeof_thms_id = f,
+       sizeof_terms = g,
+       sizeof_types = h,
+       sizeof_spaces = i} => (a, (b, (c, (d, (e, (f, (g, (h, i))))))))) #>
+  let open XML.Encode
+  in pair int (pair int (pair int (pair int (pair int (pair int (pair int (pair int int))))))) end;
 
 in
 
@@ -148,7 +153,7 @@
       let
         val dir = Path.explode dir_name;
         val args = decode_args (YXML.parse_body (File.read (dir + \<^path>\<open>args.yxml\<close>)));
-        val result = statistics args;
+        val result = session_statistics args;
       in File.write (dir + \<^path>\<open>result.yxml\<close>) (YXML.string_of_body (encode_result result)) end);
 
 end;