merged
authorwenzelm
Tue, 16 Apr 2024 17:28:58 +0200
changeset 80126 b73df63e0f52
parent 80107 247751d25102 (current diff)
parent 80125 761bd2b35217 (diff)
child 80127 39f9084a9668
merged
--- a/src/Pure/Build/build.scala	Tue Apr 16 13:29:27 2024 +0200
+++ b/src/Pure/Build/build.scala	Tue Apr 16 17:28:58 2024 +0200
@@ -34,8 +34,9 @@
     ml_platform: String = Isabelle_System.getenv("ML_PLATFORM"),
     hostname: String = Isabelle_System.hostname(),
     numa_shuffling: Boolean = false,
+    numa_nodes: List[Int] = Nil,
     clean_sessions: List[String] = Nil,
-    build_heap: Boolean = false,
+    store_heap: Boolean = false,
     fresh_build: Boolean = false,
     no_build: Boolean = false,
     session_setup: (String, Session) => Unit = (_, _) => (),
@@ -78,10 +79,11 @@
     def cache: Term.Cache = store.cache
 
     def sessions_ok: List[String] =
-      (for {
-        name <- deps.sessions_structure.build_topological_order.iterator
-        result <- results.get(name) if result.ok
-      } yield name).toList
+      List.from(
+        for {
+          name <- deps.sessions_structure.build_topological_order.iterator
+          result <- results.get(name) if result.ok
+        } yield name)
 
     def info(name: String): Sessions.Info = deps.sessions_structure(name)
     def sessions: Set[String] = results.keySet
@@ -230,10 +232,11 @@
 
       if (check_unknown_files) {
         val source_files =
-          (for {
-            (_, base) <- build_deps.session_bases.iterator
-            (path, _) <- base.session_sources.iterator
-          } yield path).toList
+          List.from(
+            for {
+              (_, base) <- build_deps.session_bases.iterator
+              (path, _) <- base.session_sources.iterator
+            } yield path)
         Mercurial.check_files(source_files)._2 match {
           case Nil =>
           case unknown_files =>
@@ -249,12 +252,13 @@
       val clean_sessions =
         if (clean_build) full_sessions.imports_descendants(full_sessions_selection) else Nil
 
+      val numa_nodes = Host.numa_nodes(enabled = numa_shuffling)
       val build_context =
         Context(store, build_deps, engine = engine, afp_root = afp_root,
           build_hosts = build_hosts, hostname = hostname(build_options),
-          clean_sessions = clean_sessions, build_heap = build_heap,
-          numa_shuffling = numa_shuffling, fresh_build = fresh_build,
-          no_build = no_build, session_setup = session_setup,
+          clean_sessions = clean_sessions, store_heap = build_heap,
+          numa_shuffling = numa_shuffling, numa_nodes = numa_nodes,
+          fresh_build = fresh_build, no_build = no_build, session_setup = session_setup,
           jobs = max_jobs.getOrElse(if (build_hosts.nonEmpty) 0 else 1), master = true)
 
       val results = engine.run_build_process(build_context, progress, server)
--- a/src/Pure/Build/build_benchmark.scala	Tue Apr 16 13:29:27 2024 +0200
+++ b/src/Pure/Build/build_benchmark.scala	Tue Apr 16 17:28:58 2024 +0200
@@ -65,21 +65,11 @@
         benchmark_requirements(local_options, progress)
         for (db <- database_server) ML_Heap.restore(db, hierachy, cache = store.cache.compress)
 
-        def get_shasum(session_name: String): SHA1.Shasum = {
-          val ancestor_shasums = sessions(session_name).ancestors.map(get_shasum)
-
-          val input_shasum =
-            if (ancestor_shasums.isEmpty) ML_Process.bootstrap_shasum()
-            else SHA1.flat_shasum(ancestor_shasums)
-
-          store.check_output(
-            database_server, session_name,
-            session_options = build_context.sessions_structure(session_name).options,
-            sources_shasum = sessions(session_name).sources_shasum,
-            input_shasum = input_shasum,
-            fresh_build = false,
-            store_heap = false)._2
-        }
+        def get_shasum(name: String): SHA1.Shasum =
+          store.check_output(database_server, name,
+            session_options = build_context.sessions_structure(name).options,
+            sources_shasum = sessions(name).sources_shasum,
+            input_shasum = ML_Process.make_shasum(sessions(name).ancestors.map(get_shasum)))._2
 
         val deps = Sessions.deps(full_sessions.selection(selection)).check_errors
         val background = deps.background(benchmark_session_name)
--- a/src/Pure/Build/build_job.scala	Tue Apr 16 13:29:27 2024 +0200
+++ b/src/Pure/Build/build_job.scala	Tue Apr 16 17:28:58 2024 +0200
@@ -514,14 +514,8 @@
             case None => using(store.open_database(session_name, output = true))(write_info)
           }
 
-          using_optional(store.maybe_open_heaps_database(database_server, server = server)) {
-            heaps_database =>
-              for (db <- database_server orElse heaps_database) {
-                val slice = Space.MiB(options.real("build_database_slice"))
-                ML_Heap.store(db, store_session, slice,
-                  cache = store.cache.compress, progress = progress)
-              }
-          }
+          store.in_heaps_database(
+            List(store_session), database_server, server = server, progress = progress)
 
           // messages
           process_result.err_lines.foreach(progress.echo(_))
--- a/src/Pure/Build/build_process.scala	Tue Apr 16 13:29:27 2024 +0200
+++ b/src/Pure/Build/build_process.scala	Tue Apr 16 17:28:58 2024 +0200
@@ -94,6 +94,9 @@
     def iterator: Iterator[Build_Job.Session_Context] =
       for (name <- graph.topological_order.iterator) yield apply(name)
 
+    def store_heap(name: String): Boolean =
+      isabelle.Sessions.is_pure(name) || iterator.exists(_.ancestors.contains(name))
+
     def data: Library.Update.Data[Build_Job.Session_Context] =
       Map.from(for ((_, (session, _)) <- graph.iterator) yield session.name -> session)
 
@@ -221,7 +224,6 @@
 
   sealed case class State(
     serial: Long = 0,
-    numa_nodes: List[Int] = Nil,
     sessions: Sessions = Sessions.empty,
     pending: State.Pending = Map.empty,
     running: State.Running = Map.empty,
@@ -1148,9 +1150,10 @@
   }
 
   protected def next_node_info(state: Build_Process.State, session_name: String): Host.Node_Info = {
-    def used_nodes: Set[Int] =
+    val available_nodes = build_context.numa_nodes
+    val used_nodes =
       Set.from(for (job <- state.running.valuesIterator; i <- job.node_info.numa_node) yield i)
-    val numa_node = Host.next_numa_node(_host_database, hostname, state.numa_nodes, used_nodes)
+    val numa_node = Host.next_numa_node(_host_database, hostname, available_nodes, used_nodes)
     Host.Node_Info(hostname, numa_node, Nil)
   }
 
@@ -1160,15 +1163,8 @@
     ancestor_results: List[Build_Process.Result]
   ): Build_Process.State = {
     val sources_shasum = state.sessions(session_name).sources_shasum
-
-    val input_shasum =
-      if (ancestor_results.isEmpty) ML_Process.bootstrap_shasum()
-      else SHA1.flat_shasum(ancestor_results.map(_.output_shasum))
-
-    val store_heap =
-      build_context.build_heap || Sessions.is_pure(session_name) ||
-      state.sessions.iterator.exists(_.ancestors.contains(session_name))
-
+    val input_shasum = ML_Process.make_shasum(ancestor_results.map(_.output_shasum))
+    val store_heap = build_context.store_heap || state.sessions.store_heap(session_name)
     val (current, output_shasum) =
       store.check_output(_database_server, session_name,
         session_options = build_context.sessions_structure(session_name).options,
@@ -1302,7 +1298,7 @@
       finished || sleep
     }
 
-  protected def init_unsynchronized(): Unit = {
+  protected def init_unsynchronized(): Unit =
     if (build_context.master) {
       val sessions1 =
         _state.sessions.init(build_context, _database_server, progress = build_progress)
@@ -1315,10 +1311,6 @@
       _state = _state.copy(sessions = sessions1, pending = pending1)
     }
 
-    val numa_nodes = Host.numa_nodes(enabled = build_context.numa_shuffling)
-    _state = _state.copy(numa_nodes = numa_nodes)
-  }
-
   protected def main_unsynchronized(): Unit = {
     for (job <- _state.running.valuesIterator; build <- job.build if build.is_finished) {
       val result = build.join
--- a/src/Pure/Build/build_schedule.scala	Tue Apr 16 13:29:27 2024 +0200
+++ b/src/Pure/Build/build_schedule.scala	Tue Apr 16 17:28:58 2024 +0200
@@ -471,8 +471,9 @@
 
       if (used.length >= host.max_jobs) false
       else {
-        if (host.numa_nodes.length <= 1)
+        if (host.numa_nodes.length <= 1) {
           used.map(host_infos.num_threads).sum + threads <= host.max_threads
+        }
         else {
           def node_threads(n: Int): Int =
             used.filter(_.numa_node.contains(n)).map(host_infos.num_threads).sum
@@ -1117,23 +1118,13 @@
     def is_current(state: Build_Process.State, session_name: String): Boolean =
       state.ancestor_results(session_name) match {
         case Some(ancestor_results) if ancestor_results.forall(_.current) =>
-          val sources_shasum = state.sessions(session_name).sources_shasum
-
-          val input_shasum =
-            if (ancestor_results.isEmpty) ML_Process.bootstrap_shasum()
-            else SHA1.flat_shasum(ancestor_results.map(_.output_shasum))
-
-          val store_heap =
-            build_context.build_heap || Sessions.is_pure(session_name) ||
-              state.sessions.iterator.exists(_.ancestors.contains(session_name))
-
           store.check_output(
             _database_server, session_name,
             session_options = build_context.sessions_structure(session_name).options,
-            sources_shasum = sources_shasum,
-            input_shasum = input_shasum,
+            sources_shasum = state.sessions(session_name).sources_shasum,
+            input_shasum = ML_Process.make_shasum(ancestor_results.map(_.output_shasum)),
             fresh_build = build_context.fresh_build,
-            store_heap = store_heap)._1
+            store_heap = build_context.store_heap || state.sessions.store_heap(session_name))._1
         case _ => false
       }
 
--- a/src/Pure/Build/store.scala	Tue Apr 16 13:29:27 2024 +0200
+++ b/src/Pure/Build/store.scala	Tue Apr 16 17:28:58 2024 +0200
@@ -83,7 +83,7 @@
       new Sources(
         session_base.session_sources.foldLeft(Map.empty) {
           case (sources, (path, digest)) =>
-            def err(): Nothing = error("Incoherent digest for source file: " + path)
+            def err(): Nothing = error("Incoherent digest for source file: " + path.expand)
             val name = File.symbolic_path(path)
             sources.get(name) match {
               case Some(source_file) =>
@@ -383,7 +383,30 @@
     server: SSH.Server = SSH.no_server
   ): Option[SQL.Database] = {
     if (database_server.isDefined) None
-    else store.maybe_open_database_server(server = server, guard = build_cluster)
+    else maybe_open_database_server(server = server, guard = build_cluster)
+  }
+
+  def maybe_using_heaps_database[A](
+    database_server: Option[SQL.Database],
+    server: SSH.Server = SSH.no_server
+  )(f: SQL.Database => A): Option[A] = {
+    using_optional(maybe_open_heaps_database(database_server, server = server)) {
+      heaps_database => (database_server orElse heaps_database).map(f)
+    }
+  }
+
+  def in_heaps_database(
+    sessions: List[Store.Session],
+    database_server: Option[SQL.Database],
+    server: SSH.Server = SSH.no_server,
+    progress: Progress = new Progress
+  ): Unit = {
+    if (sessions.nonEmpty) {
+      maybe_using_heaps_database(database_server, server = server) { db =>
+        val slice = Space.MiB(options.real("build_database_slice"))
+        sessions.foreach(ML_Heap.store(db, _, slice, cache = cache.compress, progress = progress))
+      }
+    }
   }
 
   def open_build_database(path: Path, server: SSH.Server = SSH.no_server): SQL.Database =
@@ -465,8 +488,8 @@
     session_options: Options,
     sources_shasum: SHA1.Shasum,
     input_shasum: SHA1.Shasum,
-    fresh_build: Boolean,
-    store_heap: Boolean
+    fresh_build: Boolean = false,
+    store_heap: Boolean = false
   ): (Boolean, SHA1.Shasum) = {
     def no_check: (Boolean, SHA1.Shasum) = (false, SHA1.no_shasum)
 
--- a/src/Pure/ML/ml_heap.scala	Tue Apr 16 13:29:27 2024 +0200
+++ b/src/Pure/ML/ml_heap.scala	Tue Apr 16 17:28:58 2024 +0200
@@ -11,16 +11,14 @@
   /** heap file with SHA1 digest **/
 
   private val sha1_prefix = "SHA1:"
+  private val sha1_length = sha1_prefix.length + SHA1.digest_length
 
   def read_file_digest(heap: Path): Option[SHA1.Digest] = {
     if (heap.is_file) {
-      val l = sha1_prefix.length
-      val m = l + SHA1.digest_length
-      val n = File.size(heap)
-      val bs = Bytes.read_file(heap, offset = n - m)
-      if (bs.length == m) {
+      val bs = Bytes.read_file(heap, offset = File.size(heap) - sha1_length)
+      if (bs.length == sha1_length) {
         val s = bs.text
-        if (s.startsWith(sha1_prefix)) Some(SHA1.fake_digest(s.substring(l)))
+        if (s.startsWith(sha1_prefix)) Some(SHA1.fake_digest(s.substring(sha1_prefix.length)))
         else None
       }
       else None
@@ -204,7 +202,7 @@
     val heap_digest = session.heap.map(write_file_digest)
     val heap_size =
       session.heap match {
-        case Some(heap) => File.size(heap) - sha1_prefix.length - SHA1.digest_length
+        case Some(heap) => File.size(heap) - sha1_length
         case None => 0L
       }
 
@@ -285,16 +283,13 @@
 
             val base_dir = Isabelle_System.make_directory(heap.expand.dir)
             Isabelle_System.with_tmp_file(session_name + "_", base_dir = base_dir.file) { tmp =>
-              Bytes.write(tmp, Bytes.empty)
+              tmp.file.delete()
               for (slice <- private_data.read_slices(db, session_name)) {
                 Bytes.append(tmp, slice.uncompress(cache = cache))
               }
               val digest = write_file_digest(tmp)
-              if (db_digest.get == digest) {
-                Isabelle_System.chmod("a+r", tmp)
-                Isabelle_System.move_file(tmp, heap)
-              }
-              else error("Incoherent content for session heap " + heap)
+              if (db_digest.get == digest) Isabelle_System.move_file(tmp, heap)
+              else error("Incoherent content for session heap " + heap.expand)
             }
           }
         }
@@ -303,14 +298,14 @@
         /* log_db */
 
         for (session <- sessions; path <- session.log_db) {
-          val file_uuid = Store.read_build_uuid(path, session.name)
-          private_data.read_log_db(db, session.name, old_uuid = file_uuid) match {
-            case Some(log_db) if file_uuid.isEmpty =>
+          val old_uuid = Store.read_build_uuid(path, session.name)
+          for (log_db <- private_data.read_log_db(db, session.name, old_uuid = old_uuid)) {
+            if (old_uuid.isEmpty) {
               progress.echo("Restoring " + session.log_db_name + " ...")
               Isabelle_System.make_directory(path.expand.dir)
               Bytes.write(path, log_db.content)
-            case Some(_) => error("Incoherent content for session database " + path)
-            case None =>
+            }
+            else error("Incoherent content for session database " + path.expand)
           }
         }
       }
--- a/src/Pure/ML/ml_process.scala	Tue Apr 16 13:29:27 2024 +0200
+++ b/src/Pure/ML/ml_process.scala	Tue Apr 16 17:28:58 2024 +0200
@@ -12,8 +12,9 @@
 
 
 object ML_Process {
-  def bootstrap_shasum(): SHA1.Shasum =
-    SHA1.shasum_meta_info(SHA1.digest(Path.explode("$POLYML_EXE")))
+  def make_shasum(ancestors: List[SHA1.Shasum]): SHA1.Shasum =
+    if (ancestors.isEmpty) SHA1.shasum_meta_info(SHA1.digest(Path.explode("$POLYML_EXE")))
+    else SHA1.flat_shasum(ancestors)
 
   def session_heaps(
     store: Store,
--- a/src/Pure/Tools/update.scala	Tue Apr 16 13:29:27 2024 +0200
+++ b/src/Pure/Tools/update.scala	Tue Apr 16 17:28:58 2024 +0200
@@ -217,13 +217,13 @@
                 sessions = sessions),
               base_logics = base_logics,
               progress = progress,
-              build_heap,
-              clean_build,
+              build_heap = build_heap,
+              clean_build = clean_build,
               dirs = dirs,
               select_dirs = select_dirs,
               numa_shuffling = Host.numa_check(progress, numa_shuffling),
               max_jobs = max_jobs,
-              fresh_build,
+              fresh_build = fresh_build,
               no_build = no_build)
           }