--- a/src/Pure/Tools/build_schedule.scala Fri Nov 24 20:58:12 2023 +0100
+++ b/src/Pure/Tools/build_schedule.scala Fri Nov 24 20:58:29 2023 +0100
@@ -13,104 +13,186 @@
object Build_Schedule {
val engine_name = "build_schedule"
- object Config {
- def from_job(job: Build_Process.Job): Config = Config(job.name, job.node_info)
- }
-
- case class Config(job_name: String, node_info: Node_Info) {
- def job_of(start_time: Time): Build_Process.Job =
- Build_Process.Job(job_name, "", "", node_info, Date(start_time), None)
- }
/* organized historic timing information (extracted from build logs) */
case class Timing_Entry(job_name: String, hostname: String, threads: Int, elapsed: Time)
+ case class Timing_Entries(entries: List[Timing_Entry]) {
+ require(entries.nonEmpty)
- class Timing_Data private(data: List[Timing_Entry], val host_infos: Host_Infos) {
- require(data.nonEmpty)
+ def is_empty = entries.isEmpty
+ def size = entries.length
- def is_empty = data.isEmpty
- def size = data.length
+ lazy val by_job = entries.groupBy(_.job_name).view.mapValues(Timing_Entries(_)).toMap
+ lazy val by_threads = entries.groupBy(_.threads).view.mapValues(Timing_Entries(_)).toMap
+ lazy val by_hostname = entries.groupBy(_.hostname).view.mapValues(Timing_Entries(_)).toMap
+
+ def mean_time: Time = Timing_Data.mean_time(entries.map(_.elapsed))
+ def best_entry: Timing_Entry = entries.minBy(_.elapsed.ms)
+ }
- private lazy val by_job =
- data.groupBy(_.job_name).view.mapValues(new Timing_Data(_, host_infos)).toMap
- private lazy val by_threads =
- data.groupBy(_.threads).view.mapValues(new Timing_Data(_, host_infos)).toMap
- private lazy val by_hostname =
- data.groupBy(_.hostname).view.mapValues(new Timing_Data(_, host_infos)).toMap
+ class Timing_Data private(data: Timing_Entries, val host_infos: Host_Infos) {
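+ // midpoint between the last thread count that still improved timing and the
+ // next measured one: a heuristic bound past which more threads stop helping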
+ private def inflection_point(last_mono: Int, next: Int): Int =
+ last_mono + ((next - last_mono) / 2)
- def mean_time: Time = Timing_Data.mean_time(data.map(_.elapsed))
-
- private def best_entry: Timing_Entry = data.minBy(_.elapsed.ms)
-
- def best_threads(job_name: String): Option[Int] = by_job.get(job_name).map(_.best_entry.threads)
-
- def best_time(job_name: String): Time =
- by_job.get(job_name).map(_.best_entry.elapsed).getOrElse(
- estimate_config(Config(job_name, Node_Info(best_entry.hostname, None, Nil))))
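+ // per host, the inflection point between the best observed thread count and
+ // the next measured (worse) one bounds useful parallelism for the job; take
+ // the smallest such bound, at most max_threads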
+ def best_threads(job_name: String, max_threads: Int): Int = {
+ val worse_threads =
+ data.by_job.get(job_name).toList.flatMap(_.by_hostname).flatMap {
+ case (hostname, data) =>
+ val best_threads = data.best_entry.threads
+ data.by_threads.keys.toList.sorted.find(_ > best_threads).map(
+ inflection_point(best_threads, _))
+ }
+ (max_threads :: worse_threads).min
+ }
private def hostname_factor(from: String, to: String): Double =
host_infos.host_factor(host_infos.the_host(from), host_infos.the_host(to))
- def approximate_threads(threads: Int): Option[Time] = {
- val approximations =
- by_job.values.filter(_.size > 1).map { data =>
- val (ref_hostname, x0) =
- data.by_hostname.toList.flatMap((hostname, data) =>
- data.by_threads.keys.map(hostname -> _)).minBy((_, n) => Math.abs(n - threads))
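+ // approximate the time for a thread count from measured (threads, time) pairs,
+ // e.g. (1, 60s), (2, 35s), (4, 25s): such a monotonically decreasing curve is
+ // handled by the Amdahl model below, non-monotonic tails by linear pieces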
+ private def approximate_threads(entries_unsorted: List[(Int, Time)], threads: Int): Time = {
+ val entries = entries_unsorted.sortBy(_._1)
+
+ def sorted_prefix[A](xs: List[A], f: A => Long): List[A] =
+ xs match {
+ case x1 :: x2 :: xs =>
+ if (f(x1) <= f(x2)) x1 :: sorted_prefix(x2 :: xs, f) else x1 :: Nil
+ case xs => xs
+ }
+
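+ // straight line through p0 and p1 evaluated at the requested thread count,
+ // clamped at zero so extrapolation never yields negative times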
+ def linear(p0: (Int, Time), p1: (Int, Time)): Time = {
+ val a = (p1._2 - p0._2).scale(1.0 / (p1._1 - p0._1))
+ val b = p0._2 - a.scale(p0._1)
+ Time.ms((a.scale(threads) + b).ms max 0)
+ }
- def unify_hosts(data: Timing_Data): List[Time] =
- data.by_hostname.toList.map((hostname, data) =>
- data.mean_time.scale(hostname_factor(hostname, ref_hostname)))
+ val mono_prefix = sorted_prefix(entries, e => -e._2.ms)
+
+ val is_mono = entries == mono_prefix
+ val in_prefix = mono_prefix.length > 1 && threads <= mono_prefix.last._1
+ val in_inflection =
+ !is_mono && mono_prefix.length > 1 && threads < entries.drop(mono_prefix.length).head._1
+ if (is_mono || in_prefix || in_inflection) {
+ // Model with Amdahl's law
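+ // t(n) = t_c + t_p / n, so two measurements (n, t0) and (m, t1) yield
+ // t0 - t1 = t_p * (m - n) / (n * m), i.e. t_p = (t0 - t1) * n * m / (m - n);
+ // taking the median over all pairs makes the fit robust against noise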
+ val t_p =
+ Timing_Data.median_time(for {
+ (n, t0) <- mono_prefix
+ (m, t1) <- mono_prefix
+ if m != n
+ } yield (t0 - t1).scale(n.toDouble * m / (m - n)))
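+ // sequential part: t_c = t(n) - t_p / n for each point, median over the prefix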
+ val t_c =
+ Timing_Data.median_time(for ((n, t) <- mono_prefix) yield t - t_p.scale(1.0 / n))
- val entries =
- data.by_threads.toList.map((threads, data) =>
- threads -> Timing_Data.median_time(unify_hosts(data)))
+ def model(threads: Int): Time = t_c + t_p.scale(1.0 / threads)
+
+ if (is_mono || in_prefix) model(threads)
+ else {
+ val post_inflection = entries.drop(mono_prefix.length).head
+ val inflection_threads = inflection_point(mono_prefix.last._1, post_inflection._1)
- val y0 = data.by_hostname(ref_hostname).by_threads(x0).mean_time
- val (x1, y1_data) =
- data.by_hostname(ref_hostname).by_threads.toList.minBy((n, _) => Math.abs(n - threads))
- val y1 = y1_data.mean_time
+ if (threads <= inflection_threads) model(threads)
+ else linear((inflection_threads, model(inflection_threads)), post_inflection)
+ }
+ } else {
+ // Piecewise linear
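+ // interpolate between the two measurements bracketing the requested count,
+ // or extrapolate from the two nearest ones outside the measured range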
+ val (p0, p1) =
+ if (entries.head._1 <= threads && threads <= entries.last._1) {
+ val split = entries.partition(_._1 <= threads)
+ (split._1.last, split._2.head)
+ } else {
+ val piece = if (threads < entries.head._1) entries.take(2) else entries.takeRight(2)
+ (piece.head, piece.last)
+ }
- val a = (y1.ms - y0.ms).toDouble / (x1 - x0)
- val b = y0.ms - a * x0
- Time.ms((a * threads + b).toLong)
- }
- if (approximations.isEmpty) None else Some(Timing_Data.mean_time(approximations))
+ linear(p0, p1)
+ }
+ }
+
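+ // project measurements from all machines onto on_host via relative host speed
+ // factors, yielding a single (threads, time) curve for the job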
+ private def unify_hosts(job_name: String, on_host: String): List[(Int, Time)] = {
+ def unify(hostname: String, data: Timing_Entries) =
+ data.mean_time.scale(hostname_factor(hostname, on_host))
+
+ for {
+ data <- data.by_job.get(job_name).toList
+ (threads, data) <- data.by_threads
+ entries = data.by_hostname.toList.map(unify)
+ } yield threads -> Timing_Data.median_time(entries)
}
- def threads_factor(divided: Int, divisor: Int): Double =
- (approximate_threads(divided), approximate_threads(divisor)) match {
- case (Some(dividend), Some(divisor)) => dividend.ms.toDouble / divisor.ms
- case _ => divided.toDouble / divisor
- }
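+ // estimated time for the job on the given host and thread count: an exact
+ // measurement if present, else interpolated from at least two measured counts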
+ def estimate_threads(job_name: String, hostname: String, threads: Int): Option[Time] = {
+ for {
+ data <- data.by_job.get(job_name)
+ data <- data.by_hostname.get(hostname)
+ entries = data.by_threads.toList.map((threads, data) => threads -> data.mean_time)
+ time <- data.by_threads.get(threads).map(_.mean_time).orElse(
+ if (data.by_threads.size < 2) None else Some(approximate_threads(entries, threads)))
+ } yield time
+ }
+
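+ // global scaling factor between two thread counts: median over all (job, host)
+ // pairs where both can be estimated, else over host-unified per-job curves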
+ def global_threads_factor(from: Int, to: Int): Double = {
+ def median(xs: Iterable[Double]): Double = xs.toList.sorted.apply(xs.size / 2)
+
+ val estimates =
+ for {
+ (hostname, data) <- data.by_hostname
+ job_name <- data.by_job.keys
+ from_time <- estimate_threads(job_name, hostname, from)
+ to_time <- estimate_threads(job_name, hostname, to)
+ } yield from_time.ms.toDouble / to_time.ms
- def estimate_config(config: Config): Time =
- by_job.get(config.job_name) match {
- case None => mean_time
+ if (estimates.nonEmpty) median(estimates)
+ else {
+ // no direct estimates; unify each job's measurements across hosts instead
+ val estimates =
+ for {
+ (job_name, data) <- data.by_job
+ hostname = data.by_hostname.keys.head
+ entries = unify_hosts(job_name, hostname)
+ if entries.length > 1
+ } yield
+ approximate_threads(entries, from).ms.toDouble / approximate_threads(entries, to).ms
+
+ if (estimates.nonEmpty) median(estimates)
+ else from.toDouble / to.toDouble
+ }
+ }
+
+ def estimate(job_name: String, hostname: String, threads: Int): Time =
+ data.by_job.get(job_name) match {
+ case None =>
+ // no data for job; take the average over other jobs at the given thread count
+ val job_estimates = data.by_job.keys.flatMap(estimate_threads(_, hostname, threads))
+ if (job_estimates.nonEmpty) Timing_Data.mean_time(job_estimates)
+ else {
+ // no other job to estimate from; scale some measured timing by the global curve
+ val (threads1, data1) = data.by_threads.head
+ data1.mean_time.scale(global_threads_factor(threads1, threads))
+ }
+
case Some(data) =>
- val hostname = config.node_info.hostname
- val threads = host_infos.num_threads(config.node_info)
data.by_threads.get(threads) match {
case None => // interpolate threads
- data.by_hostname.get(hostname).flatMap(
- _.approximate_threads(threads)).getOrElse {
- // per machine, try to approximate config for threads
- val approximated =
- data.by_hostname.toList.flatMap((hostname1, data) =>
- data.approximate_threads(threads).map(time =>
- time.scale(hostname_factor(hostname1, hostname))))
+ estimate_threads(job_name, hostname, threads).getOrElse {
+ // per machine, try to approximate config for threads
+ val approximated =
+ for {
+ hostname1 <- data.by_hostname.keys
+ estimate <- estimate_threads(job_name, hostname1, threads)
+ factor = hostname_factor(hostname1, hostname)
+ } yield estimate.scale(factor)
- if (approximated.nonEmpty) Timing_Data.mean_time(approximated)
+ if (approximated.nonEmpty) Timing_Data.mean_time(approximated)
+ else {
+ // no single machine where the config can be approximated; unify data points
+ val unified_entries = unify_hosts(job_name, hostname)
+
+ if (unified_entries.length > 1) approximate_threads(unified_entries, threads)
else {
- // no machine where config can be approximated
- data.approximate_threads(threads).getOrElse {
- // only single data point, use global curve to approximate
- val global_factor = threads_factor(data.by_threads.keys.head, threads)
- data.by_threads.values.head.mean_time.scale(global_factor)
- }
+ // only a single data point; use the global curve to approximate
+ val (job_threads, job_time) = unified_entries.head
+ job_time.scale(global_threads_factor(job_threads, threads))
}
}
+ }
case Some(data) => // time for job/thread exists, interpolate machine
data.by_hostname.get(hostname).map(_.mean_time).getOrElse {
@@ -123,7 +205,7 @@
}
object Timing_Data {
- def median_time(obs: List[Time]): Time = obs.sortBy(_.ms).drop(obs.length / 2).head
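+ // upper median: the element at index length / 2 of the sorted observations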
+ def median_time(obs: List[Time]): Time = obs.sortBy(_.ms).apply(obs.length / 2)
def mean_time(obs: Iterable[Time]): Time = Time.ms(obs.map(_.ms).sum / obs.size)
private def dummy_entries(host: Host, host_factor: Double) =
@@ -159,7 +241,7 @@
Timing_Entry(job_name, hostname, threads, median_time(timings))
}
- new Timing_Data(entries, host_infos)
+ new Timing_Data(Timing_Entries(entries), host_infos)
}
}
@@ -186,6 +268,8 @@
}
class Host_Infos private(val hosts: List[Host]) {
+ require(hosts.nonEmpty)
+
private val by_hostname = hosts.map(host => host.info.hostname -> host).toMap
def host_factor(from: Host, to: Host): Double =
@@ -210,7 +294,16 @@
}
- /* offline tracking of resource allocations */
+ /* offline tracking of job configurations and resource allocations */
+
+ object Config {
+ def from_job(job: Build_Process.Job): Config = Config(job.name, job.node_info)
+ }
+
+ case class Config(job_name: String, node_info: Node_Info) {
+ def job_of(start_time: Time): Build_Process.Job =
+ Build_Process.Job(job_name, "", "", node_info, Date(start_time), None)
+ }
case class Resources(
host_infos: Host_Infos,
@@ -303,7 +396,8 @@
val remaining =
build_state.running.values.toList.map { job =>
val elapsed = current_time - job.start_date.time
- val predicted = timing_data.estimate_config(Config.from_job(job))
+ val threads = timing_data.host_infos.num_threads(job.node_info)
+ val predicted = timing_data.estimate(job.name, job.node_info.hostname, threads)
val remaining = if (elapsed > predicted) Time.zero else predicted - elapsed
job -> remaining
}
@@ -323,7 +417,17 @@
def build_duration(build_state: Build_Process.State): Time
}
- abstract class Heuristic(timing_data: Timing_Data) extends Scheduler {
+ abstract class Heuristic(timing_data: Timing_Data, max_threads_limit: Int) extends Scheduler {
+ val host_infos = timing_data.host_infos
+ val ordered_hosts = host_infos.hosts.sorted(host_infos.host_speeds)
+ val max_threads = host_infos.hosts.map(_.info.num_cpus).max min max_threads_limit
+
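+ // optimistic estimate: the job's best thread count on the fastest host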
+ def best_time(job_name: String): Time = {
+ val host = ordered_hosts.last
+ val threads = timing_data.best_threads(job_name, max_threads) min host.info.num_cpus
+ timing_data.estimate(job_name, host.info.hostname, threads)
+ }
+
def build_duration(build_state: Build_Process.State): Time = {
@tailrec
def simulate(state: State): State =
@@ -352,12 +456,11 @@
/* heuristics */
- class Timing_Heuristic(
- threshold: Time,
+ abstract class Path_Heuristic(
timing_data: Timing_Data,
sessions_structure: Sessions.Structure,
- max_threads: Int = 8
- ) extends Heuristic(timing_data) {
+ max_threads_limit: Int
+ ) extends Heuristic(timing_data, max_threads_limit) {
/* pre-computed properties for efficient heuristic */
type Node = String
@@ -367,7 +470,7 @@
val maximals_preds =
all_maximals.map(node => node -> build_graph.all_preds(List(node)).toSet).toMap
- val best_times = build_graph.keys.map(node => node -> timing_data.best_time(node)).toMap
+ val best_times = build_graph.keys.map(node => node -> best_time(node)).toMap
val remaining_time_ms = build_graph.node_height(best_times(_).ms)
def elapsed_times(node: Node): Map[Node, Time] =
@@ -383,10 +486,6 @@
.groupMapReduce(_._1)(_._2)(_ max _)
}
- val critical_path_nodes =
- build_graph.keys.map(node =>
- node -> path_times(node).filter((_, time) => time > threshold).keySet).toMap
-
def parallel_paths(minimals: Set[Node], pred: Node => Boolean = _ => true): Int = {
def start(node: Node): (Node, Time) = node -> best_times(node)
@@ -413,41 +512,47 @@
parallel_paths(minimals.map(start).toMap)._1
}
-
+ }
- /* scheduling */
-
- val host_infos = timing_data.host_infos
+ class Timing_Heuristic(
+ threshold: Time,
+ timing_data: Timing_Data,
+ sessions_structure: Sessions.Structure,
+ max_threads_limit: Int = 8
+ ) extends Path_Heuristic(timing_data, sessions_structure, max_threads_limit) {
+ val critical_path_nodes =
+ build_graph.keys.map(node =>
+ node -> path_times(node).filter((_, time) => time > threshold).keySet).toMap
def next(state: Build_Process.State): List[Config] = {
val resources = host_infos.available(state)
def best_threads(task: Build_Process.Task): Int =
- timing_data.best_threads(task.name).getOrElse(
- host_infos.hosts.map(_.info.num_cpus).max min max_threads)
+ timing_data.best_threads(task.name, max_threads)
+
+ val rev_ordered_hosts = ordered_hosts.reverse.map(_ -> max_threads)
- val ordered_hosts =
- host_infos.hosts.sorted(host_infos.host_speeds).reverse.map(_ -> max_threads)
-
- val fully_parallelizable =
- parallel_paths(state.ready.map(_.name).toSet) <= resources.unused_nodes(max_threads).length
+ val resources0 = host_infos.available(state.copy(running = Map.empty))
+ val max_parallel = parallel_paths(state.ready.map(_.name).toSet)
+ val fully_parallelizable = max_parallel <= resources0.unused_nodes(max_threads).length
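+ // if the fully idle cluster has one max_threads node per parallel path, all
+ // ready tasks can be allocated at their best thread count right away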
if (fully_parallelizable) {
- val all_tasks = state.ready.map(task => (task, best_threads(task), best_threads(task)))
- resources.try_allocate_tasks(ordered_hosts, all_tasks)._1
+ val all_tasks = state.next_ready.map(task => (task, best_threads(task), best_threads(task)))
+ resources.try_allocate_tasks(rev_ordered_hosts, all_tasks)._1
}
else {
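+ // otherwise prioritize the critical path: tasks on paths above the threshold
+ // are fixed to their best thread count on the fastest hosts, while remaining
+ // tasks may be squeezed (down to one thread) onto the hosts left over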
val critical_nodes = state.ready.toSet.flatMap(task => critical_path_nodes(task.name))
val (critical, other) =
- state.ready.sortBy(task => remaining_time_ms(task.name)).reverse.partition(task =>
+ state.next_ready.sortBy(task => remaining_time_ms(task.name)).reverse.partition(task =>
critical_nodes.contains(task.name))
val critical_tasks = critical.map(task => (task, best_threads(task), best_threads(task)))
val other_tasks = other.map(task => (task, 1, best_threads(task)))
- val (critical_hosts, other_hosts) =
- ordered_hosts.splitAt(parallel_paths(critical.map(_.name).toSet, critical_nodes.contains))
+ val critical_minimals = critical_nodes.intersect(state.ready.map(_.name).toSet)
+ val max_critical_parallel = parallel_paths(critical_minimals, critical_nodes.contains)
+ val (critical_hosts, other_hosts) = rev_ordered_hosts.splitAt(max_critical_parallel)
val (configs1, resources1) = resources.try_allocate_tasks(critical_hosts, critical_tasks)
val (configs2, _) = resources1.try_allocate_tasks(other_hosts, other_tasks)
@@ -617,10 +722,10 @@
override def next_jobs(state: Build_Process.State): List[String] = {
val finalize_limit = if (build_context.master) Int.MaxValue else 0
- if (progress.stopped) state.ready.map(_.name).take(finalize_limit)
+ if (progress.stopped) state.next_ready.map(_.name).take(finalize_limit)
else if (cache.is_current(state)) cache.configs.map(_.job_name).filterNot(state.is_running)
else {
- val current = state.ready.filter(task => is_current(state, task.name))
+ val current = state.next_ready.filter(task => is_current(state, task.name))
if (current.nonEmpty) current.map(_.name).take(finalize_limit)
else {
val start = Time.now()