generalized node infos: allow addressing of numa node segments via relative cpus;
add more node options and process policy options using taskset as alternative to NUMA for more fine-grained cpu controls (e.g., for cpus with heterogeneous cores in the same NUMA segment);
--- a/src/Pure/System/host.scala Wed Oct 18 19:05:06 2023 +0200
+++ b/src/Pure/System/host.scala Wed Oct 18 19:26:37 2023 +0200
@@ -48,26 +48,49 @@
}
- /* process policy via numactl tool */
+ /* process policy via numactl and taskset tools */
+
+ def taskset(cpus: List[Int]): String = "taskset --cpu-list " + Range(cpus)
+ def taskset_ok(cpus: List[Int]): Boolean = Isabelle_System.bash(taskset(cpus) + " true").ok
- def numactl(node: Int): String = "numactl -m" + node + " -N" + node
- def numactl_ok(node: Int): Boolean = Isabelle_System.bash(numactl(node) + " true").ok
+ def numactl(node: Int, rel_cpus: List[Int] = Nil): String =
+ "numactl -m" + node + " -N" + node + if_proper(rel_cpus, " -C+" + Range(rel_cpus))
+ def numactl_ok(node: Int, rel_cpus: List[Int] = Nil): Boolean =
+ Isabelle_System.bash(numactl(node, rel_cpus) + " true").ok
- def process_policy_options(options: Options, numa_node: Option[Int]): Options =
+ def numa_options(options: Options, numa_node: Option[Int]): Options =
numa_node match {
case None => options
case Some(node) =>
options.string("process_policy") = if (numactl_ok(node)) numactl(node) else ""
}
+ def node_options(options: Options, node: Node_Info): Options = {
+ val threads_options =
+ if (node.rel_cpus.isEmpty) options else options.int("threads") = node.rel_cpus.length
+
+ node.numa_node match {
+ case None if node.rel_cpus.isEmpty =>
+ threads_options
+ case Some(numa_node) =>
+ threads_options.string("process_policy") =
+ if (numactl_ok(numa_node, node.rel_cpus)) numactl(numa_node, node.rel_cpus) else ""
+ case _ =>
+ threads_options.string("process_policy") =
+ if (taskset_ok(node.rel_cpus)) taskset(node.rel_cpus) else ""
+ }
+ }
+
/* allocated resources */
- object Node_Info { def none: Node_Info = Node_Info("", None) }
+ object Node_Info { def none: Node_Info = Node_Info("", None, Nil) }
- sealed case class Node_Info(hostname: String, numa_node: Option[Int]) {
+ sealed case class Node_Info(hostname: String, numa_node: Option[Int], rel_cpus: List[Int]) {
override def toString: String =
- hostname + if_proper(numa_node, "/" + numa_node.get.toString)
+ hostname +
+ if_proper(numa_node, "/" + numa_node.get.toString) +
+ if_proper(rel_cpus, "+" + Range(rel_cpus))
}
--- a/src/Pure/Tools/build.scala Wed Oct 18 19:05:06 2023 +0200
+++ b/src/Pure/Tools/build.scala Wed Oct 18 19:26:37 2023 +0200
@@ -115,7 +115,7 @@
}
def process_options(options: Options, node_info: Host.Node_Info): Options =
- Host.process_policy_options(options, node_info.numa_node)
+ Host.node_options(options, node_info)
final def build_store(options: Options,
build_hosts: List[Build_Cluster.Host] = Nil,
--- a/src/Pure/Tools/build_process.scala Wed Oct 18 19:05:06 2023 +0200
+++ b/src/Pure/Tools/build_process.scala Wed Oct 18 19:26:37 2023 +0200
@@ -614,9 +614,11 @@
val build_uuid = Generic.build_uuid
val hostname = SQL.Column.string("hostname")
val numa_node = SQL.Column.int("numa_node")
+ val rel_cpus = SQL.Column.string("rel_cpus")
val table =
- make_table(List(name, worker_uuid, build_uuid, hostname, numa_node), name = "running")
+ make_table(
+ List(name, worker_uuid, build_uuid, hostname, numa_node, rel_cpus), name = "running")
}
def read_running(db: SQL.Database): State.Running =
@@ -629,7 +631,10 @@
val build_uuid = res.string(Running.build_uuid)
val hostname = res.string(Running.hostname)
val numa_node = res.get_int(Running.numa_node)
- name -> Job(name, worker_uuid, build_uuid, Host.Node_Info(hostname, numa_node), None)
+ val rel_cpus = res.string(Running.rel_cpus)
+
+ val node_info = Host.Node_Info(hostname, numa_node, Host.Range.from(rel_cpus))
+ name -> Job(name, worker_uuid, build_uuid, node_info, None)
}
)
@@ -655,6 +660,7 @@
stmt.string(3) = job.build_uuid
stmt.string(4) = job.node_info.hostname
stmt.int(5) = job.node_info.numa_node
+ stmt.string(6) = Host.Range(job.node_info.rel_cpus)
})
}
@@ -669,7 +675,8 @@
val worker_uuid = Generic.worker_uuid
val build_uuid = Generic.build_uuid
val hostname = SQL.Column.string("hostname")
- val numa_node = SQL.Column.string("numa_node")
+ val numa_node = SQL.Column.int("numa_node")
+ val rel_cpus = SQL.Column.string("rel_cpus")
val rc = SQL.Column.int("rc")
val out = SQL.Column.string("out")
val err = SQL.Column.string("err")
@@ -681,7 +688,7 @@
val table =
make_table(
- List(name, worker_uuid, build_uuid, hostname, numa_node,
+ List(name, worker_uuid, build_uuid, hostname, numa_node, rel_cpus,
rc, out, err, timing_elapsed, timing_cpu, timing_gc, output_shasum, current),
name = "results")
}
@@ -701,7 +708,8 @@
val build_uuid = res.string(Results.build_uuid)
val hostname = res.string(Results.hostname)
val numa_node = res.get_int(Results.numa_node)
- val node_info = Host.Node_Info(hostname, numa_node)
+ val rel_cpus = res.string(Results.rel_cpus)
+ val node_info = Host.Node_Info(hostname, numa_node, Host.Range.from(rel_cpus))
val rc = res.int(Results.rc)
val out = res.string(Results.out)
@@ -742,14 +750,15 @@
stmt.string(3) = result.build_uuid
stmt.string(4) = result.node_info.hostname
stmt.int(5) = result.node_info.numa_node
- stmt.int(6) = process_result.rc
- stmt.string(7) = cat_lines(process_result.out_lines)
- stmt.string(8) = cat_lines(process_result.err_lines)
- stmt.long(9) = process_result.timing.elapsed.ms
- stmt.long(10) = process_result.timing.cpu.ms
- stmt.long(11) = process_result.timing.gc.ms
- stmt.string(12) = result.output_shasum.toString
- stmt.bool(13) = result.current
+ stmt.string(6) = Host.Range(result.node_info.rel_cpus)
+ stmt.int(7) = process_result.rc
+ stmt.string(8) = cat_lines(process_result.out_lines)
+ stmt.string(9) = cat_lines(process_result.err_lines)
+ stmt.long(10) = process_result.timing.elapsed.ms
+ stmt.long(11) = process_result.timing.cpu.ms
+ stmt.long(12) = process_result.timing.gc.ms
+ stmt.string(13) = result.output_shasum.toString
+ stmt.bool(14) = result.current
})
}
@@ -1032,10 +1041,10 @@
db <- _host_database
n <- Host.next_numa_node(db, hostname, state.numa_nodes, used_nodes)
} yield n
- val node_info = Host.Node_Info(hostname, numa_node)
+ val node_info = Host.Node_Info(hostname, numa_node, Nil)
val print_node_info =
- node_info.numa_node.isDefined ||
+ node_info.numa_node.isDefined || node_info.rel_cpus.nonEmpty ||
_build_database.isDefined && _build_database.get.is_postgresql
progress.echo(
--- a/src/Pure/Tools/dump.scala Wed Oct 18 19:05:06 2023 +0200
+++ b/src/Pure/Tools/dump.scala Wed Oct 18 19:26:37 2023 +0200
@@ -98,7 +98,7 @@
): Context = {
val session_options: Options = {
val options1 =
- Host.process_policy_options(options, Host.numa_node0()) +
+ Host.numa_options(options, Host.numa_node0()) +
"parallel_proofs=0" +
"completion_limit=0" +
"editor_tracing_messages=0"