# HG changeset patch # User Fabian Huch # Date 1697649997 -7200 # Node ID 7799ec03b8bd2e0dfa9f372d7be2800e4108c165 # Parent 4b014e6c1dfec06e050f00cd7e90bdcfcbf15061 generalized node infos: allow addressing of numa node segments via relative cpus; add more node options and process policy options using taskset as alternative to NUMA for more fine-grained cpu controls (e.g., for cpus with heterogeneous cores in the same NUMA segment); diff -r 4b014e6c1dfe -r 7799ec03b8bd src/Pure/System/host.scala --- a/src/Pure/System/host.scala Wed Oct 18 19:05:06 2023 +0200 +++ b/src/Pure/System/host.scala Wed Oct 18 19:26:37 2023 +0200 @@ -48,26 +48,49 @@ } - /* process policy via numactl tool */ + /* process policy via numactl and taskset tools */ + + def taskset(cpus: List[Int]): String = "taskset --cpu-list " + Range(cpus) + def taskset_ok(cpus: List[Int]): Boolean = Isabelle_System.bash(taskset(cpus) + " true").ok - def numactl(node: Int): String = "numactl -m" + node + " -N" + node - def numactl_ok(node: Int): Boolean = Isabelle_System.bash(numactl(node) + " true").ok + def numactl(node: Int, rel_cpus: List[Int] = Nil): String = + "numactl -m" + node + " -N" + node + if_proper(rel_cpus, " -C+" + Range(rel_cpus)) + def numactl_ok(node: Int, rel_cpus: List[Int] = Nil): Boolean = + Isabelle_System.bash(numactl(node, rel_cpus) + " true").ok - def process_policy_options(options: Options, numa_node: Option[Int]): Options = + def numa_options(options: Options, numa_node: Option[Int]): Options = numa_node match { case None => options case Some(node) => options.string("process_policy") = if (numactl_ok(node)) numactl(node) else "" } + def node_options(options: Options, node: Node_Info): Options = { + val threads_options = + if (node.rel_cpus.isEmpty) options else options.int("threads") = node.rel_cpus.length + + node.numa_node match { + case None if node.rel_cpus.isEmpty => + threads_options + case Some(numa_node) => + threads_options.string("process_policy") = + if (numactl_ok(numa_node, node.rel_cpus)) numactl(numa_node, node.rel_cpus) else "" + case _ => + threads_options.string("process_policy") = + if (taskset_ok(node.rel_cpus)) taskset(node.rel_cpus) else "" + } + } + /* allocated resources */ - object Node_Info { def none: Node_Info = Node_Info("", None) } + object Node_Info { def none: Node_Info = Node_Info("", None, Nil) } - sealed case class Node_Info(hostname: String, numa_node: Option[Int]) { + sealed case class Node_Info(hostname: String, numa_node: Option[Int], rel_cpus: List[Int]) { override def toString: String = - hostname + if_proper(numa_node, "/" + numa_node.get.toString) + hostname + + if_proper(numa_node, "/" + numa_node.get.toString) + + if_proper(rel_cpus, "+" + Range(rel_cpus)) } diff -r 4b014e6c1dfe -r 7799ec03b8bd src/Pure/Tools/build.scala --- a/src/Pure/Tools/build.scala Wed Oct 18 19:05:06 2023 +0200 +++ b/src/Pure/Tools/build.scala Wed Oct 18 19:26:37 2023 +0200 @@ -115,7 +115,7 @@ } def process_options(options: Options, node_info: Host.Node_Info): Options = - Host.process_policy_options(options, node_info.numa_node) + Host.node_options(options, node_info) final def build_store(options: Options, build_hosts: List[Build_Cluster.Host] = Nil, diff -r 4b014e6c1dfe -r 7799ec03b8bd src/Pure/Tools/build_process.scala --- a/src/Pure/Tools/build_process.scala Wed Oct 18 19:05:06 2023 +0200 +++ b/src/Pure/Tools/build_process.scala Wed Oct 18 19:26:37 2023 +0200 @@ -614,9 +614,11 @@ val build_uuid = Generic.build_uuid val hostname = SQL.Column.string("hostname") val numa_node = SQL.Column.int("numa_node") + val rel_cpus = SQL.Column.string("rel_cpus") val table = - make_table(List(name, worker_uuid, build_uuid, hostname, numa_node), name = "running") + make_table( + List(name, worker_uuid, build_uuid, hostname, numa_node, rel_cpus), name = "running") } def read_running(db: SQL.Database): State.Running = @@ -629,7 +631,10 @@ val build_uuid = res.string(Running.build_uuid) val hostname = res.string(Running.hostname) val numa_node = res.get_int(Running.numa_node) - name -> Job(name, worker_uuid, build_uuid, Host.Node_Info(hostname, numa_node), None) + val rel_cpus = res.string(Running.rel_cpus) + + val node_info = Host.Node_Info(hostname, numa_node, Host.Range.from(rel_cpus)) + name -> Job(name, worker_uuid, build_uuid, node_info, None) } ) @@ -655,6 +660,7 @@ stmt.string(3) = job.build_uuid stmt.string(4) = job.node_info.hostname stmt.int(5) = job.node_info.numa_node + stmt.string(6) = Host.Range(job.node_info.rel_cpus) }) } @@ -669,7 +675,8 @@ val worker_uuid = Generic.worker_uuid val build_uuid = Generic.build_uuid val hostname = SQL.Column.string("hostname") - val numa_node = SQL.Column.string("numa_node") + val numa_node = SQL.Column.int("numa_node") + val rel_cpus = SQL.Column.string("rel_cpus") val rc = SQL.Column.int("rc") val out = SQL.Column.string("out") val err = SQL.Column.string("err") @@ -681,7 +688,7 @@ val table = make_table( - List(name, worker_uuid, build_uuid, hostname, numa_node, + List(name, worker_uuid, build_uuid, hostname, numa_node, rel_cpus, rc, out, err, timing_elapsed, timing_cpu, timing_gc, output_shasum, current), name = "results") } @@ -701,7 +708,8 @@ val build_uuid = res.string(Results.build_uuid) val hostname = res.string(Results.hostname) val numa_node = res.get_int(Results.numa_node) - val node_info = Host.Node_Info(hostname, numa_node) + val rel_cpus = res.string(Results.rel_cpus) + val node_info = Host.Node_Info(hostname, numa_node, Host.Range.from(rel_cpus)) val rc = res.int(Results.rc) val out = res.string(Results.out) @@ -742,14 +750,15 @@ stmt.string(3) = result.build_uuid stmt.string(4) = result.node_info.hostname stmt.int(5) = result.node_info.numa_node - stmt.int(6) = process_result.rc - stmt.string(7) = cat_lines(process_result.out_lines) - stmt.string(8) = cat_lines(process_result.err_lines) - stmt.long(9) = process_result.timing.elapsed.ms - stmt.long(10) = process_result.timing.cpu.ms - stmt.long(11) = process_result.timing.gc.ms - stmt.string(12) = result.output_shasum.toString - stmt.bool(13) = result.current + stmt.string(6) = Host.Range(result.node_info.rel_cpus) + stmt.int(7) = process_result.rc + stmt.string(8) = cat_lines(process_result.out_lines) + stmt.string(9) = cat_lines(process_result.err_lines) + stmt.long(10) = process_result.timing.elapsed.ms + stmt.long(11) = process_result.timing.cpu.ms + stmt.long(12) = process_result.timing.gc.ms + stmt.string(13) = result.output_shasum.toString + stmt.bool(14) = result.current }) } @@ -1032,10 +1041,10 @@ db <- _host_database n <- Host.next_numa_node(db, hostname, state.numa_nodes, used_nodes) } yield n - val node_info = Host.Node_Info(hostname, numa_node) + val node_info = Host.Node_Info(hostname, numa_node, Nil) val print_node_info = - node_info.numa_node.isDefined || + node_info.numa_node.isDefined || node_info.rel_cpus.nonEmpty || _build_database.isDefined && _build_database.get.is_postgresql progress.echo( diff -r 4b014e6c1dfe -r 7799ec03b8bd src/Pure/Tools/dump.scala --- a/src/Pure/Tools/dump.scala Wed Oct 18 19:05:06 2023 +0200 +++ b/src/Pure/Tools/dump.scala Wed Oct 18 19:26:37 2023 +0200 @@ -98,7 +98,7 @@ ): Context = { val session_options: Options = { val options1 = - Host.process_policy_options(options, Host.numa_node0()) + + Host.numa_options(options, Host.numa_node0()) + "parallel_proofs=0" + "completion_limit=0" + "editor_tracing_messages=0"