src/Pure/Tools/build.scala
author wenzelm
Mon, 06 Feb 2023 14:54:15 +0100
changeset 77207 d98a99e4eea9
parent 77206 6784eaef7d0c
child 77236 dce91dab1728
permissions -rw-r--r--
proper Shasum.digest, to emulate old form from build_history database; clarified signature: more explicit types;

/*  Title:      Pure/Tools/build.scala
    Author:     Makarius
    Options:    :folding=explicit:

Build and manage Isabelle sessions.
*/

package isabelle


import scala.collection.immutable.SortedSet
import scala.annotation.tailrec


object Build {
  /** auxiliary **/

  /* persistent build info */

  sealed case class Session_Info(
    sources: SHA1.Shasum,
    input_heaps: SHA1.Shasum,
    output_heap: SHA1.Shasum,
    return_code: Int,
    uuid: String
  ) {
    def ok: Boolean = return_code == 0
  }


  /* queue with scheduling information */

  private object Queue {
    type Timings = (List[Properties.T], Double)

    def load_timings(progress: Progress, store: Sessions.Store, session_name: String): Timings = {
      val no_timings: Timings = (Nil, 0.0)

      store.try_open_database(session_name) match {
        case None => no_timings
        case Some(db) =>
          def ignore_error(msg: String) = {
            progress.echo_warning("Ignoring bad database " + db +
              " for session " + quote(session_name) + (if (msg == "") "" else ":\n" + msg))
            no_timings
          }
          try {
            val command_timings = store.read_command_timings(db, session_name)
            val session_timing =
              store.read_session_timing(db, session_name) match {
                case Markup.Elapsed(t) => t
                case _ => 0.0
              }
            (command_timings, session_timing)
          }
          catch {
            case ERROR(msg) => ignore_error(msg)
            case exn: java.lang.Error => ignore_error(Exn.message(exn))
            case _: XML.Error => ignore_error("XML.Error")
          }
          finally { db.close() }
      }
    }

    def make_session_timing(
      sessions_structure: Sessions.Structure,
      timing: Map[String, Double]
    ) : Map[String, Double] = {
      val maximals = sessions_structure.build_graph.maximals.toSet
      def desc_timing(session_name: String): Double = {
        if (maximals.contains(session_name)) timing(session_name)
        else {
          val descendants = sessions_structure.build_descendants(List(session_name)).toSet
          val g = sessions_structure.build_graph.restrict(descendants)
          (0.0 :: g.maximals.flatMap { desc =>
            val ps = g.all_preds(List(desc))
            if (ps.exists(p => !timing.isDefinedAt(p))) None
            else Some(ps.map(timing(_)).sum)
          }).max
        }
      }
      timing.keySet.iterator.map(name => (name -> desc_timing(name))).toMap.withDefaultValue(0.0)
    }

    def apply(
      progress: Progress,
      sessions_structure: Sessions.Structure,
      store: Sessions.Store
    ) : Queue = {
      val graph = sessions_structure.build_graph
      val names = graph.keys

      val timings = names.map(name => (name, load_timings(progress, store, name)))
      val command_timings =
        timings.map({ case (name, (ts, _)) => (name, ts) }).toMap.withDefaultValue(Nil)
      val session_timing =
        make_session_timing(sessions_structure,
          timings.map({ case (name, (_, t)) => (name, t) }).toMap)

      object Ordering extends scala.math.Ordering[String] {
        def compare(name1: String, name2: String): Int =
          session_timing(name2) compare session_timing(name1) match {
            case 0 =>
              sessions_structure(name2).timeout compare sessions_structure(name1).timeout match {
                case 0 => name1 compare name2
                case ord => ord
              }
            case ord => ord
          }
      }

      new Queue(graph, SortedSet(names: _*)(Ordering), command_timings)
    }
  }

  private class Queue(
    graph: Graph[String, Sessions.Info],
    order: SortedSet[String],
    val command_timings: String => List[Properties.T]
  ) {
    def is_inner(name: String): Boolean = !graph.is_maximal(name)

    def is_empty: Boolean = graph.is_empty

    def - (name: String): Queue =
      new Queue(graph.del_node(name), order - name, command_timings)

    def dequeue(skip: String => Boolean): Option[(String, Sessions.Info)] = {
      val it = order.iterator.dropWhile(name => skip(name) || !graph.is_minimal(name))
      if (it.hasNext) { val name = it.next(); Some((name, graph.get_node(name))) }
      else None
    }
  }



  /** build with results **/

  class Results private[Build](
    val store: Sessions.Store,
    val deps: Sessions.Deps,
    val sessions_ok: List[String],
    results: Map[String, (Option[Process_Result], Sessions.Info)]
  ) {
    def cache: Term.Cache = store.cache

    def sessions: Set[String] = results.keySet
    def cancelled(name: String): Boolean = results(name)._1.isEmpty
    def info(name: String): Sessions.Info = results(name)._2
    def apply(name: String): Process_Result = results(name)._1.getOrElse(Process_Result(1))
    val rc: Int =
      results.iterator.map({ case (_, (Some(r), _)) => r.rc case (_, (None, _)) => 1 }).
        foldLeft(Process_Result.RC.ok)(_ max _)
    def ok: Boolean = rc == Process_Result.RC.ok

    def unfinished: List[String] = sessions.iterator.filterNot(apply(_).ok).toList.sorted

    override def toString: String = rc.toString
  }

  def session_finished(session_name: String, process_result: Process_Result): String =
    "Finished " + session_name + " (" + process_result.timing.message_resources + ")"

  def session_timing(session_name: String, build_log: Build_Log.Session_Info): String = {
    val props = build_log.session_timing
    val threads = Markup.Session_Timing.Threads.unapply(props) getOrElse 1
    val timing = Markup.Timing_Properties.get(props)
    "Timing " + session_name + " (" + threads + " threads, " + timing.message_factor + ")"
  }

  def build(
    options: Options,
    selection: Sessions.Selection = Sessions.Selection.empty,
    browser_info: Browser_Info.Config = Browser_Info.Config.none,
    progress: Progress = new Progress,
    check_unknown_files: Boolean = false,
    build_heap: Boolean = false,
    clean_build: Boolean = false,
    dirs: List[Path] = Nil,
    select_dirs: List[Path] = Nil,
    infos: List[Sessions.Info] = Nil,
    numa_shuffling: Boolean = false,
    max_jobs: Int = 1,
    list_files: Boolean = false,
    check_keywords: Set[String] = Set.empty,
    fresh_build: Boolean = false,
    no_build: Boolean = false,
    soft_build: Boolean = false,
    verbose: Boolean = false,
    export_files: Boolean = false,
    augment_options: String => List[Options.Spec] = _ => Nil,
    session_setup: (String, Session) => Unit = (_, _) => ()
  ): Results = {
    val build_options =
      options +
        "completion_limit=0" +
        "editor_tracing_messages=0" +
        ("pide_reports=" + options.bool("build_pide_reports"))

    val store = Sessions.store(build_options)

    Isabelle_Fonts.init()


    /* session selection and dependencies */

    val full_sessions =
      Sessions.load_structure(build_options, dirs = dirs, select_dirs = select_dirs, infos = infos,
        augment_options = augment_options)
    val full_sessions_selection = full_sessions.imports_selection(selection)

    val build_deps = {
      val deps0 =
        Sessions.deps(full_sessions.selection(selection),
          progress = progress, inlined_files = true, verbose = verbose,
          list_files = list_files, check_keywords = check_keywords).check_errors

      if (soft_build && !fresh_build) {
        val outdated =
          deps0.sessions_structure.build_topological_order.flatMap(name =>
            store.try_open_database(name) match {
              case Some(db) =>
                using(db)(store.read_build(_, name)) match {
                  case Some(build)
                  if build.ok && build.sources == deps0.sources_shasum(name) => None
                  case _ => Some(name)
                }
              case None => Some(name)
            })

        Sessions.deps(full_sessions.selection(Sessions.Selection(sessions = outdated)),
          progress = progress, inlined_files = true).check_errors
      }
      else deps0
    }


    /* check unknown files */

    if (check_unknown_files) {
      val source_files =
        (for {
          (_, base) <- build_deps.session_bases.iterator
          (path, _) <- base.session_sources.iterator
        } yield path).toList
      Mercurial.check_files(source_files)._2 match {
        case Nil =>
        case unknown_files =>
          progress.echo_warning("Unknown files (not part of the underlying Mercurial repository):" +
            unknown_files.map(File.standard_path).sorted.mkString("\n  ", "\n  ", ""))
      }
    }


    /* main build process */

    val queue = Queue(progress, build_deps.sessions_structure, store)

    store.prepare_output_dir()

    if (clean_build) {
      for (name <- full_sessions.imports_descendants(full_sessions_selection)) {
        val (relevant, ok) = store.clean_output(name)
        if (relevant) {
          if (ok) progress.echo("Cleaned " + name)
          else progress.echo(name + " FAILED to clean")
        }
      }
    }

    // scheduler loop
    case class Result(
      current: Boolean,
      output_heap: SHA1.Shasum,
      process: Option[Process_Result],
      info: Sessions.Info
    ) {
      def ok: Boolean =
        process match {
          case None => false
          case Some(res) => res.ok
        }
    }

    def sleep(): Unit =
      Isabelle_Thread.interrupt_handler(_ => progress.stop()) {
        build_options.seconds("editor_input_delay").sleep()
      }

    val log =
      build_options.string("system_log") match {
        case "" => No_Logger
        case "-" => Logger.make(progress)
        case log_file => Logger.make(Some(Path.explode(log_file)))
      }

    val numa_nodes = new NUMA.Nodes(numa_shuffling)

    @tailrec def loop(
      pending: Queue,
      running: Map[String, (SHA1.Shasum, Build_Job)],
      results: Map[String, Result]
    ): Map[String, Result] = {
      def used_node(i: Int): Boolean =
        running.iterator.exists(
          { case (_, (_, job)) => job.numa_node.isDefined && job.numa_node.get == i })

      if (pending.is_empty) results
      else {
        if (progress.stopped) {
          for ((_, (_, job)) <- running) job.terminate()
        }

        running.find({ case (_, (_, job)) => job.is_finished }) match {
          case Some((session_name, (input_heaps, job))) =>
            //{{{ finish job

            val (process_result, output_heap) = job.join

            val log_lines = process_result.out_lines.filterNot(Protocol_Message.Marker.test)
            val process_result_tail = {
              val tail = job.info.options.int("process_output_tail")
              process_result.copy(
                out_lines =
                  "(more details via \"isabelle log -H Error " + session_name + "\")" ::
                  (if (tail == 0) log_lines else log_lines.drop(log_lines.length - tail max 0)))
            }

            val build_log =
              Build_Log.Log_File(session_name, process_result.out_lines).
                parse_session_info(
                  command_timings = true,
                  theory_timings = true,
                  ml_statistics = true,
                  task_statistics = true)

            // write log file
            if (process_result.ok) {
              File.write_gzip(store.output_log_gz(session_name), terminate_lines(log_lines))
            }
            else File.write(store.output_log(session_name), terminate_lines(log_lines))

            // write database
            using(store.open_database(session_name, output = true))(db =>
              store.write_session_info(db, session_name, job.session_sources,
                build_log =
                  if (process_result.timeout) build_log.error("Timeout") else build_log,
                build =
                  Session_Info(build_deps.sources_shasum(session_name), input_heaps, output_heap,
                    process_result.rc, UUID.random().toString)))

            // messages
            process_result.err_lines.foreach(progress.echo)

            if (process_result.ok) {
              if (verbose) progress.echo(session_timing(session_name, build_log))
              progress.echo(session_finished(session_name, process_result))
            }
            else {
              progress.echo(session_name + " FAILED")
              if (!process_result.interrupted) progress.echo(process_result_tail.out)
            }

            loop(pending - session_name, running - session_name,
              results +
                (session_name -> Result(false, output_heap, Some(process_result_tail), job.info)))
            //}}}
          case None if running.size < (max_jobs max 1) =>
            //{{{ check/start next job
            pending.dequeue(running.isDefinedAt) match {
              case Some((session_name, info)) =>
                val ancestor_results =
                  build_deps.sessions_structure.build_requirements(List(session_name)).
                    filterNot(_ == session_name).map(results(_))
                val input_heaps =
                  if (ancestor_results.isEmpty) {
                    SHA1.shasum_meta_info(SHA1.digest(Path.explode("$POLYML_EXE")))
                  }
                  else SHA1.flat_shasum(ancestor_results.map(_.output_heap))

                val do_store =
                  build_heap || Sessions.is_pure(session_name) || queue.is_inner(session_name)

                val (current, output_heap) = {
                  store.try_open_database(session_name) match {
                    case Some(db) =>
                      using(db)(store.read_build(_, session_name)) match {
                        case Some(build) =>
                          val output_heap = store.find_heap_shasum(session_name)
                          val current =
                            !fresh_build &&
                            build.ok &&
                            build.sources == build_deps.sources_shasum(session_name) &&
                            build.input_heaps == input_heaps &&
                            build.output_heap == output_heap &&
                            !(do_store && output_heap.is_empty)
                          (current, output_heap)
                        case None => (false, SHA1.no_shasum)
                      }
                    case None => (false, SHA1.no_shasum)
                  }
                }
                val all_current = current && ancestor_results.forall(_.current)

                if (all_current) {
                  loop(pending - session_name, running,
                    results +
                      (session_name -> Result(true, output_heap, Some(Process_Result(0)), info)))
                }
                else if (no_build) {
                  progress.echo_if(verbose, "Skipping " + session_name + " ...")
                  loop(pending - session_name, running,
                    results +
                      (session_name -> Result(false, output_heap, Some(Process_Result(1)), info)))
                }
                else if (ancestor_results.forall(_.ok) && !progress.stopped) {
                  progress.echo((if (do_store) "Building " else "Running ") + session_name + " ...")

                  store.clean_output(session_name)
                  using(store.open_database(session_name, output = true))(
                    store.init_session_info(_, session_name))

                  val numa_node = numa_nodes.next(used_node)
                  val job =
                    new Build_Job(progress, build_deps.background(session_name), store, do_store,
                      log, session_setup, numa_node, queue.command_timings(session_name))
                  loop(pending, running + (session_name -> (input_heaps, job)), results)
                }
                else {
                  progress.echo(session_name + " CANCELLED")
                  loop(pending - session_name, running,
                    results + (session_name -> Result(false, output_heap, None, info)))
                }
              case None => sleep(); loop(pending, running, results)
            }
            ///}}}
          case None => sleep(); loop(pending, running, results)
        }
      }
    }


    /* build results */

    val results = {
      val build_results =
        if (build_deps.is_empty) {
          progress.echo_warning("Nothing to build")
          Map.empty[String, Result]
        }
        else Isabelle_Thread.uninterruptible { loop(queue, Map.empty, Map.empty) }

      val sessions_ok: List[String] =
        (for {
          name <- build_deps.sessions_structure.build_topological_order.iterator
          result <- build_results.get(name)
          if result.ok
        } yield name).toList

      val results =
        (for ((name, result) <- build_results.iterator)
          yield (name, (result.process, result.info))).toMap

      new Results(store, build_deps, sessions_ok, results)
    }

    if (export_files) {
      for (name <- full_sessions_selection.iterator if results(name).ok) {
        val info = results.info(name)
        if (info.export_files.nonEmpty) {
          progress.echo("Exporting " + info.name + " ...")
          for ((dir, prune, pats) <- info.export_files) {
            Export.export_files(store, name, info.dir + dir,
              progress = if (verbose) progress else new Progress,
              export_prune = prune,
              export_patterns = pats)
          }
        }
      }
    }

    val presentation_sessions =
      results.sessions_ok.filter(name => browser_info.enabled(results.info(name)))
    if (presentation_sessions.nonEmpty && !progress.stopped) {
      Browser_Info.build(browser_info, results.store, results.deps, presentation_sessions,
        progress = progress, verbose = verbose)
    }

    if (!results.ok && (verbose || !no_build)) {
      progress.echo("Unfinished session(s): " + commas(results.unfinished))
    }

    results
  }


  /* Isabelle tool wrapper */

  val isabelle_tool = Isabelle_Tool("build", "build and manage Isabelle sessions",
    Scala_Project.here,
    { args =>
      val build_options = Word.explode(Isabelle_System.getenv("ISABELLE_BUILD_OPTIONS"))

      var base_sessions: List[String] = Nil
      var select_dirs: List[Path] = Nil
      var numa_shuffling = false
      var browser_info = Browser_Info.Config.none
      var requirements = false
      var soft_build = false
      var exclude_session_groups: List[String] = Nil
      var all_sessions = false
      var build_heap = false
      var clean_build = false
      var dirs: List[Path] = Nil
      var export_files = false
      var fresh_build = false
      var session_groups: List[String] = Nil
      var max_jobs = 1
      var check_keywords: Set[String] = Set.empty
      var list_files = false
      var no_build = false
      var options = Options.init(opts = build_options)
      var verbose = false
      var exclude_sessions: List[String] = Nil

      val getopts = Getopts("""
Usage: isabelle build [OPTIONS] [SESSIONS ...]

  Options are:
    -B NAME      include session NAME and all descendants
    -D DIR       include session directory and select its sessions
    -N           cyclic shuffling of NUMA CPU nodes (performance tuning)
    -P DIR       enable HTML/PDF presentation in directory (":" for default)
    -R           refer to requirements of selected sessions
    -S           soft build: only observe changes of sources, not heap images
    -X NAME      exclude sessions from group NAME and all descendants
    -a           select all sessions
    -b           build heap images
    -c           clean build
    -d DIR       include session directory
    -e           export files from session specification into file-system
    -f           fresh build
    -g NAME      select session group NAME
    -j INT       maximum number of parallel jobs (default 1)
    -k KEYWORD   check theory sources for conflicts with proposed keywords
    -l           list session source files
    -n           no build -- take existing build databases
    -o OPTION    override Isabelle system OPTION (via NAME=VAL or NAME)
    -v           verbose
    -x NAME      exclude session NAME and all descendants

  Build and manage Isabelle sessions, depending on implicit settings:

""" + Library.indent_lines(2,  Build_Log.Settings.show()) + "\n",
        "B:" -> (arg => base_sessions = base_sessions ::: List(arg)),
        "D:" -> (arg => select_dirs = select_dirs ::: List(Path.explode(arg))),
        "N" -> (_ => numa_shuffling = true),
        "P:" -> (arg => browser_info = Browser_Info.Config.make(arg)),
        "R" -> (_ => requirements = true),
        "S" -> (_ => soft_build = true),
        "X:" -> (arg => exclude_session_groups = exclude_session_groups ::: List(arg)),
        "a" -> (_ => all_sessions = true),
        "b" -> (_ => build_heap = true),
        "c" -> (_ => clean_build = true),
        "d:" -> (arg => dirs = dirs ::: List(Path.explode(arg))),
        "e" -> (_ => export_files = true),
        "f" -> (_ => fresh_build = true),
        "g:" -> (arg => session_groups = session_groups ::: List(arg)),
        "j:" -> (arg => max_jobs = Value.Int.parse(arg)),
        "k:" -> (arg => check_keywords = check_keywords + arg),
        "l" -> (_ => list_files = true),
        "n" -> (_ => no_build = true),
        "o:" -> (arg => options = options + arg),
        "v" -> (_ => verbose = true),
        "x:" -> (arg => exclude_sessions = exclude_sessions ::: List(arg)))

      val sessions = getopts(args)

      val progress = new Console_Progress(verbose = verbose)

      val start_date = Date.now()

      if (verbose) {
        progress.echo(
          "Started at " + Build_Log.print_date(start_date) +
            " (" + Isabelle_System.getenv("ML_IDENTIFIER") + " on " + Isabelle_System.hostname() +")")
        progress.echo(Build_Log.Settings.show() + "\n")
      }

      val results =
        progress.interrupt_handler {
          build(options,
            selection = Sessions.Selection(
              requirements = requirements,
              all_sessions = all_sessions,
              base_sessions = base_sessions,
              exclude_session_groups = exclude_session_groups,
              exclude_sessions = exclude_sessions,
              session_groups = session_groups,
              sessions = sessions),
            browser_info = browser_info,
            progress = progress,
            check_unknown_files = Mercurial.is_repository(Path.ISABELLE_HOME),
            build_heap = build_heap,
            clean_build = clean_build,
            dirs = dirs,
            select_dirs = select_dirs,
            numa_shuffling = NUMA.enabled_warning(progress, numa_shuffling),
            max_jobs = max_jobs,
            list_files = list_files,
            check_keywords = check_keywords,
            fresh_build = fresh_build,
            no_build = no_build,
            soft_build = soft_build,
            verbose = verbose,
            export_files = export_files)
        }
      val end_date = Date.now()
      val elapsed_time = end_date.time - start_date.time

      if (verbose) {
        progress.echo("\nFinished at " + Build_Log.print_date(end_date))
      }

      val total_timing =
        results.sessions.iterator.map(a => results(a).timing).foldLeft(Timing.zero)(_ + _).
          copy(elapsed = elapsed_time)
      progress.echo(total_timing.message_resources)

      sys.exit(results.rc)
    })


  /* build logic image */

  def build_logic(options: Options, logic: String,
    progress: Progress = new Progress,
    build_heap: Boolean = false,
    dirs: List[Path] = Nil,
    fresh: Boolean = false,
    strict: Boolean = false
  ): Int = {
    val selection = Sessions.Selection.session(logic)
    val rc =
      if (!fresh && build(options, selection = selection,
            build_heap = build_heap, no_build = true, dirs = dirs).ok) Process_Result.RC.ok
      else {
        progress.echo("Build started for Isabelle/" + logic + " ...")
        Build.build(options, selection = selection, progress = progress,
          build_heap = build_heap, fresh_build = fresh, dirs = dirs).rc
      }
    if (strict && rc != Process_Result.RC.ok) error("Failed to build Isabelle/" + logic) else rc
  }
}