src/Pure/Admin/build_log.scala
author wenzelm
Mon Oct 16 14:32:09 2017 +0200 (21 months ago)
changeset 66873 9953ae603a23
parent 66863 6acd1a2bd146
child 66874 0b8da0fc9563
permissions -rw-r--r--
provide theory timing information, similar to command timing but always considered relevant;
     1 /*  Title:      Pure/Admin/build_log.scala
     2     Author:     Makarius
     3 
     4 Management of build log files and database storage.
     5 */
     6 
     7 package isabelle
     8 
     9 
    10 import java.io.{File => JFile}
    11 import java.time.ZoneId
    12 import java.time.format.{DateTimeFormatter, DateTimeParseException}
    13 import java.util.Locale
    14 
    15 import scala.collection.immutable.SortedMap
    16 import scala.collection.mutable
    17 import scala.util.matching.Regex
    18 
    19 
    20 object Build_Log
    21 {
    22   /** content **/
    23 
    24   /* properties */
    25 
    26   object Prop
    27   {
    28     val build_tags = SQL.Column.string("build_tags")  // lines
    29     val build_args = SQL.Column.string("build_args")  // lines
    30     val build_group_id = SQL.Column.string("build_group_id")
    31     val build_id = SQL.Column.string("build_id")
    32     val build_engine = SQL.Column.string("build_engine")
    33     val build_host = SQL.Column.string("build_host")
    34     val build_start = SQL.Column.date("build_start")
    35     val build_end = SQL.Column.date("build_end")
    36     val isabelle_version = SQL.Column.string("isabelle_version")
    37     val afp_version = SQL.Column.string("afp_version")
    38 
    39     val all_props: List[SQL.Column] =
    40       List(build_tags, build_args, build_group_id, build_id, build_engine,
    41         build_host, build_start, build_end, isabelle_version, afp_version)
    42   }
    43 
    44 
    45   /* settings */
    46 
    47   object Settings
    48   {
    49     val ISABELLE_BUILD_OPTIONS = SQL.Column.string("ISABELLE_BUILD_OPTIONS")
    50     val ML_PLATFORM = SQL.Column.string("ML_PLATFORM")
    51     val ML_HOME = SQL.Column.string("ML_HOME")
    52     val ML_SYSTEM = SQL.Column.string("ML_SYSTEM")
    53     val ML_OPTIONS = SQL.Column.string("ML_OPTIONS")
    54 
    55     val ml_settings = List(ML_PLATFORM, ML_HOME, ML_SYSTEM, ML_OPTIONS)
    56     val all_settings = ISABELLE_BUILD_OPTIONS :: ml_settings
    57 
    58     type Entry = (String, String)
    59     type T = List[Entry]
    60 
    61     object Entry
    62     {
    63       def unapply(s: String): Option[Entry] =
    64         s.indexOf('=') match {
    65           case -1 => None
    66           case i =>
    67             val a = s.substring(0, i)
    68             val b = Library.perhaps_unquote(s.substring(i + 1))
    69             Some((a, b))
    70         }
    71       def apply(a: String, b: String): String = a + "=" + quote(b)
    72       def getenv(a: String): String = apply(a, Isabelle_System.getenv(a))
    73     }
    74 
    75     def show(): String =
    76       cat_lines(
    77         List(Entry.getenv(ISABELLE_BUILD_OPTIONS.name), "") :::
    78         ml_settings.map(c => Entry.getenv(c.name)))
    79   }
    80 
    81 
    82   /* file names */
    83 
    84   def log_date(date: Date): String =
    85     String.format(Locale.ROOT, "%s.%05d",
    86       DateTimeFormatter.ofPattern("yyyy-MM-dd").format(date.rep),
    87       new java.lang.Long((date.time - date.midnight.time).ms / 1000))
    88 
    89   def log_subdir(date: Date): Path =
    90     Path.explode("log") + Path.explode(date.rep.getYear.toString)
    91 
    92   def log_filename(engine: String, date: Date, more: List[String] = Nil): Path =
    93     Path.explode((engine :: log_date(date) :: more).mkString("", "_", ".log"))
    94 
    95 
    96 
    97   /** log file **/
    98 
    99   def print_date(date: Date): String = Log_File.Date_Format(date)
   100 
   101   object Log_File
   102   {
   103     /* log file */
   104 
   105     def plain_name(name: String): String =
   106     {
   107       List(".log", ".log.gz", ".log.xz", ".gz", ".xz").find(name.endsWith(_)) match {
   108         case Some(s) => Library.try_unsuffix(s, name).get
   109         case None => name
   110       }
   111     }
   112 
   113     def apply(name: String, lines: List[String]): Log_File =
   114       new Log_File(plain_name(name), lines)
   115 
   116     def apply(name: String, text: String): Log_File =
   117       Log_File(name, Library.trim_split_lines(text))
   118 
   119     def apply(file: JFile): Log_File =
   120     {
   121       val name = file.getName
   122       val text =
   123         if (name.endsWith(".gz")) File.read_gzip(file)
   124         else if (name.endsWith(".xz")) File.read_xz(file)
   125         else File.read(file)
   126       apply(name, text)
   127     }
   128 
   129     def apply(path: Path): Log_File = apply(path.file)
   130 
   131 
   132     /* log file collections */
   133 
   134     def is_log(file: JFile,
   135       prefixes: List[String] =
   136         List(Build_History.log_prefix, Identify.log_prefix, Isatest.log_prefix,
   137           AFP_Test.log_prefix, Jenkins.log_prefix),
   138       suffixes: List[String] = List(".log", ".log.gz", ".log.xz")): Boolean =
   139     {
   140       val name = file.getName
   141 
   142       prefixes.exists(name.startsWith(_)) &&
   143       suffixes.exists(name.endsWith(_)) &&
   144       name != "isatest.log" &&
   145       name != "afp-test.log" &&
   146       name != "main.log"
   147     }
   148 
   149     def find_files(dirs: Iterable[Path]): List[JFile] =
   150       dirs.iterator.flatMap(dir => File.find_files(dir.file, is_log(_))).toList
   151 
   152 
   153     /* date format */
   154 
   155     val Date_Format =
   156     {
   157       val fmts =
   158         Date.Formatter.variants(
   159           List("EEE MMM d HH:mm:ss O yyyy", "EEE MMM d HH:mm:ss VV yyyy"),
   160           List(Locale.ENGLISH, Locale.GERMAN)) :::
   161         List(
   162           DateTimeFormatter.RFC_1123_DATE_TIME,
   163           Date.Formatter.pattern("EEE MMM d HH:mm:ss yyyy").withZone(ZoneId.of("Europe/Berlin")))
   164 
   165       def tune_timezone(s: String): String =
   166         s match {
   167           case "CET" | "MET" => "GMT+1"
   168           case "CEST" | "MEST" => "GMT+2"
   169           case "EST" => "Europe/Berlin"
   170           case _ => s
   171         }
   172       def tune_weekday(s: String): String =
   173         s match {
   174           case "Die" => "Di"
   175           case "Mit" => "Mi"
   176           case "Don" => "Do"
   177           case "Fre" => "Fr"
   178           case "Sam" => "Sa"
   179           case "Son" => "So"
   180           case _ => s
   181         }
   182 
   183       def tune(s: String): String =
   184         Word.implode(
   185           Word.explode(s) match {
   186             case a :: "M\uFFFDr" :: bs => tune_weekday(a) :: "Mär" :: bs.map(tune_timezone(_))
   187             case a :: bs => tune_weekday(a) :: bs.map(tune_timezone(_))
   188             case Nil => Nil
   189           }
   190         )
   191 
   192       Date.Format.make(fmts, tune)
   193     }
   194 
   195 
   196     /* inlined content */
   197 
   198     def print_props(marker: String, props: Properties.T): String =
   199       marker + YXML.string_of_body(XML.Encode.properties(Properties.encode_lines(props)))
   200   }
   201 
   202   class Log_File private(val name: String, val lines: List[String])
   203   {
   204     log_file =>
   205 
   206     override def toString: String = name
   207 
   208     def text: String = cat_lines(lines)
   209 
   210     def err(msg: String): Nothing =
   211       error("Error in log file " + quote(name) + ": " + msg)
   212 
   213 
   214     /* date format */
   215 
   216     object Strict_Date
   217     {
   218       def unapply(s: String): Some[Date] =
   219         try { Some(Log_File.Date_Format.parse(s)) }
   220         catch { case exn: DateTimeParseException => log_file.err(exn.getMessage) }
   221     }
   222 
   223 
   224     /* inlined content */
   225 
   226     def find[A](f: String => Option[A]): Option[A] =
   227       lines.iterator.map(f).find(_.isDefined).map(_.get)
   228 
   229     def find_line(marker: String): Option[String] =
   230       find(Library.try_unprefix(marker, _))
   231 
   232     def find_match(regexes: List[Regex]): Option[String] =
   233       regexes match {
   234         case Nil => None
   235         case regex :: rest =>
   236           lines.iterator.map(regex.unapplySeq(_)).find(res => res.isDefined && res.get.length == 1).
   237             map(res => res.get.head) orElse find_match(rest)
   238       }
   239 
   240 
   241     /* settings */
   242 
   243     def get_setting(a: String): Option[Settings.Entry] =
   244       lines.find(_.startsWith(a + "=")) match {
   245         case Some(line) => Settings.Entry.unapply(line)
   246         case None => None
   247       }
   248 
   249     def get_all_settings: Settings.T =
   250       for { c <- Settings.all_settings; entry <- get_setting(c.name) }
   251       yield entry
   252 
   253 
   254     /* properties (YXML) */
   255 
   256     val xml_cache = new XML.Cache()
   257 
   258     def parse_props(text: String): Properties.T =
   259       try {
   260         xml_cache.props(Properties.decode_lines(XML.Decode.properties(YXML.parse_body(text))))
   261       }
   262       catch { case _: XML.Error => log_file.err("malformed properties") }
   263 
   264     def filter_lines(marker: String): List[String] =
   265       for (line <- lines; s <- Library.try_unprefix(marker, line)) yield s
   266 
   267     def filter_props(marker: String): List[Properties.T] =
   268       for (s <- filter_lines(marker) if YXML.detect(s)) yield parse_props(s)
   269 
   270     def find_props(marker: String): Option[Properties.T] =
   271       find_line(marker) match {
   272         case Some(text) if YXML.detect(text) => Some(parse_props(text))
   273         case _ => None
   274       }
   275 
   276 
   277     /* parse various formats */
   278 
   279     def parse_meta_info(): Meta_Info = Build_Log.parse_meta_info(log_file)
   280 
   281     def parse_build_info(ml_statistics: Boolean = false): Build_Info =
   282       Build_Log.parse_build_info(log_file, ml_statistics)
   283 
   284     def parse_session_info(
   285         command_timings: Boolean = false,
   286         theory_timings: Boolean = false,
   287         ml_statistics: Boolean = false,
   288         task_statistics: Boolean = false): Session_Info =
   289       Build_Log.parse_session_info(
   290         log_file, command_timings, theory_timings, ml_statistics, task_statistics)
   291   }
   292 
   293 
   294 
   295   /** digested meta info: produced by Admin/build_history in log.xz file **/
   296 
   297   object Meta_Info
   298   {
   299     val empty: Meta_Info = Meta_Info(Nil, Nil)
   300   }
   301 
   302   sealed case class Meta_Info(props: Properties.T, settings: Settings.T)
   303   {
   304     def is_empty: Boolean = props.isEmpty && settings.isEmpty
   305 
   306     def get(c: SQL.Column): Option[String] =
   307       Properties.get(props, c.name) orElse
   308       Properties.get(settings, c.name)
   309 
   310     def get_date(c: SQL.Column): Option[Date] =
   311       get(c).map(Log_File.Date_Format.parse(_))
   312   }
   313 
   314   object Identify
   315   {
   316     val log_prefix = "isabelle_identify_"
   317 
   318     def engine(log_file: Log_File): String =
   319       if (log_file.name.startsWith(Jenkins.log_prefix)) "jenkins_identify"
   320       else "identify"
   321 
   322     def content(date: Date, isabelle_version: Option[String], afp_version: Option[String]): String =
   323       terminate_lines(
   324         List("isabelle_identify: " + Build_Log.print_date(date), "") :::
   325         isabelle_version.map("Isabelle version: " + _).toList :::
   326         afp_version.map("AFP version: " + _).toList)
   327 
   328     val Start = new Regex("""^isabelle_identify: (.+)$""")
   329     val No_End = new Regex("""$.""")
   330     val Isabelle_Version = List(new Regex("""^Isabelle version: (\S+)$"""))
   331     val AFP_Version = List(new Regex("""^AFP version: (\S+)$"""))
   332   }
   333 
   334   object Isatest
   335   {
   336     val log_prefix = "isatest-makeall-"
   337     val engine = "isatest"
   338     val Start = new Regex("""^------------------- starting test --- (.+) --- (.+)$""")
   339     val End = new Regex("""^------------------- test (?:successful|FAILED) --- (.+) --- .*$""")
   340     val Isabelle_Version = List(new Regex("""^Isabelle version: (\S+)$"""))
   341   }
   342 
   343   object AFP_Test
   344   {
   345     val log_prefix = "afp-test-devel-"
   346     val engine = "afp-test"
   347     val Start = new Regex("""^Start test(?: for .+)? at ([^,]+), (.*)$""")
   348     val Start_Old = new Regex("""^Start test(?: for .+)? at ([^,]+)$""")
   349     val End = new Regex("""^End test on (.+), .+, elapsed time:.*$""")
   350     val Isabelle_Version = List(new Regex("""^Isabelle version: .* -- hg id (\S+)$"""))
   351     val AFP_Version = List(new Regex("""^AFP version: .* -- hg id (\S+)$"""))
   352     val Bad_Init = new Regex("""^cp:.*: Disc quota exceeded$""")
   353   }
   354 
   355   object Jenkins
   356   {
   357     val log_prefix = "jenkins_"
   358     val engine = "jenkins"
   359     val Host = new Regex("""^Building remotely on (\S+) \((\S+)\).*$""")
   360     val Start = new Regex("""^(?:Started by an SCM change|Started from command line by admin|).*$""")
   361     val Start_Date = new Regex("""^Build started at (.+)$""")
   362     val No_End = new Regex("""$.""")
   363     val Isabelle_Version =
   364       List(new Regex("""^(?:Build for Isabelle id|Isabelle id) (\w+).*$"""),
   365         new Regex("""^ISABELLE_CI_REPO_ID="(\w+)".*$"""),
   366         new Regex("""^(\w{12}) tip.*$"""))
   367     val AFP_Version =
   368       List(new Regex("""^(?:Build for AFP id|AFP id) (\w+).*$"""),
   369         new Regex("""^ISABELLE_CI_AFP_ID="(\w+)".*$"""))
   370     val CONFIGURATION = "=== CONFIGURATION ==="
   371     val BUILD = "=== BUILD ==="
   372   }
   373 
   374   private def parse_meta_info(log_file: Log_File): Meta_Info =
   375   {
   376     def parse(engine: String, host: String, start: Date,
   377       End: Regex, Isabelle_Version: List[Regex], AFP_Version: List[Regex]): Meta_Info =
   378     {
   379       val build_id =
   380       {
   381         val prefix = proper_string(host) orElse proper_string(engine) getOrElse "build"
   382         prefix + ":" + start.time.ms
   383       }
   384       val build_engine = if (engine == "") Nil else List(Prop.build_engine.name -> engine)
   385       val build_host = if (host == "") Nil else List(Prop.build_host.name -> host)
   386 
   387       val start_date = List(Prop.build_start.name -> print_date(start))
   388       val end_date =
   389         log_file.lines.last match {
   390           case End(log_file.Strict_Date(end_date)) =>
   391             List(Prop.build_end.name -> print_date(end_date))
   392           case _ => Nil
   393         }
   394 
   395       val isabelle_version =
   396         log_file.find_match(Isabelle_Version).map(Prop.isabelle_version.name -> _)
   397       val afp_version =
   398         log_file.find_match(AFP_Version).map(Prop.afp_version.name -> _)
   399 
   400       Meta_Info((Prop.build_id.name -> build_id) :: build_engine ::: build_host :::
   401           start_date ::: end_date ::: isabelle_version.toList ::: afp_version.toList,
   402         log_file.get_all_settings)
   403     }
   404 
   405     log_file.lines match {
   406       case line :: _ if line.startsWith(Build_History.META_INFO_MARKER) =>
   407         Meta_Info(log_file.find_props(Build_History.META_INFO_MARKER).get,
   408           log_file.get_all_settings)
   409 
   410       case Identify.Start(log_file.Strict_Date(start)) :: _ =>
   411         parse(Identify.engine(log_file), "", start, Identify.No_End,
   412           Identify.Isabelle_Version, Identify.AFP_Version)
   413 
   414       case Isatest.Start(log_file.Strict_Date(start), host) :: _ =>
   415         parse(Isatest.engine, host, start, Isatest.End,
   416           Isatest.Isabelle_Version, Nil)
   417 
   418       case AFP_Test.Start(log_file.Strict_Date(start), host) :: _ =>
   419         parse(AFP_Test.engine, host, start, AFP_Test.End,
   420           AFP_Test.Isabelle_Version, AFP_Test.AFP_Version)
   421 
   422       case AFP_Test.Start_Old(log_file.Strict_Date(start)) :: _ =>
   423         parse(AFP_Test.engine, "", start, AFP_Test.End,
   424           AFP_Test.Isabelle_Version, AFP_Test.AFP_Version)
   425 
   426       case Jenkins.Start() :: _ =>
   427         log_file.lines.dropWhile(_ != Jenkins.BUILD) match {
   428           case Jenkins.BUILD :: _ :: Jenkins.Start_Date(log_file.Strict_Date(start)) :: _ =>
   429             val host =
   430               log_file.lines.takeWhile(_ != Jenkins.CONFIGURATION).collectFirst({
   431                 case Jenkins.Host(a, b) => a + "." + b
   432               }).getOrElse("")
   433             parse(Jenkins.engine, host, start.to(ZoneId.of("Europe/Berlin")), Jenkins.No_End,
   434               Jenkins.Isabelle_Version, Jenkins.AFP_Version)
   435           case _ => Meta_Info.empty
   436         }
   437 
   438       case line :: _ if line.startsWith("\u0000") => Meta_Info.empty
   439       case List(Isatest.End(_)) => Meta_Info.empty
   440       case _ :: AFP_Test.Bad_Init() :: _ => Meta_Info.empty
   441       case Nil => Meta_Info.empty
   442 
   443       case _ => log_file.err("cannot detect log file format")
   444     }
   445   }
   446 
   447 
   448 
   449   /** build info: toplevel output of isabelle build or Admin/build_history **/
   450 
   451   val THEORY_TIMING_MARKER = "\ftheory_timing = "
   452   val ML_STATISTICS_MARKER = "\fML_statistics = "
   453   val ERROR_MESSAGE_MARKER = "\ferror_message = "
   454   val SESSION_NAME = "session_name"
   455 
   456   object Session_Status extends Enumeration
   457   {
   458     val existing, finished, failed, cancelled = Value
   459   }
   460 
   461   object Session_Entry
   462   {
   463     val empty: Session_Entry = Session_Entry()
   464   }
   465 
   466   sealed case class Session_Entry(
   467     chapter: String = "",
   468     groups: List[String] = Nil,
   469     threads: Option[Int] = None,
   470     timing: Timing = Timing.zero,
   471     ml_timing: Timing = Timing.zero,
   472     heap_size: Option[Long] = None,
   473     status: Option[Session_Status.Value] = None,
   474     errors: List[String] = Nil,
   475     ml_statistics: List[Properties.T] = Nil)
   476   {
   477     def proper_groups: Option[String] = if (groups.isEmpty) None else Some(cat_lines(groups))
   478     def finished: Boolean = status == Some(Session_Status.finished)
   479     def failed: Boolean = status == Some(Session_Status.failed)
   480   }
   481 
   482   sealed case class Build_Info(sessions: Map[String, Session_Entry])
   483   {
   484     def finished_sessions: List[String] = for ((a, b) <- sessions.toList if b.finished) yield a
   485     def failed_sessions: List[String] = for ((a, b) <- sessions.toList if b.failed) yield a
   486   }
   487 
   488   private def parse_build_info(log_file: Log_File, parse_ml_statistics: Boolean): Build_Info =
   489   {
   490     object Chapter_Name
   491     {
   492       def unapply(s: String): Some[(String, String)] =
   493         space_explode('/', s) match {
   494           case List(chapter, name) => Some((chapter, name))
   495           case _ => Some(("", s))
   496         }
   497     }
   498 
   499     val Session_No_Groups = new Regex("""^Session (\S+)$""")
   500     val Session_Groups = new Regex("""^Session (\S+) \((.*)\)$""")
   501     val Session_Finished1 =
   502       new Regex("""^Finished (\S+) \((\d+):(\d+):(\d+) elapsed time, (\d+):(\d+):(\d+) cpu time.*$""")
   503     val Session_Finished2 =
   504       new Regex("""^Finished (\S+) \((\d+):(\d+):(\d+) elapsed time.*$""")
   505     val Session_Timing =
   506       new Regex("""^Timing (\S+) \((\d+) threads, (\d+\.\d+)s elapsed time, (\d+\.\d+)s cpu time, (\d+\.\d+)s GC time.*$""")
   507     val Session_Started = new Regex("""^(?:Running|Building) (\S+) \.\.\.$""")
   508     val Session_Failed = new Regex("""^(\S+) FAILED""")
   509     val Session_Cancelled = new Regex("""^(\S+) CANCELLED""")
   510     val Heap = new Regex("""^Heap (\S+) \((\d+) bytes\)$""")
   511 
   512     var chapter = Map.empty[String, String]
   513     var groups = Map.empty[String, List[String]]
   514     var threads = Map.empty[String, Int]
   515     var timing = Map.empty[String, Timing]
   516     var ml_timing = Map.empty[String, Timing]
   517     var started = Set.empty[String]
   518     var failed = Set.empty[String]
   519     var cancelled = Set.empty[String]
   520     var heap_sizes = Map.empty[String, Long]
   521     var ml_statistics = Map.empty[String, List[Properties.T]]
   522     var errors = Map.empty[String, List[String]]
   523 
   524     def all_sessions: Set[String] =
   525       chapter.keySet ++ groups.keySet ++ threads.keySet ++ timing.keySet ++ ml_timing.keySet ++
   526       failed ++ cancelled ++ started ++ heap_sizes.keySet ++ ml_statistics.keySet
   527 
   528 
   529     for (line <- log_file.lines) {
   530       line match {
   531         case Session_No_Groups(Chapter_Name(chapt, name)) =>
   532           chapter += (name -> chapt)
   533           groups += (name -> Nil)
   534 
   535         case Session_Groups(Chapter_Name(chapt, name), grps) =>
   536           chapter += (name -> chapt)
   537           groups += (name -> Word.explode(grps))
   538 
   539         case Session_Started(name) =>
   540           started += name
   541 
   542         case Session_Finished1(name,
   543             Value.Int(e1), Value.Int(e2), Value.Int(e3),
   544             Value.Int(c1), Value.Int(c2), Value.Int(c3)) =>
   545           val elapsed = Time.hms(e1, e2, e3)
   546           val cpu = Time.hms(c1, c2, c3)
   547           timing += (name -> Timing(elapsed, cpu, Time.zero))
   548 
   549         case Session_Finished2(name,
   550             Value.Int(e1), Value.Int(e2), Value.Int(e3)) =>
   551           val elapsed = Time.hms(e1, e2, e3)
   552           timing += (name -> Timing(elapsed, Time.zero, Time.zero))
   553 
   554         case Session_Timing(name,
   555             Value.Int(t), Value.Double(e), Value.Double(c), Value.Double(g)) =>
   556           val elapsed = Time.seconds(e)
   557           val cpu = Time.seconds(c)
   558           val gc = Time.seconds(g)
   559           ml_timing += (name -> Timing(elapsed, cpu, gc))
   560           threads += (name -> t)
   561 
   562         case Heap(name, Value.Long(size)) =>
   563           heap_sizes += (name -> size)
   564 
   565         case _ if parse_ml_statistics && line.startsWith(ML_STATISTICS_MARKER) && YXML.detect(line) =>
   566           val (name, props) =
   567             Library.try_unprefix(ML_STATISTICS_MARKER, line).map(log_file.parse_props(_)) match {
   568               case Some((SESSION_NAME, name) :: props) => (name, props)
   569               case _ => log_file.err("malformed ML_statistics " + quote(line))
   570             }
   571           ml_statistics += (name -> (props :: ml_statistics.getOrElse(name, Nil)))
   572 
   573         case _ if line.startsWith(ERROR_MESSAGE_MARKER) && YXML.detect(line) =>
   574           val (name, msg) =
   575             Library.try_unprefix(ERROR_MESSAGE_MARKER, line).map(log_file.parse_props(_)) match {
   576               case Some(List((SESSION_NAME, name), (Markup.CONTENT, msg))) => (name, msg)
   577               case _ => log_file.err("malformed error message " + quote(line))
   578             }
   579           errors += (name -> (Library.decode_lines(msg) :: errors.getOrElse(name, Nil)))
   580 
   581         case _ =>
   582       }
   583     }
   584 
   585     val sessions =
   586       Map(
   587         (for (name <- all_sessions.toList) yield {
   588           val status =
   589             if (failed(name)) Session_Status.failed
   590             else if (cancelled(name)) Session_Status.cancelled
   591             else if (timing.isDefinedAt(name) || ml_timing.isDefinedAt(name))
   592               Session_Status.finished
   593             else if (started(name)) Session_Status.failed
   594             else Session_Status.existing
   595           val entry =
   596             Session_Entry(
   597               chapter = chapter.getOrElse(name, ""),
   598               groups = groups.getOrElse(name, Nil),
   599               threads = threads.get(name),
   600               timing = timing.getOrElse(name, Timing.zero),
   601               ml_timing = ml_timing.getOrElse(name, Timing.zero),
   602               heap_size = heap_sizes.get(name),
   603               status = Some(status),
   604               errors = errors.getOrElse(name, Nil).reverse,
   605               ml_statistics = ml_statistics.getOrElse(name, Nil).reverse)
   606           (name -> entry)
   607         }):_*)
   608     Build_Info(sessions)
   609   }
   610 
   611 
   612 
   613   /** session info: produced by isabelle build as session log.gz file **/
   614 
   615   sealed case class Session_Info(
   616     session_timing: Properties.T,
   617     command_timings: List[Properties.T],
   618     theory_timings: List[Properties.T],
   619     ml_statistics: List[Properties.T],
   620     task_statistics: List[Properties.T],
   621     errors: List[String])
   622 
   623   private def parse_session_info(
   624     log_file: Log_File,
   625     command_timings: Boolean,
   626     theory_timings: Boolean,
   627     ml_statistics: Boolean,
   628     task_statistics: Boolean): Session_Info =
   629   {
   630     Session_Info(
   631       session_timing = log_file.find_props("\fTiming = ") getOrElse Nil,
   632       command_timings = if (command_timings) log_file.filter_props("\fcommand_timing = ") else Nil,
   633       theory_timings = if (theory_timings) log_file.filter_props(THEORY_TIMING_MARKER) else Nil,
   634       ml_statistics = if (ml_statistics) log_file.filter_props(ML_STATISTICS_MARKER) else Nil,
   635       task_statistics = if (task_statistics) log_file.filter_props("\ftask_statistics = ") else Nil,
   636       errors = log_file.filter_lines(ERROR_MESSAGE_MARKER).map(Library.decode_lines(_)))
   637   }
   638 
   639   def compress_errors(errors: List[String]): Option[Bytes] =
   640     if (errors.isEmpty) None
   641     else Some(Bytes(YXML.string_of_body(XML.Encode.list(XML.Encode.string)(errors))).compress())
   642 
   643   def uncompress_errors(bytes: Bytes): List[String] =
   644     if (bytes.isEmpty) Nil
   645     else XML.Decode.list(YXML.string_of_body(_))(YXML.parse_body(bytes.uncompress().text))
   646 
   647 
   648 
   649   /** persistent store **/
   650 
   651   /* SQL data model */
   652 
   653   object Data
   654   {
   655     def build_log_table(name: String, columns: List[SQL.Column], body: String = ""): SQL.Table =
   656       SQL.Table("isabelle_build_log_" + name, columns, body)
   657 
   658 
   659     /* main content */
   660 
   661     val log_name = SQL.Column.string("log_name").make_primary_key
   662     val session_name = SQL.Column.string("session_name").make_primary_key
   663     val chapter = SQL.Column.string("chapter")
   664     val groups = SQL.Column.string("groups")
   665     val threads = SQL.Column.int("threads")
   666     val timing_elapsed = SQL.Column.long("timing_elapsed")
   667     val timing_cpu = SQL.Column.long("timing_cpu")
   668     val timing_gc = SQL.Column.long("timing_gc")
   669     val timing_factor = SQL.Column.double("timing_factor")
   670     val ml_timing_elapsed = SQL.Column.long("ml_timing_elapsed")
   671     val ml_timing_cpu = SQL.Column.long("ml_timing_cpu")
   672     val ml_timing_gc = SQL.Column.long("ml_timing_gc")
   673     val ml_timing_factor = SQL.Column.double("ml_timing_factor")
   674     val heap_size = SQL.Column.long("heap_size")
   675     val status = SQL.Column.string("status")
   676     val errors = SQL.Column.bytes("errors")
   677     val ml_statistics = SQL.Column.bytes("ml_statistics")
   678     val known = SQL.Column.bool("known")
   679 
   680     val meta_info_table =
   681       build_log_table("meta_info", log_name :: Prop.all_props ::: Settings.all_settings)
   682 
   683     val sessions_table =
   684       build_log_table("sessions",
   685         List(log_name, session_name, chapter, groups, threads, timing_elapsed, timing_cpu,
   686           timing_gc, timing_factor, ml_timing_elapsed, ml_timing_cpu, ml_timing_gc, ml_timing_factor,
   687           heap_size, status, errors))
   688 
   689     val ml_statistics_table =
   690       build_log_table("ml_statistics", List(log_name, session_name, ml_statistics))
   691 
   692 
   693     /* AFP versions */
   694 
   695     val isabelle_afp_versions_table: SQL.Table =
   696     {
   697       val version1 = Prop.isabelle_version
   698       val version2 = Prop.afp_version
   699       build_log_table("isabelle_afp_versions", List(version1.make_primary_key, version2),
   700         SQL.select(List(version1, version2), distinct = true) + meta_info_table +
   701         " WHERE " + version1.defined + " AND " + version2.defined)
   702     }
   703 
   704 
   705     /* earliest pull date for repository version (PostgreSQL queries) */
   706 
   707     val pull_date = SQL.Column.date("pull_date")
   708 
   709     def pull_date_table(afp: Boolean = false): SQL.Table =
   710     {
   711       val (name, versions) =
   712         if (afp) ("afp_pull_date", List(Prop.isabelle_version, Prop.afp_version))
   713         else ("pull_date", List(Prop.isabelle_version))
   714 
   715       build_log_table(name, versions.map(_.make_primary_key) ::: List(pull_date),
   716         "SELECT " + versions.mkString(", ") + ", min(" + Prop.build_start + ") AS " + pull_date +
   717         " FROM " + meta_info_table +
   718         " WHERE " + (versions ::: List(Prop.build_start)).map(_.defined).mkString(" AND ") +
   719         " GROUP BY " + versions.mkString(", "))
   720     }
   721 
   722 
   723     /* recent entries */
   724 
   725     def recent_time(days: Int): SQL.Source =
   726       "now() - INTERVAL '" + days.max(0) + " days'"
   727 
   728     def recent_pull_date_table(
   729       days: Int, rev: String = "", afp_rev: Option[String] = None): SQL.Table =
   730     {
   731       val afp = afp_rev.isDefined
   732       val rev2 = afp_rev.getOrElse("")
   733       val table = pull_date_table(afp)
   734 
   735       val version1 = Prop.isabelle_version
   736       val version2 = Prop.afp_version
   737       val eq1 = version1(table) + " = " + SQL.string(rev)
   738       val eq2 = version2(table) + " = " + SQL.string(rev2)
   739 
   740       SQL.Table("recent_pull_date", table.columns,
   741         table.select(table.columns,
   742           "WHERE " + pull_date(table) + " > " + recent_time(days) +
   743           (if (rev != "" && rev2 == "") " OR " + eq1
   744            else if (rev == "" && rev2 != "") " OR " + eq2
   745            else if (rev != "" && rev2 != "") " OR (" + eq1 + " AND " + eq2 + ")"
   746            else "")))
   747     }
   748 
   749     def select_recent_log_names(days: Int): SQL.Source =
   750     {
   751       val table1 = meta_info_table
   752       val table2 = recent_pull_date_table(days)
   753       table1.select(List(log_name), distinct = true) + SQL.join_inner + table2.query_named +
   754         " ON " + Prop.isabelle_version(table1) + " = " + Prop.isabelle_version(table2)
   755     }
   756 
   757     def select_recent_versions(days: Int,
   758       rev: String = "", afp_rev: Option[String] = None, sql: SQL.Source = ""): SQL.Source =
   759     {
   760       val version = Prop.isabelle_version
   761       val table1 = recent_pull_date_table(days, rev = rev, afp_rev = afp_rev)
   762       val table2 = meta_info_table
   763       val aux_table = SQL.Table("aux", table2.columns, table2.select(sql = sql))
   764 
   765       val columns =
   766         table1.columns.map(c => c(table1)) :::
   767           List(known.copy(expr = log_name(aux_table).defined))
   768       SQL.select(columns, distinct = true) +
   769         table1.query_named + SQL.join_outer + aux_table.query_named +
   770         " ON " + version(table1) + " = " + version(aux_table) +
   771         " ORDER BY " + pull_date(table1) + " DESC"
   772     }
   773 
   774 
   775     /* universal view on main data */
   776 
   777     val universal_table: SQL.Table =
   778     {
   779       val table1 = meta_info_table
   780       val table2 = pull_date_table()
   781       val table3 = sessions_table
   782       val table4 = ml_statistics_table
   783 
   784       val a_columns = log_name :: pull_date :: meta_info_table.columns.tail
   785       val a_table =
   786         SQL.Table("a", a_columns,
   787           SQL.select(a_columns.take(2) ::: a_columns.drop(2).map(_.apply(table1))) +
   788             table1 + SQL.join_outer + table2 + " ON " +
   789             Prop.isabelle_version(table1) + " = " + Prop.isabelle_version(table2))
   790 
   791       val b_columns = a_columns ::: sessions_table.columns.tail
   792       val b_table =
   793         SQL.Table("b", b_columns,
   794           SQL.select(log_name(a_table) :: b_columns.tail) + a_table.query_named +
   795           SQL.join_inner + table3 + " ON " + log_name(a_table) + " = " + log_name(table3))
   796 
   797       SQL.Table("isabelle_build_log", b_columns ::: List(ml_statistics),
   798         {
   799           SQL.select(b_columns.map(_.apply(b_table)) ::: List(ml_statistics)) +
   800             b_table.query_named + SQL.join_outer + table4 + " ON " +
   801             log_name(b_table) + " = " + log_name(table4) + " AND " +
   802             session_name(b_table) + " = " + session_name(table4)
   803         })
   804     }
   805   }
   806 
   807 
   808   /* database access */
   809 
   810   def store(options: Options): Store = new Store(options)
   811 
   812   class Store private[Build_Log](options: Options)
   813   {
   814     val xml_cache: XML.Cache = new XML.Cache()
   815 
   816     def open_database(
   817       user: String = options.string("build_log_database_user"),
   818       password: String = options.string("build_log_database_password"),
   819       database: String = options.string("build_log_database_name"),
   820       host: String = options.string("build_log_database_host"),
   821       port: Int = options.int("build_log_database_port"),
   822       ssh_host: String = options.string("build_log_ssh_host"),
   823       ssh_user: String = options.string("build_log_ssh_user"),
   824       ssh_port: Int = options.int("build_log_ssh_port")): PostgreSQL.Database =
   825     {
   826       PostgreSQL.open_database(
   827         user = user, password = password, database = database, host = host, port = port,
   828         ssh =
   829           if (ssh_host == "") None
   830           else Some(SSH.init_context(options).open_session(ssh_host, ssh_user, port)),
   831         ssh_close = true)
   832     }
   833 
   834     def update_database(db: PostgreSQL.Database, dirs: List[Path], ml_statistics: Boolean = false)
   835     {
   836       write_info(db, Log_File.find_files(dirs), ml_statistics = ml_statistics)
   837 
   838       db.create_view(Data.pull_date_table())
   839       db.create_view(Data.pull_date_table(afp = true))
   840       db.create_view(Data.universal_table)
   841     }
   842 
   843     def snapshot_database(db: PostgreSQL.Database, sqlite_database: Path,
   844       days: Int = 100, ml_statistics: Boolean = false)
   845     {
   846       Isabelle_System.mkdirs(sqlite_database.dir)
   847       sqlite_database.file.delete
   848 
   849       using(SQLite.open_database(sqlite_database))(db2 =>
   850       {
   851         db.transaction {
   852           db2.transaction {
   853             // main content
   854             db2.create_table(Data.meta_info_table)
   855             db2.create_table(Data.sessions_table)
   856             db2.create_table(Data.ml_statistics_table)
   857 
   858             val recent_log_names =
   859               db.using_statement(Data.select_recent_log_names(days))(stmt =>
   860                 stmt.execute_query().iterator(_.string(Data.log_name)).toList)
   861 
   862             for (log_name <- recent_log_names) {
   863               read_meta_info(db, log_name).foreach(meta_info =>
   864                 update_meta_info(db2, log_name, meta_info))
   865 
   866               update_sessions(db2, log_name, read_build_info(db, log_name))
   867 
   868               if (ml_statistics) {
   869                 update_ml_statistics(db2, log_name,
   870                   read_build_info(db, log_name, ml_statistics = true))
   871               }
   872             }
   873 
   874             // pull_date
   875             {
   876               val table = Data.pull_date_table()
   877               db2.create_table(table)
   878               db2.using_statement(table.insert())(stmt2 =>
   879               {
   880                 db.using_statement(Data.recent_pull_date_table(days).query)(stmt =>
   881                 {
   882                   val res = stmt.execute_query()
   883                   while (res.next()) {
   884                     for ((c, i) <- table.columns.zipWithIndex) {
   885                       stmt2.string(i + 1) = res.get_string(c)
   886                     }
   887                     stmt2.execute()
   888                   }
   889                 })
   890               })
   891             }
   892 
   893             // full view
   894             db2.create_view(Data.universal_table)
   895           }
   896         }
   897         db2.rebuild
   898       })
   899     }
   900 
   901     def domain(db: SQL.Database, table: SQL.Table, column: SQL.Column): Set[String] =
   902       db.using_statement(table.select(List(column), distinct = true))(stmt =>
   903         stmt.execute_query().iterator(_.string(column)).toSet)
   904 
   905     def update_meta_info(db: SQL.Database, log_name: String, meta_info: Meta_Info)
   906     {
   907       val table = Data.meta_info_table
   908       db.using_statement(db.insert_permissive(table))(stmt =>
   909       {
   910         stmt.string(1) = log_name
   911         for ((c, i) <- table.columns.tail.zipWithIndex) {
   912           if (c.T == SQL.Type.Date)
   913             stmt.date(i + 2) = meta_info.get_date(c)
   914           else
   915             stmt.string(i + 2) = meta_info.get(c)
   916         }
   917         stmt.execute()
   918       })
   919     }
   920 
   921     def update_sessions(db: SQL.Database, log_name: String, build_info: Build_Info)
   922     {
   923       val table = Data.sessions_table
   924       db.using_statement(db.insert_permissive(table))(stmt =>
   925       {
   926         val entries_iterator =
   927           if (build_info.sessions.isEmpty) Iterator("" -> Session_Entry.empty)
   928           else build_info.sessions.iterator
   929         for ((session_name, session) <- entries_iterator) {
   930           stmt.string(1) = log_name
   931           stmt.string(2) = session_name
   932           stmt.string(3) = proper_string(session.chapter)
   933           stmt.string(4) = session.proper_groups
   934           stmt.int(5) = session.threads
   935           stmt.long(6) = session.timing.elapsed.proper_ms
   936           stmt.long(7) = session.timing.cpu.proper_ms
   937           stmt.long(8) = session.timing.gc.proper_ms
   938           stmt.double(9) = session.timing.factor
   939           stmt.long(10) = session.ml_timing.elapsed.proper_ms
   940           stmt.long(11) = session.ml_timing.cpu.proper_ms
   941           stmt.long(12) = session.ml_timing.gc.proper_ms
   942           stmt.double(13) = session.ml_timing.factor
   943           stmt.long(14) = session.heap_size
   944           stmt.string(15) = session.status.map(_.toString)
   945           stmt.bytes(16) = compress_errors(session.errors)
   946           stmt.execute()
   947         }
   948       })
   949     }
   950 
   951     def update_ml_statistics(db: SQL.Database, log_name: String, build_info: Build_Info)
   952     {
   953       val table = Data.ml_statistics_table
   954       db.using_statement(db.insert_permissive(table))(stmt =>
   955       {
   956         val ml_stats: List[(String, Option[Bytes])] =
   957           Par_List.map[(String, Session_Entry), (String, Option[Bytes])](
   958             { case (a, b) => (a, Properties.compress(b.ml_statistics).proper) },
   959             build_info.sessions.iterator.filter(p => p._2.ml_statistics.nonEmpty).toList)
   960         val entries = if (ml_stats.nonEmpty) ml_stats else List("" -> None)
   961         for ((session_name, ml_statistics) <- entries) {
   962           stmt.string(1) = log_name
   963           stmt.string(2) = session_name
   964           stmt.bytes(3) = ml_statistics
   965           stmt.execute()
   966         }
   967       })
   968     }
   969 
   970     def write_info(db: SQL.Database, files: List[JFile], ml_statistics: Boolean = false)
   971     {
   972       abstract class Table_Status(table: SQL.Table)
   973       {
   974         db.create_table(table)
   975         private var known: Set[String] = domain(db, table, Data.log_name)
   976 
   977         def required(file: JFile): Boolean = !known(Log_File.plain_name(file.getName))
   978 
   979         def update_db(db: SQL.Database, log_file: Log_File): Unit
   980         def update(log_file: Log_File)
   981         {
   982           if (!known(log_file.name)) {
   983             update_db(db, log_file)
   984             known += log_file.name
   985           }
   986         }
   987       }
   988       val status =
   989         List(
   990           new Table_Status(Data.meta_info_table) {
   991             override def update_db(db: SQL.Database, log_file: Log_File): Unit =
   992               update_meta_info(db, log_file.name, log_file.parse_meta_info())
   993           },
   994           new Table_Status(Data.sessions_table) {
   995             override def update_db(db: SQL.Database, log_file: Log_File): Unit =
   996               update_sessions(db, log_file.name, log_file.parse_build_info())
   997           },
   998           new Table_Status(Data.ml_statistics_table) {
   999             override def update_db(db: SQL.Database, log_file: Log_File): Unit =
  1000             if (ml_statistics) {
  1001               update_ml_statistics(db, log_file.name,
  1002                 log_file.parse_build_info(ml_statistics = true))
  1003             }
  1004           })
  1005 
  1006       for (file_group <- files.filter(file => status.exists(_.required(file))).grouped(100)) {
  1007         val log_files = Par_List.map[JFile, Log_File](Log_File.apply _, file_group)
  1008         db.transaction { log_files.foreach(log_file => status.foreach(_.update(log_file))) }
  1009       }
  1010     }
  1011 
  1012     def read_meta_info(db: SQL.Database, log_name: String): Option[Meta_Info] =
  1013     {
  1014       val table = Data.meta_info_table
  1015       val columns = table.columns.tail
  1016       db.using_statement(table.select(columns, Data.log_name.where_equal(log_name)))(stmt =>
  1017       {
  1018         val res = stmt.execute_query()
  1019         if (!res.next) None
  1020         else {
  1021           val results =
  1022             columns.map(c => c.name ->
  1023               (if (c.T == SQL.Type.Date)
  1024                 res.get_date(c).map(Log_File.Date_Format(_))
  1025                else
  1026                 res.get_string(c)))
  1027           val n = Prop.all_props.length
  1028           val props = for ((x, Some(y)) <- results.take(n)) yield (x, y)
  1029           val settings = for ((x, Some(y)) <- results.drop(n)) yield (x, y)
  1030           Some(Meta_Info(props, settings))
  1031         }
  1032       })
  1033     }
  1034 
  1035     def read_build_info(
  1036       db: SQL.Database,
  1037       log_name: String,
  1038       session_names: List[String] = Nil,
  1039       ml_statistics: Boolean = false): Build_Info =
  1040     {
  1041       val table1 = Data.sessions_table
  1042       val table2 = Data.ml_statistics_table
  1043 
  1044       val where_log_name =
  1045         Data.log_name(table1).where_equal(log_name) + " AND " +
  1046         Data.session_name(table1) + " <> ''"
  1047       val where =
  1048         if (session_names.isEmpty) where_log_name
  1049         else where_log_name + " AND " + SQL.member(Data.session_name(table1).ident, session_names)
  1050 
  1051       val columns1 = table1.columns.tail.map(_.apply(table1))
  1052       val (columns, from) =
  1053         if (ml_statistics) {
  1054           val columns = columns1 ::: List(Data.ml_statistics(table2))
  1055           val join =
  1056             table1 + SQL.join_outer + table2 + " ON " +
  1057             Data.log_name(table1) + " = " + Data.log_name(table2) + " AND " +
  1058             Data.session_name(table1) + " = " + Data.session_name(table2)
  1059           (columns, SQL.enclose(join))
  1060         }
  1061         else (columns1, table1.ident)
  1062 
  1063       val sessions =
  1064         db.using_statement(SQL.select(columns) + from + " " + where)(stmt =>
  1065         {
  1066           stmt.execute_query().iterator(res =>
  1067           {
  1068             val session_name = res.string(Data.session_name)
  1069             val session_entry =
  1070               Session_Entry(
  1071                 chapter = res.string(Data.chapter),
  1072                 groups = split_lines(res.string(Data.groups)),
  1073                 threads = res.get_int(Data.threads),
  1074                 timing = res.timing(Data.timing_elapsed, Data.timing_cpu, Data.timing_gc),
  1075                 ml_timing =
  1076                   res.timing(Data.ml_timing_elapsed, Data.ml_timing_cpu, Data.ml_timing_gc),
  1077                 heap_size = res.get_long(Data.heap_size),
  1078                 status = res.get_string(Data.status).map(Session_Status.withName(_)),
  1079                 errors = uncompress_errors(res.bytes(Data.errors)),
  1080                 ml_statistics =
  1081                   if (ml_statistics)
  1082                     Properties.uncompress(res.bytes(Data.ml_statistics), Some(xml_cache))
  1083                   else Nil)
  1084             session_name -> session_entry
  1085           }).toMap
  1086         })
  1087       Build_Info(sessions)
  1088     }
  1089   }
  1090 }