maintain persistent session info in SQLite database instead of log file;
authorwenzelm
Fri, 17 Mar 2017 20:21:01 +0100
changeset 65291 57c85c83c11b
parent 65290 6c1d7d5c2165
child 65292 e3bd1e7ddd23
maintain persistent session info in SQLite database instead of log file;
src/Pure/Thy/sessions.scala
src/Pure/Tools/build.scala
--- a/src/Pure/Thy/sessions.scala	Fri Mar 17 19:14:11 2017 +0100
+++ b/src/Pure/Thy/sessions.scala	Fri Mar 17 20:21:01 2017 +0100
@@ -550,15 +550,19 @@
           map(xml_cache.props(_))
     }
 
-    def read_bytes(db: SQLite.Database, table: SQL.Table, column: SQL.Column): Bytes =
-    {
+    def read_string(db: SQLite.Database, table: SQL.Table, column: SQL.Column): String =
       using(db.select_statement(table, List(column)))(stmt =>
       {
         val rs = stmt.executeQuery
-        if (!rs.next) Bytes.empty
-        else db.bytes(rs, column.name)
+        if (!rs.next) "" else db.string(rs, column.name)
       })
-    }
+
+    def read_bytes(db: SQLite.Database, table: SQL.Table, column: SQL.Column): Bytes =
+      using(db.select_statement(table, List(column)))(stmt =>
+      {
+        val rs = stmt.executeQuery
+        if (!rs.next) Bytes.empty else db.bytes(rs, column.name)
+      })
 
     def read_properties(db: SQLite.Database, table: SQL.Table, column: SQL.Column)
       : List[Properties.T] = uncompress_properties(read_bytes(db, table, column))
@@ -586,19 +590,13 @@
         output_dir :: Path.split(Isabelle_System.getenv_strict("ISABELLE_PATH")).map(_ + ml_ident)
       }
 
-    def find(name: String): Option[(Path, Option[String])] =
-      input_dirs.find(dir => (dir + log_gz(name)).is_file).map(dir =>
-        (dir + log_gz(name), read_heap_digest(dir + Path.basic(name))))
+    def find_database_heap(name: String): Option[(Path, Option[String])] =
+      input_dirs.find(dir => (dir + database(name)).is_file).map(dir =>
+        (dir + database(name), read_heap_digest(dir + Path.basic(name))))
 
     def find_database(name: String): Option[Path] =
       input_dirs.map(_ + database(name)).find(_.is_file)
 
-    def find_log(name: String): Option[Path] =
-      input_dirs.map(_ + log(name)).find(_.is_file)
-
-    def find_log_gz(name: String): Option[Path] =
-      input_dirs.map(_ + log_gz(name)).find(_.is_file)
-
     def heap(name: String): Path =
       input_dirs.map(_ + Path.basic(name)).find(_.is_file) getOrElse
         error("Unknown logic " + quote(name) + " -- no heap file found in:\n" +
@@ -648,7 +646,7 @@
           db.set_bytes(stmt, 5, store.compress_properties(build_log.task_statistics))
           db.set_string(stmt, 6, cat_lines(build.sources))
           db.set_string(stmt, 7, cat_lines(build.input_heaps))
-          db.set_string(stmt, 8, build.output_heap)
+          db.set_string(stmt, 8, build.output_heap getOrElse "")
           db.set_int(stmt, 9, build.return_code)
           stmt.execute()
         })
@@ -667,21 +665,23 @@
     def read_task_statistics(store: Store, db: SQLite.Database): List[Properties.T] =
       store.read_properties(db, table, task_statistics)
 
-    def read_build_log(store: Store, db: SQLite.Database): Option[Build_Log.Session_Info] =
-      using(db.select_statement(table, build_log_columns))(stmt =>
-      {
-        val rs = stmt.executeQuery
-        if (!rs.next) None
-        else {
-          Some(
-            Build_Log.Session_Info(
-              db.string(rs, session_name.name),
-              store.decode_properties(db.bytes(rs, session_timing.name)),
-              store.uncompress_properties(db.bytes(rs, command_timings.name)),
-              store.uncompress_properties(db.bytes(rs, ml_statistics.name)),
-              store.uncompress_properties(db.bytes(rs, task_statistics.name))))
-        }
-      })
+    def read_build_log(store: Store, db: SQLite.Database,
+      default_name: String = "",
+      command_timings: Boolean = false,
+      ml_statistics: Boolean = false,
+      task_statistics: Boolean = false): Build_Log.Session_Info =
+    {
+      val name = store.read_string(db, table, session_name)
+      Build_Log.Session_Info(
+        session_name =
+          if (name == "") default_name
+          else if (default_name == "" || default_name == name) name
+          else error("Database from different session " + quote(name)),
+        session_timing = read_session_timing(store, db),
+        command_timings = if (command_timings) read_command_timings(store, db) else Nil,
+        ml_statistics = if (ml_statistics) read_ml_statistics(store, db) else Nil,
+        task_statistics = if (task_statistics) read_task_statistics(store, db) else Nil)
+    }
 
     def read_build(store: Store, db: SQLite.Database): Option[Build.Session_Info] =
       using(db.select_statement(table, table.columns))(stmt =>
@@ -693,7 +693,7 @@
             Build.Session_Info(
               split_lines(db.string(rs, sources.name)),
               split_lines(db.string(rs, input_heaps.name)),
-              db.string(rs, output_heap.name),
+              db.string(rs, output_heap.name) match { case "" => None case s => Some(s) },
               db.int(rs, return_code.name)))
         }
       })
--- a/src/Pure/Tools/build.scala	Fri Mar 17 19:14:11 2017 +0100
+++ b/src/Pure/Tools/build.scala	Fri Mar 17 20:21:01 2017 +0100
@@ -21,37 +21,45 @@
 {
   /** auxiliary **/
 
+  /* persistent build info */
+
+  sealed case class Session_Info(
+    sources: List[String],
+    input_heaps: List[String],
+    output_heap: Option[String],
+    return_code: Int)
+
+
   /* queue with scheduling information */
 
   private object Queue
   {
     def load_timings(store: Sessions.Store, name: String): (List[Properties.T], Double) =
     {
-      val (path, text) =
-        store.find_log_gz(name) match {
-          case Some(path) => (path, File.read_gzip(path))
-          case None =>
-            store.find_log(name) match {
-              case Some(path) => (path, File.read(path))
-              case None => (Path.current, "")
-            }
-        }
+      val no_timings: (List[Properties.T], Double) = (Nil, 0.0)
 
-      def ignore_error(msg: String): (List[Properties.T], Double) =
-      {
-        Output.warning("Ignoring bad log file: " + path + (if (msg == "") "" else "\n" + msg))
-        (Nil, 0.0)
-      }
-
-      try {
-        val info = Build_Log.Log_File(name, text).parse_session_info(name, command_timings = true)
-        val session_timing = Markup.Elapsed.unapply(info.session_timing) getOrElse 0.0
-        (info.command_timings, session_timing)
-      }
-      catch {
-        case ERROR(msg) => ignore_error(msg)
-        case exn: java.lang.Error => ignore_error(Exn.message(exn))
-        case _: XML.Error => ignore_error("")
+      store.find_database(name) match {
+        case None => no_timings
+        case Some(database) =>
+          def ignore_error(msg: String) =
+          {
+            Output.warning("Ignoring bad database: " + database + (if (msg == "") "" else "\n" + msg))
+            no_timings
+          }
+          try {
+            using(SQLite.open_database(database))(db =>
+            {
+              val build_log =
+                Sessions.Session_Info.read_build_log(store, db, name, command_timings = true)
+              val session_timing = Markup.Elapsed.unapply(build_log.session_timing) getOrElse 0.0
+              (build_log.command_timings, session_timing)
+            })
+          }
+          catch {
+            case ERROR(msg) => ignore_error(msg)
+            case exn: java.lang.Error => ignore_error(Exn.message(exn))
+            case _: XML.Error => ignore_error("")
+          }
       }
     }
 
@@ -60,7 +68,7 @@
       val graph = tree.graph
       val sessions = graph.keys
 
-      val timings = Par_List.map((name: String) => (name, load_timings(store, name)), sessions)
+      val timings = sessions.map(name => (name, load_timings(store, name)))
       val command_timings =
         Map(timings.map({ case (name, (ts, _)) => (name, ts) }): _*).withDefaultValue(Nil)
       val session_timing =
@@ -231,49 +239,6 @@
   }
 
 
-  /* sources and heaps */
-
-  sealed case class Session_Info(
-    sources: List[String], input_heaps: List[String], output_heap: String, return_code: Int)
-
-  private val SOURCES = "sources: "
-  private val INPUT_HEAP = "input_heap: "
-  private val OUTPUT_HEAP = "output_heap: "
-  private val LOG_START = "log:"
-  private val line_prefixes = List(SOURCES, INPUT_HEAP, OUTPUT_HEAP, LOG_START)
-
-  private def sources_stamp(digests: List[SHA1.Digest]): String =
-    digests.map(_.toString).sorted.mkString(SOURCES, " ", "")
-
-  private def read_stamps(path: Path): Option[(String, List[String], List[String])] =
-    if (path.is_file) {
-      val stream = new GZIPInputStream(new BufferedInputStream(new FileInputStream(path.file)))
-      val reader = new BufferedReader(new InputStreamReader(stream, UTF8.charset))
-      val lines =
-      {
-        val lines = new mutable.ListBuffer[String]
-        try {
-          var finished = false
-          while (!finished) {
-            val line = reader.readLine
-            if (line != null && line_prefixes.exists(line.startsWith(_)))
-              lines += line
-            else finished = true
-          }
-        }
-        finally { reader.close }
-        lines.toList
-      }
-
-      if (!lines.isEmpty && lines.last.startsWith(LOG_START)) {
-        lines.find(_.startsWith(SOURCES)).map(s =>
-          (s, lines.filter(_.startsWith(INPUT_HEAP)), lines.filter(_.startsWith(OUTPUT_HEAP))))
-      }
-      else None
-    }
-    else None
-
-
 
   /** build with results **/
 
@@ -356,8 +321,8 @@
     val deps =
       Sessions.dependencies(progress, true, verbose, list_files, check_keywords, selected_tree)
 
-    def session_sources_stamp(name: String): String =
-      sources_stamp(selected_tree(name).meta_digest :: deps.sources(name))
+    def sources_stamp(name: String): List[String] =
+      (selected_tree(name).meta_digest :: deps.sources(name)).map(_.toString).sorted
 
 
     /* main build process */
@@ -371,7 +336,7 @@
     if (clean_build) {
       for (name <- full_tree.graph.all_succs(selected)) {
         val files =
-          List(Path.basic(name), store.log(name), store.log_gz(name)).
+          List(Path.basic(name), store.database(name), store.log(name), store.log_gz(name)).
             map(store.output_dir + _).filter(_.is_file)
         if (files.nonEmpty) progress.echo("Cleaning " + name + " ...")
         if (!files.forall(p => p.file.delete)) progress.echo(name + " FAILED to delete")
@@ -440,11 +405,7 @@
                     yield Sessions.write_heap_digest(path)
 
                 File.write_gzip(store.output_dir + store.log_gz(name),
-                  terminate_lines(
-                    session_sources_stamp(name) ::
-                    input_heaps.map(INPUT_HEAP + _) :::
-                    heap_stamp.toList.map(OUTPUT_HEAP + _) :::
-                    List(LOG_START) ::: process_result.out_lines))
+                  terminate_lines(process_result.out_lines))
 
                 heap_stamp
               }
@@ -459,6 +420,22 @@
 
                 None
               }
+
+            // write database
+            {
+              val database = store.output_dir + store.database(name)
+              database.file.delete
+
+              using(SQLite.open_database(database))(db =>
+                Sessions.Session_Info.write(store, db,
+                  build_log =
+                    Build_Log.Log_File(name, process_result.out_lines).
+                      parse_session_info(name,
+                        command_timings = true, ml_statistics = true, task_statistics = true),
+                  build =
+                    Session_Info(sources_stamp(name), input_heaps, heap_stamp, process_result.rc)))
+            }
+
             loop(pending - name, running - name,
               results + (name -> Result(false, heap_stamp, Some(process_result_tail), job.info)))
             //}}}
@@ -473,15 +450,18 @@
 
                 val (current, heap_stamp) =
                 {
-                  store.find(name) match {
-                    case Some((log_gz, heap_stamp)) =>
-                      read_stamps(log_gz) match {
-                        case Some((sources, input_heaps, output_heaps)) =>
+                  store.find_database_heap(name) match {
+                    case Some((database, heap_stamp)) =>
+                      using(SQLite.open_database(database))(
+                        Sessions.Session_Info.read_build(store, _)) match
+                      {
+                        case Some(build) =>
                           val current =
-                            sources == session_sources_stamp(name) &&
-                            input_heaps == ancestor_heaps.map(INPUT_HEAP + _) &&
-                            output_heaps == heap_stamp.toList.map(OUTPUT_HEAP + _) &&
-                            !(do_output && heap_stamp.isEmpty)
+                            build.sources == sources_stamp(name) &&
+                            build.input_heaps == ancestor_heaps &&
+                            build.output_heap == heap_stamp &&
+                            !(do_output && heap_stamp.isEmpty) &&
+                            build.return_code == 0
                           (current, heap_stamp)
                         case None => (false, None)
                       }