maintain blobs within document state: digest + text in ML, digest-only in Scala;
authorwenzelm
Tue, 19 Nov 2013 19:33:27 +0100
changeset 54519 5fed81762406
parent 54518 81a9a54c57fc
child 54520 cee77d2e9582
maintain blobs within document state: digest + text in ML, digest-only in Scala; resolve files for command span, based on defined blobs; tuned;
src/Pure/Isar/token.ML
src/Pure/PIDE/command.ML
src/Pure/PIDE/command.scala
src/Pure/PIDE/document.ML
src/Pure/PIDE/document.scala
src/Pure/PIDE/protocol.ML
src/Pure/PIDE/protocol.scala
src/Pure/System/session.scala
src/Pure/Thy/thy_load.ML
src/Pure/Thy/thy_load.scala
src/Pure/Thy/thy_syntax.scala
--- a/src/Pure/Isar/token.ML	Tue Nov 19 13:54:02 2013 +0100
+++ b/src/Pure/Isar/token.ML	Tue Nov 19 19:33:27 2013 +0100
@@ -46,7 +46,7 @@
   val content_of: T -> string
   val unparse: T -> string
   val text_of: T -> string * string
-  val get_files: T -> file list option
+  val get_files: T -> file list
   val put_files: file list -> T -> T
   val get_value: T -> value option
   val map_value: (value -> value) -> T -> T
@@ -244,10 +244,11 @@
 
 (* inlined file content *)
 
-fun get_files (Token (_, _, Value (SOME (Files files)))) = SOME files
-  | get_files _ = NONE;
+fun get_files (Token (_, _, Value (SOME (Files files)))) = files
+  | get_files _ = [];
 
-fun put_files files (Token (x, y, Slot)) = Token (x, y, Value (SOME (Files files)))
+fun put_files [] tok = tok
+  | put_files files (Token (x, y, Slot)) = Token (x, y, Value (SOME (Files files)))
   | put_files _ tok =
       raise Fail ("Cannot put inlined files here" ^ Position.here (position_of tok));
 
--- a/src/Pure/PIDE/command.ML	Tue Nov 19 13:54:02 2013 +0100
+++ b/src/Pure/PIDE/command.ML	Tue Nov 19 19:33:27 2013 +0100
@@ -6,13 +6,14 @@
 
 signature COMMAND =
 sig
-  val read: (unit -> theory) -> Token.T list -> Toplevel.transition
+  type blob = (string * string) Exn.result
+  val read: (unit -> theory) -> blob list -> Token.T list -> Toplevel.transition
   type eval
   val eval_eq: eval * eval -> bool
   val eval_running: eval -> bool
   val eval_finished: eval -> bool
   val eval_result_state: eval -> Toplevel.state
-  val eval: (unit -> theory) -> Token.T list -> eval -> eval
+  val eval: (unit -> theory) -> blob list -> Token.T list -> eval -> eval
   type print
   val print: bool -> (string * string list) list -> string ->
     eval -> print list -> print list option
@@ -82,7 +83,19 @@
 
 (* read *)
 
-fun read init span =
+type blob = (string * string) Exn.result;  (*file node name, digest or text*)
+
+fun resolve_files blobs toks =
+  (case Thy_Syntax.parse_spans toks of
+    [span] => span
+      |> Thy_Syntax.resolve_files (fn _ => fn (path, pos) =>
+        blobs |> map (Exn.release #> (fn (file, text) =>
+          let val _ = Position.report pos (Markup.path file);
+          in {src_path = path (* FIXME *), text = text, pos = Position.file file} end)))
+      |> Thy_Syntax.span_content
+  | _ => toks);
+
+fun read init blobs span =
   let
     val outer_syntax = #2 (Outer_Syntax.get_syntax ());
     val command_reports = Outer_Syntax.command_reports outer_syntax;
@@ -99,7 +112,7 @@
   in
     if is_malformed then Toplevel.malformed pos "Malformed command syntax"
     else
-      (case Outer_Syntax.read_spans outer_syntax span of
+      (case Outer_Syntax.read_spans outer_syntax (resolve_files blobs span) of
         [tr] =>
           if Keyword.is_control (Toplevel.name_of tr) then
             Toplevel.malformed pos "Illegal control command"
@@ -180,14 +193,14 @@
 
 in
 
-fun eval init span eval0 =
+fun eval init blobs span eval0 =
   let
     val exec_id = Document_ID.make ();
     fun process () =
       let
         val tr =
           Position.setmp_thread_data (Position.id_only (Document_ID.print exec_id))
-            (fn () => read init span |> Toplevel.exec_id exec_id) ();
+            (fn () => read init blobs span |> Toplevel.exec_id exec_id) ();
       in eval_state span tr (eval_result eval0) end;
   in Eval {exec_id = exec_id, eval_process = memo exec_id process} end;
 
--- a/src/Pure/PIDE/command.scala	Tue Nov 19 13:54:02 2013 +0100
+++ b/src/Pure/PIDE/command.scala	Tue Nov 19 19:33:27 2013 +0100
@@ -144,13 +144,13 @@
   def name(span: List[Token]): String =
     span.find(_.is_command) match { case Some(tok) => tok.source case _ => "" }
 
-  type Blobs = List[Exn.Result[(Document.Node.Name, SHA1.Digest)]]
+  type Blob = Exn.Result[(Document.Node.Name, SHA1.Digest)]
 
   def apply(
     id: Document_ID.Command,
     node_name: Document.Node.Name,
+    blobs: List[Blob],
     span: List[Token],
-    blobs: Blobs,
     results: Results = Results.empty,
     markup: Markup_Tree = Markup_Tree.empty): Command =
   {
@@ -169,14 +169,14 @@
       i += n
     }
 
-    new Command(id, node_name, span1.toList, source, blobs, results, markup)
+    new Command(id, node_name, blobs, span1.toList, source, results, markup)
   }
 
   val empty = Command(Document_ID.none, Document.Node.Name.empty, Nil, Nil)
 
   def unparsed(id: Document_ID.Command, source: String, results: Results, markup: Markup_Tree)
       : Command =
-    Command(id, Document.Node.Name.empty, List(Token(Token.Kind.UNPARSED, source)), Nil,
+    Command(id, Document.Node.Name.empty, Nil, List(Token(Token.Kind.UNPARSED, source)),
       results, markup)
 
   def unparsed(source: String): Command =
@@ -215,9 +215,9 @@
 final class Command private(
     val id: Document_ID.Command,
     val node_name: Document.Node.Name,
+    val blobs: List[Command.Blob],
     val span: List[Token],
     val source: String,
-    val blobs: Command.Blobs,
     val init_results: Command.Results,
     val init_markup: Markup_Tree)
 {
@@ -237,6 +237,12 @@
     id + "/" + (if (is_command) name else if (is_ignored) "IGNORED" else "MALFORMED")
 
 
+  /* blobs */
+
+  def blobs_digests: List[SHA1.Digest] =
+    for (Exn.Res((_, digest)) <- blobs) yield digest
+
+
   /* source */
 
   def length: Int = source.length
--- a/src/Pure/PIDE/document.ML	Tue Nov 19 13:54:02 2013 +0100
+++ b/src/Pure/PIDE/document.ML	Tue Nov 19 19:33:27 2013 +0100
@@ -18,7 +18,9 @@
   type edit = string * node_edit
   type state
   val init_state: state
-  val define_command: Document_ID.command -> string -> string -> state -> state
+  val define_blob: string -> string -> state -> state
+  val define_command: Document_ID.command -> string -> Command.blob list -> string ->
+    state -> state
   val remove_versions: Document_ID.version list -> state -> state
   val start_execution: state -> state
   val update: Document_ID.version -> Document_ID.version -> edit list -> state ->
@@ -231,29 +233,32 @@
 
 abstype state = State of
  {versions: version Inttab.table,  (*version id -> document content*)
-  commands: (string * Token.T list lazy) Inttab.table,  (*command id -> named command span*)
+  blobs: string Symtab.table,  (*digest -> text*)
+  commands: (string * Command.blob list * Token.T list lazy) Inttab.table,
+    (*command id -> name, inlined files, command span*)
   execution: execution}  (*current execution process*)
 with
 
-fun make_state (versions, commands, execution) =
-  State {versions = versions, commands = commands, execution = execution};
+fun make_state (versions, blobs, commands, execution) =
+  State {versions = versions, blobs = blobs, commands = commands, execution = execution};
 
-fun map_state f (State {versions, commands, execution}) =
-  make_state (f (versions, commands, execution));
+fun map_state f (State {versions, blobs, commands, execution}) =
+  make_state (f (versions, blobs, commands, execution));
 
 val init_state =
-  make_state (Inttab.make [(Document_ID.none, empty_version)], Inttab.empty, no_execution);
+  make_state (Inttab.make [(Document_ID.none, empty_version)],
+    Symtab.empty, Inttab.empty, no_execution);
 
 
 (* document versions *)
 
 fun define_version version_id version =
-  map_state (fn (versions, commands, {delay_request, frontier, ...}) =>
+  map_state (fn (versions, blobs, commands, {delay_request, frontier, ...}) =>
     let
       val versions' = Inttab.update_new (version_id, version) versions
         handle Inttab.DUP dup => err_dup "document version" dup;
       val execution' = new_execution version_id delay_request frontier;
-    in (versions', commands, execution') end);
+    in (versions', blobs, commands, execution') end);
 
 fun the_version (State {versions, ...}) version_id =
   (case Inttab.lookup versions version_id of
@@ -265,10 +270,23 @@
     handle Inttab.UNDEF _ => err_undef "document version" version_id;
 
 
+(* inlined files *)
+
+fun define_blob digest text =
+  map_state (fn (versions, blobs, commands, execution) =>
+    let val blobs' = Symtab.update (digest, text) blobs
+    in (versions, blobs', commands, execution) end);
+
+fun the_blob (State {blobs, ...}) digest =
+  (case Symtab.lookup blobs digest of
+    NONE => error ("Undefined blob: " ^ digest)
+  | SOME text => text);
+
+
 (* commands *)
 
-fun define_command command_id name text =
-  map_state (fn (versions, commands, execution) =>
+fun define_command command_id name command_blobs text =
+  map_state (fn (versions, blobs, commands, execution) =>
     let
       val id = Document_ID.print command_id;
       val span =
@@ -279,9 +297,9 @@
         Position.setmp_thread_data (Position.id_only id)
           (fn () => Output.status (Markup.markup_only Markup.accepted)) ();
       val commands' =
-        Inttab.update_new (command_id, (name, span)) commands
+        Inttab.update_new (command_id, (name, command_blobs, span)) commands
           handle Inttab.DUP dup => err_dup "command" dup;
-    in (versions, commands', execution) end);
+    in (versions, blobs, commands', execution) end);
 
 fun the_command (State {commands, ...}) command_id =
   (case Inttab.lookup commands command_id of
@@ -295,7 +313,7 @@
 
 (* remove_versions *)
 
-fun remove_versions version_ids state = state |> map_state (fn (versions, _, execution) =>
+fun remove_versions version_ids state = state |> map_state (fn (versions, _, _, execution) =>
   let
     val _ =
       member (op =) version_ids (#version_id execution) andalso
@@ -308,12 +326,17 @@
           String_Graph.fold (fn (_, (node, _)) => node |>
             iterate_entries (fn ((_, command_id), _) =>
               SOME o Inttab.insert (K true) (command_id, the_command state command_id))));
-  in (versions', commands', execution) end);
+    val blobs' =
+      (commands', Symtab.empty) |->
+        Inttab.fold (fn (_, (_, blobs, _)) => blobs |>
+          fold (fn Exn.Res (_, digest) => Symtab.update (digest, the_blob state digest) | _ => I));
+
+  in (versions', blobs', commands', execution) end);
 
 
 (* document execution *)
 
-fun start_execution state = state |> map_state (fn (versions, commands, execution) =>
+fun start_execution state = state |> map_state (fn (versions, blobs, commands, execution) =>
   timeit "Document.start_execution" (fn () =>
     let
       val {version_id, execution_id, delay_request, frontier} = execution;
@@ -350,7 +373,7 @@
       val execution' =
         {version_id = version_id, execution_id = execution_id,
          delay_request = delay_request', frontier = frontier'};
-    in (versions, commands, execution') end));
+    in (versions, blobs, commands, execution') end));
 
 
 
@@ -476,9 +499,12 @@
 
       val command_visible = visible_command node command_id';
       val command_overlays = overlays node command_id';
-      val (command_name, span) = the_command state command_id' ||> Lazy.force;
+      val (command_name, blobs0, span0) = the_command state command_id';
+      val blobs = map (Exn.map_result (apsnd (the_blob state))) blobs0;
+      val span = Lazy.force span0;
 
-      val eval' = Command.eval (fn () => the_default illegal_init init span) span eval;
+      val eval' =
+        Command.eval (fn () => the_default illegal_init init span) blobs span eval;
       val prints' = perhaps (Command.print command_visible command_overlays command_name eval') [];
       val exec' = (eval', prints');
 
--- a/src/Pure/PIDE/document.scala	Tue Nov 19 13:54:02 2013 +0100
+++ b/src/Pure/PIDE/document.scala	Tue Nov 19 19:33:27 2013 +0100
@@ -397,6 +397,7 @@
 
   final case class State private(
     val versions: Map[Document_ID.Version, Version] = Map.empty,
+    val blobs: Set[SHA1.Digest] = Set.empty,   // inlined files
     val commands: Map[Document_ID.Command, Command.State] = Map.empty,  // static markup from define_command
     val execs: Map[Document_ID.Exec, Command.State] = Map.empty,  // dynamic markup from execution
     val assignments: Map[Document_ID.Version, State.Assignment] = Map.empty,
@@ -411,6 +412,9 @@
         assignments = assignments + (id -> assignment.unfinished))
     }
 
+    def define_blob(digest: SHA1.Digest): State = copy(blobs = blobs + digest)
+    def defined_blob(digest: SHA1.Digest): Boolean = blobs.contains(digest)
+
     def define_command(command: Command): State =
     {
       val id = command.id
@@ -514,6 +518,7 @@
     {
       val versions1 = versions -- removed
       val assignments1 = assignments -- removed
+      var blobs1 = Set.empty[SHA1.Digest]
       var commands1 = Map.empty[Document_ID.Command, Command.State]
       var execs1 = Map.empty[Document_ID.Exec, Command.State]
       for {
@@ -522,14 +527,19 @@
         (_, node) <- version.nodes.entries
         command <- node.commands.iterator
       } {
+        for (digest <- command.blobs_digests; if !blobs1.contains(digest))
+          blobs1 += digest
+
         if (!commands1.isDefinedAt(command.id))
           commands.get(command.id).foreach(st => commands1 += (command.id -> st))
+
         for (exec_id <- command_execs.getOrElse(command.id, Nil)) {
           if (!execs1.isDefinedAt(exec_id))
             execs.get(exec_id).foreach(st => execs1 += (exec_id -> st))
         }
       }
-      copy(versions = versions1, commands = commands1, execs = execs1, assignments = assignments1)
+      copy(versions = versions1, blobs = blobs1, commands = commands1, execs = execs1,
+        assignments = assignments1)
     }
 
     def command_state(version: Version, command: Command): Command.State =
--- a/src/Pure/PIDE/protocol.ML	Tue Nov 19 13:54:02 2013 +0100
+++ b/src/Pure/PIDE/protocol.ML	Tue Nov 19 19:33:27 2013 +0100
@@ -23,9 +23,23 @@
       end);
 
 val _ =
+  Isabelle_Process.protocol_command "Document.define_blob"
+    (fn [digest, content] => Document.change_state (Document.define_blob digest content));
+
+val _ =
   Isabelle_Process.protocol_command "Document.define_command"
-    (fn [id, name, text] =>
-      Document.change_state (Document.define_command (Document_ID.parse id) name text));
+    (fn [id, name, blobs_yxml, text] =>
+      let
+        val blobs =
+          YXML.parse_body blobs_yxml |>
+            let open XML.Decode in
+              list (variant
+               [fn ([a, b], []) => Exn.Res (a, b),
+                fn ([a], []) => Exn.Exn (ERROR a)])
+            end;
+      in
+        Document.change_state (Document.define_command (Document_ID.parse id) name blobs text)
+      end);
 
 val _ =
   Isabelle_Process.protocol_command "Document.discontinue_execution"
--- a/src/Pure/PIDE/protocol.scala	Tue Nov 19 13:54:02 2013 +0100
+++ b/src/Pure/PIDE/protocol.scala	Tue Nov 19 19:33:27 2013 +0100
@@ -308,11 +308,27 @@
 
 trait Protocol extends Isabelle_Process
 {
+  /* inlined files */
+
+  def define_blob(blob: Bytes): Unit =
+    protocol_command_raw("Document.define_blob", Bytes(blob.sha1_digest.toString), blob)
+
+
   /* commands */
 
   def define_command(command: Command): Unit =
+  {
+    val blobs_yxml =
+    { import XML.Encode._
+      val encode_blob: T[Command.Blob] =
+        variant(List(
+          { case Exn.Res((a, b)) => (List(a.node, b.toString), Nil) },
+          { case Exn.Exn(e) => (List(Exn.message(e)), Nil) }))
+      YXML.string_of_body(list(encode_blob)(command.blobs))
+    }
     protocol_command("Document.define_command",
-      Document_ID(command.id), encode(command.name), encode(command.source))
+      Document_ID(command.id), encode(command.name), blobs_yxml, encode(command.source))
+  }
 
 
   /* execution */
--- a/src/Pure/System/session.scala	Tue Nov 19 13:54:02 2013 +0100
+++ b/src/Pure/System/session.scala	Tue Nov 19 19:33:27 2013 +0100
@@ -179,12 +179,12 @@
 
         case Text_Edits(previous, text_edits, version_result) =>
           val prev = previous.get_finished
-          val (doc_edits, version) =
+          val (all_blobs, doc_edits, version) =
             Timing.timeit("Thy_Load.text_edits", timing) {
               thy_load.text_edits(reparse_limit, prev, text_edits)
             }
           version_result.fulfill(version)
-          sender ! Change(doc_edits, prev, version)
+          sender ! Change(all_blobs, doc_edits, prev, version)
 
         case bad => System.err.println("change_parser: ignoring bad message " + bad)
       }
@@ -250,6 +250,7 @@
   private case class Start(args: List[String])
   private case class Cancel_Exec(exec_id: Document_ID.Exec)
   private case class Change(
+    all_blobs: Map[Document.Node.Name, Bytes],
     doc_edits: List[Document.Edit_Command],
     previous: Document.Version,
     version: Document.Version)
@@ -374,6 +375,18 @@
 
       def id_command(command: Command)
       {
+        for {
+          digest <- command.blobs_digests
+          if !global_state().defined_blob(digest)
+        } {
+          change.all_blobs.collectFirst({ case (_, b) if b.sha1_digest == digest => b }) match {
+            case Some(blob) =>
+              global_state >> (_.define_blob(digest))
+              prover.get.define_blob(blob)
+            case None => System.err.println("Missing blob for SHA1 digest " + digest)
+          }
+        }
+
         if (!global_state().defined_command(command.id)) {
           global_state >> (_.define_command(command))
           prover.get.define_command(command)
--- a/src/Pure/Thy/thy_load.ML	Tue Nov 19 13:54:02 2013 +0100
+++ b/src/Pure/Thy/thy_load.ML	Tue Nov 19 19:33:27 2013 +0100
@@ -9,7 +9,6 @@
   val master_directory: theory -> Path.T
   val imports_of: theory -> (string * Position.T) list
   val thy_path: Path.T -> Path.T
-  val read_files: Path.T -> string -> Path.T * Position.T -> Token.file list
   val parse_files: string -> (theory -> Token.file list) parser
   val check_thy: Path.T -> string ->
    {master: Path.T * SHA1.digest, text: string, theory_pos: Position.T,
@@ -76,8 +75,8 @@
 fun parse_files cmd =
   Scan.ahead Parse.not_eof -- Parse.path >> (fn (tok, name) => fn thy =>
     (case Token.get_files tok of
-      SOME files => files
-    | NONE => read_files (master_directory thy) cmd (Path.explode name, Token.position_of tok)));
+      [] => read_files (master_directory thy) cmd (Path.explode name, Token.position_of tok)
+    | files => files));
 
 
 (* theory files *)
@@ -146,7 +145,7 @@
   let
     fun prepare_span span =
       Thy_Syntax.span_content span
-      |> Command.read init
+      |> Command.read init []
       |> (fn tr => Toplevel.put_timing (last_timing tr) tr);
 
     fun element_result span_elem (st, _) =
--- a/src/Pure/Thy/thy_load.scala	Tue Nov 19 13:54:02 2013 +0100
+++ b/src/Pure/Thy/thy_load.scala	Tue Nov 19 19:33:27 2013 +0100
@@ -100,7 +100,7 @@
   /* theory text edits */
 
   def text_edits(reparse_limit: Int, previous: Document.Version, edits: List[Document.Edit_Text])
-      : (List[Document.Edit_Command], Document.Version) =
+      : (Map[Document.Node.Name, Bytes], List[Document.Edit_Command], Document.Version) =
     Thy_Syntax.text_edits(this, reparse_limit, previous, edits)
 }
 
--- a/src/Pure/Thy/thy_syntax.scala	Tue Nov 19 13:54:02 2013 +0100
+++ b/src/Pure/Thy/thy_syntax.scala	Tue Nov 19 19:33:27 2013 +0100
@@ -68,7 +68,7 @@
       /* result structure */
 
       val spans = parse_spans(syntax.scan(text))
-      spans.foreach(span => add(Command(Document_ID.none, node_name, span, Nil)))
+      spans.foreach(span => add(Command(Document_ID.none, node_name, Nil, span)))
       result()
     }
   }
@@ -257,7 +257,7 @@
       node_name: Document.Node.Name,
       span: List[Token],
       all_blobs: Map[Document.Node.Name, Bytes])
-    : Command.Blobs =
+    : List[Command.Blob] =
   {
     span_files(syntax, span).map(file =>
       Exn.capture {
@@ -265,7 +265,7 @@
           Document.Node.Name(thy_load.append(node_name.master_dir, Path.explode(file)))
         all_blobs.get(name) match {
           case Some(blob) => (name, blob.sha1_digest)
-          case None => error("Bad file: " + quote(name.toString))
+          case None => error("No such file: " + quote(name.toString))
         }
       }
     )
@@ -275,10 +275,10 @@
   /* reparse range of command spans */
 
   @tailrec private def chop_common(
-      cmds: List[Command], spans: List[(List[Token], Command.Blobs)])
-      : (List[Command], List[(List[Token], Command.Blobs)]) =
+      cmds: List[Command], spans: List[(List[Command.Blob], List[Token])])
+      : (List[Command], List[(List[Command.Blob], List[Token])]) =
     (cmds, spans) match {
-      case (c :: cs, (span, blobs) :: ps) if c.span == span && c.blobs == blobs =>
+      case (c :: cs, (blobs, span) :: ps) if c.blobs == blobs && c.span == span =>
         chop_common(cs, ps)
       case _ => (cmds, spans)
     }
@@ -294,7 +294,7 @@
     val cmds0 = commands.iterator(first, last).toList
     val spans0 =
       parse_spans(syntax.scan(cmds0.iterator.map(_.source).mkString)).
-        map(span => (span, resolve_files(thy_load, syntax, name, span, all_blobs)))
+        map(span => (resolve_files(thy_load, syntax, name, span, all_blobs), span))
 
     val (cmds1, spans1) = chop_common(cmds0, spans0)
 
@@ -309,7 +309,7 @@
       case cmd :: _ =>
         val hook = commands.prev(cmd)
         val inserted =
-          spans2.map({ case (span, blobs) => Command(Document_ID.make(), name, span, blobs) })
+          spans2.map({ case (blobs, span) => Command(Document_ID.make(), name, blobs, span) })
         (commands /: cmds2)(_ - _).append_after(hook, inserted)
     }
   }
@@ -428,7 +428,7 @@
       reparse_limit: Int,
       previous: Document.Version,
       edits: List[Document.Edit_Text])
-    : (List[Document.Edit_Command], Document.Version) =
+    : (Map[Document.Node.Name, Bytes], List[Document.Edit_Command], Document.Version) =
   {
     val (syntax, reparse0, nodes0, doc_edits0) =
       header_edits(thy_load.base_syntax, previous, edits)
@@ -477,6 +477,6 @@
         nodes += (name -> node2)
     }
 
-    (doc_edits.toList, Document.Version.make(syntax, nodes))
+    (all_blobs, doc_edits.toList, Document.Version.make(syntax, nodes))
   }
 }