store blobs / inlined files as separate text lines: smaller values are more healthy for the Poly/ML RTS and allow implicit sharing;
authorwenzelm
Thu, 27 Feb 2014 17:29:58 +0100
changeset 55788 67699e08e969
parent 55787 41a73a41f6c8
child 55789 8d4d339177dc
store blobs / inlined files as separate text lines: smaller values are more healthy for the Poly/ML RTS and allow implicit sharing; integrity check of SHA1 digests produced in Scala vs. ML; tuned signature;
src/HOL/SMT_Examples/boogie.ML
src/HOL/SPARK/Tools/spark_commands.ML
src/Pure/Isar/token.ML
src/Pure/PIDE/command.ML
src/Pure/PIDE/document.ML
src/Pure/Thy/thy_load.ML
src/Pure/pure_syn.ML
--- a/src/HOL/SMT_Examples/boogie.ML	Thu Feb 27 17:24:46 2014 +0100
+++ b/src/HOL/SMT_Examples/boogie.ML	Thu Feb 27 17:29:58 2014 +0100
@@ -6,7 +6,7 @@
 
 signature BOOGIE =
 sig
-  val boogie_prove: theory -> string -> unit
+  val boogie_prove: theory -> string list -> unit
 end
 
 structure Boogie: BOOGIE =
@@ -252,15 +252,13 @@
 
 
 
-(* splitting of text into a lists of lists of tokens *)
+(* splitting of text lines into a lists of tokens *)
 
 fun is_blank c = (c = " " orelse c = "\t" orelse c = "\r" orelse c = "\n")
 
-fun explode_lines text =
-  text
-  |> split_lines
-  |> map (String.tokens (is_blank o str))
-  |> filter (fn [] => false | _ => true)
+val token_lines =
+  map (String.tokens (is_blank o str))
+  #> filter (fn [] => false | _ => true)
 
 
 
@@ -299,13 +297,11 @@
   ALLGOALS (SMT_Solver.smt_tac ctxt (boogie_rules @ axioms))
 
 
-fun boogie_prove thy text =
+fun boogie_prove thy lines =
   let
-    val lines = explode_lines text
-
     val ((axioms, vc), ctxt) =
       empty_context
-      |> parse_lines lines
+      |> parse_lines (token_lines lines)
       |> add_unique_axioms
       |> build_proof_context thy
 
@@ -324,8 +320,8 @@
     (Thy_Load.provide_parse_files "boogie_file" >> (fn files =>
       Toplevel.theory (fn thy =>
         let
-          val ([{text, ...}], thy') = files thy;
-          val _ = boogie_prove thy' text;
+          val ([{lines, ...}], thy') = files thy;
+          val _ = boogie_prove thy' lines;
         in thy' end)))
 
 end
--- a/src/HOL/SPARK/Tools/spark_commands.ML	Thu Feb 27 17:24:46 2014 +0100
+++ b/src/HOL/SPARK/Tools/spark_commands.ML	Thu Feb 27 17:29:58 2014 +0100
@@ -15,15 +15,15 @@
 
 fun spark_open header (prfx, files) thy =
   let
-    val ([{src_path, text = vc_text, pos = vc_pos, ...}: Token.file,
-      {text = fdl_text, pos = fdl_pos, ...},
-      {text = rls_text, pos = rls_pos, ...}], thy') = files thy;
+    val ([{src_path, lines = vc_lines, pos = vc_pos, ...}: Token.file,
+      {lines = fdl_lines, pos = fdl_pos, ...},
+      {lines = rls_lines, pos = rls_pos, ...}], thy') = files thy;
     val base = fst (Path.split_ext (File.full_path (Thy_Load.master_directory thy') src_path));
   in
     SPARK_VCs.set_vcs
-      (snd (Fdl_Parser.parse_declarations fdl_pos fdl_text))
-      (Fdl_Parser.parse_rules rls_pos rls_text)
-      (snd (Fdl_Parser.parse_vcs header vc_pos vc_text))
+      (snd (Fdl_Parser.parse_declarations fdl_pos (cat_lines fdl_lines)))
+      (Fdl_Parser.parse_rules rls_pos (cat_lines rls_lines))
+      (snd (Fdl_Parser.parse_vcs header vc_pos (cat_lines vc_lines)))
       base prfx thy'
   end;
 
--- a/src/Pure/Isar/token.ML	Thu Feb 27 17:24:46 2014 +0100
+++ b/src/Pure/Isar/token.ML	Thu Feb 27 17:29:58 2014 +0100
@@ -10,7 +10,7 @@
     Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
     Nat | Float | String | AltString | Verbatim | Cartouche | Space | Comment | InternalValue |
     Error of string | Sync | EOF
-  type file = {src_path: Path.T, text: string, pos: Position.T}
+  type file = {src_path: Path.T, lines: string list, digest: SHA1.digest, pos: Position.T}
   datatype value =
     Text of string | Typ of typ | Term of term | Fact of thm list |
     Attribute of morphism -> attribute | Files of file Exn.result list
@@ -80,7 +80,7 @@
   args.ML).  Note that an assignable ref designates an intermediate
   state of internalization -- it is NOT meant to persist.*)
 
-type file = {src_path: Path.T, text: string, pos: Position.T};
+type file = {src_path: Path.T, lines: string list, digest: SHA1.digest, pos: Position.T};
 
 datatype value =
   Text of string |
--- a/src/Pure/PIDE/command.ML	Thu Feb 27 17:24:46 2014 +0100
+++ b/src/Pure/PIDE/command.ML	Thu Feb 27 17:29:58 2014 +0100
@@ -6,15 +6,17 @@
 
 signature COMMAND =
 sig
-  type blob = (string * string option) Exn.result
+  type 'a blob = (string * 'a option) Exn.result
+  type blob_digest = string blob
+  type content = SHA1.digest * string list
   val read_file: Path.T -> Position.T -> Path.T -> Token.file
-  val read: (unit -> theory) -> Path.T -> blob list -> Token.T list -> Toplevel.transition
+  val read: (unit -> theory) -> Path.T -> content blob list -> Token.T list -> Toplevel.transition
   type eval
   val eval_eq: eval * eval -> bool
   val eval_running: eval -> bool
   val eval_finished: eval -> bool
   val eval_result_state: eval -> Toplevel.state
-  val eval: (unit -> theory) -> Path.T -> blob list -> Token.T list -> eval -> eval
+  val eval: (unit -> theory) -> Path.T -> content blob list -> Token.T list -> eval -> eval
   type print
   val print: bool -> (string * string list) list -> string ->
     eval -> print list -> print list option
@@ -86,13 +88,29 @@
 
 (* read *)
 
-type blob = (string * string option) Exn.result;  (*file node name, digest or text*)
+type 'a blob = (string * 'a option) Exn.result;  (*file node name, content*)
+type blob_digest = string blob;  (*raw digest*)
+type content = SHA1.digest * string list;  (*digest, lines*)
 
 fun read_file master_dir pos src_path =
   let
     val full_path = File.check_file (File.full_path master_dir src_path);
     val _ = Position.report pos (Markup.path (Path.implode full_path));
-  in {src_path = src_path, text = File.read full_path, pos = Path.position full_path} end;
+    val text = File.read full_path;
+    val lines = split_lines text;
+    val digest = SHA1.digest text;
+  in {src_path = src_path, lines = lines, digest = digest, pos = Path.position full_path} end;
+
+local
+
+fun blob_file src_path file (digest, lines) =
+  let
+    val file_pos =
+      Position.file file (*sic!*) |>
+      (case Position.get_id (Position.thread_data ()) of
+        NONE => I
+      | SOME exec_id => Position.put_id exec_id);
+  in {src_path = src_path, lines = lines, digest = digest, pos = file_pos} end
 
 fun resolve_files master_dir blobs toks =
   (case Thy_Syntax.parse_spans toks of
@@ -101,17 +119,10 @@
         let
           fun make_file src_path (Exn.Res (_, NONE)) =
                 Exn.interruptible_capture (fn () => read_file master_dir pos src_path) ()
-            | make_file src_path (Exn.Res (file, SOME text)) =
-                let
-                  val _ = Position.report pos (Markup.path file);
-                  val file_pos =
-                    Position.file file (*sic!*) |>
-                    (case Position.get_id (Position.thread_data ()) of
-                      NONE => I
-                    | SOME exec_id => Position.put_id exec_id);
-                in Exn.Res {src_path = src_path, text = text, pos = file_pos} end
+            | make_file src_path (Exn.Res (file, SOME content)) =
+               (Position.report pos (Markup.path file);
+                Exn.Res (blob_file src_path file content))
             | make_file _ (Exn.Exn e) = Exn.Exn e;
-
           val src_paths = Keyword.command_files cmd path;
         in
           if null blobs then
@@ -123,6 +134,8 @@
       |> Thy_Syntax.span_content
   | _ => toks);
 
+in
+
 fun read init master_dir blobs span =
   let
     val outer_syntax = #2 (Outer_Syntax.get_syntax ());
@@ -149,6 +162,8 @@
       handle ERROR msg => Toplevel.malformed (#1 proper_range) msg
   end;
 
+end;
+
 
 (* eval *)
 
--- a/src/Pure/PIDE/document.ML	Thu Feb 27 17:24:46 2014 +0100
+++ b/src/Pure/PIDE/document.ML	Thu Feb 27 17:29:58 2014 +0100
@@ -18,7 +18,7 @@
   type state
   val init_state: state
   val define_blob: string -> string -> state -> state
-  val define_command: Document_ID.command -> string -> Command.blob list -> string ->
+  val define_command: Document_ID.command -> string -> Command.blob_digest list -> string ->
     state -> state
   val remove_versions: Document_ID.version list -> state -> state
   val start_execution: state -> state
@@ -234,8 +234,8 @@
 
 abstype state = State of
  {versions: version Inttab.table,  (*version id -> document content*)
-  blobs: string Symtab.table,  (*digest -> text*)
-  commands: (string * Command.blob list * Token.T list lazy) Inttab.table,
+  blobs: (SHA1.digest * string list) Symtab.table,  (*digest -> digest, lines*)
+  commands: (string * Command.blob_digest list * Token.T list lazy) Inttab.table,
     (*command id -> name, inlined files, command span*)
   execution: execution}  (*current execution process*)
 with
@@ -275,13 +275,18 @@
 
 fun define_blob digest text =
   map_state (fn (versions, blobs, commands, execution) =>
-    let val blobs' = Symtab.update (digest, text) blobs
+    let
+      val sha1_digest = SHA1.digest text;
+      val _ =
+        digest = SHA1.rep sha1_digest orelse
+          error ("Ill-defined blob: bad SHA1 digest " ^ digest ^ " vs. " ^ SHA1.rep sha1_digest);
+      val blobs' = Symtab.update (digest, (sha1_digest, split_lines text)) blobs;
     in (versions, blobs', commands, execution) end);
 
 fun the_blob (State {blobs, ...}) digest =
   (case Symtab.lookup blobs digest of
     NONE => error ("Undefined blob: " ^ digest)
-  | SOME text => text);
+  | SOME content => content);
 
 
 (* commands *)
@@ -496,8 +501,8 @@
 
       val command_visible = visible_command node command_id';
       val command_overlays = overlays node command_id';
-      val (command_name, blobs0, span0) = the_command state command_id';
-      val blobs = (map o Exn.map_result o apsnd o Option.map) (the_blob state) blobs0;
+      val (command_name, blob_digests, span0) = the_command state command_id';
+      val blobs = (map o Exn.map_result o apsnd o Option.map) (the_blob state) blob_digests;
       val span = Lazy.force span0;
 
       val eval' =
--- a/src/Pure/Thy/thy_load.ML	Thu Feb 27 17:24:46 2014 +0100
+++ b/src/Pure/Thy/thy_load.ML	Thu Feb 27 17:29:58 2014 +0100
@@ -100,7 +100,7 @@
   parse_files cmd >> (fn files => fn thy =>
     let
       val fs = files thy;
-      val thy' = fold (fn {src_path, text, ...} => provide (src_path, SHA1.digest text)) fs thy;
+      val thy' = fold (fn {src_path, digest, ...} => provide (src_path, digest)) fs thy;
     in (fs, thy') end);
 
 fun load_file thy src_path =
--- a/src/Pure/pure_syn.ML	Thu Feb 27 17:24:46 2014 +0100
+++ b/src/Pure/pure_syn.ML	Thu Feb 27 17:29:58 2014 +0100
@@ -22,11 +22,11 @@
     "ML text from file"
     (Thy_Load.parse_files "ML_file" >> (fn files => Toplevel.generic_theory (fn gthy =>
         let
-          val [{src_path, text, pos}] = files (Context.theory_of gthy);
-          val provide = Thy_Load.provide (src_path, SHA1.digest text);
+          val [{src_path, lines, digest, pos}] = files (Context.theory_of gthy);
+          val provide = Thy_Load.provide (src_path, digest);
         in
           gthy
-          |> ML_Context.exec (fn () => ML_Context.eval_text true pos text)
+          |> ML_Context.exec (fn () => ML_Context.eval_text true pos (cat_lines lines))
           |> Local_Theory.propagate_ml_env
           |> Context.mapping provide (Local_Theory.background_theory provide)
         end)));