store blobs / inlined files as separate text lines: smaller values are more healthy for the Poly/ML RTS and allow implicit sharing;
author wenzelm
Thu Feb 27 17:29:58 2014 +0100 (2014-02-27)
changeset 55788 67699e08e969
parent 55787 41a73a41f6c8
child 55789 8d4d339177dc
store blobs / inlined files as separate text lines: smaller values are more healthy for the Poly/ML RTS and allow implicit sharing;
integrity check of SHA1 digests produced in Scala vs. ML;
tuned signature;
src/HOL/SMT_Examples/boogie.ML
src/HOL/SPARK/Tools/spark_commands.ML
src/Pure/Isar/token.ML
src/Pure/PIDE/command.ML
src/Pure/PIDE/document.ML
src/Pure/Thy/thy_load.ML
src/Pure/pure_syn.ML
     1.1 --- a/src/HOL/SMT_Examples/boogie.ML	Thu Feb 27 17:24:46 2014 +0100
     1.2 +++ b/src/HOL/SMT_Examples/boogie.ML	Thu Feb 27 17:29:58 2014 +0100
     1.3 @@ -6,7 +6,7 @@
     1.4  
     1.5  signature BOOGIE =
     1.6  sig
     1.7 -  val boogie_prove: theory -> string -> unit
     1.8 +  val boogie_prove: theory -> string list -> unit
     1.9  end
    1.10  
    1.11  structure Boogie: BOOGIE =
    1.12 @@ -252,15 +252,13 @@
    1.13  
    1.14  
    1.15  
    1.16 -(* splitting of text into a lists of lists of tokens *)
    1.17 +(* splitting of text lines into a lists of tokens *)
    1.18  
    1.19  fun is_blank c = (c = " " orelse c = "\t" orelse c = "\r" orelse c = "\n")
    1.20  
    1.21 -fun explode_lines text =
    1.22 -  text
    1.23 -  |> split_lines
    1.24 -  |> map (String.tokens (is_blank o str))
    1.25 -  |> filter (fn [] => false | _ => true)
    1.26 +val token_lines =
    1.27 +  map (String.tokens (is_blank o str))
    1.28 +  #> filter (fn [] => false | _ => true)
    1.29  
    1.30  
    1.31  
    1.32 @@ -299,13 +297,11 @@
    1.33    ALLGOALS (SMT_Solver.smt_tac ctxt (boogie_rules @ axioms))
    1.34  
    1.35  
    1.36 -fun boogie_prove thy text =
    1.37 +fun boogie_prove thy lines =
    1.38    let
    1.39 -    val lines = explode_lines text
    1.40 -
    1.41      val ((axioms, vc), ctxt) =
    1.42        empty_context
    1.43 -      |> parse_lines lines
    1.44 +      |> parse_lines (token_lines lines)
    1.45        |> add_unique_axioms
    1.46        |> build_proof_context thy
    1.47  
    1.48 @@ -324,8 +320,8 @@
    1.49      (Thy_Load.provide_parse_files "boogie_file" >> (fn files =>
    1.50        Toplevel.theory (fn thy =>
    1.51          let
    1.52 -          val ([{text, ...}], thy') = files thy;
    1.53 -          val _ = boogie_prove thy' text;
    1.54 +          val ([{lines, ...}], thy') = files thy;
    1.55 +          val _ = boogie_prove thy' lines;
    1.56          in thy' end)))
    1.57  
    1.58  end
     2.1 --- a/src/HOL/SPARK/Tools/spark_commands.ML	Thu Feb 27 17:24:46 2014 +0100
     2.2 +++ b/src/HOL/SPARK/Tools/spark_commands.ML	Thu Feb 27 17:29:58 2014 +0100
     2.3 @@ -15,15 +15,15 @@
     2.4  
     2.5  fun spark_open header (prfx, files) thy =
     2.6    let
     2.7 -    val ([{src_path, text = vc_text, pos = vc_pos, ...}: Token.file,
     2.8 -      {text = fdl_text, pos = fdl_pos, ...},
     2.9 -      {text = rls_text, pos = rls_pos, ...}], thy') = files thy;
    2.10 +    val ([{src_path, lines = vc_lines, pos = vc_pos, ...}: Token.file,
    2.11 +      {lines = fdl_lines, pos = fdl_pos, ...},
    2.12 +      {lines = rls_lines, pos = rls_pos, ...}], thy') = files thy;
    2.13      val base = fst (Path.split_ext (File.full_path (Thy_Load.master_directory thy') src_path));
    2.14    in
    2.15      SPARK_VCs.set_vcs
    2.16 -      (snd (Fdl_Parser.parse_declarations fdl_pos fdl_text))
    2.17 -      (Fdl_Parser.parse_rules rls_pos rls_text)
    2.18 -      (snd (Fdl_Parser.parse_vcs header vc_pos vc_text))
    2.19 +      (snd (Fdl_Parser.parse_declarations fdl_pos (cat_lines fdl_lines)))
    2.20 +      (Fdl_Parser.parse_rules rls_pos (cat_lines rls_lines))
    2.21 +      (snd (Fdl_Parser.parse_vcs header vc_pos (cat_lines vc_lines)))
    2.22        base prfx thy'
    2.23    end;
    2.24  
     3.1 --- a/src/Pure/Isar/token.ML	Thu Feb 27 17:24:46 2014 +0100
     3.2 +++ b/src/Pure/Isar/token.ML	Thu Feb 27 17:29:58 2014 +0100
     3.3 @@ -10,7 +10,7 @@
     3.4      Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
     3.5      Nat | Float | String | AltString | Verbatim | Cartouche | Space | Comment | InternalValue |
     3.6      Error of string | Sync | EOF
     3.7 -  type file = {src_path: Path.T, text: string, pos: Position.T}
     3.8 +  type file = {src_path: Path.T, lines: string list, digest: SHA1.digest, pos: Position.T}
     3.9    datatype value =
    3.10      Text of string | Typ of typ | Term of term | Fact of thm list |
    3.11      Attribute of morphism -> attribute | Files of file Exn.result list
    3.12 @@ -80,7 +80,7 @@
    3.13    args.ML).  Note that an assignable ref designates an intermediate
    3.14    state of internalization -- it is NOT meant to persist.*)
    3.15  
    3.16 -type file = {src_path: Path.T, text: string, pos: Position.T};
    3.17 +type file = {src_path: Path.T, lines: string list, digest: SHA1.digest, pos: Position.T};
    3.18  
    3.19  datatype value =
    3.20    Text of string |
     4.1 --- a/src/Pure/PIDE/command.ML	Thu Feb 27 17:24:46 2014 +0100
     4.2 +++ b/src/Pure/PIDE/command.ML	Thu Feb 27 17:29:58 2014 +0100
     4.3 @@ -6,15 +6,17 @@
     4.4  
     4.5  signature COMMAND =
     4.6  sig
     4.7 -  type blob = (string * string option) Exn.result
     4.8 +  type 'a blob = (string * 'a option) Exn.result
     4.9 +  type blob_digest = string blob
    4.10 +  type content = SHA1.digest * string list
    4.11    val read_file: Path.T -> Position.T -> Path.T -> Token.file
    4.12 -  val read: (unit -> theory) -> Path.T -> blob list -> Token.T list -> Toplevel.transition
    4.13 +  val read: (unit -> theory) -> Path.T -> content blob list -> Token.T list -> Toplevel.transition
    4.14    type eval
    4.15    val eval_eq: eval * eval -> bool
    4.16    val eval_running: eval -> bool
    4.17    val eval_finished: eval -> bool
    4.18    val eval_result_state: eval -> Toplevel.state
    4.19 -  val eval: (unit -> theory) -> Path.T -> blob list -> Token.T list -> eval -> eval
    4.20 +  val eval: (unit -> theory) -> Path.T -> content blob list -> Token.T list -> eval -> eval
    4.21    type print
    4.22    val print: bool -> (string * string list) list -> string ->
    4.23      eval -> print list -> print list option
    4.24 @@ -86,13 +88,29 @@
    4.25  
    4.26  (* read *)
    4.27  
    4.28 -type blob = (string * string option) Exn.result;  (*file node name, digest or text*)
    4.29 +type 'a blob = (string * 'a option) Exn.result;  (*file node name, content*)
    4.30 +type blob_digest = string blob;  (*raw digest*)
    4.31 +type content = SHA1.digest * string list;  (*digest, lines*)
    4.32  
    4.33  fun read_file master_dir pos src_path =
    4.34    let
    4.35      val full_path = File.check_file (File.full_path master_dir src_path);
    4.36      val _ = Position.report pos (Markup.path (Path.implode full_path));
    4.37 -  in {src_path = src_path, text = File.read full_path, pos = Path.position full_path} end;
    4.38 +    val text = File.read full_path;
    4.39 +    val lines = split_lines text;
    4.40 +    val digest = SHA1.digest text;
    4.41 +  in {src_path = src_path, lines = lines, digest = digest, pos = Path.position full_path} end;
    4.42 +
    4.43 +local
    4.44 +
    4.45 +fun blob_file src_path file (digest, lines) =
    4.46 +  let
    4.47 +    val file_pos =
    4.48 +      Position.file file (*sic!*) |>
    4.49 +      (case Position.get_id (Position.thread_data ()) of
    4.50 +        NONE => I
    4.51 +      | SOME exec_id => Position.put_id exec_id);
    4.52 +  in {src_path = src_path, lines = lines, digest = digest, pos = file_pos} end
    4.53  
    4.54  fun resolve_files master_dir blobs toks =
    4.55    (case Thy_Syntax.parse_spans toks of
    4.56 @@ -101,17 +119,10 @@
    4.57          let
    4.58            fun make_file src_path (Exn.Res (_, NONE)) =
    4.59                  Exn.interruptible_capture (fn () => read_file master_dir pos src_path) ()
    4.60 -            | make_file src_path (Exn.Res (file, SOME text)) =
    4.61 -                let
    4.62 -                  val _ = Position.report pos (Markup.path file);
    4.63 -                  val file_pos =
    4.64 -                    Position.file file (*sic!*) |>
    4.65 -                    (case Position.get_id (Position.thread_data ()) of
    4.66 -                      NONE => I
    4.67 -                    | SOME exec_id => Position.put_id exec_id);
    4.68 -                in Exn.Res {src_path = src_path, text = text, pos = file_pos} end
    4.69 +            | make_file src_path (Exn.Res (file, SOME content)) =
    4.70 +               (Position.report pos (Markup.path file);
    4.71 +                Exn.Res (blob_file src_path file content))
    4.72              | make_file _ (Exn.Exn e) = Exn.Exn e;
    4.73 -
    4.74            val src_paths = Keyword.command_files cmd path;
    4.75          in
    4.76            if null blobs then
    4.77 @@ -123,6 +134,8 @@
    4.78        |> Thy_Syntax.span_content
    4.79    | _ => toks);
    4.80  
    4.81 +in
    4.82 +
    4.83  fun read init master_dir blobs span =
    4.84    let
    4.85      val outer_syntax = #2 (Outer_Syntax.get_syntax ());
    4.86 @@ -149,6 +162,8 @@
    4.87        handle ERROR msg => Toplevel.malformed (#1 proper_range) msg
    4.88    end;
    4.89  
    4.90 +end;
    4.91 +
    4.92  
    4.93  (* eval *)
    4.94  
     5.1 --- a/src/Pure/PIDE/document.ML	Thu Feb 27 17:24:46 2014 +0100
     5.2 +++ b/src/Pure/PIDE/document.ML	Thu Feb 27 17:29:58 2014 +0100
     5.3 @@ -18,7 +18,7 @@
     5.4    type state
     5.5    val init_state: state
     5.6    val define_blob: string -> string -> state -> state
     5.7 -  val define_command: Document_ID.command -> string -> Command.blob list -> string ->
     5.8 +  val define_command: Document_ID.command -> string -> Command.blob_digest list -> string ->
     5.9      state -> state
    5.10    val remove_versions: Document_ID.version list -> state -> state
    5.11    val start_execution: state -> state
    5.12 @@ -234,8 +234,8 @@
    5.13  
    5.14  abstype state = State of
    5.15   {versions: version Inttab.table,  (*version id -> document content*)
    5.16 -  blobs: string Symtab.table,  (*digest -> text*)
    5.17 -  commands: (string * Command.blob list * Token.T list lazy) Inttab.table,
    5.18 +  blobs: (SHA1.digest * string list) Symtab.table,  (*digest -> digest, lines*)
    5.19 +  commands: (string * Command.blob_digest list * Token.T list lazy) Inttab.table,
    5.20      (*command id -> name, inlined files, command span*)
    5.21    execution: execution}  (*current execution process*)
    5.22  with
    5.23 @@ -275,13 +275,18 @@
    5.24  
    5.25  fun define_blob digest text =
    5.26    map_state (fn (versions, blobs, commands, execution) =>
    5.27 -    let val blobs' = Symtab.update (digest, text) blobs
    5.28 +    let
    5.29 +      val sha1_digest = SHA1.digest text;
    5.30 +      val _ =
    5.31 +        digest = SHA1.rep sha1_digest orelse
    5.32 +          error ("Ill-defined blob: bad SHA1 digest " ^ digest ^ " vs. " ^ SHA1.rep sha1_digest);
    5.33 +      val blobs' = Symtab.update (digest, (sha1_digest, split_lines text)) blobs;
    5.34      in (versions, blobs', commands, execution) end);
    5.35  
    5.36  fun the_blob (State {blobs, ...}) digest =
    5.37    (case Symtab.lookup blobs digest of
    5.38      NONE => error ("Undefined blob: " ^ digest)
    5.39 -  | SOME text => text);
    5.40 +  | SOME content => content);
    5.41  
    5.42  
    5.43  (* commands *)
    5.44 @@ -496,8 +501,8 @@
    5.45  
    5.46        val command_visible = visible_command node command_id';
    5.47        val command_overlays = overlays node command_id';
    5.48 -      val (command_name, blobs0, span0) = the_command state command_id';
    5.49 -      val blobs = (map o Exn.map_result o apsnd o Option.map) (the_blob state) blobs0;
    5.50 +      val (command_name, blob_digests, span0) = the_command state command_id';
    5.51 +      val blobs = (map o Exn.map_result o apsnd o Option.map) (the_blob state) blob_digests;
    5.52        val span = Lazy.force span0;
    5.53  
    5.54        val eval' =
     6.1 --- a/src/Pure/Thy/thy_load.ML	Thu Feb 27 17:24:46 2014 +0100
     6.2 +++ b/src/Pure/Thy/thy_load.ML	Thu Feb 27 17:29:58 2014 +0100
     6.3 @@ -100,7 +100,7 @@
     6.4    parse_files cmd >> (fn files => fn thy =>
     6.5      let
     6.6        val fs = files thy;
     6.7 -      val thy' = fold (fn {src_path, text, ...} => provide (src_path, SHA1.digest text)) fs thy;
     6.8 +      val thy' = fold (fn {src_path, digest, ...} => provide (src_path, digest)) fs thy;
     6.9      in (fs, thy') end);
    6.10  
    6.11  fun load_file thy src_path =
     7.1 --- a/src/Pure/pure_syn.ML	Thu Feb 27 17:24:46 2014 +0100
     7.2 +++ b/src/Pure/pure_syn.ML	Thu Feb 27 17:29:58 2014 +0100
     7.3 @@ -22,11 +22,11 @@
     7.4      "ML text from file"
     7.5      (Thy_Load.parse_files "ML_file" >> (fn files => Toplevel.generic_theory (fn gthy =>
     7.6          let
     7.7 -          val [{src_path, text, pos}] = files (Context.theory_of gthy);
     7.8 -          val provide = Thy_Load.provide (src_path, SHA1.digest text);
     7.9 +          val [{src_path, lines, digest, pos}] = files (Context.theory_of gthy);
    7.10 +          val provide = Thy_Load.provide (src_path, digest);
    7.11          in
    7.12            gthy
    7.13 -          |> ML_Context.exec (fn () => ML_Context.eval_text true pos text)
    7.14 +          |> ML_Context.exec (fn () => ML_Context.eval_text true pos (cat_lines lines))
    7.15            |> Local_Theory.propagate_ml_env
    7.16            |> Context.mapping provide (Local_Theory.background_theory provide)
    7.17          end)));