merged
authorwenzelm
Sun, 01 Jun 2025 20:01:22 +0200
changeset 82682 06aac7eaec29
parent 82675 0a3d61228714 (current diff)
parent 82681 b77163d7e847 (diff)
child 82683 71304514891e
merged
NEWS
--- a/NEWS	Sun Jun 01 10:29:45 2025 +0200
+++ b/NEWS	Sun Jun 01 20:01:22 2025 +0200
@@ -263,6 +263,12 @@
 
 *** System ***
 
+* System option "record_theories" tells "isabelle build" to record
+intermediate theory commands and results, at the cost of approx. 5 times
+larger ML heap images. This allows to retrieve fine-grained semantic
+information later on, using Thy_Info.get_theory_segments or
+Thy_Info.get_theory_elements in Isabelle/ML.
+
 * The Z Garbage Collector (ZGC) of Java 21 is now used by default (see
 also https://wiki.openjdk.org/display/zgc). This should work uniformly
 on all platforms, but requires a reasonably new version of Windows
--- a/etc/options	Sun Jun 01 10:29:45 2025 +0200
+++ b/etc/options	Sun Jun 01 20:01:22 2025 +0200
@@ -121,7 +121,10 @@
   -- "tracing of persistent Proof.context values within ML heap"
 
 
-section "Detail of Proof Checking"
+section "Detail of Theory and Proof Processing"
+
+option record_theories : bool = false for content
+  -- "record intermediate theory commands and results"
 
 option record_proofs : int = -1 for content
   -- "set level of proofterm recording: 0, 1, 2, negative means unchanged"
--- a/src/Doc/Implementation/Integration.thy	Sun Jun 01 10:29:45 2025 +0200
+++ b/src/Doc/Implementation/Integration.thy	Sun Jun 01 20:01:22 2025 +0200
@@ -153,7 +153,6 @@
   @{define_ML use_thy: "string -> unit"} \\
   @{define_ML Thy_Info.get_theory: "string -> theory"} \\
   @{define_ML Thy_Info.remove_thy: "string -> unit"} \\
-  @{define_ML Thy_Info.register_thy: "theory -> unit"} \\
   \end{mldecls}
 
   \<^descr> \<^ML>\<open>use_thy\<close>~\<open>A\<close> ensures that theory \<open>A\<close> is fully up-to-date wrt.\ the
@@ -165,10 +164,6 @@
 
   \<^descr> \<^ML>\<open>Thy_Info.remove_thy\<close>~\<open>A\<close> deletes theory \<open>A\<close> and all descendants from
   the theory database.
-
-  \<^descr> \<^ML>\<open>Thy_Info.register_thy\<close>~\<open>text thy\<close> registers an existing theory value
-  with the theory loader database and updates source version information
-  according to the file store.
 \<close>
 
 end
--- a/src/Pure/Build/build.ML	Sun Jun 01 10:29:45 2025 +0200
+++ b/src/Pure/Build/build.ML	Sun Jun 01 20:01:22 2025 +0200
@@ -83,13 +83,13 @@
             let
               val res =
                 theories |>
-                  (map (build_theories session_name)
+                  (maps (build_theories session_name)
                     |> session_timing
                     |> Exn.capture);
               val res1 =
                 (case res of
                   Exn.Res loaded_theories =>
-                    Exn.capture (apply_hooks session_name) (flat loaded_theories)
+                    Exn.capture (apply_hooks session_name) loaded_theories
                 | Exn.Exn exn => Exn.Exn exn);
               val res2 = Exn.capture_body Session.finish;
 
--- a/src/Pure/PIDE/command_span.ML	Sun Jun 01 10:29:45 2025 +0200
+++ b/src/Pure/PIDE/command_span.ML	Sun Jun 01 20:01:22 2025 +0200
@@ -12,6 +12,9 @@
   val kind: span -> kind
   val content: span -> Token.T list
   val symbol_length: span -> int option
+  val eof: span
+  val is_eof: span -> bool
+  val stopper: span Scan.stopper
   val adjust_offsets_kind: (int -> int option) -> kind -> kind
   val adjust_offsets: (int -> int option) -> span -> span
 end;
@@ -34,6 +37,16 @@
 val symbol_length = Position.distance_of o Token.range_of o content;
 
 
+(* stopper *)
+
+val eof = Span (Command_Span ("", Position.none), []);
+
+fun is_eof (Span (Command_Span ("", _), _)) = true
+  | is_eof _ = false;
+
+val stopper = Scan.stopper (K eof) is_eof;
+
+
 (* presentation positions *)
 
 fun adjust_offsets_kind adjust k =
--- a/src/Pure/Thy/document_output.ML	Sun Jun 01 10:29:45 2025 +0200
+++ b/src/Pure/Thy/document_output.ML	Sun Jun 01 20:01:22 2025 +0200
@@ -16,6 +16,8 @@
   type segment =
     {span: Command_Span.span, command: Toplevel.transition,
      prev_state: Toplevel.state, state: Toplevel.state}
+  val segment_eof: segment
+  val segment_stopper: segment Scan.stopper
   val present_thy: Options.T -> Keyword.keywords -> string -> segment list -> Latex.text
   val pretty_term: Proof.context -> term -> Pretty.T
   val pretty_thm: Proof.context -> thm -> Pretty.T
@@ -371,6 +373,14 @@
   {span: Command_Span.span, command: Toplevel.transition,
    prev_state: Toplevel.state, state: Toplevel.state};
 
+val segment_eof: segment =
+  {span = Command_Span.eof, command = Toplevel.empty,
+   prev_state = Toplevel.make_state NONE, state = Toplevel.make_state NONE};
+
+val segment_stopper =
+  Scan.stopper (K segment_eof) (Command_Span.is_eof o #span);
+
+
 local
 
 val markup_true = "\\isamarkuptrue%\n";
--- a/src/Pure/Thy/thy_element.ML	Sun Jun 01 10:29:45 2025 +0200
+++ b/src/Pure/Thy/thy_element.ML	Sun Jun 01 20:01:22 2025 +0200
@@ -11,6 +11,8 @@
   val map_element: ('a -> 'b) -> 'a element -> 'b element
   val flat_element: 'a element -> 'a list
   val last_element: 'a element -> 'a
+  val parse_elements_generic: Keyword.keywords -> ('a -> Command_Span.span) -> 'a Scan.stopper ->
+    'a list -> 'a element list
   val parse_elements: Keyword.keywords -> Command_Span.span list -> Command_Span.span element list
 end;
 
@@ -35,29 +37,18 @@
   | last_element (Element (_, SOME (_, b))) = b;
 
 
-(* scanning spans *)
-
-val eof = Command_Span.Span (Command_Span.Command_Span ("", Position.none), []);
-
-fun is_eof (Command_Span.Span (Command_Span.Command_Span ("", _), _)) = true
-  | is_eof _ = false;
-
-val not_eof = not o is_eof;
-
-val stopper = Scan.stopper (K eof) is_eof;
-
 
 (* parse *)
 
-local
+fun parse_elements_generic keywords get_span stopper =
+  let
+    val not_eof = not o Scan.is_stopper stopper;
 
-fun parse_element keywords =
-  let
     fun category pred other =
-      Scan.one
-        (fn (Command_Span.Span (Command_Span.Command_Span (name, _), _)) =>
-            pred keywords name
-          | _ => other);
+      Scan.one (fn x =>
+        (case get_span x of
+          Command_Span.Span (Command_Span.Command_Span (name, _), _) => pred keywords name
+        | _ => other));
 
     fun theory_element x =
       (category Keyword.is_theory_goal false -- proof >> element) x
@@ -67,15 +58,15 @@
     and proof x = (Scan.repeat proof_element -- category Keyword.is_qed false) x;
 
     val default_element = Scan.one not_eof >> atom;
-  in theory_element || default_element end;
 
-in
+    val parse_element = theory_element || default_element
+  in
+    Source.of_list #>
+    Source.source stopper (Scan.bulk parse_element) #>
+    Source.exhaust
+  end;
 
 fun parse_elements keywords =
-  Source.of_list #>
-  Source.source stopper (Scan.bulk (parse_element keywords)) #>
-  Source.exhaust;
+  parse_elements_generic keywords I Command_Span.stopper;
 
 end;
-
-end;
--- a/src/Pure/Thy/thy_info.ML	Sun Jun 01 10:29:45 2025 +0200
+++ b/src/Pure/Thy/thy_info.ML	Sun Jun 01 20:01:22 2025 +0200
@@ -17,6 +17,8 @@
   val lookup_theory: string -> theory option
   val defined_theory: string -> bool
   val get_theory: string -> theory
+  val get_theory_segments: string -> Document_Output.segment list
+  val get_theory_elements: string -> Document_Output.segment Thy_Element.element list
   val check_theory: Proof.context -> string * Position.T -> theory
   val master_directory: string -> Path.T
   val remove_thy: string -> unit
@@ -24,7 +26,6 @@
     (theory * Document_Output.segment list) list
   val use_thy: string -> unit
   val script_thy: Position.T -> string -> theory -> theory
-  val register_thy: theory -> unit
   val finish: unit -> unit
 end;
 
@@ -105,9 +106,8 @@
   the_default Path.current (Option.map (Path.dir o #1 o #master) d);
 
 local
-  val global_thys =
-    Synchronized.var "Thy_Info.thys"
-      (String_Graph.empty: (deps option * theory option) String_Graph.T);
+  type thy = deps option * (theory * Document_Output.segment list) option;
+  val global_thys = Synchronized.var "Thy_Info.thys" (String_Graph.empty: thy String_Graph.T);
 in
   fun get_thys () = Synchronized.value global_thys;
   fun change_thys f = Synchronized.change global_thys f;
@@ -140,7 +140,7 @@
 
 fun lookup_theory name =
   (case lookup_thy name of
-    SOME (_, SOME theory) => SOME theory
+    SOME (_, SOME (theory, _)) => SOME theory
   | _ => NONE);
 
 val defined_theory = is_some o lookup_theory;
@@ -150,6 +150,28 @@
     SOME theory => theory
   | _ => error ("Theory loader: undefined entry for theory " ^ quote name));
 
+fun get_theory_segments name =
+  let
+    val _ = get_theory name;
+    val segments =
+      (case lookup_thy name of
+        SOME (_, SOME (_, segments)) => segments
+      | _ => []);
+  in
+    if null segments then
+      error ("Theory loader: no theory segments for theory " ^ quote name ^
+        "\n Need to build with option " ^ quote \<^system_option>\<open>record_theories\<close>)
+    else segments
+  end;
+
+fun get_theory_elements name =
+  let
+    val keywords = Thy_Header.get_keywords (get_theory name);
+    val stopper = Document_Output.segment_stopper;
+    val segments = get_theory_segments name;
+  in Thy_Element.parse_elements_generic keywords #span stopper segments end;
+
+
 val get_imports = Resources.imports_of o get_theory;
 
 val check_theory = Theory.check_theory {get = get_theory, all = get_names};
@@ -175,16 +197,16 @@
 
 (* update *)
 
-fun update deps theory thys =
+fun update deps (res as (theory, _)) thys =
   let
     val name = Context.theory_long_name theory;
     val parents = map Context.theory_long_name (Theory.parents_of theory);
 
     val thys' = remove name thys;
-    val _ = map (get thys') parents;
-  in new_entry name parents (SOME deps, SOME theory) thys' end;
+    val _ = List.app (ignore o get thys') parents;
+  in new_entry name parents (SOME deps, SOME res) thys' end;
 
-fun update_thy deps theory = change_thys (update deps theory);
+val update_thy = change_thys oo update;
 
 
 (* scheduling loader tasks *)
@@ -194,14 +216,14 @@
    {theory: theory,
     exec_id: Document_ID.exec,
     present: unit -> presentation_context option,
-    commit: unit -> unit};
+    commit: Document_Output.segment list -> unit};
 
 fun theory_result theory =
   Result
    {theory = theory,
     exec_id = Document_ID.none,
     present = K NONE,
-    commit = I};
+    commit = K ()};
 
 fun result_theory (Result {theory, ...}) = theory;
 fun result_commit (Result {commit, ...}) = commit;
@@ -220,12 +242,10 @@
         val exns = maps Task_Queue.group_status (Execution.peek exec_id);
       in result :: map Exn.Exn exns end;
 
-fun present_theory (Exn.Exn exn) = [Exn.Exn exn]
+fun present_theory (Exn.Exn exn) = SOME (Exn.Exn exn)
   | present_theory (Exn.Res (Result {theory, present, ...})) =
-      (case present () of
-        NONE => []
-      | SOME (context as {segments, ...}) =>
-          [Exn.capture_body (fn () => (apply_presentation context theory; (theory, segments)))]);
+      present () |> Option.map (fn context as {segments, ...} =>
+        Exn.capture_body (fn () => (apply_presentation context theory; (theory, segments))));
 
 in
 
@@ -252,11 +272,14 @@
 
     val results1 = futures |> maps (consolidate_theory o Future.join_result);
 
-    val present_results = futures |> maps (present_theory o Future.join_result);
+    val presented_results = futures |> map (present_theory o Future.join_result);
+    val present_results = map_filter I presented_results;
+
     val results2 = (map o Exn.map_res) (K ()) present_results;
 
-    val results3 = futures
-      |> map (fn future => Exn.capture_body (fn () => result_commit (Future.join future) ()));
+    val results3 = (futures, presented_results) |> ListPair.map (fn (future, presented) =>
+      let val segments = (case presented of SOME (Exn.Res (_, segments)) => segments | _ => []);
+      in Exn.capture_body (fn () => result_commit (Future.join future) segments) end);
 
     val results4 = map Exn.Exn (maps Task_Queue.group_status (Execution.reset ()));
 
@@ -357,7 +380,9 @@
     val timing_props = [Markup.theory_timing, (Markup.nameN, name)];
     val _ = Output.try_protocol_message (timing_props @ Markup.timing_properties timing_result) []
 
-    fun commit () = update_thy deps theory;
+    fun commit segments =
+      update_thy deps (theory,
+        if Options.bool options \<^system_option>\<open>record_theories\<close> then segments else []);
   in Result {theory = theory, exec_id = exec_id, present = SOME o present, commit = commit} end;
 
 fun check_thy_deps dir name =
@@ -444,22 +469,6 @@
   in Toplevel.end_theory end_pos end_state end;
 
 
-(* register theory *)
-
-fun register_thy theory =
-  let
-    val name = Context.theory_long_name theory;
-    val {master, ...} = Resources.check_thy (Resources.master_directory theory) name;
-    val imports = Resources.imports_of theory;
-  in
-    change_thys (fn thys =>
-      let
-        val thys' = remove name thys;
-        val _ = writeln ("Registering theory " ^ quote name);
-      in update (make_deps master imports) theory thys' end)
-  end;
-
-
 (* finish all theories *)
 
 fun finish () = change_thys (String_Graph.map (fn _ => fn (_, entry) => (NONE, entry)));