clarified signature: more explicit type "output";
authorwenzelm
Sat, 28 Sep 2024 21:16:01 +0200
changeset 80992 fc0f2053c4d2
parent 80991 2d07d2bbd8c6
child 80993 addebc07f06e
clarified signature: more explicit type "output"; minor performance tuning of output_ord: static length;
src/Pure/Syntax/parser.ML
--- a/src/Pure/Syntax/parser.ML	Sat Sep 28 20:28:11 2024 +0200
+++ b/src/Pure/Syntax/parser.ML	Sat Sep 28 21:16:01 2024 +0200
@@ -516,7 +516,7 @@
 
 (** parser **)
 
-(* output tree *)
+(* parse tree *)
 
 datatype tree =
   Node of string * tree list |
@@ -540,8 +540,6 @@
     | (Tip t, Tip t') => Lexicon.token_ord (t, t')
     | _ => int_ord (apply2 cons_nr trees));
 
-structure Trees = Table(type key = tree list val ord = pointer_eq_ord (list_ord tree_ord));
-
 end;
 
 fun pretty_tree (Node (c, ts)) =
@@ -550,29 +548,60 @@
       if Lexicon.valued_token tok then [Pretty.str (Lexicon.str_of_token tok)] else [];
 
 
+(* output *)
+
+abstype output = Output of int * tree list
+with
+
+val empty_output = Output (0, []);
+
+fun init_output t = Output (1, [t]);
+
+fun add_output t (Output (n, ts)) = Output (n + 1, t :: ts);
+
+fun append_output (Output (m, ss)) (Output (n, ts)) = Output (m + n, ss @ ts);
+
+val output_ord = pointer_eq_ord (fn (Output (m, ss), Output (n, tt)) =>
+  (case int_ord (m, n) of
+    EQUAL => dict_ord tree_ord (ss, tt)
+  | ord => ord));
+
+fun output_content (Output (_, ts)) = rev ts;
+
+fun output_result (Output (_, [t])) = SOME t
+  | output_result _ = NONE;
+
+end;
+
+structure Output = Table(type key = output val ord = output_ord);
+
+
+
 (* parser state *)
 
 type state =
   (nt * int *       (*identification and production precedence*)
    string *         (*name of production*)
    int) *           (*index for previous state list*)
-  symb list *       (*input: rest of rhs*)
-  tree list;        (*output (reversed): already parsed nonterminals on rhs*)
+  symb list *       (*remaining input -- after "dot"*)
+  output;           (*accepted output -- before "dot"*)
 
 local
 
-fun update_trees (A, (t, p)) used =
+fun update_output (A, (out, p)) used =
   let
-    val (i: int, ts) = the (Inttab.lookup used A);
-    fun update ts' = Inttab.update (A, (i, ts'));
+    val (i: int, output) = the (Inttab.lookup used A);
+    fun update output' = Inttab.update (A, (i, output'));
   in
-    (case Trees.lookup ts t of
-      NONE => (no_prec, update (Trees.make [(t, p)]) used)
-    | SOME q => (q, if p > q then update (Trees.update (t, p) ts) used else used))
+    (case Output.lookup output out of
+      NONE => (no_prec, update (Output.make [(out, p)]) used)
+    | SOME q => (q, if p > q then update (Output.update (out, p) output) used else used))
   end;
 
-val init_prec = (no_prec, Trees.empty);
-fun update_prec (A, p) used = Inttab.map_default (A, init_prec) (fn (_, ts) => (p, ts)) used;
+val init_prec = (no_prec, Output.empty);
+
+fun update_prec (A, p) used =
+  Inttab.map_default (A, init_prec) (fn (_, output) => (p, output)) used;
 
 fun get_states A min max =
   let
@@ -582,17 +611,20 @@
       | head_nonterm _ = false;
   in filter (fn (_, ss, _): state => head_nonterm ss) end;
 
-fun movedot_nonterm tt (info, Nonterminal _ :: sa, ts) : state = (info, sa, tt @ ts);
+fun movedot_nonterm out' (info, Nonterminal _ :: sa, out) : state =
+  (info, sa, append_output out' out);
 
 fun process_states (Gram {prods = gram_prods, chains = gram_chains, ...}) stateset i c states0 =
   let
     val get = Array.nth stateset;
     val set = Array.upd stateset;
 
-    fun add_prods nt min max =
+    fun add_prods nt min max : state list -> state list =
       let
         fun add (rhs, id, prod_prec) =
-          if until_prec min max prod_prec then cons ((nt, prod_prec, id, i), rhs, []) else I;
+          if until_prec min max prod_prec
+          then cons ((nt, prod_prec, id, i), rhs, empty_output)
+          else I;
         fun token_prods prods =
           fold add (prods_lookup prods c) #>
           fold add (prods_lookup prods Lexicon.dummy);
@@ -600,12 +632,13 @@
       in fold (token_prods o nt_prods) (chains_all_preds gram_chains [nt]) end;
 
     fun process _ [] (Si, Sii) = (Si, Sii)
-      | process used ((S as (info, symbs, ts)) :: states) (Si, Sii) =
+      | process used ((S as (info, symbs, out)) :: states) (Si, Sii) =
           (case symbs of
-            Markup (_, body) :: sa => process used ((info, body @ sa, ts) :: states) (Si, Sii)
+            Markup (_, body) :: sa => process used ((info, body @ sa, out) :: states) (Si, Sii)
           | Nonterminal (nt, min_prec) :: sa =>
               let (*predictor operation*)
-                fun movedot_lambda (t, k) = if min_prec <= k then cons (info, sa, t @ ts) else I;
+                fun movedot_lambda (out', k) =
+                  if min_prec <= k then cons (info, sa, append_output out' out) else I;
                 val (used', states', used_trees) =
                   (case Inttab.lookup used nt of
                     SOME (used_prec, used_trees) =>
@@ -619,8 +652,8 @@
                       let
                         val used' = update_prec (nt, min_prec) used;
                         val states' = states |> add_prods nt min_prec no_prec;
-                      in (used', states', Trees.empty) end);
-                val states'' = states' |> Trees.fold movedot_lambda used_trees;
+                      in (used', states', Output.empty) end);
+                val states'' = states' |> Output.fold movedot_lambda used_trees;
               in process used' states'' (S :: Si, Sii) end
           | Terminal a :: sa => (*scanner operation*)
               let
@@ -628,19 +661,22 @@
                 val Sii' =
                   if Lexicon.token_type_ord (a, c) <> EQUAL then Sii
                   else (*move dot*)
-                    let val ts' = if Lexicon.valued_token c orelse id <> "" then Tip c :: ts else ts
-                    in (info, sa, ts') :: Sii end;
+                    let
+                      val out' =
+                        if Lexicon.valued_token c orelse id <> "" then add_output (Tip c) out
+                        else out;
+                    in (info, sa, out') :: Sii end;
               in process used states (S :: Si, Sii') end
           | [] => (*completer operation*)
               let
                 val (A, p, id, j) = info;
-                val tt = if id = "" then ts else [Node (id, rev ts)];
+                val out' = if id = "" then out else init_output (Node (id, output_content out));
                 val (used', Slist) =
                   if j = i then (*lambda production?*)
-                    let val (q, used') = update_trees (A, (tt, p)) used
+                    let val (q, used') = update_output (A, (out', p)) used
                     in (used', get_states A q p Si) end
                   else (used, get_states A no_prec p (get j));
-                val states' = map (movedot_nonterm tt) Slist;
+                val states' = map (movedot_nonterm out') Slist;
               in process used' (states' @ states) (S :: Si, Sii) end)
 
     val (Si, Sii) = process Inttab.empty states0 ([], []);
@@ -686,14 +722,15 @@
       | SOME tok => Lexicon.end_pos_of_token tok);
     val input = toks @ [Lexicon.mk_eof end_pos];
 
-    val S0: state = ((~1, 0, "", 0), [Nonterminal (start_tag, 0), Terminal Lexicon.eof], []);
+    val S0: state =
+      ((~1, 0, "", 0), [Nonterminal (start_tag, 0), Terminal Lexicon.eof], empty_output);
     val stateset = Array.array (length input + 1, []);
     val _ = Array.upd stateset 0 [S0];
 
-    val output =
+    val result =
       produce gram stateset 0 Lexicon.dummy input
-      |> map_filter (fn (_, _, [t]) => SOME t | _ => NONE);
-  in if null output then raise Fail "Inner syntax: no parse trees" else output end;
+      |> map_filter (fn (_, _, out) => output_result out);
+  in if null result then raise Fail "Inner syntax: no parse trees" else result end;
 
 end;