# HG changeset patch # User wenzelm # Date 1727550961 -7200 # Node ID fc0f2053c4d2aa6f9105f7386db3c17b72880433 # Parent 2d07d2bbd8c60b282f30a383f9b8c38dadcf2c5f clarified signature: more explicit type "output"; minor performance tuning of output_ord: static length; diff -r 2d07d2bbd8c6 -r fc0f2053c4d2 src/Pure/Syntax/parser.ML --- a/src/Pure/Syntax/parser.ML Sat Sep 28 20:28:11 2024 +0200 +++ b/src/Pure/Syntax/parser.ML Sat Sep 28 21:16:01 2024 +0200 @@ -516,7 +516,7 @@ (** parser **) -(* output tree *) +(* parse tree *) datatype tree = Node of string * tree list | @@ -540,8 +540,6 @@ | (Tip t, Tip t') => Lexicon.token_ord (t, t') | _ => int_ord (apply2 cons_nr trees)); -structure Trees = Table(type key = tree list val ord = pointer_eq_ord (list_ord tree_ord)); - end; fun pretty_tree (Node (c, ts)) = @@ -550,29 +548,60 @@ if Lexicon.valued_token tok then [Pretty.str (Lexicon.str_of_token tok)] else []; +(* output *) + +abstype output = Output of int * tree list +with + +val empty_output = Output (0, []); + +fun init_output t = Output (1, [t]); + +fun add_output t (Output (n, ts)) = Output (n + 1, t :: ts); + +fun append_output (Output (m, ss)) (Output (n, ts)) = Output (m + n, ss @ ts); + +val output_ord = pointer_eq_ord (fn (Output (m, ss), Output (n, tt)) => + (case int_ord (m, n) of + EQUAL => dict_ord tree_ord (ss, tt) + | ord => ord)); + +fun output_content (Output (_, ts)) = rev ts; + +fun output_result (Output (_, [t])) = SOME t + | output_result _ = NONE; + +end; + +structure Output = Table(type key = output val ord = output_ord); + + + (* parser state *) type state = (nt * int * (*identification and production precedence*) string * (*name of production*) int) * (*index for previous state list*) - symb list * (*input: rest of rhs*) - tree list; (*output (reversed): already parsed nonterminals on rhs*) + symb list * (*remaining input -- after "dot"*) + output; (*accepted output -- before "dot"*) local -fun update_trees (A, (t, p)) used = +fun update_output (A, (out, p)) used = let - val (i: int, ts) = the (Inttab.lookup used A); - fun update ts' = Inttab.update (A, (i, ts')); + val (i: int, output) = the (Inttab.lookup used A); + fun update output' = Inttab.update (A, (i, output')); in - (case Trees.lookup ts t of - NONE => (no_prec, update (Trees.make [(t, p)]) used) - | SOME q => (q, if p > q then update (Trees.update (t, p) ts) used else used)) + (case Output.lookup output out of + NONE => (no_prec, update (Output.make [(out, p)]) used) + | SOME q => (q, if p > q then update (Output.update (out, p) output) used else used)) end; -val init_prec = (no_prec, Trees.empty); -fun update_prec (A, p) used = Inttab.map_default (A, init_prec) (fn (_, ts) => (p, ts)) used; +val init_prec = (no_prec, Output.empty); + +fun update_prec (A, p) used = + Inttab.map_default (A, init_prec) (fn (_, output) => (p, output)) used; fun get_states A min max = let @@ -582,17 +611,20 @@ | head_nonterm _ = false; in filter (fn (_, ss, _): state => head_nonterm ss) end; -fun movedot_nonterm tt (info, Nonterminal _ :: sa, ts) : state = (info, sa, tt @ ts); +fun movedot_nonterm out' (info, Nonterminal _ :: sa, out) : state = + (info, sa, append_output out' out); fun process_states (Gram {prods = gram_prods, chains = gram_chains, ...}) stateset i c states0 = let val get = Array.nth stateset; val set = Array.upd stateset; - fun add_prods nt min max = + fun add_prods nt min max : state list -> state list = let fun add (rhs, id, prod_prec) = - if until_prec min max prod_prec then cons ((nt, prod_prec, id, i), rhs, []) else I; + if until_prec min max prod_prec + then cons ((nt, prod_prec, id, i), rhs, empty_output) + else I; fun token_prods prods = fold add (prods_lookup prods c) #> fold add (prods_lookup prods Lexicon.dummy); @@ -600,12 +632,13 @@ in fold (token_prods o nt_prods) (chains_all_preds gram_chains [nt]) end; fun process _ [] (Si, Sii) = (Si, Sii) - | process used ((S as (info, symbs, ts)) :: states) (Si, Sii) = + | process used ((S as (info, symbs, out)) :: states) (Si, Sii) = (case symbs of - Markup (_, body) :: sa => process used ((info, body @ sa, ts) :: states) (Si, Sii) + Markup (_, body) :: sa => process used ((info, body @ sa, out) :: states) (Si, Sii) | Nonterminal (nt, min_prec) :: sa => let (*predictor operation*) - fun movedot_lambda (t, k) = if min_prec <= k then cons (info, sa, t @ ts) else I; + fun movedot_lambda (out', k) = + if min_prec <= k then cons (info, sa, append_output out' out) else I; val (used', states', used_trees) = (case Inttab.lookup used nt of SOME (used_prec, used_trees) => @@ -619,8 +652,8 @@ let val used' = update_prec (nt, min_prec) used; val states' = states |> add_prods nt min_prec no_prec; - in (used', states', Trees.empty) end); - val states'' = states' |> Trees.fold movedot_lambda used_trees; + in (used', states', Output.empty) end); + val states'' = states' |> Output.fold movedot_lambda used_trees; in process used' states'' (S :: Si, Sii) end | Terminal a :: sa => (*scanner operation*) let @@ -628,19 +661,22 @@ val Sii' = if Lexicon.token_type_ord (a, c) <> EQUAL then Sii else (*move dot*) - let val ts' = if Lexicon.valued_token c orelse id <> "" then Tip c :: ts else ts - in (info, sa, ts') :: Sii end; + let + val out' = + if Lexicon.valued_token c orelse id <> "" then add_output (Tip c) out + else out; + in (info, sa, out') :: Sii end; in process used states (S :: Si, Sii') end | [] => (*completer operation*) let val (A, p, id, j) = info; - val tt = if id = "" then ts else [Node (id, rev ts)]; + val out' = if id = "" then out else init_output (Node (id, output_content out)); val (used', Slist) = if j = i then (*lambda production?*) - let val (q, used') = update_trees (A, (tt, p)) used + let val (q, used') = update_output (A, (out', p)) used in (used', get_states A q p Si) end else (used, get_states A no_prec p (get j)); - val states' = map (movedot_nonterm tt) Slist; + val states' = map (movedot_nonterm out') Slist; in process used' (states' @ states) (S :: Si, Sii) end) val (Si, Sii) = process Inttab.empty states0 ([], []); @@ -686,14 +722,15 @@ | SOME tok => Lexicon.end_pos_of_token tok); val input = toks @ [Lexicon.mk_eof end_pos]; - val S0: state = ((~1, 0, "", 0), [Nonterminal (start_tag, 0), Terminal Lexicon.eof], []); + val S0: state = + ((~1, 0, "", 0), [Nonterminal (start_tag, 0), Terminal Lexicon.eof], empty_output); val stateset = Array.array (length input + 1, []); val _ = Array.upd stateset 0 [S0]; - val output = + val result = produce gram stateset 0 Lexicon.dummy input - |> map_filter (fn (_, _, [t]) => SOME t | _ => NONE); - in if null output then raise Fail "Inner syntax: no parse trees" else output end; + |> map_filter (fn (_, _, out) => output_result out); + in if null result then raise Fail "Inner syntax: no parse trees" else result end; end;