# HG changeset patch # User wenzelm # Date 1727614820 -7200 # Node ID 7f9e8516ca05cd7b468379ee02faecbb8e40fdb0 # Parent 3668aec7afa2e0744069b5888ae8f58c2849cd83 clarified input and output: support markup blocks via Bg/En; clarified datatype tree vs. tree_ops: reconstruct nested markup blocks via make_tree; clarified tree_ops_ord: ignore markup blocks, proceed like dict_ord; diff -r 3668aec7afa2 -r 7f9e8516ca05 src/Pure/Syntax/parser.ML --- a/src/Pure/Syntax/parser.ML Sun Sep 29 14:55:49 2024 +0200 +++ b/src/Pure/Syntax/parser.ML Sun Sep 29 15:00:20 2024 +0200 @@ -12,9 +12,9 @@ val make_gram: Syntax_Ext.xprod list -> Syntax_Ext.xprod list -> gram option -> gram val pretty_gram: gram -> Pretty.T list datatype tree = + Markup of Markup.T list * tree list | Node of string * tree list | Tip of Lexicon.token - val tree_ord: tree ord val pretty_tree: tree -> Pretty.T list val parse: gram -> string -> Lexicon.token list -> tree list end; @@ -62,7 +62,7 @@ datatype symb = Terminal of Lexicon.token | Nonterminal of nt * int | (*(tag, precedence)*) - Markup of Markup.T list * symb list; + Bg of Markup.T list | En; (*markup block*) type prod = symb list * string * int; (*rhs, name, precedence*) @@ -101,9 +101,8 @@ fun chains_member (chains: chains) = Int_Graph.is_edge chains; fun chains_declare nt : chains -> chains = Int_Graph.default_node (nt, ()); -fun chains_declares (Terminal _) = I - | chains_declares (Nonterminal (nt, _)) = chains_declare nt - | chains_declares (Markup (_, body)) = fold chains_declares body; +fun chains_declares (Nonterminal (nt, _)) = chains_declare nt + | chains_declares _ = I; fun chains_insert (from, to) = chains_declare from #> chains_declare to #> Int_Graph.add_edge (from, to); @@ -169,7 +168,7 @@ (rhs must either be empty or only consist of lambda NTs)*) fun lambda_symb (Nonterminal (id, _)) = nts_member lambdas' id | lambda_symb (Terminal _) = false - | lambda_symb (Markup (_, body)) = forall lambda_symb body; + | lambda_symb _ = true; val new_lambdas = if forall lambda_symb rhs then SOME (filter_out (nts_member lambdas') (chains_all_succs chains' [lhs])) @@ -184,8 +183,7 @@ if nts_member lambdas nt then lookahead_dependency lambdas symbs (nts_insert nt nts) else (NONE, nts_insert nt nts) - | lookahead_dependency lambdas (Markup (_, body) :: symbs) nts = - lookahead_dependency lambdas (body @ symbs) nts; + | lookahead_dependency lambdas (_ :: symbs) nts = lookahead_dependency lambdas symbs nts; (*get all known starting tokens for a nonterminal*) fun starts_for_nt nt = snd (fst (Array.nth array_prods nt)); @@ -416,7 +414,7 @@ then [Pretty.quote (Pretty.keyword1 (Lexicon.str_of_token tok))] else [Pretty.str (Lexicon.str_of_token tok)] | pretty_symb (Nonterminal (tag, p)) = [Pretty.str (print_nt tag ^ print_pri p)] - | pretty_symb (Markup (_, body)) = maps pretty_symb body; + | pretty_symb _ = []; fun pretty_const "" = [] | pretty_const c = [Pretty.str ("\<^bold>\ " ^ quote c)]; @@ -457,31 +455,20 @@ fun extend_gram xprods gram = let - fun make_symbs (Syntax_Ext.Delim s :: xsyms) tags = - let val (syms, xsyms', tags') = make_symbs xsyms tags - in (Terminal (Lexicon.literal s) :: syms, xsyms', tags') end - | make_symbs (Syntax_Ext.Argument a :: xsyms) tags = - let - val (arg, tags') = make_arg a tags; - val (syms, xsyms', tags'') = make_symbs xsyms tags'; - in (arg :: syms, xsyms', tags'') end - | make_symbs (Syntax_Ext.Bg {markup, ...} :: xsyms) tags = - let - val (bsyms, xsyms', tags') = make_symbs xsyms tags; - val (syms, xsyms'', tags'') = make_symbs xsyms' tags'; - val syms' = if null markup then bsyms @ syms else Markup (markup, bsyms) :: syms; - in (syms', xsyms'', tags'') end - | make_symbs (Syntax_Ext.En :: xsyms) tags = ([], xsyms, tags) - | make_symbs (Syntax_Ext.Space _ :: xsyms) tags = make_symbs xsyms tags - | make_symbs (Syntax_Ext.Brk _ :: xsyms) tags = make_symbs xsyms tags - | make_symbs [] tags = ([], [], tags); + fun make_symb (Syntax_Ext.Delim s) (syms, tags) = + (Terminal (Lexicon.literal s) :: syms, tags) + | make_symb (Syntax_Ext.Argument a) (syms, tags) = + let val (arg, tags') = make_arg a tags + in (arg :: syms, tags') end + | make_symb (Syntax_Ext.Bg {markup, ...}) (syms, tags) = (Bg markup :: syms, tags) + | make_symb (Syntax_Ext.En) (syms, tags) = (En :: syms, tags) + | make_symb _ res = res; fun make_prod (Syntax_Ext.XProd (lhs, xsymbs, const, pri)) (result, tags) = let val (tag, tags') = make_tag lhs tags; - val (symbs, xsymbs', tags'') = make_symbs xsymbs tags'; - val _ = if null xsymbs' then () else raise Fail "Unbalanced blocks in grammar production"; - in ((tag, (symbs, const, pri)) :: result, tags'') end; + val (rev_symbs, tags'') = fold make_symb xsymbs ([], tags'); + in ((tag, (rev rev_symbs, const, pri)) :: result, tags'') end; val Gram {tags, chains, lambdas, prods} = gram; @@ -519,26 +506,63 @@ (* parse tree *) datatype tree = + Markup of Markup.T list * tree list | Node of string * tree list | Tip of Lexicon.token; +datatype tree_op = + Markup_Push | + Markup_Pop of Markup.T list | + Node_Op of string * tree_op list | + Tip_Op of Lexicon.token; + local - fun cons_nr (Node _) = 0 - | cons_nr (Tip _) = 1; + fun drop_markup (Markup_Push :: ts) = drop_markup ts + | drop_markup (Markup_Pop _ :: ts) = drop_markup ts + | drop_markup ts = ts; in - fun tree_ord trees = - if pointer_eq trees then EQUAL + fun tree_ops_ord arg = + if pointer_eq arg then EQUAL else - (case trees of - (Node (s, ts), Node (s', ts')) => + (case apply2 drop_markup arg of + (Node_Op (s, ts) :: rest, Node_Op (s', ts') :: rest') => (case fast_string_ord (s, s') of - EQUAL => list_ord tree_ord (ts, ts') + EQUAL => + (case tree_ops_ord (ts, ts') of + EQUAL => tree_ops_ord (rest, rest') + | ord => ord) | ord => ord) - | (Tip t, Tip t') => Lexicon.token_ord (t, t') - | _ => int_ord (apply2 cons_nr trees)); + | (Tip_Op t :: rest, Tip_Op t' :: rest') => + (case Lexicon.token_ord (t, t') of + EQUAL => tree_ops_ord (rest, rest') + | ord => ord) + | (Node_Op _ :: _, Tip_Op _ :: _) => LESS + | (Tip_Op _ :: _, Node_Op _ :: _) => GREATER + | ([], []) => EQUAL + | ([], _ :: _) => LESS + | (_ :: _, []) => GREATER + | _ => raise Match); end; -fun pretty_tree (Node (c, ts)) = +fun make_tree tree_ops = + let + fun top [] = [] + | top (xs :: _) = xs; + + fun pop [] = [] + | pop (_ :: xs) = xs; + + fun make body stack ops = + (case ops of + Markup_Push :: rest => make [] (body :: stack) rest + | Markup_Pop markup :: rest => make (Markup (markup, body) :: top stack) (pop stack) rest + | Node_Op (id, ts) :: rest => make (Node (id, make [] [] ts) :: body) stack rest + | Tip_Op tok :: rest => make (Tip tok :: body) stack rest + | [] => if null stack then body else raise Fail "Pending markup blocks"); + in (case make [] [] tree_ops of [t] => SOME t | _ => NONE) end; + +fun pretty_tree (Markup (_, ts)) = maps pretty_tree ts + | pretty_tree (Node (c, ts)) = [Pretty.enclose "(" ")" (Pretty.breaks (Pretty.quote (Pretty.str c) :: maps pretty_tree ts))] | pretty_tree (Tip tok) = if Lexicon.valued_token tok then [Pretty.str (Lexicon.str_of_token tok)] else []; @@ -546,24 +570,24 @@ (* output *) -abstype output = Output of int * tree list +abstype output = Output of int * tree_op list with val empty_output = Output (0, []); -fun token_output tok (Output (n, ts)) = Output (n + 1, Tip tok :: ts); +fun token_output tok (Output (n, ts)) = Output (n + 1, Tip_Op tok :: ts); -fun node_output id (Output (n, ts)) = Output (n, [Node (id, rev ts)]); +fun node_output id (Output (n, ts)) = Output (n, [Node_Op (id, ts)]); + +fun push_output (Output (n, ts)) = Output (n, Markup_Push :: ts); +fun pop_output markup (Output (n, ts)) = Output (n, Markup_Pop markup :: ts); fun append_output (Output (m, ss)) (Output (n, ts)) = Output (m + n, ss @ ts); val output_ord = pointer_eq_ord (fn (Output (m, ss), Output (n, tt)) => - (case int_ord (m, n) of - EQUAL => dict_ord tree_ord (ss, tt) - | ord => ord)); + (case int_ord (m, n) of EQUAL => tree_ops_ord (ss, tt) | ord => ord)); -fun output_result (Output (_, [t])) = SOME t - | output_result _ = NONE; +fun output_tree (Output (_, ts)) = make_tree ts; end; @@ -599,11 +623,9 @@ fun get_states A min max = let - fun head_nonterm (Nonterminal (B, p) :: _) = A = B andalso upto_prec min max p - | head_nonterm (Markup (_, []) :: rest) = head_nonterm rest - | head_nonterm (Markup (_, body) :: _) = head_nonterm body - | head_nonterm _ = false; - in filter (fn (_, ss, _): state => head_nonterm ss) end; + fun ok (Nonterminal (B, p) :: _) = A = B andalso upto_prec min max p + | ok _ = false; + in filter (fn (_, ss, _): state => ok ss) end; fun movedot_nonterm out' (info, Nonterminal _ :: sa, out) : state = (info, sa, append_output out' out); @@ -628,8 +650,7 @@ fun process _ [] (Si, Sii) = (Si, Sii) | process used ((S as (info, symbs, out)) :: states) (Si, Sii) = (case symbs of - Markup (_, body) :: sa => process used ((info, body @ sa, out) :: states) (Si, Sii) - | Nonterminal (nt, min_prec) :: sa => + Nonterminal (nt, min_prec) :: sa => let (*predictor operation*) fun movedot_lambda (out', k) = if min_prec <= k then cons (info, sa, append_output out' out) else I; @@ -657,10 +678,12 @@ else (*move dot*) let val out' = - if Lexicon.valued_token c orelse id <> "" then token_output c out - else out; + if Lexicon.valued_token c orelse id <> "" + then token_output c out else out; in (info, sa, out') :: Sii end; in process used states (S :: Si, Sii') end + | Bg markup :: sa => process used ((info, sa, pop_output markup out) :: states) (Si, Sii) + | En :: sa => process used ((info, sa, push_output out) :: states) (Si, Sii) | [] => (*completer operation*) let val (A, p, id, j) = info; @@ -723,7 +746,7 @@ val result = produce gram stateset 0 Lexicon.dummy input - |> map_filter (fn (_, _, out) => output_result out); + |> map_filter (output_tree o #3); in if null result then raise Fail "Inner syntax: no parse trees" else result end; end; diff -r 3668aec7afa2 -r 7f9e8516ca05 src/Pure/Syntax/syntax_phases.ML --- a/src/Pure/Syntax/syntax_phases.ML Sun Sep 29 14:55:49 2024 +0200 +++ b/src/Pure/Syntax/syntax_phases.ML Sun Sep 29 15:00:20 2024 +0200 @@ -185,7 +185,8 @@ fun asts_of_position c tok = [Ast.Appl [Ast.Constant c, ast_of (Parser.Tip tok), ast_of_position tok]] - and asts_of (Parser.Node ("_class_name", [Parser.Tip tok])) = + and asts_of (Parser.Markup (markup, pts)) = maps asts_of pts + | asts_of (Parser.Node ("_class_name", [Parser.Tip tok])) = let val pos = report_pos tok; val (c, rs) = Proof_Context.check_class ctxt (Lexicon.str_of_token tok, pos);