--- a/src/Pure/Syntax/parser.ML Sun Sep 29 14:55:49 2024 +0200
+++ b/src/Pure/Syntax/parser.ML Sun Sep 29 15:00:20 2024 +0200
@@ -12,9 +12,9 @@
val make_gram: Syntax_Ext.xprod list -> Syntax_Ext.xprod list -> gram option -> gram
val pretty_gram: gram -> Pretty.T list
datatype tree =
+ Markup of Markup.T list * tree list |
Node of string * tree list |
Tip of Lexicon.token
- val tree_ord: tree ord
val pretty_tree: tree -> Pretty.T list
val parse: gram -> string -> Lexicon.token list -> tree list
end;
@@ -62,7 +62,7 @@
datatype symb =
Terminal of Lexicon.token |
Nonterminal of nt * int | (*(tag, precedence)*)
- Markup of Markup.T list * symb list;
+ Bg of Markup.T list | En; (*markup block*)
type prod = symb list * string * int; (*rhs, name, precedence*)
@@ -101,9 +101,8 @@
fun chains_member (chains: chains) = Int_Graph.is_edge chains;
fun chains_declare nt : chains -> chains = Int_Graph.default_node (nt, ());
-fun chains_declares (Terminal _) = I
- | chains_declares (Nonterminal (nt, _)) = chains_declare nt
- | chains_declares (Markup (_, body)) = fold chains_declares body;
+fun chains_declares (Nonterminal (nt, _)) = chains_declare nt
+ | chains_declares _ = I;
fun chains_insert (from, to) =
chains_declare from #> chains_declare to #> Int_Graph.add_edge (from, to);
@@ -169,7 +168,7 @@
(rhs must either be empty or only consist of lambda NTs)*)
fun lambda_symb (Nonterminal (id, _)) = nts_member lambdas' id
| lambda_symb (Terminal _) = false
- | lambda_symb (Markup (_, body)) = forall lambda_symb body;
+ | lambda_symb _ = true;
val new_lambdas =
if forall lambda_symb rhs
then SOME (filter_out (nts_member lambdas') (chains_all_succs chains' [lhs]))
@@ -184,8 +183,7 @@
if nts_member lambdas nt then
lookahead_dependency lambdas symbs (nts_insert nt nts)
else (NONE, nts_insert nt nts)
- | lookahead_dependency lambdas (Markup (_, body) :: symbs) nts =
- lookahead_dependency lambdas (body @ symbs) nts;
+ | lookahead_dependency lambdas (_ :: symbs) nts = lookahead_dependency lambdas symbs nts;
(*get all known starting tokens for a nonterminal*)
fun starts_for_nt nt = snd (fst (Array.nth array_prods nt));
@@ -416,7 +414,7 @@
then [Pretty.quote (Pretty.keyword1 (Lexicon.str_of_token tok))]
else [Pretty.str (Lexicon.str_of_token tok)]
| pretty_symb (Nonterminal (tag, p)) = [Pretty.str (print_nt tag ^ print_pri p)]
- | pretty_symb (Markup (_, body)) = maps pretty_symb body;
+ | pretty_symb _ = [];
fun pretty_const "" = []
| pretty_const c = [Pretty.str ("\<^bold>\<Rightarrow> " ^ quote c)];
@@ -457,31 +455,20 @@
fun extend_gram xprods gram =
let
- fun make_symbs (Syntax_Ext.Delim s :: xsyms) tags =
- let val (syms, xsyms', tags') = make_symbs xsyms tags
- in (Terminal (Lexicon.literal s) :: syms, xsyms', tags') end
- | make_symbs (Syntax_Ext.Argument a :: xsyms) tags =
- let
- val (arg, tags') = make_arg a tags;
- val (syms, xsyms', tags'') = make_symbs xsyms tags';
- in (arg :: syms, xsyms', tags'') end
- | make_symbs (Syntax_Ext.Bg {markup, ...} :: xsyms) tags =
- let
- val (bsyms, xsyms', tags') = make_symbs xsyms tags;
- val (syms, xsyms'', tags'') = make_symbs xsyms' tags';
- val syms' = if null markup then bsyms @ syms else Markup (markup, bsyms) :: syms;
- in (syms', xsyms'', tags'') end
- | make_symbs (Syntax_Ext.En :: xsyms) tags = ([], xsyms, tags)
- | make_symbs (Syntax_Ext.Space _ :: xsyms) tags = make_symbs xsyms tags
- | make_symbs (Syntax_Ext.Brk _ :: xsyms) tags = make_symbs xsyms tags
- | make_symbs [] tags = ([], [], tags);
+ fun make_symb (Syntax_Ext.Delim s) (syms, tags) =
+ (Terminal (Lexicon.literal s) :: syms, tags)
+ | make_symb (Syntax_Ext.Argument a) (syms, tags) =
+ let val (arg, tags') = make_arg a tags
+ in (arg :: syms, tags') end
+ | make_symb (Syntax_Ext.Bg {markup, ...}) (syms, tags) = (Bg markup :: syms, tags)
+ | make_symb (Syntax_Ext.En) (syms, tags) = (En :: syms, tags)
+ | make_symb _ res = res;
fun make_prod (Syntax_Ext.XProd (lhs, xsymbs, const, pri)) (result, tags) =
let
val (tag, tags') = make_tag lhs tags;
- val (symbs, xsymbs', tags'') = make_symbs xsymbs tags';
- val _ = if null xsymbs' then () else raise Fail "Unbalanced blocks in grammar production";
- in ((tag, (symbs, const, pri)) :: result, tags'') end;
+ val (rev_symbs, tags'') = fold make_symb xsymbs ([], tags');
+ in ((tag, (rev rev_symbs, const, pri)) :: result, tags'') end;
val Gram {tags, chains, lambdas, prods} = gram;
@@ -519,26 +506,63 @@
(* parse tree *)
datatype tree =
+ Markup of Markup.T list * tree list |
Node of string * tree list |
Tip of Lexicon.token;
+datatype tree_op =
+ Markup_Push |
+ Markup_Pop of Markup.T list |
+ Node_Op of string * tree_op list |
+ Tip_Op of Lexicon.token;
+
local
- fun cons_nr (Node _) = 0
- | cons_nr (Tip _) = 1;
+ fun drop_markup (Markup_Push :: ts) = drop_markup ts
+ | drop_markup (Markup_Pop _ :: ts) = drop_markup ts
+ | drop_markup ts = ts;
in
- fun tree_ord trees =
- if pointer_eq trees then EQUAL
+ fun tree_ops_ord arg =
+ if pointer_eq arg then EQUAL
else
- (case trees of
- (Node (s, ts), Node (s', ts')) =>
+ (case apply2 drop_markup arg of
+ (Node_Op (s, ts) :: rest, Node_Op (s', ts') :: rest') =>
(case fast_string_ord (s, s') of
- EQUAL => list_ord tree_ord (ts, ts')
+ EQUAL =>
+ (case tree_ops_ord (ts, ts') of
+ EQUAL => tree_ops_ord (rest, rest')
+ | ord => ord)
| ord => ord)
- | (Tip t, Tip t') => Lexicon.token_ord (t, t')
- | _ => int_ord (apply2 cons_nr trees));
+ | (Tip_Op t :: rest, Tip_Op t' :: rest') =>
+ (case Lexicon.token_ord (t, t') of
+ EQUAL => tree_ops_ord (rest, rest')
+ | ord => ord)
+ | (Node_Op _ :: _, Tip_Op _ :: _) => LESS
+ | (Tip_Op _ :: _, Node_Op _ :: _) => GREATER
+ | ([], []) => EQUAL
+ | ([], _ :: _) => LESS
+ | (_ :: _, []) => GREATER
+ | _ => raise Match);
end;
-fun pretty_tree (Node (c, ts)) =
+fun make_tree tree_ops =
+ let
+ fun top [] = []
+ | top (xs :: _) = xs;
+
+ fun pop [] = []
+ | pop (_ :: xs) = xs;
+
+ fun make body stack ops =
+ (case ops of
+ Markup_Push :: rest => make [] (body :: stack) rest
+ | Markup_Pop markup :: rest => make (Markup (markup, body) :: top stack) (pop stack) rest
+ | Node_Op (id, ts) :: rest => make (Node (id, make [] [] ts) :: body) stack rest
+ | Tip_Op tok :: rest => make (Tip tok :: body) stack rest
+ | [] => if null stack then body else raise Fail "Pending markup blocks");
+ in (case make [] [] tree_ops of [t] => SOME t | _ => NONE) end;
+
+fun pretty_tree (Markup (_, ts)) = maps pretty_tree ts
+ | pretty_tree (Node (c, ts)) =
[Pretty.enclose "(" ")" (Pretty.breaks (Pretty.quote (Pretty.str c) :: maps pretty_tree ts))]
| pretty_tree (Tip tok) =
if Lexicon.valued_token tok then [Pretty.str (Lexicon.str_of_token tok)] else [];
@@ -546,24 +570,24 @@
(* output *)
-abstype output = Output of int * tree list
+abstype output = Output of int * tree_op list
with
val empty_output = Output (0, []);
-fun token_output tok (Output (n, ts)) = Output (n + 1, Tip tok :: ts);
+fun token_output tok (Output (n, ts)) = Output (n + 1, Tip_Op tok :: ts);
-fun node_output id (Output (n, ts)) = Output (n, [Node (id, rev ts)]);
+fun node_output id (Output (n, ts)) = Output (n, [Node_Op (id, ts)]);
+
+fun push_output (Output (n, ts)) = Output (n, Markup_Push :: ts);
+fun pop_output markup (Output (n, ts)) = Output (n, Markup_Pop markup :: ts);
fun append_output (Output (m, ss)) (Output (n, ts)) = Output (m + n, ss @ ts);
val output_ord = pointer_eq_ord (fn (Output (m, ss), Output (n, tt)) =>
- (case int_ord (m, n) of
- EQUAL => dict_ord tree_ord (ss, tt)
- | ord => ord));
+ (case int_ord (m, n) of EQUAL => tree_ops_ord (ss, tt) | ord => ord));
-fun output_result (Output (_, [t])) = SOME t
- | output_result _ = NONE;
+fun output_tree (Output (_, ts)) = make_tree ts;
end;
@@ -599,11 +623,9 @@
fun get_states A min max =
let
- fun head_nonterm (Nonterminal (B, p) :: _) = A = B andalso upto_prec min max p
- | head_nonterm (Markup (_, []) :: rest) = head_nonterm rest
- | head_nonterm (Markup (_, body) :: _) = head_nonterm body
- | head_nonterm _ = false;
- in filter (fn (_, ss, _): state => head_nonterm ss) end;
+ fun ok (Nonterminal (B, p) :: _) = A = B andalso upto_prec min max p
+ | ok _ = false;
+ in filter (fn (_, ss, _): state => ok ss) end;
fun movedot_nonterm out' (info, Nonterminal _ :: sa, out) : state =
(info, sa, append_output out' out);
@@ -628,8 +650,7 @@
fun process _ [] (Si, Sii) = (Si, Sii)
| process used ((S as (info, symbs, out)) :: states) (Si, Sii) =
(case symbs of
- Markup (_, body) :: sa => process used ((info, body @ sa, out) :: states) (Si, Sii)
- | Nonterminal (nt, min_prec) :: sa =>
+ Nonterminal (nt, min_prec) :: sa =>
let (*predictor operation*)
fun movedot_lambda (out', k) =
if min_prec <= k then cons (info, sa, append_output out' out) else I;
@@ -657,10 +678,12 @@
else (*move dot*)
let
val out' =
- if Lexicon.valued_token c orelse id <> "" then token_output c out
- else out;
+ if Lexicon.valued_token c orelse id <> ""
+ then token_output c out else out;
in (info, sa, out') :: Sii end;
in process used states (S :: Si, Sii') end
+ | Bg markup :: sa => process used ((info, sa, pop_output markup out) :: states) (Si, Sii)
+ | En :: sa => process used ((info, sa, push_output out) :: states) (Si, Sii)
| [] => (*completer operation*)
let
val (A, p, id, j) = info;
@@ -723,7 +746,7 @@
val result =
produce gram stateset 0 Lexicon.dummy input
- |> map_filter (fn (_, _, out) => output_result out);
+ |> map_filter (output_tree o #3);
in if null result then raise Fail "Inner syntax: no parse trees" else result end;
end;