# HG changeset patch # User clasohm # Date 767011433 -7200 # Node ID 2fda15dd1e0f0259f29dcf676318ad2f156e752f # Parent 92586a9786487125e0314b038d9905942338d16e changed the way a grammar is generated to allow the new parser to work; also made a lot of changes in parser.ML and minor ones elsewhere diff -r 92586a978648 -r 2fda15dd1e0f src/Pure/Syntax/ROOT.ML --- a/src/Pure/Syntax/ROOT.ML Fri Apr 15 18:43:21 1994 +0200 +++ b/src/Pure/Syntax/ROOT.ML Fri Apr 22 12:43:53 1994 +0200 @@ -10,7 +10,7 @@ use "ast.ML"; use "syn_ext.ML"; use "parser.ML"; -use "earley0A.ML"; +(*use "earley0A.ML";*) use "type_ext.ML"; use "sextension.ML"; use "printer.ML"; @@ -23,13 +23,12 @@ structure SynExt = SynExtFun(structure Lexicon = Lexicon and Ast = Ast); structure Parser = ParserFun(structure Symtab = Symtab and Lexicon = Lexicon and SynExt = SynExt); -structure Earley = EarleyFun(structure Symtab = Symtab and Lexicon = Lexicon - and SynExt = SynExt); +(*structure Earley = EarleyFun(structure Symtab = Symtab and Lexicon = Lexicon + and SynExt = SynExt);*) structure TypeExt = TypeExtFun(structure Lexicon = Lexicon and SynExt = SynExt); -structure SExtension = SExtensionFun(Earley); +structure SExtension = SExtensionFun(Parser); structure Printer = PrinterFun(structure Symtab = Symtab and TypeExt = TypeExt and SExtension = SExtension); structure Syntax = SyntaxFun(structure Symtab = Symtab and TypeExt = TypeExt and SExtension = SExtension and Printer = Printer); structure BasicSyntax: BASIC_SYNTAX = Syntax; - diff -r 92586a978648 -r 2fda15dd1e0f src/Pure/Syntax/earley0A.ML --- a/src/Pure/Syntax/earley0A.ML Fri Apr 15 18:43:21 1994 +0200 +++ b/src/Pure/Syntax/earley0A.ML Fri Apr 22 12:43:53 1994 +0200 @@ -18,7 +18,7 @@ datatype parsetree = Node of string * parsetree list | Tip of token - val parse: gram -> string -> token list -> parsetree + val parse: gram -> string -> token list -> parsetree list end end; @@ -414,7 +414,7 @@ fun syn_err toks = error ("Syntax error at: " ^ quote (space_implode " " (map str_of_token toks))); -fun parse ((_, _, (tab, opLA, rchA)):gram) (root:string) (tl: token list): parsetree = +fun parse ((_, _, (tab, opLA, rchA)):gram) (root:string) (tl: token list): parsetree list = let val tl' = tl; val state_sets = mt_states(len tl' + 1); @@ -455,7 +455,7 @@ lr (tl', 0, s0, EndToken(*dummy*)); (*print_stat state_sets;*) let val St(_, _, _, _, [pt]) = fst_state(sub(state_sets, len tl')) - in pt end + in [pt] end end; end; diff -r 92586a978648 -r 2fda15dd1e0f src/Pure/Syntax/lexicon.ML --- a/src/Pure/Syntax/lexicon.ML Fri Apr 15 18:43:21 1994 +0200 +++ b/src/Pure/Syntax/lexicon.ML Fri Apr 22 12:43:53 1994 +0200 @@ -40,7 +40,7 @@ include SCANNER include LEXICON0 val is_xid: string -> bool - val is_tfree: string -> bool + val is_tid: string -> bool type lexicon datatype token = Token of string | @@ -51,13 +51,14 @@ EndToken val id: string val var: string - val tfree: string + val tid: string val tvar: string val terminals: string list val is_terminal: string -> bool val str_of_token: token -> string val display_token: token -> string val matching_tokens: token * token -> bool + val token_assoc: (token option * 'a list) list * token -> 'a list val valued_token: token -> bool val predef_term: string -> token option val dest_lexicon: lexicon -> string list @@ -83,7 +84,7 @@ "_" :: cs => is_ident cs | cs => is_ident cs); -fun is_tfree s = +fun is_tid s = (case explode s of "'" :: cs => is_ident cs | _ => false); @@ -118,10 +119,10 @@ val id = "id"; val var = "var"; -val tfree = "tfree"; +val tid = "tid"; val tvar = "tvar"; -val terminals = [id, var, tfree, tvar]; +val terminals = [id, var, tid, tvar]; fun is_terminal s = s mem terminals; @@ -141,7 +142,7 @@ fun display_token (Token s) = quote s | display_token (IdentSy s) = "id(" ^ s ^ ")" | display_token (VarSy s) = "var(" ^ s ^ ")" - | display_token (TFreeSy s) = "tfree(" ^ s ^ ")" + | display_token (TFreeSy s) = "tid(" ^ s ^ ")" | display_token (TVarSy s) = "tvar(" ^ s ^ ")" | display_token EndToken = ""; @@ -157,6 +158,17 @@ | matching_tokens _ = false; +(* this function is needed in parser.ML but is placed here + for better performance *) +fun token_assoc (list, key) = + let fun assoc [] = [] + | assoc ((keyi, xi) :: pairs) = + if is_none keyi orelse matching_tokens (the keyi, key) then + (assoc pairs) @ xi + else assoc pairs; + in assoc list end; + + (* valued_token *) fun valued_token (Token _) = false @@ -172,7 +184,7 @@ fun predef_term name = if name = id then Some (IdentSy name) else if name = var then Some (VarSy name) - else if name = tfree then Some (TFreeSy name) + else if name = tid then Some (TFreeSy name) else if name = tvar then Some (TVarSy name) else None; diff -r 92586a978648 -r 2fda15dd1e0f src/Pure/Syntax/parser.ML --- a/src/Pure/Syntax/parser.ML Fri Apr 15 18:43:21 1994 +0200 +++ b/src/Pure/Syntax/parser.ML Fri Apr 22 12:43:53 1994 +0200 @@ -5,8 +5,6 @@ Isabelle's main parser (used for terms and typs). TODO: - fix ambiguous grammar problem - ~1 --> chain_pri improve syntax error extend_gram: remove 'roots' arg *) @@ -24,12 +22,12 @@ datatype parsetree = Node of string * parsetree list | Tip of token - val parse: gram -> string -> token list -> parsetree + val parse: gram -> string -> token list -> parsetree list end end; functor ParserFun(structure Symtab: SYMTAB and Lexicon: LEXICON - and SynExt: SYN_EXT)(*: PARSER *) = (* FIXME *) + and SynExt: SYN_EXT) = struct structure Pretty = SynExt.Ast.Pretty; @@ -44,11 +42,222 @@ Terminal of token | Nonterminal of string * int; -datatype gram = - Gram of (string * (symb list * string * int)) list - * (symb list * string * int) list Symtab.table; +datatype refsymb = Term of token | Nonterm of rhss_ref * int + (*reference to production list instead of name*) +and gram = Gram of (string * (symb list * string * int)) list * + (string * rhss_ref) list +withtype rhss_ref = (token option * (refsymb list * string * int) list) list ref + (*lookahead table: token and productions*) + +(* convert productions to reference grammar with lookaheads and eliminate + chain productions *) +fun mk_gram prods = + let (*get reference on list of all possible rhss for nonterminal lhs + (if it doesn't exist a new one is created and added to the nonterminal + list)*) + fun get_rhss ref_prods lhs = + case assoc (ref_prods, lhs) of + None => + let val l = ref [(None, [])] + in (l, (lhs, l) :: ref_prods) end + | Some l => (l, ref_prods); + + (*convert symb list to refsymb list*) + fun mk_refsymbs ref_prods [] rs = (rs, ref_prods) + | mk_refsymbs ref_prods (Terminal tk :: symbs) rs = + mk_refsymbs ref_prods symbs (rs @ [Term tk]) + | mk_refsymbs ref_prods (Nonterminal (name, prec) :: symbs) rs = + let val (rhss, ref_prods') = get_rhss ref_prods name + in mk_refsymbs ref_prods' symbs (rs @ [Nonterm (rhss, prec)]) + end; + + (*convert prod list to (string * rhss_ref) list + without computing lookaheads*) + fun mk_ref_gram [] ref_prods = ref_prods + | mk_ref_gram ((lhs, (rhs, name, prec)) :: ps) ref_prods = + let val (rhs', ref_prods') = get_rhss ref_prods lhs; + val (dummy, rhss) = hd (!rhs'); + val (ref_symbs, ref_prods'') = mk_refsymbs ref_prods' rhs []; + in rhs' := [(dummy, (ref_symbs, name, prec) :: rhss)]; + mk_ref_gram ps ref_prods'' + end; + + (*eliminate chain productions*) + fun elim_chain ref_gram = + let (*make a list of pairs representing chain productions and delete + these productions*) + fun list_chain [] = [] + | list_chain ((_, rhss_ref) :: ps) = + let fun lists [] new_rhss chains = (new_rhss, chains) + | lists (([Nonterm (id2, ~1)], _, ~1) :: rs) + new_rhss chains = + lists rs new_rhss ((rhss_ref, id2) :: chains) + | lists (rhs :: rs) new_rhss chains = + lists rs (rhs :: new_rhss) chains; + + val (dummy, rhss) = hd (!rhss_ref); + + val (new_rhss, chains) = lists rhss [] []; + in rhss_ref := [(dummy, new_rhss)]; + chains @ (list_chain ps) + end; + + (*convert a list of pairs to an association list*) + fun compress chains = + let fun doit [] result = result + | doit ((id1, id2) :: ps) result = + doit ps + (overwrite (result, (id1, id2 :: (assocs result id1)))); + in doit chains [] end; + + (*replace reference by list of rhss in chain pairs*) + fun unref (id1, ids) = + let fun unref1 [] = [] + | unref1 (id :: ids) = + let val (_, rhss) = hd (!id); + in rhss @ (unref1 ids) end; + in (id1, unref1 ids) end; + + val chain_pairs = + map unref (transitive_closure (compress (list_chain ref_gram))); + + (*add new rhss to productions*) + fun mk_chain (rhss_ref, rhss) = + let val (dummy, old_rhss) = hd (!rhss_ref); + in rhss_ref := [(dummy, old_rhss @ rhss)] end; + in map mk_chain chain_pairs; + ref_gram + end; + + val ref_gram = elim_chain (mk_ref_gram prods []); -fun mk_gram prods = Gram (prods, Symtab.make_multi prods); + (*make a list of all lambda NTs + (i.e. nonterminals that can produce lambda*) + val lambdas = + let fun lambda [] result = result + | lambda ((_, rhss_ref) :: nts) result = + if rhss_ref mem result then + lambda nts result + else + let (*test if a list of rhss contains a production + consisting only of lambda NTs*) + fun only_lambdas _ [] = [] + | only_lambdas result ((_, rhss_ref) :: ps) = + let fun only (symbs, _, _) = + forall (fn (Nonterm (id, _)) => id mem result + | (Term _) => false) symbs; + + val (_, rhss) = hd (!rhss_ref); + in if not (rhss_ref mem result) andalso + exists only rhss then + rhss_ref :: (only_lambdas result ps) + else + only_lambdas result ps + end; + + (*search for NTs that can be produced by a list of + lambda NTs*) + fun produced_by [] result = result + | produced_by (rhss_ref :: rhss_refs) result = + let val new_lambdas = + only_lambdas result ref_gram; + in produced_by (rhss_refs union new_lambdas) + (result @ new_lambdas) + end; + + val (_, rhss) = hd (!rhss_ref); + in if exists (fn (symbs, _, _) => null symbs) rhss + then lambda nts + (produced_by [rhss_ref] (rhss_ref :: result)) + else lambda nts result + end; + in lambda ref_gram [] end; + + (*list all nonterminals on which the lookahead depends (due to lambda + NTs this can be more than one) + and report if there is a terminal at the 'start'*) + fun rhss_start [] skipped = (None, skipped) + | rhss_start (Term tk :: _) skipped = (Some tk, skipped) + | rhss_start (Nonterm (rhss_ref, _) :: rest) skipped = + if rhss_ref mem lambdas then + rhss_start rest (rhss_ref ins skipped) + else + (None, rhss_ref ins skipped); + + (*list all terminals that can start the given rhss*) + fun look_rhss starts rhss_ref = + let fun look [] _ = [] + | look ((symbs, _, _) :: todos) done = + let val (start_token, skipped) = rhss_start symbs []; + + (*process all nonterminals on which the lookahead + depends and build the new todo and done lists for + the look function*) + fun look2 [] todos = look todos (done union skipped) + | look2 (rhss_ref :: ls) todos = + if rhss_ref mem done then look2 ls todos + else case assoc (starts, rhss_ref) of + Some tks => tks union (look2 ls todos) + | None => let val (_, rhss) = hd (!rhss_ref); + in look2 ls (rhss union todos) end; + in case start_token of + Some tk => start_token ins (look2 skipped todos) + | None => look2 skipped todos + end; + + val (_, rhss) = hd (!rhss_ref); + in look rhss [rhss_ref] end; + + (*make a table that contains all possible starting terminals + for each nonterminal*) + fun mk_starts [] starts = starts + | mk_starts ((_, rhss_ref) :: ps) starts = + mk_starts ps ((rhss_ref, look_rhss starts rhss_ref) :: starts); + + val starts = mk_starts ref_gram []; + + (*add list of allowed starting tokens to productions*) + fun mk_lookahead (_, rhss_ref) = + let (*add item to lookahead list (a list containing pairs of token and + rhss that can be started with this token*) + fun add_start _ [] table = table + | add_start new_rhs (token :: tks) table = + let fun add [] = + [(token, [new_rhs])] + | add ((tk, rhss) :: ss) = + if token = tk then + (tk, new_rhs :: rhss) :: ss + else + (tk, rhss) :: (add ss); + in add_start new_rhs tks (add table) end; + + (*combine all lookaheads of a list of nonterminals*) + fun combine_starts [] = [] + | combine_starts (rhss_ref :: ps) = + let val Some tks = assoc (starts, rhss_ref) + in tks union combine_starts ps end; + + (*get lookahead for a rhs and update lhs' lookahead list*) + fun look_rhss [] table = table + | look_rhss ((rhs as (symbs, id, prec)) :: rs) table = + let val (start_token, skipped) = rhss_start symbs []; + val starts = case start_token of + Some tk => Some tk + ins (combine_starts skipped) + | None => if skipped = [] + orelse forall (fn id => id mem lambdas) skipped then + (*lambda production?*) + [None] + else + combine_starts skipped; + in look_rhss rs (add_start rhs starts table) end; + + val (_, rhss) = hd (!rhss_ref); + in rhss_ref := look_rhss rhss [] end; + + in map mk_lookahead ref_gram; + Gram (prods, ref_gram) + end; (* empty, extend, merge grams *) @@ -111,41 +320,39 @@ Tip of token; type state = - string * int - * parsetree list (*before point*) - * symb list (*after point*) - * string * int; + rhss_ref * int (*lhs: identification and production precedence*) + * parsetree list (*already parsed nonterminals on rhs*) + * refsymb list (*rest of rhs*) + * string (*name of production*) + * int; (*index for previous state list*) type earleystate = state list Array.array; - -fun get_prods tab lhs pred = - filter pred (Symtab.lookup_multi (tab, lhs)); +(*Get all rhss with precedence >= minPrec*) +fun getRHS minPrec = filter (fn (_, _, prec:int) => prec >= minPrec); -fun getProductions tab A i = - get_prods tab A (fn (_, _, j) => j >= i orelse j = ~1); - -fun getProductions' tab A i k = - get_prods tab A (fn (_, _, j) => (j >= i andalso j < k) orelse j = ~1); - - +(*Get all rhss with precedence >= minPrec and < maxPrec*) +fun getRHS' minPrec maxPrec = + filter (fn (_, _, prec:int) => prec >= minPrec andalso prec < maxPrec); -fun mkState i jj A ([Nonterminal (B, ~1)], id, ~1) = - (A, max_pri, [], [Nonterminal (B, jj)], id, i) - | mkState i jj A (ss, id, j) = (A, j, [], ss, id, i); - - +(*Make states using a list of rhss*) +fun mkStates i minPrec lhsID rhss = + let fun mkState (rhs, id, prodPrec) = (lhsID, prodPrec, [], rhs, id, i); + in map mkState rhss end; + +(*Add parse tree to list and eliminate duplicates + saving the maximum precedence*) +fun conc (t, prec:int) [] = (None, [(t, prec)]) + | conc (t, prec) ((t', prec') :: ts) = + if t = t' then + (Some prec', if prec' >= prec then (t', prec') :: ts + else (t, prec) :: ts) + else + let val (n, ts') = conc (t, prec) ts + in (n, (t', prec') :: ts') end; -fun conc (t, k:int) [] = (None, [(t, k)]) - | conc (t, k) ((t', k') :: ts) = - if t = t' then - (Some k', if k' >= k then (t', k') :: ts else (t, k) :: ts) - else - let val (n, ts') = conc (t, k) ts - in (n, (t', k') :: ts') end; - - +(*Update entry in used*) fun update_tree ((B, (i, ts)) :: used) (A, t) = if A = B then let val (n, ts') = conc t ts @@ -154,114 +361,117 @@ let val (used', n) = update_tree used (A, t) in ((B, (i, ts)) :: used', n) end; - -fun update_index ((B, (i, ts)) :: used) (A, j) = - if A = B then (A, (j, ts)) :: used - else (B, (i, ts)) :: (update_index used (A, j)); +(*Replace entry in used*) +fun update_index (A, prec) used = + let fun update((hd as (B, (_, ts))) :: used, used') = + if A = B + then used' @ ((A, (prec, ts)) :: used) + else update (used, hd :: used') + in update (used, []) end; - - -fun getS A i Si = +fun getS A maxPrec Si = filter - (fn (_, _, _, Nonterminal (B, j) :: _, _, _) => A = B andalso j <= i + (fn (_, _, _, Nonterm (B, prec) :: _, _, _) + => A = B andalso prec <= maxPrec | _ => false) Si; -fun getS' A k n Si = +fun getS' A maxPrec minPrec Si = filter - (fn (_, _, _, Nonterminal (B, j) :: _, _, _) => A = B andalso j <= k andalso j > n + (fn (_, _, _, Nonterm (B, prec) :: _, _, _) + => A = B andalso prec > minPrec andalso prec <= maxPrec | _ => false) Si; -fun getStates Estate i ii A k = +fun getStates Estate i ii A maxPrec = filter - (fn (_, _, _, Nonterminal (B, j) :: _, _, _) => A = B andalso j <= k + (fn (_, _, _, Nonterm (B, prec) :: _, _, _) + => A = B andalso prec <= maxPrec | _ => false) (Array.sub (Estate, ii)); -(* MMW *)(* FIXME *) -fun movedot_term (A, j, ts, Terminal a :: sa, id, i) c = +fun movedot_term (A, j, ts, Term a :: sa, id, i) c = if valued_token c then (A, j, (ts @ [Tip c]), sa, id, i) else (A, j, ts, sa, id, i); -fun movedot_nonterm ts (A, j, tss, Nonterminal (B, k) :: sa, id, i) = +fun movedot_nonterm ts (A, j, tss, Nonterm _ :: sa, id, i) = (A, j, tss @ ts, sa, id, i); fun movedot_lambda _ [] = [] - | movedot_lambda (B, j, tss, Nonterminal (A, k) :: sa, id, i) ((t, ki) :: ts) = + | movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ((t, ki) :: ts) = if k <= ki then (B, j, tss @ t, sa, id, i) :: - movedot_lambda (B, j, tss, Nonterminal (A, k) :: sa, id, i) ts - else movedot_lambda (B, j, tss, Nonterminal (A, k) :: sa, id, i) ts; + movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ts + else movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ts; -fun PROCESSS Estate grammar i c states = +fun PROCESSS Estate i c states = let +fun get_lookahead rhss_ref = token_assoc (!rhss_ref, c); + fun processS used [] (Si, Sii) = (Si, Sii) | processS used (S :: States) (Si, Sii) = (case S of - (_, _, _, Nonterminal (A, j) :: _, _, _) => - let - val (used_neu, States_neu) = - (case assoc (used, A) of - Some (k, l) => (*A gehoert zu used*) - if k <= j (*Prioritaet wurde behandelt*) - then (used, movedot_lambda S l) - else (*Prioritaet wurde noch nicht behandelt*) - let - val L = getProductions' grammar A j k; - val States' = map (mkState i j A) L; - in - (update_index used (A, j), States' @ movedot_lambda S l) + (_, _, _, Nonterm (rhss_ref, minPrec) :: _, _, _) => + let (*predictor operation*) + val (used_new, States_new) = + (case assoc (used, rhss_ref) of + Some (usedPrec, l) => (*nonterminal has been processed*) + if usedPrec <= minPrec then + (*wanted precedence has been processed*) + (used, movedot_lambda S l) + else (*wanted precedence hasn't been parsed yet*) + let val rhss = get_lookahead rhss_ref; + val States' = mkStates i minPrec rhss_ref + (getRHS' minPrec usedPrec rhss); + in (update_index (rhss_ref, minPrec) used, + movedot_lambda S l @ States') end - | None => (*A gehoert nicht zu used*) - let - val L = getProductions grammar A j; - val States' = map (mkState i j A) L; - in - ((A, (j, [])) :: used, States') - end) + | None => (*nonterminal is parsed for the first time*) + let val rhss = get_lookahead rhss_ref; + val States' = mkStates i minPrec rhss_ref + (getRHS minPrec rhss); + in ((rhss_ref, (minPrec, [])) :: used, States') end) in - processS used_neu (States @ States_neu) (S :: Si, Sii) + processS used_new (States_new @ States) (S :: Si, Sii) end - | (_, _, _, Terminal a :: _, _, _) => + | (_, _, _, Term a :: _, _, _) => (*scanner operation*) processS used States (S :: Si, if matching_tokens (a, c) then movedot_term S c :: Sii else Sii) - (* MMW *)(* FIXME *) - | (A, k, ts, [], id, j) => + | (A, prec, ts, [], id, j) => (*completer operation*) let val tt = if id = "" then ts else [Node (id, ts)] in - if j = i then + if j = i then (*lambda production?*) let - val (used', O) = update_tree used (A, (tt, k)); + val (used', O) = update_tree used (A, (tt, prec)); in (case O of None => let - val Slist = getS A k Si; + val Slist = getS A prec Si; val States' = map (movedot_nonterm tt) Slist; in - processS used' (States @ States') (S :: Si, Sii) + processS used' (States' @ States) (S :: Si, Sii) end | Some n => - if n >= k then + if n >= prec then processS used' States (S :: Si, Sii) else let - val Slist = getS' A k n Si; + val Slist = getS' A prec n Si; val States' = map (movedot_nonterm tt) Slist; in - processS used' (States @ States') (S :: Si, Sii) + processS used' (States' @ States) (S :: Si, Sii) end) - end + end else processS used - (States @ map (movedot_nonterm tt) (getStates Estate i j A k)) + (map (movedot_nonterm tt) (getStates Estate i j A prec) @ States) (S :: Si, Sii) end) in @@ -273,7 +483,7 @@ fun syntax_error toks = error ("Syntax error at: " ^ quote (space_implode " " (map str_of_token toks))); -fun produce grammar stateset i indata = +fun produce stateset i indata = (case Array.sub (stateset, i) of [] => syntax_error indata (* MMW *)(* FIXME *) | s => @@ -281,46 +491,39 @@ [] => Array.sub (stateset, i) | c :: cs => let - val (si, sii) = PROCESSS stateset grammar i c s; + val (si, sii) = PROCESSS stateset i c s; in Array.update (stateset, i, si); Array.update (stateset, i + 1, sii); - produce grammar stateset (i + 1) cs + produce stateset (i + 1) cs end)); -(*(* FIXME new *) val get_trees = mapfilter (fn (_, _, [pt], _, _, _) => Some pt | _ => None); -*) -fun get_trees [] = [] - | get_trees ((_, _, pt_lst, _, _, _) :: stateset) = - let val l = get_trees stateset - in case pt_lst of - [ptree] => ptree :: l - | _ => l - end; fun earley grammar startsymbol indata = let - val S0 = [("S'", 0, [], [Nonterminal (startsymbol, 0), Terminal EndToken], "", 0)]; - val s = length indata + 1; (* MMW *)(* FIXME was .. + 2 *) + val rhss_ref = case assoc (grammar, startsymbol) of + Some r => r + | None => error ("parse: Unknown startsymbol " ^ + quote startsymbol); + val S0 = [(ref [], 0, [], [Nonterm (rhss_ref, 0), Term EndToken], "", 0)]; + val s = length indata + 1; val Estate = Array.array (s, []); in Array.update (Estate, 0, S0); let - val l = produce grammar Estate 0 indata; (* MMW *)(* FIXME was .. @ [EndToken] *) + val l = produce Estate 0 indata; val p_trees = get_trees l; in p_trees end end; -(* FIXME demo *) fun parse (Gram (_, prod_tab)) start toks = (case earley prod_tab start toks of [] => sys_error "parse: no parse trees" - | pt :: _ => pt); - + | pts => pts); end; diff -r 92586a978648 -r 2fda15dd1e0f src/Pure/Syntax/sextension.ML --- a/src/Pure/Syntax/sextension.ML Fri Apr 15 18:43:21 1994 +0200 +++ b/src/Pure/Syntax/sextension.ML Fri Apr 22 12:43:53 1994 +0200 @@ -66,7 +66,7 @@ val xrules_of: sext -> xrule list val abs_tr': term -> term val appl_ast_tr': ast * ast list -> ast - val syn_ext_of_sext: string list -> string list -> (string -> typ) -> sext -> syn_ext + val syn_ext_of_sext: string list -> string list -> string list -> (string -> typ) -> sext -> syn_ext val pt_to_ast: (string -> (ast list -> ast) option) -> parsetree -> ast val ast_to_term: (string -> (term list -> term) option) -> ast -> term val apropC: string @@ -383,7 +383,7 @@ fun infix_name sy = "op " ^ strip_esc sy; -fun syn_ext_of_sext roots xconsts read_typ sext = +fun syn_ext_of_sext all_roots new_roots xconsts read_typ sext = let val {mixfix, parse_ast_translation, parse_translation, print_translation, print_ast_translation, ...} = sext_components sext; @@ -421,7 +421,7 @@ val bparses = map mk_binder_tr binders; val bprints = map (mk_binder_tr' o swap) binders; in - syn_ext roots mfix (distinct (filter is_xid xconsts)) + syn_ext all_roots new_roots mfix (distinct (filter is_xid xconsts)) (parse_ast_translation, bparses @ parse_translation, bprints @ print_translation, diff -r 92586a978648 -r 2fda15dd1e0f src/Pure/Syntax/syn_ext.ML --- a/src/Pure/Syntax/syn_ext.ML Fri Apr 15 18:43:21 1994 +0200 +++ b/src/Pure/Syntax/syn_ext.ML Fri Apr 22 12:43:53 1994 +0200 @@ -20,7 +20,7 @@ val args: string val idT: typ val varT: typ - val tfreeT: typ + val tidT: typ val tvarT: typ val applC: string val typ_to_nonterm: typ -> string @@ -45,12 +45,12 @@ print_translation: (string * (term list -> term)) list, print_rules: (ast * ast) list, print_ast_translation: (string * (ast list -> ast)) list} - val syn_ext: string list -> mfix list -> string list -> + val syn_ext: string list -> string list -> mfix list -> string list -> (string * (ast list -> ast)) list * (string * (term list -> term)) list * (string * (term list -> term)) list * (string * (ast list -> ast)) list -> (ast * ast) list * (ast * ast) list -> syn_ext - val syn_ext_rules: (ast * ast) list * (ast * ast) list -> syn_ext - val syn_ext_roots: string list -> syn_ext + val syn_ext_rules: string list -> (ast * ast) list * (ast * ast) list -> syn_ext + val syn_ext_roots: string list -> string list -> syn_ext end end; @@ -74,16 +74,16 @@ val args = "args"; val argsT = Type (args, []); -val funT = Type ("fun", []); +val typeT = Type ("type", []); -val typeT = Type ("type", []); +val funT = Type ("fun", []); (* terminals *) val idT = Type (id, []); val varT = Type (var, []); -val tfreeT = Type (tfree, []); +val tidT = Type (tid, []); val tvarT = Type (tvar, []); @@ -240,7 +240,6 @@ end; - (** datatype syn_ext **) datatype syn_ext = @@ -258,39 +257,85 @@ (* syn_ext *) -fun syn_ext roots mfixes consts trfuns rules = +fun syn_ext all_roots new_roots mfixes consts trfuns rules = let - fun descend (from, to) = Mfix ("_", to --> from, "", [0], 0); - - fun parents T = Mfix ("'(_')", T --> T, "", [0], max_pri); - - fun mkappl T = - Mfix ("(1_/(1'(_')))", [funT, argsT] ---> T, applC, [max_pri, 0], max_pri); - - fun mkid T = Mfix ("_", idT --> T, "", [], max_pri); - - fun mkvar T = Mfix ("_", varT --> T, "", [], max_pri); - - fun constrain T = - Mfix ("_::_", [T, typeT] ---> T, constrainC, [max_pri, 0], max_pri - 1); - - val (parse_ast_translation, parse_translation, print_translation, print_ast_translation) = trfuns; val (parse_rules, print_rules) = rules; - val Troots = map (apr (Type, [])) roots; - val Troots' = Troots \\ [typeT, propT, logicT]; - val mfixes' = mfixes @ map parents (Troots \ logicT) @ map mkappl Troots' @ + val Troots = map (apr (Type, [])) new_roots; + val Troots' = Troots \\ [typeT, propT]; + + fun change_name T ext = + let val Type (name, ts) = T + in Type (space_implode "" [name, ext], ts) end; + + (* Append "_H" to lhs if production is not a copy or chain production *) + fun hide_xprod roots (XProd (lhs, symbs, const, pri)) = + let fun is_delim (Delim _) = true + | is_delim _ = false + in if const <> "" andalso lhs mem roots andalso exists is_delim symbs then + XProd (space_implode "" [lhs, "_H"], symbs, const, pri) + else XProd (lhs, symbs, const, pri) + end; + + (* Make descend production and append "_H" to rhs nonterminal *) + fun descend_right (from, to) = + Mfix ("_", change_name to "_H" --> from, "", [0], 0); + + (* Make descend production and append "_H" to lhs *) + fun descend_left (from, to) = + Mfix ("_", to --> change_name from "_H", "", [0], 0); + + (* Make descend production and append "_A" to lhs *) + fun descend1 (from, to) = + Mfix ("_", to --> change_name from "_A", "", [0], 0); + + (* Make parentheses production for 'hidden' and 'automatic' nonterminal *) + fun parents T = + if T = typeT then + [Mfix ("'(_')", T --> T, "", [0], max_pri)] + else + [Mfix ("'(_')", change_name T "_H" --> change_name T "_H", "", [0], max_pri), + Mfix ("'(_')", change_name T "_A" --> change_name T "_A", "", [0], max_pri)]; + + fun mkappl T = + Mfix ("(1_/(1'(_')))", [funT, argsT] ---> change_name T "_A", applC, + [max_pri, 0], max_pri); + + fun mkid T = + Mfix ("_", idT --> change_name T "_A", "", [], max_pri); + + fun mkvar T = + Mfix ("_", varT --> change_name T "_A", "", [], max_pri); + + fun constrain T = + Mfix ("_::_", [T, typeT] ---> change_name T "_A", constrainC, + [max_pri, 0], max_pri - 1) + + fun unhide T = + if T <> logicT then + [Mfix ("_", change_name T "_H" --> T, "", [0], 0), + Mfix ("_", change_name T "_A" --> T, "", [0], 0)] + else + [Mfix ("_", change_name T "_A" --> T, "", [0], 0)]; + + val mfixes' = flat (map parents Troots) @ map mkappl Troots' @ map mkid Troots' @ map mkvar Troots' @ map constrain Troots' @ - map (apl (logicT, descend)) (Troots \\ [typeT, logicT]) @ - map (apr (descend, logic1T)) Troots'; + map (apl (logicT, descend_right)) (Troots \\ [logicT, typeT]) @ + map (apr (descend1, logic1T)) (Troots') @ + flat (map unhide (Troots \\ [typeT])); val mfix_consts = - distinct (filter is_xid (map (fn (Mfix (_, _, c, _, _)) => c) mfixes')); + distinct (filter is_xid (map (fn (Mfix (_, _, c, _, _)) => c) + (mfixes @ mfixes'))); + val xprods = map mfix_to_xprod mfixes; + val xprods' = map mfix_to_xprod mfixes'; in SynExt { - roots = roots, - xprods = map mfix_to_xprod mfixes', + roots = new_roots, + xprods = (map (hide_xprod (all_roots \\ ["logic", "type"])) xprods) + @ xprods', (* hide only productions that weren't created + automatically *) consts = consts union mfix_consts, parse_ast_translation = parse_ast_translation, parse_rules = parse_rules, @@ -303,11 +348,11 @@ (* syn_ext_rules, syn_ext_roots *) -fun syn_ext_rules rules = - syn_ext [] [] [] ([], [], [], []) rules; +fun syn_ext_rules roots rules = + syn_ext roots [] [] [] ([], [], [], []) rules; -fun syn_ext_roots roots = - syn_ext roots [] [] ([], [], [], []) ([], []); +fun syn_ext_roots all_roots new_roots = + syn_ext all_roots new_roots [] [] ([], [], [], []) ([], []); end; diff -r 92586a978648 -r 2fda15dd1e0f src/Pure/Syntax/syntax.ML --- a/src/Pure/Syntax/syntax.ML Fri Apr 15 18:43:21 1994 +0200 +++ b/src/Pure/Syntax/syntax.ML Fri Apr 22 12:43:53 1994 +0200 @@ -262,15 +262,15 @@ val {lexicon, gram, parse_ast_trtab, parse_ruletab, ...} = tabs; val toks = tokenize lexicon false str; - val _ = writeln ("tokens: " ^ space_implode " " (map display_token toks)); + val _ = writeln ("tokens: " ^ space_implode " " (map display_token toks)) - val pt = parse gram root toks; - val raw_ast = pt_to_ast (K None) pt; - val _ = writeln ("raw: " ^ str_of_ast raw_ast); - - val pre_ast = pt_to_ast (lookup_trtab parse_ast_trtab) pt; - val _ = normalize true true (lookup_ruletab parse_ruletab) pre_ast; - in () end; + fun show pt = + let val raw_ast = pt_to_ast (K None) pt; + val _ = writeln ("raw: " ^ str_of_ast raw_ast); + val pre_ast = pt_to_ast (lookup_trtab parse_ast_trtab) pt; + val _ = normalize true true (lookup_ruletab parse_ruletab) pre_ast; + in () end + in seq show (parse gram root toks) end; (* read_ast *) @@ -278,9 +278,20 @@ fun read_ast (Syntax tabs) xids root str = let val {lexicon, gram, parse_ast_trtab, ...} = tabs; + val pts = parse gram root (tokenize lexicon xids str); + + fun show_pts pts = + let fun show pt = writeln (str_of_ast (pt_to_ast (K None) pt)) + in seq show pts end in - pt_to_ast (lookup_trtab parse_ast_trtab) - (parse gram root (tokenize lexicon xids str)) + if tl pts = [] then + pt_to_ast (lookup_trtab parse_ast_trtab) (hd pts) + else + (writeln ("Ambiguous input " ^ quote str); + writeln "produces the following parse trees:"; + show_pts pts; + writeln "Please disambiguate the grammar or your input."; + raise ERROR) end; @@ -376,10 +387,10 @@ val Syntax {roots, ...} = syn0; val syn1 = extend_syntax syn0 - (syn_ext_of_sext (all_roots \\ roots) xconsts read_ty sext); + (syn_ext_of_sext all_roots (all_roots \\ roots) xconsts read_ty sext); val syn2 = extend_syntax syn1 - (syn_ext_rules (read_xrules syn1 (xrules_of sext))); + (syn_ext_rules all_roots (read_xrules syn1 (xrules_of sext))); in syn2 end; @@ -392,7 +403,7 @@ (case all_roots \\ roots of [] => syn | new_roots => (writeln ("DEBUG new roots:" ^ commas new_roots); (* FIXME debug *) - extend_syntax syn (syn_ext_roots new_roots))) + extend_syntax syn (syn_ext_roots all_roots new_roots))) end; diff -r 92586a978648 -r 2fda15dd1e0f src/Pure/Syntax/type_ext.ML --- a/src/Pure/Syntax/type_ext.ML Fri Apr 15 18:43:21 1994 +0200 +++ b/src/Pure/Syntax/type_ext.ML Fri Apr 22 12:43:53 1994 +0200 @@ -65,7 +65,7 @@ | sort tm = raise_term "typ_of_term: bad encoding of sort" [tm]; fun typ (Free (x, _), scs) = - (if is_tfree x then TFree (x, []) else Type (x, []), scs) + (if is_tid x then TFree (x, []) else Type (x, []), scs) | typ (Var (xi, _), scs) = (TVar (xi, []), scs) | typ (Const ("_ofsort", _) $ Free (x, _) $ st, scs) = (TFree (x, []), put_sort scs (x, ~1) (sort st)) @@ -170,19 +170,19 @@ val typesT = Type ("types", []); val type_ext = syn_ext - [logic, "type"] - [Mfix ("_", tfreeT --> typeT, "", [], max_pri), + [logic, "type"] [logic, "type"] + [Mfix ("_", tidT --> typeT, "", [], max_pri), Mfix ("_", tvarT --> typeT, "", [], max_pri), Mfix ("_", idT --> typeT, "", [], max_pri), - Mfix ("_::_", [tfreeT, sortT] ---> typeT, "_ofsort", [max_pri, 0], max_pri), + Mfix ("_::_", [tidT, sortT] ---> typeT, "_ofsort", [max_pri, 0], max_pri), Mfix ("_::_", [tvarT, sortT] ---> typeT, "_ofsort", [max_pri, 0], max_pri), Mfix ("_", idT --> sortT, "", [], max_pri), Mfix ("{}", sortT, "_emptysort", [], max_pri), Mfix ("{_}", classesT --> sortT, "_sort", [], max_pri), Mfix ("_", idT --> classesT, "", [], max_pri), Mfix ("_,_", [idT, classesT] ---> classesT, "_classes", [], max_pri), - Mfix ("_ _", [typeT, idT] ---> typeT, "_tapp", [max_pri, 0], max_pri), (* FIXME ambiguous *) - Mfix ("((1'(_'))_)", [typesT, idT] ---> typeT, "_tappl", [], max_pri), (* FIXME ambiguous *) + Mfix ("_ _", [typeT, idT] ---> typeT, "_tapp", [max_pri, 0], max_pri), + Mfix ("((1'(_,/ _'))_)", [typeT, typesT, idT] ---> typeT, "_tappl", [], max_pri), Mfix ("_", typeT --> typesT, "", [], max_pri), Mfix ("_,/ _", [typeT, typesT] ---> typesT, "_types", [], max_pri), Mfix ("(_/ => _)", [typeT, typeT] ---> typeT, "fun", [1, 0], 0),