# HG changeset patch # User wenzelm # Date 1282750450 -7200 # Node ID f7688fd819a86d31350d40cc1fefd70f100c3329 # Parent 79d3cbfb4730a5a0ea1b2ab38372d9a9c62d2c12 some attempts to recover Isabelle/ML style from the depths of time; diff -r 79d3cbfb4730 -r f7688fd819a8 src/Pure/Syntax/parser.ML --- a/src/Pure/Syntax/parser.ML Wed Aug 25 16:13:55 2010 +0200 +++ b/src/Pure/Syntax/parser.ML Wed Aug 25 17:34:10 2010 +0200 @@ -25,362 +25,373 @@ (** datatype gram **) -type nt_tag = int; (*production for the NTs are stored in a vector - so we can identify NTs by their index*) - -datatype symb = Terminal of Lexicon.token - | Nonterminal of nt_tag * int; (*(tag, precedence)*) +(*production for the NTs are stored in a vector + so we can identify NTs by their index*) +type nt_tag = int; -type nt_gram = ((nt_tag list * Lexicon.token list) * - (Lexicon.token option * (symb list * string * int) list) list); - (*(([dependent_nts], [start_tokens]), - [(start_token, [(rhs, name, prio)])])*) - (*depent_nts is a list of all NTs whose lookahead - depends on this NT's lookahead*) +datatype symb = + Terminal of Lexicon.token +| Nonterminal of nt_tag * int; (*(tag, precedence)*) + +type nt_gram = + ((nt_tag list * Lexicon.token list) * + (Lexicon.token option * (symb list * string * int) list) list); + (*(([dependent_nts], [start_tokens]), [(start_token, [(rhs, name, prio)])])*) + (*depent_nts is a list of all NTs whose lookahead depends on this NT's lookahead*) datatype gram = - Gram of {nt_count: int, prod_count: int, - tags: nt_tag Symtab.table, - chains: (nt_tag * nt_tag list) list, (*[(to, [from])]*) - lambdas: nt_tag list, - prods: nt_gram Vector.vector}; - (*"tags" is used to map NT names (i.e. strings) to tags; - chain productions are not stored as normal productions - but instead as an entry in "chains"; - lambda productions are stored as normal productions - and also as an entry in "lambdas"*) + Gram of + {nt_count: int, + prod_count: int, + tags: nt_tag Symtab.table, + chains: (nt_tag * nt_tag list) list, (*[(to, [from])]*) + lambdas: nt_tag list, + prods: nt_gram Vector.vector}; + (*"tags" is used to map NT names (i.e. strings) to tags; + chain productions are not stored as normal productions + but instead as an entry in "chains"; + lambda productions are stored as normal productions + and also as an entry in "lambdas"*) -val UnknownStart = Lexicon.eof; (*productions for which no starting token is - known yet are associated with this token*) -(* get all NTs that are connected with a list of NTs - (used for expanding chain list)*) +(*productions for which no starting token is + known yet are associated with this token*) +val UnknownStart = Lexicon.eof; + +(*get all NTs that are connected with a list of NTs*) fun connected_with _ ([]: nt_tag list) relatives = relatives | connected_with chains (root :: roots) relatives = - let val branches = subtract (op =) relatives (these (AList.lookup (op =) chains root)); - in connected_with chains (branches @ roots) (branches @ relatives) end; - -(* convert productions to grammar; - N.B. that the chains parameter has the form [(from, [to])]; - prod_count is of type "int option" and is only updated if it is <> NONE*) -fun add_prods _ chains lambdas prod_count [] = (chains, lambdas, prod_count) - | add_prods prods chains lambdas prod_count - ((lhs, new_prod as (rhs, name, pri)) :: ps) = - let - val chain_from = case (pri, rhs) of (~1, [Nonterminal (id, ~1)]) => SOME id | _ => NONE; - - (*store chain if it does not already exist*) - val (new_chain, chains') = case chain_from of NONE => (NONE, chains) | SOME from => - let val old_tos = these (AList.lookup (op =) chains from) in - if member (op =) old_tos lhs then (NONE, chains) - else (SOME from, AList.update (op =) (from, insert (op =) lhs old_tos) chains) - end; + let val branches = subtract (op =) relatives (these (AList.lookup (op =) chains root)); + in connected_with chains (branches @ roots) (branches @ relatives) end; - (*propagate new chain in lookahead and lambda lists; - added_starts is used later to associate existing - productions with new starting tokens*) - val (added_starts, lambdas') = - if is_none new_chain then ([], lambdas) else - let (*lookahead of chain's source*) - val ((from_nts, from_tks), _) = Array.sub (prods, the new_chain); - - (*copy from's lookahead to chain's destinations*) - fun copy_lookahead [] added = added - | copy_lookahead (to :: tos) added = - let - val ((to_nts, to_tks), ps) = Array.sub (prods, to); - - val new_tks = subtract (op =) to_tks from_tks; (*added lookahead tokens*) - in Array.update (prods, to, ((to_nts, to_tks @ new_tks), ps)); - copy_lookahead tos (if null new_tks then added - else (to, new_tks) :: added) - end; - - val tos = connected_with chains' [lhs] [lhs]; - in (copy_lookahead tos [], - union (op =) (if member (op =) lambdas lhs then tos else []) lambdas) - end; - - (*test if new production can produce lambda - (rhs must either be empty or only consist of lambda NTs)*) - val (new_lambda, lambdas') = - if forall (fn (Nonterminal (id, _)) => member (op =) lambdas' id - | (Terminal _) => false) rhs then - (true, union (op =) lambdas' (connected_with chains' [lhs] [lhs])) - else - (false, lambdas'); +(*convert productions to grammar; + N.B. that the chains parameter has the form [(from, [to])]; + prod_count is of type "int option" and is only updated if it is <> NONE*) +fun add_prods _ chains lambdas prod_count [] = (chains, lambdas, prod_count) + | add_prods prods chains lambdas prod_count ((lhs, new_prod as (rhs, name, pri)) :: ps) = + let + val chain_from = + (case (pri, rhs) of + (~1, [Nonterminal (id, ~1)]) => SOME id + | _ => NONE); - (*list optional terminal and all nonterminals on which the lookahead - of a production depends*) - fun lookahead_dependency _ [] nts = (NONE, nts) - | lookahead_dependency _ ((Terminal tk) :: _) nts = (SOME tk, nts) - | lookahead_dependency lambdas ((Nonterminal (nt, _)) :: symbs) nts = - if member (op =) lambdas nt then - lookahead_dependency lambdas symbs (nt :: nts) - else (NONE, nt :: nts); - - (*get all known starting tokens for a nonterminal*) - fun starts_for_nt nt = snd (fst (Array.sub (prods, nt))); - - val token_union = uncurry (union Lexicon.matching_tokens); + (*store chain if it does not already exist*) + val (new_chain, chains') = + (case chain_from of + NONE => (NONE, chains) + | SOME from => + let val old_tos = these (AList.lookup (op =) chains from) in + if member (op =) old_tos lhs then (NONE, chains) + else (SOME from, AList.update (op =) (from, insert (op =) lhs old_tos) chains) + end); - (*update prods, lookaheads, and lambdas according to new lambda NTs*) - val (added_starts', lambdas') = - let - (*propagate added lambda NT*) - fun propagate_lambda [] added_starts lambdas= (added_starts, lambdas) - | propagate_lambda (l :: ls) added_starts lambdas = - let - (*get lookahead for lambda NT*) - val ((dependent, l_starts), _) = Array.sub (prods, l); + (*propagate new chain in lookahead and lambda lists; + added_starts is used later to associate existing + productions with new starting tokens*) + val (added_starts, lambdas') = + if is_none new_chain then ([], lambdas) + else + let (*lookahead of chain's source*) + val ((from_nts, from_tks), _) = Array.sub (prods, the new_chain); - (*check productions whose lookahead may depend on lambda NT*) - fun examine_prods [] add_lambda nt_dependencies added_tks - nt_prods = - (add_lambda, nt_dependencies, added_tks, nt_prods) - | examine_prods ((p as (rhs, _, _)) :: ps) add_lambda - nt_dependencies added_tks nt_prods = - let val (tk, nts) = lookahead_dependency lambdas rhs []; + (*copy from's lookahead to chain's destinations*) + fun copy_lookahead [] added = added + | copy_lookahead (to :: tos) added = + let + val ((to_nts, to_tks), ps) = Array.sub (prods, to); + + val new_tks = subtract (op =) to_tks from_tks; (*added lookahead tokens*) + val _ = Array.update (prods, to, ((to_nts, to_tks @ new_tks), ps)); in - if member (op =) nts l then (*update production's lookahead*) - let - val new_lambda = is_none tk andalso subset (op =) (nts, lambdas); - - val new_tks = subtract (op =) l_starts - ((if is_some tk then [the tk] else []) @ - Library.foldl token_union ([], map starts_for_nt nts)); - - val added_tks' = token_union (new_tks, added_tks); - - val nt_dependencies' = union (op =) nts nt_dependencies; - - (*associate production with new starting tokens*) - fun copy ([]: Lexicon.token option list) nt_prods = nt_prods - | copy (tk :: tks) nt_prods = - let val old_prods = these (AList.lookup (op =) nt_prods tk); - - val prods' = p :: old_prods; - in nt_prods - |> AList.update (op =) (tk, prods') - |> copy tks - end; - - val nt_prods' = - let val new_opt_tks = map SOME new_tks; - in copy ((if new_lambda then [NONE] else []) @ - new_opt_tks) nt_prods - end; - in examine_prods ps (add_lambda orelse new_lambda) - nt_dependencies' added_tks' nt_prods' - end - else (*skip production*) - examine_prods ps add_lambda nt_dependencies - added_tks nt_prods + copy_lookahead tos (if null new_tks then added else (to, new_tks) :: added) end; - (*check each NT whose lookahead depends on new lambda NT*) - fun process_nts [] added_lambdas added_starts = - (added_lambdas, added_starts) - | process_nts (nt :: nts) added_lambdas added_starts = - let - val (lookahead as (old_nts, old_tks), nt_prods) = - Array.sub (prods, nt); + val tos = connected_with chains' [lhs] [lhs]; + in + (copy_lookahead tos [], + union (op =) (if member (op =) lambdas lhs then tos else []) lambdas) + end; - (*existing productions whose lookahead may depend on l*) - val tk_prods = - (these o AList.lookup (op =) nt_prods) - (SOME (hd l_starts handle Empty => UnknownStart)); + (*test if new production can produce lambda + (rhs must either be empty or only consist of lambda NTs)*) + val (new_lambda, lambdas') = + if forall + (fn Nonterminal (id, _) => member (op =) lambdas' id + | Terminal _ => false) rhs + then (true, union (op =) lambdas' (connected_with chains' [lhs] [lhs])) + else (false, lambdas'); - (*add_lambda is true if an existing production of the nt - produces lambda due to the new lambda NT l*) - val (add_lambda, nt_dependencies, added_tks, nt_prods') = - examine_prods tk_prods false [] [] nt_prods; + (*list optional terminal and all nonterminals on which the lookahead + of a production depends*) + fun lookahead_dependency _ [] nts = (NONE, nts) + | lookahead_dependency _ ((Terminal tk) :: _) nts = (SOME tk, nts) + | lookahead_dependency lambdas ((Nonterminal (nt, _)) :: symbs) nts = + if member (op =) lambdas nt then + lookahead_dependency lambdas symbs (nt :: nts) + else (NONE, nt :: nts); - val added_nts = subtract (op =) old_nts nt_dependencies; + (*get all known starting tokens for a nonterminal*) + fun starts_for_nt nt = snd (fst (Array.sub (prods, nt))); + + val token_union = uncurry (union Lexicon.matching_tokens); - val added_lambdas' = - if add_lambda then nt :: added_lambdas - else added_lambdas; - in Array.update (prods, nt, - ((added_nts @ old_nts, old_tks @ added_tks), - nt_prods')); - (*N.B. that because the tks component - is used to access existing - productions we have to add new - tokens at the _end_ of the list*) + (*update prods, lookaheads, and lambdas according to new lambda NTs*) + val (added_starts', lambdas') = + let + (*propagate added lambda NT*) + fun propagate_lambda [] added_starts lambdas= (added_starts, lambdas) + | propagate_lambda (l :: ls) added_starts lambdas = + let + (*get lookahead for lambda NT*) + val ((dependent, l_starts), _) = Array.sub (prods, l); + + (*check productions whose lookahead may depend on lambda NT*) + fun examine_prods [] add_lambda nt_dependencies added_tks nt_prods = + (add_lambda, nt_dependencies, added_tks, nt_prods) + | examine_prods ((p as (rhs, _, _)) :: ps) add_lambda + nt_dependencies added_tks nt_prods = + let val (tk, nts) = lookahead_dependency lambdas rhs [] in + if member (op =) nts l then (*update production's lookahead*) + let + val new_lambda = is_none tk andalso subset (op =) (nts, lambdas); - if null added_tks then - process_nts nts added_lambdas' added_starts - else - process_nts nts added_lambdas' - ((nt, added_tks) :: added_starts) - end; + val new_tks = subtract (op =) l_starts + ((if is_some tk then [the tk] else []) @ + Library.foldl token_union ([], map starts_for_nt nts)); + + val added_tks' = token_union (new_tks, added_tks); + + val nt_dependencies' = union (op =) nts nt_dependencies; - val (added_lambdas, added_starts') = - process_nts dependent [] added_starts; - - val added_lambdas' = subtract (op =) lambdas added_lambdas; - in propagate_lambda (ls @ added_lambdas') added_starts' - (added_lambdas' @ lambdas) - end; - in propagate_lambda (subtract (op =) lambdas lambdas') added_starts lambdas' end; + (*associate production with new starting tokens*) + fun copy ([]: Lexicon.token option list) nt_prods = nt_prods + | copy (tk :: tks) nt_prods = + let + val old_prods = these (AList.lookup (op =) nt_prods tk); + val prods' = p :: old_prods; + in + nt_prods + |> AList.update (op =) (tk, prods') + |> copy tks + end; - (*insert production into grammar*) - val (added_starts', prod_count') = - if is_some chain_from then (added_starts', prod_count) (*don't store chain production*) - else let - (*lookahead tokens of new production and on which - NTs lookahead depends*) - val (start_tk, start_nts) = lookahead_dependency lambdas' rhs []; - - val start_tks = Library.foldl token_union - (if is_some start_tk then [the start_tk] else [], - map starts_for_nt start_nts); + val nt_prods' = + let val new_opt_tks = map SOME new_tks in + copy + ((if new_lambda then [NONE] else []) @ new_opt_tks) nt_prods + end; + in + examine_prods ps (add_lambda orelse new_lambda) + nt_dependencies' added_tks' nt_prods' + end + else (*skip production*) + examine_prods ps add_lambda nt_dependencies added_tks nt_prods + end; - val opt_starts = (if new_lambda then [NONE] - else if null start_tks then [SOME UnknownStart] - else []) @ (map SOME start_tks); + (*check each NT whose lookahead depends on new lambda NT*) + fun process_nts [] added_lambdas added_starts = + (added_lambdas, added_starts) + | process_nts (nt :: nts) added_lambdas added_starts = + let + val (lookahead as (old_nts, old_tks), nt_prods) = Array.sub (prods, nt); - (*add lhs NT to list of dependent NTs in lookahead*) - fun add_nts [] = () - | add_nts (nt :: nts) = - let val ((old_nts, old_tks), ps) = Array.sub (prods, nt); - in if member (op =) old_nts lhs then () - else Array.update (prods, nt, ((lhs :: old_nts, old_tks), ps)) - end; + (*existing productions whose lookahead may depend on l*) + val tk_prods = + these + (AList.lookup (op =) nt_prods + (SOME (hd l_starts handle Empty => UnknownStart))); + + (*add_lambda is true if an existing production of the nt + produces lambda due to the new lambda NT l*) + val (add_lambda, nt_dependencies, added_tks, nt_prods') = + examine_prods tk_prods false [] [] nt_prods; + + val added_nts = subtract (op =) old_nts nt_dependencies; - (*add new start tokens to chained NTs' lookahead list; - also store new production for lhs NT*) - fun add_tks [] added prod_count = (added, prod_count) - | add_tks (nt :: nts) added prod_count = - let - val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt); - - val new_tks = subtract Lexicon.matching_tokens old_tks start_tks; + val added_lambdas' = + if add_lambda then nt :: added_lambdas + else added_lambdas; + val _ = + Array.update + (prods, nt, ((added_nts @ old_nts, old_tks @ added_tks), nt_prods')); + (*N.B. that because the tks component + is used to access existing + productions we have to add new + tokens at the _end_ of the list*) + in + if null added_tks then + process_nts nts added_lambdas' added_starts + else + process_nts nts added_lambdas' ((nt, added_tks) :: added_starts) + end; - (*store new production*) - fun store [] prods is_new = - (prods, if is_some prod_count andalso is_new then - Option.map (fn x => x+1) prod_count - else prod_count, is_new) - | store (tk :: tks) prods is_new = - let val tk_prods = (these o AList.lookup (op =) prods) tk; + val (added_lambdas, added_starts') = process_nts dependent [] added_starts; + val added_lambdas' = subtract (op =) lambdas added_lambdas; + in + propagate_lambda (ls @ added_lambdas') added_starts' (added_lambdas' @ lambdas) + end; + in propagate_lambda (subtract (op =) lambdas lambdas') added_starts lambdas' end; - (*if prod_count = NONE then we can assume that - grammar does not contain new production already*) - val (tk_prods', is_new') = - if is_some prod_count then - if member (op =) tk_prods new_prod then (tk_prods, false) - else (new_prod :: tk_prods, true) - else (new_prod :: tk_prods, true); + (*insert production into grammar*) + val (added_starts', prod_count') = + if is_some chain_from + then (added_starts', prod_count) (*don't store chain production*) + else + let + (*lookahead tokens of new production and on which + NTs lookahead depends*) + val (start_tk, start_nts) = lookahead_dependency lambdas' rhs []; - val prods' = prods - |> is_new' ? AList.update (op =) (tk: Lexicon.token option, tk_prods'); - in store tks prods' (is_new orelse is_new') end; + val start_tks = + Library.foldl token_union + (if is_some start_tk then [the start_tk] else [], + map starts_for_nt start_nts); - val (nt_prods', prod_count', changed) = - if nt = lhs then store opt_starts nt_prods false - else (nt_prods, prod_count, false); - in if not changed andalso null new_tks then () - else Array.update (prods, nt, ((old_nts, old_tks @ new_tks), - nt_prods')); - add_tks nts (if null new_tks then added - else (nt, new_tks) :: added) prod_count' - end; - in add_nts start_nts; - add_tks (connected_with chains' [lhs] [lhs]) [] prod_count - end; + val opt_starts = + (if new_lambda then [NONE] + else if null start_tks then [SOME UnknownStart] + else []) @ map SOME start_tks; + + (*add lhs NT to list of dependent NTs in lookahead*) + fun add_nts [] = () + | add_nts (nt :: nts) = + let val ((old_nts, old_tks), ps) = Array.sub (prods, nt) in + if member (op =) old_nts lhs then () + else Array.update (prods, nt, ((lhs :: old_nts, old_tks), ps)) + end; + + (*add new start tokens to chained NTs' lookahead list; + also store new production for lhs NT*) + fun add_tks [] added prod_count = (added, prod_count) + | add_tks (nt :: nts) added prod_count = + let + val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt); + + val new_tks = subtract Lexicon.matching_tokens old_tks start_tks; + + (*store new production*) + fun store [] prods is_new = + (prods, + if is_some prod_count andalso is_new then + Option.map (fn x => x + 1) prod_count + else prod_count, is_new) + | store (tk :: tks) prods is_new = + let + val tk_prods = these (AList.lookup (op =) prods tk); + + (*if prod_count = NONE then we can assume that + grammar does not contain new production already*) + val (tk_prods', is_new') = + if is_some prod_count then + if member (op =) tk_prods new_prod then (tk_prods, false) + else (new_prod :: tk_prods, true) + else (new_prod :: tk_prods, true); - (*associate productions with new lookaheads*) - val dummy = - let - (*propagate added start tokens*) - fun add_starts [] = () - | add_starts ((changed_nt, new_tks) :: starts) = - let - (*token under which old productions which - depend on changed_nt could be stored*) - val key = - case find_first (not o member (op =) new_tks) - (starts_for_nt changed_nt) of - NONE => SOME UnknownStart - | t => t; + val prods' = prods + |> is_new' ? AList.update (op =) (tk: Lexicon.token option, tk_prods'); + in store tks prods' (is_new orelse is_new') end; + + val (nt_prods', prod_count', changed) = + if nt = lhs + then store opt_starts nt_prods false + else (nt_prods, prod_count, false); + val _ = + if not changed andalso null new_tks then () + else Array.update (prods, nt, ((old_nts, old_tks @ new_tks), nt_prods')); + in + add_tks nts + (if null new_tks then added else (nt, new_tks) :: added) prod_count' + end; + val _ = add_nts start_nts; + in + add_tks (connected_with chains' [lhs] [lhs]) [] prod_count + end; - (*copy productions whose lookahead depends on changed_nt; - if key = SOME UnknownToken then tk_prods is used to hold - the productions not copied*) - fun update_prods [] result = result - | update_prods ((p as (rhs, _: string, _: nt_tag)) :: ps) - (tk_prods, nt_prods) = - let - (*lookahead dependency for production*) - val (tk, depends) = lookahead_dependency lambdas' rhs []; + (*associate productions with new lookaheads*) + val _ = + let + (*propagate added start tokens*) + fun add_starts [] = () + | add_starts ((changed_nt, new_tks) :: starts) = + let + (*token under which old productions which + depend on changed_nt could be stored*) + val key = + (case find_first (not o member (op =) new_tks) (starts_for_nt changed_nt) of + NONE => SOME UnknownStart + | t => t); - (*test if this production has to be copied*) - val update = member (op =) depends changed_nt; - - (*test if production could already be associated with - a member of new_tks*) - val lambda = length depends > 1 orelse - not (null depends) andalso is_some tk - andalso member (op =) new_tks (the tk); - - (*associate production with new starting tokens*) - fun copy ([]: Lexicon.token list) nt_prods = nt_prods - | copy (tk :: tks) nt_prods = + (*copy productions whose lookahead depends on changed_nt; + if key = SOME UnknownToken then tk_prods is used to hold + the productions not copied*) + fun update_prods [] result = result + | update_prods ((p as (rhs, _: string, _: nt_tag)) :: ps) + (tk_prods, nt_prods) = let - val tk_prods = these (AList.lookup (op =) nt_prods (SOME tk)); + (*lookahead dependency for production*) + val (tk, depends) = lookahead_dependency lambdas' rhs []; + + (*test if this production has to be copied*) + val update = member (op =) depends changed_nt; - val tk_prods' = - if not lambda then p :: tk_prods - else insert (op =) p tk_prods; - (*if production depends on lambda NT we - have to look for duplicates*) - in - nt_prods - |> AList.update (op =) (SOME tk, tk_prods') - |> copy tks - end; - val result = - if update then - (tk_prods, copy new_tks nt_prods) - else if key = SOME UnknownStart then - (p :: tk_prods, nt_prods) - else (tk_prods, nt_prods); - in update_prods ps result end; + (*test if production could already be associated with + a member of new_tks*) + val lambda = + length depends > 1 orelse + not (null depends) andalso is_some tk + andalso member (op =) new_tks (the tk); + + (*associate production with new starting tokens*) + fun copy ([]: Lexicon.token list) nt_prods = nt_prods + | copy (tk :: tks) nt_prods = + let + val tk_prods = these (AList.lookup (op =) nt_prods (SOME tk)); - (*copy existing productions for new starting tokens*) - fun process_nts [] added = added - | process_nts (nt :: nts) added = - let - val (lookahead as (old_nts, old_tks), nt_prods) = - Array.sub (prods, nt); + val tk_prods' = + if not lambda then p :: tk_prods + else insert (op =) p tk_prods; + (*if production depends on lambda NT we + have to look for duplicates*) + in + nt_prods + |> AList.update (op =) (SOME tk, tk_prods') + |> copy tks + end; + val result = + if update then (tk_prods, copy new_tks nt_prods) + else if key = SOME UnknownStart then (p :: tk_prods, nt_prods) + else (tk_prods, nt_prods); + in update_prods ps result end; - val tk_prods = these (AList.lookup (op =) nt_prods key); - - (*associate productions with new lookahead tokens*) - val (tk_prods', nt_prods') = - update_prods tk_prods ([], nt_prods); + (*copy existing productions for new starting tokens*) + fun process_nts [] added = added + | process_nts (nt :: nts) added = + let + val (lookahead as (old_nts, old_tks), nt_prods) = Array.sub (prods, nt); - val nt_prods' = - nt_prods' - |> (key = SOME UnknownStart) ? AList.update (op =) (key, tk_prods') + val tk_prods = these (AList.lookup (op =) nt_prods key); + + (*associate productions with new lookahead tokens*) + val (tk_prods', nt_prods') = update_prods tk_prods ([], nt_prods); + + val nt_prods' = + nt_prods' + |> (key = SOME UnknownStart) ? AList.update (op =) (key, tk_prods'); - val added_tks = - subtract Lexicon.matching_tokens old_tks new_tks; - in if null added_tks then - (Array.update (prods, nt, (lookahead, nt_prods')); - process_nts nts added) - else - (Array.update (prods, nt, - ((old_nts, added_tks @ old_tks), nt_prods')); - process_nts nts ((nt, added_tks) :: added)) - end; + val added_tks = subtract Lexicon.matching_tokens old_tks new_tks; + in + if null added_tks then + (Array.update (prods, nt, (lookahead, nt_prods')); + process_nts nts added) + else + (Array.update (prods, nt, ((old_nts, added_tks @ old_tks), nt_prods')); + process_nts nts ((nt, added_tks) :: added)) + end; - val ((dependent, _), _) = Array.sub (prods, changed_nt); - in add_starts (starts @ process_nts dependent []) end; - in add_starts added_starts' end; - in add_prods prods chains' lambdas' prod_count ps end; + val ((dependent, _), _) = Array.sub (prods, changed_nt); + in add_starts (starts @ process_nts dependent []) end; + in add_starts added_starts' end; + in add_prods prods chains' lambdas' prod_count ps end; (* pretty_gram *) @@ -411,7 +422,7 @@ val nt_prods = Library.foldl (uncurry (union (op =))) ([], map snd (snd (Vector.sub (prods, tag)))) @ - map prod_of_chain ((these o AList.lookup (op =) chains) tag); + map prod_of_chain (these (AList.lookup (op =) chains tag)); in map (pretty_prod name) nt_prods end; in maps pretty_nt (sort_wrt fst (Symtab.dest tags)) end; @@ -419,85 +430,96 @@ (** Operations on gramars **) -val empty_gram = Gram {nt_count = 0, prod_count = 0, - tags = Symtab.empty, chains = [], lambdas = [], - prods = Vector.fromList [(([], []), [])]}; +val empty_gram = + Gram + {nt_count = 0, + prod_count = 0, + tags = Symtab.empty, chains = [], + lambdas = [], + prods = Vector.fromList [(([], []), [])]}; (*Invert list of chain productions*) fun inverse_chains [] result = result | inverse_chains ((root, branches: nt_tag list) :: cs) result = - let fun add ([]: nt_tag list) result = result + let + fun add ([]: nt_tag list) result = result | add (id :: ids) result = - let val old = (these o AList.lookup (op =) result) id; - in add ids (AList.update (op =) (id, root :: old) result) end; - in inverse_chains cs (add branches result) end; + let val old = these (AList.lookup (op =) result id); + in add ids (AList.update (op =) (id, root :: old) result) end; + in inverse_chains cs (add branches result) end; (*Add productions to a grammar*) fun extend_gram [] gram = gram | extend_gram xprods (Gram {nt_count, prod_count, tags, chains, lambdas, prods}) = - let - (*Get tag for existing nonterminal or create a new one*) - fun get_tag nt_count tags nt = - case Symtab.lookup tags nt of - SOME tag => (nt_count, tags, tag) - | NONE => (nt_count+1, Symtab.update_new (nt, nt_count) tags, - nt_count); + let + (*Get tag for existing nonterminal or create a new one*) + fun get_tag nt_count tags nt = + (case Symtab.lookup tags nt of + SOME tag => (nt_count, tags, tag) + | NONE => (nt_count + 1, Symtab.update_new (nt, nt_count) tags, nt_count)); - (*Convert symbols to the form used by the parser; - delimiters and predefined terms are stored as terminals, - nonterminals are converted to integer tags*) - fun symb_of [] nt_count tags result = (nt_count, tags, rev result) - | symb_of ((Syn_Ext.Delim s) :: ss) nt_count tags result = - symb_of ss nt_count tags - (Terminal (Lexicon.Token (Lexicon.Literal, s, Position.no_range)) :: result) - | symb_of ((Syn_Ext.Argument (s, p)) :: ss) nt_count tags result = - let - val (nt_count', tags', new_symb) = - case Lexicon.predef_term s of - NONE => - let val (nt_count', tags', s_tag) = get_tag nt_count tags s; - in (nt_count', tags', Nonterminal (s_tag, p)) end - | SOME tk => (nt_count, tags, Terminal tk); - in symb_of ss nt_count' tags' (new_symb :: result) end - | symb_of (_ :: ss) nt_count tags result = - symb_of ss nt_count tags result; + (*Convert symbols to the form used by the parser; + delimiters and predefined terms are stored as terminals, + nonterminals are converted to integer tags*) + fun symb_of [] nt_count tags result = (nt_count, tags, rev result) + | symb_of ((Syn_Ext.Delim s) :: ss) nt_count tags result = + symb_of ss nt_count tags + (Terminal (Lexicon.Token (Lexicon.Literal, s, Position.no_range)) :: result) + | symb_of ((Syn_Ext.Argument (s, p)) :: ss) nt_count tags result = + let + val (nt_count', tags', new_symb) = + (case Lexicon.predef_term s of + NONE => + let val (nt_count', tags', s_tag) = get_tag nt_count tags s; + in (nt_count', tags', Nonterminal (s_tag, p)) end + | SOME tk => (nt_count, tags, Terminal tk)); + in symb_of ss nt_count' tags' (new_symb :: result) end + | symb_of (_ :: ss) nt_count tags result = symb_of ss nt_count tags result; - (*Convert list of productions by invoking symb_of for each of them*) - fun prod_of [] nt_count prod_count tags result = - (nt_count, prod_count, tags, result) - | prod_of ((Syn_Ext.XProd (lhs, xsymbs, const, pri)) :: ps) - nt_count prod_count tags result = - let val (nt_count', tags', lhs_tag) = get_tag nt_count tags lhs; + (*Convert list of productions by invoking symb_of for each of them*) + fun prod_of [] nt_count prod_count tags result = + (nt_count, prod_count, tags, result) + | prod_of ((Syn_Ext.XProd (lhs, xsymbs, const, pri)) :: ps) + nt_count prod_count tags result = + let + val (nt_count', tags', lhs_tag) = get_tag nt_count tags lhs; + val (nt_count'', tags'', prods) = symb_of xsymbs nt_count' tags' []; + in + prod_of ps nt_count'' (prod_count + 1) tags'' + ((lhs_tag, (prods, const, pri)) :: result) + end; - val (nt_count'', tags'', prods) = - symb_of xsymbs nt_count' tags' []; - in prod_of ps nt_count'' (prod_count+1) tags'' - ((lhs_tag, (prods, const, pri)) :: result) - end; - - val (nt_count', prod_count', tags', xprods') = - prod_of xprods nt_count prod_count tags []; + val (nt_count', prod_count', tags', xprods') = + prod_of xprods nt_count prod_count tags []; - (*Copy array containing productions of old grammar; - this has to be done to preserve the old grammar while being able - to change the array's content*) - val prods' = - let fun get_prod i = if i < nt_count then Vector.sub (prods, i) - else (([], []), []); - in Array.tabulate (nt_count', get_prod) end; + (*Copy array containing productions of old grammar; + this has to be done to preserve the old grammar while being able + to change the array's content*) + val prods' = + let + fun get_prod i = + if i < nt_count then Vector.sub (prods, i) + else (([], []), []); + in Array.tabulate (nt_count', get_prod) end; + + val fromto_chains = inverse_chains chains []; - val fromto_chains = inverse_chains chains []; + (*Add new productions to old ones*) + val (fromto_chains', lambdas', _) = + add_prods prods' fromto_chains lambdas NONE xprods'; - (*Add new productions to old ones*) - val (fromto_chains', lambdas', _) = - add_prods prods' fromto_chains lambdas NONE xprods'; - - val chains' = inverse_chains fromto_chains' []; - in Gram {nt_count = nt_count', prod_count = prod_count', tags = tags', - chains = chains', lambdas = lambdas', prods = Array.vector prods'} - end; + val chains' = inverse_chains fromto_chains' []; + in + Gram + {nt_count = nt_count', + prod_count = prod_count', + tags = tags', + chains = chains', + lambdas = lambdas', + prods = Array.vector prods'} + end; fun make_gram xprods = extend_gram xprods empty_gram; @@ -506,37 +528,40 @@ fun merge_gram (gram_a, gram_b) = let (*find out which grammar is bigger*) - val (Gram {nt_count = nt_count1, prod_count = prod_count1, tags = tags1, - chains = chains1, lambdas = lambdas1, prods = prods1}, - Gram {nt_count = nt_count2, prod_count = prod_count2, tags = tags2, - chains = chains2, lambdas = lambdas2, prods = prods2}) = - let val Gram {prod_count = count_a, ...} = gram_a; - val Gram {prod_count = count_b, ...} = gram_b; - in if count_a > count_b then (gram_a, gram_b) - else (gram_b, gram_a) + val + (Gram {nt_count = nt_count1, prod_count = prod_count1, tags = tags1, + chains = chains1, lambdas = lambdas1, prods = prods1}, + Gram {nt_count = nt_count2, prod_count = prod_count2, tags = tags2, + chains = chains2, lambdas = lambdas2, prods = prods2}) = + let + val Gram {prod_count = count_a, ...} = gram_a; + val Gram {prod_count = count_b, ...} = gram_b; + in + if count_a > count_b + then (gram_a, gram_b) + else (gram_b, gram_a) end; (*get existing tag from grammar1 or create a new one*) fun get_tag nt_count tags nt = - case Symtab.lookup tags nt of + (case Symtab.lookup tags nt of SOME tag => (nt_count, tags, tag) - | NONE => (nt_count+1, Symtab.update_new (nt, nt_count) tags, - nt_count) + | NONE => (nt_count + 1, Symtab.update_new (nt, nt_count) tags, nt_count)); val ((nt_count1', tags1'), tag_table) = - let val tag_list = Symtab.dest tags2; + let + val tag_list = Symtab.dest tags2; - val table = Array.array (nt_count2, ~1); + val table = Array.array (nt_count2, ~1); - fun store_tag nt_count tags ~1 = (nt_count, tags) - | store_tag nt_count tags tag = - let val (nt_count', tags', tag') = - get_tag nt_count tags - (fst (the (find_first (fn (n, t) => t = tag) tag_list))); - in Array.update (table, tag, tag'); - store_tag nt_count' tags' (tag-1) - end; - in (store_tag nt_count1 tags1 (nt_count2-1), table) end; + fun store_tag nt_count tags ~1 = (nt_count, tags) + | store_tag nt_count tags tag = + let + val (nt_count', tags', tag') = + get_tag nt_count tags (fst (the (find_first (fn (n, t) => t = tag) tag_list))); + val _ = Array.update (table, tag, tag'); + in store_tag nt_count' tags' (tag - 1) end; + in (store_tag nt_count1 tags1 (nt_count2 - 1), table) end; (*convert grammar2 tag to grammar1 tag*) fun convert_tag tag = Array.sub (tag_table, tag); @@ -544,44 +569,44 @@ (*convert chain list to raw productions*) fun mk_chain_prods [] result = result | mk_chain_prods ((to, froms) :: cs) result = - let - val to_tag = convert_tag to; + let + val to_tag = convert_tag to; - fun make [] result = result - | make (from :: froms) result = make froms ((to_tag, - ([Nonterminal (convert_tag from, ~1)], "", ~1)) :: result); - in mk_chain_prods cs (make froms [] @ result) end; + fun make [] result = result + | make (from :: froms) result = make froms + ((to_tag, ([Nonterminal (convert_tag from, ~1)], "", ~1)) :: result); + in mk_chain_prods cs (make froms [] @ result) end; val chain_prods = mk_chain_prods chains2 []; (*convert prods2 array to productions*) fun process_nt ~1 result = result | process_nt nt result = - let - val nt_prods = Library.foldl (uncurry (union (op =))) - ([], map snd (snd (Vector.sub (prods2, nt)))); - val lhs_tag = convert_tag nt; + let + val nt_prods = Library.foldl (uncurry (union (op =))) + ([], map snd (snd (Vector.sub (prods2, nt)))); + val lhs_tag = convert_tag nt; - (*convert tags in rhs*) - fun process_rhs [] result = result - | process_rhs (Terminal tk :: rhs) result = - process_rhs rhs (result @ [Terminal tk]) - | process_rhs (Nonterminal (nt, prec) :: rhs) result = - process_rhs rhs - (result @ [Nonterminal (convert_tag nt, prec)]); + (*convert tags in rhs*) + fun process_rhs [] result = result + | process_rhs (Terminal tk :: rhs) result = + process_rhs rhs (result @ [Terminal tk]) + | process_rhs (Nonterminal (nt, prec) :: rhs) result = + process_rhs rhs (result @ [Nonterminal (convert_tag nt, prec)]); - (*convert tags in productions*) - fun process_prods [] result = result - | process_prods ((rhs, id, prec) :: ps) result = - process_prods ps ((lhs_tag, (process_rhs rhs [], id, prec)) - :: result); - in process_nt (nt-1) (process_prods nt_prods [] @ result) end; + (*convert tags in productions*) + fun process_prods [] result = result + | process_prods ((rhs, id, prec) :: ps) result = + process_prods ps ((lhs_tag, (process_rhs rhs [], id, prec)) :: result); + in process_nt (nt - 1) (process_prods nt_prods [] @ result) end; - val raw_prods = chain_prods @ process_nt (nt_count2-1) []; + val raw_prods = chain_prods @ process_nt (nt_count2 - 1) []; val prods1' = - let fun get_prod i = if i < nt_count1 then Vector.sub (prods1, i) - else (([], []), []); + let + fun get_prod i = + if i < nt_count1 then Vector.sub (prods1, i) + else (([], []), []); in Array.tabulate (nt_count1', get_prod) end; val fromto_chains = inverse_chains chains1 []; @@ -590,9 +615,14 @@ add_prods prods1' fromto_chains lambdas1 (SOME prod_count1) raw_prods; val chains' = inverse_chains fromto_chains' []; - in Gram {nt_count = nt_count1', prod_count = prod_count1', - tags = tags1', chains = chains', lambdas = lambdas', - prods = Array.vector prods1'} + in + Gram + {nt_count = nt_count1', + prod_count = prod_count1', + tags = tags1', + chains = chains', + lambdas = lambdas', + prods = Array.vector prods1'} end; @@ -603,11 +633,11 @@ Tip of Lexicon.token; type state = - nt_tag * int * (*identification and production precedence*) - parsetree list * (*already parsed nonterminals on rhs*) - symb list * (*rest of rhs*) - string * (*name of production*) - int; (*index for previous state list*) + nt_tag * int * (*identification and production precedence*) + parsetree list * (*already parsed nonterminals on rhs*) + symb list * (*rest of rhs*) + string * (*name of production*) + int; (*index for previous state list*) (*Get all rhss with precedence >= minPrec*) @@ -627,8 +657,9 @@ fun conc (t: parsetree list, prec:int) [] = (NONE, [(t, prec)]) | conc (t, prec) ((t', prec') :: ts) = if t = t' then - (SOME prec', if prec' >= prec then (t', prec') :: ts - else (t, prec) :: ts) + (SOME prec', + if prec' >= prec then (t', prec') :: ts + else (t, prec) :: ts) else let val (n, ts') = conc (t, prec) ts in (n, (t', prec') :: ts') end; @@ -644,16 +675,16 @@ (*Replace entry in used*) fun update_prec (A: nt_tag, prec) used = - let fun update ((hd as (B, (_, ts))) :: used, used') = - if A = B - then used' @ ((A, (prec, ts)) :: used) - else update (used, hd :: used') + let + fun update ((hd as (B, (_, ts))) :: used, used') = + if A = B + then used' @ ((A, (prec, ts)) :: used) + else update (used, hd :: used') in update (used, []) end; fun getS A maxPrec Si = filter - (fn (_, _, _, Nonterminal (B, prec) :: _, _, _) - => A = B andalso prec <= maxPrec + (fn (_, _, _, Nonterminal (B, prec) :: _, _, _) => A = B andalso prec <= maxPrec | _ => false) Si; fun getS' A maxPrec minPrec Si = @@ -664,15 +695,13 @@ fun getStates Estate i ii A maxPrec = filter - (fn (_, _, _, Nonterminal (B, prec) :: _, _, _) - => A = B andalso prec <= maxPrec + (fn (_, _, _, Nonterminal (B, prec) :: _, _, _) => A = B andalso prec <= maxPrec | _ => false) (Array.sub (Estate, ii)); fun movedot_term (A, j, ts, Terminal a :: sa, id, i) c = - if Lexicon.valued_token c then - (A, j, ts @ [Tip c], sa, id, i) + if Lexicon.valued_token c then (A, j, ts @ [Tip c], sa, id, i) else (A, j, ts, sa, id, i); fun movedot_nonterm ts (A, j, tss, Nonterminal _ :: sa, id, i) = @@ -692,17 +721,18 @@ be started by specified token*) fun prods_for prods chains include_none tk nts = let - fun token_assoc (list, key) = - let fun assoc [] result = result - | assoc ((keyi, pi) :: pairs) result = - if is_some keyi andalso Lexicon.matching_tokens (the keyi, key) - orelse include_none andalso is_none keyi then - assoc pairs (pi @ result) - else assoc pairs result; - in assoc list [] end; + fun token_assoc (list, key) = + let + fun assoc [] result = result + | assoc ((keyi, pi) :: pairs) result = + if is_some keyi andalso Lexicon.matching_tokens (the keyi, key) + orelse include_none andalso is_none keyi then + assoc pairs (pi @ result) + else assoc pairs result; + in assoc list [] end; - fun get_prods [] result = result - | get_prods (nt :: nts) result = + fun get_prods [] result = result + | get_prods (nt :: nts) result = let val nt_prods = snd (Vector.sub (prods, nt)); in get_prods nts ((token_assoc (nt_prods, tk)) @ result) end; in get_prods (connected_with chains nts nts) [] end; @@ -716,14 +746,14 @@ | processS used (S :: States) (Si, Sii) = (case S of (_, _, _, Nonterminal (nt, minPrec) :: _, _, _) => - let (*predictor operation*) + let (*predictor operation*) val (used', new_states) = (case AList.lookup (op =) used nt of - SOME (usedPrec, l) => (*nonterminal has been processed*) + SOME (usedPrec, l) => (*nonterminal has been processed*) if usedPrec <= minPrec then - (*wanted precedence has been processed*) + (*wanted precedence has been processed*) (used, movedot_lambda S l) - else (*wanted precedence hasn't been parsed yet*) + else (*wanted precedence hasn't been parsed yet*) let val tk_prods = all_prods_for nt; @@ -733,48 +763,48 @@ movedot_lambda S l @ States') end - | NONE => (*nonterminal is parsed for the first time*) - let val tk_prods = all_prods_for nt; - val States' = mkStates i minPrec nt - (getRHS minPrec tk_prods); + | NONE => (*nonterminal is parsed for the first time*) + let + val tk_prods = all_prods_for nt; + val States' = mkStates i minPrec nt (getRHS minPrec tk_prods); in ((nt, (minPrec, [])) :: used, States') end); val dummy = - if not (!warned) andalso - length (new_states @ States) > (!branching_level) then + if not (! warned) andalso + length (new_states @ States) > ! branching_level then (warning "Currently parsed expression could be extremely ambiguous."; warned := true) else (); in processS used' (new_states @ States) (S :: Si, Sii) end - | (_, _, _, Terminal a :: _, _, _) => (*scanner operation*) + | (_, _, _, Terminal a :: _, _, _) => (*scanner operation*) processS used States (S :: Si, if Lexicon.matching_tokens (a, c) then movedot_term S c :: Sii else Sii) - | (A, prec, ts, [], id, j) => (*completer operation*) + | (A, prec, ts, [], id, j) => (*completer operation*) let val tt = if id = "" then ts else [Node (id, ts)] in - if j = i then (*lambda production?*) + if j = i then (*lambda production?*) let val (used', O) = update_trees used (A, (tt, prec)); in - case O of + (case O of NONE => - let val Slist = getS A prec Si; - val States' = map (movedot_nonterm tt) Slist; + let + val Slist = getS A prec Si; + val States' = map (movedot_nonterm tt) Slist; in processS used' (States' @ States) (S :: Si, Sii) end | SOME n => if n >= prec then processS used' States (S :: Si, Sii) else - let val Slist = getS' A prec n Si; - val States' = map (movedot_nonterm tt) Slist; - in processS used' (States' @ States) (S :: Si, Sii) end + let + val Slist = getS' A prec n Si; + val States' = map (movedot_nonterm tt) Slist; + in processS used' (States' @ States) (S :: Si, Sii) end) end else let val Slist = getStates Estate i j A prec - in processS used (map (movedot_nonterm tt) Slist @ States) - (S :: Si, Sii) - end + in processS used (map (movedot_nonterm tt) Slist @ States) (S :: Si, Sii) end end) in processS [] states ([], []) end; @@ -793,14 +823,14 @@ [Pretty.str "\""]))) end | s => - (case indata of - [] => Array.sub (stateset, i) - | c :: cs => - let val (si, sii) = PROCESSS warned prods chains stateset i c s; - in Array.update (stateset, i, si); - Array.update (stateset, i + 1, sii); - produce warned prods tags chains stateset (i + 1) cs c - end)); + (case indata of + [] => Array.sub (stateset, i) + | c :: cs => + let val (si, sii) = PROCESSS warned prods chains stateset i c s; + in Array.update (stateset, i, si); + Array.update (stateset, i + 1, sii); + produce warned prods tags chains stateset (i + 1) cs c + end)); fun get_trees l = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) l; @@ -814,8 +844,8 @@ val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal Lexicon.eof], "", 0)]; val s = length indata + 1; val Estate = Array.array (s, []); + val _ = Array.update (Estate, 0, S0); in - Array.update (Estate, 0, S0); get_trees (produce (Unsynchronized.ref false) prods tags chains Estate 0 indata Lexicon.eof) end;