| author | wenzelm | 
| Wed, 29 Jun 2005 15:13:38 +0200 | |
| changeset 16608 | 4f8d7b83c7e2 | 
| parent 15979 | c81578ac2d31 | 
| child 16668 | fdb4992cf1d2 | 
| permissions | -rw-r--r-- | 
| 18 | 1 | (* Title: Pure/Syntax/parser.ML | 
| 46 | 2 | ID: $Id$ | 
| 2186 | 3 | Author: Carsten Clasohm, Sonia Mahjoub, and Markus Wenzel, TU Muenchen | 
| 18 | 4 | |
| 15979 | 5 | General context-free parser for the inner syntax of terms, types, etc. | 
| 18 | 6 | *) | 
| 7 | ||
| 8 | signature PARSER = | |
| 15752 | 9 | sig | 
| 1507 | 10 | type gram | 
| 11 | val empty_gram: gram | |
| 12 | val extend_gram: gram -> SynExt.xprod list -> gram | |
| 15752 | 13 | val make_gram: SynExt.xprod list -> gram | 
| 1507 | 14 | val merge_grams: gram -> gram -> gram | 
| 15 | val pretty_gram: gram -> Pretty.T list | |
| 16 | datatype parsetree = | |
| 17 | Node of string * parsetree list | | |
| 18 | Tip of Lexicon.token | |
| 19 | val parse: gram -> string -> Lexicon.token list -> parsetree list | |
| 20 | val branching_level: int ref | |
| 15752 | 21 | end; | 
| 1507 | 22 | |
| 15752 | 23 | structure Parser: PARSER = | 
| 18 | 24 | struct | 
| 15752 | 25 | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 26 | open Lexicon SynExt; | 
| 18 | 27 | |
| 15752 | 28 | |
| 18 | 29 | (** datatype gram **) | 
| 30 | ||
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 31 | type nt_tag = int; (*production for the NTs are stored in an array | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 32 | so we can identify NTs by their index*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 33 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 34 | datatype symb = Terminal of token | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 35 | | Nonterminal of nt_tag * int; (*(tag, precedence)*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 36 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 37 | type nt_gram = ((nt_tag list * token list) * | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 38 | (token option * (symb list * string * int) list) list); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 39 | (*(([dependent_nts], [start_tokens]), | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 40 | [(start_token, [(rhs, name, prio)])])*) | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 41 | (*depent_nts is a list of all NTs whose lookahead | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 42 | depends on this NT's lookahead*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 43 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 44 | datatype gram = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 45 |   Gram of {nt_count: int, prod_count: int,
 | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 46 | tags: nt_tag Symtab.table, | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 47 | chains: (nt_tag * nt_tag list) list, (*[(to, [from])]*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 48 | lambdas: nt_tag list, | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 49 | prods: nt_gram Array.array}; | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 50 | (*"tags" is used to map NT names (i.e. strings) to tags; | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 51 | chain productions are not stored as normal productions | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 52 | but instead as an entry in "chains"; | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 53 | lambda productions are stored as normal productions | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 54 | and also as an entry in "lambdas"*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 55 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 56 | val UnknownStart = EndToken; (*productions for which no starting token is | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 57 | known yet are associated with this token*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 58 | |
| 15752 | 59 | (* get all NTs that are connected with a list of NTs | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 60 | (used for expanding chain list)*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 61 | fun connected_with _ [] relatives = relatives | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 62 | | connected_with chains (root :: roots) relatives = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 63 | let val branches = (assocs chains root) \\ relatives; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 64 | in connected_with chains (branches @ roots) (branches @ relatives) end; | 
| 18 | 65 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 66 | (* convert productions to grammar; | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 67 | N.B. that the chains parameter has the form [(from, [to])]; | 
| 15531 | 68 | prod_count is of type "int option" and is only updated if it is <> NONE*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 69 | fun add_prods _ chains lambdas prod_count [] = (chains, lambdas, prod_count) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 70 | | add_prods prods chains lambdas prod_count | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 71 | ((lhs, new_prod as (rhs, name, pri)) :: ps) = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 72 | let | 
| 15752 | 73 | val chain_from = case (pri, rhs) of (~1, [Nonterminal (id, ~1)]) => SOME id | _ => NONE; | 
| 74 | ||
| 75 | (*store chain if it does not already exist*) | |
| 76 | val (new_chain, chains') = case chain_from of NONE => (NONE, chains) | SOME from => | |
| 77 | let val old_tos = assocs chains from in | |
| 78 | if lhs mem old_tos then (NONE, chains) | |
| 79 | else (SOME from, overwrite (chains, (from, lhs ins old_tos))) | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 80 | end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 81 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 82 | (*propagate new chain in lookahead and lambda lists; | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 83 | added_starts is used later to associate existing | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 84 | productions with new starting tokens*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 85 | val (added_starts, lambdas') = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 86 | if is_none new_chain then ([], lambdas) else | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 87 | let (*lookahead of chain's source*) | 
| 15979 | 88 | val ((from_nts, from_tks), _) = Array.sub (prods, the new_chain); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 89 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 90 | (*copy from's lookahead to chain's destinations*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 91 | fun copy_lookahead [] added = added | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 92 | | copy_lookahead (to :: tos) added = | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 93 | let | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 94 | val ((to_nts, to_tks), ps) = Array.sub (prods, to); | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 95 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 96 | val new_tks = from_tks \\ to_tks; (*added lookahead tokens*) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 97 | in Array.update (prods, to, ((to_nts, to_tks @ new_tks), ps)); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 98 | copy_lookahead tos (if null new_tks then added | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 99 | else (to, new_tks) :: added) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 100 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 101 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 102 | val tos = connected_with chains' [lhs] [lhs]; | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 103 | in (copy_lookahead tos [], | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 104 | (if lhs mem lambdas then tos else []) union lambdas) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 105 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 106 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 107 | (*test if new production can produce lambda | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 108 | (rhs must either be empty or only consist of lambda NTs)*) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 109 | val (new_lambda, lambdas') = | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 110 | if forall (fn (Nonterminal (id, _)) => id mem lambdas' | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 111 | | (Terminal _) => false) rhs then | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 112 | (true, lambdas' union (connected_with chains' [lhs] [lhs])) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 113 | else | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 114 | (false, lambdas'); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 115 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 116 | (*list optional terminal and all nonterminals on which the lookahead | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 117 | of a production depends*) | 
| 15531 | 118 | fun lookahead_dependency _ [] nts = (NONE, nts) | 
| 119 | | lookahead_dependency _ ((Terminal tk) :: _) nts = (SOME tk, nts) | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 120 | | lookahead_dependency lambdas ((Nonterminal (nt, _)) :: symbs) nts = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 121 | if nt mem lambdas then | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 122 | lookahead_dependency lambdas symbs (nt :: nts) | 
| 15531 | 123 | else (NONE, nt :: nts); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 124 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 125 | (*get all known starting tokens for a nonterminal*) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 126 | fun starts_for_nt nt = snd (fst (Array.sub (prods, nt))); | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 127 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 128 | val token_union = gen_union matching_tokens; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 129 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 130 | (*update prods, lookaheads, and lambdas according to new lambda NTs*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 131 | val (added_starts', lambdas') = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 132 | let | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 133 | (*propagate added lambda NT*) | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 134 | fun propagate_lambda [] added_starts lambdas= (added_starts, lambdas) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 135 | | propagate_lambda (l :: ls) added_starts lambdas = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 136 | let | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 137 | (*get lookahead for lambda NT*) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 138 | val ((dependent, l_starts), _) = Array.sub (prods, l); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 139 | |
| 15752 | 140 | (*check productions whose lookahead may depend on lambda NT*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 141 | fun examine_prods [] add_lambda nt_dependencies added_tks | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 142 | nt_prods = | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 143 | (add_lambda, nt_dependencies, added_tks, nt_prods) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 144 | | examine_prods ((p as (rhs, _, _)) :: ps) add_lambda | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 145 | nt_dependencies added_tks nt_prods = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 146 | let val (tk, nts) = lookahead_dependency lambdas rhs []; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 147 | in | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 148 | if l mem nts then (*update production's lookahead*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 149 | let | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 150 | val new_lambda = is_none tk andalso nts subset lambdas; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 151 | |
| 15979 | 152 | val new_tks = (if is_some tk then [the tk] else []) @ | 
| 15570 | 153 | Library.foldl token_union ([], map starts_for_nt nts) \\ | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 154 | l_starts; | 
| 15752 | 155 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 156 | val added_tks' = token_union (new_tks, added_tks); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 157 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 158 | val nt_dependencies' = nts union nt_dependencies; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 159 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 160 | (*associate production with new starting tokens*) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 161 | fun copy [] nt_prods = nt_prods | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 162 | | copy (tk :: tks) nt_prods = | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 163 | let val old_prods = assocs nt_prods tk; | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 164 | |
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 165 | val prods' = p :: old_prods; | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 166 | in copy tks (overwrite (nt_prods, (tk, prods'))) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 167 | end; | 
| 15752 | 168 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 169 | val nt_prods' = | 
| 15531 | 170 | let val new_opt_tks = map SOME new_tks; | 
| 171 | in copy ((if new_lambda then [NONE] else []) @ | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 172 | new_opt_tks) nt_prods | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 173 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 174 | in examine_prods ps (add_lambda orelse new_lambda) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 175 | nt_dependencies' added_tks' nt_prods' | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 176 | end | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 177 | else (*skip production*) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 178 | examine_prods ps add_lambda nt_dependencies | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 179 | added_tks nt_prods | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 180 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 181 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 182 | (*check each NT whose lookahead depends on new lambda NT*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 183 | fun process_nts [] added_lambdas added_starts = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 184 | (added_lambdas, added_starts) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 185 | | process_nts (nt :: nts) added_lambdas added_starts = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 186 | let | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 187 | val (lookahead as (old_nts, old_tks), nt_prods) = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 188 | Array.sub (prods, nt); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 189 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 190 | (*existing productions whose lookahead may depend on l*) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 191 | val tk_prods = | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 192 | assocs nt_prods | 
| 15570 | 193 | (SOME (hd l_starts handle Empty => UnknownStart)); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 194 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 195 | (*add_lambda is true if an existing production of the nt | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 196 | produces lambda due to the new lambda NT l*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 197 | val (add_lambda, nt_dependencies, added_tks, nt_prods') = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 198 | examine_prods tk_prods false [] [] nt_prods; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 199 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 200 | val added_nts = nt_dependencies \\ old_nts; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 201 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 202 | val added_lambdas' = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 203 | if add_lambda then nt :: added_lambdas | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 204 | else added_lambdas; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 205 | in Array.update (prods, nt, | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 206 | ((added_nts @ old_nts, old_tks @ added_tks), | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 207 | nt_prods')); | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 208 | (*N.B. that because the tks component | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 209 | is used to access existing | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 210 | productions we have to add new | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 211 | tokens at the _end_ of the list*) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 212 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 213 | if null added_tks then | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 214 | process_nts nts added_lambdas' added_starts | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 215 | else | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 216 | process_nts nts added_lambdas' | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 217 | ((nt, added_tks) :: added_starts) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 218 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 219 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 220 | val (added_lambdas, added_starts') = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 221 | process_nts dependent [] added_starts; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 222 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 223 | val added_lambdas' = added_lambdas \\ lambdas; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 224 | in propagate_lambda (ls @ added_lambdas') added_starts' | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 225 | (added_lambdas' @ lambdas) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 226 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 227 | in propagate_lambda (lambdas' \\ lambdas) added_starts lambdas' end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 228 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 229 | (*insert production into grammar*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 230 | val (added_starts', prod_count') = | 
| 15979 | 231 | if is_some chain_from then (added_starts', prod_count) (*don't store chain production*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 232 | else let | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 233 | (*lookahead tokens of new production and on which | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 234 | NTs lookahead depends*) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 235 | val (start_tk, start_nts) = lookahead_dependency lambdas' rhs []; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 236 | |
| 15570 | 237 | val start_tks = Library.foldl token_union | 
| 15979 | 238 | (if is_some start_tk then [the start_tk] else [], | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 239 | map starts_for_nt start_nts); | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 240 | |
| 15531 | 241 | val opt_starts = (if new_lambda then [NONE] | 
| 242 | else if null start_tks then [SOME UnknownStart] | |
| 243 | else []) @ (map SOME start_tks); | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 244 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 245 | (*add lhs NT to list of dependent NTs in lookahead*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 246 | fun add_nts [] = () | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 247 | | add_nts (nt :: nts) = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 248 | let val ((old_nts, old_tks), ps) = Array.sub (prods, nt); | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 249 | in if lhs mem old_nts then () | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 250 | else Array.update (prods, nt, ((lhs :: old_nts, old_tks), ps)) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 251 | end; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 252 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 253 | (*add new start tokens to chained NTs' lookahead list; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 254 | also store new production for lhs NT*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 255 | fun add_tks [] added prod_count = (added, prod_count) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 256 | | add_tks (nt :: nts) added prod_count = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 257 | let | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 258 | val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 259 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 260 | val new_tks = gen_rems matching_tokens (start_tks, old_tks); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 261 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 262 | (*store new production*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 263 | fun store [] prods is_new = | 
| 15979 | 264 | (prods, if is_some prod_count andalso is_new then | 
| 15570 | 265 | Option.map (fn x => x+1) prod_count | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 266 | else prod_count, is_new) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 267 | | store (tk :: tks) prods is_new = | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 268 | let val tk_prods = assocs prods tk; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 269 | |
| 15531 | 270 | (*if prod_count = NONE then we can assume that | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 271 | grammar does not contain new production already*) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 272 | val (tk_prods', is_new') = | 
| 15979 | 273 | if is_some prod_count then | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 274 | if new_prod mem tk_prods then (tk_prods, false) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 275 | else (new_prod :: tk_prods, true) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 276 | else (new_prod :: tk_prods, true); | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 277 | |
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 278 | val prods' = if is_new' then | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 279 | overwrite (prods, (tk, tk_prods')) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 280 | else prods; | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 281 | in store tks prods' (is_new orelse is_new') end; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 282 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 283 | val (nt_prods', prod_count', changed) = | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 284 | if nt = lhs then store opt_starts nt_prods false | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 285 | else (nt_prods, prod_count, false); | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 286 | in if not changed andalso null new_tks then () | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 287 | else Array.update (prods, nt, ((old_nts, old_tks @ new_tks), | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 288 | nt_prods')); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 289 | add_tks nts (if null new_tks then added | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 290 | else (nt, new_tks) :: added) prod_count' | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 291 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 292 | in add_nts start_nts; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 293 | add_tks (connected_with chains' [lhs] [lhs]) [] prod_count | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 294 | end; | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 295 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 296 | (*associate productions with new lookaheads*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 297 | val dummy = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 298 | let | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 299 | (*propagate added start tokens*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 300 | fun add_starts [] = () | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 301 | | add_starts ((changed_nt, new_tks) :: starts) = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 302 | let | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 303 | (*token under which old productions which | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 304 | depend on changed_nt could be stored*) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 305 | val key = | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 306 | case find_first (fn t => not (t mem new_tks)) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 307 | (starts_for_nt changed_nt) of | 
| 15531 | 308 | NONE => SOME UnknownStart | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 309 | | t => t; | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 310 | |
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 311 | (*copy productions whose lookahead depends on changed_nt; | 
| 15531 | 312 | if key = SOME UnknownToken then tk_prods is used to hold | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 313 | the productions not copied*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 314 | fun update_prods [] result = result | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 315 | | update_prods ((p as (rhs, _, _)) :: ps) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 316 | (tk_prods, nt_prods) = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 317 | let | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 318 | (*lookahead dependency for production*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 319 | val (tk, depends) = lookahead_dependency lambdas' rhs []; | 
| 18 | 320 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 321 | (*test if this production has to be copied*) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 322 | val update = changed_nt mem depends; | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 323 | |
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 324 | (*test if production could already be associated with | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 325 | a member of new_tks*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 326 | val lambda = length depends > 1 orelse | 
| 15979 | 327 | not (null depends) andalso is_some tk | 
| 328 | andalso the tk mem new_tks; | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 329 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 330 | (*associate production with new starting tokens*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 331 | fun copy [] nt_prods = nt_prods | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 332 | | copy (tk :: tks) nt_prods = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 333 | let | 
| 15531 | 334 | val tk_prods = assocs nt_prods (SOME tk); | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 335 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 336 | val tk_prods' = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 337 | if not lambda then p :: tk_prods | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 338 | else p ins tk_prods; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 339 | (*if production depends on lambda NT we | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 340 | have to look for duplicates*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 341 | in copy tks | 
| 15531 | 342 | (overwrite (nt_prods, (SOME tk, tk_prods'))) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 343 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 344 | val result = | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 345 | if update then | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 346 | (tk_prods, copy new_tks nt_prods) | 
| 15531 | 347 | else if key = SOME UnknownStart then | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 348 | (p :: tk_prods, nt_prods) | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 349 | else (tk_prods, nt_prods); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 350 | in update_prods ps result end; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 351 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 352 | (*copy existing productions for new starting tokens*) | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 353 | fun process_nts [] added = added | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 354 | | process_nts (nt :: nts) added = | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 355 | let | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 356 | val (lookahead as (old_nts, old_tks), nt_prods) = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 357 | Array.sub (prods, nt); | 
| 377 
ab8917806779
lookaheads are now computed faster (during the grammar is built)
 clasohm parents: 
373diff
changeset | 358 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 359 | val tk_prods = assocs nt_prods key; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 360 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 361 | (*associate productions with new lookahead tokens*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 362 | val (tk_prods', nt_prods') = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 363 | update_prods tk_prods ([], nt_prods); | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 364 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 365 | val nt_prods' = | 
| 15531 | 366 | if key = SOME UnknownStart then | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 367 | overwrite (nt_prods', (key, tk_prods')) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 368 | else nt_prods'; | 
| 18 | 369 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 370 | val added_tks = | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 371 | gen_rems matching_tokens (new_tks, old_tks); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 372 | in if null added_tks then | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 373 | (Array.update (prods, nt, (lookahead, nt_prods')); | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 374 | process_nts nts added) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 375 | else | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 376 | (Array.update (prods, nt, | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 377 | ((old_nts, added_tks @ old_tks), nt_prods')); | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 378 | process_nts nts ((nt, added_tks) :: added)) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 379 | end; | 
| 18 | 380 | |
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 381 | val ((dependent, _), _) = Array.sub (prods, changed_nt); | 
| 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 382 | in add_starts (starts @ (process_nts dependent [])) end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 383 | in add_starts added_starts' end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 384 | in add_prods prods chains' lambdas' prod_count ps end; | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 385 | |
| 18 | 386 | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 387 | (* pretty_gram *) | 
| 18 | 388 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 389 | fun pretty_gram (Gram {tags, prods, chains, ...}) =
 | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 390 | let | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 391 | fun pretty_name name = [Pretty.str (name ^ " =")]; | 
| 18 | 392 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 393 | val taglist = Symtab.dest tags; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 394 | |
| 14600 
ba51bc239716
Replaced quote by Pretty.quote / Library.quote, since quote now refers to Symbol.quote
 berghofe parents: 
13057diff
changeset | 395 | fun pretty_symb (Terminal (Token s)) = Pretty.quote (Pretty.str s) | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 396 | | pretty_symb (Terminal tok) = Pretty.str (str_of_token tok) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 397 | | pretty_symb (Nonterminal (tag, p)) = | 
| 15979 | 398 | let val name = fst (the (find_first (fn (n, t) => t = tag) taglist)); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 399 | in Pretty.str (name ^ "[" ^ string_of_int p ^ "]") end; | 
| 18 | 400 | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 401 | fun pretty_const "" = [] | 
| 14600 
ba51bc239716
Replaced quote by Pretty.quote / Library.quote, since quote now refers to Symbol.quote
 berghofe parents: 
13057diff
changeset | 402 |       | pretty_const c = [Pretty.str ("=> " ^ Library.quote c)];
 | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 403 | |
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 404 |     fun pretty_pri p = [Pretty.str ("(" ^ string_of_int p ^ ")")];
 | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 405 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 406 | fun pretty_prod name (symbs, const, pri) = | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 407 | Pretty.block (Pretty.breaks (pretty_name name @ | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 408 | map pretty_symb symbs @ pretty_const const @ pretty_pri pri)); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 409 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 410 | fun pretty_nt (name, tag) = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 411 | let | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 412 | fun prod_of_chain from = ([Nonterminal (from, ~1)], "", ~1); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 413 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 414 | val nt_prods = | 
| 15570 | 415 | Library.foldl (op union) ([], map snd (snd (Array.sub (prods, tag)))) @ | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 416 | map prod_of_chain (assocs chains tag); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 417 | in map (pretty_prod name) nt_prods end; | 
| 15752 | 418 | |
| 15570 | 419 | in List.concat (map pretty_nt taglist) end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 420 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 421 | |
| 1438 | 422 | (** Operations on gramars **) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 423 | |
| 1438 | 424 | (*The mother of all grammars*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 425 | val empty_gram = Gram {nt_count = 0, prod_count = 0,
 | 
| 4487 | 426 | tags = Symtab.empty, chains = [], lambdas = [], | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 427 | prods = Array.array (0, (([], []), []))}; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 428 | |
| 1438 | 429 | |
| 430 | (*Invert list of chain productions*) | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 431 | fun inverse_chains [] result = result | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 432 | | inverse_chains ((root, branches) :: cs) result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 433 | let fun add [] result = result | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 434 | | add (id :: ids) result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 435 | let val old = assocs result id; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 436 | in add ids (overwrite (result, (id, root :: old))) end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 437 | in inverse_chains cs (add branches result) end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 438 | |
| 1438 | 439 | |
| 440 | (*Add productions to a grammar*) | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 441 | fun extend_gram gram [] = gram | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 442 |   | extend_gram (Gram {nt_count, prod_count, tags, chains, lambdas, prods})
 | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 443 | xprods = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 444 | let | 
| 1438 | 445 | (*Get tag for existing nonterminal or create a new one*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 446 | fun get_tag nt_count tags nt = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 447 | case Symtab.lookup (tags, nt) of | 
| 15531 | 448 | SOME tag => (nt_count, tags, tag) | 
| 449 | | NONE => (nt_count+1, Symtab.update_new ((nt, nt_count), tags), | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 450 | nt_count); | 
| 1438 | 451 | |
| 452 | (*Convert symbols to the form used by the parser; | |
| 453 | delimiters and predefined terms are stored as terminals, | |
| 454 | nonterminals are converted to integer tags*) | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 455 | fun symb_of [] nt_count tags result = (nt_count, tags, rev result) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 456 | | symb_of ((Delim s) :: ss) nt_count tags result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 457 | symb_of ss nt_count tags ((Terminal (Token s)) :: result) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 458 | | symb_of ((Argument (s, p)) :: ss) nt_count tags result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 459 | let | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 460 | val (nt_count', tags', new_symb) = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 461 | case predef_term s of | 
| 15531 | 462 | NONE => | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 463 | let val (nt_count', tags', s_tag) = get_tag nt_count tags s; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 464 | in (nt_count', tags', Nonterminal (s_tag, p)) end | 
| 15531 | 465 | | SOME tk => (nt_count, tags, Terminal tk); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 466 | in symb_of ss nt_count' tags' (new_symb :: result) end | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 467 | | symb_of (_ :: ss) nt_count tags result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 468 | symb_of ss nt_count tags result; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 469 | |
| 1438 | 470 | (*Convert list of productions by invoking symb_of for each of them*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 471 | fun prod_of [] nt_count prod_count tags result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 472 | (nt_count, prod_count, tags, result) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 473 | | prod_of ((XProd (lhs, xsymbs, const, pri)) :: ps) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 474 | nt_count prod_count tags result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 475 | let val (nt_count', tags', lhs_tag) = get_tag nt_count tags lhs; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 476 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 477 | val (nt_count'', tags'', prods) = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 478 | symb_of xsymbs nt_count' tags' []; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 479 | in prod_of ps nt_count'' (prod_count+1) tags'' | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 480 | ((lhs_tag, (prods, const, pri)) :: result) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 481 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 482 | |
| 1438 | 483 | val (nt_count', prod_count', tags', xprods') = | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 484 | prod_of xprods nt_count prod_count tags []; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 485 | |
| 1438 | 486 | (*Copy array containing productions of old grammar; | 
| 487 | this has to be done to preserve the old grammar while being able | |
| 488 | to change the array's content*) | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 489 | val prods' = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 490 | let fun get_prod i = if i < nt_count then Array.sub (prods, i) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 491 | else (([], []), []); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 492 | in Array.tabulate (nt_count', get_prod) end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 493 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 494 | val fromto_chains = inverse_chains chains []; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 495 | |
| 1438 | 496 | (*Add new productions to old ones*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 497 | val (fromto_chains', lambdas', _) = | 
| 15531 | 498 | add_prods prods' fromto_chains lambdas NONE xprods'; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 499 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 500 | val chains' = inverse_chains fromto_chains' []; | 
| 1177 | 501 |   in Gram {nt_count = nt_count', prod_count = prod_count', tags = tags',
 | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 502 | chains = chains', lambdas = lambdas', prods = prods'} | 
| 18 | 503 | end; | 
| 504 | ||
| 15752 | 505 | val make_gram = extend_gram empty_gram; | 
| 506 | ||
| 18 | 507 | |
| 1438 | 508 | (*Merge two grammars*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 509 | fun merge_grams gram_a gram_b = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 510 | let | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 511 | (*find out which grammar is bigger*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 512 |     val (Gram {nt_count = nt_count1, prod_count = prod_count1, tags = tags1,
 | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 513 | chains = chains1, lambdas = lambdas1, prods = prods1}, | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 514 |          Gram {nt_count = nt_count2, prod_count = prod_count2, tags = tags2,
 | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 515 | chains = chains2, lambdas = lambdas2, prods = prods2}) = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 516 |       let val Gram {prod_count = count_a, ...} = gram_a;
 | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 517 |           val Gram {prod_count = count_b, ...} = gram_b;
 | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 518 | in if count_a > count_b then (gram_a, gram_b) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 519 | else (gram_b, gram_a) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 520 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 521 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 522 | (*get existing tag from grammar1 or create a new one*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 523 | fun get_tag nt_count tags nt = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 524 | case Symtab.lookup (tags, nt) of | 
| 15531 | 525 | SOME tag => (nt_count, tags, tag) | 
| 526 | | NONE => (nt_count+1, Symtab.update_new ((nt, nt_count), tags), | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 527 | nt_count) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 528 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 529 | val ((nt_count1', tags1'), tag_table) = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 530 | let val tag_list = Symtab.dest tags2; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 531 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 532 | val table = Array.array (nt_count2, ~1); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 533 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 534 | fun store_tag nt_count tags ~1 = (nt_count, tags) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 535 | | store_tag nt_count tags tag = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 536 | let val (nt_count', tags', tag') = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 537 | get_tag nt_count tags | 
| 15979 | 538 | (fst (the (find_first (fn (n, t) => t = tag) tag_list))); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 539 | in Array.update (table, tag, tag'); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 540 | store_tag nt_count' tags' (tag-1) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 541 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 542 | in (store_tag nt_count1 tags1 (nt_count2-1), table) end; | 
| 15752 | 543 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 544 | (*convert grammar2 tag to grammar1 tag*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 545 | fun convert_tag tag = Array.sub (tag_table, tag); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 546 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 547 | (*convert chain list to raw productions*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 548 | fun mk_chain_prods [] result = result | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 549 | | mk_chain_prods ((to, froms) :: cs) result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 550 | let | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 551 | val to_tag = convert_tag to; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 552 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 553 | fun make [] result = result | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 554 | | make (from :: froms) result = make froms ((to_tag, | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 555 | ([Nonterminal (convert_tag from, ~1)], "", ~1)) :: result); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 556 | in mk_chain_prods cs (make froms [] @ result) end; | 
| 15752 | 557 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 558 | val chain_prods = mk_chain_prods chains2 []; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 559 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 560 | (*convert prods2 array to productions*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 561 | fun process_nt ~1 result = result | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 562 | | process_nt nt result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 563 | let | 
| 15570 | 564 | val nt_prods = Library.foldl (op union) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 565 | ([], map snd (snd (Array.sub (prods2, nt)))); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 566 | val lhs_tag = convert_tag nt; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 567 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 568 | (*convert tags in rhs*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 569 | fun process_rhs [] result = result | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 570 | | process_rhs (Terminal tk :: rhs) result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 571 | process_rhs rhs (result @ [Terminal tk]) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 572 | | process_rhs (Nonterminal (nt, prec) :: rhs) result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 573 | process_rhs rhs | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 574 | (result @ [Nonterminal (convert_tag nt, prec)]); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 575 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 576 | (*convert tags in productions*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 577 | fun process_prods [] result = result | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 578 | | process_prods ((rhs, id, prec) :: ps) result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 579 | process_prods ps ((lhs_tag, (process_rhs rhs [], id, prec)) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 580 | :: result); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 581 | in process_nt (nt-1) (process_prods nt_prods [] @ result) end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 582 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 583 | val raw_prods = chain_prods @ process_nt (nt_count2-1) []; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 584 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 585 | val prods1' = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 586 | let fun get_prod i = if i < nt_count1 then Array.sub (prods1, i) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 587 | else (([], []), []); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 588 | in Array.tabulate (nt_count1', get_prod) end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 589 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 590 | val fromto_chains = inverse_chains chains1 []; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 591 | |
| 15531 | 592 | val (fromto_chains', lambdas', SOME prod_count1') = | 
| 593 | add_prods prods1' fromto_chains lambdas1 (SOME prod_count1) raw_prods; | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 594 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 595 | val chains' = inverse_chains fromto_chains' []; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 596 |   in Gram {nt_count = nt_count1', prod_count = prod_count1',
 | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 597 | tags = tags1', chains = chains', lambdas = lambdas', | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 598 | prods = prods1'} | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 599 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 600 | |
| 18 | 601 | |
| 1438 | 602 | (** Parser **) | 
| 18 | 603 | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 604 | datatype parsetree = | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 605 | Node of string * parsetree list | | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 606 | Tip of token; | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 607 | |
| 18 | 608 | type state = | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 609 | nt_tag * int * (*identification and production precedence*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 610 | parsetree list * (*already parsed nonterminals on rhs*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 611 | symb list * (*rest of rhs*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 612 | string * (*name of production*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 613 | int; (*index for previous state list*) | 
| 18 | 614 | |
| 615 | ||
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 616 | (*Get all rhss with precedence >= minPrec*) | 
| 15570 | 617 | fun getRHS minPrec = List.filter (fn (_, _, prec:int) => prec >= minPrec); | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 618 | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 619 | (*Get all rhss with precedence >= minPrec and < maxPrec*) | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 620 | fun getRHS' minPrec maxPrec = | 
| 15570 | 621 | List.filter (fn (_, _, prec:int) => prec >= minPrec andalso prec < maxPrec); | 
| 18 | 622 | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 623 | (*Make states using a list of rhss*) | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 624 | fun mkStates i minPrec lhsID rhss = | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 625 | let fun mkState (rhs, id, prodPrec) = (lhsID, prodPrec, [], rhs, id, i); | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 626 | in map mkState rhss end; | 
| 697 
40f72ab196f8
changed warning for extremely ambiguous expressions
 clasohm parents: 
682diff
changeset | 627 | |
| 15752 | 628 | (*Add parse tree to list and eliminate duplicates | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 629 | saving the maximum precedence*) | 
| 15531 | 630 | fun conc (t, prec:int) [] = (NONE, [(t, prec)]) | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 631 | | conc (t, prec) ((t', prec') :: ts) = | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 632 | if t = t' then | 
| 15752 | 633 | (SOME prec', if prec' >= prec then (t', prec') :: ts | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 634 | else (t, prec) :: ts) | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 635 | else | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 636 | let val (n, ts') = conc (t, prec) ts | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 637 | in (n, (t', prec') :: ts') end; | 
| 18 | 638 | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 639 | (*Update entry in used*) | 
| 697 
40f72ab196f8
changed warning for extremely ambiguous expressions
 clasohm parents: 
682diff
changeset | 640 | fun update_trees ((B, (i, ts)) :: used) (A, t) = | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 641 | if A = B then | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 642 | let val (n, ts') = conc t ts | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 643 | in ((A, (i, ts')) :: used, n) end | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 644 | else | 
| 697 
40f72ab196f8
changed warning for extremely ambiguous expressions
 clasohm parents: 
682diff
changeset | 645 | let val (used', n) = update_trees used (A, t) | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 646 | in ((B, (i, ts)) :: used', n) end; | 
| 18 | 647 | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 648 | (*Replace entry in used*) | 
| 697 
40f72ab196f8
changed warning for extremely ambiguous expressions
 clasohm parents: 
682diff
changeset | 649 | fun update_prec (A, prec) used = | 
| 
40f72ab196f8
changed warning for extremely ambiguous expressions
 clasohm parents: 
682diff
changeset | 650 | let fun update ((hd as (B, (_, ts))) :: used, used') = | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 651 | if A = B | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 652 | then used' @ ((A, (prec, ts)) :: used) | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 653 | else update (used, hd :: used') | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 654 | in update (used, []) end; | 
| 18 | 655 | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 656 | fun getS A maxPrec Si = | 
| 15570 | 657 | List.filter | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 658 | (fn (_, _, _, Nonterminal (B, prec) :: _, _, _) | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 659 | => A = B andalso prec <= maxPrec | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 660 | | _ => false) Si; | 
| 18 | 661 | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 662 | fun getS' A maxPrec minPrec Si = | 
| 15570 | 663 | List.filter | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 664 | (fn (_, _, _, Nonterminal (B, prec) :: _, _, _) | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 665 | => A = B andalso prec > minPrec andalso prec <= maxPrec | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 666 | | _ => false) Si; | 
| 18 | 667 | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 668 | fun getStates Estate i ii A maxPrec = | 
| 15570 | 669 | List.filter | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 670 | (fn (_, _, _, Nonterminal (B, prec) :: _, _, _) | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 671 | => A = B andalso prec <= maxPrec | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 672 | | _ => false) | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 673 | (Array.sub (Estate, ii)); | 
| 18 | 674 | |
| 675 | ||
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 676 | fun movedot_term (A, j, ts, Terminal a :: sa, id, i) c = | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 677 | if valued_token c then | 
| 697 
40f72ab196f8
changed warning for extremely ambiguous expressions
 clasohm parents: 
682diff
changeset | 678 | (A, j, ts @ [Tip c], sa, id, i) | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 679 | else (A, j, ts, sa, id, i); | 
| 18 | 680 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 681 | fun movedot_nonterm ts (A, j, tss, Nonterminal _ :: sa, id, i) = | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 682 | (A, j, tss @ ts, sa, id, i); | 
| 18 | 683 | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 684 | fun movedot_lambda _ [] = [] | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 685 | | movedot_lambda (B, j, tss, Nonterminal (A, k) :: sa, id, i) ((t, ki) :: ts) = | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 686 | if k <= ki then | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 687 | (B, j, tss @ t, sa, id, i) :: | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 688 | movedot_lambda (B, j, tss, Nonterminal (A, k) :: sa, id, i) ts | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 689 | else movedot_lambda (B, j, tss, Nonterminal (A, k) :: sa, id, i) ts; | 
| 18 | 690 | |
| 691 | ||
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 692 | val warned = ref false; (*flag for warning message*) | 
| 13057 
805de10ca485
increased treshold for "this expression could be extremely ambigous" warning
 kleing parents: 
12785diff
changeset | 693 | val branching_level = ref 600; (*trigger value for warnings*) | 
| 18 | 694 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 695 | (*get all productions of a NT and NTs chained to it which can | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 696 | be started by specified token*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 697 | fun prods_for prods chains include_none tk nts = | 
| 15979 | 698 | let | 
| 699 | fun token_assoc (list, key) = | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 700 | let fun assoc [] result = result | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 701 | | assoc ((keyi, pi) :: pairs) result = | 
| 15979 | 702 | if is_some keyi andalso matching_tokens (the keyi, key) | 
| 15752 | 703 | orelse include_none andalso is_none keyi then | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 704 | assoc pairs (pi @ result) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 705 | else assoc pairs result; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 706 | in assoc list [] end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 707 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 708 | fun get_prods [] result = result | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 709 | | get_prods (nt :: nts) result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 710 | let val nt_prods = snd (Array.sub (prods, nt)); | 
| 15979 | 711 | in get_prods nts ((token_assoc (nt_prods, tk)) @ result) end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 712 | in get_prods (connected_with chains nts nts) [] end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 713 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 714 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 715 | fun PROCESSS prods chains Estate i c states = | 
| 18 | 716 | let | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 717 | fun all_prods_for nt = prods_for prods chains true c [nt]; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 718 | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 719 | fun processS used [] (Si, Sii) = (Si, Sii) | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 720 | | processS used (S :: States) (Si, Sii) = | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 721 | (case S of | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 722 | (_, _, _, Nonterminal (nt, minPrec) :: _, _, _) => | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 723 | let (*predictor operation*) | 
| 697 
40f72ab196f8
changed warning for extremely ambiguous expressions
 clasohm parents: 
682diff
changeset | 724 | val (used', new_states) = | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 725 | (case assoc (used, nt) of | 
| 15531 | 726 | SOME (usedPrec, l) => (*nonterminal has been processed*) | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 727 | if usedPrec <= minPrec then | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 728 | (*wanted precedence has been processed*) | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 729 | (used, movedot_lambda S l) | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 730 | else (*wanted precedence hasn't been parsed yet*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 731 | let | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 732 | val tk_prods = all_prods_for nt; | 
| 15752 | 733 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 734 | val States' = mkStates i minPrec nt | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 735 | (getRHS' minPrec usedPrec tk_prods); | 
| 15752 | 736 | in (update_prec (nt, minPrec) used, | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 737 | movedot_lambda S l @ States') | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 738 | end | 
| 18 | 739 | |
| 15531 | 740 | | NONE => (*nonterminal is parsed for the first time*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 741 | let val tk_prods = all_prods_for nt; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 742 | val States' = mkStates i minPrec nt | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 743 | (getRHS minPrec tk_prods); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 744 | in ((nt, (minPrec, [])) :: used, States') end); | 
| 697 
40f72ab196f8
changed warning for extremely ambiguous expressions
 clasohm parents: 
682diff
changeset | 745 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 746 | val dummy = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 747 | if not (!warned) andalso | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 748 | length (new_states @ States) > (!branching_level) then | 
| 1580 
e3fd931e6095
Added some functions which allow redirection of Isabelle's output
 berghofe parents: 
1507diff
changeset | 749 | (warning "Currently parsed expression could be \ | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 750 | \extremely ambiguous."; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 751 | warned := true) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 752 | else (); | 
| 18 | 753 | in | 
| 697 
40f72ab196f8
changed warning for extremely ambiguous expressions
 clasohm parents: 
682diff
changeset | 754 | processS used' (new_states @ States) (S :: Si, Sii) | 
| 18 | 755 | end | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 756 | | (_, _, _, Terminal a :: _, _, _) => (*scanner operation*) | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 757 | processS used States | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 758 | (S :: Si, | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 759 | if matching_tokens (a, c) then movedot_term S c :: Sii else Sii) | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 760 | | (A, prec, ts, [], id, j) => (*completer operation*) | 
| 697 
40f72ab196f8
changed warning for extremely ambiguous expressions
 clasohm parents: 
682diff
changeset | 761 | let val tt = if id = "" then ts else [Node (id, ts)] in | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 762 | if j = i then (*lambda production?*) | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 763 | let | 
| 697 
40f72ab196f8
changed warning for extremely ambiguous expressions
 clasohm parents: 
682diff
changeset | 764 | val (used', O) = update_trees used (A, (tt, prec)); | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 765 | in | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 766 | case O of | 
| 15531 | 767 | NONE => | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 768 | let val Slist = getS A prec Si; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 769 | val States' = map (movedot_nonterm tt) Slist; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 770 | in processS used' (States' @ States) (S :: Si, Sii) end | 
| 15531 | 771 | | SOME n => | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 772 | if n >= prec then processS used' States (S :: Si, Sii) | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 773 | else | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 774 | let val Slist = getS' A prec n Si; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 775 | val States' = map (movedot_nonterm tt) Slist; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 776 | in processS used' (States' @ States) (S :: Si, Sii) end | 
| 15752 | 777 | end | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 778 | else | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 779 | let val Slist = getStates Estate i j A prec | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 780 | in processS used (map (movedot_nonterm tt) Slist @ States) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 781 | (S :: Si, Sii) | 
| 682 | 782 | end | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 783 | end) | 
| 682 | 784 | in processS [] states ([], []) end; | 
| 18 | 785 | |
| 786 | ||
| 362 | 787 | fun syntax_error toks allowed = | 
| 4698 | 788 | let | 
| 789 | val msg = | |
| 790 | if null toks then Pretty.str "Inner syntax error: unexpected end of input" | |
| 791 | else | |
| 792 | Pretty.block (Pretty.str "Inner syntax error at: \"" :: | |
| 7944 
cc1930ad1a88
symbols in (error) messages now consistently with single backslash
 oheimb parents: 
6962diff
changeset | 793 | Pretty.breaks (map (Pretty.str o str_of_token) | 
| 15752 | 794 | (rev (tl (rev toks)))) @ | 
| 4698 | 795 | [Pretty.str "\""]); | 
| 796 | val expected = | |
| 797 |       Pretty.strs ("Expected tokens: " :: map (quote o str_of_token) allowed);
 | |
| 798 | in | |
| 799 | error (Pretty.string_of (Pretty.blk (0, [msg, Pretty.fbrk, expected]))) | |
| 800 | end; | |
| 18 | 801 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 802 | fun produce prods chains stateset i indata prev_token = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 803 | (*prev_token is used for error messages*) | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 804 | (case Array.sub (stateset, i) of | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 805 | [] => let fun some_prods_for tk nt = prods_for prods chains false tk [nt]; | 
| 373 
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
 clasohm parents: 
372diff
changeset | 806 | |
| 
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
 clasohm parents: 
372diff
changeset | 807 | (*test if tk is a lookahead for a given minimum precedence*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 808 | fun reduction _ minPrec _ (Terminal _ :: _, _, prec:int) = | 
| 373 
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
 clasohm parents: 
372diff
changeset | 809 | if prec >= minPrec then true | 
| 
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
 clasohm parents: 
372diff
changeset | 810 | else false | 
| 15752 | 811 | | reduction tk minPrec checked | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 812 | (Nonterminal (nt, nt_prec) :: _, _, prec) = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 813 | if prec >= minPrec andalso not (nt mem checked) then | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 814 | let val chained = connected_with chains [nt] [nt]; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 815 | in exists | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 816 | (reduction tk nt_prec (chained @ checked)) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 817 | (some_prods_for tk nt) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 818 | end | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 819 | else false; | 
| 373 
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
 clasohm parents: 
372diff
changeset | 820 | |
| 15752 | 821 | (*compute a list of allowed starting tokens | 
| 373 
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
 clasohm parents: 
372diff
changeset | 822 | for a list of nonterminals considering precedence*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 823 | fun get_starts [] result = result | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 824 | | get_starts ((nt, minPrec:int) :: nts) result = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 825 | let fun get [] result = result | 
| 15531 | 826 | | get ((SOME tk, prods) :: ps) result = | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 827 | if not (null prods) andalso | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 828 | exists (reduction tk minPrec [nt]) prods | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 829 | then get ps (tk :: result) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 830 | else get ps result | 
| 15531 | 831 | | get ((NONE, _) :: ps) result = get ps result; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 832 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 833 | val (_, nt_prods) = Array.sub (prods, nt); | 
| 362 | 834 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 835 | val chained = map (fn nt => (nt, minPrec)) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 836 | (assocs chains nt); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 837 | in get_starts (chained @ nts) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 838 | ((get nt_prods []) union result) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 839 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 840 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 841 | val nts = | 
| 15752 | 842 | List.mapPartial (fn (_, _, _, Nonterminal (a, prec) :: _, _, _) => | 
| 15531 | 843 | SOME (a, prec) | _ => NONE) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 844 | (Array.sub (stateset, i-1)); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 845 | val allowed = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 846 | distinct (get_starts nts [] @ | 
| 15570 | 847 | (List.mapPartial (fn (_, _, _, Terminal a :: _, _, _) => SOME a | 
| 15531 | 848 | | _ => NONE) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 849 | (Array.sub (stateset, i-1)))); | 
| 362 | 850 | in syntax_error (if prev_token = EndToken then indata | 
| 851 | else prev_token :: indata) allowed | |
| 852 | end | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 853 | | s => | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 854 | (case indata of | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 855 | [] => Array.sub (stateset, i) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 856 | | c :: cs => | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 857 | let val (si, sii) = PROCESSS prods chains stateset i c s; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 858 | in Array.update (stateset, i, si); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 859 | Array.update (stateset, i + 1, sii); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 860 | produce prods chains stateset (i + 1) cs c | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 861 | end)); | 
| 18 | 862 | |
| 863 | ||
| 15752 | 864 | fun get_trees l = List.mapPartial (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) | 
| 2229 
64acb485ecce
Eta-expansion of a function definition, for value polymorphism
 paulson parents: 
2186diff
changeset | 865 | l; | 
| 18 | 866 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 867 | fun earley prods tags chains startsymbol indata = | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 868 | let | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 869 | val start_tag = case Symtab.lookup (tags, startsymbol) of | 
| 15531 | 870 | SOME tag => tag | 
| 15752 | 871 |                      | NONE   => error ("parse: Unknown startsymbol " ^
 | 
| 624 | 872 | quote startsymbol); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 873 | val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal EndToken], | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 874 | "", 0)]; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 875 | val s = length indata + 1; | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 876 | val Estate = Array.array (s, []); | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 877 | in | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 878 | Array.update (Estate, 0, S0); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 879 | warned := false; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 880 | get_trees (produce prods chains Estate 0 indata EndToken) | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 881 | end; | 
| 18 | 882 | |
| 883 | ||
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 884 | fun parse (Gram {tags, prods, chains, ...}) start toks =
 | 
| 624 | 885 | let val r = | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 886 | (case earley prods tags chains start toks of | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 887 | [] => sys_error "parse: no parse trees" | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 888 | | pts => pts); | 
| 624 | 889 | in r end | 
| 18 | 890 | |
| 891 | end; |