| author | wenzelm | 
| Tue, 10 Dec 2024 21:06:04 +0100 | |
| changeset 81572 | 693a95492008 | 
| parent 81223 | f63ffe7f4234 | 
| child 81629 | 79079d94095b | 
| permissions | -rw-r--r-- | 
| 18 | 1 | (* Title: Pure/Syntax/parser.ML | 
| 67545 | 2 | Author: Carsten Clasohm, Sonia Mahjoub | 
| 3 | Author: Makarius | |
| 18 | 4 | |
| 67545 | 5 | General context-free parser for the inner syntax of terms and types. | 
| 18 | 6 | *) | 
| 7 | ||
| 8 | signature PARSER = | |
| 15752 | 9 | sig | 
| 1507 | 10 | type gram | 
| 11 | val empty_gram: gram | |
| 78009 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 12 | val make_gram: Syntax_Ext.xprod list -> Syntax_Ext.xprod list -> gram option -> gram | 
| 1507 | 13 | val pretty_gram: gram -> Pretty.T list | 
| 80991 | 14 | datatype tree = | 
| 81007 | 15 |     Markup of {markup: Markup.T list, range: Position.range} * tree list |
 | 
| 16 |     Node of {nonterm: string, const: string, range: Position.range} * tree list |
 | |
| 1507 | 17 | Tip of Lexicon.token | 
| 80991 | 18 | val pretty_tree: tree -> Pretty.T list | 
| 19 | val parse: gram -> string -> Lexicon.token list -> tree list | |
| 15752 | 20 | end; | 
| 1507 | 21 | |
| 15752 | 22 | structure Parser: PARSER = | 
| 18 | 23 | struct | 
| 15752 | 24 | |
| 18 | 25 | (** datatype gram **) | 
| 26 | ||
| 67541 | 27 | (* nonterminals *) | 
| 28 | ||
| 67545 | 29 | (*production for the NTs are stored in a vector, indexed by the NT tag*) | 
| 67541 | 30 | type nt = int; | 
| 31 | ||
| 67545 | 32 | type tags = nt Symtab.table; | 
| 33 | val tags_empty: tags = Symtab.empty; | |
| 80959 
4749c9b0ba73
eliminated redundant nt_count: rely on Symtab.size;
 wenzelm parents: 
80958diff
changeset | 34 | fun tags_size (tags: tags) = Symtab.size tags; | 
| 67545 | 35 | fun tags_content (tags: tags) = sort_by #1 (Symtab.dest tags); | 
| 36 | fun tags_lookup (tags: tags) = Symtab.lookup tags; | |
| 37 | fun tags_insert tag (tags: tags) = Symtab.update_new tag tags; | |
| 81000 | 38 | |
| 39 | type names = string Inttab.table; | |
| 40 | val names_empty: names = Inttab.empty; | |
| 41 | fun make_names (tags: tags): names = Inttab.build (Symtab.fold (Inttab.update_new o swap) tags); | |
| 42 | fun the_name (names: names) = the o Inttab.lookup names; | |
| 67545 | 43 | |
| 79079 | 44 | type nts = Bitset.T; | 
| 45 | val nts_empty: nts = Bitset.empty; | |
| 46 | val nts_merge: nts * nts -> nts = Bitset.merge; | |
| 47 | fun nts_insert nt : nts -> nts = Bitset.insert nt; | |
| 48 | fun nts_member (nts: nts) = Bitset.member nts; | |
| 49 | fun nts_fold f (nts: nts) = Bitset.fold f nts; | |
| 50 | fun nts_subset (nts1: nts, nts2: nts) = Bitset.forall (nts_member nts2) nts1; | |
| 51 | fun nts_is_empty (nts: nts) = Bitset.is_empty nts; | |
| 52 | fun nts_is_unique (nts: nts) = Bitset.is_unique nts; | |
| 67541 | 53 | |
| 54 | ||
| 67539 | 55 | (* tokens *) | 
| 56 | ||
| 80979 | 57 | structure Tokens = Set(type key = Lexicon.token val ord = Lexicon.token_type_ord); | 
| 67539 | 58 | |
| 77823 | 59 | fun tokens_find P tokens = Tokens.get_first (fn tok => if P tok then SOME tok else NONE) tokens; | 
| 60 | fun tokens_add (nt: nt, tokens) = if Tokens.is_empty tokens then I else cons (nt, tokens); | |
| 67533 
f253e5eaf995
clarified types and operations: potentially more efficient add_prods;
 wenzelm parents: 
67532diff
changeset | 61 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 62 | |
| 67545 | 63 | (* productions *) | 
| 64 | ||
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 65 | datatype symb = | 
| 67545 | 66 | Terminal of Lexicon.token | | 
| 80972 | 67 | Nonterminal of nt * int | (*(tag, precedence)*) | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 68 | Bg of Markup.T list | En; (*markup block*) | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 69 | |
| 80972 | 70 | type prod = symb list * string * int; (*rhs, name, precedence*) | 
| 80970 | 71 | |
| 72 | fun make_chain_prod from : prod = ([Nonterminal (from, ~1)], "", ~1); | |
| 73 | ||
| 74 | fun dest_chain_prod (([Nonterminal (from, ~1)], _, ~1): prod) = SOME from | |
| 75 | | dest_chain_prod _ = NONE; | |
| 76 | ||
| 80972 | 77 | val no_prec = ~1; | 
| 78 | ||
| 80978 
5e2b1588c5cb
minor performance tuning: proper table for parsetree list;
 wenzelm parents: 
80977diff
changeset | 79 | fun upto_prec min max p = | 
| 
5e2b1588c5cb
minor performance tuning: proper table for parsetree list;
 wenzelm parents: 
80977diff
changeset | 80 | (min = no_prec orelse p >= min) andalso (max = no_prec orelse p <= max); | 
| 
5e2b1588c5cb
minor performance tuning: proper table for parsetree list;
 wenzelm parents: 
80977diff
changeset | 81 | |
| 
5e2b1588c5cb
minor performance tuning: proper table for parsetree list;
 wenzelm parents: 
80977diff
changeset | 82 | fun until_prec min max p = | 
| 80972 | 83 | (min = no_prec orelse p >= min) andalso (max = no_prec orelse p < max); | 
| 84 | ||
| 85 | ||
| 77823 | 86 | structure Prods = Table(Tokens.Key); | 
| 80970 | 87 | type prods = prod list Prods.table; | 
| 67540 | 88 | |
| 77823 | 89 | val prods_empty: prods = Prods.empty; | 
| 90 | fun prods_lookup (prods: prods) = Prods.lookup_list prods; | |
| 91 | fun prods_update entry : prods -> prods = Prods.update entry; | |
| 92 | fun prods_content (prods: prods) = distinct (op =) (maps #2 (Prods.dest prods)); | |
| 67540 | 93 | |
| 77823 | 94 | type nt_gram = (nts * Tokens.T) * prods; (*dependent_nts, start_tokens, prods*) | 
| 67545 | 95 | (*depent_nts is a set of all NTs whose lookahead depends on this NT's lookahead*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 96 | |
| 77823 | 97 | val nt_gram_empty: nt_gram = ((nts_empty, Tokens.empty), prods_empty); | 
| 67533 
f253e5eaf995
clarified types and operations: potentially more efficient add_prods;
 wenzelm parents: 
67532diff
changeset | 98 | |
| 67531 | 99 | type chains = unit Int_Graph.T; | 
| 100 | fun chains_preds (chains: chains) = Int_Graph.immediate_preds chains; | |
| 101 | fun chains_all_preds (chains: chains) = Int_Graph.all_preds chains; | |
| 102 | fun chains_all_succs (chains: chains) = Int_Graph.all_succs chains; | |
| 103 | val chains_empty: chains = Int_Graph.empty; | |
| 104 | fun chains_member (chains: chains) = Int_Graph.is_edge chains; | |
| 105 | fun chains_declare nt : chains -> chains = Int_Graph.default_node (nt, ()); | |
| 80968 | 106 | |
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 107 | fun chains_declares (Nonterminal (nt, _)) = chains_declare nt | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 108 | | chains_declares _ = I; | 
| 80968 | 109 | |
| 67531 | 110 | fun chains_insert (from, to) = | 
| 111 | chains_declare from #> chains_declare to #> Int_Graph.add_edge (from, to); | |
| 112 | ||
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 113 | datatype gram = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 114 | Gram of | 
| 80959 
4749c9b0ba73
eliminated redundant nt_count: rely on Symtab.size;
 wenzelm parents: 
80958diff
changeset | 115 |    {tags: tags,
 | 
| 81000 | 116 | names: names, | 
| 67531 | 117 | chains: chains, | 
| 67533 
f253e5eaf995
clarified types and operations: potentially more efficient add_prods;
 wenzelm parents: 
67532diff
changeset | 118 | lambdas: nts, | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 119 | prods: nt_gram Vector.vector}; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 120 | (*"tags" is used to map NT names (i.e. strings) to tags; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 121 | chain productions are not stored as normal productions | 
| 67517 
add9a9f6a290
explicit graph for chains, which contains all nts as nodes;
 wenzelm parents: 
67516diff
changeset | 122 | but instead as an entry in "chains": from -> to; | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 123 | lambda productions are stored as normal productions | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 124 | and also as an entry in "lambdas"*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 125 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 126 | (*productions for which no starting token is | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 127 | known yet are associated with this token*) | 
| 38713 | 128 | val unknown_start = Lexicon.eof; | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 129 | |
| 67539 | 130 | fun get_start tks = | 
| 77823 | 131 | (case Tokens.min tks of | 
| 77818 | 132 | SOME tk => tk | 
| 67539 | 133 | | NONE => unknown_start); | 
| 67516 | 134 | |
| 80970 | 135 | fun add_production array_prods (lhs, new_prod as (rhs, _, _) : prod) (chains, lambdas) = | 
| 67543 | 136 | let | 
| 80970 | 137 | val chain = dest_chain_prod new_prod; | 
| 138 | val (new_chain, chains') = | |
| 139 | (case chain of | |
| 140 | SOME from => | |
| 67543 | 141 | if chains_member chains (from, lhs) | 
| 80970 | 142 | then (false, chains) | 
| 143 | else (true, chains_insert (from, lhs) chains) | |
| 144 | | NONE => (false, chains |> chains_declare lhs |> fold chains_declares rhs)); | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 145 | |
| 67543 | 146 | (*propagate new chain in lookahead and lambdas; | 
| 147 | added_starts is used later to associate existing | |
| 148 | productions with new starting tokens*) | |
| 149 | val (added_starts, lambdas') = | |
| 150 | if not new_chain then ([], lambdas) | |
| 151 | else | |
| 152 | let (*lookahead of chain's source*) | |
| 78007 | 153 | val ((_, from_tks), _) = Array.nth array_prods (the chain); | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 154 | |
| 67543 | 155 | (*copy from's lookahead to chain's destinations*) | 
| 156 | fun copy_lookahead to = | |
| 157 | let | |
| 78007 | 158 | val ((to_nts, to_tks), ps) = Array.nth array_prods to; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 159 | |
| 77823 | 160 | val new_tks = Tokens.subtract to_tks from_tks; (*added lookahead tokens*) | 
| 161 | val to_tks' = Tokens.merge (to_tks, new_tks); | |
| 78007 | 162 | val _ = Array.upd array_prods to ((to_nts, to_tks'), ps); | 
| 67543 | 163 | in tokens_add (to, new_tks) end; | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 164 | |
| 67543 | 165 | val tos = chains_all_succs chains' [lhs]; | 
| 166 | in | |
| 167 | (fold copy_lookahead tos [], | |
| 168 | lambdas |> nts_member lambdas lhs ? fold nts_insert tos) | |
| 169 | end; | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 170 | |
| 67543 | 171 | (*test if new production can produce lambda | 
| 172 | (rhs must either be empty or only consist of lambda NTs)*) | |
| 80969 
667f5072ed2d
pro-forma support for markup blocks, without any change of result yet;
 wenzelm parents: 
80968diff
changeset | 173 | fun lambda_symb (Nonterminal (id, _)) = nts_member lambdas' id | 
| 
667f5072ed2d
pro-forma support for markup blocks, without any change of result yet;
 wenzelm parents: 
80968diff
changeset | 174 | | lambda_symb (Terminal _) = false | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 175 | | lambda_symb _ = true; | 
| 67543 | 176 | val new_lambdas = | 
| 80969 
667f5072ed2d
pro-forma support for markup blocks, without any change of result yet;
 wenzelm parents: 
80968diff
changeset | 177 | if forall lambda_symb rhs | 
| 67543 | 178 | then SOME (filter_out (nts_member lambdas') (chains_all_succs chains' [lhs])) | 
| 179 | else NONE; | |
| 180 | val lambdas'' = fold nts_insert (these new_lambdas) lambdas'; | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 181 | |
| 67543 | 182 | (*list optional terminal and all nonterminals on which the lookahead | 
| 183 | of a production depends*) | |
| 184 | fun lookahead_dependency _ [] nts = (NONE, nts) | |
| 185 | | lookahead_dependency _ (Terminal tk :: _) nts = (SOME tk, nts) | |
| 186 | | lookahead_dependency lambdas (Nonterminal (nt, _) :: symbs) nts = | |
| 187 | if nts_member lambdas nt then | |
| 188 | lookahead_dependency lambdas symbs (nts_insert nt nts) | |
| 80969 
667f5072ed2d
pro-forma support for markup blocks, without any change of result yet;
 wenzelm parents: 
80968diff
changeset | 189 | else (NONE, nts_insert nt nts) | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 190 | | lookahead_dependency lambdas (_ :: symbs) nts = lookahead_dependency lambdas symbs nts; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 191 | |
| 67543 | 192 | (*get all known starting tokens for a nonterminal*) | 
| 78007 | 193 | fun starts_for_nt nt = snd (fst (Array.nth array_prods nt)); | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 194 | |
| 67543 | 195 | (*update prods, lookaheads, and lambdas according to new lambda NTs*) | 
| 196 | val (added_starts', lambdas') = | |
| 197 | let | |
| 198 | (*propagate added lambda NT*) | |
| 199 | fun propagate_lambda [] added_starts lambdas = (added_starts, lambdas) | |
| 200 | | propagate_lambda (l :: ls) added_starts lambdas = | |
| 201 | let | |
| 202 | (*get lookahead for lambda NT*) | |
| 78007 | 203 | val ((dependent, l_starts), _) = Array.nth array_prods l; | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 204 | |
| 67543 | 205 | (*check productions whose lookahead may depend on lambda NT*) | 
| 206 | fun examine_prods [] add_lambda nt_dependencies added_tks nt_prods = | |
| 207 | (add_lambda, nt_dependencies, added_tks, nt_prods) | |
| 208 | | examine_prods ((p as (rhs, _, _)) :: ps) add_lambda | |
| 209 | nt_dependencies added_tks nt_prods = | |
| 210 | let val (tk, nts) = lookahead_dependency lambdas rhs nts_empty in | |
| 211 | if nts_member nts l then (*update production's lookahead*) | |
| 212 | let | |
| 213 | val new_lambda = | |
| 214 | is_none tk andalso nts_subset (nts, lambdas); | |
| 18 | 215 | |
| 67543 | 216 | val new_tks = | 
| 77823 | 217 | Tokens.empty | 
| 218 | |> fold Tokens.insert (the_list tk) | |
| 219 | |> nts_fold (curry Tokens.merge o starts_for_nt) nts | |
| 220 | |> Tokens.subtract l_starts; | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 221 | |
| 77823 | 222 | val added_tks' = Tokens.merge (added_tks, new_tks); | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 223 | |
| 67543 | 224 | val nt_dependencies' = nts_merge (nt_dependencies, nts); | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 225 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 226 | (*associate production with new starting tokens*) | 
| 67539 | 227 | fun copy tk nt_prods = | 
| 67543 | 228 | prods_update (tk, p :: prods_lookup nt_prods tk) nt_prods; | 
| 229 | ||
| 230 | val nt_prods' = nt_prods | |
| 77823 | 231 | |> Tokens.fold copy new_tks | 
| 67550 | 232 | |> new_lambda ? copy Lexicon.dummy; | 
| 67543 | 233 | in | 
| 234 | examine_prods ps (add_lambda orelse new_lambda) | |
| 235 | nt_dependencies' added_tks' nt_prods' | |
| 236 | end | |
| 237 | else (*skip production*) | |
| 238 | examine_prods ps add_lambda nt_dependencies added_tks nt_prods | |
| 239 | end; | |
| 240 | ||
| 241 | (*check each NT whose lookahead depends on new lambda NT*) | |
| 242 | fun process_nts nt (added_lambdas, added_starts) = | |
| 243 | let | |
| 78007 | 244 | val ((old_nts, old_tks), nt_prods) = Array.nth array_prods nt; | 
| 67543 | 245 | |
| 246 | (*existing productions whose lookahead may depend on l*) | |
| 247 | val tk_prods = prods_lookup nt_prods (get_start l_starts); | |
| 248 | ||
| 249 | (*add_lambda is true if an existing production of the nt | |
| 250 | produces lambda due to the new lambda NT l*) | |
| 251 | val (add_lambda, nt_dependencies, added_tks, nt_prods') = | |
| 77823 | 252 | examine_prods tk_prods false nts_empty Tokens.empty nt_prods; | 
| 67543 | 253 | |
| 254 | val new_nts = nts_merge (old_nts, nt_dependencies); | |
| 77823 | 255 | val new_tks = Tokens.merge (old_tks, added_tks); | 
| 67543 | 256 | |
| 257 | val added_lambdas' = added_lambdas |> add_lambda ? cons nt; | |
| 78007 | 258 | val _ = Array.upd array_prods nt ((new_nts, new_tks), nt_prods'); | 
| 67543 | 259 | (*N.B. that because the tks component | 
| 260 | is used to access existing | |
| 261 | productions we have to add new | |
| 262 | tokens at the _end_ of the list*) | |
| 263 | val added_starts' = tokens_add (nt, added_tks) added_starts; | |
| 264 | in (added_lambdas', added_starts') end; | |
| 377 
ab8917806779
lookaheads are now computed faster (during the grammar is built)
 clasohm parents: 
373diff
changeset | 265 | |
| 67543 | 266 | val (added_lambdas, added_starts') = | 
| 267 | nts_fold process_nts dependent ([], added_starts); | |
| 268 | val added_lambdas' = filter_out (nts_member lambdas) added_lambdas; | |
| 269 | in | |
| 270 | propagate_lambda (ls @ added_lambdas') added_starts' | |
| 271 | (fold nts_insert added_lambdas' lambdas) | |
| 272 | end; | |
| 273 | in | |
| 274 | propagate_lambda | |
| 275 | (nts_fold (fn l => not (nts_member lambdas l) ? cons l) lambdas'' []) | |
| 276 | added_starts lambdas'' | |
| 277 | end; | |
| 278 | ||
| 279 | (*insert production into grammar*) | |
| 280 | val added_starts' = | |
| 281 | if is_some chain then added_starts' (*don't store chain production*) | |
| 282 | else | |
| 283 | let | |
| 284 | (*lookahead tokens of new production and on which | |
| 285 | NTs lookahead depends*) | |
| 286 | val (start_tk, start_nts) = lookahead_dependency lambdas' rhs nts_empty; | |
| 287 | ||
| 288 | val start_tks = | |
| 77823 | 289 | Tokens.empty | 
| 290 | |> fold Tokens.insert (the_list start_tk) | |
| 291 | |> nts_fold (curry Tokens.merge o starts_for_nt) start_nts; | |
| 67543 | 292 | |
| 293 | val start_tks' = | |
| 294 | start_tks | |
| 77823 | 295 | |> (if is_some new_lambdas then Tokens.insert Lexicon.dummy | 
| 296 | else if Tokens.is_empty start_tks then Tokens.insert unknown_start | |
| 67543 | 297 | else I); | 
| 298 | ||
| 299 | (*add lhs NT to list of dependent NTs in lookahead*) | |
| 300 | fun add_nts nt initial = | |
| 301 | (if initial then | |
| 78007 | 302 | let val ((old_nts, old_tks), ps) = Array.nth array_prods nt in | 
| 67543 | 303 | if nts_member old_nts lhs then () | 
| 78007 | 304 | else Array.upd array_prods nt ((nts_insert lhs old_nts, old_tks), ps) | 
| 67543 | 305 | end | 
| 306 | else (); false); | |
| 307 | ||
| 308 | (*add new start tokens to chained NTs' lookahead list; | |
| 309 | also store new production for lhs NT*) | |
| 310 | fun add_tks [] added = added | |
| 311 | | add_tks (nt :: nts) added = | |
| 312 | let | |
| 78007 | 313 | val ((old_nts, old_tks), nt_prods) = Array.nth array_prods nt; | 
| 67543 | 314 | |
| 77823 | 315 | val new_tks = Tokens.subtract old_tks start_tks; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 316 | |
| 67543 | 317 | (*store new production*) | 
| 318 | fun store tk (prods, _) = | |
| 319 | let | |
| 320 | val tk_prods = prods_lookup prods tk; | |
| 321 | val tk_prods' = new_prod :: tk_prods; | |
| 322 | val prods' = prods_update (tk, tk_prods') prods; | |
| 323 | in (prods', true) end; | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 324 | |
| 67543 | 325 | val (nt_prods', changed) = (nt_prods, false) | 
| 77823 | 326 | |> nt = lhs ? Tokens.fold store start_tks'; | 
| 67543 | 327 | val _ = | 
| 77823 | 328 | if not changed andalso Tokens.is_empty new_tks then () | 
| 78007 | 329 | else Array.upd array_prods nt ((old_nts, Tokens.merge (old_tks, new_tks)), nt_prods'); | 
| 67543 | 330 | in add_tks nts (tokens_add (nt, new_tks) added) end; | 
| 331 | val _ = nts_fold add_nts start_nts true; | |
| 332 | in add_tks (chains_all_succs chains' [lhs]) [] end; | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 333 | |
| 67543 | 334 | (*associate productions with new lookaheads*) | 
| 335 | val _ = | |
| 336 | let | |
| 337 | (*propagate added start tokens*) | |
| 338 | fun add_starts [] = () | |
| 339 | | add_starts ((changed_nt, new_tks) :: starts) = | |
| 340 | let | |
| 341 | (*token under which old productions which | |
| 342 | depend on changed_nt could be stored*) | |
| 343 | val key = | |
| 77823 | 344 | tokens_find (not o Tokens.member new_tks) (starts_for_nt changed_nt) | 
| 67543 | 345 | |> the_default unknown_start; | 
| 346 | ||
| 347 | (*copy productions whose lookahead depends on changed_nt; | |
| 348 | if key = SOME unknown_start then tk_prods is used to hold | |
| 349 | the productions not copied*) | |
| 350 | fun update_prods [] result = result | |
| 351 | | update_prods ((p as (rhs, _: string, _: nt)) :: ps) | |
| 352 | (tk_prods, nt_prods) = | |
| 353 | let | |
| 354 | (*lookahead dependency for production*) | |
| 355 | val (tk, depends) = lookahead_dependency lambdas' rhs nts_empty; | |
| 356 | ||
| 357 | (*test if this production has to be copied*) | |
| 358 | val update = nts_member depends changed_nt; | |
| 18 | 359 | |
| 67543 | 360 | (*test if production could already be associated with | 
| 361 | a member of new_tks*) | |
| 362 | val lambda = | |
| 363 | not (nts_is_unique depends) orelse | |
| 364 | not (nts_is_empty depends) andalso is_some tk | |
| 77823 | 365 | andalso Tokens.member new_tks (the tk); | 
| 67543 | 366 | |
| 367 | (*associate production with new starting tokens*) | |
| 368 | fun copy tk nt_prods = | |
| 369 | let | |
| 370 | val tk_prods = prods_lookup nt_prods tk; | |
| 371 | val tk_prods' = | |
| 372 | if not lambda then p :: tk_prods | |
| 373 | else insert (op =) p tk_prods; | |
| 374 | (*if production depends on lambda NT we | |
| 375 | have to look for duplicates*) | |
| 376 | in prods_update (tk, tk_prods') nt_prods end; | |
| 377 | val result = | |
| 77823 | 378 | if update then (tk_prods, Tokens.fold copy new_tks nt_prods) | 
| 67543 | 379 | else if key = unknown_start then (p :: tk_prods, nt_prods) | 
| 380 | else (tk_prods, nt_prods); | |
| 381 | in update_prods ps result end; | |
| 18 | 382 | |
| 67543 | 383 | (*copy existing productions for new starting tokens*) | 
| 384 | fun process_nts nt = | |
| 385 | let | |
| 78007 | 386 | val ((nts, tks), nt_prods) = Array.nth array_prods nt; | 
| 67543 | 387 | |
| 388 | val tk_prods = prods_lookup nt_prods key; | |
| 389 | ||
| 390 | (*associate productions with new lookahead tokens*) | |
| 391 | val (tk_prods', nt_prods') = update_prods tk_prods ([], nt_prods); | |
| 392 | ||
| 393 | val nt_prods'' = | |
| 394 | if key = unknown_start then | |
| 395 | prods_update (key, tk_prods') nt_prods' | |
| 396 | else nt_prods'; | |
| 397 | ||
| 77823 | 398 | val added_tks = Tokens.subtract tks new_tks; | 
| 399 | val tks' = Tokens.merge (tks, added_tks); | |
| 78007 | 400 | val _ = Array.upd array_prods nt ((nts, tks'), nt_prods''); | 
| 67543 | 401 | in tokens_add (nt, added_tks) end; | 
| 402 | ||
| 78007 | 403 | val ((dependent, _), _) = Array.nth array_prods changed_nt; | 
| 67543 | 404 | in add_starts (starts @ nts_fold process_nts dependent []) end; | 
| 405 | in add_starts added_starts' end; | |
| 406 | in (chains', lambdas') end; | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 407 | |
| 18 | 408 | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 409 | (* pretty_gram *) | 
| 18 | 410 | |
| 81000 | 411 | fun pretty_gram (Gram {tags, names, prods, chains, ...}) =
 | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 412 | let | 
| 81000 | 413 | val print_nt = the_name names; | 
| 67518 | 414 |     fun print_pri p = if p < 0 then "" else Symbol.make_sup ("(" ^ string_of_int p ^ ")");
 | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 415 | |
| 67552 | 416 | fun pretty_symb (Terminal tok) = | 
| 417 | if Lexicon.is_literal tok | |
| 80969 
667f5072ed2d
pro-forma support for markup blocks, without any change of result yet;
 wenzelm parents: 
80968diff
changeset | 418 | then [Pretty.quote (Pretty.keyword1 (Lexicon.str_of_token tok))] | 
| 
667f5072ed2d
pro-forma support for markup blocks, without any change of result yet;
 wenzelm parents: 
80968diff
changeset | 419 | else [Pretty.str (Lexicon.str_of_token tok)] | 
| 
667f5072ed2d
pro-forma support for markup blocks, without any change of result yet;
 wenzelm parents: 
80968diff
changeset | 420 | | pretty_symb (Nonterminal (tag, p)) = [Pretty.str (print_nt tag ^ print_pri p)] | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 421 | | pretty_symb _ = []; | 
| 18 | 422 | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 423 | fun pretty_const "" = [] | 
| 67513 | 424 |       | pretty_const c = [Pretty.str ("\<^bold>\<Rightarrow> " ^ quote c)];
 | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 425 | |
| 67513 | 426 | fun pretty_prod (name, tag) = | 
| 80970 | 427 | (prods_content (#2 (Vector.nth prods tag)) @ map make_chain_prod (chains_preds chains tag)) | 
| 67513 | 428 | |> map (fn (symbs, const, p) => | 
| 429 | Pretty.block (Pretty.breaks | |
| 80969 
667f5072ed2d
pro-forma support for markup blocks, without any change of result yet;
 wenzelm parents: 
80968diff
changeset | 430 | (Pretty.str (name ^ print_pri p ^ " =") :: maps pretty_symb symbs @ pretty_const const))); | 
| 67531 | 431 | in maps pretty_prod (tags_content tags) end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 432 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 433 | |
| 42217 | 434 | |
| 67545 | 435 | (** operations on grammars **) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 436 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 437 | val empty_gram = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 438 | Gram | 
| 80959 
4749c9b0ba73
eliminated redundant nt_count: rely on Symtab.size;
 wenzelm parents: 
80958diff
changeset | 439 |    {tags = tags_empty,
 | 
| 81000 | 440 | names = names_empty, | 
| 67531 | 441 | chains = chains_empty, | 
| 67533 
f253e5eaf995
clarified types and operations: potentially more efficient add_prods;
 wenzelm parents: 
67532diff
changeset | 442 | lambdas = nts_empty, | 
| 
f253e5eaf995
clarified types and operations: potentially more efficient add_prods;
 wenzelm parents: 
67532diff
changeset | 443 | prods = Vector.fromList [nt_gram_empty]}; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 444 | |
| 80960 | 445 | local | 
| 446 | ||
| 447 | fun make_tag s tags = | |
| 448 | (case tags_lookup tags s of | |
| 449 | SOME tag => (tag, tags) | |
| 450 | | NONE => | |
| 451 | let val tag = tags_size tags | |
| 452 | in (tag, tags_insert (s, tag) tags) end); | |
| 453 | ||
| 454 | fun make_arg (s, p) tags = | |
| 455 | (case Lexicon.get_terminal s of | |
| 456 | NONE => | |
| 457 | let val (tag, tags') = make_tag s tags; | |
| 458 | in (Nonterminal (tag, p), tags') end | |
| 459 | | SOME tok => (Terminal tok, tags)); | |
| 460 | ||
| 80959 
4749c9b0ba73
eliminated redundant nt_count: rely on Symtab.size;
 wenzelm parents: 
80958diff
changeset | 461 | fun extend_gram xprods gram = | 
| 78009 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 462 | let | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 463 | fun make_symb (Syntax_Ext.Delim s) (syms, tags) = | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 464 | (Terminal (Lexicon.literal s) :: syms, tags) | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 465 | | make_symb (Syntax_Ext.Argument a) (syms, tags) = | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 466 | let val (arg, tags') = make_arg a tags | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 467 | in (arg :: syms, tags') end | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 468 |       | make_symb (Syntax_Ext.Bg {markup, ...}) (syms, tags) = (Bg markup :: syms, tags)
 | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 469 | | make_symb (Syntax_Ext.En) (syms, tags) = (En :: syms, tags) | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 470 | | make_symb _ res = res; | 
| 1438 | 471 | |
| 80960 | 472 | fun make_prod (Syntax_Ext.XProd (lhs, xsymbs, const, pri)) (result, tags) = | 
| 473 | let | |
| 474 | val (tag, tags') = make_tag lhs tags; | |
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 475 | val (rev_symbs, tags'') = fold make_symb xsymbs ([], tags'); | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 476 | in ((tag, (rev rev_symbs, const, pri)) :: result, tags'') end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 477 | |
| 80959 
4749c9b0ba73
eliminated redundant nt_count: rely on Symtab.size;
 wenzelm parents: 
80958diff
changeset | 478 | |
| 81000 | 479 |     val Gram {tags, names = _, chains, lambdas, prods} = gram;
 | 
| 80959 
4749c9b0ba73
eliminated redundant nt_count: rely on Symtab.size;
 wenzelm parents: 
80958diff
changeset | 480 | |
| 80960 | 481 | val (new_prods, tags') = fold make_prod xprods ([], tags); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 482 | |
| 78009 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 483 | val array_prods' = | 
| 80959 
4749c9b0ba73
eliminated redundant nt_count: rely on Symtab.size;
 wenzelm parents: 
80958diff
changeset | 484 | Array.tabulate (tags_size tags', fn i => | 
| 
4749c9b0ba73
eliminated redundant nt_count: rely on Symtab.size;
 wenzelm parents: 
80958diff
changeset | 485 | if i < Vector.length prods then Vector.nth prods i | 
| 78009 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 486 | else nt_gram_empty); | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 487 | |
| 78009 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 488 | val (chains', lambdas') = | 
| 80957 | 489 | (chains, lambdas) |> fold (add_production array_prods') new_prods; | 
| 78009 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 490 | in | 
| 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 491 | Gram | 
| 80959 
4749c9b0ba73
eliminated redundant nt_count: rely on Symtab.size;
 wenzelm parents: 
80958diff
changeset | 492 |      {tags = tags',
 | 
| 81000 | 493 | names = make_names tags', | 
| 78009 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 494 | chains = chains', | 
| 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 495 | lambdas = lambdas', | 
| 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 496 | prods = Array.vector array_prods'} | 
| 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 497 | end; | 
| 18 | 498 | |
| 80960 | 499 | in | 
| 500 | ||
| 78009 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 501 | fun make_gram [] _ (SOME gram) = gram | 
| 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 502 | | make_gram new_xprods _ (SOME gram) = extend_gram new_xprods gram | 
| 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 503 | | make_gram [] [] NONE = empty_gram | 
| 
f906f7f83dae
performance tuning: cached non-persistent Parser.gram reduces heap size by approx. 1-4%;
 wenzelm parents: 
78007diff
changeset | 504 | | make_gram new_xprods old_xprods NONE = extend_gram (new_xprods @ old_xprods) empty_gram; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 505 | |
| 80960 | 506 | end; | 
| 507 | ||
| 18 | 508 | |
| 42217 | 509 | |
| 42374 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 510 | (** parser **) | 
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 511 | |
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 512 | (* parse tree *) | 
| 18 | 513 | |
| 80991 | 514 | datatype tree = | 
| 81007 | 515 |   Markup of {markup: Markup.T list, range: Position.range} * tree list |
 | 
| 516 |   Node of {nonterm: string, const: string, range: Position.range} * tree list |
 | |
| 37683 | 517 | Tip of Lexicon.token; | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 518 | |
| 81007 | 519 | fun tree_range (Markup ({range, ...}, _)) = range
 | 
| 520 |   | tree_range (Node ({range, ...}, _)) = range
 | |
| 521 | | tree_range (Tip tok) = Lexicon.range_of_token tok; | |
| 522 | ||
| 523 | fun trees_range trees = | |
| 524 | let | |
| 525 | fun first_pos [] = NONE | |
| 526 | | first_pos (t :: rest) = | |
| 527 | let val pos = #1 (tree_range t) | |
| 528 | in if pos = Position.none then first_pos rest else SOME pos end; | |
| 529 | ||
| 530 | fun last_pos [] res = res | |
| 531 | | last_pos (t :: rest) res = | |
| 532 | let | |
| 533 | val end_pos = #2 (tree_range t); | |
| 534 | val res' = if end_pos = Position.none then res else SOME end_pos; | |
| 535 | in last_pos rest res' end; | |
| 536 | in | |
| 537 | (case first_pos trees of | |
| 538 | NONE => Position.no_range | |
| 539 | | SOME pos => | |
| 540 | (case last_pos trees NONE of | |
| 541 | NONE => Position.no_range | |
| 542 | | SOME end_pos => Position.range (pos, end_pos))) | |
| 543 | end; | |
| 544 | ||
| 81015 | 545 | fun pretty_tree (Markup (_, ts)) = maps pretty_tree ts | 
| 546 |   | pretty_tree (Node ({const = c, ...}, ts)) =
 | |
| 547 |       [Pretty.enclose "(" ")" (Pretty.breaks (Pretty.quote (Pretty.str c) :: maps pretty_tree ts))]
 | |
| 548 | | pretty_tree (Tip tok) = | |
| 549 | if Lexicon.valued_token tok then [Pretty.str (Lexicon.str_of_token tok)] else []; | |
| 550 | ||
| 551 | ||
| 552 | (* tree_op: bottom-up construction of markup *) | |
| 553 | ||
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 554 | datatype tree_op = | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 555 | Markup_Push | | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 556 | Markup_Pop of Markup.T list | | 
| 81001 
0c6a600c8939
more detailed parse tree: retain nonterminal context as well;
 wenzelm parents: 
81000diff
changeset | 557 |   Node_Op of {nonterm: int, const: string} * tree_op list |
 | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 558 | Tip_Op of Lexicon.token; | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 559 | |
| 80978 
5e2b1588c5cb
minor performance tuning: proper table for parsetree list;
 wenzelm parents: 
80977diff
changeset | 560 | local | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 561 | fun drop_markup (Markup_Push :: ts) = drop_markup ts | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 562 | | drop_markup (Markup_Pop _ :: ts) = drop_markup ts | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 563 | | drop_markup ts = ts; | 
| 80978 
5e2b1588c5cb
minor performance tuning: proper table for parsetree list;
 wenzelm parents: 
80977diff
changeset | 564 | in | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 565 | fun tree_ops_ord arg = | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 566 | if pointer_eq arg then EQUAL | 
| 80993 | 567 | else | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 568 | (case apply2 drop_markup arg of | 
| 81002 
1f5462055655
clarified comparison: prefer authentic nonterminal context, instead of somewhat accidental "const";
 wenzelm parents: 
81001diff
changeset | 569 |         (Node_Op ({nonterm = nt, ...}, ts) :: rest, Node_Op ({nonterm = nt', ...}, ts') :: rest') =>
 | 
| 
1f5462055655
clarified comparison: prefer authentic nonterminal context, instead of somewhat accidental "const";
 wenzelm parents: 
81001diff
changeset | 570 | (case int_ord (nt, nt') of | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 571 | EQUAL => | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 572 | (case tree_ops_ord (ts, ts') of | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 573 | EQUAL => tree_ops_ord (rest, rest') | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 574 | | ord => ord) | 
| 80993 | 575 | | ord => ord) | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 576 | | (Tip_Op t :: rest, Tip_Op t' :: rest') => | 
| 81017 
bc5eb7841b74
clarify comparison of output: ignore token positions, which are somewhat accidental;
 wenzelm parents: 
81015diff
changeset | 577 | (case Lexicon.token_content_ord (t, t') of | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 578 | EQUAL => tree_ops_ord (rest, rest') | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 579 | | ord => ord) | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 580 | | (Node_Op _ :: _, Tip_Op _ :: _) => LESS | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 581 | | (Tip_Op _ :: _, Node_Op _ :: _) => GREATER | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 582 | | ([], []) => EQUAL | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 583 | | ([], _ :: _) => LESS | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 584 | | (_ :: _, []) => GREATER | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 585 | | _ => raise Match); | 
| 80978 
5e2b1588c5cb
minor performance tuning: proper table for parsetree list;
 wenzelm parents: 
80977diff
changeset | 586 | end; | 
| 
5e2b1588c5cb
minor performance tuning: proper table for parsetree list;
 wenzelm parents: 
80977diff
changeset | 587 | |
| 81013 | 588 | fun make_tree names root tree_ops = | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 589 | let | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 590 | fun top [] = [] | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 591 | | top (xs :: _) = xs; | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 592 | |
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 593 | fun pop [] = [] | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 594 | | pop (_ :: xs) = xs; | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 595 | |
| 81007 | 596 | fun make_markup markup ts = | 
| 597 |       Markup ({markup = markup, range = trees_range ts}, ts);
 | |
| 81004 
07ad0b407d38
more accurate parse tree: retain all tokens (and thus positions);
 wenzelm parents: 
81002diff
changeset | 598 | |
| 81007 | 599 |     fun make_node {nonterm = nt, const} ts =
 | 
| 600 |       Node ({nonterm = the_name names nt, const = const, range = trees_range ts}, ts);
 | |
| 81001 
0c6a600c8939
more detailed parse tree: retain nonterminal context as well;
 wenzelm parents: 
81000diff
changeset | 601 | |
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 602 | fun make body stack ops = | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 603 | (case ops of | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 604 | Markup_Push :: rest => make [] (body :: stack) rest | 
| 81007 | 605 | | Markup_Pop markup :: rest => make (make_markup markup body :: top stack) (pop stack) rest | 
| 606 | | Node_Op (name, ts) :: rest => make (make_node name (make [] [] ts) :: body) stack rest | |
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 607 | | Tip_Op tok :: rest => make (Tip tok :: body) stack rest | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 608 | | [] => if null stack then body else raise Fail "Pending markup blocks"); | 
| 81014 | 609 |   in make_node {nonterm = root, const = ""} (make [] [] tree_ops) end;
 | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 610 | |
| 42374 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 611 | |
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 612 | (* output *) | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 613 | |
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 614 | abstype output = Output of int * tree_op list | 
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 615 | with | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 616 | |
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 617 | val empty_output = Output (0, []); | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 618 | |
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 619 | fun token_output tok (Output (n, ts)) = Output (n + 1, Tip_Op tok :: ts); | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 620 | fun node_output id (Output (n, ts)) = Output (n, [Node_Op (id, ts)]); | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 621 | |
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 622 | fun push_output (Output (n, ts)) = Output (n, Markup_Push :: ts); | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 623 | fun pop_output markup (Output (n, ts)) = Output (n, Markup_Pop markup :: ts); | 
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 624 | |
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 625 | fun append_output (Output (m, ss)) (Output (n, ts)) = Output (m + n, ss @ ts); | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 626 | |
| 81015 | 627 | val output_ord = | 
| 628 | pointer_eq_ord (fn (Output (m, ss), Output (n, tt)) => | |
| 629 | (case int_ord (m, n) of EQUAL => tree_ops_ord (ss, tt) | ord => ord)); | |
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 630 | |
| 81013 | 631 | fun output_tree names root (Output (_, ts)) = make_tree names root ts; | 
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 632 | |
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 633 | end; | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 634 | |
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 635 | structure Output = Table(type key = output val ord = output_ord); | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 636 | |
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 637 | |
| 81015 | 638 | (* parse *) | 
| 42205 
22f5cc06c419
direct pretty printing of parsetrees -- prevent diagnostic output from crashing due to undeclared entities;
 wenzelm parents: 
41378diff
changeset | 639 | |
| 18 | 640 | type state = | 
| 80930 | 641 | (nt * int * (*identification and production precedence*) | 
| 642 | string * (*name of production*) | |
| 643 | int) * (*index for previous state list*) | |
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 644 | symb list * (*remaining input -- after "dot"*) | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 645 | output; (*accepted output -- before "dot"*) | 
| 18 | 646 | |
| 80976 | 647 | local | 
| 648 | ||
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 649 | fun update_output (A, (out, p)) used = | 
| 80978 
5e2b1588c5cb
minor performance tuning: proper table for parsetree list;
 wenzelm parents: 
80977diff
changeset | 650 | let | 
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 651 | val (i: int, output) = the (Inttab.lookup used A); | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 652 | fun update output' = Inttab.update (A, (i, output')); | 
| 80978 
5e2b1588c5cb
minor performance tuning: proper table for parsetree list;
 wenzelm parents: 
80977diff
changeset | 653 | in | 
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 654 | (case Output.lookup output out of | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 655 | NONE => (no_prec, update (Output.make [(out, p)]) used) | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 656 | | SOME q => (q, if p > q then update (Output.update (out, p) output) used else used)) | 
| 80978 
5e2b1588c5cb
minor performance tuning: proper table for parsetree list;
 wenzelm parents: 
80977diff
changeset | 657 | end; | 
| 18 | 658 | |
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 659 | val init_prec = (no_prec, Output.empty); | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 660 | |
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 661 | fun update_prec (A, p) used = | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 662 | Inttab.map_default (A, init_prec) (fn (_, output) => (p, output)) used; | 
| 18 | 663 | |
| 81223 | 664 | fun movedot_states out' A min max = build o fold | 
| 81119 | 665 | (fn ((info, Nonterminal (B, p) :: sa, out): state) => | 
| 81223 | 666 | if A = B andalso upto_prec min max p then cons (info, sa, append_output out' out) | 
| 667 | else I | |
| 668 | | _ => I); | |
| 18 | 669 | |
| 80981 | 670 | fun process_states (Gram {prods = gram_prods, chains = gram_chains, ...}) stateset i c states0 =
 | 
| 37683 | 671 | let | 
| 80988 | 672 | val get = Array.nth stateset; | 
| 673 | val set = Array.upd stateset; | |
| 674 | ||
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 675 | fun add_prods nt min max : state list -> state list = | 
| 80947 | 676 | let | 
| 80978 
5e2b1588c5cb
minor performance tuning: proper table for parsetree list;
 wenzelm parents: 
80977diff
changeset | 677 | fun add (rhs, id, prod_prec) = | 
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 678 | if until_prec min max prod_prec | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 679 | then cons ((nt, prod_prec, id, i), rhs, empty_output) | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 680 | else I; | 
| 80947 | 681 | fun token_prods prods = | 
| 80986 | 682 | fold add (prods_lookup prods c) #> | 
| 80972 | 683 | fold add (prods_lookup prods Lexicon.dummy); | 
| 80947 | 684 | val nt_prods = #2 o Vector.nth gram_prods; | 
| 80974 
d76230f575f2
minor performance tuning (NB: order of prods / states does not matter);
 wenzelm parents: 
80973diff
changeset | 685 | in fold (token_prods o nt_prods) (chains_all_preds gram_chains [nt]) end; | 
| 80947 | 686 | |
| 80933 | 687 | fun process _ [] (Si, Sii) = (Si, Sii) | 
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 688 | | process used ((S as (info, symbs, out)) :: states) (Si, Sii) = | 
| 80946 | 689 | (case symbs of | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 690 | Nonterminal (nt, min_prec) :: sa => | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 691 | let (*predictor operation*) | 
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 692 | fun movedot_lambda (out', k) = | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 693 | if min_prec <= k then cons (info, sa, append_output out' out) else I; | 
| 80990 | 694 | val (used', states', used_trees) = | 
| 42226 
cb650789f2f0
use standard tables with standard argument order;
 wenzelm parents: 
42222diff
changeset | 695 | (case Inttab.lookup used nt of | 
| 80990 | 696 | SOME (used_prec, used_trees) => | 
| 697 | if used_prec <= min_prec then (used, states, used_trees) | |
| 698 | else | |
| 37683 | 699 | let | 
| 80990 | 700 | val used' = update_prec (nt, min_prec) used; | 
| 701 | val states' = states |> add_prods nt min_prec used_prec; | |
| 702 | in (used', states', used_trees) end | |
| 703 | | NONE => | |
| 704 | let | |
| 705 | val used' = update_prec (nt, min_prec) used; | |
| 706 | val states' = states |> add_prods nt min_prec no_prec; | |
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 707 | in (used', states', Output.empty) end); | 
| 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 708 | val states'' = states' |> Output.fold movedot_lambda used_trees; | 
| 80990 | 709 | in process used' states'' (S :: Si, Sii) end | 
| 80946 | 710 | | Terminal a :: sa => (*scanner operation*) | 
| 80929 | 711 | let | 
| 712 | val Sii' = | |
| 81004 
07ad0b407d38
more accurate parse tree: retain all tokens (and thus positions);
 wenzelm parents: 
81002diff
changeset | 713 | if is_equal (Lexicon.token_type_ord (a, c)) | 
| 
07ad0b407d38
more accurate parse tree: retain all tokens (and thus positions);
 wenzelm parents: 
81002diff
changeset | 714 | then (*move dot*) (info, sa, token_output c out) :: Sii else Sii; | 
| 80981 | 715 | in process used states (S :: Si, Sii') end | 
| 80999 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 716 | | Bg markup :: sa => process used ((info, sa, pop_output markup out) :: states) (Si, Sii) | 
| 
7f9e8516ca05
clarified input and output: support markup blocks via Bg/En;
 wenzelm parents: 
80995diff
changeset | 717 | | En :: sa => process used ((info, sa, push_output out) :: states) (Si, Sii) | 
| 80946 | 718 | | [] => (*completer operation*) | 
| 80928 | 719 | let | 
| 80978 
5e2b1588c5cb
minor performance tuning: proper table for parsetree list;
 wenzelm parents: 
80977diff
changeset | 720 | val (A, p, id, j) = info; | 
| 81001 
0c6a600c8939
more detailed parse tree: retain nonterminal context as well;
 wenzelm parents: 
81000diff
changeset | 721 |                 val out' = node_output {nonterm = A, const = id} out;
 | 
| 81119 | 722 | val (used', states') = | 
| 80928 | 723 | if j = i then (*lambda production?*) | 
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 724 | let val (q, used') = update_output (A, (out', p)) used | 
| 81119 | 725 | in (used', movedot_states out' A q p Si) end | 
| 726 | else (used, movedot_states out' A no_prec p (get j)); | |
| 80981 | 727 | in process used' (states' @ states) (S :: Si, Sii) end) | 
| 80961 | 728 | |
| 80981 | 729 | val (Si, Sii) = process Inttab.empty states0 ([], []); | 
| 80988 | 730 | in set i Si; set (i + 1) Sii end; | 
| 18 | 731 | |
| 80964 | 732 | fun produce gram stateset i prev rest = | 
| 77888 | 733 | (case Array.nth stateset i of | 
| 25986 
26f1e4c172c3
syntax error: reduced spam -- print expected nonterminals instead of terminals;
 wenzelm parents: 
24245diff
changeset | 734 | [] => | 
| 
26f1e4c172c3
syntax error: reduced spam -- print expected nonterminals instead of terminals;
 wenzelm parents: 
24245diff
changeset | 735 | let | 
| 80964 | 736 | val inp = if Lexicon.is_dummy prev then rest else prev :: rest; | 
| 737 | val pos = Position.here (Lexicon.pos_of_token prev); | |
| 27801 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 738 | in | 
| 80964 | 739 | if null inp then | 
| 55624 | 740 |           error ("Inner syntax error: unexpected end of input" ^ pos)
 | 
| 741 | else | |
| 742 |           error ("Inner syntax error" ^ pos ^
 | |
| 743 | Markup.markup Markup.no_report | |
| 744 |               ("\n" ^ Pretty.string_of
 | |
| 745 | (Pretty.block [ | |
| 746 | Pretty.str "at", Pretty.brk 1, | |
| 747 | Pretty.block | |
| 748 | (Pretty.str "\"" :: | |
| 80964 | 749 | Pretty.breaks (map (Pretty.str o Lexicon.str_of_token) (#1 (split_last inp))) @ | 
| 55624 | 750 | [Pretty.str "\""])]))) | 
| 27801 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 751 | end | 
| 80931 | 752 | | states => | 
| 80964 | 753 | (case rest of | 
| 80931 | 754 | [] => states | 
| 80964 | 755 | | c :: cs => (process_states gram stateset i c states; produce gram stateset (i + 1) c cs))); | 
| 18 | 756 | |
| 80933 | 757 | in | 
| 18 | 758 | |
| 81015 | 759 | fun parse (gram as Gram {tags, names, ...}) root_name toks =
 | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 760 | let | 
| 81015 | 761 | val root = | 
| 762 | (case tags_lookup tags root_name of | |
| 37683 | 763 | SOME tag => tag | 
| 81015 | 764 |       | NONE => error ("Inner syntax: bad grammar root symbol " ^ quote root_name));
 | 
| 18 | 765 | |
| 27801 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 766 | val end_pos = | 
| 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 767 | (case try List.last toks of | 
| 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 768 | NONE => Position.none | 
| 67551 | 769 | | SOME tok => Lexicon.end_pos_of_token tok); | 
| 80933 | 770 | val input = toks @ [Lexicon.mk_eof end_pos]; | 
| 771 | ||
| 81015 | 772 | val S0: state = ((~1, 0, "", 0), [Nonterminal (root, 0), Terminal Lexicon.eof], empty_output); | 
| 80933 | 773 | val stateset = Array.array (length input + 1, []); | 
| 774 | val _ = Array.upd stateset 0 [S0]; | |
| 775 | ||
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 776 | val result = | 
| 80964 | 777 | produce gram stateset 0 Lexicon.dummy input | 
| 81015 | 778 | |> map (output_tree names root o #3); | 
| 80992 
fc0f2053c4d2
clarified signature: more explicit type "output";
 wenzelm parents: 
80991diff
changeset | 779 | in if null result then raise Fail "Inner syntax: no parse trees" else result end; | 
| 26678 | 780 | |
| 18 | 781 | end; | 
| 80933 | 782 | |
| 783 | end; |