| author | bulwahn | 
| Thu, 02 Jun 2011 10:13:46 +0200 | |
| changeset 43149 | 9675d631df3d | 
| parent 42374 | b9a6b465da25 | 
| child 44102 | 954e9d6782ea | 
| permissions | -rw-r--r-- | 
| 18 | 1 | (* Title: Pure/Syntax/parser.ML | 
| 2186 | 2 | Author: Carsten Clasohm, Sonia Mahjoub, and Markus Wenzel, TU Muenchen | 
| 18 | 3 | |
| 15979 | 4 | General context-free parser for the inner syntax of terms, types, etc. | 
| 18 | 5 | *) | 
| 6 | ||
| 7 | signature PARSER = | |
| 15752 | 8 | sig | 
| 1507 | 9 | type gram | 
| 10 | val empty_gram: gram | |
| 42288 
2074b31650e6
discontinued special treatment of structure Syntax_Ext (formerly Syn_Ext);
 wenzelm parents: 
42282diff
changeset | 11 | val extend_gram: Syntax_Ext.xprod list -> gram -> gram | 
| 37684 | 12 | val merge_gram: gram * gram -> gram | 
| 1507 | 13 | val pretty_gram: gram -> Pretty.T list | 
| 14 | datatype parsetree = | |
| 15 | Node of string * parsetree list | | |
| 16 | Tip of Lexicon.token | |
| 42374 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 17 | exception PARSETREE of parsetree | 
| 42205 
22f5cc06c419
direct pretty printing of parsetrees -- prevent diagnostic output from crashing due to undeclared entities;
 wenzelm parents: 
41378diff
changeset | 18 | val pretty_parsetree: parsetree -> Pretty.T | 
| 38831 
4933a3dfd745
more careful treatment of context visibility flag wrt. spurious warnings;
 wenzelm parents: 
38713diff
changeset | 19 | val parse: Proof.context -> gram -> string -> Lexicon.token list -> parsetree list | 
| 26678 | 20 | val guess_infix_lr: gram -> string -> (string * bool * bool * int) option | 
| 41378 | 21 | val branching_level: int Config.T | 
| 15752 | 22 | end; | 
| 1507 | 23 | |
| 15752 | 24 | structure Parser: PARSER = | 
| 18 | 25 | struct | 
| 15752 | 26 | |
| 18 | 27 | (** datatype gram **) | 
| 28 | ||
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 29 | (*production for the NTs are stored in a vector | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 30 | so we can identify NTs by their index*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 31 | type nt_tag = int; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 32 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 33 | datatype symb = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 34 | Terminal of Lexicon.token | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 35 | | Nonterminal of nt_tag * int; (*(tag, precedence)*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 36 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 37 | type nt_gram = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 38 | ((nt_tag list * Lexicon.token list) * | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 39 | (Lexicon.token option * (symb list * string * int) list) list); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 40 | (*(([dependent_nts], [start_tokens]), [(start_token, [(rhs, name, prio)])])*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 41 | (*depent_nts is a list of all NTs whose lookahead depends on this NT's lookahead*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 42 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 43 | datatype gram = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 44 | Gram of | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 45 |    {nt_count: int,
 | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 46 | prod_count: int, | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 47 | tags: nt_tag Symtab.table, | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 48 | chains: (nt_tag * nt_tag list) list, (*[(to, [from])]*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 49 | lambdas: nt_tag list, | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 50 | prods: nt_gram Vector.vector}; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 51 | (*"tags" is used to map NT names (i.e. strings) to tags; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 52 | chain productions are not stored as normal productions | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 53 | but instead as an entry in "chains"; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 54 | lambda productions are stored as normal productions | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 55 | and also as an entry in "lambdas"*) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 56 | |
| 42218 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 57 | val union_token = union Lexicon.matching_tokens; | 
| 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 58 | val subtract_token = subtract Lexicon.matching_tokens; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 59 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 60 | (*productions for which no starting token is | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 61 | known yet are associated with this token*) | 
| 38713 | 62 | val unknown_start = Lexicon.eof; | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 63 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 64 | (*get all NTs that are connected with a list of NTs*) | 
| 23909 | 65 | fun connected_with _ ([]: nt_tag list) relatives = relatives | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 66 | | connected_with chains (root :: roots) relatives = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 67 | let val branches = subtract (op =) relatives (these (AList.lookup (op =) chains root)); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 68 | in connected_with chains (branches @ roots) (branches @ relatives) end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 69 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 70 | (*convert productions to grammar; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 71 | N.B. that the chains parameter has the form [(from, [to])]; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 72 | prod_count is of type "int option" and is only updated if it is <> NONE*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 73 | fun add_prods _ chains lambdas prod_count [] = (chains, lambdas, prod_count) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 74 | | add_prods prods chains lambdas prod_count ((lhs, new_prod as (rhs, name, pri)) :: ps) = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 75 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 76 | val chain_from = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 77 | (case (pri, rhs) of | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 78 | (~1, [Nonterminal (id, ~1)]) => SOME id | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 79 | | _ => NONE); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 80 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 81 | (*store chain if it does not already exist*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 82 | val (new_chain, chains') = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 83 | (case chain_from of | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 84 | NONE => (NONE, chains) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 85 | | SOME from => | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 86 | let val old_tos = these (AList.lookup (op =) chains from) in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 87 | if member (op =) old_tos lhs then (NONE, chains) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 88 | else (SOME from, AList.update (op =) (from, insert (op =) lhs old_tos) chains) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 89 | end); | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 90 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 91 | (*propagate new chain in lookahead and lambda lists; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 92 | added_starts is used later to associate existing | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 93 | productions with new starting tokens*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 94 | val (added_starts, lambdas') = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 95 | if is_none new_chain then ([], lambdas) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 96 | else | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 97 | let (*lookahead of chain's source*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 98 | val ((from_nts, from_tks), _) = Array.sub (prods, the new_chain); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 99 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 100 | (*copy from's lookahead to chain's destinations*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 101 | fun copy_lookahead [] added = added | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 102 | | copy_lookahead (to :: tos) added = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 103 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 104 | val ((to_nts, to_tks), ps) = Array.sub (prods, to); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 105 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 106 | val new_tks = subtract (op =) to_tks from_tks; (*added lookahead tokens*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 107 | val _ = Array.update (prods, to, ((to_nts, to_tks @ new_tks), ps)); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 108 | in | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 109 | copy_lookahead tos (if null new_tks then added else (to, new_tks) :: added) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 110 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 111 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 112 | val tos = connected_with chains' [lhs] [lhs]; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 113 | in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 114 | (copy_lookahead tos [], | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 115 | union (op =) (if member (op =) lambdas lhs then tos else []) lambdas) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 116 | end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 117 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 118 | (*test if new production can produce lambda | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 119 | (rhs must either be empty or only consist of lambda NTs)*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 120 | val (new_lambda, lambdas') = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 121 | if forall | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 122 | (fn Nonterminal (id, _) => member (op =) lambdas' id | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 123 | | Terminal _ => false) rhs | 
| 42218 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 124 | then (true, union (op =) (connected_with chains' [lhs] [lhs]) lambdas') | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 125 | else (false, lambdas'); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 126 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 127 | (*list optional terminal and all nonterminals on which the lookahead | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 128 | of a production depends*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 129 | fun lookahead_dependency _ [] nts = (NONE, nts) | 
| 42217 | 130 | | lookahead_dependency _ (Terminal tk :: _) nts = (SOME tk, nts) | 
| 131 | | lookahead_dependency lambdas (Nonterminal (nt, _) :: symbs) nts = | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 132 | if member (op =) lambdas nt then | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 133 | lookahead_dependency lambdas symbs (nt :: nts) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 134 | else (NONE, nt :: nts); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 135 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 136 | (*get all known starting tokens for a nonterminal*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 137 | fun starts_for_nt nt = snd (fst (Array.sub (prods, nt))); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 138 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 139 | (*update prods, lookaheads, and lambdas according to new lambda NTs*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 140 | val (added_starts', lambdas') = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 141 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 142 | (*propagate added lambda NT*) | 
| 42217 | 143 | fun propagate_lambda [] added_starts lambdas = (added_starts, lambdas) | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 144 | | propagate_lambda (l :: ls) added_starts lambdas = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 145 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 146 | (*get lookahead for lambda NT*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 147 | val ((dependent, l_starts), _) = Array.sub (prods, l); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 148 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 149 | (*check productions whose lookahead may depend on lambda NT*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 150 | fun examine_prods [] add_lambda nt_dependencies added_tks nt_prods = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 151 | (add_lambda, nt_dependencies, added_tks, nt_prods) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 152 | | examine_prods ((p as (rhs, _, _)) :: ps) add_lambda | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 153 | nt_dependencies added_tks nt_prods = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 154 | let val (tk, nts) = lookahead_dependency lambdas rhs [] in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 155 | if member (op =) nts l then (*update production's lookahead*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 156 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 157 | val new_lambda = is_none tk andalso subset (op =) (nts, lambdas); | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 158 | |
| 42218 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 159 | val new_tks = | 
| 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 160 | (if is_some tk then [the tk] else []) | 
| 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 161 | |> fold (union_token o starts_for_nt) nts | 
| 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 162 | |> subtract (op =) l_starts; | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 163 | |
| 42218 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 164 | val added_tks' = union_token added_tks new_tks; | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 165 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 166 | val nt_dependencies' = union (op =) nts nt_dependencies; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 167 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 168 | (*associate production with new starting tokens*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 169 | fun copy ([]: Lexicon.token option list) nt_prods = nt_prods | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 170 | | copy (tk :: tks) nt_prods = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 171 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 172 | val old_prods = these (AList.lookup (op =) nt_prods tk); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 173 | val prods' = p :: old_prods; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 174 | in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 175 | nt_prods | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 176 | |> AList.update (op =) (tk, prods') | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 177 | |> copy tks | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 178 | end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 179 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 180 | val nt_prods' = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 181 | let val new_opt_tks = map SOME new_tks in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 182 | copy | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 183 | ((if new_lambda then [NONE] else []) @ new_opt_tks) nt_prods | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 184 | end; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 185 | in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 186 | examine_prods ps (add_lambda orelse new_lambda) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 187 | nt_dependencies' added_tks' nt_prods' | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 188 | end | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 189 | else (*skip production*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 190 | examine_prods ps add_lambda nt_dependencies added_tks nt_prods | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 191 | end; | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 192 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 193 | (*check each NT whose lookahead depends on new lambda NT*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 194 | fun process_nts [] added_lambdas added_starts = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 195 | (added_lambdas, added_starts) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 196 | | process_nts (nt :: nts) added_lambdas added_starts = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 197 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 198 | val (lookahead as (old_nts, old_tks), nt_prods) = Array.sub (prods, nt); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 199 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 200 | (*existing productions whose lookahead may depend on l*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 201 | val tk_prods = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 202 | these | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 203 | (AList.lookup (op =) nt_prods | 
| 38713 | 204 | (SOME (hd l_starts handle Empty => unknown_start))); | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 205 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 206 | (*add_lambda is true if an existing production of the nt | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 207 | produces lambda due to the new lambda NT l*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 208 | val (add_lambda, nt_dependencies, added_tks, nt_prods') = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 209 | examine_prods tk_prods false [] [] nt_prods; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 210 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 211 | val added_nts = subtract (op =) old_nts nt_dependencies; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 212 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 213 | val added_lambdas' = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 214 | if add_lambda then nt :: added_lambdas | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 215 | else added_lambdas; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 216 | val _ = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 217 | Array.update | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 218 | (prods, nt, ((added_nts @ old_nts, old_tks @ added_tks), nt_prods')); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 219 | (*N.B. that because the tks component | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 220 | is used to access existing | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 221 | productions we have to add new | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 222 | tokens at the _end_ of the list*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 223 | in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 224 | if null added_tks then | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 225 | process_nts nts added_lambdas' added_starts | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 226 | else | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 227 | process_nts nts added_lambdas' ((nt, added_tks) :: added_starts) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 228 | end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 229 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 230 | val (added_lambdas, added_starts') = process_nts dependent [] added_starts; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 231 | val added_lambdas' = subtract (op =) lambdas added_lambdas; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 232 | in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 233 | propagate_lambda (ls @ added_lambdas') added_starts' (added_lambdas' @ lambdas) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 234 | end; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 235 | in propagate_lambda (subtract (op =) lambdas lambdas') added_starts lambdas' end; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 236 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 237 | (*insert production into grammar*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 238 | val (added_starts', prod_count') = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 239 | if is_some chain_from | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 240 | then (added_starts', prod_count) (*don't store chain production*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 241 | else | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 242 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 243 | (*lookahead tokens of new production and on which | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 244 | NTs lookahead depends*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 245 | val (start_tk, start_nts) = lookahead_dependency lambdas' rhs []; | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 246 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 247 | val start_tks = | 
| 42218 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 248 | (if is_some start_tk then [the start_tk] else []) | 
| 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 249 | |> fold (union_token o starts_for_nt) start_nts; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 250 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 251 | val opt_starts = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 252 | (if new_lambda then [NONE] | 
| 38713 | 253 | else if null start_tks then [SOME unknown_start] | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 254 | else []) @ map SOME start_tks; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 255 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 256 | (*add lhs NT to list of dependent NTs in lookahead*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 257 | fun add_nts [] = () | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 258 | | add_nts (nt :: nts) = | 
| 42217 | 259 | let val ((old_nts, old_tks), ps) = Array.sub (prods, nt) in | 
| 260 | if member (op =) old_nts lhs then () | |
| 261 | else Array.update (prods, nt, ((lhs :: old_nts, old_tks), ps)) | |
| 262 | end; | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 263 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 264 | (*add new start tokens to chained NTs' lookahead list; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 265 | also store new production for lhs NT*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 266 | fun add_tks [] added prod_count = (added, prod_count) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 267 | | add_tks (nt :: nts) added prod_count = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 268 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 269 | val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 270 | |
| 42218 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 271 | val new_tks = subtract_token old_tks start_tks; | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 272 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 273 | (*store new production*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 274 | fun store [] prods is_new = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 275 | (prods, | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 276 | if is_some prod_count andalso is_new then | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 277 | Option.map (fn x => x + 1) prod_count | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 278 | else prod_count, is_new) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 279 | | store (tk :: tks) prods is_new = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 280 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 281 | val tk_prods = these (AList.lookup (op =) prods tk); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 282 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 283 | (*if prod_count = NONE then we can assume that | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 284 | grammar does not contain new production already*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 285 | val (tk_prods', is_new') = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 286 | if is_some prod_count then | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 287 | if member (op =) tk_prods new_prod then (tk_prods, false) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 288 | else (new_prod :: tk_prods, true) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 289 | else (new_prod :: tk_prods, true); | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 290 | |
| 42218 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 291 | val prods' = | 
| 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 292 | if is_new' then | 
| 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 293 | AList.update (op =) (tk: Lexicon.token option, tk_prods') prods | 
| 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 294 | else prods; | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 295 | in store tks prods' (is_new orelse is_new') end; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 296 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 297 | val (nt_prods', prod_count', changed) = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 298 | if nt = lhs | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 299 | then store opt_starts nt_prods false | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 300 | else (nt_prods, prod_count, false); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 301 | val _ = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 302 | if not changed andalso null new_tks then () | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 303 | else Array.update (prods, nt, ((old_nts, old_tks @ new_tks), nt_prods')); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 304 | in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 305 | add_tks nts | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 306 | (if null new_tks then added else (nt, new_tks) :: added) prod_count' | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 307 | end; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 308 | val _ = add_nts start_nts; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 309 | in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 310 | add_tks (connected_with chains' [lhs] [lhs]) [] prod_count | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 311 | end; | 
| 1175 
1b15a4b1a4f7
added comments; fixed a bug; reduced memory usage slightly
 clasohm parents: 
1147diff
changeset | 312 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 313 | (*associate productions with new lookaheads*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 314 | val _ = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 315 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 316 | (*propagate added start tokens*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 317 | fun add_starts [] = () | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 318 | | add_starts ((changed_nt, new_tks) :: starts) = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 319 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 320 | (*token under which old productions which | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 321 | depend on changed_nt could be stored*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 322 | val key = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 323 | (case find_first (not o member (op =) new_tks) (starts_for_nt changed_nt) of | 
| 38713 | 324 | NONE => SOME unknown_start | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 325 | | t => t); | 
| 18 | 326 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 327 | (*copy productions whose lookahead depends on changed_nt; | 
| 38713 | 328 | if key = SOME unknown_start then tk_prods is used to hold | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 329 | the productions not copied*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 330 | fun update_prods [] result = result | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 331 | | update_prods ((p as (rhs, _: string, _: nt_tag)) :: ps) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 332 | (tk_prods, nt_prods) = | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 333 | let | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 334 | (*lookahead dependency for production*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 335 | val (tk, depends) = lookahead_dependency lambdas' rhs []; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 336 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 337 | (*test if this production has to be copied*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 338 | val update = member (op =) depends changed_nt; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 339 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 340 | (*test if production could already be associated with | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 341 | a member of new_tks*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 342 | val lambda = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 343 | length depends > 1 orelse | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 344 | not (null depends) andalso is_some tk | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 345 | andalso member (op =) new_tks (the tk); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 346 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 347 | (*associate production with new starting tokens*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 348 | fun copy ([]: Lexicon.token list) nt_prods = nt_prods | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 349 | | copy (tk :: tks) nt_prods = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 350 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 351 | val tk_prods = these (AList.lookup (op =) nt_prods (SOME tk)); | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 352 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 353 | val tk_prods' = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 354 | if not lambda then p :: tk_prods | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 355 | else insert (op =) p tk_prods; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 356 | (*if production depends on lambda NT we | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 357 | have to look for duplicates*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 358 | in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 359 | nt_prods | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 360 | |> AList.update (op =) (SOME tk, tk_prods') | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 361 | |> copy tks | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 362 | end; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 363 | val result = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 364 | if update then (tk_prods, copy new_tks nt_prods) | 
| 38713 | 365 | else if key = SOME unknown_start then (p :: tk_prods, nt_prods) | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 366 | else (tk_prods, nt_prods); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 367 | in update_prods ps result end; | 
| 377 
ab8917806779
lookaheads are now computed faster (during the grammar is built)
 clasohm parents: 
373diff
changeset | 368 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 369 | (*copy existing productions for new starting tokens*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 370 | fun process_nts [] added = added | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 371 | | process_nts (nt :: nts) added = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 372 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 373 | val (lookahead as (old_nts, old_tks), nt_prods) = Array.sub (prods, nt); | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 374 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 375 | val tk_prods = these (AList.lookup (op =) nt_prods key); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 376 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 377 | (*associate productions with new lookahead tokens*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 378 | val (tk_prods', nt_prods') = update_prods tk_prods ([], nt_prods); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 379 | |
| 42217 | 380 | val nt_prods'' = | 
| 381 | if key = SOME unknown_start then | |
| 382 | AList.update (op =) (key, tk_prods') nt_prods' | |
| 383 | else nt_prods'; | |
| 18 | 384 | |
| 42218 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 385 | val added_tks = subtract_token old_tks new_tks; | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 386 | in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 387 | if null added_tks then | 
| 42217 | 388 | (Array.update (prods, nt, (lookahead, nt_prods'')); | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 389 | process_nts nts added) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 390 | else | 
| 42217 | 391 | (Array.update (prods, nt, ((old_nts, added_tks @ old_tks), nt_prods'')); | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 392 | process_nts nts ((nt, added_tks) :: added)) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 393 | end; | 
| 18 | 394 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 395 | val ((dependent, _), _) = Array.sub (prods, changed_nt); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 396 | in add_starts (starts @ process_nts dependent []) end; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 397 | in add_starts added_starts' end; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 398 | in add_prods prods chains' lambdas' prod_count ps end; | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 399 | |
| 18 | 400 | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 401 | (* pretty_gram *) | 
| 18 | 402 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 403 | fun pretty_gram (Gram {tags, prods, chains, ...}) =
 | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 404 | let | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 405 | fun pretty_name name = [Pretty.str (name ^ " =")]; | 
| 18 | 406 | |
| 26069 
321c4ca82923
syntax error: suppress expected categories altogether;
 wenzelm parents: 
26068diff
changeset | 407 | val nt_name = the o Inttab.lookup (Inttab.make (map swap (Symtab.dest tags))); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 408 | |
| 37683 | 409 | fun pretty_symb (Terminal (Lexicon.Token (Lexicon.Literal, s, _))) = Pretty.quote (Pretty.str s) | 
| 410 | | pretty_symb (Terminal tok) = Pretty.str (Lexicon.str_of_token tok) | |
| 28843 | 411 | | pretty_symb (Nonterminal (tag, p)) = | 
| 412 | Pretty.str (nt_name tag ^ "[" ^ signed_string_of_int p ^ "]"); | |
| 18 | 413 | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 414 | fun pretty_const "" = [] | 
| 28843 | 415 |       | pretty_const c = [Pretty.str ("=> " ^ quote c)];
 | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 416 | |
| 28843 | 417 |     fun pretty_pri p = [Pretty.str ("(" ^ signed_string_of_int p ^ ")")];
 | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 418 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 419 | fun pretty_prod name (symbs, const, pri) = | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 420 | Pretty.block (Pretty.breaks (pretty_name name @ | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 421 | map pretty_symb symbs @ pretty_const const @ pretty_pri pri)); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 422 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 423 | fun pretty_nt (name, tag) = | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 424 | let | 
| 30189 
3633f560f4c3
discontinued experimental support for Alice -- too hard to maintain its many language incompatibilities, never really worked anyway;
 wenzelm parents: 
29565diff
changeset | 425 | fun prod_of_chain from = ([Nonterminal (from, ~1)], "", ~1); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 426 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 427 | val nt_prods = | 
| 42218 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 428 | fold (union (op =) o snd) (snd (Vector.sub (prods, tag))) [] @ | 
| 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 429 | map prod_of_chain (these (AList.lookup (op =) chains tag)); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 430 | in map (pretty_prod name) nt_prods end; | 
| 15752 | 431 | |
| 25986 
26f1e4c172c3
syntax error: reduced spam -- print expected nonterminals instead of terminals;
 wenzelm parents: 
24245diff
changeset | 432 | in maps pretty_nt (sort_wrt fst (Symtab.dest tags)) end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 433 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 434 | |
| 42217 | 435 | |
| 1438 | 436 | (** Operations on gramars **) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 437 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 438 | val empty_gram = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 439 | Gram | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 440 |    {nt_count = 0,
 | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 441 | prod_count = 0, | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 442 | tags = Symtab.empty, chains = [], | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 443 | lambdas = [], | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 444 | prods = Vector.fromList [(([], []), [])]}; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 445 | |
| 1438 | 446 | |
| 447 | (*Invert list of chain productions*) | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 448 | fun inverse_chains [] result = result | 
| 23909 | 449 | | inverse_chains ((root, branches: nt_tag list) :: cs) result = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 450 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 451 | fun add ([]: nt_tag list) result = result | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 452 | | add (id :: ids) result = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 453 | let val old = these (AList.lookup (op =) result id); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 454 | in add ids (AList.update (op =) (id, root :: old) result) end; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 455 | in inverse_chains cs (add branches result) end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 456 | |
| 1438 | 457 | |
| 458 | (*Add productions to a grammar*) | |
| 37684 | 459 | fun extend_gram [] gram = gram | 
| 460 |   | extend_gram xprods (Gram {nt_count, prod_count, tags, chains, lambdas, prods}) =
 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 461 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 462 | (*Get tag for existing nonterminal or create a new one*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 463 | fun get_tag nt_count tags nt = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 464 | (case Symtab.lookup tags nt of | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 465 | SOME tag => (nt_count, tags, tag) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 466 | | NONE => (nt_count + 1, Symtab.update_new (nt, nt_count) tags, nt_count)); | 
| 1438 | 467 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 468 | (*Convert symbols to the form used by the parser; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 469 | delimiters and predefined terms are stored as terminals, | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 470 | nonterminals are converted to integer tags*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 471 | fun symb_of [] nt_count tags result = (nt_count, tags, rev result) | 
| 42288 
2074b31650e6
discontinued special treatment of structure Syntax_Ext (formerly Syn_Ext);
 wenzelm parents: 
42282diff
changeset | 472 | | symb_of (Syntax_Ext.Delim s :: ss) nt_count tags result = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 473 | symb_of ss nt_count tags | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 474 | (Terminal (Lexicon.Token (Lexicon.Literal, s, Position.no_range)) :: result) | 
| 42288 
2074b31650e6
discontinued special treatment of structure Syntax_Ext (formerly Syn_Ext);
 wenzelm parents: 
42282diff
changeset | 475 | | symb_of (Syntax_Ext.Argument (s, p) :: ss) nt_count tags result = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 476 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 477 | val (nt_count', tags', new_symb) = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 478 | (case Lexicon.predef_term s of | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 479 | NONE => | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 480 | let val (nt_count', tags', s_tag) = get_tag nt_count tags s; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 481 | in (nt_count', tags', Nonterminal (s_tag, p)) end | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 482 | | SOME tk => (nt_count, tags, Terminal tk)); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 483 | in symb_of ss nt_count' tags' (new_symb :: result) end | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 484 | | symb_of (_ :: ss) nt_count tags result = symb_of ss nt_count tags result; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 485 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 486 | (*Convert list of productions by invoking symb_of for each of them*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 487 | fun prod_of [] nt_count prod_count tags result = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 488 | (nt_count, prod_count, tags, result) | 
| 42288 
2074b31650e6
discontinued special treatment of structure Syntax_Ext (formerly Syn_Ext);
 wenzelm parents: 
42282diff
changeset | 489 | | prod_of (Syntax_Ext.XProd (lhs, xsymbs, const, pri) :: ps) | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 490 | nt_count prod_count tags result = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 491 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 492 | val (nt_count', tags', lhs_tag) = get_tag nt_count tags lhs; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 493 | val (nt_count'', tags'', prods) = symb_of xsymbs nt_count' tags' []; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 494 | in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 495 | prod_of ps nt_count'' (prod_count + 1) tags'' | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 496 | ((lhs_tag, (prods, const, pri)) :: result) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 497 | end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 498 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 499 | val (nt_count', prod_count', tags', xprods') = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 500 | prod_of xprods nt_count prod_count tags []; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 501 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 502 | (*Copy array containing productions of old grammar; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 503 | this has to be done to preserve the old grammar while being able | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 504 | to change the array's content*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 505 | val prods' = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 506 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 507 | fun get_prod i = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 508 | if i < nt_count then Vector.sub (prods, i) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 509 | else (([], []), []); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 510 | in Array.tabulate (nt_count', get_prod) end; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 511 | |
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 512 | val fromto_chains = inverse_chains chains []; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 513 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 514 | (*Add new productions to old ones*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 515 | val (fromto_chains', lambdas', _) = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 516 | add_prods prods' fromto_chains lambdas NONE xprods'; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 517 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 518 | val chains' = inverse_chains fromto_chains' []; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 519 | in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 520 | Gram | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 521 |          {nt_count = nt_count',
 | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 522 | prod_count = prod_count', | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 523 | tags = tags', | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 524 | chains = chains', | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 525 | lambdas = lambdas', | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 526 | prods = Array.vector prods'} | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 527 | end; | 
| 18 | 528 | |
| 529 | ||
| 1438 | 530 | (*Merge two grammars*) | 
| 37684 | 531 | fun merge_gram (gram_a, gram_b) = | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 532 | let | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 533 | (*find out which grammar is bigger*) | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 534 | val | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 535 |       (Gram {nt_count = nt_count1, prod_count = prod_count1, tags = tags1,
 | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 536 | chains = chains1, lambdas = lambdas1, prods = prods1}, | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 537 |       Gram {nt_count = nt_count2, prod_count = prod_count2, tags = tags2,
 | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 538 | chains = chains2, lambdas = lambdas2, prods = prods2}) = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 539 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 540 |         val Gram {prod_count = count_a, ...} = gram_a;
 | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 541 |         val Gram {prod_count = count_b, ...} = gram_b;
 | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 542 | in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 543 | if count_a > count_b | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 544 | then (gram_a, gram_b) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 545 | else (gram_b, gram_a) | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 546 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 547 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 548 | (*get existing tag from grammar1 or create a new one*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 549 | fun get_tag nt_count tags nt = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 550 | (case Symtab.lookup tags nt of | 
| 15531 | 551 | SOME tag => (nt_count, tags, tag) | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 552 | | NONE => (nt_count + 1, Symtab.update_new (nt, nt_count) tags, nt_count)); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 553 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 554 | val ((nt_count1', tags1'), tag_table) = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 555 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 556 | val table = Array.array (nt_count2, ~1); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 557 | |
| 42221 | 558 | fun the_nt tag = | 
| 559 | the (Symtab.get_first (fn (n, t) => if t = tag then SOME n else NONE) tags2); | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 560 | fun store_tag nt_count tags ~1 = (nt_count, tags) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 561 | | store_tag nt_count tags tag = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 562 | let | 
| 42221 | 563 | val (nt_count', tags', tag') = get_tag nt_count tags (the_nt tag); | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 564 | val _ = Array.update (table, tag, tag'); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 565 | in store_tag nt_count' tags' (tag - 1) end; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 566 | in (store_tag nt_count1 tags1 (nt_count2 - 1), table) end; | 
| 15752 | 567 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 568 | (*convert grammar2 tag to grammar1 tag*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 569 | fun convert_tag tag = Array.sub (tag_table, tag); | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 570 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 571 | (*convert chain list to raw productions*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 572 | fun mk_chain_prods [] result = result | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 573 | | mk_chain_prods ((to, froms) :: cs) result = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 574 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 575 | val to_tag = convert_tag to; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 576 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 577 | fun make [] result = result | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 578 | | make (from :: froms) result = make froms | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 579 | ((to_tag, ([Nonterminal (convert_tag from, ~1)], "", ~1)) :: result); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 580 | in mk_chain_prods cs (make froms [] @ result) end; | 
| 15752 | 581 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 582 | val chain_prods = mk_chain_prods chains2 []; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 583 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 584 | (*convert prods2 array to productions*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 585 | fun process_nt ~1 result = result | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 586 | | process_nt nt result = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 587 | let | 
| 42218 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 588 | val nt_prods = fold (union (op =) o snd) (snd (Vector.sub (prods2, nt))) []; | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 589 | val lhs_tag = convert_tag nt; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 590 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 591 | (*convert tags in rhs*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 592 | fun process_rhs [] result = result | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 593 | | process_rhs (Terminal tk :: rhs) result = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 594 | process_rhs rhs (result @ [Terminal tk]) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 595 | | process_rhs (Nonterminal (nt, prec) :: rhs) result = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 596 | process_rhs rhs (result @ [Nonterminal (convert_tag nt, prec)]); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 597 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 598 | (*convert tags in productions*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 599 | fun process_prods [] result = result | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 600 | | process_prods ((rhs, id, prec) :: ps) result = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 601 | process_prods ps ((lhs_tag, (process_rhs rhs [], id, prec)) :: result); | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 602 | in process_nt (nt - 1) (process_prods nt_prods [] @ result) end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 603 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 604 | val raw_prods = chain_prods @ process_nt (nt_count2 - 1) []; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 605 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 606 | val prods1' = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 607 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 608 | fun get_prod i = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 609 | if i < nt_count1 then Vector.sub (prods1, i) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 610 | else (([], []), []); | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 611 | in Array.tabulate (nt_count1', get_prod) end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 612 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 613 | val fromto_chains = inverse_chains chains1 []; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 614 | |
| 15531 | 615 | val (fromto_chains', lambdas', SOME prod_count1') = | 
| 616 | add_prods prods1' fromto_chains lambdas1 (SOME prod_count1) raw_prods; | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 617 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 618 | val chains' = inverse_chains fromto_chains' []; | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 619 | in | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 620 | Gram | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 621 |      {nt_count = nt_count1',
 | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 622 | prod_count = prod_count1', | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 623 | tags = tags1', | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 624 | chains = chains', | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 625 | lambdas = lambdas', | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 626 | prods = Array.vector prods1'} | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 627 | end; | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 628 | |
| 18 | 629 | |
| 42217 | 630 | |
| 42374 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 631 | (** parser **) | 
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 632 | |
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 633 | (* parsetree *) | 
| 18 | 634 | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 635 | datatype parsetree = | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 636 | Node of string * parsetree list | | 
| 37683 | 637 | Tip of Lexicon.token; | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 638 | |
| 42374 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 639 | exception PARSETREE of parsetree; | 
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 640 | |
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 641 | fun pretty_parsetree parsetree = | 
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 642 | let | 
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 643 | fun pretty (Node (c, pts)) = | 
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 644 |           [Pretty.enclose "(" ")" (Pretty.breaks (Pretty.quote (Pretty.str c) :: maps pretty pts))]
 | 
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 645 | | pretty (Tip tok) = | 
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 646 | if Lexicon.valued_token tok then [Pretty.str (Lexicon.str_of_token tok)] else []; | 
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 647 | in (case pretty parsetree of [prt] => prt | _ => raise PARSETREE parsetree) end; | 
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 648 | |
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 649 | |
| 
b9a6b465da25
clarified pretty_parsetree: suppress literal tokens;
 wenzelm parents: 
42288diff
changeset | 650 | (* parser state *) | 
| 42205 
22f5cc06c419
direct pretty printing of parsetrees -- prevent diagnostic output from crashing due to undeclared entities;
 wenzelm parents: 
41378diff
changeset | 651 | |
| 18 | 652 | type state = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 653 | nt_tag * int * (*identification and production precedence*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 654 | parsetree list * (*already parsed nonterminals on rhs*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 655 | symb list * (*rest of rhs*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 656 | string * (*name of production*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 657 | int; (*index for previous state list*) | 
| 18 | 658 | |
| 659 | ||
| 38713 | 660 | (*Get all rhss with precedence >= min_prec*) | 
| 42217 | 661 | fun get_RHS min_prec = filter (fn (_, _, prec: int) => prec >= min_prec); | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 662 | |
| 38713 | 663 | (*Get all rhss with precedence >= min_prec and < max_prec*) | 
| 664 | fun get_RHS' min_prec max_prec = | |
| 42217 | 665 | filter (fn (_, _, prec: int) => prec >= min_prec andalso prec < max_prec); | 
| 18 | 666 | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 667 | (*Make states using a list of rhss*) | 
| 38713 | 668 | fun mk_states i min_prec lhs_ID rhss = | 
| 669 | let fun mk_state (rhs, id, prod_prec) = (lhs_ID, prod_prec, [], rhs, id, i); | |
| 670 | in map mk_state rhss end; | |
| 697 
40f72ab196f8
changed warning for extremely ambiguous expressions
 clasohm parents: 
682diff
changeset | 671 | |
| 15752 | 672 | (*Add parse tree to list and eliminate duplicates | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 673 | saving the maximum precedence*) | 
| 42217 | 674 | fun conc (t: parsetree list, prec: int) [] = (NONE, [(t, prec)]) | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 675 | | conc (t, prec) ((t', prec') :: ts) = | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 676 | if t = t' then | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 677 | (SOME prec', | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 678 | if prec' >= prec then (t', prec') :: ts | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 679 | else (t, prec) :: ts) | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 680 | else | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 681 | let val (n, ts') = conc (t, prec) ts | 
| 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 682 | in (n, (t', prec') :: ts') end; | 
| 18 | 683 | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 684 | (*Update entry in used*) | 
| 42226 
cb650789f2f0
use standard tables with standard argument order;
 wenzelm parents: 
42222diff
changeset | 685 | fun update_trees (A, t) used = | 
| 
cb650789f2f0
use standard tables with standard argument order;
 wenzelm parents: 
42222diff
changeset | 686 | let | 
| 
cb650789f2f0
use standard tables with standard argument order;
 wenzelm parents: 
42222diff
changeset | 687 | val (i, ts) = the (Inttab.lookup used A); | 
| 
cb650789f2f0
use standard tables with standard argument order;
 wenzelm parents: 
42222diff
changeset | 688 | val (n, ts') = conc t ts; | 
| 
cb650789f2f0
use standard tables with standard argument order;
 wenzelm parents: 
42222diff
changeset | 689 | in (n, Inttab.update (A, (i, ts')) used) end; | 
| 18 | 690 | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 691 | (*Replace entry in used*) | 
| 42226 
cb650789f2f0
use standard tables with standard argument order;
 wenzelm parents: 
42222diff
changeset | 692 | fun update_prec (A, prec) = | 
| 
cb650789f2f0
use standard tables with standard argument order;
 wenzelm parents: 
42222diff
changeset | 693 | Inttab.map_entry A (fn (_, ts) => (prec, ts)); | 
| 18 | 694 | |
| 42220 | 695 | fun getS A max_prec NONE Si = | 
| 696 | filter | |
| 697 | (fn (_, _, _, Nonterminal (B, prec) :: _, _, _) => A = B andalso prec <= max_prec | |
| 698 | | _ => false) Si | |
| 699 | | getS A max_prec (SOME min_prec) Si = | |
| 700 | filter | |
| 701 | (fn (_, _, _, Nonterminal (B, prec) :: _, _, _) => | |
| 702 | A = B andalso prec > min_prec andalso prec <= max_prec | |
| 703 | | _ => false) Si; | |
| 18 | 704 | |
| 38713 | 705 | fun get_states Estate i ii A max_prec = | 
| 33317 | 706 | filter | 
| 38713 | 707 | (fn (_, _, _, Nonterminal (B, prec) :: _, _, _) => A = B andalso prec <= max_prec | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 708 | | _ => false) | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 709 | (Array.sub (Estate, ii)); | 
| 18 | 710 | |
| 711 | ||
| 42219 | 712 | fun movedot_term c (A, j, ts, Terminal a :: sa, id, i) = | 
| 42282 
4fa41e068ff0
report literal tokens according to parsetree head;
 wenzelm parents: 
42226diff
changeset | 713 | if Lexicon.valued_token c orelse id <> "" | 
| 
4fa41e068ff0
report literal tokens according to parsetree head;
 wenzelm parents: 
42226diff
changeset | 714 | then (A, j, Tip c :: ts, sa, id, i) | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 715 | else (A, j, ts, sa, id, i); | 
| 18 | 716 | |
| 42219 | 717 | fun movedot_nonterm tt (A, j, ts, Nonterminal _ :: sa, id, i) = | 
| 42282 
4fa41e068ff0
report literal tokens according to parsetree head;
 wenzelm parents: 
42226diff
changeset | 718 | (A, j, tt @ ts, sa, id, i); | 
| 18 | 719 | |
| 42219 | 720 | fun movedot_lambda [] _ = [] | 
| 42221 | 721 | | movedot_lambda ((t, ki) :: ts) (state as (B, j, tss, Nonterminal (A, k) :: sa, id, i)) = | 
| 42282 
4fa41e068ff0
report literal tokens according to parsetree head;
 wenzelm parents: 
42226diff
changeset | 722 | if k <= ki then (B, j, t @ tss, sa, id, i) :: movedot_lambda ts state | 
| 42221 | 723 | else movedot_lambda ts state; | 
| 18 | 724 | |
| 725 | ||
| 41378 | 726 | (*trigger value for warnings*) | 
| 727 | val branching_level = Config.int (Config.declare "syntax_branching_level" (fn _ => Config.Int 600)); | |
| 18 | 728 | |
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 729 | (*get all productions of a NT and NTs chained to it which can | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 730 | be started by specified token*) | 
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 731 | fun prods_for prods chains include_none tk nts = | 
| 37683 | 732 | let | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 733 | fun token_assoc (list, key) = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 734 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 735 | fun assoc [] result = result | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 736 | | assoc ((keyi, pi) :: pairs) result = | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 737 | if is_some keyi andalso Lexicon.matching_tokens (the keyi, key) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 738 | orelse include_none andalso is_none keyi then | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 739 | assoc pairs (pi @ result) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 740 | else assoc pairs result; | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 741 | in assoc list [] end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 742 | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 743 | fun get_prods [] result = result | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 744 | | get_prods (nt :: nts) result = | 
| 38711 
79d3cbfb4730
keep persistent production tables as immutable vectors -- potential performance improvement on modern hardware;
 wenzelm parents: 
37852diff
changeset | 745 | let val nt_prods = snd (Vector.sub (prods, nt)); | 
| 42217 | 746 | in get_prods nts (token_assoc (nt_prods, tk) @ result) end; | 
| 37683 | 747 | in get_prods (connected_with chains nts nts) [] end; | 
| 1147 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 748 | |
| 
57b5f55bf879
removed 'raw' productions from gram datatype; replaced mk_gram by add_prods;
 clasohm parents: 
890diff
changeset | 749 | |
| 38831 
4933a3dfd745
more careful treatment of context visibility flag wrt. spurious warnings;
 wenzelm parents: 
38713diff
changeset | 750 | fun PROCESSS ctxt warned prods chains Estate i c states = | 
| 37683 | 751 | let | 
| 752 | fun all_prods_for nt = prods_for prods chains true c [nt]; | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 753 | |
| 37683 | 754 | fun processS used [] (Si, Sii) = (Si, Sii) | 
| 755 | | processS used (S :: States) (Si, Sii) = | |
| 756 | (case S of | |
| 38713 | 757 | (_, _, _, Nonterminal (nt, min_prec) :: _, _, _) => | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 758 | let (*predictor operation*) | 
| 37683 | 759 | val (used', new_states) = | 
| 42226 
cb650789f2f0
use standard tables with standard argument order;
 wenzelm parents: 
42222diff
changeset | 760 | (case Inttab.lookup used nt of | 
| 38713 | 761 | SOME (used_prec, l) => (*nonterminal has been processed*) | 
| 762 | if used_prec <= min_prec then | |
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 763 | (*wanted precedence has been processed*) | 
| 42219 | 764 | (used, movedot_lambda l S) | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 765 | else (*wanted precedence hasn't been parsed yet*) | 
| 37683 | 766 | let | 
| 767 | val tk_prods = all_prods_for nt; | |
| 42217 | 768 | val States' = | 
| 769 | mk_states i min_prec nt (get_RHS' min_prec used_prec tk_prods); | |
| 42219 | 770 | in (update_prec (nt, min_prec) used, movedot_lambda l S @ States') end | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 771 | | NONE => (*nonterminal is parsed for the first time*) | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 772 | let | 
| 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 773 | val tk_prods = all_prods_for nt; | 
| 38713 | 774 | val States' = mk_states i min_prec nt (get_RHS min_prec tk_prods); | 
| 42226 
cb650789f2f0
use standard tables with standard argument order;
 wenzelm parents: 
42222diff
changeset | 775 | in (Inttab.update (nt, (min_prec, [])) used, States') end); | 
| 697 
40f72ab196f8
changed warning for extremely ambiguous expressions
 clasohm parents: 
682diff
changeset | 776 | |
| 42217 | 777 | val _ = | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 778 | if not (! warned) andalso | 
| 42217 | 779 | length new_states + length States > Config.get ctxt branching_level then | 
| 38831 
4933a3dfd745
more careful treatment of context visibility flag wrt. spurious warnings;
 wenzelm parents: 
38713diff
changeset | 780 | (Context_Position.if_visible ctxt warning | 
| 38875 
c7a66b584147
tuned messages: discontinued spurious full-stops (messages are occasionally composed unexpectedly);
 wenzelm parents: 
38831diff
changeset | 781 | "Currently parsed expression could be extremely ambiguous"; | 
| 37683 | 782 | warned := true) | 
| 783 | else (); | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 784 | in | 
| 37683 | 785 | processS used' (new_states @ States) (S :: Si, Sii) | 
| 15752 | 786 | end | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 787 | | (_, _, _, Terminal a :: _, _, _) => (*scanner operation*) | 
| 37683 | 788 | processS used States | 
| 789 | (S :: Si, | |
| 42219 | 790 | if Lexicon.matching_tokens (a, c) then movedot_term c S :: Sii else Sii) | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 791 | | (A, prec, ts, [], id, j) => (*completer operation*) | 
| 42222 
ff50c436b199
accumulate parsetrees in canonical reverse order;
 wenzelm parents: 
42221diff
changeset | 792 | let val tt = if id = "" then ts else [Node (id, rev ts)] in | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 793 | if j = i then (*lambda production?*) | 
| 37683 | 794 | let | 
| 42226 
cb650789f2f0
use standard tables with standard argument order;
 wenzelm parents: 
42222diff
changeset | 795 | val (prec', used') = update_trees (A, (tt, prec)) used; | 
| 42220 | 796 | val Slist = getS A prec prec' Si; | 
| 797 | val States' = map (movedot_nonterm tt) Slist; | |
| 798 | in processS used' (States' @ States) (S :: Si, Sii) end | |
| 37683 | 799 | else | 
| 38713 | 800 | let val Slist = get_states Estate i j A prec | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 801 | in processS used (map (movedot_nonterm tt) Slist @ States) (S :: Si, Sii) end | 
| 37683 | 802 | end) | 
| 42226 
cb650789f2f0
use standard tables with standard argument order;
 wenzelm parents: 
42222diff
changeset | 803 | in processS Inttab.empty states ([], []) end; | 
| 18 | 804 | |
| 805 | ||
| 38831 
4933a3dfd745
more careful treatment of context visibility flag wrt. spurious warnings;
 wenzelm parents: 
38713diff
changeset | 806 | fun produce ctxt warned prods tags chains stateset i indata prev_token = | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 807 | (case Array.sub (stateset, i) of | 
| 25986 
26f1e4c172c3
syntax error: reduced spam -- print expected nonterminals instead of terminals;
 wenzelm parents: 
24245diff
changeset | 808 | [] => | 
| 
26f1e4c172c3
syntax error: reduced spam -- print expected nonterminals instead of terminals;
 wenzelm parents: 
24245diff
changeset | 809 | let | 
| 37683 | 810 | val toks = if Lexicon.is_eof prev_token then indata else prev_token :: indata; | 
| 811 | val pos = Position.str_of (Lexicon.pos_of_token prev_token); | |
| 27801 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 812 | in | 
| 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 813 |         if null toks then error ("Inner syntax error: unexpected end of input" ^ pos)
 | 
| 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 814 | else error (Pretty.string_of (Pretty.block | 
| 27807 | 815 |           (Pretty.str ("Inner syntax error" ^ pos ^ " at \"") ::
 | 
| 37683 | 816 | Pretty.breaks (map (Pretty.str o Lexicon.str_of_token) (#1 (split_last toks))) @ | 
| 27807 | 817 | [Pretty.str "\""]))) | 
| 27801 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 818 | end | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 819 | | s => | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 820 | (case indata of | 
| 42218 
8e0a4d500e5b
streamlined token list operations, assuming that the order of union does not matter;
 wenzelm parents: 
42217diff
changeset | 821 | [] => s | 
| 42217 | 822 | | c :: cs => | 
| 823 | let | |
| 824 | val (si, sii) = PROCESSS ctxt warned prods chains stateset i c s; | |
| 825 | val _ = Array.update (stateset, i, si); | |
| 826 | val _ = Array.update (stateset, i + 1, sii); | |
| 827 | in produce ctxt warned prods tags chains stateset (i + 1) cs c end)); | |
| 18 | 828 | |
| 829 | ||
| 42217 | 830 | fun get_trees states = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) states; | 
| 18 | 831 | |
| 38831 
4933a3dfd745
more careful treatment of context visibility flag wrt. spurious warnings;
 wenzelm parents: 
38713diff
changeset | 832 | fun earley ctxt prods tags chains startsymbol indata = | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 833 | let | 
| 37683 | 834 | val start_tag = | 
| 835 | (case Symtab.lookup tags startsymbol of | |
| 836 | SOME tag => tag | |
| 40959 | 837 |       | NONE => error ("Inner syntax: bad grammar root symbol " ^ quote startsymbol));
 | 
| 37683 | 838 | val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal Lexicon.eof], "", 0)]; | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
258diff
changeset | 839 | val s = length indata + 1; | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 840 | val Estate = Array.array (s, []); | 
| 38712 
f7688fd819a8
some attempts to recover Isabelle/ML style from the depths of time;
 wenzelm parents: 
38711diff
changeset | 841 | val _ = Array.update (Estate, 0, S0); | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 842 | in | 
| 38831 
4933a3dfd745
more careful treatment of context visibility flag wrt. spurious warnings;
 wenzelm parents: 
38713diff
changeset | 843 | get_trees | 
| 
4933a3dfd745
more careful treatment of context visibility flag wrt. spurious warnings;
 wenzelm parents: 
38713diff
changeset | 844 | (produce ctxt (Unsynchronized.ref false) prods tags chains Estate 0 indata Lexicon.eof) | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
46diff
changeset | 845 | end; | 
| 18 | 846 | |
| 847 | ||
| 38831 
4933a3dfd745
more careful treatment of context visibility flag wrt. spurious warnings;
 wenzelm parents: 
38713diff
changeset | 848 | fun parse ctxt (Gram {tags, prods, chains, ...}) start toks =
 | 
| 27801 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 849 | let | 
| 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 850 | val end_pos = | 
| 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 851 | (case try List.last toks of | 
| 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 852 | NONE => Position.none | 
| 37683 | 853 | | SOME (Lexicon.Token (_, _, (_, end_pos))) => end_pos); | 
| 27801 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 854 | val r = | 
| 38831 
4933a3dfd745
more careful treatment of context visibility flag wrt. spurious warnings;
 wenzelm parents: 
38713diff
changeset | 855 | (case earley ctxt prods tags chains start (toks @ [Lexicon.mk_eof end_pos]) of | 
| 37852 
a902f158b4fc
eliminated old-style sys_error/SYS_ERROR in favour of exception Fail -- after careful checking that there is no overlap with existing handling of that;
 wenzelm parents: 
37684diff
changeset | 856 | [] => raise Fail "Inner syntax: no parse trees" | 
| 27801 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 857 | | pts => pts); | 
| 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 858 | in r end; | 
| 26678 | 859 | |
| 860 | ||
| 861 | fun guess_infix_lr (Gram gram) c = (*based on educated guess*) | |
| 862 | let | |
| 38711 
79d3cbfb4730
keep persistent production tables as immutable vectors -- potential performance improvement on modern hardware;
 wenzelm parents: 
37852diff
changeset | 863 | fun freeze a = map_range (curry Vector.sub a) (Vector.length a); | 
| 27801 
0d0adaf0228d
datatype token: maintain range, tuned representation;
 wenzelm parents: 
26678diff
changeset | 864 | val prods = maps snd (maps snd (freeze (#prods gram))); | 
| 37683 | 865 | fun guess (SOME ([Nonterminal (_, k), | 
| 866 | Terminal (Lexicon.Token (Lexicon.Literal, s, _)), Nonterminal (_, l)], _, j)) = | |
| 26678 | 867 | if k = j andalso l = j + 1 then SOME (s, true, false, j) | 
| 868 | else if k = j + 1 then if l = j then SOME (s, false, true, j) | |
| 869 | else if l = j + 1 then SOME (s, false, false, j) | |
| 870 | else NONE | |
| 871 | else NONE | |
| 872 | | guess _ = NONE; | |
| 873 | in guess (find_first (fn (_, s, _) => s = c) prods) end; | |
| 18 | 874 | |
| 875 | end; |