author | wenzelm |
Fri, 19 Aug 1994 15:36:23 +0200 | |
changeset 552 | fc92fac8b0de |
parent 395 | 712dceb1ecc7 |
child 624 | 33b9b5da3e6f |
permissions | -rw-r--r-- |
18 | 1 |
(* Title: Pure/Syntax/parser.ML |
46 | 2 |
ID: $Id$ |
345 | 3 |
Author: Sonia Mahjoub, Markus Wenzel and Carsten Clasohm, TU Muenchen |
18 | 4 |
|
552 | 5 |
Isabelle's main parser (used for terms and types). |
18 | 6 |
*) |
7 |
||
8 |
signature PARSER = |
|
9 |
sig |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
10 |
structure Lexicon: LEXICON |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
11 |
structure SynExt: SYN_EXT |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
12 |
local open Lexicon SynExt SynExt.Ast in |
18 | 13 |
type gram |
14 |
val empty_gram: gram |
|
552 | 15 |
val extend_gram: gram -> xprod list -> gram |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
16 |
val merge_grams: gram -> gram -> gram |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
17 |
val pretty_gram: gram -> Pretty.T list |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
18 |
datatype parsetree = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
19 |
Node of string * parsetree list | |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
20 |
Tip of token |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
21 |
val parse: gram -> string -> token list -> parsetree list |
18 | 22 |
end |
23 |
end; |
|
24 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
25 |
functor ParserFun(structure Symtab: SYMTAB and Lexicon: LEXICON |
552 | 26 |
and SynExt: SYN_EXT): PARSER = |
18 | 27 |
struct |
28 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
29 |
structure Pretty = SynExt.Ast.Pretty; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
30 |
structure Lexicon = Lexicon; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
31 |
structure SynExt = SynExt; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
32 |
open Lexicon SynExt; |
18 | 33 |
|
34 |
||
35 |
(** datatype gram **) |
|
36 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
37 |
datatype symb = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
38 |
Terminal of token | |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
39 |
Nonterminal of string * int; |
18 | 40 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
41 |
datatype refsymb = Term of token | Nonterm of rhss_ref * int |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
42 |
(*reference to production list instead of name*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
43 |
and gram = Gram of (string * (symb list * string * int)) list * |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
44 |
(string * rhss_ref) list |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
45 |
withtype rhss_ref = (token option * (refsymb list * string * int) list) list ref |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
46 |
(*lookahead table: token and productions*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
47 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
48 |
(* convert productions to reference grammar with lookaheads and eliminate |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
49 |
chain productions *) |
373
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
50 |
fun mk_gram prods = |
395
712dceb1ecc7
"Building new grammar" message is no longer displayed by empty_gram
clasohm
parents:
377
diff
changeset
|
51 |
let (*get reference on list of all possible rhss for nonterminal lhs |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
52 |
(if it doesn't exist a new one is created and added to the nonterminal |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
53 |
list)*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
54 |
fun get_rhss ref_prods lhs = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
55 |
case assoc (ref_prods, lhs) of |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
56 |
None => |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
57 |
let val l = ref [(None, [])] |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
58 |
in (l, (lhs, l) :: ref_prods) end |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
59 |
| Some l => (l, ref_prods); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
60 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
61 |
(*convert symb list to refsymb list*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
62 |
fun mk_refsymbs ref_prods [] rs = (rs, ref_prods) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
63 |
| mk_refsymbs ref_prods (Terminal tk :: symbs) rs = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
64 |
mk_refsymbs ref_prods symbs (rs @ [Term tk]) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
65 |
| mk_refsymbs ref_prods (Nonterminal (name, prec) :: symbs) rs = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
66 |
let val (rhss, ref_prods') = get_rhss ref_prods name |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
67 |
in mk_refsymbs ref_prods' symbs (rs @ [Nonterm (rhss, prec)]) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
68 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
69 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
70 |
(*convert prod list to (string * rhss_ref) list |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
71 |
without computing lookaheads*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
72 |
fun mk_ref_gram [] ref_prods = ref_prods |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
73 |
| mk_ref_gram ((lhs, (rhs, name, prec)) :: ps) ref_prods = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
74 |
let val (rhs', ref_prods') = get_rhss ref_prods lhs; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
75 |
val (dummy, rhss) = hd (!rhs'); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
76 |
val (ref_symbs, ref_prods'') = mk_refsymbs ref_prods' rhs []; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
77 |
in rhs' := [(dummy, (ref_symbs, name, prec) :: rhss)]; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
78 |
mk_ref_gram ps ref_prods'' |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
79 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
80 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
81 |
(*eliminate chain productions*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
82 |
fun elim_chain ref_gram = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
83 |
let (*make a list of pairs representing chain productions and delete |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
84 |
these productions*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
85 |
fun list_chain [] = [] |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
86 |
| list_chain ((_, rhss_ref) :: ps) = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
87 |
let fun lists [] new_rhss chains = (new_rhss, chains) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
88 |
| lists (([Nonterm (id2, ~1)], _, ~1) :: rs) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
89 |
new_rhss chains = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
90 |
lists rs new_rhss ((rhss_ref, id2) :: chains) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
91 |
| lists (rhs :: rs) new_rhss chains = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
92 |
lists rs (rhs :: new_rhss) chains; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
93 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
94 |
val (dummy, rhss) = hd (!rhss_ref); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
95 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
96 |
val (new_rhss, chains) = lists rhss [] []; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
97 |
in rhss_ref := [(dummy, new_rhss)]; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
98 |
chains @ (list_chain ps) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
99 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
100 |
|
345 | 101 |
(*convert a list of pairs to an association list |
102 |
by using the first element as the key*) |
|
103 |
fun mk_assoc pairs = |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
104 |
let fun doit [] result = result |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
105 |
| doit ((id1, id2) :: ps) result = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
106 |
doit ps |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
107 |
(overwrite (result, (id1, id2 :: (assocs result id1)))); |
345 | 108 |
in doit pairs [] end; |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
109 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
110 |
(*replace reference by list of rhss in chain pairs*) |
345 | 111 |
fun deref (id1, ids) = |
112 |
let fun deref1 [] = [] |
|
113 |
| deref1 (id :: ids) = |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
114 |
let val (_, rhss) = hd (!id); |
345 | 115 |
in rhss @ (deref1 ids) end; |
116 |
in (id1, deref1 ids) end; |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
117 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
118 |
val chain_pairs = |
345 | 119 |
map deref (transitive_closure (mk_assoc (list_chain ref_gram))); |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
120 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
121 |
(*add new rhss to productions*) |
345 | 122 |
fun elim (rhss_ref, rhss) = |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
123 |
let val (dummy, old_rhss) = hd (!rhss_ref); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
124 |
in rhss_ref := [(dummy, old_rhss @ rhss)] end; |
345 | 125 |
in map elim chain_pairs; |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
126 |
ref_gram |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
127 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
128 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
129 |
val ref_gram = elim_chain (mk_ref_gram prods []); |
18 | 130 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
131 |
(*make a list of all lambda NTs |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
132 |
(i.e. nonterminals that can produce lambda*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
133 |
val lambdas = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
134 |
let fun lambda [] result = result |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
135 |
| lambda ((_, rhss_ref) :: nts) result = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
136 |
if rhss_ref mem result then |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
137 |
lambda nts result |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
138 |
else |
345 | 139 |
let (*list all NTs that can be produced by a rhs |
140 |
containing only lambda NTs*) |
|
141 |
fun only_lambdas [] result = result |
|
142 |
| only_lambdas ((_, rhss_ref) :: ps) result = |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
143 |
let fun only (symbs, _, _) = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
144 |
forall (fn (Nonterm (id, _)) => id mem result |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
145 |
| (Term _) => false) symbs; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
146 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
147 |
val (_, rhss) = hd (!rhss_ref); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
148 |
in if not (rhss_ref mem result) andalso |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
149 |
exists only rhss then |
345 | 150 |
only_lambdas ref_gram (rhss_ref :: result) |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
151 |
else |
345 | 152 |
only_lambdas ps result |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
153 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
154 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
155 |
val (_, rhss) = hd (!rhss_ref); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
156 |
in if exists (fn (symbs, _, _) => null symbs) rhss |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
157 |
then lambda nts |
345 | 158 |
(only_lambdas ref_gram (rhss_ref :: result)) |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
159 |
else lambda nts result |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
160 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
161 |
in lambda ref_gram [] end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
162 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
163 |
(*list all nonterminals on which the lookahead depends (due to lambda |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
164 |
NTs this can be more than one) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
165 |
and report if there is a terminal at the 'start'*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
166 |
fun rhss_start [] skipped = (None, skipped) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
167 |
| rhss_start (Term tk :: _) skipped = (Some tk, skipped) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
168 |
| rhss_start (Nonterm (rhss_ref, _) :: rest) skipped = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
169 |
if rhss_ref mem lambdas then |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
170 |
rhss_start rest (rhss_ref ins skipped) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
171 |
else |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
172 |
(None, rhss_ref ins skipped); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
173 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
174 |
(*list all terminals that can start the given rhss*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
175 |
fun look_rhss starts rhss_ref = |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
176 |
let fun look [] _ result = result |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
177 |
| look ((symbs, _, _) :: todos) done result = |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
178 |
let val (start_token, skipped) = rhss_start symbs []; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
179 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
180 |
(*process all nonterminals on which the lookahead |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
181 |
depends and build the new todo and done lists for |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
182 |
the look function*) |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
183 |
fun look2 [] todos result = |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
184 |
look todos (done union skipped) result |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
185 |
| look2 (rhss_ref :: ls) todos result = |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
186 |
if rhss_ref mem done then look2 ls todos result |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
187 |
else case assoc (starts, rhss_ref) of |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
188 |
Some tks => look2 ls todos (tks union result) |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
189 |
| None => |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
190 |
let val (_, rhss) = hd (!rhss_ref); |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
191 |
in look2 ls (rhss @ todos) result end; |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
192 |
in case start_token of |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
193 |
Some tk => look2 skipped todos (start_token ins result) |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
194 |
| None => look2 skipped todos result |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
195 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
196 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
197 |
val (_, rhss) = hd (!rhss_ref); |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
198 |
in look rhss [rhss_ref] [] end; |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
199 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
200 |
(*make a table that contains all possible starting terminals |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
201 |
for each nonterminal*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
202 |
fun mk_starts [] starts = starts |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
203 |
| mk_starts ((_, rhss_ref) :: ps) starts = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
204 |
mk_starts ps ((rhss_ref, look_rhss starts rhss_ref) :: starts); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
205 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
206 |
val starts = mk_starts ref_gram []; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
207 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
208 |
(*add list of allowed starting tokens to productions*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
209 |
fun mk_lookahead (_, rhss_ref) = |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
210 |
let (*compares two values of type token option |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
211 |
(used for speed reasons)*) |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
212 |
fun matching_opt_tks (Some tk1, Some tk2) = |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
213 |
matching_tokens (tk1, tk2) |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
214 |
| matching_opt_tks _ = false; |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
215 |
|
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
216 |
(*add item to lookahead list (a list containing pairs of token and |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
217 |
rhss that can be started with it)*) |
345 | 218 |
fun add_start new_rhs tokens table = |
219 |
let fun add [] [] = [] |
|
220 |
| add (tk :: tks) [] = |
|
221 |
(tk, [new_rhs]) :: (add tks []) |
|
222 |
| add tokens ((tk, rhss) :: ss) = |
|
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
223 |
if gen_mem matching_opt_tks (tk, tokens) then |
345 | 224 |
(tk, new_rhs :: rhss) :: (add (tokens \ tk) ss) |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
225 |
else |
345 | 226 |
(tk, rhss) :: (add tokens ss); |
227 |
in add tokens table end; |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
228 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
229 |
(*combine all lookaheads of a list of nonterminals*) |
345 | 230 |
fun combine_starts rhss_refs = |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
231 |
foldr (gen_union matching_opt_tks) |
345 | 232 |
((map (fn rhss_ref => let val Some tks = assoc (starts, rhss_ref) |
233 |
in tks end) rhss_refs), []); |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
234 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
235 |
(*get lookahead for a rhs and update lhs' lookahead list*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
236 |
fun look_rhss [] table = table |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
237 |
| look_rhss ((rhs as (symbs, id, prec)) :: rs) table = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
238 |
let val (start_token, skipped) = rhss_start symbs []; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
239 |
val starts = case start_token of |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
240 |
Some tk => gen_ins matching_opt_tks |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
241 |
(Some tk, combine_starts skipped) |
345 | 242 |
| None => if skipped subset lambdas then |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
243 |
[None] |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
244 |
else |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
245 |
combine_starts skipped; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
246 |
in look_rhss rs (add_start rhs starts table) end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
247 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
248 |
val (_, rhss) = hd (!rhss_ref); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
249 |
in rhss_ref := look_rhss rhss [] end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
250 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
251 |
in map mk_lookahead ref_gram; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
252 |
Gram (prods, ref_gram) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
253 |
end; |
18 | 254 |
|
255 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
256 |
(* empty, extend, merge grams *) |
18 | 257 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
258 |
val empty_gram = mk_gram []; |
18 | 259 |
|
552 | 260 |
fun extend_gram (gram1 as Gram (prods1, _)) xprods2 = |
18 | 261 |
let |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
262 |
fun symb_of (Delim s) = Some (Terminal (Token s)) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
263 |
| symb_of (Argument (s, p)) = |
18 | 264 |
(case predef_term s of |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
265 |
None => Some (Nonterminal (s, p)) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
266 |
| Some tk => Some (Terminal tk)) |
18 | 267 |
| symb_of _ = None; |
268 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
269 |
fun prod_of (XProd (lhs, xsymbs, const, pri)) = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
270 |
(lhs, (mapfilter symb_of xsymbs, const, pri)); |
18 | 271 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
272 |
val prods2 = distinct (map prod_of xprods2); |
18 | 273 |
in |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
274 |
if prods2 subset prods1 then gram1 |
395
712dceb1ecc7
"Building new grammar" message is no longer displayed by empty_gram
clasohm
parents:
377
diff
changeset
|
275 |
else (writeln "Building new grammar..."; |
712dceb1ecc7
"Building new grammar" message is no longer displayed by empty_gram
clasohm
parents:
377
diff
changeset
|
276 |
mk_gram (extend_list prods1 prods2)) |
18 | 277 |
end; |
278 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
279 |
fun merge_grams (gram1 as Gram (prods1, _)) (gram2 as Gram (prods2, _)) = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
280 |
if prods2 subset prods1 then gram1 |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
281 |
else if prods1 subset prods2 then gram2 |
395
712dceb1ecc7
"Building new grammar" message is no longer displayed by empty_gram
clasohm
parents:
377
diff
changeset
|
282 |
else (writeln "Building new grammar..."; |
712dceb1ecc7
"Building new grammar" message is no longer displayed by empty_gram
clasohm
parents:
377
diff
changeset
|
283 |
mk_gram (merge_lists prods1 prods2)); |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
284 |
|
18 | 285 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
286 |
(* pretty_gram *) |
18 | 287 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
288 |
fun pretty_gram (Gram (prods, _)) = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
289 |
let |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
290 |
fun pretty_name name = [Pretty.str (name ^ " =")]; |
18 | 291 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
292 |
fun pretty_symb (Terminal (Token s)) = Pretty.str (quote s) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
293 |
| pretty_symb (Terminal tok) = Pretty.str (str_of_token tok) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
294 |
| pretty_symb (Nonterminal (s, p)) = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
295 |
Pretty.str (s ^ "[" ^ string_of_int p ^ "]"); |
18 | 296 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
297 |
fun pretty_const "" = [] |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
298 |
| pretty_const c = [Pretty.str ("=> " ^ quote c)]; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
299 |
|
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
300 |
fun pretty_pri p = [Pretty.str ("(" ^ string_of_int p ^ ")")]; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
301 |
|
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
302 |
fun pretty_prod (name, (symbs, const, pri)) = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
303 |
Pretty.block (Pretty.breaks (pretty_name name @ |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
304 |
map pretty_symb symbs @ pretty_const const @ pretty_pri pri)); |
18 | 305 |
in |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
306 |
map pretty_prod prods |
18 | 307 |
end; |
308 |
||
309 |
||
310 |
||
311 |
(** parse **) |
|
312 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
313 |
datatype parsetree = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
314 |
Node of string * parsetree list | |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
315 |
Tip of token; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
316 |
|
18 | 317 |
type state = |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
318 |
rhss_ref * int (*lhs: identification and production precedence*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
319 |
* parsetree list (*already parsed nonterminals on rhs*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
320 |
* refsymb list (*rest of rhs*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
321 |
* string (*name of production*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
322 |
* int; (*index for previous state list*) |
18 | 323 |
|
324 |
type earleystate = state list Array.array; |
|
325 |
||
326 |
||
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
327 |
(*Get all rhss with precedence >= minPrec*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
328 |
fun getRHS minPrec = filter (fn (_, _, prec:int) => prec >= minPrec); |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
329 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
330 |
(*Get all rhss with precedence >= minPrec and < maxPrec*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
331 |
fun getRHS' minPrec maxPrec = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
332 |
filter (fn (_, _, prec:int) => prec >= minPrec andalso prec < maxPrec); |
18 | 333 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
334 |
(*Make states using a list of rhss*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
335 |
fun mkStates i minPrec lhsID rhss = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
336 |
let fun mkState (rhs, id, prodPrec) = (lhsID, prodPrec, [], rhs, id, i); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
337 |
in map mkState rhss end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
338 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
339 |
(*Add parse tree to list and eliminate duplicates |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
340 |
saving the maximum precedence*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
341 |
fun conc (t, prec:int) [] = (None, [(t, prec)]) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
342 |
| conc (t, prec) ((t', prec') :: ts) = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
343 |
if t = t' then |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
344 |
(Some prec', if prec' >= prec then (t', prec') :: ts |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
345 |
else (t, prec) :: ts) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
346 |
else |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
347 |
let val (n, ts') = conc (t, prec) ts |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
348 |
in (n, (t', prec') :: ts') end; |
18 | 349 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
350 |
(*Update entry in used*) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
351 |
fun update_tree ((B, (i, ts)) :: used) (A, t) = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
352 |
if A = B then |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
353 |
let val (n, ts') = conc t ts |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
354 |
in ((A, (i, ts')) :: used, n) end |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
355 |
else |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
356 |
let val (used', n) = update_tree used (A, t) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
357 |
in ((B, (i, ts)) :: used', n) end; |
18 | 358 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
359 |
(*Replace entry in used*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
360 |
fun update_index (A, prec) used = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
361 |
let fun update((hd as (B, (_, ts))) :: used, used') = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
362 |
if A = B |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
363 |
then used' @ ((A, (prec, ts)) :: used) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
364 |
else update (used, hd :: used') |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
365 |
in update (used, []) end; |
18 | 366 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
367 |
fun getS A maxPrec Si = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
368 |
filter |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
369 |
(fn (_, _, _, Nonterm (B, prec) :: _, _, _) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
370 |
=> A = B andalso prec <= maxPrec |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
371 |
| _ => false) Si; |
18 | 372 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
373 |
fun getS' A maxPrec minPrec Si = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
374 |
filter |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
375 |
(fn (_, _, _, Nonterm (B, prec) :: _, _, _) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
376 |
=> A = B andalso prec > minPrec andalso prec <= maxPrec |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
377 |
| _ => false) Si; |
18 | 378 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
379 |
fun getStates Estate i ii A maxPrec = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
380 |
filter |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
381 |
(fn (_, _, _, Nonterm (B, prec) :: _, _, _) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
382 |
=> A = B andalso prec <= maxPrec |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
383 |
| _ => false) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
384 |
(Array.sub (Estate, ii)); |
18 | 385 |
|
386 |
||
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
387 |
fun movedot_term (A, j, ts, Term a :: sa, id, i) c = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
388 |
if valued_token c then |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
389 |
(A, j, (ts @ [Tip c]), sa, id, i) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
390 |
else (A, j, ts, sa, id, i); |
18 | 391 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
392 |
fun movedot_nonterm ts (A, j, tss, Nonterm _ :: sa, id, i) = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
393 |
(A, j, tss @ ts, sa, id, i); |
18 | 394 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
395 |
fun movedot_lambda _ [] = [] |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
396 |
| movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ((t, ki) :: ts) = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
397 |
if k <= ki then |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
398 |
(B, j, tss @ t, sa, id, i) :: |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
399 |
movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ts |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
400 |
else movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ts; |
18 | 401 |
|
402 |
||
403 |
||
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
404 |
fun PROCESSS Estate i c states = |
18 | 405 |
let |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
406 |
fun get_lookahead rhss_ref = token_assoc (!rhss_ref, c); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
407 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
408 |
fun processS used [] (Si, Sii) = (Si, Sii) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
409 |
| processS used (S :: States) (Si, Sii) = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
410 |
(case S of |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
411 |
(_, _, _, Nonterm (rhss_ref, minPrec) :: _, _, _) => |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
412 |
let (*predictor operation*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
413 |
val (used_new, States_new) = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
414 |
(case assoc (used, rhss_ref) of |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
415 |
Some (usedPrec, l) => (*nonterminal has been processed*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
416 |
if usedPrec <= minPrec then |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
417 |
(*wanted precedence has been processed*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
418 |
(used, movedot_lambda S l) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
419 |
else (*wanted precedence hasn't been parsed yet*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
420 |
let val rhss = get_lookahead rhss_ref; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
421 |
val States' = mkStates i minPrec rhss_ref |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
422 |
(getRHS' minPrec usedPrec rhss); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
423 |
in (update_index (rhss_ref, minPrec) used, |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
424 |
movedot_lambda S l @ States') |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
425 |
end |
18 | 426 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
427 |
| None => (*nonterminal is parsed for the first time*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
428 |
let val rhss = get_lookahead rhss_ref; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
429 |
val States' = mkStates i minPrec rhss_ref |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
430 |
(getRHS minPrec rhss); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
431 |
in ((rhss_ref, (minPrec, [])) :: used, States') end) |
18 | 432 |
in |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
433 |
processS used_new (States_new @ States) (S :: Si, Sii) |
18 | 434 |
end |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
435 |
| (_, _, _, Term a :: _, _, _) => (*scanner operation*) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
436 |
processS used States |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
437 |
(S :: Si, |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
438 |
if matching_tokens (a, c) then movedot_term S c :: Sii else Sii) |
18 | 439 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
440 |
| (A, prec, ts, [], id, j) => (*completer operation*) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
441 |
let |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
442 |
val tt = if id = "" then ts else [Node (id, ts)] |
18 | 443 |
in |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
444 |
if j = i then (*lambda production?*) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
445 |
let |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
446 |
val (used', O) = update_tree used (A, (tt, prec)); |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
447 |
in |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
448 |
(case O of |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
449 |
None => |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
450 |
let |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
451 |
val Slist = getS A prec Si; |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
452 |
val States' = map (movedot_nonterm tt) Slist; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
453 |
in |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
454 |
processS used' (States' @ States) (S :: Si, Sii) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
455 |
end |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
456 |
| Some n => |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
457 |
if n >= prec then |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
458 |
processS used' States (S :: Si, Sii) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
459 |
else |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
460 |
let |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
461 |
val Slist = getS' A prec n Si; |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
462 |
val States' = map (movedot_nonterm tt) Slist; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
463 |
in |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
464 |
processS used' (States' @ States) (S :: Si, Sii) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
465 |
end) |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
466 |
end |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
467 |
else |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
468 |
processS used |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
469 |
(map (movedot_nonterm tt) (getStates Estate i j A prec) @ States) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
470 |
(S :: Si, Sii) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
471 |
end) |
18 | 472 |
in |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
473 |
processS [] states ([], []) |
18 | 474 |
end; |
475 |
||
476 |
||
477 |
||
362 | 478 |
fun syntax_error toks allowed = |
479 |
error |
|
480 |
((if toks = [] then |
|
481 |
"error: unexpected end of input\n" |
|
482 |
else |
|
367
b734dc03067e
syntax_error now removes duplicate tokens in its output and doesn't
clasohm
parents:
362
diff
changeset
|
483 |
"Syntax error at: " ^ quote (space_implode " " (map str_of_token |
b734dc03067e
syntax_error now removes duplicate tokens in its output and doesn't
clasohm
parents:
362
diff
changeset
|
484 |
((rev o tl o rev) toks))) |
362 | 485 |
^ "\n") |
486 |
^ "Expected tokens: " |
|
487 |
^ space_implode ", " (map (quote o str_of_token) allowed)); |
|
18 | 488 |
|
362 | 489 |
fun produce stateset i indata prev_token = |
490 |
(*the argument prev_token is only used for error messages*) |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
491 |
(case Array.sub (stateset, i) of |
373
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
492 |
[] => let (*similar to token_assoc but does not automatically |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
493 |
include 'None' key*) |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
494 |
fun token_assoc2 (list, key) = |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
495 |
let fun assoc [] = [] |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
496 |
| assoc ((keyi, xi) :: pairs) = |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
497 |
if is_some keyi andalso |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
498 |
matching_tokens (the keyi, key) then |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
499 |
(assoc pairs) @ xi |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
500 |
else assoc pairs; |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
501 |
in assoc list end; |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
502 |
|
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
503 |
(*test if tk is a lookahead for a given minimum precedence*) |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
504 |
fun reduction minPrec tk _ (Term _ :: _, _, prec:int) = |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
505 |
if prec >= minPrec then true |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
506 |
else false |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
507 |
| reduction minPrec tk checked |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
508 |
(Nonterm (rhss_ref, NTprec)::_,_, prec) = |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
509 |
if prec >= minPrec andalso not (rhss_ref mem checked) then |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
510 |
exists (reduction NTprec tk (rhss_ref :: checked)) |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
511 |
(token_assoc2 (!rhss_ref, tk)) |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
512 |
else false; |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
513 |
|
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
514 |
(*compute a list of allowed starting tokens |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
515 |
for a list of nonterminals considering precedence*) |
362 | 516 |
fun get_starts [] = [] |
372
40d565e51dea
syntax_error now checks precedences when computing expected tokens
clasohm
parents:
367
diff
changeset
|
517 |
| get_starts ((rhss_ref, minPrec:int) :: refs) = |
362 | 518 |
let fun get [] = [] |
373
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
519 |
| get ((Some tk, prods) :: rhss) = |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
520 |
if exists (reduction minPrec tk [rhss_ref]) prods |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
521 |
then tk :: (get rhss) |
372
40d565e51dea
syntax_error now checks precedences when computing expected tokens
clasohm
parents:
367
diff
changeset
|
522 |
else get rhss |
362 | 523 |
| get ((None, _) :: rhss) = |
524 |
get rhss; |
|
525 |
in (get (!rhss_ref)) union (get_starts refs) end; |
|
526 |
||
372
40d565e51dea
syntax_error now checks precedences when computing expected tokens
clasohm
parents:
367
diff
changeset
|
527 |
val NTs = map (fn (_, _, _, Nonterm (a, prec) :: _, _, _) => |
40d565e51dea
syntax_error now checks precedences when computing expected tokens
clasohm
parents:
367
diff
changeset
|
528 |
(a, prec)) |
362 | 529 |
(filter (fn (_, _, _, Nonterm _ :: _, _, _) => true |
530 |
| _ => false) (Array.sub (stateset, i-1))); |
|
367
b734dc03067e
syntax_error now removes duplicate tokens in its output and doesn't
clasohm
parents:
362
diff
changeset
|
531 |
val allowed = distinct (get_starts NTs @ |
362 | 532 |
(map (fn (_, _, _, Term a :: _, _, _) => a) |
533 |
(filter (fn (_, _, _, Term _ :: _, _, _) => true |
|
367
b734dc03067e
syntax_error now removes duplicate tokens in its output and doesn't
clasohm
parents:
362
diff
changeset
|
534 |
| _ => false) (Array.sub (stateset, i-1))))); |
362 | 535 |
in syntax_error (if prev_token = EndToken then indata |
536 |
else prev_token :: indata) allowed |
|
537 |
end |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
538 |
| s => |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
539 |
(case indata of |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
540 |
[] => Array.sub (stateset, i) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
541 |
| c :: cs => |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
542 |
let |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
543 |
val (si, sii) = PROCESSS stateset i c s; |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
544 |
in |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
545 |
Array.update (stateset, i, si); |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
546 |
Array.update (stateset, i + 1, sii); |
362 | 547 |
produce stateset (i + 1) cs c |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
548 |
end)); |
18 | 549 |
|
550 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
551 |
val get_trees = mapfilter (fn (_, _, [pt], _, _, _) => Some pt | _ => None); |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
552 |
|
18 | 553 |
|
554 |
fun earley grammar startsymbol indata = |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
555 |
let |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
556 |
val rhss_ref = case assoc (grammar, startsymbol) of |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
557 |
Some r => r |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
558 |
| None => error ("parse: Unknown startsymbol " ^ |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
559 |
quote startsymbol); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
560 |
val S0 = [(ref [], 0, [], [Nonterm (rhss_ref, 0), Term EndToken], "", 0)]; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
561 |
val s = length indata + 1; |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
562 |
val Estate = Array.array (s, []); |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
563 |
in |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
564 |
Array.update (Estate, 0, S0); |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
565 |
let |
362 | 566 |
val l = produce Estate 0 indata EndToken(*dummy*); |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
567 |
val p_trees = get_trees l; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
568 |
in p_trees end |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
569 |
end; |
18 | 570 |
|
571 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
572 |
fun parse (Gram (_, prod_tab)) start toks = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
573 |
(case earley prod_tab start toks of |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
574 |
[] => sys_error "parse: no parse trees" |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
575 |
| pts => pts); |
18 | 576 |
|
577 |
end; |
|
578 |