26 |
26 |
27 (** datatype gram **) |
27 (** datatype gram **) |
28 |
28 |
29 (*production for the NTs are stored in a vector |
29 (*production for the NTs are stored in a vector |
30 so we can identify NTs by their index*) |
30 so we can identify NTs by their index*) |
31 type nt_tag = int; |
31 type nt = int; |
|
32 |
|
33 type nts = Inttab.set; |
|
34 val nts_empty: nts = Inttab.empty; |
|
35 val nts_merge: nts * nts -> nts = Inttab.merge (K true); |
|
36 fun nts_insert nt : nts -> nts = Inttab.insert_set nt; |
|
37 fun nts_member (nts: nts) = Inttab.defined nts; |
|
38 fun nts_fold f (nts: nts) = Inttab.fold (f o #1) nts; |
32 |
39 |
33 datatype symb = |
40 datatype symb = |
34 Terminal of Lexicon.token |
41 Terminal of Lexicon.token |
35 | Nonterminal of nt_tag * int; (*(tag, precedence)*) |
42 | Nonterminal of nt * int; (*(tag, precedence)*) |
36 |
43 |
37 type nt_gram = |
44 type nt_gram = |
38 ((nt_tag list * Lexicon.token list) * |
45 ((nts * Lexicon.token list) * |
39 (Lexicon.token option * (symb list * string * int) list) list); |
46 (Lexicon.token option * (symb list * string * int) list) list); |
40 (*(([dependent_nts], [start_tokens]), [(start_token, [(rhs, name, prio)])])*) |
47 (*(([dependent_nts], [start_tokens]), [(start_token, [(rhs, name, prio)])])*) |
41 (*depent_nts is a list of all NTs whose lookahead depends on this NT's lookahead*) |
48 (*depent_nts is a list of all NTs whose lookahead depends on this NT's lookahead*) |
42 |
49 |
43 type tags = nt_tag Symtab.table; |
50 val nt_gram_empty: nt_gram = ((nts_empty, []), []); |
|
51 |
|
52 type tags = nt Symtab.table; |
44 val tags_empty: tags = Symtab.empty; |
53 val tags_empty: tags = Symtab.empty; |
45 fun tags_content (tags: tags) = sort_by #1 (Symtab.dest tags); |
54 fun tags_content (tags: tags) = sort_by #1 (Symtab.dest tags); |
46 fun tags_lookup (tags: tags) = Symtab.lookup tags; |
55 fun tags_lookup (tags: tags) = Symtab.lookup tags; |
47 fun tags_insert tag (tags: tags) = Symtab.update_new tag tags; |
56 fun tags_insert tag (tags: tags) = Symtab.update_new tag tags; |
48 fun tags_name (tags: tags) = the o Inttab.lookup (Inttab.make (map swap (Symtab.dest tags))); |
57 fun tags_name (tags: tags) = the o Inttab.lookup (Inttab.make (map swap (Symtab.dest tags))); |
55 fun chains_member (chains: chains) = Int_Graph.is_edge chains; |
64 fun chains_member (chains: chains) = Int_Graph.is_edge chains; |
56 fun chains_declare nt : chains -> chains = Int_Graph.default_node (nt, ()); |
65 fun chains_declare nt : chains -> chains = Int_Graph.default_node (nt, ()); |
57 fun chains_insert (from, to) = |
66 fun chains_insert (from, to) = |
58 chains_declare from #> chains_declare to #> Int_Graph.add_edge (from, to); |
67 chains_declare from #> chains_declare to #> Int_Graph.add_edge (from, to); |
59 |
68 |
60 type lambdas = Inttab.set; |
|
61 val lambdas_empty: lambdas = Inttab.empty; |
|
62 fun lambdas_member (lambdas: lambdas) = Inttab.defined lambdas; |
|
63 fun lambdas_insert nt : lambdas -> lambdas = Inttab.insert_set nt; |
|
64 fun lambdas_fold f (lambdas: lambdas) = Inttab.fold (f o #1) lambdas; |
|
65 |
|
66 datatype gram = |
69 datatype gram = |
67 Gram of |
70 Gram of |
68 {nt_count: int, |
71 {nt_count: int, |
69 prod_count: int, |
72 prod_count: int, |
70 tags: tags, |
73 tags: tags, |
71 chains: chains, |
74 chains: chains, |
72 lambdas: lambdas, |
75 lambdas: nts, |
73 prods: nt_gram Vector.vector}; |
76 prods: nt_gram Vector.vector}; |
74 (*"tags" is used to map NT names (i.e. strings) to tags; |
77 (*"tags" is used to map NT names (i.e. strings) to tags; |
75 chain productions are not stored as normal productions |
78 chain productions are not stored as normal productions |
76 but instead as an entry in "chains": from -> to; |
79 but instead as an entry in "chains": from -> to; |
77 lambda productions are stored as normal productions |
80 lambda productions are stored as normal productions |
128 end; |
131 end; |
129 |
132 |
130 val tos = chains_all_succs chains' [lhs]; |
133 val tos = chains_all_succs chains' [lhs]; |
131 in |
134 in |
132 (copy_lookahead tos [], |
135 (copy_lookahead tos [], |
133 lambdas |> lambdas_member lambdas lhs ? fold lambdas_insert tos) |
136 lambdas |> nts_member lambdas lhs ? fold nts_insert tos) |
134 end; |
137 end; |
135 |
138 |
136 (*test if new production can produce lambda |
139 (*test if new production can produce lambda |
137 (rhs must either be empty or only consist of lambda NTs)*) |
140 (rhs must either be empty or only consist of lambda NTs)*) |
138 val new_lambdas = |
141 val new_lambdas = |
139 if forall |
142 if forall |
140 (fn Nonterminal (id, _) => lambdas_member lambdas' id |
143 (fn Nonterminal (id, _) => nts_member lambdas' id |
141 | Terminal _ => false) rhs |
144 | Terminal _ => false) rhs |
142 then SOME (filter_out (lambdas_member lambdas') (chains_all_succs chains' [lhs])) |
145 then SOME (filter_out (nts_member lambdas') (chains_all_succs chains' [lhs])) |
143 else NONE; |
146 else NONE; |
144 val lambdas'' = fold lambdas_insert (these new_lambdas) lambdas'; |
147 val lambdas'' = fold nts_insert (these new_lambdas) lambdas'; |
145 |
148 |
146 (*list optional terminal and all nonterminals on which the lookahead |
149 (*list optional terminal and all nonterminals on which the lookahead |
147 of a production depends*) |
150 of a production depends*) |
148 fun lookahead_dependency _ [] nts = (NONE, nts) |
151 fun lookahead_dependency _ [] nts = (NONE, nts) |
149 | lookahead_dependency _ (Terminal tk :: _) nts = (SOME tk, nts) |
152 | lookahead_dependency _ (Terminal tk :: _) nts = (SOME tk, nts) |
150 | lookahead_dependency lambdas (Nonterminal (nt, _) :: symbs) nts = |
153 | lookahead_dependency lambdas (Nonterminal (nt, _) :: symbs) nts = |
151 if lambdas_member lambdas nt then |
154 if nts_member lambdas nt then |
152 lookahead_dependency lambdas symbs (nt :: nts) |
155 lookahead_dependency lambdas symbs (nt :: nts) |
153 else (NONE, nt :: nts); |
156 else (NONE, nt :: nts); |
154 |
157 |
155 (*get all known starting tokens for a nonterminal*) |
158 (*get all known starting tokens for a nonterminal*) |
156 fun starts_for_nt nt = snd (fst (Array.sub (prods, nt))); |
159 fun starts_for_nt nt = snd (fst (Array.sub (prods, nt))); |
172 nt_dependencies added_tks nt_prods = |
175 nt_dependencies added_tks nt_prods = |
173 let val (tk, nts) = lookahead_dependency lambdas rhs [] in |
176 let val (tk, nts) = lookahead_dependency lambdas rhs [] in |
174 if member (op =) nts l then (*update production's lookahead*) |
177 if member (op =) nts l then (*update production's lookahead*) |
175 let |
178 let |
176 val new_lambda = |
179 val new_lambda = |
177 is_none tk andalso forall (lambdas_member lambdas) nts; |
180 is_none tk andalso forall (nts_member lambdas) nts; |
178 |
181 |
179 val new_tks = |
182 val new_tks = |
180 (if is_some tk then [the tk] else []) |
183 (if is_some tk then [the tk] else []) |
181 |> fold (tokens_union o starts_for_nt) nts |
184 |> fold (tokens_union o starts_for_nt) nts |
182 |> subtract (op =) l_starts; |
185 |> subtract (op =) l_starts; |
183 |
186 |
184 val added_tks' = tokens_union added_tks new_tks; |
187 val added_tks' = tokens_union added_tks new_tks; |
185 |
188 |
186 val nt_dependencies' = union (op =) nts nt_dependencies; |
189 val nt_dependencies' = fold nts_insert nts nt_dependencies; |
187 |
190 |
188 (*associate production with new starting tokens*) |
191 (*associate production with new starting tokens*) |
189 fun copy ([]: Lexicon.token option list) nt_prods = nt_prods |
192 fun copy ([]: Lexicon.token option list) nt_prods = nt_prods |
190 | copy (tk :: tks) nt_prods = |
193 | copy (tk :: tks) nt_prods = |
191 let |
194 let |
209 else (*skip production*) |
212 else (*skip production*) |
210 examine_prods ps add_lambda nt_dependencies added_tks nt_prods |
213 examine_prods ps add_lambda nt_dependencies added_tks nt_prods |
211 end; |
214 end; |
212 |
215 |
213 (*check each NT whose lookahead depends on new lambda NT*) |
216 (*check each NT whose lookahead depends on new lambda NT*) |
214 fun process_nts [] added_lambdas added_starts = |
217 fun process_nts nt (added_lambdas, added_starts) = |
215 (added_lambdas, added_starts) |
218 let |
216 | process_nts (nt :: nts) added_lambdas added_starts = |
219 val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt); |
217 let |
220 |
218 val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt); |
221 (*existing productions whose lookahead may depend on l*) |
219 |
222 val tk_prods = |
220 (*existing productions whose lookahead may depend on l*) |
223 these (AList.lookup (op =) nt_prods (SOME (get_start l_starts))); |
221 val tk_prods = |
224 |
222 these (AList.lookup (op =) nt_prods (SOME (get_start l_starts))); |
225 (*add_lambda is true if an existing production of the nt |
223 |
226 produces lambda due to the new lambda NT l*) |
224 (*add_lambda is true if an existing production of the nt |
227 val (add_lambda, nt_dependencies, added_tks, nt_prods') = |
225 produces lambda due to the new lambda NT l*) |
228 examine_prods tk_prods false nts_empty [] nt_prods; |
226 val (add_lambda, nt_dependencies, added_tks, nt_prods') = |
229 |
227 examine_prods tk_prods false [] [] nt_prods; |
230 val new_nts = nts_merge (old_nts, nt_dependencies); |
228 |
231 |
229 val added_nts = subtract (op =) old_nts nt_dependencies; |
232 val added_lambdas' = added_lambdas |> add_lambda ? cons nt; |
230 |
233 val _ = |
231 val added_lambdas' = added_lambdas |> add_lambda ? cons nt; |
234 Array.update (prods, nt, ((new_nts, old_tks @ added_tks), nt_prods')); |
232 val _ = |
235 (*N.B. that because the tks component |
233 Array.update |
236 is used to access existing |
234 (prods, nt, ((added_nts @ old_nts, old_tks @ added_tks), nt_prods')); |
237 productions we have to add new |
235 (*N.B. that because the tks component |
238 tokens at the _end_ of the list*) |
236 is used to access existing |
239 val added_starts' = |
237 productions we have to add new |
240 if null added_tks then added_starts |
238 tokens at the _end_ of the list*) |
241 else ((nt, added_tks) :: added_starts); |
239 in |
242 in (added_lambdas', added_starts') end; |
240 if null added_tks then |
243 |
241 process_nts nts added_lambdas' added_starts |
244 val (added_lambdas, added_starts') = |
242 else |
245 nts_fold process_nts dependent ([], added_starts); |
243 process_nts nts added_lambdas' ((nt, added_tks) :: added_starts) |
246 val added_lambdas' = filter_out (nts_member lambdas) added_lambdas; |
244 end; |
|
245 |
|
246 val (added_lambdas, added_starts') = process_nts dependent [] added_starts; |
|
247 val added_lambdas' = filter_out (lambdas_member lambdas) added_lambdas; |
|
248 in |
247 in |
249 propagate_lambda (ls @ added_lambdas') added_starts' |
248 propagate_lambda (ls @ added_lambdas') added_starts' |
250 (fold lambdas_insert added_lambdas' lambdas) |
249 (fold nts_insert added_lambdas' lambdas) |
251 end; |
250 end; |
252 in |
251 in |
253 propagate_lambda |
252 propagate_lambda |
254 (lambdas_fold (fn l => not (lambdas_member lambdas l) ? cons l) lambdas'' []) |
253 (nts_fold (fn l => not (nts_member lambdas l) ? cons l) lambdas'' []) |
255 added_starts lambdas'' |
254 added_starts lambdas'' |
256 end; |
255 end; |
257 |
256 |
258 (*insert production into grammar*) |
257 (*insert production into grammar*) |
259 val (added_starts', _) = |
258 val (added_starts', _) = |
276 |
275 |
277 (*add lhs NT to list of dependent NTs in lookahead*) |
276 (*add lhs NT to list of dependent NTs in lookahead*) |
278 fun add_nts [] = () |
277 fun add_nts [] = () |
279 | add_nts (nt :: _) = |
278 | add_nts (nt :: _) = |
280 let val ((old_nts, old_tks), ps) = Array.sub (prods, nt) in |
279 let val ((old_nts, old_tks), ps) = Array.sub (prods, nt) in |
281 if member (op =) old_nts lhs then () |
280 if nts_member old_nts lhs then () |
282 else Array.update (prods, nt, ((lhs :: old_nts, old_tks), ps)) |
281 else Array.update (prods, nt, ((nts_insert lhs old_nts, old_tks), ps)) |
283 end; |
282 end; |
284 |
283 |
285 (*add new start tokens to chained NTs' lookahead list; |
284 (*add new start tokens to chained NTs' lookahead list; |
286 also store new production for lhs NT*) |
285 also store new production for lhs NT*) |
287 fun add_tks [] added prod_count = (added, prod_count) |
286 fun add_tks [] added prod_count = (added, prod_count) |
347 |
346 |
348 (*copy productions whose lookahead depends on changed_nt; |
347 (*copy productions whose lookahead depends on changed_nt; |
349 if key = SOME unknown_start then tk_prods is used to hold |
348 if key = SOME unknown_start then tk_prods is used to hold |
350 the productions not copied*) |
349 the productions not copied*) |
351 fun update_prods [] result = result |
350 fun update_prods [] result = result |
352 | update_prods ((p as (rhs, _: string, _: nt_tag)) :: ps) |
351 | update_prods ((p as (rhs, _: string, _: nt)) :: ps) |
353 (tk_prods, nt_prods) = |
352 (tk_prods, nt_prods) = |
354 let |
353 let |
355 (*lookahead dependency for production*) |
354 (*lookahead dependency for production*) |
356 val (tk, depends) = lookahead_dependency lambdas' rhs []; |
355 val (tk, depends) = lookahead_dependency lambdas' rhs []; |
357 |
356 |
386 else if key = SOME unknown_start then (p :: tk_prods, nt_prods) |
385 else if key = SOME unknown_start then (p :: tk_prods, nt_prods) |
387 else (tk_prods, nt_prods); |
386 else (tk_prods, nt_prods); |
388 in update_prods ps result end; |
387 in update_prods ps result end; |
389 |
388 |
390 (*copy existing productions for new starting tokens*) |
389 (*copy existing productions for new starting tokens*) |
391 fun process_nts [] added = added |
390 fun process_nts nt added = |
392 | process_nts (nt :: nts) added = |
391 let |
393 let |
392 val (lookahead as (old_nts, old_tks), nt_prods) = Array.sub (prods, nt); |
394 val (lookahead as (old_nts, old_tks), nt_prods) = Array.sub (prods, nt); |
393 |
395 |
394 val tk_prods = these (AList.lookup (op =) nt_prods key); |
396 val tk_prods = these (AList.lookup (op =) nt_prods key); |
395 |
397 |
396 (*associate productions with new lookahead tokens*) |
398 (*associate productions with new lookahead tokens*) |
397 val (tk_prods', nt_prods') = update_prods tk_prods ([], nt_prods); |
399 val (tk_prods', nt_prods') = update_prods tk_prods ([], nt_prods); |
398 |
400 |
399 val nt_prods'' = |
401 val nt_prods'' = |
400 if key = SOME unknown_start then |
402 if key = SOME unknown_start then |
401 AList.update (op =) (key, tk_prods') nt_prods' |
403 AList.update (op =) (key, tk_prods') nt_prods' |
402 else nt_prods'; |
404 else nt_prods'; |
403 |
405 |
404 val added_tks = tokens_subtract old_tks new_tks; |
406 val added_tks = tokens_subtract old_tks new_tks; |
405 in |
407 in |
406 if null added_tks then |
408 if null added_tks then |
407 (Array.update (prods, nt, (lookahead, nt_prods'')); |
409 (Array.update (prods, nt, (lookahead, nt_prods'')); |
408 added) |
410 process_nts nts added) |
409 else |
411 else |
410 (Array.update (prods, nt, ((old_nts, added_tks @ old_tks), nt_prods'')); |
412 (Array.update (prods, nt, ((old_nts, added_tks @ old_tks), nt_prods'')); |
411 ((nt, added_tks) :: added)) |
413 process_nts nts ((nt, added_tks) :: added)) |
412 end; |
414 end; |
|
415 |
413 |
416 val ((dependent, _), _) = Array.sub (prods, changed_nt); |
414 val ((dependent, _), _) = Array.sub (prods, changed_nt); |
417 in add_starts (starts @ process_nts dependent []) end; |
415 in add_starts (starts @ nts_fold process_nts dependent []) end; |
418 in add_starts added_starts' end; |
416 in add_starts added_starts' end; |
419 in add_prods prods chains' lambdas' prod_count ps end; |
417 in add_prods prods chains' lambdas' prod_count ps end; |
420 |
418 |
421 |
419 |
422 (* pretty_gram *) |
420 (* pretty_gram *) |
451 Gram |
449 Gram |
452 {nt_count = 0, |
450 {nt_count = 0, |
453 prod_count = 0, |
451 prod_count = 0, |
454 tags = tags_empty, |
452 tags = tags_empty, |
455 chains = chains_empty, |
453 chains = chains_empty, |
456 lambdas = lambdas_empty, |
454 lambdas = nts_empty, |
457 prods = Vector.fromList [(([], []), [])]}; |
455 prods = Vector.fromList [nt_gram_empty]}; |
458 |
456 |
459 |
457 |
460 (*Add productions to a grammar*) |
458 (*Add productions to a grammar*) |
461 fun extend_gram [] gram = gram |
459 fun extend_gram [] gram = gram |
462 | extend_gram xprods (Gram {nt_count, prod_count, tags, chains, lambdas, prods}) = |
460 | extend_gram xprods (Gram {nt_count, prod_count, tags, chains, lambdas, prods}) = |