43 datatype gram = |
43 datatype gram = |
44 Gram of |
44 Gram of |
45 {nt_count: int, |
45 {nt_count: int, |
46 prod_count: int, |
46 prod_count: int, |
47 tags: nt_tag Symtab.table, |
47 tags: nt_tag Symtab.table, |
48 chains: (nt_tag * nt_tag list) list, (*[(to, [from])]*) |
48 chains: unit Int_Graph.T, |
49 lambdas: nt_tag list, |
49 lambdas: nt_tag list, |
50 prods: nt_gram Vector.vector}; |
50 prods: nt_gram Vector.vector}; |
51 (*"tags" is used to map NT names (i.e. strings) to tags; |
51 (*"tags" is used to map NT names (i.e. strings) to tags; |
52 chain productions are not stored as normal productions |
52 chain productions are not stored as normal productions |
53 but instead as an entry in "chains"; |
53 but instead as an entry in "chains": from -> to; |
54 lambda productions are stored as normal productions |
54 lambda productions are stored as normal productions |
55 and also as an entry in "lambdas"*) |
55 and also as an entry in "lambdas"*) |
56 |
56 |
57 val union_token = union Lexicon.matching_tokens; |
57 val union_token = union Lexicon.matching_tokens; |
58 val subtract_token = subtract Lexicon.matching_tokens; |
58 val subtract_token = subtract Lexicon.matching_tokens; |
62 val unknown_start = Lexicon.eof; |
62 val unknown_start = Lexicon.eof; |
63 |
63 |
64 fun get_start (tok :: _) = tok |
64 fun get_start (tok :: _) = tok |
65 | get_start [] = unknown_start; |
65 | get_start [] = unknown_start; |
66 |
66 |
67 (*get all NTs that are connected with a list of NTs*) |
|
68 fun connected_with _ ([]: nt_tag list) relatives = relatives |
|
69 | connected_with chains (root :: roots) relatives = |
|
70 let val branches = subtract (op =) relatives (these (AList.lookup (op =) chains root)); |
|
71 in connected_with chains (branches @ roots) (branches @ relatives) end; |
|
72 |
|
73 (*convert productions to grammar; |
67 (*convert productions to grammar; |
74 N.B. that the chains parameter has the form [(from, [to])]; |
|
75 prod_count is of type "int option" and is only updated if it is <> NONE*) |
68 prod_count is of type "int option" and is only updated if it is <> NONE*) |
76 fun add_prods _ chains lambdas prod_count [] = (chains, lambdas, prod_count) |
69 fun add_prods _ chains lambdas prod_count [] = (chains, lambdas, prod_count) |
77 | add_prods prods chains lambdas prod_count ((lhs, new_prod as (rhs, _, pri)) :: ps) = |
70 | add_prods prods chains lambdas prod_count ((lhs, new_prod as (rhs, _, pri)) :: ps) = |
78 let |
71 let |
79 val chain_from = |
72 (*store chain if it does not already exist*) |
|
73 val (chain, new_chain, chains') = |
80 (case (pri, rhs) of |
74 (case (pri, rhs) of |
81 (~1, [Nonterminal (id, ~1)]) => SOME id |
75 (~1, [Nonterminal (from, ~1)]) => |
82 | _ => NONE); |
76 if Int_Graph.is_edge chains (from, lhs) |
83 |
77 then (SOME from, false, chains) |
84 (*store chain if it does not already exist*) |
78 else (SOME from, true, |
85 val (new_chain, chains') = |
79 chains |
86 (case chain_from of |
80 |> Int_Graph.default_node (from, ()) |
87 NONE => (NONE, chains) |
81 |> Int_Graph.default_node (lhs, ()) |
88 | SOME from => |
82 |> Int_Graph.add_edge (from, lhs)) |
89 let val old_tos = these (AList.lookup (op =) chains from) in |
83 | _ => (NONE, false, chains |> Int_Graph.default_node (lhs, ()))); |
90 if member (op =) old_tos lhs then (NONE, chains) |
|
91 else (SOME from, AList.update (op =) (from, insert (op =) lhs old_tos) chains) |
|
92 end); |
|
93 |
84 |
94 (*propagate new chain in lookahead and lambda lists; |
85 (*propagate new chain in lookahead and lambda lists; |
95 added_starts is used later to associate existing |
86 added_starts is used later to associate existing |
96 productions with new starting tokens*) |
87 productions with new starting tokens*) |
97 val (added_starts, lambdas') = |
88 val (added_starts, lambdas') = |
98 if is_none new_chain then ([], lambdas) |
89 if not new_chain then ([], lambdas) |
99 else |
90 else |
100 let (*lookahead of chain's source*) |
91 let (*lookahead of chain's source*) |
101 val ((_, from_tks), _) = Array.sub (prods, the new_chain); |
92 val ((_, from_tks), _) = Array.sub (prods, the chain); |
102 |
93 |
103 (*copy from's lookahead to chain's destinations*) |
94 (*copy from's lookahead to chain's destinations*) |
104 fun copy_lookahead [] added = added |
95 fun copy_lookahead [] added = added |
105 | copy_lookahead (to :: tos) added = |
96 | copy_lookahead (to :: tos) added = |
106 let |
97 let |
122 (rhs must either be empty or only consist of lambda NTs)*) |
113 (rhs must either be empty or only consist of lambda NTs)*) |
123 val (new_lambda, lambdas') = |
114 val (new_lambda, lambdas') = |
124 if forall |
115 if forall |
125 (fn Nonterminal (id, _) => member (op =) lambdas' id |
116 (fn Nonterminal (id, _) => member (op =) lambdas' id |
126 | Terminal _ => false) rhs |
117 | Terminal _ => false) rhs |
127 then (true, union (op =) (connected_with chains' [lhs] [lhs]) lambdas') |
118 then (true, union (op =) (Int_Graph.all_succs chains' [lhs]) lambdas') |
128 else (false, lambdas'); |
119 else (false, lambdas'); |
129 |
120 |
130 (*list optional terminal and all nonterminals on which the lookahead |
121 (*list optional terminal and all nonterminals on which the lookahead |
131 of a production depends*) |
122 of a production depends*) |
132 fun lookahead_dependency _ [] nts = (NONE, nts) |
123 fun lookahead_dependency _ [] nts = (NONE, nts) |
415 |
406 |
416 fun prod_of_chain from = ([Nonterminal (from, ~1)], "", ~1); |
407 fun prod_of_chain from = ([Nonterminal (from, ~1)], "", ~1); |
417 |
408 |
418 fun pretty_prod (name, tag) = |
409 fun pretty_prod (name, tag) = |
419 (fold (union (op =) o #2) (#2 (Vector.sub (prods, tag))) [] @ |
410 (fold (union (op =) o #2) (#2 (Vector.sub (prods, tag))) [] @ |
420 map prod_of_chain (these (AList.lookup (op =) chains tag))) |
411 map prod_of_chain (Int_Graph.immediate_preds chains tag)) |
421 |> map (fn (symbs, const, p) => |
412 |> map (fn (symbs, const, p) => |
422 Pretty.block (Pretty.breaks |
413 Pretty.block (Pretty.breaks |
423 (Pretty.str (name ^ print_pri p ^ " =") :: map pretty_symb symbs @ pretty_const const))); |
414 (Pretty.str (name ^ print_pri p ^ " =") :: map pretty_symb symbs @ pretty_const const))); |
424 in maps pretty_prod (sort_by #1 (Symtab.dest tags)) end; |
415 in maps pretty_prod (sort_by #1 (Symtab.dest tags)) end; |
425 |
416 |
429 |
420 |
430 val empty_gram = |
421 val empty_gram = |
431 Gram |
422 Gram |
432 {nt_count = 0, |
423 {nt_count = 0, |
433 prod_count = 0, |
424 prod_count = 0, |
434 tags = Symtab.empty, chains = [], |
425 tags = Symtab.empty, |
|
426 chains = Int_Graph.empty, |
435 lambdas = [], |
427 lambdas = [], |
436 prods = Vector.fromList [(([], []), [])]}; |
428 prods = Vector.fromList [(([], []), [])]}; |
437 |
|
438 |
|
439 (*Invert list of chain productions*) |
|
440 fun inverse_chains [] result = result |
|
441 | inverse_chains ((root, branches: nt_tag list) :: cs) result = |
|
442 let |
|
443 fun add ([]: nt_tag list) result = result |
|
444 | add (id :: ids) result = |
|
445 let val old = these (AList.lookup (op =) result id); |
|
446 in add ids (AList.update (op =) (id, root :: old) result) end; |
|
447 in inverse_chains cs (add branches result) end; |
|
448 |
429 |
449 |
430 |
450 (*Add productions to a grammar*) |
431 (*Add productions to a grammar*) |
451 fun extend_gram [] gram = gram |
432 fun extend_gram [] gram = gram |
452 | extend_gram xprods (Gram {nt_count, prod_count, tags, chains, lambdas, prods}) = |
433 | extend_gram xprods (Gram {nt_count, prod_count, tags, chains, lambdas, prods}) = |
499 fun get_prod i = |
480 fun get_prod i = |
500 if i < nt_count then Vector.sub (prods, i) |
481 if i < nt_count then Vector.sub (prods, i) |
501 else (([], []), []); |
482 else (([], []), []); |
502 in Array.tabulate (nt_count', get_prod) end; |
483 in Array.tabulate (nt_count', get_prod) end; |
503 |
484 |
504 val fromto_chains = inverse_chains chains []; |
|
505 |
|
506 (*Add new productions to old ones*) |
485 (*Add new productions to old ones*) |
507 val (fromto_chains', lambdas', _) = |
486 val (chains', lambdas', _) = add_prods prods' chains lambdas NONE xprods'; |
508 add_prods prods' fromto_chains lambdas NONE xprods'; |
|
509 |
|
510 val chains' = inverse_chains fromto_chains' []; |
|
511 in |
487 in |
512 Gram |
488 Gram |
513 {nt_count = nt_count', |
489 {nt_count = nt_count', |
514 prod_count = prod_count', |
490 prod_count = prod_count', |
515 tags = tags', |
491 tags = tags', |
637 |
613 |
638 fun get_prods [] result = result |
614 fun get_prods [] result = result |
639 | get_prods (nt :: nts) result = |
615 | get_prods (nt :: nts) result = |
640 let val nt_prods = snd (Vector.sub (prods, nt)); |
616 let val nt_prods = snd (Vector.sub (prods, nt)); |
641 in get_prods nts (token_assoc (nt_prods, tk) @ result) end; |
617 in get_prods nts (token_assoc (nt_prods, tk) @ result) end; |
642 in get_prods (connected_with chains nts nts) [] end; |
618 in get_prods (Int_Graph.all_preds chains nts) [] end; |
643 |
619 |
644 |
620 |
645 fun PROCESSS gram Estate i c states = |
621 fun PROCESSS gram Estate i c states = |
646 let |
622 let |
647 fun all_prods_for nt = prods_for gram true c [nt]; |
623 fun all_prods_for nt = prods_for gram true c [nt]; |