4 |
4 |
5 Isabelle's main parser (used for terms and types). |
5 Isabelle's main parser (used for terms and types). |
6 *) |
6 *) |
7 |
7 |
8 signature PARSER = |
8 signature PARSER = |
9 sig |
9 sig |
10 type gram |
10 type gram |
11 val empty_gram: gram |
11 val empty_gram: gram |
12 val extend_gram: gram -> SynExt.xprod list -> gram |
12 val extend_gram: gram -> SynExt.xprod list -> gram |
|
13 val make_gram: SynExt.xprod list -> gram |
13 val merge_grams: gram -> gram -> gram |
14 val merge_grams: gram -> gram -> gram |
14 val pretty_gram: gram -> Pretty.T list |
15 val pretty_gram: gram -> Pretty.T list |
15 datatype parsetree = |
16 datatype parsetree = |
16 Node of string * parsetree list | |
17 Node of string * parsetree list | |
17 Tip of Lexicon.token |
18 Tip of Lexicon.token |
18 val parse: gram -> string -> Lexicon.token list -> parsetree list |
19 val parse: gram -> string -> Lexicon.token list -> parsetree list |
19 val branching_level: int ref |
20 val branching_level: int ref |
20 end; |
21 end; |
21 |
22 |
22 |
23 structure Parser: PARSER = |
23 structure Parser : PARSER = |
|
24 struct |
24 struct |
|
25 |
25 open Lexicon SynExt; |
26 open Lexicon SynExt; |
|
27 |
26 |
28 |
27 (** datatype gram **) |
29 (** datatype gram **) |
28 |
30 |
29 type nt_tag = int; (*production for the NTs are stored in an array |
31 type nt_tag = int; (*production for the NTs are stored in an array |
30 so we can identify NTs by their index*) |
32 so we can identify NTs by their index*) |
52 and also as an entry in "lambdas"*) |
54 and also as an entry in "lambdas"*) |
53 |
55 |
54 val UnknownStart = EndToken; (*productions for which no starting token is |
56 val UnknownStart = EndToken; (*productions for which no starting token is |
55 known yet are associated with this token*) |
57 known yet are associated with this token*) |
56 |
58 |
57 (* get all NTs that are connected with a list of NTs |
59 (* get all NTs that are connected with a list of NTs |
58 (used for expanding chain list)*) |
60 (used for expanding chain list)*) |
59 fun connected_with _ [] relatives = relatives |
61 fun connected_with _ [] relatives = relatives |
60 | connected_with chains (root :: roots) relatives = |
62 | connected_with chains (root :: roots) relatives = |
61 let val branches = (assocs chains root) \\ relatives; |
63 let val branches = (assocs chains root) \\ relatives; |
62 in connected_with chains (branches @ roots) (branches @ relatives) end; |
64 in connected_with chains (branches @ roots) (branches @ relatives) end; |
66 prod_count is of type "int option" and is only updated if it is <> NONE*) |
68 prod_count is of type "int option" and is only updated if it is <> NONE*) |
67 fun add_prods _ chains lambdas prod_count [] = (chains, lambdas, prod_count) |
69 fun add_prods _ chains lambdas prod_count [] = (chains, lambdas, prod_count) |
68 | add_prods prods chains lambdas prod_count |
70 | add_prods prods chains lambdas prod_count |
69 ((lhs, new_prod as (rhs, name, pri)) :: ps) = |
71 ((lhs, new_prod as (rhs, name, pri)) :: ps) = |
70 let |
72 let |
71 (*test if new_prod is a chain production*) |
73 val chain_from = case (pri, rhs) of (~1, [Nonterminal (id, ~1)]) => SOME id | _ => NONE; |
72 val (new_chain, chains') = |
74 |
73 let (*store chain if it does not already exist*) |
75 (*store chain if it does not already exist*) |
74 fun store_chain from = |
76 val (new_chain, chains') = case chain_from of NONE => (NONE, chains) | SOME from => |
75 let val old_tos = assocs chains from; |
77 let val old_tos = assocs chains from in |
76 in if lhs mem old_tos then (NONE, chains) |
78 if lhs mem old_tos then (NONE, chains) |
77 else (SOME from, overwrite (chains, (from, lhs ins old_tos))) |
79 else (SOME from, overwrite (chains, (from, lhs ins old_tos))) |
78 end; |
|
79 in if pri = ~1 then |
|
80 case rhs of [Nonterminal (id, ~1)] => store_chain id |
|
81 | _ => (NONE, chains) |
|
82 else (NONE, chains) |
|
83 end; |
80 end; |
84 |
81 |
85 (*propagate new chain in lookahead and lambda lists; |
82 (*propagate new chain in lookahead and lambda lists; |
86 added_starts is used later to associate existing |
83 added_starts is used later to associate existing |
87 productions with new starting tokens*) |
84 productions with new starting tokens*) |
138 | propagate_lambda (l :: ls) added_starts lambdas = |
135 | propagate_lambda (l :: ls) added_starts lambdas = |
139 let |
136 let |
140 (*get lookahead for lambda NT*) |
137 (*get lookahead for lambda NT*) |
141 val ((dependent, l_starts), _) = Array.sub (prods, l); |
138 val ((dependent, l_starts), _) = Array.sub (prods, l); |
142 |
139 |
143 (*check productions whose lookahead may depend on lamdba NT*) |
140 (*check productions whose lookahead may depend on lambda NT*) |
144 fun examine_prods [] add_lambda nt_dependencies added_tks |
141 fun examine_prods [] add_lambda nt_dependencies added_tks |
145 nt_prods = |
142 nt_prods = |
146 (add_lambda, nt_dependencies, added_tks, nt_prods) |
143 (add_lambda, nt_dependencies, added_tks, nt_prods) |
147 | examine_prods ((p as (rhs, _, _)) :: ps) add_lambda |
144 | examine_prods ((p as (rhs, _, _)) :: ps) add_lambda |
148 nt_dependencies added_tks nt_prods = |
145 nt_dependencies added_tks nt_prods = |
153 val new_lambda = is_none tk andalso nts subset lambdas; |
150 val new_lambda = is_none tk andalso nts subset lambdas; |
154 |
151 |
155 val new_tks = (if isSome tk then [valOf tk] else []) @ |
152 val new_tks = (if isSome tk then [valOf tk] else []) @ |
156 Library.foldl token_union ([], map starts_for_nt nts) \\ |
153 Library.foldl token_union ([], map starts_for_nt nts) \\ |
157 l_starts; |
154 l_starts; |
158 |
155 |
159 val added_tks' = token_union (new_tks, added_tks); |
156 val added_tks' = token_union (new_tks, added_tks); |
160 |
157 |
161 val nt_dependencies' = nts union nt_dependencies; |
158 val nt_dependencies' = nts union nt_dependencies; |
162 |
159 |
163 (*associate production with new starting tokens*) |
160 (*associate production with new starting tokens*) |
166 let val old_prods = assocs nt_prods tk; |
163 let val old_prods = assocs nt_prods tk; |
167 |
164 |
168 val prods' = p :: old_prods; |
165 val prods' = p :: old_prods; |
169 in copy tks (overwrite (nt_prods, (tk, prods'))) |
166 in copy tks (overwrite (nt_prods, (tk, prods'))) |
170 end; |
167 end; |
171 |
168 |
172 val nt_prods' = |
169 val nt_prods' = |
173 let val new_opt_tks = map SOME new_tks; |
170 let val new_opt_tks = map SOME new_tks; |
174 in copy ((if new_lambda then [NONE] else []) @ |
171 in copy ((if new_lambda then [NONE] else []) @ |
175 new_opt_tks) nt_prods |
172 new_opt_tks) nt_prods |
176 end; |
173 end; |
229 end; |
226 end; |
230 in propagate_lambda (lambdas' \\ lambdas) added_starts lambdas' end; |
227 in propagate_lambda (lambdas' \\ lambdas) added_starts lambdas' end; |
231 |
228 |
232 (*insert production into grammar*) |
229 (*insert production into grammar*) |
233 val (added_starts', prod_count') = |
230 val (added_starts', prod_count') = |
234 if isSome new_chain then (added_starts', prod_count) |
231 if isSome chain_from then (added_starts', prod_count) (*don't store chain production*) |
235 (*don't store chain production*) |
|
236 else let |
232 else let |
237 (*lookahead tokens of new production and on which |
233 (*lookahead tokens of new production and on which |
238 NTs lookahead depends*) |
234 NTs lookahead depends*) |
239 val (start_tk, start_nts) = lookahead_dependency lambdas' rhs []; |
235 val (start_tk, start_nts) = lookahead_dependency lambdas' rhs []; |
240 |
236 |
417 |
413 |
418 val nt_prods = |
414 val nt_prods = |
419 Library.foldl (op union) ([], map snd (snd (Array.sub (prods, tag)))) @ |
415 Library.foldl (op union) ([], map snd (snd (Array.sub (prods, tag)))) @ |
420 map prod_of_chain (assocs chains tag); |
416 map prod_of_chain (assocs chains tag); |
421 in map (pretty_prod name) nt_prods end; |
417 in map (pretty_prod name) nt_prods end; |
422 |
418 |
423 in List.concat (map pretty_nt taglist) end; |
419 in List.concat (map pretty_nt taglist) end; |
424 |
420 |
425 |
421 |
426 (** Operations on gramars **) |
422 (** Operations on gramars **) |
427 |
423 |
504 val chains' = inverse_chains fromto_chains' []; |
500 val chains' = inverse_chains fromto_chains' []; |
505 in Gram {nt_count = nt_count', prod_count = prod_count', tags = tags', |
501 in Gram {nt_count = nt_count', prod_count = prod_count', tags = tags', |
506 chains = chains', lambdas = lambdas', prods = prods'} |
502 chains = chains', lambdas = lambdas', prods = prods'} |
507 end; |
503 end; |
508 |
504 |
|
505 val make_gram = extend_gram empty_gram; |
|
506 |
509 |
507 |
510 (*Merge two grammars*) |
508 (*Merge two grammars*) |
511 fun merge_grams gram_a gram_b = |
509 fun merge_grams gram_a gram_b = |
512 let |
510 let |
513 (*find out which grammar is bigger*) |
511 (*find out which grammar is bigger*) |
540 (fst (valOf (find_first (fn (n, t) => t = tag) tag_list))); |
538 (fst (valOf (find_first (fn (n, t) => t = tag) tag_list))); |
541 in Array.update (table, tag, tag'); |
539 in Array.update (table, tag, tag'); |
542 store_tag nt_count' tags' (tag-1) |
540 store_tag nt_count' tags' (tag-1) |
543 end; |
541 end; |
544 in (store_tag nt_count1 tags1 (nt_count2-1), table) end; |
542 in (store_tag nt_count1 tags1 (nt_count2-1), table) end; |
545 |
543 |
546 (*convert grammar2 tag to grammar1 tag*) |
544 (*convert grammar2 tag to grammar1 tag*) |
547 fun convert_tag tag = Array.sub (tag_table, tag); |
545 fun convert_tag tag = Array.sub (tag_table, tag); |
548 |
546 |
549 (*convert chain list to raw productions*) |
547 (*convert chain list to raw productions*) |
550 fun mk_chain_prods [] result = result |
548 fun mk_chain_prods [] result = result |
554 |
552 |
555 fun make [] result = result |
553 fun make [] result = result |
556 | make (from :: froms) result = make froms ((to_tag, |
554 | make (from :: froms) result = make froms ((to_tag, |
557 ([Nonterminal (convert_tag from, ~1)], "", ~1)) :: result); |
555 ([Nonterminal (convert_tag from, ~1)], "", ~1)) :: result); |
558 in mk_chain_prods cs (make froms [] @ result) end; |
556 in mk_chain_prods cs (make froms [] @ result) end; |
559 |
557 |
560 val chain_prods = mk_chain_prods chains2 []; |
558 val chain_prods = mk_chain_prods chains2 []; |
561 |
559 |
562 (*convert prods2 array to productions*) |
560 (*convert prods2 array to productions*) |
563 fun process_nt ~1 result = result |
561 fun process_nt ~1 result = result |
564 | process_nt nt result = |
562 | process_nt nt result = |
625 (*Make states using a list of rhss*) |
623 (*Make states using a list of rhss*) |
626 fun mkStates i minPrec lhsID rhss = |
624 fun mkStates i minPrec lhsID rhss = |
627 let fun mkState (rhs, id, prodPrec) = (lhsID, prodPrec, [], rhs, id, i); |
625 let fun mkState (rhs, id, prodPrec) = (lhsID, prodPrec, [], rhs, id, i); |
628 in map mkState rhss end; |
626 in map mkState rhss end; |
629 |
627 |
630 (*Add parse tree to list and eliminate duplicates |
628 (*Add parse tree to list and eliminate duplicates |
631 saving the maximum precedence*) |
629 saving the maximum precedence*) |
632 fun conc (t, prec:int) [] = (NONE, [(t, prec)]) |
630 fun conc (t, prec:int) [] = (NONE, [(t, prec)]) |
633 | conc (t, prec) ((t', prec') :: ts) = |
631 | conc (t, prec) ((t', prec') :: ts) = |
634 if t = t' then |
632 if t = t' then |
635 (SOME prec', if prec' >= prec then (t', prec') :: ts |
633 (SOME prec', if prec' >= prec then (t', prec') :: ts |
636 else (t, prec) :: ts) |
634 else (t, prec) :: ts) |
637 else |
635 else |
638 let val (n, ts') = conc (t, prec) ts |
636 let val (n, ts') = conc (t, prec) ts |
639 in (n, (t', prec') :: ts') end; |
637 in (n, (t', prec') :: ts') end; |
640 |
638 |
700 let (*similar to token_assoc but does not automatically include 'NONE' key*) |
698 let (*similar to token_assoc but does not automatically include 'NONE' key*) |
701 fun token_assoc2 (list, key) = |
699 fun token_assoc2 (list, key) = |
702 let fun assoc [] result = result |
700 let fun assoc [] result = result |
703 | assoc ((keyi, pi) :: pairs) result = |
701 | assoc ((keyi, pi) :: pairs) result = |
704 if isSome keyi andalso matching_tokens (valOf keyi, key) |
702 if isSome keyi andalso matching_tokens (valOf keyi, key) |
705 orelse include_none andalso is_none keyi then |
703 orelse include_none andalso is_none keyi then |
706 assoc pairs (pi @ result) |
704 assoc pairs (pi @ result) |
707 else assoc pairs result; |
705 else assoc pairs result; |
708 in assoc list [] end; |
706 in assoc list [] end; |
709 |
707 |
710 fun get_prods [] result = result |
708 fun get_prods [] result = result |
730 (*wanted precedence has been processed*) |
728 (*wanted precedence has been processed*) |
731 (used, movedot_lambda S l) |
729 (used, movedot_lambda S l) |
732 else (*wanted precedence hasn't been parsed yet*) |
730 else (*wanted precedence hasn't been parsed yet*) |
733 let |
731 let |
734 val tk_prods = all_prods_for nt; |
732 val tk_prods = all_prods_for nt; |
735 |
733 |
736 val States' = mkStates i minPrec nt |
734 val States' = mkStates i minPrec nt |
737 (getRHS' minPrec usedPrec tk_prods); |
735 (getRHS' minPrec usedPrec tk_prods); |
738 in (update_prec (nt, minPrec) used, |
736 in (update_prec (nt, minPrec) used, |
739 movedot_lambda S l @ States') |
737 movedot_lambda S l @ States') |
740 end |
738 end |
741 |
739 |
742 | NONE => (*nonterminal is parsed for the first time*) |
740 | NONE => (*nonterminal is parsed for the first time*) |
743 let val tk_prods = all_prods_for nt; |
741 let val tk_prods = all_prods_for nt; |
774 if n >= prec then processS used' States (S :: Si, Sii) |
772 if n >= prec then processS used' States (S :: Si, Sii) |
775 else |
773 else |
776 let val Slist = getS' A prec n Si; |
774 let val Slist = getS' A prec n Si; |
777 val States' = map (movedot_nonterm tt) Slist; |
775 val States' = map (movedot_nonterm tt) Slist; |
778 in processS used' (States' @ States) (S :: Si, Sii) end |
776 in processS used' (States' @ States) (S :: Si, Sii) end |
779 end |
777 end |
780 else |
778 else |
781 let val Slist = getStates Estate i j A prec |
779 let val Slist = getStates Estate i j A prec |
782 in processS used (map (movedot_nonterm tt) Slist @ States) |
780 in processS used (map (movedot_nonterm tt) Slist @ States) |
783 (S :: Si, Sii) |
781 (S :: Si, Sii) |
784 end |
782 end |
791 val msg = |
789 val msg = |
792 if null toks then Pretty.str "Inner syntax error: unexpected end of input" |
790 if null toks then Pretty.str "Inner syntax error: unexpected end of input" |
793 else |
791 else |
794 Pretty.block (Pretty.str "Inner syntax error at: \"" :: |
792 Pretty.block (Pretty.str "Inner syntax error at: \"" :: |
795 Pretty.breaks (map (Pretty.str o str_of_token) |
793 Pretty.breaks (map (Pretty.str o str_of_token) |
796 (rev (tl (rev toks)))) @ |
794 (rev (tl (rev toks)))) @ |
797 [Pretty.str "\""]); |
795 [Pretty.str "\""]); |
798 val expected = |
796 val expected = |
799 Pretty.strs ("Expected tokens: " :: map (quote o str_of_token) allowed); |
797 Pretty.strs ("Expected tokens: " :: map (quote o str_of_token) allowed); |
800 in |
798 in |
801 error (Pretty.string_of (Pretty.blk (0, [msg, Pretty.fbrk, expected]))) |
799 error (Pretty.string_of (Pretty.blk (0, [msg, Pretty.fbrk, expected]))) |
808 |
806 |
809 (*test if tk is a lookahead for a given minimum precedence*) |
807 (*test if tk is a lookahead for a given minimum precedence*) |
810 fun reduction _ minPrec _ (Terminal _ :: _, _, prec:int) = |
808 fun reduction _ minPrec _ (Terminal _ :: _, _, prec:int) = |
811 if prec >= minPrec then true |
809 if prec >= minPrec then true |
812 else false |
810 else false |
813 | reduction tk minPrec checked |
811 | reduction tk minPrec checked |
814 (Nonterminal (nt, nt_prec) :: _, _, prec) = |
812 (Nonterminal (nt, nt_prec) :: _, _, prec) = |
815 if prec >= minPrec andalso not (nt mem checked) then |
813 if prec >= minPrec andalso not (nt mem checked) then |
816 let val chained = connected_with chains [nt] [nt]; |
814 let val chained = connected_with chains [nt] [nt]; |
817 in exists |
815 in exists |
818 (reduction tk nt_prec (chained @ checked)) |
816 (reduction tk nt_prec (chained @ checked)) |
819 (some_prods_for tk nt) |
817 (some_prods_for tk nt) |
820 end |
818 end |
821 else false; |
819 else false; |
822 |
820 |
823 (*compute a list of allowed starting tokens |
821 (*compute a list of allowed starting tokens |
824 for a list of nonterminals considering precedence*) |
822 for a list of nonterminals considering precedence*) |
825 fun get_starts [] result = result |
823 fun get_starts [] result = result |
826 | get_starts ((nt, minPrec:int) :: nts) result = |
824 | get_starts ((nt, minPrec:int) :: nts) result = |
827 let fun get [] result = result |
825 let fun get [] result = result |
828 | get ((SOME tk, prods) :: ps) result = |
826 | get ((SOME tk, prods) :: ps) result = |
839 in get_starts (chained @ nts) |
837 in get_starts (chained @ nts) |
840 ((get nt_prods []) union result) |
838 ((get nt_prods []) union result) |
841 end; |
839 end; |
842 |
840 |
843 val nts = |
841 val nts = |
844 List.mapPartial (fn (_, _, _, Nonterminal (a, prec) :: _, _, _) => |
842 List.mapPartial (fn (_, _, _, Nonterminal (a, prec) :: _, _, _) => |
845 SOME (a, prec) | _ => NONE) |
843 SOME (a, prec) | _ => NONE) |
846 (Array.sub (stateset, i-1)); |
844 (Array.sub (stateset, i-1)); |
847 val allowed = |
845 val allowed = |
848 distinct (get_starts nts [] @ |
846 distinct (get_starts nts [] @ |
849 (List.mapPartial (fn (_, _, _, Terminal a :: _, _, _) => SOME a |
847 (List.mapPartial (fn (_, _, _, Terminal a :: _, _, _) => SOME a |
861 Array.update (stateset, i + 1, sii); |
859 Array.update (stateset, i + 1, sii); |
862 produce prods chains stateset (i + 1) cs c |
860 produce prods chains stateset (i + 1) cs c |
863 end)); |
861 end)); |
864 |
862 |
865 |
863 |
866 fun get_trees l = List.mapPartial (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) |
864 fun get_trees l = List.mapPartial (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) |
867 l; |
865 l; |
868 |
866 |
869 fun earley prods tags chains startsymbol indata = |
867 fun earley prods tags chains startsymbol indata = |
870 let |
868 let |
871 val start_tag = case Symtab.lookup (tags, startsymbol) of |
869 val start_tag = case Symtab.lookup (tags, startsymbol) of |
872 SOME tag => tag |
870 SOME tag => tag |
873 | NONE => error ("parse: Unknown startsymbol " ^ |
871 | NONE => error ("parse: Unknown startsymbol " ^ |
874 quote startsymbol); |
872 quote startsymbol); |
875 val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal EndToken], |
873 val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal EndToken], |
876 "", 0)]; |
874 "", 0)]; |
877 val s = length indata + 1; |
875 val s = length indata + 1; |
878 val Estate = Array.array (s, []); |
876 val Estate = Array.array (s, []); |