620 val branching_level = |
620 val branching_level = |
621 Config.int (Config.declare ("syntax_branching_level", \<^here>) (fn _ => Config.Int 600)); |
621 Config.int (Config.declare ("syntax_branching_level", \<^here>) (fn _ => Config.Int 600)); |
622 |
622 |
623 (*get all productions of a NT and NTs chained to it which can |
623 (*get all productions of a NT and NTs chained to it which can |
624 be started by specified token*) |
624 be started by specified token*) |
625 fun prods_for prods chains include_none tk nts = |
625 fun prods_for (Gram {prods, chains, ...}) include_none tk nts = |
626 let |
626 let |
627 fun token_assoc (list, key) = |
627 fun token_assoc (list, key) = |
628 let |
628 let |
629 fun assoc [] result = result |
629 fun assoc [] result = result |
630 | assoc ((keyi, pi) :: pairs) result = |
630 | assoc ((keyi, pi) :: pairs) result = |
639 let val nt_prods = snd (Vector.sub (prods, nt)); |
639 let val nt_prods = snd (Vector.sub (prods, nt)); |
640 in get_prods nts (token_assoc (nt_prods, tk) @ result) end; |
640 in get_prods nts (token_assoc (nt_prods, tk) @ result) end; |
641 in get_prods (connected_with chains nts nts) [] end; |
641 in get_prods (connected_with chains nts nts) [] end; |
642 |
642 |
643 |
643 |
644 fun PROCESSS prods chains Estate i c states = |
644 fun PROCESSS gram Estate i c states = |
645 let |
645 let |
646 fun all_prods_for nt = prods_for prods chains true c [nt]; |
646 fun all_prods_for nt = prods_for gram true c [nt]; |
647 |
647 |
648 fun processS _ [] (Si, Sii) = (Si, Sii) |
648 fun processS _ [] (Si, Sii) = (Si, Sii) |
649 | processS used (S :: States) (Si, Sii) = |
649 | processS used (S :: States) (Si, Sii) = |
650 (case S of |
650 (case S of |
651 (_, _, _, Nonterminal (nt, min_prec) :: _, _, _) => |
651 (_, _, _, Nonterminal (nt, min_prec) :: _, _, _) => |
687 in processS used (map (movedot_nonterm tt) Slist @ States) (S :: Si, Sii) end |
687 in processS used (map (movedot_nonterm tt) Slist @ States) (S :: Si, Sii) end |
688 end) |
688 end) |
689 in processS Inttab.empty states ([], []) end; |
689 in processS Inttab.empty states ([], []) end; |
690 |
690 |
691 |
691 |
692 fun produce prods tags chains stateset i indata prev_token = |
692 fun produce gram stateset i indata prev_token = |
693 (case Array.sub (stateset, i) of |
693 (case Array.sub (stateset, i) of |
694 [] => |
694 [] => |
695 let |
695 let |
696 val toks = if Lexicon.is_eof prev_token then indata else prev_token :: indata; |
696 val toks = if Lexicon.is_eof prev_token then indata else prev_token :: indata; |
697 val pos = Position.here (Lexicon.pos_of_token prev_token); |
697 val pos = Position.here (Lexicon.pos_of_token prev_token); |
712 | s => |
712 | s => |
713 (case indata of |
713 (case indata of |
714 [] => s |
714 [] => s |
715 | c :: cs => |
715 | c :: cs => |
716 let |
716 let |
717 val (si, sii) = PROCESSS prods chains stateset i c s; |
717 val (si, sii) = PROCESSS gram stateset i c s; |
718 val _ = Array.update (stateset, i, si); |
718 val _ = Array.update (stateset, i, si); |
719 val _ = Array.update (stateset, i + 1, sii); |
719 val _ = Array.update (stateset, i + 1, sii); |
720 in produce prods tags chains stateset (i + 1) cs c end)); |
720 in produce gram stateset (i + 1) cs c end)); |
721 |
721 |
722 |
722 |
723 fun get_trees states = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) states; |
723 fun get_trees states = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) states; |
724 |
724 |
725 fun earley prods tags chains startsymbol indata = |
725 fun earley (gram as Gram {tags, ...}) startsymbol indata = |
726 let |
726 let |
727 val start_tag = |
727 val start_tag = |
728 (case Symtab.lookup tags startsymbol of |
728 (case Symtab.lookup tags startsymbol of |
729 SOME tag => tag |
729 SOME tag => tag |
730 | NONE => error ("Inner syntax: bad grammar root symbol " ^ quote startsymbol)); |
730 | NONE => error ("Inner syntax: bad grammar root symbol " ^ quote startsymbol)); |
731 val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal Lexicon.eof], "", 0)]; |
731 val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal Lexicon.eof], "", 0)]; |
732 val s = length indata + 1; |
732 val s = length indata + 1; |
733 val Estate = Array.array (s, []); |
733 val Estate = Array.array (s, []); |
734 val _ = Array.update (Estate, 0, S0); |
734 val _ = Array.update (Estate, 0, S0); |
735 in |
735 in |
736 get_trees (produce prods tags chains Estate 0 indata Lexicon.eof) |
736 get_trees (produce gram Estate 0 indata Lexicon.eof) |
737 end; |
737 end; |
738 |
738 |
739 |
739 |
740 fun parse (Gram {tags, prods, chains, ...}) start toks = |
740 fun parse gram start toks = |
741 let |
741 let |
742 val end_pos = |
742 val end_pos = |
743 (case try List.last toks of |
743 (case try List.last toks of |
744 NONE => Position.none |
744 NONE => Position.none |
745 | SOME (Lexicon.Token (_, _, (_, end_pos))) => end_pos); |
745 | SOME (Lexicon.Token (_, _, (_, end_pos))) => end_pos); |
746 val r = |
746 val r = |
747 (case earley prods tags chains start (toks @ [Lexicon.mk_eof end_pos]) of |
747 (case earley gram start (toks @ [Lexicon.mk_eof end_pos]) of |
748 [] => raise Fail "Inner syntax: no parse trees" |
748 [] => raise Fail "Inner syntax: no parse trees" |
749 | pts => pts); |
749 | pts => pts); |
750 in r end; |
750 in r end; |
751 |
751 |
752 |
752 |