52 chain productions are not stored as normal productions |
52 chain productions are not stored as normal productions |
53 but instead as an entry in "chains"; |
53 but instead as an entry in "chains"; |
54 lambda productions are stored as normal productions |
54 lambda productions are stored as normal productions |
55 and also as an entry in "lambdas"*) |
55 and also as an entry in "lambdas"*) |
56 |
56 |
57 val UnknownStart = EndToken; (*productions for which no starting token is |
57 val UnknownStart = eof; (*productions for which no starting token is |
58 known yet are associated with this token*) |
58 known yet are associated with this token*) |
59 |
59 |
60 (* get all NTs that are connected with a list of NTs |
60 (* get all NTs that are connected with a list of NTs |
61 (used for expanding chain list)*) |
61 (used for expanding chain list)*) |
62 fun connected_with _ ([]: nt_tag list) relatives = relatives |
62 fun connected_with _ ([]: nt_tag list) relatives = relatives |
63 | connected_with chains (root :: roots) relatives = |
63 | connected_with chains (root :: roots) relatives = |
393 let |
393 let |
394 fun pretty_name name = [Pretty.str (name ^ " =")]; |
394 fun pretty_name name = [Pretty.str (name ^ " =")]; |
395 |
395 |
396 val nt_name = the o Inttab.lookup (Inttab.make (map swap (Symtab.dest tags))); |
396 val nt_name = the o Inttab.lookup (Inttab.make (map swap (Symtab.dest tags))); |
397 |
397 |
398 fun pretty_symb (Terminal (Token s)) = Pretty.quote (Pretty.str s) |
398 fun pretty_symb (Terminal (Token (Literal, s, _))) = Pretty.quote (Pretty.str s) |
399 | pretty_symb (Terminal tok) = Pretty.str (str_of_token tok) |
399 | pretty_symb (Terminal tok) = Pretty.str (str_of_token tok) |
400 | pretty_symb (Nonterminal (tag, p)) = Pretty.str (nt_name tag ^ "[" ^ string_of_int p ^ "]"); |
400 | pretty_symb (Nonterminal (tag, p)) = Pretty.str (nt_name tag ^ "[" ^ string_of_int p ^ "]"); |
401 |
401 |
402 fun pretty_const "" = [] |
402 fun pretty_const "" = [] |
403 | pretty_const c = [Pretty.str ("=> " ^ Library.quote c)]; |
403 | pretty_const c = [Pretty.str ("=> " ^ Library.quote c)]; |
453 (*Convert symbols to the form used by the parser; |
453 (*Convert symbols to the form used by the parser; |
454 delimiters and predefined terms are stored as terminals, |
454 delimiters and predefined terms are stored as terminals, |
455 nonterminals are converted to integer tags*) |
455 nonterminals are converted to integer tags*) |
456 fun symb_of [] nt_count tags result = (nt_count, tags, rev result) |
456 fun symb_of [] nt_count tags result = (nt_count, tags, rev result) |
457 | symb_of ((Delim s) :: ss) nt_count tags result = |
457 | symb_of ((Delim s) :: ss) nt_count tags result = |
458 symb_of ss nt_count tags ((Terminal (Token s)) :: result) |
458 symb_of ss nt_count tags (Terminal (Token (Literal, s, Position.no_range)) :: result) |
459 | symb_of ((Argument (s, p)) :: ss) nt_count tags result = |
459 | symb_of ((Argument (s, p)) :: ss) nt_count tags result = |
460 let |
460 let |
461 val (nt_count', tags', new_symb) = |
461 val (nt_count', tags', new_symb) = |
462 case predef_term s of |
462 case predef_term s of |
463 NONE => |
463 NONE => |
785 |
785 |
786 fun produce warned prods tags chains stateset i indata prev_token = |
786 fun produce warned prods tags chains stateset i indata prev_token = |
787 (case Array.sub (stateset, i) of |
787 (case Array.sub (stateset, i) of |
788 [] => |
788 [] => |
789 let |
789 let |
790 val toks = |
790 val toks = if is_eof prev_token then indata else prev_token :: indata; |
791 if prev_token = EndToken then indata |
791 val pos = pos_of_token prev_token; |
792 else prev_token :: indata; |
792 in |
793 |
793 if null toks then error ("Inner syntax error: unexpected end of input" ^ pos) |
794 val msg = |
794 else error (Pretty.string_of (Pretty.block |
795 if null toks then Pretty.str "Inner syntax error: unexpected end of input" |
795 (Pretty.str ("Inner syntax error" ^ pos ^ " at: ") :: |
796 else |
796 Pretty.breaks (map (Pretty.str o str_of_token) (#1 (split_last toks)))))) |
797 Pretty.block (Pretty.str "Inner syntax error at: \"" :: |
797 end |
798 Pretty.breaks (map (Pretty.str o str_of_token) (#1 (split_last toks))) @ |
|
799 [Pretty.str "\""]); |
|
800 in error (Pretty.string_of msg) end |
|
801 | s => |
798 | s => |
802 (case indata of |
799 (case indata of |
803 [] => Array.sub (stateset, i) |
800 [] => Array.sub (stateset, i) |
804 | c :: cs => |
801 | c :: cs => |
805 let val (si, sii) = PROCESSS warned prods chains stateset i c s; |
802 let val (si, sii) = PROCESSS warned prods chains stateset i c s; |
816 let |
813 let |
817 val start_tag = case Symtab.lookup tags startsymbol of |
814 val start_tag = case Symtab.lookup tags startsymbol of |
818 SOME tag => tag |
815 SOME tag => tag |
819 | NONE => error ("parse: Unknown startsymbol " ^ |
816 | NONE => error ("parse: Unknown startsymbol " ^ |
820 quote startsymbol); |
817 quote startsymbol); |
821 val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal EndToken], |
818 val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal eof], "", 0)]; |
822 "", 0)]; |
|
823 val s = length indata + 1; |
819 val s = length indata + 1; |
824 val Estate = Array.array (s, []); |
820 val Estate = Array.array (s, []); |
825 in |
821 in |
826 Array.update (Estate, 0, S0); |
822 Array.update (Estate, 0, S0); |
827 get_trees (produce (ref false) prods tags chains Estate 0 indata EndToken) |
823 get_trees (produce (ref false) prods tags chains Estate 0 indata eof) |
828 end; |
824 end; |
829 |
825 |
830 |
826 |
831 fun parse (Gram {tags, prods, chains, ...}) start toks = |
827 fun parse (Gram {tags, prods, chains, ...}) start toks = |
832 let val r = |
828 let |
833 (case earley prods tags chains start (toks @ [Lexicon.EndToken]) of |
829 val end_pos = |
834 [] => sys_error "parse: no parse trees" |
830 (case try List.last toks of |
835 | pts => pts); |
831 NONE => Position.none |
836 in r end; |
832 | SOME (Token (_, _, (_, end_pos))) => end_pos); |
|
833 val r = |
|
834 (case earley prods tags chains start (toks @ [mk_eof end_pos]) of |
|
835 [] => sys_error "parse: no parse trees" |
|
836 | pts => pts); |
|
837 in r end; |
837 |
838 |
838 |
839 |
839 fun guess_infix_lr (Gram gram) c = (*based on educated guess*) |
840 fun guess_infix_lr (Gram gram) c = (*based on educated guess*) |
840 let |
841 let |
841 fun freeze a = map (curry Array.sub a) (0 upto Array.length a - 1); |
842 fun freeze a = map (curry Array.sub a) (0 upto Array.length a - 1); |
842 val prods = (maps snd o maps snd o freeze o #prods) gram; |
843 val prods = maps snd (maps snd (freeze (#prods gram))); |
843 fun guess (SOME ([Nonterminal (_, k), Terminal (Token s), Nonterminal (_, l)], _, j)) = |
844 fun guess (SOME ([Nonterminal (_, k), Terminal (Token (Literal, s, _)), Nonterminal (_, l)], _, j)) = |
844 if k = j andalso l = j + 1 then SOME (s, true, false, j) |
845 if k = j andalso l = j + 1 then SOME (s, true, false, j) |
845 else if k = j + 1 then if l = j then SOME (s, false, true, j) |
846 else if k = j + 1 then if l = j then SOME (s, false, true, j) |
846 else if l = j + 1 then SOME (s, false, false, j) |
847 else if l = j + 1 then SOME (s, false, false, j) |
847 else NONE |
848 else NONE |
848 else NONE |
849 else NONE |