src/Pure/Syntax/parser.ML
changeset 67515 fb87a0e9af21
parent 67513 731b1ad6759a
child 67516 656720e8f443
equal deleted inserted replaced
67514:6877af8bc18d 67515:fb87a0e9af21
   620 val branching_level =
   620 val branching_level =
   621   Config.int (Config.declare ("syntax_branching_level", \<^here>) (fn _ => Config.Int 600));
   621   Config.int (Config.declare ("syntax_branching_level", \<^here>) (fn _ => Config.Int 600));
   622 
   622 
   623 (*get all productions of a NT and NTs chained to it which can
   623 (*get all productions of a NT and NTs chained to it which can
   624   be started by specified token*)
   624   be started by specified token*)
   625 fun prods_for prods chains include_none tk nts =
   625 fun prods_for (Gram {prods, chains, ...}) include_none tk nts =
   626   let
   626   let
   627     fun token_assoc (list, key) =
   627     fun token_assoc (list, key) =
   628       let
   628       let
   629         fun assoc [] result = result
   629         fun assoc [] result = result
   630           | assoc ((keyi, pi) :: pairs) result =
   630           | assoc ((keyi, pi) :: pairs) result =
   639           let val nt_prods = snd (Vector.sub (prods, nt));
   639           let val nt_prods = snd (Vector.sub (prods, nt));
   640           in get_prods nts (token_assoc (nt_prods, tk) @ result) end;
   640           in get_prods nts (token_assoc (nt_prods, tk) @ result) end;
   641   in get_prods (connected_with chains nts nts) [] end;
   641   in get_prods (connected_with chains nts nts) [] end;
   642 
   642 
   643 
   643 
   644 fun PROCESSS prods chains Estate i c states =
   644 fun PROCESSS gram Estate i c states =
   645   let
   645   let
   646     fun all_prods_for nt = prods_for prods chains true c [nt];
   646     fun all_prods_for nt = prods_for gram true c [nt];
   647 
   647 
   648     fun processS _ [] (Si, Sii) = (Si, Sii)
   648     fun processS _ [] (Si, Sii) = (Si, Sii)
   649       | processS used (S :: States) (Si, Sii) =
   649       | processS used (S :: States) (Si, Sii) =
   650           (case S of
   650           (case S of
   651             (_, _, _, Nonterminal (nt, min_prec) :: _, _, _) =>
   651             (_, _, _, Nonterminal (nt, min_prec) :: _, _, _) =>
   687                   in processS used (map (movedot_nonterm tt) Slist @ States) (S :: Si, Sii) end
   687                   in processS used (map (movedot_nonterm tt) Slist @ States) (S :: Si, Sii) end
   688               end)
   688               end)
   689   in processS Inttab.empty states ([], []) end;
   689   in processS Inttab.empty states ([], []) end;
   690 
   690 
   691 
   691 
   692 fun produce prods tags chains stateset i indata prev_token =
   692 fun produce gram stateset i indata prev_token =
   693   (case Array.sub (stateset, i) of
   693   (case Array.sub (stateset, i) of
   694     [] =>
   694     [] =>
   695       let
   695       let
   696         val toks = if Lexicon.is_eof prev_token then indata else prev_token :: indata;
   696         val toks = if Lexicon.is_eof prev_token then indata else prev_token :: indata;
   697         val pos = Position.here (Lexicon.pos_of_token prev_token);
   697         val pos = Position.here (Lexicon.pos_of_token prev_token);
   712   | s =>
   712   | s =>
   713       (case indata of
   713       (case indata of
   714         [] => s
   714         [] => s
   715       | c :: cs =>
   715       | c :: cs =>
   716           let
   716           let
   717             val (si, sii) = PROCESSS prods chains stateset i c s;
   717             val (si, sii) = PROCESSS gram stateset i c s;
   718             val _ = Array.update (stateset, i, si);
   718             val _ = Array.update (stateset, i, si);
   719             val _ = Array.update (stateset, i + 1, sii);
   719             val _ = Array.update (stateset, i + 1, sii);
   720           in produce prods tags chains stateset (i + 1) cs c end));
   720           in produce gram stateset (i + 1) cs c end));
   721 
   721 
   722 
   722 
   723 fun get_trees states = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) states;
   723 fun get_trees states = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) states;
   724 
   724 
   725 fun earley prods tags chains startsymbol indata =
   725 fun earley (gram as Gram {tags, ...}) startsymbol indata =
   726   let
   726   let
   727     val start_tag =
   727     val start_tag =
   728       (case Symtab.lookup tags startsymbol of
   728       (case Symtab.lookup tags startsymbol of
   729         SOME tag => tag
   729         SOME tag => tag
   730       | NONE => error ("Inner syntax: bad grammar root symbol " ^ quote startsymbol));
   730       | NONE => error ("Inner syntax: bad grammar root symbol " ^ quote startsymbol));
   731     val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal Lexicon.eof], "", 0)];
   731     val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal Lexicon.eof], "", 0)];
   732     val s = length indata + 1;
   732     val s = length indata + 1;
   733     val Estate = Array.array (s, []);
   733     val Estate = Array.array (s, []);
   734     val _ = Array.update (Estate, 0, S0);
   734     val _ = Array.update (Estate, 0, S0);
   735   in
   735   in
   736     get_trees (produce prods tags chains Estate 0 indata Lexicon.eof)
   736     get_trees (produce gram Estate 0 indata Lexicon.eof)
   737   end;
   737   end;
   738 
   738 
   739 
   739 
   740 fun parse (Gram {tags, prods, chains, ...}) start toks =
   740 fun parse gram start toks =
   741   let
   741   let
   742     val end_pos =
   742     val end_pos =
   743       (case try List.last toks of
   743       (case try List.last toks of
   744         NONE => Position.none
   744         NONE => Position.none
   745       | SOME (Lexicon.Token (_, _, (_, end_pos))) => end_pos);
   745       | SOME (Lexicon.Token (_, _, (_, end_pos))) => end_pos);
   746     val r =
   746     val r =
   747       (case earley prods tags chains start (toks @ [Lexicon.mk_eof end_pos]) of
   747       (case earley gram start (toks @ [Lexicon.mk_eof end_pos]) of
   748         [] => raise Fail "Inner syntax: no parse trees"
   748         [] => raise Fail "Inner syntax: no parse trees"
   749       | pts => pts);
   749       | pts => pts);
   750   in r end;
   750   in r end;
   751 
   751 
   752 
   752