src/Pure/Syntax/parser.ML
changeset 67530 a7de81d847b0
parent 67518 30ecd3958bc3
child 67531 d19686205f86
equal deleted inserted replaced
67529:37db2dc5c022 67530:a7de81d847b0
    44   Gram of
    44   Gram of
    45    {nt_count: int,
    45    {nt_count: int,
    46     prod_count: int,
    46     prod_count: int,
    47     tags: nt_tag Symtab.table,
    47     tags: nt_tag Symtab.table,
    48     chains: unit Int_Graph.T,
    48     chains: unit Int_Graph.T,
    49     lambdas: nt_tag list,
    49     lambdas: Inttab.set,
    50     prods: nt_gram Vector.vector};
    50     prods: nt_gram Vector.vector};
    51     (*"tags" is used to map NT names (i.e. strings) to tags;
    51     (*"tags" is used to map NT names (i.e. strings) to tags;
    52      chain productions are not stored as normal productions
    52      chain productions are not stored as normal productions
    53      but instead as an entry in "chains": from -> to;
    53      but instead as an entry in "chains": from -> to;
    54      lambda productions are stored as normal productions
    54      lambda productions are stored as normal productions
    80                 |> Int_Graph.default_node (from, ())
    80                 |> Int_Graph.default_node (from, ())
    81                 |> Int_Graph.default_node (lhs, ())
    81                 |> Int_Graph.default_node (lhs, ())
    82                 |> Int_Graph.add_edge (from, lhs))
    82                 |> Int_Graph.add_edge (from, lhs))
    83           | _ => (NONE, false, chains |> Int_Graph.default_node (lhs, ())));
    83           | _ => (NONE, false, chains |> Int_Graph.default_node (lhs, ())));
    84 
    84 
    85         (*propagate new chain in lookahead and lambda lists;
    85         (*propagate new chain in lookahead and lambdas;
    86           added_starts is used later to associate existing
    86           added_starts is used later to associate existing
    87           productions with new starting tokens*)
    87           productions with new starting tokens*)
    88         val (added_starts, lambdas') =
    88         val (added_starts, lambdas') =
    89           if not new_chain then ([], lambdas)
    89           if not new_chain then ([], lambdas)
    90           else
    90           else
   104                     end;
   104                     end;
   105 
   105 
   106               val tos = Int_Graph.all_succs chains' [lhs];
   106               val tos = Int_Graph.all_succs chains' [lhs];
   107             in
   107             in
   108               (copy_lookahead tos [],
   108               (copy_lookahead tos [],
   109                 union (op =) (if member (op =) lambdas lhs then tos else []) lambdas)
   109                 lambdas |> Inttab.defined lambdas lhs ? fold Inttab.insert_set tos)
   110             end;
   110             end;
   111 
   111 
   112         (*test if new production can produce lambda
   112         (*test if new production can produce lambda
   113           (rhs must either be empty or only consist of lambda NTs)*)
   113           (rhs must either be empty or only consist of lambda NTs)*)
   114         val (new_lambda, lambdas') =
   114         val new_lambdas =
   115           if forall
   115           if forall
   116             (fn Nonterminal (id, _) => member (op =) lambdas' id
   116             (fn Nonterminal (id, _) => Inttab.defined lambdas' id
   117               | Terminal _ => false) rhs
   117               | Terminal _ => false) rhs
   118           then (true, union (op =) (Int_Graph.all_succs chains' [lhs]) lambdas')
   118           then SOME (filter_out (Inttab.defined lambdas') (Int_Graph.all_succs chains' [lhs]))
   119           else (false, lambdas');
   119           else NONE;
       
   120         val lambdas'' = fold Inttab.insert_set (these new_lambdas) lambdas';
   120 
   121 
   121         (*list optional terminal and all nonterminals on which the lookahead
   122         (*list optional terminal and all nonterminals on which the lookahead
   122           of a production depends*)
   123           of a production depends*)
   123         fun lookahead_dependency _ [] nts = (NONE, nts)
   124         fun lookahead_dependency _ [] nts = (NONE, nts)
   124           | lookahead_dependency _ (Terminal tk :: _) nts = (SOME tk, nts)
   125           | lookahead_dependency _ (Terminal tk :: _) nts = (SOME tk, nts)
   125           | lookahead_dependency lambdas (Nonterminal (nt, _) :: symbs) nts =
   126           | lookahead_dependency lambdas (Nonterminal (nt, _) :: symbs) nts =
   126               if member (op =) lambdas nt then
   127               if Inttab.defined lambdas nt then
   127                 lookahead_dependency lambdas symbs (nt :: nts)
   128                 lookahead_dependency lambdas symbs (nt :: nts)
   128               else (NONE, nt :: nts);
   129               else (NONE, nt :: nts);
   129 
   130 
   130         (*get all known starting tokens for a nonterminal*)
   131         (*get all known starting tokens for a nonterminal*)
   131         fun starts_for_nt nt = snd (fst (Array.sub (prods, nt)));
   132         fun starts_for_nt nt = snd (fst (Array.sub (prods, nt)));
   146                       | examine_prods ((p as (rhs, _, _)) :: ps) add_lambda
   147                       | examine_prods ((p as (rhs, _, _)) :: ps) add_lambda
   147                             nt_dependencies added_tks nt_prods =
   148                             nt_dependencies added_tks nt_prods =
   148                           let val (tk, nts) = lookahead_dependency lambdas rhs [] in
   149                           let val (tk, nts) = lookahead_dependency lambdas rhs [] in
   149                             if member (op =) nts l then       (*update production's lookahead*)
   150                             if member (op =) nts l then       (*update production's lookahead*)
   150                               let
   151                               let
   151                                 val new_lambda = is_none tk andalso subset (op =) (nts, lambdas);
   152                                 val new_lambda =
       
   153                                   is_none tk andalso forall (Inttab.defined lambdas) nts;
   152 
   154 
   153                                 val new_tks =
   155                                 val new_tks =
   154                                   (if is_some tk then [the tk] else [])
   156                                   (if is_some tk then [the tk] else [])
   155                                   |> fold (union_token o starts_for_nt) nts
   157                                   |> fold (union_token o starts_for_nt) nts
   156                                   |> subtract (op =) l_starts;
   158                                   |> subtract (op =) l_starts;
   200                             val (add_lambda, nt_dependencies, added_tks, nt_prods') =
   202                             val (add_lambda, nt_dependencies, added_tks, nt_prods') =
   201                               examine_prods tk_prods false [] [] nt_prods;
   203                               examine_prods tk_prods false [] [] nt_prods;
   202 
   204 
   203                             val added_nts = subtract (op =) old_nts nt_dependencies;
   205                             val added_nts = subtract (op =) old_nts nt_dependencies;
   204 
   206 
   205                             val added_lambdas' =
   207                             val added_lambdas' = added_lambdas |> add_lambda ? cons nt;
   206                               if add_lambda then nt :: added_lambdas
       
   207                               else added_lambdas;
       
   208                             val _ =
   208                             val _ =
   209                               Array.update
   209                               Array.update
   210                                 (prods, nt, ((added_nts @ old_nts, old_tks @ added_tks), nt_prods'));
   210                                 (prods, nt, ((added_nts @ old_nts, old_tks @ added_tks), nt_prods'));
   211                               (*N.B. that because the tks component
   211                               (*N.B. that because the tks component
   212                                 is used to access existing
   212                                 is used to access existing
   218                             else
   218                             else
   219                               process_nts nts added_lambdas' ((nt, added_tks) :: added_starts)
   219                               process_nts nts added_lambdas' ((nt, added_tks) :: added_starts)
   220                           end;
   220                           end;
   221 
   221 
   222                     val (added_lambdas, added_starts') = process_nts dependent [] added_starts;
   222                     val (added_lambdas, added_starts') = process_nts dependent [] added_starts;
   223                     val added_lambdas' = subtract (op =) lambdas added_lambdas;
   223                     val added_lambdas' = filter_out (Inttab.defined lambdas) added_lambdas;
   224                   in
   224                   in
   225                     propagate_lambda (ls @ added_lambdas') added_starts' (added_lambdas' @ lambdas)
   225                     propagate_lambda (ls @ added_lambdas') added_starts'
       
   226                       (fold Inttab.insert_set added_lambdas' lambdas)
   226                   end;
   227                   end;
   227           in propagate_lambda (subtract (op =) lambdas lambdas') added_starts lambdas' end;
   228           in
       
   229             propagate_lambda
       
   230               (Inttab.fold (fn (l, ()) => not (Inttab.defined lambdas l) ? cons l) lambdas'' [])
       
   231               added_starts lambdas''
       
   232           end;
   228 
   233 
   229         (*insert production into grammar*)
   234         (*insert production into grammar*)
   230         val (added_starts', _) =
   235         val (added_starts', _) =
   231           if is_some chain
   236           if is_some chain
   232           then (added_starts', prod_count)  (*don't store chain production*)
   237           then (added_starts', prod_count)  (*don't store chain production*)
   239               val start_tks =
   244               val start_tks =
   240                 (if is_some start_tk then [the start_tk] else [])
   245                 (if is_some start_tk then [the start_tk] else [])
   241                 |> fold (union_token o starts_for_nt) start_nts;
   246                 |> fold (union_token o starts_for_nt) start_nts;
   242 
   247 
   243               val opt_starts =
   248               val opt_starts =
   244                (if new_lambda then [NONE]
   249                (if is_some new_lambdas then [NONE]
   245                 else if null start_tks then [SOME unknown_start]
   250                 else if null start_tks then [SOME unknown_start]
   246                 else []) @ map SOME start_tks;
   251                 else []) @ map SOME start_tks;
   247 
   252 
   248               (*add lhs NT to list of dependent NTs in lookahead*)
   253               (*add lhs NT to list of dependent NTs in lookahead*)
   249               fun add_nts [] = ()
   254               fun add_nts [] = ()
   422   Gram
   427   Gram
   423    {nt_count = 0,
   428    {nt_count = 0,
   424     prod_count = 0,
   429     prod_count = 0,
   425     tags = Symtab.empty,
   430     tags = Symtab.empty,
   426     chains = Int_Graph.empty,
   431     chains = Int_Graph.empty,
   427     lambdas = [],
   432     lambdas = Inttab.empty,
   428     prods = Vector.fromList [(([], []), [])]};
   433     prods = Vector.fromList [(([], []), [])]};
   429 
   434 
   430 
   435 
   431 (*Add productions to a grammar*)
   436 (*Add productions to a grammar*)
   432 fun extend_gram [] gram = gram
   437 fun extend_gram [] gram = gram