src/Pure/Syntax/parser.ML
changeset 67517 add9a9f6a290
parent 67516 656720e8f443
child 67518 30ecd3958bc3
equal deleted inserted replaced
67516:656720e8f443 67517:add9a9f6a290
    43 datatype gram =
    43 datatype gram =
    44   Gram of
    44   Gram of
    45    {nt_count: int,
    45    {nt_count: int,
    46     prod_count: int,
    46     prod_count: int,
    47     tags: nt_tag Symtab.table,
    47     tags: nt_tag Symtab.table,
    48     chains: (nt_tag * nt_tag list) list,  (*[(to, [from])]*)
    48     chains: unit Int_Graph.T,
    49     lambdas: nt_tag list,
    49     lambdas: nt_tag list,
    50     prods: nt_gram Vector.vector};
    50     prods: nt_gram Vector.vector};
    51     (*"tags" is used to map NT names (i.e. strings) to tags;
    51     (*"tags" is used to map NT names (i.e. strings) to tags;
    52      chain productions are not stored as normal productions
    52      chain productions are not stored as normal productions
    53      but instead as an entry in "chains";
    53      but instead as an entry in "chains": from -> to;
    54      lambda productions are stored as normal productions
    54      lambda productions are stored as normal productions
    55      and also as an entry in "lambdas"*)
    55      and also as an entry in "lambdas"*)
    56 
    56 
    57 val union_token = union Lexicon.matching_tokens;
    57 val union_token = union Lexicon.matching_tokens;
    58 val subtract_token = subtract Lexicon.matching_tokens;
    58 val subtract_token = subtract Lexicon.matching_tokens;
    62 val unknown_start = Lexicon.eof;
    62 val unknown_start = Lexicon.eof;
    63 
    63 
    64 fun get_start (tok :: _) = tok
    64 fun get_start (tok :: _) = tok
    65   | get_start [] = unknown_start;
    65   | get_start [] = unknown_start;
    66 
    66 
    67 (*get all NTs that are connected with a list of NTs*)
       
    68 fun connected_with _ ([]: nt_tag list) relatives = relatives
       
    69   | connected_with chains (root :: roots) relatives =
       
    70       let val branches = subtract (op =) relatives (these (AList.lookup (op =) chains root));
       
    71       in connected_with chains (branches @ roots) (branches @ relatives) end;
       
    72 
       
    73 (*convert productions to grammar;
    67 (*convert productions to grammar;
    74   N.B. that the chains parameter has the form [(from, [to])];
       
    75   prod_count is of type "int option" and is only updated if it is <> NONE*)
    68   prod_count is of type "int option" and is only updated if it is <> NONE*)
    76 fun add_prods _ chains lambdas prod_count [] = (chains, lambdas, prod_count)
    69 fun add_prods _ chains lambdas prod_count [] = (chains, lambdas, prod_count)
    77   | add_prods prods chains lambdas prod_count ((lhs, new_prod as (rhs, _, pri)) :: ps) =
    70   | add_prods prods chains lambdas prod_count ((lhs, new_prod as (rhs, _, pri)) :: ps) =
    78       let
    71       let
    79         val chain_from =
    72         (*store chain if it does not already exist*)
       
    73         val (chain, new_chain, chains') =
    80           (case (pri, rhs) of
    74           (case (pri, rhs) of
    81             (~1, [Nonterminal (id, ~1)]) => SOME id
    75             (~1, [Nonterminal (from, ~1)]) =>
    82           | _ => NONE);
    76               if Int_Graph.is_edge chains (from, lhs)
    83 
    77               then (SOME from, false, chains)
    84         (*store chain if it does not already exist*)
    78               else (SOME from, true,
    85         val (new_chain, chains') =
    79                 chains
    86           (case chain_from of
    80                 |> Int_Graph.default_node (from, ())
    87             NONE => (NONE, chains)
    81                 |> Int_Graph.default_node (lhs, ())
    88           | SOME from =>
    82                 |> Int_Graph.add_edge (from, lhs))
    89               let val old_tos = these (AList.lookup (op =) chains from) in
    83           | _ => (NONE, false, chains |> Int_Graph.default_node (lhs, ())));
    90                 if member (op =) old_tos lhs then (NONE, chains)
       
    91                 else (SOME from, AList.update (op =) (from, insert (op =) lhs old_tos) chains)
       
    92               end);
       
    93 
    84 
    94         (*propagate new chain in lookahead and lambda lists;
    85         (*propagate new chain in lookahead and lambda lists;
    95           added_starts is used later to associate existing
    86           added_starts is used later to associate existing
    96           productions with new starting tokens*)
    87           productions with new starting tokens*)
    97         val (added_starts, lambdas') =
    88         val (added_starts, lambdas') =
    98           if is_none new_chain then ([], lambdas)
    89           if not new_chain then ([], lambdas)
    99           else
    90           else
   100             let (*lookahead of chain's source*)
    91             let (*lookahead of chain's source*)
   101               val ((_, from_tks), _) = Array.sub (prods, the new_chain);
    92               val ((_, from_tks), _) = Array.sub (prods, the chain);
   102 
    93 
   103               (*copy from's lookahead to chain's destinations*)
    94               (*copy from's lookahead to chain's destinations*)
   104               fun copy_lookahead [] added = added
    95               fun copy_lookahead [] added = added
   105                 | copy_lookahead (to :: tos) added =
    96                 | copy_lookahead (to :: tos) added =
   106                     let
    97                     let
   110                       val _ = Array.update (prods, to, ((to_nts, to_tks @ new_tks), ps));
   101                       val _ = Array.update (prods, to, ((to_nts, to_tks @ new_tks), ps));
   111                     in
   102                     in
   112                       copy_lookahead tos (if null new_tks then added else (to, new_tks) :: added)
   103                       copy_lookahead tos (if null new_tks then added else (to, new_tks) :: added)
   113                     end;
   104                     end;
   114 
   105 
   115               val tos = connected_with chains' [lhs] [lhs];
   106               val tos = Int_Graph.all_succs chains' [lhs];
   116             in
   107             in
   117               (copy_lookahead tos [],
   108               (copy_lookahead tos [],
   118                 union (op =) (if member (op =) lambdas lhs then tos else []) lambdas)
   109                 union (op =) (if member (op =) lambdas lhs then tos else []) lambdas)
   119             end;
   110             end;
   120 
   111 
   122           (rhs must either be empty or only consist of lambda NTs)*)
   113           (rhs must either be empty or only consist of lambda NTs)*)
   123         val (new_lambda, lambdas') =
   114         val (new_lambda, lambdas') =
   124           if forall
   115           if forall
   125             (fn Nonterminal (id, _) => member (op =) lambdas' id
   116             (fn Nonterminal (id, _) => member (op =) lambdas' id
   126               | Terminal _ => false) rhs
   117               | Terminal _ => false) rhs
   127           then (true, union (op =) (connected_with chains' [lhs] [lhs]) lambdas')
   118           then (true, union (op =) (Int_Graph.all_succs chains' [lhs]) lambdas')
   128           else (false, lambdas');
   119           else (false, lambdas');
   129 
   120 
   130         (*list optional terminal and all nonterminals on which the lookahead
   121         (*list optional terminal and all nonterminals on which the lookahead
   131           of a production depends*)
   122           of a production depends*)
   132         fun lookahead_dependency _ [] nts = (NONE, nts)
   123         fun lookahead_dependency _ [] nts = (NONE, nts)
   235                   end;
   226                   end;
   236           in propagate_lambda (subtract (op =) lambdas lambdas') added_starts lambdas' end;
   227           in propagate_lambda (subtract (op =) lambdas lambdas') added_starts lambdas' end;
   237 
   228 
   238         (*insert production into grammar*)
   229         (*insert production into grammar*)
   239         val (added_starts', _) =
   230         val (added_starts', _) =
   240           if is_some chain_from
   231           if is_some chain
   241           then (added_starts', prod_count)  (*don't store chain production*)
   232           then (added_starts', prod_count)  (*don't store chain production*)
   242           else
   233           else
   243             let
   234             let
   244               (*lookahead tokens of new production and on which
   235               (*lookahead tokens of new production and on which
   245                 NTs lookahead depends*)
   236                 NTs lookahead depends*)
   306                       add_tks nts
   297                       add_tks nts
   307                         (if null new_tks then added else (nt, new_tks) :: added) prod_count'
   298                         (if null new_tks then added else (nt, new_tks) :: added) prod_count'
   308                     end;
   299                     end;
   309               val _ = add_nts start_nts;
   300               val _ = add_nts start_nts;
   310             in
   301             in
   311               add_tks (connected_with chains' [lhs] [lhs]) [] prod_count
   302               add_tks (Int_Graph.all_succs chains' [lhs]) [] prod_count
   312             end;
   303             end;
   313 
   304 
   314         (*associate productions with new lookaheads*)
   305         (*associate productions with new lookaheads*)
   315         val _ =
   306         val _ =
   316           let
   307           let
   415 
   406 
   416     fun prod_of_chain from = ([Nonterminal (from, ~1)], "", ~1);
   407     fun prod_of_chain from = ([Nonterminal (from, ~1)], "", ~1);
   417 
   408 
   418     fun pretty_prod (name, tag) =
   409     fun pretty_prod (name, tag) =
   419       (fold (union (op =) o #2) (#2 (Vector.sub (prods, tag))) [] @
   410       (fold (union (op =) o #2) (#2 (Vector.sub (prods, tag))) [] @
   420         map prod_of_chain (these (AList.lookup (op =) chains tag)))
   411         map prod_of_chain (Int_Graph.immediate_preds chains tag))
   421       |> map (fn (symbs, const, p) =>
   412       |> map (fn (symbs, const, p) =>
   422           Pretty.block (Pretty.breaks
   413           Pretty.block (Pretty.breaks
   423             (Pretty.str (name ^ print_pri p ^ " =") :: map pretty_symb symbs @ pretty_const const)));
   414             (Pretty.str (name ^ print_pri p ^ " =") :: map pretty_symb symbs @ pretty_const const)));
   424   in maps pretty_prod (sort_by #1 (Symtab.dest tags)) end;
   415   in maps pretty_prod (sort_by #1 (Symtab.dest tags)) end;
   425 
   416 
   429 
   420 
   430 val empty_gram =
   421 val empty_gram =
   431   Gram
   422   Gram
   432    {nt_count = 0,
   423    {nt_count = 0,
   433     prod_count = 0,
   424     prod_count = 0,
   434     tags = Symtab.empty, chains = [],
   425     tags = Symtab.empty,
       
   426     chains = Int_Graph.empty,
   435     lambdas = [],
   427     lambdas = [],
   436     prods = Vector.fromList [(([], []), [])]};
   428     prods = Vector.fromList [(([], []), [])]};
   437 
       
   438 
       
   439 (*Invert list of chain productions*)
       
   440 fun inverse_chains [] result = result
       
   441   | inverse_chains ((root, branches: nt_tag list) :: cs) result =
       
   442       let
       
   443         fun add ([]: nt_tag list) result = result
       
   444           | add (id :: ids) result =
       
   445               let val old = these (AList.lookup (op =) result id);
       
   446               in add ids (AList.update (op =) (id, root :: old) result) end;
       
   447       in inverse_chains cs (add branches result) end;
       
   448 
   429 
   449 
   430 
   450 (*Add productions to a grammar*)
   431 (*Add productions to a grammar*)
   451 fun extend_gram [] gram = gram
   432 fun extend_gram [] gram = gram
   452   | extend_gram xprods (Gram {nt_count, prod_count, tags, chains, lambdas, prods}) =
   433   | extend_gram xprods (Gram {nt_count, prod_count, tags, chains, lambdas, prods}) =
   499             fun get_prod i =
   480             fun get_prod i =
   500               if i < nt_count then Vector.sub (prods, i)
   481               if i < nt_count then Vector.sub (prods, i)
   501               else (([], []), []);
   482               else (([], []), []);
   502           in Array.tabulate (nt_count', get_prod) end;
   483           in Array.tabulate (nt_count', get_prod) end;
   503 
   484 
   504         val fromto_chains = inverse_chains chains [];
       
   505 
       
   506         (*Add new productions to old ones*)
   485         (*Add new productions to old ones*)
   507         val (fromto_chains', lambdas', _) =
   486         val (chains', lambdas', _) = add_prods prods' chains lambdas NONE xprods';
   508           add_prods prods' fromto_chains lambdas NONE xprods';
       
   509 
       
   510         val chains' = inverse_chains fromto_chains' [];
       
   511       in
   487       in
   512         Gram
   488         Gram
   513          {nt_count = nt_count',
   489          {nt_count = nt_count',
   514           prod_count = prod_count',
   490           prod_count = prod_count',
   515           tags = tags',
   491           tags = tags',
   637 
   613 
   638     fun get_prods [] result = result
   614     fun get_prods [] result = result
   639       | get_prods (nt :: nts) result =
   615       | get_prods (nt :: nts) result =
   640           let val nt_prods = snd (Vector.sub (prods, nt));
   616           let val nt_prods = snd (Vector.sub (prods, nt));
   641           in get_prods nts (token_assoc (nt_prods, tk) @ result) end;
   617           in get_prods nts (token_assoc (nt_prods, tk) @ result) end;
   642   in get_prods (connected_with chains nts nts) [] end;
   618   in get_prods (Int_Graph.all_preds chains nts) [] end;
   643 
   619 
   644 
   620 
   645 fun PROCESSS gram Estate i c states =
   621 fun PROCESSS gram Estate i c states =
   646   let
   622   let
   647     fun all_prods_for nt = prods_for gram true c [nt];
   623     fun all_prods_for nt = prods_for gram true c [nt];