src/Pure/Syntax/parser.ML
changeset 67533 f253e5eaf995
parent 67532 71b36ff8f94d
child 67534 ca8fec4a00fa
equal deleted inserted replaced
67532:71b36ff8f94d 67533:f253e5eaf995
    26 
    26 
    27 (** datatype gram **)
    27 (** datatype gram **)
    28 
    28 
    29 (*production for the NTs are stored in a vector
    29 (*production for the NTs are stored in a vector
    30   so we can identify NTs by their index*)
    30   so we can identify NTs by their index*)
    31 type nt_tag = int;
    31 type nt = int;
       
    32 
       
    33 type nts = Inttab.set;
       
    34 val nts_empty: nts = Inttab.empty;
       
    35 val nts_merge: nts * nts -> nts = Inttab.merge (K true);
       
    36 fun nts_insert nt : nts -> nts = Inttab.insert_set nt;
       
    37 fun nts_member (nts: nts) = Inttab.defined nts;
       
    38 fun nts_fold f (nts: nts) = Inttab.fold (f o #1) nts;
    32 
    39 
    33 datatype symb =
    40 datatype symb =
    34   Terminal of Lexicon.token
    41   Terminal of Lexicon.token
    35 | Nonterminal of nt_tag * int;  (*(tag, precedence)*)
    42 | Nonterminal of nt * int;  (*(tag, precedence)*)
    36 
    43 
    37 type nt_gram =
    44 type nt_gram =
    38   ((nt_tag list * Lexicon.token list) *
    45   ((nts * Lexicon.token list) *
    39     (Lexicon.token option * (symb list * string * int) list) list);
    46     (Lexicon.token option * (symb list * string * int) list) list);
    40   (*(([dependent_nts], [start_tokens]), [(start_token, [(rhs, name, prio)])])*)
    47   (*(([dependent_nts], [start_tokens]), [(start_token, [(rhs, name, prio)])])*)
    41   (*depent_nts is a list of all NTs whose lookahead depends on this NT's lookahead*)
    48   (*depent_nts is a list of all NTs whose lookahead depends on this NT's lookahead*)
    42 
    49 
    43 type tags = nt_tag Symtab.table;
    50 val nt_gram_empty: nt_gram = ((nts_empty, []), []);
       
    51 
       
    52 type tags = nt Symtab.table;
    44 val tags_empty: tags = Symtab.empty;
    53 val tags_empty: tags = Symtab.empty;
    45 fun tags_content (tags: tags) = sort_by #1 (Symtab.dest tags);
    54 fun tags_content (tags: tags) = sort_by #1 (Symtab.dest tags);
    46 fun tags_lookup (tags: tags) = Symtab.lookup tags;
    55 fun tags_lookup (tags: tags) = Symtab.lookup tags;
    47 fun tags_insert tag (tags: tags) = Symtab.update_new tag tags;
    56 fun tags_insert tag (tags: tags) = Symtab.update_new tag tags;
    48 fun tags_name (tags: tags) = the o Inttab.lookup (Inttab.make (map swap (Symtab.dest tags)));
    57 fun tags_name (tags: tags) = the o Inttab.lookup (Inttab.make (map swap (Symtab.dest tags)));
    55 fun chains_member (chains: chains) = Int_Graph.is_edge chains;
    64 fun chains_member (chains: chains) = Int_Graph.is_edge chains;
    56 fun chains_declare nt : chains -> chains = Int_Graph.default_node (nt, ());
    65 fun chains_declare nt : chains -> chains = Int_Graph.default_node (nt, ());
    57 fun chains_insert (from, to) =
    66 fun chains_insert (from, to) =
    58   chains_declare from #> chains_declare to #> Int_Graph.add_edge (from, to);
    67   chains_declare from #> chains_declare to #> Int_Graph.add_edge (from, to);
    59 
    68 
    60 type lambdas = Inttab.set;
       
    61 val lambdas_empty: lambdas = Inttab.empty;
       
    62 fun lambdas_member (lambdas: lambdas) = Inttab.defined lambdas;
       
    63 fun lambdas_insert nt : lambdas -> lambdas = Inttab.insert_set nt;
       
    64 fun lambdas_fold f (lambdas: lambdas) = Inttab.fold (f o #1) lambdas;
       
    65 
       
    66 datatype gram =
    69 datatype gram =
    67   Gram of
    70   Gram of
    68    {nt_count: int,
    71    {nt_count: int,
    69     prod_count: int,
    72     prod_count: int,
    70     tags: tags,
    73     tags: tags,
    71     chains: chains,
    74     chains: chains,
    72     lambdas: lambdas,
    75     lambdas: nts,
    73     prods: nt_gram Vector.vector};
    76     prods: nt_gram Vector.vector};
    74     (*"tags" is used to map NT names (i.e. strings) to tags;
    77     (*"tags" is used to map NT names (i.e. strings) to tags;
    75      chain productions are not stored as normal productions
    78      chain productions are not stored as normal productions
    76      but instead as an entry in "chains": from -> to;
    79      but instead as an entry in "chains": from -> to;
    77      lambda productions are stored as normal productions
    80      lambda productions are stored as normal productions
   128                     end;
   131                     end;
   129 
   132 
   130               val tos = chains_all_succs chains' [lhs];
   133               val tos = chains_all_succs chains' [lhs];
   131             in
   134             in
   132               (copy_lookahead tos [],
   135               (copy_lookahead tos [],
   133                 lambdas |> lambdas_member lambdas lhs ? fold lambdas_insert tos)
   136                 lambdas |> nts_member lambdas lhs ? fold nts_insert tos)
   134             end;
   137             end;
   135 
   138 
   136         (*test if new production can produce lambda
   139         (*test if new production can produce lambda
   137           (rhs must either be empty or only consist of lambda NTs)*)
   140           (rhs must either be empty or only consist of lambda NTs)*)
   138         val new_lambdas =
   141         val new_lambdas =
   139           if forall
   142           if forall
   140             (fn Nonterminal (id, _) => lambdas_member lambdas' id
   143             (fn Nonterminal (id, _) => nts_member lambdas' id
   141               | Terminal _ => false) rhs
   144               | Terminal _ => false) rhs
   142           then SOME (filter_out (lambdas_member lambdas') (chains_all_succs chains' [lhs]))
   145           then SOME (filter_out (nts_member lambdas') (chains_all_succs chains' [lhs]))
   143           else NONE;
   146           else NONE;
   144         val lambdas'' = fold lambdas_insert (these new_lambdas) lambdas';
   147         val lambdas'' = fold nts_insert (these new_lambdas) lambdas';
   145 
   148 
   146         (*list optional terminal and all nonterminals on which the lookahead
   149         (*list optional terminal and all nonterminals on which the lookahead
   147           of a production depends*)
   150           of a production depends*)
   148         fun lookahead_dependency _ [] nts = (NONE, nts)
   151         fun lookahead_dependency _ [] nts = (NONE, nts)
   149           | lookahead_dependency _ (Terminal tk :: _) nts = (SOME tk, nts)
   152           | lookahead_dependency _ (Terminal tk :: _) nts = (SOME tk, nts)
   150           | lookahead_dependency lambdas (Nonterminal (nt, _) :: symbs) nts =
   153           | lookahead_dependency lambdas (Nonterminal (nt, _) :: symbs) nts =
   151               if lambdas_member lambdas nt then
   154               if nts_member lambdas nt then
   152                 lookahead_dependency lambdas symbs (nt :: nts)
   155                 lookahead_dependency lambdas symbs (nt :: nts)
   153               else (NONE, nt :: nts);
   156               else (NONE, nt :: nts);
   154 
   157 
   155         (*get all known starting tokens for a nonterminal*)
   158         (*get all known starting tokens for a nonterminal*)
   156         fun starts_for_nt nt = snd (fst (Array.sub (prods, nt)));
   159         fun starts_for_nt nt = snd (fst (Array.sub (prods, nt)));
   172                             nt_dependencies added_tks nt_prods =
   175                             nt_dependencies added_tks nt_prods =
   173                           let val (tk, nts) = lookahead_dependency lambdas rhs [] in
   176                           let val (tk, nts) = lookahead_dependency lambdas rhs [] in
   174                             if member (op =) nts l then       (*update production's lookahead*)
   177                             if member (op =) nts l then       (*update production's lookahead*)
   175                               let
   178                               let
   176                                 val new_lambda =
   179                                 val new_lambda =
   177                                   is_none tk andalso forall (lambdas_member lambdas) nts;
   180                                   is_none tk andalso forall (nts_member lambdas) nts;
   178 
   181 
   179                                 val new_tks =
   182                                 val new_tks =
   180                                   (if is_some tk then [the tk] else [])
   183                                   (if is_some tk then [the tk] else [])
   181                                   |> fold (tokens_union o starts_for_nt) nts
   184                                   |> fold (tokens_union o starts_for_nt) nts
   182                                   |> subtract (op =) l_starts;
   185                                   |> subtract (op =) l_starts;
   183 
   186 
   184                                 val added_tks' = tokens_union added_tks new_tks;
   187                                 val added_tks' = tokens_union added_tks new_tks;
   185 
   188 
   186                                 val nt_dependencies' = union (op =) nts nt_dependencies;
   189                                 val nt_dependencies' = fold nts_insert nts nt_dependencies;
   187 
   190 
   188                                 (*associate production with new starting tokens*)
   191                                 (*associate production with new starting tokens*)
   189                                 fun copy ([]: Lexicon.token option list) nt_prods = nt_prods
   192                                 fun copy ([]: Lexicon.token option list) nt_prods = nt_prods
   190                                   | copy (tk :: tks) nt_prods =
   193                                   | copy (tk :: tks) nt_prods =
   191                                       let
   194                                       let
   209                             else (*skip production*)
   212                             else (*skip production*)
   210                               examine_prods ps add_lambda nt_dependencies added_tks nt_prods
   213                               examine_prods ps add_lambda nt_dependencies added_tks nt_prods
   211                           end;
   214                           end;
   212 
   215 
   213                     (*check each NT whose lookahead depends on new lambda NT*)
   216                     (*check each NT whose lookahead depends on new lambda NT*)
   214                     fun process_nts [] added_lambdas added_starts =
   217                     fun process_nts nt (added_lambdas, added_starts) =
   215                           (added_lambdas, added_starts)
   218                       let
   216                       | process_nts (nt :: nts) added_lambdas added_starts =
   219                         val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt);
   217                           let
   220 
   218                             val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt);
   221                         (*existing productions whose lookahead may depend on l*)
   219 
   222                         val tk_prods =
   220                             (*existing productions whose lookahead may depend on l*)
   223                           these (AList.lookup (op =) nt_prods (SOME (get_start l_starts)));
   221                             val tk_prods =
   224 
   222                               these (AList.lookup (op =) nt_prods (SOME (get_start l_starts)));
   225                         (*add_lambda is true if an existing production of the nt
   223 
   226                           produces lambda due to the new lambda NT l*)
   224                             (*add_lambda is true if an existing production of the nt
   227                         val (add_lambda, nt_dependencies, added_tks, nt_prods') =
   225                               produces lambda due to the new lambda NT l*)
   228                           examine_prods tk_prods false nts_empty [] nt_prods;
   226                             val (add_lambda, nt_dependencies, added_tks, nt_prods') =
   229 
   227                               examine_prods tk_prods false [] [] nt_prods;
   230                         val new_nts = nts_merge (old_nts, nt_dependencies);
   228 
   231 
   229                             val added_nts = subtract (op =) old_nts nt_dependencies;
   232                         val added_lambdas' = added_lambdas |> add_lambda ? cons nt;
   230 
   233                         val _ =
   231                             val added_lambdas' = added_lambdas |> add_lambda ? cons nt;
   234                           Array.update (prods, nt, ((new_nts, old_tks @ added_tks), nt_prods'));
   232                             val _ =
   235                           (*N.B. that because the tks component
   233                               Array.update
   236                             is used to access existing
   234                                 (prods, nt, ((added_nts @ old_nts, old_tks @ added_tks), nt_prods'));
   237                             productions we have to add new
   235                               (*N.B. that because the tks component
   238                             tokens at the _end_ of the list*)
   236                                 is used to access existing
   239                         val added_starts' =
   237                                 productions we have to add new
   240                           if null added_tks then added_starts
   238                                 tokens at the _end_ of the list*)
   241                           else ((nt, added_tks) :: added_starts);
   239                           in
   242                       in (added_lambdas', added_starts') end;
   240                             if null added_tks then
   243 
   241                               process_nts nts added_lambdas' added_starts
   244                     val (added_lambdas, added_starts') =
   242                             else
   245                       nts_fold process_nts dependent ([], added_starts);
   243                               process_nts nts added_lambdas' ((nt, added_tks) :: added_starts)
   246                     val added_lambdas' = filter_out (nts_member lambdas) added_lambdas;
   244                           end;
       
   245 
       
   246                     val (added_lambdas, added_starts') = process_nts dependent [] added_starts;
       
   247                     val added_lambdas' = filter_out (lambdas_member lambdas) added_lambdas;
       
   248                   in
   247                   in
   249                     propagate_lambda (ls @ added_lambdas') added_starts'
   248                     propagate_lambda (ls @ added_lambdas') added_starts'
   250                       (fold lambdas_insert added_lambdas' lambdas)
   249                       (fold nts_insert added_lambdas' lambdas)
   251                   end;
   250                   end;
   252           in
   251           in
   253             propagate_lambda
   252             propagate_lambda
   254               (lambdas_fold (fn l => not (lambdas_member lambdas l) ? cons l) lambdas'' [])
   253               (nts_fold (fn l => not (nts_member lambdas l) ? cons l) lambdas'' [])
   255               added_starts lambdas''
   254               added_starts lambdas''
   256           end;
   255           end;
   257 
   256 
   258         (*insert production into grammar*)
   257         (*insert production into grammar*)
   259         val (added_starts', _) =
   258         val (added_starts', _) =
   276 
   275 
   277               (*add lhs NT to list of dependent NTs in lookahead*)
   276               (*add lhs NT to list of dependent NTs in lookahead*)
   278               fun add_nts [] = ()
   277               fun add_nts [] = ()
   279                 | add_nts (nt :: _) =
   278                 | add_nts (nt :: _) =
   280                     let val ((old_nts, old_tks), ps) = Array.sub (prods, nt) in
   279                     let val ((old_nts, old_tks), ps) = Array.sub (prods, nt) in
   281                       if member (op =) old_nts lhs then ()
   280                       if nts_member old_nts lhs then ()
   282                       else Array.update (prods, nt, ((lhs :: old_nts, old_tks), ps))
   281                       else Array.update (prods, nt, ((nts_insert lhs old_nts, old_tks), ps))
   283                     end;
   282                     end;
   284 
   283 
   285               (*add new start tokens to chained NTs' lookahead list;
   284               (*add new start tokens to chained NTs' lookahead list;
   286                 also store new production for lhs NT*)
   285                 also store new production for lhs NT*)
   287               fun add_tks [] added prod_count = (added, prod_count)
   286               fun add_tks [] added prod_count = (added, prod_count)
   347 
   346 
   348                     (*copy productions whose lookahead depends on changed_nt;
   347                     (*copy productions whose lookahead depends on changed_nt;
   349                       if key = SOME unknown_start then tk_prods is used to hold
   348                       if key = SOME unknown_start then tk_prods is used to hold
   350                       the productions not copied*)
   349                       the productions not copied*)
   351                     fun update_prods [] result = result
   350                     fun update_prods [] result = result
   352                       | update_prods ((p as (rhs, _: string, _: nt_tag)) :: ps)
   351                       | update_prods ((p as (rhs, _: string, _: nt)) :: ps)
   353                             (tk_prods, nt_prods) =
   352                             (tk_prods, nt_prods) =
   354                           let
   353                           let
   355                             (*lookahead dependency for production*)
   354                             (*lookahead dependency for production*)
   356                             val (tk, depends) = lookahead_dependency lambdas' rhs [];
   355                             val (tk, depends) = lookahead_dependency lambdas' rhs [];
   357 
   356 
   386                               else if key = SOME unknown_start then (p :: tk_prods, nt_prods)
   385                               else if key = SOME unknown_start then (p :: tk_prods, nt_prods)
   387                               else (tk_prods, nt_prods);
   386                               else (tk_prods, nt_prods);
   388                           in update_prods ps result end;
   387                           in update_prods ps result end;
   389 
   388 
   390                     (*copy existing productions for new starting tokens*)
   389                     (*copy existing productions for new starting tokens*)
   391                     fun process_nts [] added = added
   390                     fun process_nts nt added =
   392                       | process_nts (nt :: nts) added =
   391                       let
   393                           let
   392                         val (lookahead as (old_nts, old_tks), nt_prods) = Array.sub (prods, nt);
   394                             val (lookahead as (old_nts, old_tks), nt_prods) = Array.sub (prods, nt);
   393 
   395 
   394                         val tk_prods = these (AList.lookup (op =) nt_prods key);
   396                             val tk_prods = these (AList.lookup (op =) nt_prods key);
   395 
   397 
   396                         (*associate productions with new lookahead tokens*)
   398                             (*associate productions with new lookahead tokens*)
   397                         val (tk_prods', nt_prods') = update_prods tk_prods ([], nt_prods);
   399                             val (tk_prods', nt_prods') = update_prods tk_prods ([], nt_prods);
   398 
   400 
   399                         val nt_prods'' =
   401                             val nt_prods'' =
   400                           if key = SOME unknown_start then
   402                               if key = SOME unknown_start then
   401                             AList.update (op =) (key, tk_prods') nt_prods'
   403                                 AList.update (op =) (key, tk_prods') nt_prods'
   402                           else nt_prods';
   404                               else nt_prods';
   403 
   405 
   404                         val added_tks = tokens_subtract old_tks new_tks;
   406                             val added_tks = tokens_subtract old_tks new_tks;
   405                       in
   407                           in
   406                         if null added_tks then
   408                             if null added_tks then
   407                           (Array.update (prods, nt, (lookahead, nt_prods''));
   409                               (Array.update (prods, nt, (lookahead, nt_prods''));
   408                             added)
   410                                 process_nts nts added)
   409                         else
   411                             else
   410                           (Array.update (prods, nt, ((old_nts, added_tks @ old_tks), nt_prods''));
   412                               (Array.update (prods, nt, ((old_nts, added_tks @ old_tks), nt_prods''));
   411                             ((nt, added_tks) :: added))
   413                                 process_nts nts ((nt, added_tks) :: added))
   412                       end;
   414                           end;
       
   415 
   413 
   416                     val ((dependent, _), _) = Array.sub (prods, changed_nt);
   414                     val ((dependent, _), _) = Array.sub (prods, changed_nt);
   417                   in add_starts (starts @ process_nts dependent []) end;
   415                   in add_starts (starts @ nts_fold process_nts dependent []) end;
   418           in add_starts added_starts' end;
   416           in add_starts added_starts' end;
   419       in add_prods prods chains' lambdas' prod_count ps end;
   417       in add_prods prods chains' lambdas' prod_count ps end;
   420 
   418 
   421 
   419 
   422 (* pretty_gram *)
   420 (* pretty_gram *)
   451   Gram
   449   Gram
   452    {nt_count = 0,
   450    {nt_count = 0,
   453     prod_count = 0,
   451     prod_count = 0,
   454     tags = tags_empty,
   452     tags = tags_empty,
   455     chains = chains_empty,
   453     chains = chains_empty,
   456     lambdas = lambdas_empty,
   454     lambdas = nts_empty,
   457     prods = Vector.fromList [(([], []), [])]};
   455     prods = Vector.fromList [nt_gram_empty]};
   458 
   456 
   459 
   457 
   460 (*Add productions to a grammar*)
   458 (*Add productions to a grammar*)
   461 fun extend_gram [] gram = gram
   459 fun extend_gram [] gram = gram
   462   | extend_gram xprods (Gram {nt_count, prod_count, tags, chains, lambdas, prods}) =
   460   | extend_gram xprods (Gram {nt_count, prod_count, tags, chains, lambdas, prods}) =
   506           to change the array's content*)
   504           to change the array's content*)
   507         val prods' =
   505         val prods' =
   508           let
   506           let
   509             fun get_prod i =
   507             fun get_prod i =
   510               if i < nt_count then Vector.sub (prods, i)
   508               if i < nt_count then Vector.sub (prods, i)
   511               else (([], []), []);
   509               else nt_gram_empty;
   512           in Array.tabulate (nt_count', get_prod) end;
   510           in Array.tabulate (nt_count', get_prod) end;
   513 
   511 
   514         (*Add new productions to old ones*)
   512         (*Add new productions to old ones*)
   515         val (chains', lambdas', _) = add_prods prods' chains lambdas NONE xprods';
   513         val (chains', lambdas', _) = add_prods prods' chains lambdas NONE xprods';
   516       in
   514       in
   547 
   545 
   548 
   546 
   549 (* parser state *)
   547 (* parser state *)
   550 
   548 
   551 type state =
   549 type state =
   552   nt_tag * int *    (*identification and production precedence*)
   550   nt * int *    (*identification and production precedence*)
   553   parsetree list *  (*already parsed nonterminals on rhs*)
   551   parsetree list *  (*already parsed nonterminals on rhs*)
   554   symb list *       (*rest of rhs*)
   552   symb list *       (*rest of rhs*)
   555   string *          (*name of production*)
   553   string *          (*name of production*)
   556   int;              (*index for previous state list*)
   554   int;              (*index for previous state list*)
   557 
   555