added comments; fixed a bug; reduced memory usage slightly
authorclasohm
Mon, 03 Jul 1995 13:06:36 +0200
changeset 1175 1b15a4b1a4f7
parent 1174 e57a93d41de0
child 1176 77faa3872f73
added comments; fixed a bug; reduced memory usage slightly
src/Pure/Syntax/parser.ML
--- a/src/Pure/Syntax/parser.ML	Fri Jun 30 11:39:20 1995 +0200
+++ b/src/Pure/Syntax/parser.ML	Mon Jul 03 13:06:36 1995 +0200
@@ -35,79 +35,98 @@
 
 (** datatype gram **)
 
-type nt_tag = int;
+type nt_tag = int;              (*production for the NTs are stored in an array
+                                  so we can identify NTs by their index*)
 
 datatype symb = Terminal of token
-              | Nonterminal of nt_tag * int;
+              | Nonterminal of nt_tag * int;              (*(tag, precedence)*)
 
-type gram_nt = ((nt_tag list * token list) *
+type nt_gram = ((nt_tag list * token list) *
                 (token option * (symb list * string * int) list) list);
                                      (*(([dependent_nts], [start_tokens]),
                                         [(start_token, [(rhs, name, prio)])])*)
+                              (*depent_nts is a list of all NTs whose lookahead
+                                depends on this NT's lookahead*)
 
 datatype gram =
   Gram of {nt_count: int, prod_count: int,
            tags: nt_tag Symtab.table,
            chains: (nt_tag * nt_tag list) list,              (*[(to, [from])]*)
            lambdas: nt_tag list,
-           prods: gram_nt Array.array};
+           prods: nt_gram Array.array};
+                       (*"tags" is used to map NT names (i.e. strings) to tags;
+                         chain productions are not stored as normal productions
+                         but instead as an entry in "chains";
+                         lambda productions are stored as normal productions
+                         and also as an entry in "lambdas"*)
 
-val UnknownToken = EndToken;
+val UnknownStart = EndToken;       (*productions for which no starting token is
+                                     known yet are associated with this token*)
 
-(*get all NTs that are connected with a list of NTs
-  (can be used for [(to, [from])] as well as for [(from, [to])])*)
+(* get all NTs that are connected with a list of NTs 
+   (used for expanding chain list)*)
 fun connected_with _ [] relatives = relatives
   | connected_with chains (root :: roots) relatives =
     let val branches = (assocs chains root) \\ relatives;
     in connected_with chains (branches @ roots) (branches @ relatives) end;
 
 (* convert productions to grammar;
-   N.B. that the chains parameter has the form [(from, [to])]*)
+   N.B. that the chains parameter has the form [(from, [to])];
+   prod_count is of type "int option" and is only updated if it is <> None*)
 fun add_prods _ chains lambdas prod_count [] = (chains, lambdas, prod_count)
   | add_prods prods chains lambdas prod_count
               ((lhs, new_prod as (rhs, name, pri)) :: ps) =
     let
-      (*store new chain*)
-      fun store_chain from =
-        overwrite (chains, (from, lhs ins (assocs chains from)));
- 
-      (*test if prod introduces new chain*)
+      (*test if new_prod is a chain production*)
       val (new_chain, chains') =
-        if pri = ~1 then
-          case rhs of [Nonterminal (id, ~1)] => (Some id, store_chain id)
-                    | _ => (None, chains)
-        else (None, chains);
+        let (*store chain if it does not already exist*)
+            fun store_chain from =
+              let val old_tos = assocs chains from;
+              in if lhs mem old_tos then (None, chains)
+                 else (Some from, overwrite (chains, (from, lhs ins old_tos)))
+              end;
+        in if pri = ~1 then
+             case rhs of [Nonterminal (id, ~1)] => store_chain id
+                       | _ => (None, chains)
+           else (None, chains)
+        end;
 
-      (*propagate new chain in lookahead and lambda lists*)
+      (*propagate new chain in lookahead and lambda lists;
+        added_starts is used later to associate existing
+        productions with new starting tokens*)
       val (added_starts, lambdas') =
         if is_none new_chain then ([], lambdas) else
-        let val ((from_nts, from_tks), _) = Array.sub (prods, the new_chain);
+        let (*lookahead of chain's source*)
+            val ((from_nts, from_tks), _) = Array.sub (prods, the new_chain);
 
+            (*copy from's lookahead to chain's destinations*)
             fun copy_lookahead [] added = added
               | copy_lookahead (to :: tos) added =
-                let val ((to_nts, to_tks), ps) = Array.sub (prods, to);
+                let
+                  val ((to_nts, to_tks), ps) = Array.sub (prods, to);
 
-                    val new_tks = from_tks \\ to_tks;
-                in Array.update (prods, to,
-                     ((to_nts, to_tks @ new_tks), ps));
+                  val new_tks = from_tks \\ to_tks;  (*added lookahead tokens*)
+                in Array.update (prods, to, ((to_nts, to_tks @ new_tks), ps));
                    copy_lookahead tos (if null new_tks then added
                                        else (to, new_tks) :: added)
                 end;
 
             val tos = connected_with chains' [lhs] [lhs];
-        in (copy_lookahead tos [], if lhs mem lambdas then tos union lambdas
-                                   else lambdas)
+        in (copy_lookahead tos [],
+            (if lhs mem lambdas then tos else []) union lambdas)
         end;
-        
-      (*test if new production can produce lambda*)
-      val new_lambda = forall (fn (Nonterminal (id, _)) => id mem lambdas'
-                                | (Terminal _) => false) rhs;
 
-      (*compute new lambda NTs produced by lambda production and chains*)
-      val lambdas' = if not new_lambda then lambdas' else
-                     lambdas' union (connected_with chains' [lhs] [lhs]);
+      (*test if new production can produce lambda
+        (rhs must either be empty or only consist of lambda NTs)*)
+      val (new_lambda, lambdas') =
+        if forall (fn (Nonterminal (id, _)) => id mem lambdas'
+                    | (Terminal _) => false) rhs then
+          (true, lambdas' union (connected_with chains' [lhs] [lhs]))
+        else
+          (false, lambdas');
 
-      (*list all nonterminals on which the lookahead of a production depends*)
+      (*list optional terminal and all nonterminals on which the lookahead
+        of a production depends*)
       fun lookahead_dependency _ [] nts = (None, nts)
         | lookahead_dependency _ ((Terminal tk) :: _) nts = (Some tk, nts)
         | lookahead_dependency lambdas ((Nonterminal (nt, _)) :: symbs) nts =
@@ -115,61 +134,61 @@
               lookahead_dependency lambdas symbs (nt :: nts)
             else (None, nt :: nts);
 
+      (*get all known starting tokens for a nonterminal*)
+      fun starts_for_nt nt = snd (fst (Array.sub (prods, nt)));
+
       val token_union = gen_union matching_tokens;
 
       (*update prods, lookaheads, and lambdas according to new lambda NTs*)
       val (added_starts', lambdas') =
         let
           (*propagate added lambda NT*)
-          fun propagate_lambda [] added_starts lambdas =
-                (added_starts, lambdas)
+          fun propagate_lambda [] added_starts lambdas= (added_starts, lambdas)
             | propagate_lambda (l :: ls) added_starts lambdas =
               let
-                val ((_, l_starts), _) = Array.sub (prods, l);
+                (*get lookahead for lambda NT*)
+                val ((dependent, l_starts), _) = Array.sub (prods, l);
 
                 (*check productions whose lookahead may depend on lamdba NT*)
                 fun examine_prods [] add_lambda nt_dependencies added_tks
-                      nt_prods =
+                                  nt_prods =
                       (add_lambda, nt_dependencies, added_tks, nt_prods)
                   | examine_prods ((p as (rhs, _, _)) :: ps) add_lambda
                       nt_dependencies added_tks nt_prods =
                     let val (tk, nts) = lookahead_dependency lambdas rhs [];
                     in
-                      if l mem nts then
+                      if l mem nts then       (*update production's lookahead*)
                       let
                         val new_lambda = is_none tk andalso nts subset lambdas;
 
-                        fun tks_for_nt nt = snd (fst (Array.sub (prods, nt)));
-
                         val new_tks = (if is_some tk then [the tk] else []) @
-                          foldl token_union ([], map tks_for_nt nts) \\
+                          foldl token_union ([], map starts_for_nt nts) \\
                           l_starts;
   
-                        (*copy production to new starting tokens*)
-                        fun copy [] nt_prods = nt_prods
-                          | copy (tk :: tks) nt_prods =
-                            let
-                              val old_prods = assocs nt_prods tk;
-
-                              val prods' = p :: old_prods;
-                           in copy tks (overwrite (nt_prods, (tk, prods')))
-                           end;
-  
                         val added_tks' = token_union (new_tks, added_tks);
 
                         val nt_dependencies' = nts union nt_dependencies;
 
+                        (*associate production with new starting tokens*)
+                        fun copy [] nt_prods = nt_prods
+                          | copy (tk :: tks) nt_prods =
+                            let val old_prods = assocs nt_prods tk;
+
+                                val prods' = p :: old_prods;
+                            in copy tks (overwrite (nt_prods, (tk, prods')))
+                            end;
+  
                         val nt_prods' =
                           let val new_opt_tks = map Some new_tks;
-                          in if new_lambda then
-                               copy (None :: new_opt_tks) nt_prods
-                             else copy new_opt_tks nt_prods
+                          in copy ((if new_lambda then [None] else []) @
+                                   new_opt_tks) nt_prods
                           end;
                       in examine_prods ps (add_lambda orelse new_lambda)
                            nt_dependencies' added_tks' nt_prods'
                       end
-                      else examine_prods ps add_lambda nt_dependencies
-                             added_tks nt_prods
+                      else                                  (*skip production*)
+                        examine_prods ps add_lambda nt_dependencies
+                                      added_tks nt_prods
                     end;
 
                 (*check each NT whose lookahead depends on new lambda NT*)
@@ -180,10 +199,13 @@
                       val (lookahead as (old_nts, old_tks), nt_prods) =
                         Array.sub (prods, nt);
 
-                      val key = Some (hd l_starts  handle Hd => UnknownToken);
+                      (*existing productions whose lookahead may depend on l*)
+                      val tk_prods =
+                        assocs nt_prods
+                               (Some (hd l_starts  handle Hd => UnknownStart));
 
-                      val tk_prods = assocs nt_prods key;
-
+                      (*add_lambda is true if an existing production of the nt
+                        produces lambda due to the new lambda NT l*)
                       val (add_lambda, nt_dependencies, added_tks, nt_prods') =
                         examine_prods tk_prods false [] [] nt_prods;
 
@@ -193,9 +215,13 @@
                         if add_lambda then nt :: added_lambdas
                         else added_lambdas;
                     in Array.update (prods, nt,
-                            ((added_nts @ old_nts, old_tks @ added_tks),
-                              nt_prods'));         (*N.B. that this must not be
-                                                    "added_tks @ old_tks"!*)
+                                   ((added_nts @ old_nts, old_tks @ added_tks),
+                                    nt_prods'));
+                                          (*N.B. that because the tks component
+                                            is used to access existing
+                                            productions we have to add new
+                                            tokens at the _end_ of the list*)
+
                        if null added_tks then
                          process_nts nts added_lambdas' added_starts
                        else
@@ -203,8 +229,6 @@
                                       ((nt, added_tks) :: added_starts)
                     end;
 
-                val ((dependent, _), _) = Array.sub (prods, l);
-
                 val (added_lambdas, added_starts') =
                   process_nts dependent [] added_starts;
 
@@ -217,56 +241,64 @@
       (*insert production into grammar*)
       val (added_starts', prod_count') =
         if is_some new_chain then (added_starts', prod_count)
-                                           (*we don't store chain productions*)
+                                               (*don't store chain production*)
         else let
-          (*get all known starting tokens for a nonterminal*)
-          fun starts_for_nt nt = snd (fst (Array.sub (prods, nt)));
+          (*lookahead tokens of new production and on which
+            NTs lookahead depends*)
+          val (start_tk, start_nts) = lookahead_dependency lambdas' rhs [];
 
-          (*tokens by which new production can be started and on which
-            nonterminals this depends*)
-          val (start_tk, start_nts) = lookahead_dependency lambdas' rhs [];
-          val start_tks = (if is_some start_tk then [the start_tk] else []) @
-                          foldl (op union) ([], map starts_for_nt start_nts)
+          val start_tks = foldl token_union
+                          (if is_some start_tk then [the start_tk] else [],
+                           map starts_for_nt start_nts);
+
           val opt_starts = (if new_lambda then [None]
-                            else if null start_tks then [Some UnknownToken]
+                            else if null start_tks then [Some UnknownStart]
                             else []) @ (map Some start_tks);
 
           (*add lhs NT to list of dependent NTs in lookahead*)
           fun add_nts [] = ()
             | add_nts (nt :: nts) =
               let val ((old_nts, old_tks), ps) = Array.sub (prods, nt);
-              in Array.update (prods, nt, ((lhs ins old_nts, old_tks), ps))
+              in if lhs mem old_nts then ()
+                 else Array.update (prods, nt, ((lhs :: old_nts, old_tks), ps))
               end;
 
-          (*add new start tokens to chained NTs;
+          (*add new start tokens to chained NTs' lookahead list;
             also store new production for lhs NT*)
           fun add_tks [] added prod_count = (added, prod_count)
             | add_tks (nt :: nts) added prod_count =
               let
-                val ((old_nts, old_tks), ps) = Array.sub (prods, nt);
+                val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt);
 
-                val new_tks = start_tks \\ old_tks;
+                val new_tks = gen_rems matching_tokens (start_tks, old_tks);
 
+                (*store new production*)
                 fun store [] prods is_new =
                       (prods, if is_some prod_count andalso is_new then
-                                Some (the prod_count + 1) else prod_count)
-                  | store (tk :: tks) prods was_new =
+                                apsome (fn x => x+1) prod_count
+                              else prod_count, is_new)
+                  | store (tk :: tks) prods is_new =
                     let val tk_prods = assocs prods tk;
 
-                        val (tk_prods', is_new) =
+                        (*if prod_count = None then we can assume that
+                          grammar does not contain new production already*)
+                        val (tk_prods', is_new') =
                           if is_some prod_count then
                             if new_prod mem tk_prods then (tk_prods, false)
                             else (new_prod :: tk_prods, true)
                           else (new_prod :: tk_prods, true);
-                    in
-                      store tks (overwrite (prods, (tk, tk_prods')))
-                            (was_new orelse is_new)
-                    end;
+
+                        val prods' = if is_new' then
+                                       overwrite (prods, (tk, tk_prods'))
+                                     else prods;
+                    in store tks prods' (is_new orelse is_new') end;
 
-                val (ps', prod_count') =
-                  if nt = lhs then store opt_starts ps false
-                              else (ps, prod_count);
-              in Array.update (prods, nt, ((old_nts, old_tks @ new_tks), ps'));
+                val (nt_prods', prod_count', changed) =
+                  if nt = lhs then store opt_starts nt_prods false
+                              else (nt_prods, prod_count, false);
+              in if not changed andalso null new_tks then ()
+                 else Array.update (prods, nt, ((old_nts, old_tks @ new_tks),
+                                                nt_prods'));
                  add_tks nts (if null new_tks then added
                               else (nt, new_tks) :: added) prod_count'
               end;
@@ -274,24 +306,41 @@
            add_tks (connected_with chains' [lhs] [lhs]) [] prod_count
         end;
 
-      (*update productions with added lookaheads*)
+      (*associate productions with new lookaheads*)
       val dummy =
         let
           (*propagate added start tokens*)
           fun add_starts [] = ()
             | add_starts ((changed_nt, new_tks) :: starts) =
               let
-                (*copy productions which need to be copied*)
+                (*token under which old productions which
+                  depend on changed_nt could be stored*)
+                val key =
+                 case find_first (fn t => not (t mem new_tks))
+                                 (starts_for_nt changed_nt) of
+                      None => Some UnknownStart
+                    | t => t;
+
+                (*copy productions whose lookahead depends on changed_nt;
+                  if key = Some UnknownToken then tk_prods is used to hold
+                  the productions not copied*)
                 fun update_prods [] result = result
                   | update_prods ((p as (rhs, _, _)) :: ps)
                       (tk_prods, nt_prods) =
                     let
+                      (*lookahead dependency for production*)
                       val (tk, depends) = lookahead_dependency lambdas' rhs [];
 
+                      (*test if this production has to be copied*)
+                      val update = changed_nt mem depends;
+
+                      (*test if production could already be associated with
+                        a member of new_tks*)
                       val lambda = length depends > 1 orelse
-                                   not (null depends) andalso is_some tk;
+                                   not (null depends) andalso is_some tk
+                                   andalso the tk mem new_tks;
 
-                      (*copy one production to new starting tokens*)
+                      (*associate production with new starting tokens*)
                       fun copy [] nt_prods = nt_prods
                         | copy (tk :: tks) nt_prods =
                           let
@@ -305,52 +354,45 @@
                          in copy tks
                                  (overwrite (nt_prods, (Some tk, tk_prods')))
                          end;
-
-                      val update = changed_nt mem depends;
-
                       val result =
-                        if update then (tk_prods, copy new_tks nt_prods)
-                        else (p :: tk_prods, nt_prods);
+                        if update then
+                          (tk_prods, copy new_tks nt_prods)
+                        else if key = Some UnknownStart then
+                          (p :: tk_prods, nt_prods)
+                        else (tk_prods, nt_prods);
                     in update_prods ps result end;
 
                 (*copy existing productions for new starting tokens*)
-                fun process_nt [] added = added
-                  | process_nt (nt :: nts) added =
+                fun process_nts [] added = added
+                  | process_nts (nt :: nts) added =
                     let
                       val (lookahead as (old_nts, old_tks), nt_prods) =
                         Array.sub (prods, nt);
 
-                      (*find a token under which old productions which
-                        depend on changed_nt are stored*)
-                      val key =
-                       case find_first (fn t => not (t mem new_tks)) old_tks of
-                            None => Some UnknownToken
-                          | t => t;
-
                       val tk_prods = assocs nt_prods key;
 
+                      (*associate productions with new lookahead tokens*)
                       val (tk_prods', nt_prods') =
                         update_prods tk_prods ([], nt_prods);
 
                       val nt_prods' =
-                        if key = Some UnknownToken then
+                        if key = Some UnknownStart then
                           overwrite (nt_prods', (key, tk_prods'))
                         else nt_prods';
 
-                      val added_tks = new_tks \\ old_tks;
+                      val added_tks =
+                        gen_rems matching_tokens (new_tks, old_tks);
                     in if null added_tks then
                          (Array.update (prods, nt, (lookahead, nt_prods'));
-                          process_nt nts added)
+                          process_nts nts added)
                        else
                          (Array.update (prods, nt,
                             ((old_nts, added_tks @ old_tks), nt_prods'));
-                          process_nt nts ((nt, added_tks) :: added))
+                          process_nts nts ((nt, added_tks) :: added))
                     end;
 
-                val dependent = fst (fst (Array.sub (prods, changed_nt)));
-
-                val added = process_nt dependent [];
-              in add_starts (starts @ added) end;
+                val ((dependent, _), _) = Array.sub (prods, changed_nt);
+              in add_starts (starts @ (process_nts dependent [])) end;
         in add_starts added_starts' end;
   in add_prods prods chains' lambdas' prod_count ps end;
 
@@ -457,8 +499,11 @@
       add_prods prods' fromto_chains lambdas None prods2;
 
     val chains' = inverse_chains fromto_chains' [];
-  in Gram {nt_count = nt_count', prod_count = prod_count', tags = tags',
+  in Pretty.writeln (Pretty.big_list "prods:" (pretty_gram (Gram {nt_count = nt_count', prod_count = prod_count', tags = tags',
+           chains = chains', lambdas = lambdas', prods = prods'})));
+Gram {nt_count = nt_count', prod_count = prod_count', tags = tags',
            chains = chains', lambdas = lambdas', prods = prods'}
+     
   end;