tuned data structure and operations;
authorwenzelm
Tue, 30 Jan 2018 18:38:18 +0100
changeset 67539 1b8aad1909b7
parent 67538 7747761fa067
child 67540 15297abe6472
tuned data structure and operations;
src/Pure/Syntax/lexicon.ML
src/Pure/Syntax/parser.ML
--- a/src/Pure/Syntax/lexicon.ML	Tue Jan 30 16:12:50 2018 +0100
+++ b/src/Pure/Syntax/lexicon.ML	Tue Jan 30 18:38:18 2018 +0100
@@ -23,6 +23,7 @@
   datatype token_kind =
     Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy | FloatSy |
     StrSy | StringSy | Cartouche | Space | Comment of Comment.kind option | EOF
+  val token_kind_ord: token_kind * token_kind -> order
   datatype token = Token of token_kind * string * Position.range
   val kind_of_token: token -> token_kind
   val str_of_token: token -> string
@@ -115,6 +116,28 @@
   Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy | FloatSy | StrSy |
   StringSy | Cartouche | Space | Comment of Comment.kind option | EOF;
 
+val token_kind_index =
+  fn Literal => 0
+   | IdentSy => 1
+   | LongIdentSy => 2
+   | VarSy => 3
+   | TFreeSy => 4
+   | TVarSy => 5
+   | NumSy => 6
+   | FloatSy => 7
+   | StrSy => 8
+   | StringSy => 9
+   | Cartouche => 10
+   | Space => 11
+   | Comment NONE => 12
+   | Comment (SOME Comment.Comment) => 13
+   | Comment (SOME Comment.Cancel) => 14
+   | Comment (SOME Comment.Latex) => 15
+   | EOF => 16;
+
+val token_kind_ord = int_ord o apply2 token_kind_index;
+
+
 datatype token = Token of token_kind * string * Position.range;
 
 fun kind_of_token (Token (k, _, _)) = k;
--- a/src/Pure/Syntax/parser.ML	Tue Jan 30 16:12:50 2018 +0100
+++ b/src/Pure/Syntax/parser.ML	Tue Jan 30 18:38:18 2018 +0100
@@ -26,6 +26,36 @@
 
 (** datatype gram **)
 
+(* tokens *)
+
+fun tokens_match toks =
+  (case apply2 Lexicon.kind_of_token toks of
+    (Lexicon.Literal, Lexicon.Literal) => op = (apply2 Lexicon.str_of_token toks)
+  | kinds => op = kinds);
+
+fun tokens_match_ord toks =
+  (case apply2 Lexicon.kind_of_token toks of
+    (Lexicon.Literal, Lexicon.Literal) => fast_string_ord (apply2 Lexicon.str_of_token toks)
+  | kinds => Lexicon.token_kind_ord kinds);
+
+structure Tokens = Table(type key = Lexicon.token val ord = tokens_match_ord);
+
+type tokens = Tokens.set;
+val tokens_empty: tokens = Tokens.empty;
+val tokens_merge: tokens * tokens -> tokens = Tokens.merge (K true);
+fun tokens_insert tk : tokens -> tokens = Tokens.insert_set tk;
+fun tokens_remove tk : tokens -> tokens = Tokens.remove_set tk;
+fun tokens_member (tokens: tokens) = Tokens.defined tokens;
+fun tokens_is_empty (tokens: tokens) = Tokens.is_empty tokens;
+fun tokens_fold f (tokens: tokens) = Tokens.fold (f o #1) tokens;
+val tokens_union = tokens_fold tokens_insert;
+val tokens_subtract = tokens_fold tokens_remove;
+fun tokens_find P (tokens: tokens) =
+  Tokens.get_first (fn (tok, ()) => if P tok then SOME tok else NONE) tokens;
+
+
+(* nonterminals *)
+
 (*production for the NTs are stored in a vector
   so we can identify NTs by their index*)
 type nt = int;
@@ -45,12 +75,12 @@
 | Nonterminal of nt * int;  (*(tag, precedence)*)
 
 type nt_gram =
-  ((nts * Lexicon.token list) *
+  ((nts * tokens) *
     (Lexicon.token * (symb list * string * int) list) list);
   (*(([dependent_nts], [start_tokens]), [(start_token, [(rhs, name, prio)])])*)
   (*depent_nts is a list of all NTs whose lookahead depends on this NT's lookahead*)
 
-val nt_gram_empty: nt_gram = ((nts_empty, []), []);
+val nt_gram_empty: nt_gram = ((nts_empty, tokens_empty), []);
 
 type tags = nt Symtab.table;
 val tags_empty: tags = Symtab.empty;
@@ -83,22 +113,16 @@
      lambda productions are stored as normal productions
      and also as an entry in "lambdas"*)
 
-fun tokens_match toks =
-  (case apply2 Lexicon.kind_of_token toks of
-    (Lexicon.Literal, Lexicon.Literal) => op = (apply2 Lexicon.str_of_token toks)
-  | kinds => op = kinds);
-
-val tokens_union = union tokens_match;
-val tokens_subtract = subtract tokens_match;
-
 val token_none = Lexicon.Token (Lexicon.Space, "", Position.no_range);
 
 (*productions for which no starting token is
   known yet are associated with this token*)
 val unknown_start = Lexicon.eof;
 
-fun get_start (tok :: _) = tok
-  | get_start [] = unknown_start;
+fun get_start tks =
+  (case Tokens.min tks of
+    SOME (tk, _) => tk
+  | NONE => unknown_start);
 
 (*convert productions to grammar;
   prod_count is of type "int option" and is only updated if it is <> NONE*)
@@ -124,20 +148,18 @@
               val ((_, from_tks), _) = Array.sub (prods, the chain);
 
               (*copy from's lookahead to chain's destinations*)
-              fun copy_lookahead [] added = added
-                | copy_lookahead (to :: tos) added =
-                    let
-                      val ((to_nts, to_tks), ps) = Array.sub (prods, to);
+              fun copy_lookahead to added =
+                let
+                  val ((to_nts, to_tks), ps) = Array.sub (prods, to);
 
-                      val new_tks = tokens_subtract to_tks from_tks;  (*added lookahead tokens*)
-                      val _ = Array.update (prods, to, ((to_nts, to_tks @ new_tks), ps));
-                    in
-                      copy_lookahead tos (if null new_tks then added else (to, new_tks) :: added)
-                    end;
+                  val new_tks = tokens_subtract to_tks from_tks;  (*added lookahead tokens*)
+                  val to_tks' = tokens_merge (to_tks, new_tks);
+                  val _ = Array.update (prods, to, ((to_nts, to_tks'), ps));
+                in if tokens_is_empty new_tks then added else (to, new_tks) :: added end;
 
               val tos = chains_all_succs chains' [lhs];
             in
-              (copy_lookahead tos [],
+              (fold copy_lookahead tos [],
                 lambdas |> nts_member lambdas lhs ? fold nts_insert tos)
             end;
 
@@ -185,28 +207,23 @@
                                   is_none tk andalso nts_subset (nts, lambdas);
 
                                 val new_tks =
-                                  the_list tk
+                                  tokens_empty
+                                  |> fold tokens_insert (the_list tk)
                                   |> nts_fold (tokens_union o starts_for_nt) nts
                                   |> tokens_subtract l_starts;
 
-                                val added_tks' = tokens_union added_tks new_tks;
+                                val added_tks' = tokens_merge (added_tks, new_tks);
 
                                 val nt_dependencies' = nts_merge (nt_dependencies, nts);
 
                                 (*associate production with new starting tokens*)
-                                fun copy ([]: Lexicon.token list) nt_prods = nt_prods
-                                  | copy (tk :: tks) nt_prods =
-                                      let
-                                        val old_prods = these (AList.lookup (op =) nt_prods tk);
-                                        val prods' = p :: old_prods;
-                                      in
-                                        nt_prods
-                                        |> AList.update (op =) (tk, prods')
-                                        |> copy tks
-                                      end;
+                                fun copy (tk: Lexicon.token) nt_prods =
+                                  let val old_prods = these (AList.lookup (op =) nt_prods tk);
+                                  in AList.update (op =) (tk, p :: old_prods) nt_prods end;
 
-                                val nt_prods' =
-                                  copy (new_tks |> new_lambda ? cons token_none) nt_prods;
+                                val nt_prods' = nt_prods
+                                  |> tokens_fold copy new_tks
+                                  |> new_lambda ? copy token_none;
                               in
                                 examine_prods ps (add_lambda orelse new_lambda)
                                   nt_dependencies' added_tks' nt_prods'
@@ -227,19 +244,19 @@
                         (*add_lambda is true if an existing production of the nt
                           produces lambda due to the new lambda NT l*)
                         val (add_lambda, nt_dependencies, added_tks, nt_prods') =
-                          examine_prods tk_prods false nts_empty [] nt_prods;
+                          examine_prods tk_prods false nts_empty tokens_empty nt_prods;
 
                         val new_nts = nts_merge (old_nts, nt_dependencies);
+                        val new_tks = tokens_merge (old_tks, added_tks);
 
                         val added_lambdas' = added_lambdas |> add_lambda ? cons nt;
-                        val _ =
-                          Array.update (prods, nt, ((new_nts, old_tks @ added_tks), nt_prods'));
+                        val _ = Array.update (prods, nt, ((new_nts, new_tks), nt_prods'));
                           (*N.B. that because the tks component
                             is used to access existing
                             productions we have to add new
                             tokens at the _end_ of the list*)
                         val added_starts' =
-                          if null added_tks then added_starts
+                          if tokens_is_empty added_tks then added_starts
                           else ((nt, added_tks) :: added_starts);
                       in (added_lambdas', added_starts') end;
 
@@ -267,13 +284,15 @@
               val (start_tk, start_nts) = lookahead_dependency lambdas' rhs nts_empty;
 
               val start_tks =
-                the_list start_tk
+                tokens_empty
+                |> fold tokens_insert (the_list start_tk)
                 |> nts_fold (tokens_union o starts_for_nt) start_nts;
 
               val start_tks' =
-               (if is_some new_lambdas then [token_none]
-                else if null start_tks then [unknown_start]
-                else []) @ start_tks;
+                start_tks
+                |> (if is_some new_lambdas then tokens_insert token_none
+                    else if tokens_is_empty start_tks then tokens_insert unknown_start
+                    else I);
 
               (*add lhs NT to list of dependent NTs in lookahead*)
               fun add_nts nt initial =
@@ -294,39 +313,37 @@
                       val new_tks = tokens_subtract old_tks start_tks;
 
                       (*store new production*)
-                      fun store [] prods is_new =
-                            (prods,
-                              if is_some prod_count andalso is_new then
-                                Option.map (fn x => x + 1) prod_count
-                              else prod_count, is_new)
-                        | store (tk :: tks) prods is_new =
-                            let
-                              val tk_prods = these (AList.lookup (op =) prods tk);
+                      fun store tk (prods, is_new) =
+                        let
+                          val tk_prods = these (AList.lookup (op =) prods tk);
+
+                          (*if prod_count = NONE then we can assume that
+                            grammar does not contain new production already*)
+                          val (tk_prods', is_new') =
+                            if is_some prod_count then
+                              if member (op =) tk_prods new_prod then (tk_prods, false)
+                              else (new_prod :: tk_prods, true)
+                            else (new_prod :: tk_prods, true);
 
-                              (*if prod_count = NONE then we can assume that
-                                grammar does not contain new production already*)
-                              val (tk_prods', is_new') =
-                                if is_some prod_count then
-                                  if member (op =) tk_prods new_prod then (tk_prods, false)
-                                  else (new_prod :: tk_prods, true)
-                                else (new_prod :: tk_prods, true);
+                          val prods' = prods
+                            |> is_new' ? AList.update (op =) (tk, tk_prods');
+                        in (prods', is_new orelse is_new') end;
 
-                              val prods' =
-                                if is_new' then
-                                  AList.update (op =) (tk: Lexicon.token, tk_prods') prods
-                                else prods;
-                            in store tks prods' (is_new orelse is_new') end;
-
-                      val (nt_prods', prod_count', changed) =
-                        if nt = lhs
-                        then store start_tks' nt_prods false
-                        else (nt_prods, prod_count, false);
+                      val (nt_prods', changed) = (nt_prods, false)
+                        |> nt = lhs ? tokens_fold store start_tks';
+                      val prod_count' =
+                        if is_some prod_count andalso changed then
+                          Option.map (fn x => x + 1) prod_count
+                        else prod_count;
                       val _ =
-                        if not changed andalso null new_tks then ()
-                        else Array.update (prods, nt, ((old_nts, old_tks @ new_tks), nt_prods'));
+                        if not changed andalso tokens_is_empty new_tks then ()
+                        else
+                          Array.update
+                            (prods, nt, ((old_nts, tokens_merge (old_tks, new_tks)), nt_prods'));
                     in
                       add_tks nts
-                        (if null new_tks then added else (nt, new_tks) :: added) prod_count'
+                        (if tokens_is_empty new_tks then added else (nt, new_tks) :: added)
+                        prod_count'
                     end;
               val _ = nts_fold add_nts start_nts true;
             in
@@ -343,7 +360,7 @@
                     (*token under which old productions which
                       depend on changed_nt could be stored*)
                     val key =
-                      find_first (not o member (op =) new_tks) (starts_for_nt changed_nt)
+                      tokens_find (not o tokens_member new_tks) (starts_for_nt changed_nt)
                       |> the_default unknown_start;
 
                     (*copy productions whose lookahead depends on changed_nt;
@@ -364,26 +381,20 @@
                             val lambda =
                               not (nts_is_unique depends) orelse
                               not (nts_is_empty depends) andalso is_some tk
-                              andalso member (op =) new_tks (the tk);
+                              andalso tokens_member new_tks (the tk);
 
                             (*associate production with new starting tokens*)
-                            fun copy ([]: Lexicon.token list) nt_prods = nt_prods
-                              | copy (tk :: tks) nt_prods =
-                                 let
-                                   val tk_prods = these (AList.lookup (op =) nt_prods tk);
-
-                                   val tk_prods' =
-                                     if not lambda then p :: tk_prods
-                                     else insert (op =) p tk_prods;
-                                     (*if production depends on lambda NT we
-                                       have to look for duplicates*)
-                                 in
-                                   nt_prods
-                                   |> AList.update (op =) (tk, tk_prods')
-                                   |> copy tks
-                                 end;
+                            fun copy tk nt_prods =
+                              let
+                                val tk_prods = these (AList.lookup (op =) nt_prods tk);
+                                val tk_prods' =
+                                  if not lambda then p :: tk_prods
+                                  else insert (op =) p tk_prods;
+                                   (*if production depends on lambda NT we
+                                     have to look for duplicates*)
+                              in AList.update (op =) (tk, tk_prods') nt_prods end;
                             val result =
-                              if update then (tk_prods, copy new_tks nt_prods)
+                              if update then (tk_prods, tokens_fold copy new_tks nt_prods)
                               else if key = unknown_start then (p :: tk_prods, nt_prods)
                               else (tk_prods, nt_prods);
                           in update_prods ps result end;
@@ -405,11 +416,12 @@
 
                         val added_tks = tokens_subtract old_tks new_tks;
                       in
-                        if null added_tks then
+                        if tokens_is_empty added_tks then
                           (Array.update (prods, nt, (lookahead, nt_prods''));
                             added)
                         else
-                          (Array.update (prods, nt, ((old_nts, added_tks @ old_tks), nt_prods''));
+                          (Array.update
+                             (prods, nt, ((old_nts, tokens_merge (old_tks, added_tks)), nt_prods''));
                             ((nt, added_tks) :: added))
                       end;