do not open auxiliary ML structures;
author wenzelm
Fri, 02 Jul 2010 21:41:06 +0200
changeset 37683 ece640e48a6c
parent 37682 ad5489a6be48
child 37684 d123b1e08856
do not open auxiliary ML structures; misc tuning;
src/Pure/Syntax/parser.ML
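
The patch below removes the wildcard "open Lexicon Syn_Ext;" at the top of
structure Parser and instead qualifies every reference explicitly. As a
minimal illustration (not part of the changeset; the identifiers nt_tag and
Lexicon.token simply mirror those in the patch), the same datatype
declaration reads:

  (* before: opening the auxiliary structure hides where "token" comes from *)
  open Lexicon;
  datatype symb =
      Terminal of token
    | Nonterminal of nt_tag * int;

  (* after: qualified names keep Parser's namespace clean and make the
     dependency on Lexicon explicit at each use site *)
  datatype symb =
      Terminal of Lexicon.token
    | Nonterminal of nt_tag * int;
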
--- a/src/Pure/Syntax/parser.ML	Fri Jul 02 20:44:17 2010 +0200
+++ b/src/Pure/Syntax/parser.ML	Fri Jul 02 21:41:06 2010 +0200
@@ -23,19 +23,16 @@
 structure Parser: PARSER =
 struct
 
-open Lexicon Syn_Ext;
-
-
 (** datatype gram **)
 
 type nt_tag = int;              (*productions for the NTs are stored in an array
                                   so we can identify NTs by their index*)
 
-datatype symb = Terminal of token
+datatype symb = Terminal of Lexicon.token
               | Nonterminal of nt_tag * int;              (*(tag, precedence)*)
 
-type nt_gram = ((nt_tag list * token list) *
-                (token option * (symb list * string * int) list) list);
+type nt_gram = ((nt_tag list * Lexicon.token list) *
+                (Lexicon.token option * (symb list * string * int) list) list);
                                      (*(([dependent_nts], [start_tokens]),
                                         [(start_token, [(rhs, name, prio)])])*)
                              (*dependent_nts is a list of all NTs whose lookahead
@@ -53,8 +50,8 @@
                          lambda productions are stored as normal productions
                          and also as an entry in "lambdas"*)
 
-val UnknownStart = eof;       (*productions for which no starting token is
-                                known yet are associated with this token*)
+val UnknownStart = Lexicon.eof;       (*productions for which no starting token is
+                                        known yet are associated with this token*)
 
 (* get all NTs that are connected with a list of NTs
    (used for expanding chain list)*)
@@ -125,7 +122,7 @@
       (*get all known starting tokens for a nonterminal*)
       fun starts_for_nt nt = snd (fst (Array.sub (prods, nt)));
 
-      val token_union = uncurry (union matching_tokens);
+      val token_union = uncurry (union Lexicon.matching_tokens);
 
       (*update prods, lookaheads, and lambdas according to new lambda NTs*)
       val (added_starts', lambdas') =
@@ -158,7 +155,7 @@
                         val nt_dependencies' = union (op =) nts nt_dependencies;
 
                         (*associate production with new starting tokens*)
-                        fun copy ([]: token option list) nt_prods = nt_prods
+                        fun copy ([]: Lexicon.token option list) nt_prods = nt_prods
                           | copy (tk :: tks) nt_prods =
                             let val old_prods = these (AList.lookup (op =) nt_prods tk);
 
@@ -259,7 +256,7 @@
               let
                 val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt);
 
-                val new_tks = subtract matching_tokens old_tks start_tks;
+                val new_tks = subtract Lexicon.matching_tokens old_tks start_tks;
 
                 (*store new production*)
                 fun store [] prods is_new =
@@ -278,7 +275,7 @@
                           else (new_prod :: tk_prods, true);
 
                         val prods' = prods
-                          |> is_new' ? AList.update (op =) (tk: token option, tk_prods');
+                          |> is_new' ? AList.update (op =) (tk: Lexicon.token option, tk_prods');
                     in store tks prods' (is_new orelse is_new') end;
 
                 val (nt_prods', prod_count', changed) =
@@ -329,10 +326,10 @@
                                    andalso member (op =) new_tks (the tk);
 
                       (*associate production with new starting tokens*)
-                      fun copy ([]: token list) nt_prods = nt_prods
+                      fun copy ([]: Lexicon.token list) nt_prods = nt_prods
                         | copy (tk :: tks) nt_prods =
                           let
-                            val tk_prods = (these o AList.lookup (op =) nt_prods) (SOME tk);
+                            val tk_prods = these (AList.lookup (op =) nt_prods (SOME tk));
 
                             val tk_prods' =
                               if not lambda then p :: tk_prods
@@ -359,7 +356,7 @@
                       val (lookahead as (old_nts, old_tks), nt_prods) =
                         Array.sub (prods, nt);
 
-                      val tk_prods = (these o AList.lookup (op =) nt_prods) key;
+                      val tk_prods = these (AList.lookup (op =) nt_prods key);
 
                       (*associate productions with new lookahead tokens*)
                       val (tk_prods', nt_prods') =
@@ -370,7 +367,7 @@
                         |> (key = SOME UnknownStart) ? AList.update (op =) (key, tk_prods')
 
                       val added_tks =
-                        subtract matching_tokens old_tks new_tks;
+                        subtract Lexicon.matching_tokens old_tks new_tks;
                     in if null added_tks then
                          (Array.update (prods, nt, (lookahead, nt_prods'));
                           process_nts nts added)
@@ -381,7 +378,7 @@
                     end;
 
                 val ((dependent, _), _) = Array.sub (prods, changed_nt);
-              in add_starts (starts @ (process_nts dependent [])) end;
+              in add_starts (starts @ process_nts dependent []) end;
         in add_starts added_starts' end;
   in add_prods prods chains' lambdas' prod_count ps end;
 
@@ -394,8 +391,8 @@
 
     val nt_name = the o Inttab.lookup (Inttab.make (map swap (Symtab.dest tags)));
 
-    fun pretty_symb (Terminal (Token (Literal, s, _))) = Pretty.quote (Pretty.str s)
-      | pretty_symb (Terminal tok) = Pretty.str (str_of_token tok)
+    fun pretty_symb (Terminal (Lexicon.Token (Lexicon.Literal, s, _))) = Pretty.quote (Pretty.str s)
+      | pretty_symb (Terminal tok) = Pretty.str (Lexicon.str_of_token tok)
       | pretty_symb (Nonterminal (tag, p)) =
           Pretty.str (nt_name tag ^ "[" ^ signed_string_of_int p ^ "]");
 
@@ -422,7 +419,6 @@
 
 (** Operations on grammars **)
 
-(*The mother of all grammars*)
 val empty_gram = Gram {nt_count = 0, prod_count = 0,
                        tags = Symtab.empty, chains = [], lambdas = [],
                        prods = Array.array (0, (([], []), []))};
@@ -454,12 +450,13 @@
       delimiters and predefined terms are stored as terminals,
       nonterminals are converted to integer tags*)
     fun symb_of [] nt_count tags result = (nt_count, tags, rev result)
-      | symb_of ((Delim s) :: ss) nt_count tags result =
-          symb_of ss nt_count tags (Terminal (Token (Literal, s, Position.no_range)) :: result)
-      | symb_of ((Argument (s, p)) :: ss) nt_count tags result =
+      | symb_of ((Syn_Ext.Delim s) :: ss) nt_count tags result =
+          symb_of ss nt_count tags
+            (Terminal (Lexicon.Token (Lexicon.Literal, s, Position.no_range)) :: result)
+      | symb_of ((Syn_Ext.Argument (s, p)) :: ss) nt_count tags result =
           let
             val (nt_count', tags', new_symb) =
-              case predef_term s of
+              case Lexicon.predef_term s of
                 NONE =>
                   let val (nt_count', tags', s_tag) = get_tag nt_count tags s;
                   in (nt_count', tags', Nonterminal (s_tag, p)) end
@@ -471,7 +468,7 @@
     (*Convert list of productions by invoking symb_of for each of them*)
     fun prod_of [] nt_count prod_count tags result =
           (nt_count, prod_count, tags, result)
-      | prod_of ((XProd (lhs, xsymbs, const, pri)) :: ps)
+      | prod_of ((Syn_Ext.XProd (lhs, xsymbs, const, pri)) :: ps)
                 nt_count prod_count tags result =
         let val (nt_count', tags', lhs_tag) = get_tag nt_count tags lhs;
 
@@ -604,7 +601,7 @@
 
 datatype parsetree =
   Node of string * parsetree list |
-  Tip of token;
+  Tip of Lexicon.token;
 
 type state =
   nt_tag * int *                (*identification and production precedence*)
@@ -675,7 +672,7 @@
 
 
 fun movedot_term (A, j, ts, Terminal a :: sa, id, i) c =
-  if valued_token c then
+  if Lexicon.valued_token c then
     (A, j, ts @ [Tip c], sa, id, i)
   else (A, j, ts, sa, id, i);
 
@@ -695,105 +692,105 @@
 (*get all productions of a NT and NTs chained to it which can
   be started by specified token*)
 fun prods_for prods chains include_none tk nts =
-let
-    fun token_assoc (list, key) =
-      let fun assoc [] result = result
-            | assoc ((keyi, pi) :: pairs) result =
-                if is_some keyi andalso matching_tokens (the keyi, key)
-                   orelse include_none andalso is_none keyi then
-                  assoc pairs (pi @ result)
-                else assoc pairs result;
-      in assoc list [] end;
+  let
+      fun token_assoc (list, key) =
+        let fun assoc [] result = result
+              | assoc ((keyi, pi) :: pairs) result =
+                  if is_some keyi andalso Lexicon.matching_tokens (the keyi, key)
+                     orelse include_none andalso is_none keyi then
+                    assoc pairs (pi @ result)
+                  else assoc pairs result;
+        in assoc list [] end;
 
-    fun get_prods [] result = result
-      | get_prods (nt :: nts) result =
-        let val nt_prods = snd (Array.sub (prods, nt));
-        in get_prods nts ((token_assoc (nt_prods, tk)) @ result) end;
-in get_prods (connected_with chains nts nts) [] end;
+      fun get_prods [] result = result
+        | get_prods (nt :: nts) result =
+          let val nt_prods = snd (Array.sub (prods, nt));
+          in get_prods nts ((token_assoc (nt_prods, tk)) @ result) end;
+  in get_prods (connected_with chains nts nts) [] end;
 
 
 fun PROCESSS warned prods chains Estate i c states =
-let
-fun all_prods_for nt = prods_for prods chains true c [nt];
+  let
+    fun all_prods_for nt = prods_for prods chains true c [nt];
 
-fun processS used [] (Si, Sii) = (Si, Sii)
-  | processS used (S :: States) (Si, Sii) =
-      (case S of
-        (_, _, _, Nonterminal (nt, minPrec) :: _, _, _) =>
-          let                                       (*predictor operation*)
-            val (used', new_states) =
-              (case AList.lookup (op =) used nt of
-                SOME (usedPrec, l) =>       (*nonterminal has been processed*)
-                  if usedPrec <= minPrec then
-                                      (*wanted precedence has been processed*)
-                    (used, movedot_lambda S l)
-                  else            (*wanted precedence hasn't been parsed yet*)
-                    let
-                      val tk_prods = all_prods_for nt;
+    fun processS used [] (Si, Sii) = (Si, Sii)
+      | processS used (S :: States) (Si, Sii) =
+          (case S of
+            (_, _, _, Nonterminal (nt, minPrec) :: _, _, _) =>
+              let                                       (*predictor operation*)
+                val (used', new_states) =
+                  (case AList.lookup (op =) used nt of
+                    SOME (usedPrec, l) =>       (*nonterminal has been processed*)
+                      if usedPrec <= minPrec then
+                                          (*wanted precedence has been processed*)
+                        (used, movedot_lambda S l)
+                      else            (*wanted precedence hasn't been parsed yet*)
+                        let
+                          val tk_prods = all_prods_for nt;
 
-                      val States' = mkStates i minPrec nt
-                                      (getRHS' minPrec usedPrec tk_prods);
-                    in (update_prec (nt, minPrec) used,
-                        movedot_lambda S l @ States')
-                    end
+                          val States' = mkStates i minPrec nt
+                                          (getRHS' minPrec usedPrec tk_prods);
+                        in (update_prec (nt, minPrec) used,
+                            movedot_lambda S l @ States')
+                        end
 
-              | NONE =>           (*nonterminal is parsed for the first time*)
-                  let val tk_prods = all_prods_for nt;
-                      val States' = mkStates i minPrec nt
-                                      (getRHS minPrec tk_prods);
-                  in ((nt, (minPrec, [])) :: used, States') end);
+                  | NONE =>           (*nonterminal is parsed for the first time*)
+                      let val tk_prods = all_prods_for nt;
+                          val States' = mkStates i minPrec nt
+                                          (getRHS minPrec tk_prods);
+                      in ((nt, (minPrec, [])) :: used, States') end);
 
-            val dummy =
-              if not (!warned) andalso
-                 length (new_states @ States) > (!branching_level) then
-                (warning "Currently parsed expression could be extremely ambiguous.";
-                 warned := true)
-              else ();
-          in
-            processS used' (new_states @ States) (S :: Si, Sii)
-          end
-      | (_, _, _, Terminal a :: _, _, _) =>               (*scanner operation*)
-          processS used States
-            (S :: Si,
-              if matching_tokens (a, c) then movedot_term S c :: Sii else Sii)
-      | (A, prec, ts, [], id, j) =>                   (*completer operation*)
-          let val tt = if id = "" then ts else [Node (id, ts)] in
-            if j = i then                             (*lambda production?*)
-              let
-                val (used', O) = update_trees used (A, (tt, prec));
+                val dummy =
+                  if not (!warned) andalso
+                     length (new_states @ States) > (!branching_level) then
+                    (warning "Currently parsed expression could be extremely ambiguous.";
+                     warned := true)
+                  else ();
               in
-                case O of
-                  NONE =>
-                    let val Slist = getS A prec Si;
-                        val States' = map (movedot_nonterm tt) Slist;
-                    in processS used' (States' @ States) (S :: Si, Sii) end
-                | SOME n =>
-                    if n >= prec then processS used' States (S :: Si, Sii)
-                    else
-                      let val Slist = getS' A prec n Si;
-                          val States' = map (movedot_nonterm tt) Slist;
-                      in processS used' (States' @ States) (S :: Si, Sii) end
+                processS used' (new_states @ States) (S :: Si, Sii)
               end
-            else
-              let val Slist = getStates Estate i j A prec
-              in processS used (map (movedot_nonterm tt) Slist @ States)
-                          (S :: Si, Sii)
-              end
-          end)
-in processS [] states ([], []) end;
+          | (_, _, _, Terminal a :: _, _, _) =>               (*scanner operation*)
+              processS used States
+                (S :: Si,
+                  if Lexicon.matching_tokens (a, c) then movedot_term S c :: Sii else Sii)
+          | (A, prec, ts, [], id, j) =>                   (*completer operation*)
+              let val tt = if id = "" then ts else [Node (id, ts)] in
+                if j = i then                             (*lambda production?*)
+                  let
+                    val (used', O) = update_trees used (A, (tt, prec));
+                  in
+                    case O of
+                      NONE =>
+                        let val Slist = getS A prec Si;
+                            val States' = map (movedot_nonterm tt) Slist;
+                        in processS used' (States' @ States) (S :: Si, Sii) end
+                    | SOME n =>
+                        if n >= prec then processS used' States (S :: Si, Sii)
+                        else
+                          let val Slist = getS' A prec n Si;
+                              val States' = map (movedot_nonterm tt) Slist;
+                          in processS used' (States' @ States) (S :: Si, Sii) end
+                  end
+                else
+                  let val Slist = getStates Estate i j A prec
+                  in processS used (map (movedot_nonterm tt) Slist @ States)
+                              (S :: Si, Sii)
+                  end
+              end)
+  in processS [] states ([], []) end;
 
 
 fun produce warned prods tags chains stateset i indata prev_token =
   (case Array.sub (stateset, i) of
     [] =>
       let
-        val toks = if is_eof prev_token then indata else prev_token :: indata;
-        val pos = Position.str_of (pos_of_token prev_token);
+        val toks = if Lexicon.is_eof prev_token then indata else prev_token :: indata;
+        val pos = Position.str_of (Lexicon.pos_of_token prev_token);
       in
         if null toks then error ("Inner syntax error: unexpected end of input" ^ pos)
         else error (Pretty.string_of (Pretty.block
           (Pretty.str ("Inner syntax error" ^ pos ^ " at \"") ::
-            Pretty.breaks (map (Pretty.str o str_of_token) (#1 (split_last toks))) @
+            Pretty.breaks (map (Pretty.str o Lexicon.str_of_token) (#1 (split_last toks))) @
             [Pretty.str "\""])))
       end
   | s =>
@@ -807,21 +804,20 @@
        end));
 
 
-fun get_trees l = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE)
-                            l;
+fun get_trees l = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) l;
 
 fun earley prods tags chains startsymbol indata =
   let
-    val start_tag = case Symtab.lookup tags startsymbol of
-                       SOME tag => tag
-                     | NONE   => error ("parse: Unknown startsymbol " ^
-                                        quote startsymbol);
-    val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal eof], "", 0)];
+    val start_tag =
+      (case Symtab.lookup tags startsymbol of
+        SOME tag => tag
+      | NONE => error ("parse: Unknown startsymbol " ^ quote startsymbol));
+    val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal Lexicon.eof], "", 0)];
     val s = length indata + 1;
     val Estate = Array.array (s, []);
   in
     Array.update (Estate, 0, S0);
-    get_trees (produce (Unsynchronized.ref false) prods tags chains Estate 0 indata eof)
+    get_trees (produce (Unsynchronized.ref false) prods tags chains Estate 0 indata Lexicon.eof)
   end;
 
 
@@ -830,9 +826,9 @@
     val end_pos =
       (case try List.last toks of
         NONE => Position.none
-      | SOME (Token (_, _, (_, end_pos))) => end_pos);
+      | SOME (Lexicon.Token (_, _, (_, end_pos))) => end_pos);
     val r =
-      (case earley prods tags chains start (toks @ [mk_eof end_pos]) of
+      (case earley prods tags chains start (toks @ [Lexicon.mk_eof end_pos]) of
         [] => sys_error "parse: no parse trees"
       | pts => pts);
   in r end;
@@ -842,7 +838,8 @@
   let
     fun freeze a = map_range (curry Array.sub a) (Array.length a);
     val prods = maps snd (maps snd (freeze (#prods gram)));
-    fun guess (SOME ([Nonterminal (_, k), Terminal (Token (Literal, s, _)), Nonterminal (_, l)], _, j)) =
+    fun guess (SOME ([Nonterminal (_, k),
+            Terminal (Lexicon.Token (Lexicon.Literal, s, _)), Nonterminal (_, l)], _, j)) =
           if k = j andalso l = j + 1 then SOME (s, true, false, j)
           else if k = j + 1 then if l = j then SOME (s, false, true, j)
             else if l = j + 1 then SOME (s, false, false, j)