added Space, Comment token kinds (keep actual text);
authorwenzelm
Sun, 03 Oct 1999 15:51:38 +0200
changeset 7682 46de8064c93c
parent 7681 4434adbe24a1
child 7683 e74f43f1d8a3
added Space, Comment token kinds (keep actual text); source: do not filter proper;
src/Pure/Isar/outer_lex.ML
--- a/src/Pure/Isar/outer_lex.ML	Fri Oct 01 20:41:58 1999 +0200
+++ b/src/Pure/Isar/outer_lex.ML	Sun Oct 03 15:51:38 1999 +0200
@@ -9,7 +9,7 @@
 sig
   datatype token_kind =
     Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
-    Nat | String | Verbatim | Ignore | Sync | EOF
+    Nat | String | Verbatim | Space | Comment | Sync | EOF
   type token
   val str_of_kind: token_kind -> string
   val stopper: token * (token -> bool)
@@ -27,7 +27,7 @@
     Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
   val source: bool -> (unit -> (Scan.lexicon * Scan.lexicon)) ->
     Position.T -> (Symbol.symbol, 'a) Source.source ->
-    (token, (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source
+    (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
 end;
 
 structure OuterLex: OUTER_LEX =
@@ -40,7 +40,7 @@
 
 datatype token_kind =
   Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
-  Nat | String | Verbatim | Ignore | Sync | EOF;
+  Nat | String | Verbatim | Space | Comment | Sync | EOF;
 
 datatype token = Token of Position.T * (token_kind * string);
 
@@ -56,7 +56,8 @@
   | Nat => "number"
   | String => "string"
   | Verbatim => "verbatim text"
-  | Ignore => "ignored text"
+  | Space => "white space"
+  | Comment => "comment text"
   | Sync => "sync marker"
   | EOF => "end-of-file";
 
@@ -93,7 +94,8 @@
 
 fun name_of (Token (_, (k, _))) = str_of_kind k;
 
-fun is_proper (Token (_, (Ignore, _))) = false
+fun is_proper (Token (_, (Space, _))) = false
+  | is_proper (Token (_, (Comment, _))) = false
   | is_proper _ = true;
 
 
@@ -139,7 +141,7 @@
 (* scan strings *)
 
 val scan_str =
-  scan_blank >> K Symbol.space ||
+  scan_blank ||
   keep_line ($$ "\\" |-- Scan.one (Symbol.not_sync andf Symbol.not_eof)) ||
   keep_line (Scan.one (not_equal "\\" andf not_equal "\"" andf
     Symbol.not_sync andf Symbol.not_eof));
@@ -168,8 +170,8 @@
 val is_space = Symbol.is_blank andf not_equal "\n";
 
 val scan_space =
-  keep_line (Scan.any1 is_space) |-- Scan.optional (incr_line ($$ "\n")) "" ||
-  keep_line (Scan.any is_space) |-- incr_line ($$ "\n");
+  (keep_line (Scan.any1 is_space) -- Scan.optional (incr_line ($$ "\n")) "" ||
+    keep_line (Scan.any is_space) -- incr_line ($$ "\n")) >> (fn (cs, c) => implode cs ^ c);
 
 
 (* scan nested comments *)
@@ -185,7 +187,7 @@
   keep_line ($$ "(" -- $$ "*") |--
     !! (lex_err (K "missing end of comment"))
       (change_prompt
-        (Scan.pass 0 (Scan.repeat scan_cmt) |-- keep_line ($$ "*" -- $$ ")") >> K ""));
+        (Scan.pass 0 (Scan.repeat scan_cmt >> implode) --| keep_line ($$ "*" -- $$ ")")));
 
 
 (* scan token *)
@@ -193,14 +195,13 @@
 fun scan (lex1, lex2) (pos, cs) =
   let
     fun token k x = Token (pos, (k, x));
-    fun ignore _ = token Ignore "";
     fun sync _ = token Sync Symbol.sync;
 
     val scanner =
       scan_string >> token String ||
       scan_verbatim >> token Verbatim ||
-      scan_space >> ignore ||
-      scan_comment >> ignore ||
+      scan_space >> token Space ||
+      scan_comment >> token Comment ||
       keep_line (Scan.one Symbol.is_sync >> sync) ||
       keep_line (Scan.max token_leq
         (Scan.max token_leq
@@ -223,8 +224,7 @@
 
 fun source do_recover get_lex pos src =
   Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
-    (if do_recover then Some recover else None) src
-  |> Source.filter is_proper;
+    (if do_recover then Some recover else None) src;
 
 
 end;