Malformed token: error msg;
authorwenzelm
Mon, 09 Jul 2007 23:12:45 +0200
changeset 23678 f5d315390edc
parent 23677 1114cc909800
child 23679 57dceb84d1a0
Malformed token: error msg; scan: explicit handling of malformed symbols from previous stage; source: interactive flag indicates intermittent error_msg; tuned;
src/Pure/Isar/outer_lex.ML
--- a/src/Pure/Isar/outer_lex.ML	Mon Jul 09 23:12:44 2007 +0200
+++ b/src/Pure/Isar/outer_lex.ML	Mon Jul 09 23:12:45 2007 +0200
@@ -9,7 +9,7 @@
 sig
   datatype token_kind =
     Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
-    Nat | String | AltString | Verbatim | Space | Comment | Sync | Malformed | EOF
+    Nat | String | AltString | Verbatim | Space | Comment | Sync | Malformed of string | EOF
   eqtype token
   val str_of_kind: token_kind -> string
   val stopper: token * (token -> bool)
@@ -39,7 +39,7 @@
   val scan_string: Position.T * Symbol.symbol list -> string * (Position.T * Symbol.symbol list)
   val scan: (Scan.lexicon * Scan.lexicon) ->
     Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
-  val source: bool -> (unit -> (Scan.lexicon * Scan.lexicon)) ->
+  val source: bool option -> (unit -> (Scan.lexicon * Scan.lexicon)) ->
     Position.T -> (Symbol.symbol, 'a) Source.source ->
     (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
   val source_proper: (token, 'a) Source.source ->
@@ -57,7 +57,7 @@
 
 datatype token_kind =
   Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
-  Nat | String | AltString | Verbatim | Space | Comment | Sync | Malformed | EOF;
+  Nat | String | AltString | Verbatim | Space | Comment | Sync | Malformed of string | EOF;
 
 datatype token = Token of Position.T * (token_kind * string);
 
@@ -77,21 +77,12 @@
   | Space => "white space"
   | Comment => "comment text"
   | Sync => "sync marker"
-  | Malformed => "bad input"
+  | Malformed _ => "bad input"
   | EOF => "end-of-file";
 
 
 (* control tokens *)
 
-fun not_sync (Token (_, (Sync, _))) = false
-  | not_sync _ = true;
-
-val malformed = Token (Position.none, (Malformed, ""));
-fun malformed_of xs = Token (Position.none, (Malformed, implode xs));
-
-
-(* eof token *)
-
 val eof = Token (Position.none, (EOF, ""));
 
 fun is_eof (Token (_, (EOF, _))) = true
@@ -101,6 +92,10 @@
 val not_eof = not o is_eof;
 
 
+fun not_sync (Token (_, (Sync, _))) = false
+  | not_sync _ = true;
+
+
 (* get position *)
 
 fun position_of (Token (pos, _)) = pos;
@@ -136,10 +131,10 @@
 
 (* blanks and newlines -- space tokens obey lines *)
 
-fun is_blank (Token (_, (Space, s))) = not (String.isSuffix "\n" s)
+fun is_blank (Token (_, (Space, x))) = not (String.isSuffix "\n" x)
   | is_blank _ = false;
 
-fun is_newline (Token (_, (Space, s))) = String.isSuffix "\n" s
+fun is_newline (Token (_, (Space, x))) = String.isSuffix "\n" x
   | is_newline _ = false;
 
 
@@ -271,6 +266,25 @@
         (Scan.pass 0 (Scan.repeat scan_cmt >> implode) --| keep_line ($$ "*" -- $$ ")")));
 
 
+(* scan malformed symbols *)
+
+local
+
+val scan_mal =
+  scan_blank ||
+  keep_line (Scan.one (fn s =>
+    s <> Symbol.end_malformed andalso Symbol.not_sync s andalso Symbol.not_eof s));
+
+in
+
+val scan_malformed =
+  keep_line ($$ Symbol.malformed) |--
+    change_prompt (Scan.repeat scan_mal >> (Output.escape o implode))
+      --| keep_line (Scan.option ($$ Symbol.end_malformed));
+
+end;
+
+
 (* scan token *)
 
 fun scan (lex1, lex2) =
@@ -285,6 +299,7 @@
         scan_verbatim >> token Verbatim ||
         scan_space >> token Space ||
         scan_comment >> token Comment ||
+        scan_malformed >> token (Malformed "Malformed symbolic character.") ||
         keep_line (Scan.one Symbol.is_sync >> sync) ||
         keep_line (Scan.max token_leq
           (Scan.max token_leq
@@ -303,12 +318,22 @@
 
 (* token sources *)
 
+local
+
 val is_junk = (not o Symbol.is_blank) andf Symbol.not_sync andf Symbol.not_eof;
-fun recover xs = (keep_line (Scan.many is_junk) >> (fn ts => [malformed_of ts])) xs;
+
+fun recover interactive msg x =
+  (if interactive then Output.error_msg msg else ();
+    (Scan.state :-- (fn pos => keep_line (Scan.many is_junk)
+      >> (fn xs => [Token (pos, (Malformed msg, implode xs))])) >> #2) x);
+
+in
 
 fun source do_recover get_lex pos src =
   Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
-    (if do_recover then SOME recover else NONE) src;
+    (Option.map recover do_recover) src;
+
+end;
 
 fun source_proper src = src |> Source.filter is_proper;