Malformed token: error msg;
authorwenzelm
Mon Jul 09 23:12:45 2007 +0200 (2007-07-09)
changeset 23678f5d315390edc
parent 23677 1114cc909800
child 23679 57dceb84d1a0
Malformed token: error msg;
scan: explicit handling of malformed symbols from previous stage;
source: interactive flag indicates intermittent error_msg;
tuned;
src/Pure/Isar/outer_lex.ML
     1.1 --- a/src/Pure/Isar/outer_lex.ML	Mon Jul 09 23:12:44 2007 +0200
     1.2 +++ b/src/Pure/Isar/outer_lex.ML	Mon Jul 09 23:12:45 2007 +0200
     1.3 @@ -9,7 +9,7 @@
     1.4  sig
     1.5    datatype token_kind =
     1.6      Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
     1.7 -    Nat | String | AltString | Verbatim | Space | Comment | Sync | Malformed | EOF
     1.8 +    Nat | String | AltString | Verbatim | Space | Comment | Sync | Malformed of string | EOF
     1.9    eqtype token
    1.10    val str_of_kind: token_kind -> string
    1.11    val stopper: token * (token -> bool)
    1.12 @@ -39,7 +39,7 @@
    1.13    val scan_string: Position.T * Symbol.symbol list -> string * (Position.T * Symbol.symbol list)
    1.14    val scan: (Scan.lexicon * Scan.lexicon) ->
    1.15      Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list)
    1.16 -  val source: bool -> (unit -> (Scan.lexicon * Scan.lexicon)) ->
    1.17 +  val source: bool option -> (unit -> (Scan.lexicon * Scan.lexicon)) ->
    1.18      Position.T -> (Symbol.symbol, 'a) Source.source ->
    1.19      (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
    1.20    val source_proper: (token, 'a) Source.source ->
    1.21 @@ -57,7 +57,7 @@
    1.22  
    1.23  datatype token_kind =
    1.24    Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
    1.25 -  Nat | String | AltString | Verbatim | Space | Comment | Sync | Malformed | EOF;
    1.26 +  Nat | String | AltString | Verbatim | Space | Comment | Sync | Malformed of string | EOF;
    1.27  
    1.28  datatype token = Token of Position.T * (token_kind * string);
    1.29  
    1.30 @@ -77,21 +77,12 @@
    1.31    | Space => "white space"
    1.32    | Comment => "comment text"
    1.33    | Sync => "sync marker"
    1.34 -  | Malformed => "bad input"
    1.35 +  | Malformed _ => "bad input"
    1.36    | EOF => "end-of-file";
    1.37  
    1.38  
    1.39  (* control tokens *)
    1.40  
    1.41 -fun not_sync (Token (_, (Sync, _))) = false
    1.42 -  | not_sync _ = true;
    1.43 -
    1.44 -val malformed = Token (Position.none, (Malformed, ""));
    1.45 -fun malformed_of xs = Token (Position.none, (Malformed, implode xs));
    1.46 -
    1.47 -
    1.48 -(* eof token *)
    1.49 -
    1.50  val eof = Token (Position.none, (EOF, ""));
    1.51  
    1.52  fun is_eof (Token (_, (EOF, _))) = true
    1.53 @@ -101,6 +92,10 @@
    1.54  val not_eof = not o is_eof;
    1.55  
    1.56  
    1.57 +fun not_sync (Token (_, (Sync, _))) = false
    1.58 +  | not_sync _ = true;
    1.59 +
    1.60 +
    1.61  (* get position *)
    1.62  
    1.63  fun position_of (Token (pos, _)) = pos;
    1.64 @@ -136,10 +131,10 @@
    1.65  
    1.66  (* blanks and newlines -- space tokens obey lines *)
    1.67  
    1.68 -fun is_blank (Token (_, (Space, s))) = not (String.isSuffix "\n" s)
    1.69 +fun is_blank (Token (_, (Space, x))) = not (String.isSuffix "\n" x)
    1.70    | is_blank _ = false;
    1.71  
    1.72 -fun is_newline (Token (_, (Space, s))) = String.isSuffix "\n" s
    1.73 +fun is_newline (Token (_, (Space, x))) = String.isSuffix "\n" x
    1.74    | is_newline _ = false;
    1.75  
    1.76  
    1.77 @@ -271,6 +266,25 @@
    1.78          (Scan.pass 0 (Scan.repeat scan_cmt >> implode) --| keep_line ($$ "*" -- $$ ")")));
    1.79  
    1.80  
    1.81 +(* scan malformed symbols *)
    1.82 +
    1.83 +local
    1.84 +
    1.85 +val scan_mal =
    1.86 +  scan_blank ||
    1.87 +  keep_line (Scan.one (fn s =>
    1.88 +    s <> Symbol.end_malformed andalso Symbol.not_sync s andalso Symbol.not_eof s));
    1.89 +
    1.90 +in
    1.91 +
    1.92 +val scan_malformed =
    1.93 +  keep_line ($$ Symbol.malformed) |--
    1.94 +    change_prompt (Scan.repeat scan_mal >> (Output.escape o implode))
    1.95 +      --| keep_line (Scan.option ($$ Symbol.end_malformed));
    1.96 +
    1.97 +end;
    1.98 +
    1.99 +
   1.100  (* scan token *)
   1.101  
   1.102  fun scan (lex1, lex2) =
   1.103 @@ -285,6 +299,7 @@
   1.104          scan_verbatim >> token Verbatim ||
   1.105          scan_space >> token Space ||
   1.106          scan_comment >> token Comment ||
   1.107 +        scan_malformed >> token (Malformed "Malformed symbolic character.") ||
   1.108          keep_line (Scan.one Symbol.is_sync >> sync) ||
   1.109          keep_line (Scan.max token_leq
   1.110            (Scan.max token_leq
   1.111 @@ -303,12 +318,22 @@
   1.112  
   1.113  (* token sources *)
   1.114  
   1.115 +local
   1.116 +
   1.117  val is_junk = (not o Symbol.is_blank) andf Symbol.not_sync andf Symbol.not_eof;
   1.118 -fun recover xs = (keep_line (Scan.many is_junk) >> (fn ts => [malformed_of ts])) xs;
   1.119 +
   1.120 +fun recover interactive msg x =
   1.121 +  (if interactive then Output.error_msg msg else ();
   1.122 +    (Scan.state :-- (fn pos => keep_line (Scan.many is_junk)
   1.123 +      >> (fn xs => [Token (pos, (Malformed msg, implode xs))])) >> #2) x);
   1.124 +
   1.125 +in
   1.126  
   1.127  fun source do_recover get_lex pos src =
   1.128    Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))
   1.129 -    (if do_recover then SOME recover else NONE) src;
   1.130 +    (Option.map recover do_recover) src;
   1.131 +
   1.132 +end;
   1.133  
   1.134  fun source_proper src = src |> Source.filter is_proper;
   1.135