# HG changeset patch # User wenzelm # Date 1184015565 -7200 # Node ID f5d315390edcfb34dee95fe84fec27bdd4a0bec8 # Parent 1114cc909800f93107fe86d88522ca259b036242 Malformed token: error msg; scan: explicit handling of malformed symbols from previous stage; source: interactive flag indicates intermittent error_msg; tuned; diff -r 1114cc909800 -r f5d315390edc src/Pure/Isar/outer_lex.ML --- a/src/Pure/Isar/outer_lex.ML Mon Jul 09 23:12:44 2007 +0200 +++ b/src/Pure/Isar/outer_lex.ML Mon Jul 09 23:12:45 2007 +0200 @@ -9,7 +9,7 @@ sig datatype token_kind = Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | - Nat | String | AltString | Verbatim | Space | Comment | Sync | Malformed | EOF + Nat | String | AltString | Verbatim | Space | Comment | Sync | Malformed of string | EOF eqtype token val str_of_kind: token_kind -> string val stopper: token * (token -> bool) @@ -39,7 +39,7 @@ val scan_string: Position.T * Symbol.symbol list -> string * (Position.T * Symbol.symbol list) val scan: (Scan.lexicon * Scan.lexicon) -> Position.T * Symbol.symbol list -> token * (Position.T * Symbol.symbol list) - val source: bool -> (unit -> (Scan.lexicon * Scan.lexicon)) -> + val source: bool option -> (unit -> (Scan.lexicon * Scan.lexicon)) -> Position.T -> (Symbol.symbol, 'a) Source.source -> (token, Position.T * (Symbol.symbol, 'a) Source.source) Source.source val source_proper: (token, 'a) Source.source -> @@ -57,7 +57,7 @@ datatype token_kind = Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | - Nat | String | AltString | Verbatim | Space | Comment | Sync | Malformed | EOF; + Nat | String | AltString | Verbatim | Space | Comment | Sync | Malformed of string | EOF; datatype token = Token of Position.T * (token_kind * string); @@ -77,21 +77,12 @@ | Space => "white space" | Comment => "comment text" | Sync => "sync marker" - | Malformed => "bad input" + | Malformed _ => "bad input" | EOF => "end-of-file"; (* control tokens *) -fun not_sync (Token (_, (Sync, _))) = false - | not_sync _ = true; - -val malformed = Token (Position.none, (Malformed, "")); -fun malformed_of xs = Token (Position.none, (Malformed, implode xs)); - - -(* eof token *) - val eof = Token (Position.none, (EOF, "")); fun is_eof (Token (_, (EOF, _))) = true @@ -101,6 +92,10 @@ val not_eof = not o is_eof; +fun not_sync (Token (_, (Sync, _))) = false + | not_sync _ = true; + + (* get position *) fun position_of (Token (pos, _)) = pos; @@ -136,10 +131,10 @@ (* blanks and newlines -- space tokens obey lines *) -fun is_blank (Token (_, (Space, s))) = not (String.isSuffix "\n" s) +fun is_blank (Token (_, (Space, x))) = not (String.isSuffix "\n" x) | is_blank _ = false; -fun is_newline (Token (_, (Space, s))) = String.isSuffix "\n" s +fun is_newline (Token (_, (Space, x))) = String.isSuffix "\n" x | is_newline _ = false; @@ -271,6 +266,25 @@ (Scan.pass 0 (Scan.repeat scan_cmt >> implode) --| keep_line ($$ "*" -- $$ ")"))); +(* scan malformed symbols *) + +local + +val scan_mal = + scan_blank || + keep_line (Scan.one (fn s => + s <> Symbol.end_malformed andalso Symbol.not_sync s andalso Symbol.not_eof s)); + +in + +val scan_malformed = + keep_line ($$ Symbol.malformed) |-- + change_prompt (Scan.repeat scan_mal >> (Output.escape o implode)) + --| keep_line (Scan.option ($$ Symbol.end_malformed)); + +end; + + (* scan token *) fun scan (lex1, lex2) = @@ -285,6 +299,7 @@ scan_verbatim >> token Verbatim || scan_space >> token Space || scan_comment >> token Comment || + scan_malformed >> token (Malformed "Malformed symbolic character.") || keep_line (Scan.one Symbol.is_sync >> sync) || keep_line (Scan.max token_leq (Scan.max token_leq @@ -303,12 +318,22 @@ (* token sources *) +local + val is_junk = (not o Symbol.is_blank) andf Symbol.not_sync andf Symbol.not_eof; -fun recover xs = (keep_line (Scan.many is_junk) >> (fn ts => [malformed_of ts])) xs; + +fun recover interactive msg x = + (if interactive then Output.error_msg msg else (); + (Scan.state :-- (fn pos => keep_line (Scan.many is_junk) + >> (fn xs => [Token (pos, (Malformed msg, implode xs))])) >> #2) x); + +in fun source do_recover get_lex pos src = Source.source' pos Symbol.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) - (if do_recover then SOME recover else NONE) src; + (Option.map recover do_recover) src; + +end; fun source_proper src = src |> Source.filter is_proper;