# HG changeset patch # User wenzelm # Date 1344515863 -7200 # Node ID a72f8ffecf318306271104056d9692b9a34e53ed # Parent 28d59ce5ebfd950286f42e80618e291f756ef4d7 refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol; diff -r 28d59ce5ebfd -r a72f8ffecf31 src/Pure/General/scan.ML --- a/src/Pure/General/scan.ML Thu Aug 09 14:09:36 2012 +0200 +++ b/src/Pure/General/scan.ML Thu Aug 09 14:37:43 2012 +0200 @@ -39,6 +39,7 @@ sig include BASIC_SCAN val prompt: string -> ('a -> 'b) -> 'a -> 'b + val permissive: ('a -> 'b) -> 'a -> 'b val error: ('a -> 'b) -> 'a -> 'b val catch: ('a -> 'b) -> 'a -> 'b (*exception Fail*) val fail: 'a -> 'b diff -r 28d59ce5ebfd -r a72f8ffecf31 src/Pure/General/scan.scala --- a/src/Pure/General/scan.scala Thu Aug 09 14:09:36 2012 +0200 +++ b/src/Pure/General/scan.scala Thu Aug 09 14:37:43 2012 +0200 @@ -235,6 +235,9 @@ } }.named("quoted_context") + def quoted_prefix(quote: Symbol.Symbol): Parser[String] = + quoted_context(quote, Finished) ^^ (_._1) + /* verbatim text */ @@ -267,6 +270,9 @@ } }.named("verbatim_context") + val verbatim_prefix: Parser[String] = + verbatim_context(Finished) ^^ (_._1) + /* nested comments */ @@ -303,6 +309,11 @@ def comment: Parser[String] = comment_depth(0) ^? { case (x, d) if d == 0 => x } + def comment_content(source: String): String = + { + require(parseAll(comment, source).successful) + source.substring(2, source.length - 2) + } def comment_context(ctxt: Context): Parser[(String, Context)] = { val depth = @@ -318,11 +329,8 @@ else failure("") } - def comment_content(source: String): String = - { - require(parseAll(comment, source).successful) - source.substring(2, source.length - 2) - } + val comment_prefix: Parser[String] = + comment_context(Finished) ^^ (_._1) /* outer syntax tokens */ @@ -360,18 +368,16 @@ (many1(Symbol.is_symbolic_char) | one(sym => Symbol.is_symbolic(sym))) ^^ (x => Token(Token.Kind.SYM_IDENT, x)) + val command_keyword = + keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x)) + val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x)) - // FIXME check - val junk = many(sym => !(Symbol.is_blank(sym))) - val junk1 = many1(sym => !(Symbol.is_blank(sym))) + val bad_delimiter = + (quoted_prefix("\"") | (quoted_prefix("`") | (verbatim_prefix | comment_prefix))) ^^ + (x => Token(Token.Kind.UNPARSED, x)) - val bad_delimiter = - ("\"" | "`" | "{*" | "(*") ~ junk ^^ { case x ~ y => Token(Token.Kind.UNPARSED, x + y) } - val bad = junk1 ^^ (x => Token(Token.Kind.UNPARSED, x)) - - val command_keyword = - keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x)) + val bad = one(_ => true) ^^ (x => Token(Token.Kind.UNPARSED, x)) space | (bad_delimiter | (((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) ||| diff -r 28d59ce5ebfd -r a72f8ffecf31 src/Pure/General/symbol_pos.ML --- a/src/Pure/General/symbol_pos.ML Thu Aug 09 14:09:36 2012 +0200 +++ b/src/Pure/General/symbol_pos.ML Thu Aug 09 14:37:43 2012 +0200 @@ -19,6 +19,9 @@ val scan_string_q: T list -> (Position.T * (T list * Position.T)) * T list val scan_string_qq: T list -> (Position.T * (T list * Position.T)) * T list val scan_string_bq: T list -> (Position.T * (T list * Position.T)) * T list + val recover_string_q: T list -> T list * T list + val recover_string_qq: T list -> T list * T list + val recover_string_bq: T list -> T list * T list val quote_string_q: string -> string val quote_string_qq: string -> string val quote_string_bq: string -> string @@ -26,6 +29,7 @@ T list -> T list * T list val scan_comment_body: (string -> (T list -> T list * T list) -> T list -> T list * T list) -> T list -> T list * T list + val recover_comment: T list -> T list * T list val source: Position.T -> (Symbol.symbol, 'a) Source.source -> (T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source type text = string @@ -98,12 +102,19 @@ (scan_pos --| $$$ q) -- !!! (fn () => "missing quote at end of string") (change_prompt ((Scan.repeat (scan_str q) >> flat) -- ($$$ q |-- scan_pos))); +fun recover_strs q = + $$$ q @@@ (Scan.repeat (Scan.permissive (scan_str q)) >> flat); + in val scan_string_q = scan_strs "'"; val scan_string_qq = scan_strs "\""; val scan_string_bq = scan_strs "`"; +val recover_string_q = recover_strs "'"; +val recover_string_qq = recover_strs "\""; +val recover_string_bq = recover_strs "`"; + end; @@ -140,7 +151,9 @@ Scan.lift ($$$ "*" --| Scan.ahead (~$$$ ")")) || Scan.lift (Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s)) >> single; -val scan_body = change_prompt (Scan.pass 0 (Scan.repeat scan_cmt >> flat)); +val scan_cmts = Scan.pass 0 (Scan.repeat scan_cmt >> flat); + +val scan_body = change_prompt scan_cmts; in @@ -150,6 +163,9 @@ fun scan_comment_body cut = $$$ "(" |-- $$$ "*" |-- cut "missing end of comment" (scan_body --| $$$ "*" --| $$$ ")"); +val recover_comment = + $$$ "(" @@@ $$$ "*" @@@ scan_cmts; + end; diff -r 28d59ce5ebfd -r a72f8ffecf31 src/Pure/Isar/token.ML --- a/src/Pure/Isar/token.ML Thu Aug 09 14:09:36 2012 +0200 +++ b/src/Pure/Isar/token.ML Thu Aug 09 14:37:43 2012 +0200 @@ -303,6 +303,9 @@ (Symbol_Pos.change_prompt ((Scan.repeat scan_verb >> flat) -- ($$$ "*" |-- $$$ "}" |-- Symbol_Pos.scan_pos))); +val recover_verbatim = + $$$ "{" @@@ $$$ "*" @@@ (Scan.repeat scan_verb >> flat); + (* scan space *) @@ -355,7 +358,11 @@ scan_symid >> pair SymIdent) >> uncurry token)); fun recover msg = - Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o Symbol_Pos.symbol) + (Symbol_Pos.recover_string_qq || + Symbol_Pos.recover_string_bq || + recover_verbatim || + Symbol_Pos.recover_comment || + Scan.one (Symbol.is_regular o Symbol_Pos.symbol) >> single) >> (single o token (Error msg)); in diff -r 28d59ce5ebfd -r a72f8ffecf31 src/Pure/ML/ml_lex.ML --- a/src/Pure/ML/ml_lex.ML Thu Aug 09 14:09:36 2012 +0200 +++ b/src/Pure/ML/ml_lex.ML Thu Aug 09 14:37:43 2012 +0200 @@ -235,10 +235,16 @@ val scan_char = $$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ scan_str @@@ scan_gaps @@@ $$$ "\""; +val recover_char = + $$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ Scan.optional (Scan.permissive scan_str @@@ scan_gaps) []; + val scan_string = $$$ "\"" @@@ !!! "missing quote at end of string" ((Scan.repeat (scan_gap || scan_str) >> flat) @@@ $$$ "\""); +val recover_string = + $$$ "\"" @@@ (Scan.repeat (scan_gap || Scan.permissive scan_str) >> flat); + end; @@ -265,8 +271,11 @@ val scan_antiq = Antiquote.scan || scan_ml >> Antiquote.Text; fun recover msg = - Scan.many (((not o Symbol.is_blank) andf Symbol.is_regular) o Symbol_Pos.symbol) - >> (fn cs => [token (Error msg) cs]); + (recover_char || + recover_string || + Symbol_Pos.recover_comment || + Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> single) + >> (single o token (Error msg)); in