refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
--- a/src/Pure/General/scan.ML Thu Aug 09 14:09:36 2012 +0200
+++ b/src/Pure/General/scan.ML Thu Aug 09 14:37:43 2012 +0200
@@ -39,6 +39,7 @@
sig
include BASIC_SCAN
val prompt: string -> ('a -> 'b) -> 'a -> 'b
+ val permissive: ('a -> 'b) -> 'a -> 'b
val error: ('a -> 'b) -> 'a -> 'b
val catch: ('a -> 'b) -> 'a -> 'b (*exception Fail*)
val fail: 'a -> 'b
--- a/src/Pure/General/scan.scala Thu Aug 09 14:09:36 2012 +0200
+++ b/src/Pure/General/scan.scala Thu Aug 09 14:37:43 2012 +0200
@@ -235,6 +235,9 @@
}
}.named("quoted_context")
+ def quoted_prefix(quote: Symbol.Symbol): Parser[String] =
+ quoted_context(quote, Finished) ^^ (_._1)
+
/* verbatim text */
@@ -267,6 +270,9 @@
}
}.named("verbatim_context")
+ val verbatim_prefix: Parser[String] =
+ verbatim_context(Finished) ^^ (_._1)
+
/* nested comments */
@@ -303,6 +309,11 @@
def comment: Parser[String] =
comment_depth(0) ^? { case (x, d) if d == 0 => x }
+ def comment_content(source: String): String =
+ {
+ require(parseAll(comment, source).successful)
+ source.substring(2, source.length - 2)
+ }
def comment_context(ctxt: Context): Parser[(String, Context)] =
{
val depth =
@@ -318,11 +329,8 @@
else failure("")
}
- def comment_content(source: String): String =
- {
- require(parseAll(comment, source).successful)
- source.substring(2, source.length - 2)
- }
+ val comment_prefix: Parser[String] =
+ comment_context(Finished) ^^ (_._1)
/* outer syntax tokens */
@@ -360,18 +368,16 @@
(many1(Symbol.is_symbolic_char) | one(sym => Symbol.is_symbolic(sym))) ^^
(x => Token(Token.Kind.SYM_IDENT, x))
+ val command_keyword =
+ keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
+
val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
- // FIXME check
- val junk = many(sym => !(Symbol.is_blank(sym)))
- val junk1 = many1(sym => !(Symbol.is_blank(sym)))
+ val bad_delimiter =
+ (quoted_prefix("\"") | (quoted_prefix("`") | (verbatim_prefix | comment_prefix))) ^^
+ (x => Token(Token.Kind.UNPARSED, x))
- val bad_delimiter =
- ("\"" | "`" | "{*" | "(*") ~ junk ^^ { case x ~ y => Token(Token.Kind.UNPARSED, x + y) }
- val bad = junk1 ^^ (x => Token(Token.Kind.UNPARSED, x))
-
- val command_keyword =
- keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
+ val bad = one(_ => true) ^^ (x => Token(Token.Kind.UNPARSED, x))
space | (bad_delimiter |
(((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
--- a/src/Pure/General/symbol_pos.ML Thu Aug 09 14:09:36 2012 +0200
+++ b/src/Pure/General/symbol_pos.ML Thu Aug 09 14:37:43 2012 +0200
@@ -19,6 +19,9 @@
val scan_string_q: T list -> (Position.T * (T list * Position.T)) * T list
val scan_string_qq: T list -> (Position.T * (T list * Position.T)) * T list
val scan_string_bq: T list -> (Position.T * (T list * Position.T)) * T list
+ val recover_string_q: T list -> T list * T list
+ val recover_string_qq: T list -> T list * T list
+ val recover_string_bq: T list -> T list * T list
val quote_string_q: string -> string
val quote_string_qq: string -> string
val quote_string_bq: string -> string
@@ -26,6 +29,7 @@
T list -> T list * T list
val scan_comment_body: (string -> (T list -> T list * T list) -> T list -> T list * T list) ->
T list -> T list * T list
+ val recover_comment: T list -> T list * T list
val source: Position.T -> (Symbol.symbol, 'a) Source.source ->
(T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
type text = string
@@ -98,12 +102,19 @@
(scan_pos --| $$$ q) -- !!! (fn () => "missing quote at end of string")
(change_prompt ((Scan.repeat (scan_str q) >> flat) -- ($$$ q |-- scan_pos)));
+fun recover_strs q =
+ $$$ q @@@ (Scan.repeat (Scan.permissive (scan_str q)) >> flat);
+
in
val scan_string_q = scan_strs "'";
val scan_string_qq = scan_strs "\"";
val scan_string_bq = scan_strs "`";
+val recover_string_q = recover_strs "'";
+val recover_string_qq = recover_strs "\"";
+val recover_string_bq = recover_strs "`";
+
end;
@@ -140,7 +151,9 @@
Scan.lift ($$$ "*" --| Scan.ahead (~$$$ ")")) ||
Scan.lift (Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s)) >> single;
-val scan_body = change_prompt (Scan.pass 0 (Scan.repeat scan_cmt >> flat));
+val scan_cmts = Scan.pass 0 (Scan.repeat scan_cmt >> flat);
+
+val scan_body = change_prompt scan_cmts;
in
@@ -150,6 +163,9 @@
fun scan_comment_body cut =
$$$ "(" |-- $$$ "*" |-- cut "missing end of comment" (scan_body --| $$$ "*" --| $$$ ")");
+val recover_comment =
+ $$$ "(" @@@ $$$ "*" @@@ scan_cmts;
+
end;
--- a/src/Pure/Isar/token.ML Thu Aug 09 14:09:36 2012 +0200
+++ b/src/Pure/Isar/token.ML Thu Aug 09 14:37:43 2012 +0200
@@ -303,6 +303,9 @@
(Symbol_Pos.change_prompt
((Scan.repeat scan_verb >> flat) -- ($$$ "*" |-- $$$ "}" |-- Symbol_Pos.scan_pos)));
+val recover_verbatim =
+ $$$ "{" @@@ $$$ "*" @@@ (Scan.repeat scan_verb >> flat);
+
(* scan space *)
@@ -355,7 +358,11 @@
scan_symid >> pair SymIdent) >> uncurry token));
fun recover msg =
- Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o Symbol_Pos.symbol)
+ (Symbol_Pos.recover_string_qq ||
+ Symbol_Pos.recover_string_bq ||
+ recover_verbatim ||
+ Symbol_Pos.recover_comment ||
+ Scan.one (Symbol.is_regular o Symbol_Pos.symbol) >> single)
>> (single o token (Error msg));
in
--- a/src/Pure/ML/ml_lex.ML Thu Aug 09 14:09:36 2012 +0200
+++ b/src/Pure/ML/ml_lex.ML Thu Aug 09 14:37:43 2012 +0200
@@ -235,10 +235,16 @@
val scan_char =
$$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ scan_str @@@ scan_gaps @@@ $$$ "\"";
+val recover_char =
+ $$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ Scan.optional (Scan.permissive scan_str @@@ scan_gaps) [];
+
val scan_string =
$$$ "\"" @@@ !!! "missing quote at end of string"
((Scan.repeat (scan_gap || scan_str) >> flat) @@@ $$$ "\"");
+val recover_string =
+ $$$ "\"" @@@ (Scan.repeat (scan_gap || Scan.permissive scan_str) >> flat);
+
end;
@@ -265,8 +271,11 @@
val scan_antiq = Antiquote.scan || scan_ml >> Antiquote.Text;
fun recover msg =
- Scan.many (((not o Symbol.is_blank) andf Symbol.is_regular) o Symbol_Pos.symbol)
- >> (fn cs => [token (Error msg) cs]);
+ (recover_char ||
+ recover_string ||
+ Symbol_Pos.recover_comment ||
+ Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> single)
+ >> (single o token (Error msg));
in