refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
authorwenzelm
Thu, 09 Aug 2012 14:37:43 +0200
changeset 48743 a72f8ffecf31
parent 48742 28d59ce5ebfd
child 48744 4b4ece802cb3
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
src/Pure/General/scan.ML
src/Pure/General/scan.scala
src/Pure/General/symbol_pos.ML
src/Pure/Isar/token.ML
src/Pure/ML/ml_lex.ML
--- a/src/Pure/General/scan.ML	Thu Aug 09 14:09:36 2012 +0200
+++ b/src/Pure/General/scan.ML	Thu Aug 09 14:37:43 2012 +0200
@@ -39,6 +39,7 @@
 sig
   include BASIC_SCAN
   val prompt: string -> ('a -> 'b) -> 'a -> 'b
+  val permissive: ('a -> 'b) -> 'a -> 'b
   val error: ('a -> 'b) -> 'a -> 'b
   val catch: ('a -> 'b) -> 'a -> 'b    (*exception Fail*)
   val fail: 'a -> 'b
--- a/src/Pure/General/scan.scala	Thu Aug 09 14:09:36 2012 +0200
+++ b/src/Pure/General/scan.scala	Thu Aug 09 14:37:43 2012 +0200
@@ -235,6 +235,9 @@
       }
     }.named("quoted_context")
 
+    def quoted_prefix(quote: Symbol.Symbol): Parser[String] =
+      quoted_context(quote, Finished) ^^ (_._1)
+
 
     /* verbatim text */
 
@@ -267,6 +270,9 @@
       }
     }.named("verbatim_context")
 
+    val verbatim_prefix: Parser[String] =
+      verbatim_context(Finished) ^^ (_._1)
+
 
     /* nested comments */
 
@@ -303,6 +309,11 @@
     def comment: Parser[String] =
       comment_depth(0) ^? { case (x, d) if d == 0 => x }
 
+    def comment_content(source: String): String =
+    {
+      require(parseAll(comment, source).successful)
+      source.substring(2, source.length - 2)
+    }
     def comment_context(ctxt: Context): Parser[(String, Context)] =
     {
       val depth =
@@ -318,11 +329,8 @@
       else failure("")
     }
 
-    def comment_content(source: String): String =
-    {
-      require(parseAll(comment, source).successful)
-      source.substring(2, source.length - 2)
-    }
+    val comment_prefix: Parser[String] =
+      comment_context(Finished) ^^ (_._1)
 
 
     /* outer syntax tokens */
@@ -360,18 +368,16 @@
         (many1(Symbol.is_symbolic_char) | one(sym => Symbol.is_symbolic(sym))) ^^
         (x => Token(Token.Kind.SYM_IDENT, x))
 
+      val command_keyword =
+        keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
+
       val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
 
-      // FIXME check
-      val junk = many(sym => !(Symbol.is_blank(sym)))
-      val junk1 = many1(sym => !(Symbol.is_blank(sym)))
+      val bad_delimiter =
+        (quoted_prefix("\"") | (quoted_prefix("`") | (verbatim_prefix | comment_prefix))) ^^
+          (x => Token(Token.Kind.UNPARSED, x))
 
-      val bad_delimiter =
-        ("\"" | "`" | "{*" | "(*") ~ junk ^^ { case x ~ y => Token(Token.Kind.UNPARSED, x + y) }
-      val bad = junk1 ^^ (x => Token(Token.Kind.UNPARSED, x))
-
-      val command_keyword =
-        keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
+      val bad = one(_ => true) ^^ (x => Token(Token.Kind.UNPARSED, x))
 
       space | (bad_delimiter |
         (((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
--- a/src/Pure/General/symbol_pos.ML	Thu Aug 09 14:09:36 2012 +0200
+++ b/src/Pure/General/symbol_pos.ML	Thu Aug 09 14:37:43 2012 +0200
@@ -19,6 +19,9 @@
   val scan_string_q: T list -> (Position.T * (T list * Position.T)) * T list
   val scan_string_qq: T list -> (Position.T * (T list * Position.T)) * T list
   val scan_string_bq: T list -> (Position.T * (T list * Position.T)) * T list
+  val recover_string_q: T list -> T list * T list
+  val recover_string_qq: T list -> T list * T list
+  val recover_string_bq: T list -> T list * T list
   val quote_string_q: string -> string
   val quote_string_qq: string -> string
   val quote_string_bq: string -> string
@@ -26,6 +29,7 @@
     T list -> T list * T list
   val scan_comment_body: (string -> (T list -> T list * T list) -> T list -> T list * T list) ->
     T list -> T list * T list
+  val recover_comment: T list -> T list * T list
   val source: Position.T -> (Symbol.symbol, 'a) Source.source ->
     (T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
   type text = string
@@ -98,12 +102,19 @@
   (scan_pos --| $$$ q) -- !!! (fn () => "missing quote at end of string")
     (change_prompt ((Scan.repeat (scan_str q) >> flat) -- ($$$ q |-- scan_pos)));
 
+fun recover_strs q =
+  $$$ q @@@ (Scan.repeat (Scan.permissive (scan_str q)) >> flat);
+
 in
 
 val scan_string_q = scan_strs "'";
 val scan_string_qq = scan_strs "\"";
 val scan_string_bq = scan_strs "`";
 
+val recover_string_q = recover_strs "'";
+val recover_string_qq = recover_strs "\"";
+val recover_string_bq = recover_strs "`";
+
 end;
 
 
@@ -140,7 +151,9 @@
   Scan.lift ($$$ "*" --| Scan.ahead (~$$$ ")")) ||
   Scan.lift (Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s)) >> single;
 
-val scan_body = change_prompt (Scan.pass 0 (Scan.repeat scan_cmt >> flat));
+val scan_cmts = Scan.pass 0 (Scan.repeat scan_cmt >> flat);
+
+val scan_body = change_prompt scan_cmts;
 
 in
 
@@ -150,6 +163,9 @@
 fun scan_comment_body cut =
   $$$ "(" |-- $$$ "*" |-- cut "missing end of comment" (scan_body --| $$$ "*" --| $$$ ")");
 
+val recover_comment =
+  $$$ "(" @@@ $$$ "*" @@@ scan_cmts;
+
 end;
 
 
--- a/src/Pure/Isar/token.ML	Thu Aug 09 14:09:36 2012 +0200
+++ b/src/Pure/Isar/token.ML	Thu Aug 09 14:37:43 2012 +0200
@@ -303,6 +303,9 @@
     (Symbol_Pos.change_prompt
       ((Scan.repeat scan_verb >> flat) -- ($$$ "*" |-- $$$ "}" |-- Symbol_Pos.scan_pos)));
 
+val recover_verbatim =
+  $$$ "{" @@@ $$$ "*" @@@ (Scan.repeat scan_verb >> flat);
+
 
 (* scan space *)
 
@@ -355,7 +358,11 @@
         scan_symid >> pair SymIdent) >> uncurry token));
 
 fun recover msg =
-  Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o Symbol_Pos.symbol)
+  (Symbol_Pos.recover_string_qq ||
+    Symbol_Pos.recover_string_bq ||
+    recover_verbatim ||
+    Symbol_Pos.recover_comment ||
+    Scan.one (Symbol.is_regular o Symbol_Pos.symbol) >> single)
   >> (single o token (Error msg));
 
 in
--- a/src/Pure/ML/ml_lex.ML	Thu Aug 09 14:09:36 2012 +0200
+++ b/src/Pure/ML/ml_lex.ML	Thu Aug 09 14:37:43 2012 +0200
@@ -235,10 +235,16 @@
 val scan_char =
   $$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ scan_str @@@ scan_gaps @@@ $$$ "\"";
 
+val recover_char =
+  $$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ Scan.optional (Scan.permissive scan_str @@@ scan_gaps) [];
+
 val scan_string =
   $$$ "\"" @@@ !!! "missing quote at end of string"
     ((Scan.repeat (scan_gap || scan_str) >> flat) @@@ $$$ "\"");
 
+val recover_string =
+  $$$ "\"" @@@ (Scan.repeat (scan_gap || Scan.permissive scan_str) >> flat);
+
 end;
 
 
@@ -265,8 +271,11 @@
 val scan_antiq = Antiquote.scan || scan_ml >> Antiquote.Text;
 
 fun recover msg =
-  Scan.many (((not o Symbol.is_blank) andf Symbol.is_regular) o Symbol_Pos.symbol)
-  >> (fn cs => [token (Error msg) cs]);
+ (recover_char ||
+  recover_string ||
+  Symbol_Pos.recover_comment ||
+  Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> single)
+  >> (single o token (Error msg));
 
 in