refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
authorwenzelm
Thu Aug 09 14:37:43 2012 +0200 (2012-08-09)
changeset 48743a72f8ffecf31
parent 48742 28d59ce5ebfd
child 48744 4b4ece802cb3
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
src/Pure/General/scan.ML
src/Pure/General/scan.scala
src/Pure/General/symbol_pos.ML
src/Pure/Isar/token.ML
src/Pure/ML/ml_lex.ML
     1.1 --- a/src/Pure/General/scan.ML	Thu Aug 09 14:09:36 2012 +0200
     1.2 +++ b/src/Pure/General/scan.ML	Thu Aug 09 14:37:43 2012 +0200
     1.3 @@ -39,6 +39,7 @@
     1.4  sig
     1.5    include BASIC_SCAN
     1.6    val prompt: string -> ('a -> 'b) -> 'a -> 'b
     1.7 +  val permissive: ('a -> 'b) -> 'a -> 'b
     1.8    val error: ('a -> 'b) -> 'a -> 'b
     1.9    val catch: ('a -> 'b) -> 'a -> 'b    (*exception Fail*)
    1.10    val fail: 'a -> 'b
     2.1 --- a/src/Pure/General/scan.scala	Thu Aug 09 14:09:36 2012 +0200
     2.2 +++ b/src/Pure/General/scan.scala	Thu Aug 09 14:37:43 2012 +0200
     2.3 @@ -235,6 +235,9 @@
     2.4        }
     2.5      }.named("quoted_context")
     2.6  
     2.7 +    def quoted_prefix(quote: Symbol.Symbol): Parser[String] =
     2.8 +      quoted_context(quote, Finished) ^^ (_._1)
     2.9 +
    2.10  
    2.11      /* verbatim text */
    2.12  
    2.13 @@ -267,6 +270,9 @@
    2.14        }
    2.15      }.named("verbatim_context")
    2.16  
    2.17 +    val verbatim_prefix: Parser[String] =
    2.18 +      verbatim_context(Finished) ^^ (_._1)
    2.19 +
    2.20  
    2.21      /* nested comments */
    2.22  
    2.23 @@ -303,6 +309,11 @@
    2.24      def comment: Parser[String] =
    2.25        comment_depth(0) ^? { case (x, d) if d == 0 => x }
    2.26  
    2.27 +    def comment_content(source: String): String =
    2.28 +    {
    2.29 +      require(parseAll(comment, source).successful)
    2.30 +      source.substring(2, source.length - 2)
    2.31 +    }
    2.32      def comment_context(ctxt: Context): Parser[(String, Context)] =
    2.33      {
    2.34        val depth =
    2.35 @@ -318,11 +329,8 @@
    2.36        else failure("")
    2.37      }
    2.38  
    2.39 -    def comment_content(source: String): String =
    2.40 -    {
    2.41 -      require(parseAll(comment, source).successful)
    2.42 -      source.substring(2, source.length - 2)
    2.43 -    }
    2.44 +    val comment_prefix: Parser[String] =
    2.45 +      comment_context(Finished) ^^ (_._1)
    2.46  
    2.47  
    2.48      /* outer syntax tokens */
    2.49 @@ -360,18 +368,16 @@
    2.50          (many1(Symbol.is_symbolic_char) | one(sym => Symbol.is_symbolic(sym))) ^^
    2.51          (x => Token(Token.Kind.SYM_IDENT, x))
    2.52  
    2.53 +      val command_keyword =
    2.54 +        keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
    2.55 +
    2.56        val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
    2.57  
    2.58 -      // FIXME check
    2.59 -      val junk = many(sym => !(Symbol.is_blank(sym)))
    2.60 -      val junk1 = many1(sym => !(Symbol.is_blank(sym)))
    2.61 +      val bad_delimiter =
    2.62 +        (quoted_prefix("\"") | (quoted_prefix("`") | (verbatim_prefix | comment_prefix))) ^^
    2.63 +          (x => Token(Token.Kind.UNPARSED, x))
    2.64  
    2.65 -      val bad_delimiter =
    2.66 -        ("\"" | "`" | "{*" | "(*") ~ junk ^^ { case x ~ y => Token(Token.Kind.UNPARSED, x + y) }
    2.67 -      val bad = junk1 ^^ (x => Token(Token.Kind.UNPARSED, x))
    2.68 -
    2.69 -      val command_keyword =
    2.70 -        keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
    2.71 +      val bad = one(_ => true) ^^ (x => Token(Token.Kind.UNPARSED, x))
    2.72  
    2.73        space | (bad_delimiter |
    2.74          (((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
     3.1 --- a/src/Pure/General/symbol_pos.ML	Thu Aug 09 14:09:36 2012 +0200
     3.2 +++ b/src/Pure/General/symbol_pos.ML	Thu Aug 09 14:37:43 2012 +0200
     3.3 @@ -19,6 +19,9 @@
     3.4    val scan_string_q: T list -> (Position.T * (T list * Position.T)) * T list
     3.5    val scan_string_qq: T list -> (Position.T * (T list * Position.T)) * T list
     3.6    val scan_string_bq: T list -> (Position.T * (T list * Position.T)) * T list
     3.7 +  val recover_string_q: T list -> T list * T list
     3.8 +  val recover_string_qq: T list -> T list * T list
     3.9 +  val recover_string_bq: T list -> T list * T list
    3.10    val quote_string_q: string -> string
    3.11    val quote_string_qq: string -> string
    3.12    val quote_string_bq: string -> string
    3.13 @@ -26,6 +29,7 @@
    3.14      T list -> T list * T list
    3.15    val scan_comment_body: (string -> (T list -> T list * T list) -> T list -> T list * T list) ->
    3.16      T list -> T list * T list
    3.17 +  val recover_comment: T list -> T list * T list
    3.18    val source: Position.T -> (Symbol.symbol, 'a) Source.source ->
    3.19      (T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
    3.20    type text = string
    3.21 @@ -98,12 +102,19 @@
    3.22    (scan_pos --| $$$ q) -- !!! (fn () => "missing quote at end of string")
    3.23      (change_prompt ((Scan.repeat (scan_str q) >> flat) -- ($$$ q |-- scan_pos)));
    3.24  
    3.25 +fun recover_strs q =
    3.26 +  $$$ q @@@ (Scan.repeat (Scan.permissive (scan_str q)) >> flat);
    3.27 +
    3.28  in
    3.29  
    3.30  val scan_string_q = scan_strs "'";
    3.31  val scan_string_qq = scan_strs "\"";
    3.32  val scan_string_bq = scan_strs "`";
    3.33  
    3.34 +val recover_string_q = recover_strs "'";
    3.35 +val recover_string_qq = recover_strs "\"";
    3.36 +val recover_string_bq = recover_strs "`";
    3.37 +
    3.38  end;
    3.39  
    3.40  
    3.41 @@ -140,7 +151,9 @@
    3.42    Scan.lift ($$$ "*" --| Scan.ahead (~$$$ ")")) ||
    3.43    Scan.lift (Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s)) >> single;
    3.44  
    3.45 -val scan_body = change_prompt (Scan.pass 0 (Scan.repeat scan_cmt >> flat));
    3.46 +val scan_cmts = Scan.pass 0 (Scan.repeat scan_cmt >> flat);
    3.47 +
    3.48 +val scan_body = change_prompt scan_cmts;
    3.49  
    3.50  in
    3.51  
    3.52 @@ -150,6 +163,9 @@
    3.53  fun scan_comment_body cut =
    3.54    $$$ "(" |-- $$$ "*" |-- cut "missing end of comment" (scan_body --| $$$ "*" --| $$$ ")");
    3.55  
    3.56 +val recover_comment =
    3.57 +  $$$ "(" @@@ $$$ "*" @@@ scan_cmts;
    3.58 +
    3.59  end;
    3.60  
    3.61  
     4.1 --- a/src/Pure/Isar/token.ML	Thu Aug 09 14:09:36 2012 +0200
     4.2 +++ b/src/Pure/Isar/token.ML	Thu Aug 09 14:37:43 2012 +0200
     4.3 @@ -303,6 +303,9 @@
     4.4      (Symbol_Pos.change_prompt
     4.5        ((Scan.repeat scan_verb >> flat) -- ($$$ "*" |-- $$$ "}" |-- Symbol_Pos.scan_pos)));
     4.6  
     4.7 +val recover_verbatim =
     4.8 +  $$$ "{" @@@ $$$ "*" @@@ (Scan.repeat scan_verb >> flat);
     4.9 +
    4.10  
    4.11  (* scan space *)
    4.12  
    4.13 @@ -355,7 +358,11 @@
    4.14          scan_symid >> pair SymIdent) >> uncurry token));
    4.15  
    4.16  fun recover msg =
    4.17 -  Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o Symbol_Pos.symbol)
    4.18 +  (Symbol_Pos.recover_string_qq ||
    4.19 +    Symbol_Pos.recover_string_bq ||
    4.20 +    recover_verbatim ||
    4.21 +    Symbol_Pos.recover_comment ||
    4.22 +    Scan.one (Symbol.is_regular o Symbol_Pos.symbol) >> single)
    4.23    >> (single o token (Error msg));
    4.24  
    4.25  in
     5.1 --- a/src/Pure/ML/ml_lex.ML	Thu Aug 09 14:09:36 2012 +0200
     5.2 +++ b/src/Pure/ML/ml_lex.ML	Thu Aug 09 14:37:43 2012 +0200
     5.3 @@ -235,10 +235,16 @@
     5.4  val scan_char =
     5.5    $$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ scan_str @@@ scan_gaps @@@ $$$ "\"";
     5.6  
     5.7 +val recover_char =
     5.8 +  $$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ Scan.optional (Scan.permissive scan_str @@@ scan_gaps) [];
     5.9 +
    5.10  val scan_string =
    5.11    $$$ "\"" @@@ !!! "missing quote at end of string"
    5.12      ((Scan.repeat (scan_gap || scan_str) >> flat) @@@ $$$ "\"");
    5.13  
    5.14 +val recover_string =
    5.15 +  $$$ "\"" @@@ (Scan.repeat (scan_gap || Scan.permissive scan_str) >> flat);
    5.16 +
    5.17  end;
    5.18  
    5.19  
    5.20 @@ -265,8 +271,11 @@
    5.21  val scan_antiq = Antiquote.scan || scan_ml >> Antiquote.Text;
    5.22  
    5.23  fun recover msg =
    5.24 -  Scan.many (((not o Symbol.is_blank) andf Symbol.is_regular) o Symbol_Pos.symbol)
    5.25 -  >> (fn cs => [token (Error msg) cs]);
    5.26 + (recover_char ||
    5.27 +  recover_string ||
    5.28 +  Symbol_Pos.recover_comment ||
    5.29 +  Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> single)
    5.30 +  >> (single o token (Error msg));
    5.31  
    5.32  in
    5.33