--- a/src/Pure/General/symbol.ML Sat Aug 11 19:34:36 2012 +0200
+++ b/src/Pure/General/symbol.ML Sat Aug 11 20:54:06 2012 +0200
@@ -157,8 +157,9 @@
(* encode_raw *)
fun raw_chr c =
- ord space <= ord c andalso ord c <= ord "~" andalso c <> "." andalso c <> ">"
- orelse ord c >= 128;
+ is_char c andalso
+ (ord space <= ord c andalso ord c <= ord "~" andalso c <> "." andalso c <> ">"
+ orelse ord c >= 128);
fun encode_raw "" = ""
| encode_raw str =
@@ -434,7 +435,8 @@
Scan.one is_plain ||
Scan.one is_utf8 ::: Scan.many is_utf8_trailer >> implode_pseudo_utf8 ||
scan_encoded_newline ||
- ($$ "\\" ^^ $$ "<" ^^ (($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">")) ||
+ ($$ "\\" ^^ $$ "<" ^^
+ (($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ Scan.optional ($$ ">") "")) ||
Scan.this_string "\\<^" ||
Scan.this_string "\\<" ||
Scan.one not_eof;
--- a/src/Pure/General/symbol.scala Sat Aug 11 19:34:36 2012 +0200
+++ b/src/Pure/General/symbol.scala Sat Aug 11 20:54:06 2012 +0200
@@ -37,26 +37,31 @@
private val symbol = new Regex("""(?xs)
\\ < (?:
- \^? [A-Za-z][A-Za-z0-9_']* |
- \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
+ \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* |
+ \^? ([A-Za-z][A-Za-z0-9_']*)? ) >?""")
- private val malformed_symbol = new Regex("(?xs) (?!" + symbol + ")" +
- """ [\ud800-\udbff\ufffd] | \\<\^? """)
-
- val regex_total =
- new Regex(plain + "|" + physical_newline + "|" + symbol + "|" + malformed_symbol + "| .")
+ val regex_total = new Regex(plain + "|" + physical_newline + "|" + symbol + "| .")
/* basic matching */
- def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff')
+ def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || Character.isHighSurrogate(c))
+
+ def is_malformed(s: Symbol): Boolean =
+ s.length match {
+ case 1 =>
+ val c = s(0)
+ Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd'
+ case 2 =>
+ val c1 = s(0)
+ val c2 = s(1)
+ !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2))
+ case _ => !s.endsWith(">")
+ }
def is_physical_newline(s: Symbol): Boolean =
s == "\n" || s == "\r" || s == "\r\n"
- def is_malformed(s: Symbol): Boolean =
- !(s.length == 1 && is_plain(s(0))) && malformed_symbol.pattern.matcher(s).matches
-
class Matcher(text: CharSequence)
{
private val matcher = regex_total.pattern.matcher(text)
--- a/src/Pure/Thy/thy_syntax.ML Sat Aug 11 19:34:36 2012 +0200
+++ b/src/Pure/Thy/thy_syntax.ML Sat Aug 11 20:54:06 2012 +0200
@@ -77,7 +77,7 @@
Symbol_Pos.explode (Token.source_position_of tok)
|> map_filter (fn (sym, pos) =>
if Symbol.is_malformed sym
- then SOME ((pos, Isabelle_Markup.bad), "Malformed symbol") else NONE);
+ then SOME ((pos, Isabelle_Markup.bad), "Malformed symbolic character") else NONE);
val is_malformed = Token.is_error tok orelse not (null malformed_symbols);
val (markup, txt) = token_markup tok;
val reports = ((Token.position_of tok, markup), txt) :: malformed_symbols;