clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
--- a/src/Pure/General/scan.scala Mon Jan 11 20:50:03 2010 +0100
+++ b/src/Pure/General/scan.scala Mon Jan 11 21:37:48 2010 +0100
@@ -179,7 +179,7 @@
private def quoted(quote: String): Parser[String] =
{
quote ~
- rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_closed(sym)) |
+ rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_wellformed(sym)) |
"\\" + quote | "\\\\" |
(("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~
quote ^^ { case x ~ ys ~ z => x + ys.mkString + z }
@@ -191,7 +191,7 @@
val body = source.substring(1, source.length - 1)
if (body.exists(_ == '\\')) {
val content =
- rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_closed(sym)) |
+ rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_wellformed(sym)) |
"\\" ~> (quote | "\\" | """\d\d\d""".r ^^ { case x => x.toInt.toChar.toString }))
parseAll(content ^^ (_.mkString), body).get
}
@@ -203,7 +203,7 @@
private def verbatim: Parser[String] =
{
- "{*" ~ rep(many1(sym => sym != "*" && Symbol.is_closed(sym)) | """\*(?!\})""".r) ~ "*}" ^^
+ "{*" ~ rep(many1(sym => sym != "*" && Symbol.is_wellformed(sym)) | """\*(?!\})""".r) ~ "*}" ^^
{ case x ~ ys ~ z => x + ys.mkString + z }
}.named("verbatim")
@@ -219,7 +219,7 @@
def comment: Parser[String] = new Parser[String]
{
val comment_text =
- rep(many1(sym => sym != "*" && sym != "(" && Symbol.is_closed(sym)) |
+ rep(many1(sym => sym != "*" && sym != "(" && Symbol.is_wellformed(sym)) |
"""\*(?!\))|\((?!\*)""".r)
val comment_open = "(*" ~ comment_text ^^^ ()
val comment_close = comment_text ~ "*)" ^^^ ()
@@ -276,7 +276,7 @@
val sym_ident =
(many1(symbols.is_symbolic_char) |
- one(sym => symbols.is_symbolic(sym) & Symbol.is_closed(sym))) ^^
+ one(sym => symbols.is_symbolic(sym) & Symbol.is_wellformed(sym))) ^^
(x => Token(Token_Kind.SYM_IDENT, x))
val space = many1(symbols.is_blank) ^^ (x => Token(Token_Kind.SPACE, x))
--- a/src/Pure/General/symbol.scala Mon Jan 11 20:50:03 2010 +0100
+++ b/src/Pure/General/symbol.scala Mon Jan 11 21:37:48 2010 +0100
@@ -23,6 +23,8 @@
\^? [A-Za-z][A-Za-z0-9_']* |
\^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
+ // FIXME cover bad surrogates!?
+ // FIXME check wrt. ML version
private val bad_symbol = new Regex("(?xs) (?!" + symbol + ")" +
""" \\ < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
@@ -32,14 +34,10 @@
/* basic matching */
- def is_closed(c: Char): Boolean =
- !(c == '\\' || Character.isHighSurrogate(c))
+ def is_plain(c: Char): Boolean = !(c == '\\' || '\ud800' <= c && c <= '\udfff')
- def is_closed(s: CharSequence): Boolean =
- {
- if (s.length == 1) is_closed(s.charAt(0))
- else !bad_symbol.pattern.matcher(s).matches
- }
+ def is_wellformed(s: CharSequence): Boolean =
+ s.length == 1 && is_plain(s.charAt(0)) || !bad_symbol.pattern.matcher(s).matches
class Matcher(text: CharSequence)
{
@@ -47,7 +45,7 @@
def apply(start: Int, end: Int): Int =
{
require(0 <= start && start < end && end <= text.length)
- if (is_closed(text.charAt(start))) 1
+ if (is_plain(text.charAt(start))) 1
else {
matcher.region(start, end).lookingAt
matcher.group.length
@@ -177,7 +175,7 @@
}
}
decl.split("\\s+").toList match {
- case sym :: props if sym.length > 1 && is_closed(sym) => (sym, read_props(props))
+ case sym :: props if sym.length > 1 && is_wellformed(sym) => (sym, read_props(props))
case _ => err()
}
}