--- a/src/Pure/General/symbol.scala Mon Jan 11 20:50:03 2010 +0100
+++ b/src/Pure/General/symbol.scala Mon Jan 11 21:37:48 2010 +0100
@@ -23,6 +23,8 @@
\^? [A-Za-z][A-Za-z0-9_']* |
\^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
+ // FIXME cover bad surrogates!?
+ // FIXME check wrt. ML version
private val bad_symbol = new Regex("(?xs) (?!" + symbol + ")" +
""" \\ < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
@@ -32,14 +34,10 @@
/* basic matching */
- def is_closed(c: Char): Boolean =
- !(c == '\\' || Character.isHighSurrogate(c))
+ def is_plain(c: Char): Boolean = !(c == '\\' || '\ud800' <= c && c <= '\udfff')
- def is_closed(s: CharSequence): Boolean =
- {
- if (s.length == 1) is_closed(s.charAt(0))
- else !bad_symbol.pattern.matcher(s).matches
- }
+ def is_wellformed(s: CharSequence): Boolean =
+ s.length == 1 && is_plain(s.charAt(0)) || !bad_symbol.pattern.matcher(s).matches
class Matcher(text: CharSequence)
{
@@ -47,7 +45,7 @@
def apply(start: Int, end: Int): Int =
{
require(0 <= start && start < end && end <= text.length)
- if (is_closed(text.charAt(start))) 1
+ if (is_plain(text.charAt(start))) 1
else {
matcher.region(start, end).lookingAt
matcher.group.length
@@ -177,7 +175,7 @@
}
}
decl.split("\\s+").toList match {
- case sym :: props if sym.length > 1 && is_closed(sym) => (sym, read_props(props))
+ case sym :: props if sym.length > 1 && is_wellformed(sym) => (sym, read_props(props))
case _ => err()
}
}