clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
authorwenzelm
Mon, 11 Jan 2010 21:37:48 +0100
changeset 34316 f879b649ac4c
parent 34315 c47a2228fead
child 34317 c1509b9d624f
clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
src/Pure/General/scan.scala
src/Pure/General/symbol.scala
--- a/src/Pure/General/scan.scala	Mon Jan 11 20:50:03 2010 +0100
+++ b/src/Pure/General/scan.scala	Mon Jan 11 21:37:48 2010 +0100
@@ -179,7 +179,7 @@
     private def quoted(quote: String): Parser[String] =
     {
       quote ~
-        rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_closed(sym)) |
+        rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_wellformed(sym)) |
           "\\" + quote | "\\\\" |
           (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~
       quote ^^ { case x ~ ys ~ z => x + ys.mkString + z }
@@ -191,7 +191,7 @@
       val body = source.substring(1, source.length - 1)
       if (body.exists(_ == '\\')) {
         val content =
-          rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_closed(sym)) |
+          rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_wellformed(sym)) |
               "\\" ~> (quote | "\\" | """\d\d\d""".r ^^ { case x => x.toInt.toChar.toString }))
         parseAll(content ^^ (_.mkString), body).get
       }
@@ -203,7 +203,7 @@
 
     private def verbatim: Parser[String] =
     {
-      "{*" ~ rep(many1(sym => sym != "*" && Symbol.is_closed(sym)) | """\*(?!\})""".r) ~ "*}" ^^
+      "{*" ~ rep(many1(sym => sym != "*" && Symbol.is_wellformed(sym)) | """\*(?!\})""".r) ~ "*}" ^^
         { case x ~ ys ~ z => x + ys.mkString + z }
     }.named("verbatim")
 
@@ -219,7 +219,7 @@
     def comment: Parser[String] = new Parser[String]
     {
       val comment_text =
-        rep(many1(sym => sym != "*" && sym != "(" && Symbol.is_closed(sym)) |
+        rep(many1(sym => sym != "*" && sym != "(" && Symbol.is_wellformed(sym)) |
           """\*(?!\))|\((?!\*)""".r)
       val comment_open = "(*" ~ comment_text ^^^ ()
       val comment_close = comment_text ~ "*)" ^^^ ()
@@ -276,7 +276,7 @@
 
       val sym_ident =
         (many1(symbols.is_symbolic_char) |
-          one(sym => symbols.is_symbolic(sym) & Symbol.is_closed(sym))) ^^
+          one(sym => symbols.is_symbolic(sym) & Symbol.is_wellformed(sym))) ^^
         (x => Token(Token_Kind.SYM_IDENT, x))
 
       val space = many1(symbols.is_blank) ^^ (x => Token(Token_Kind.SPACE, x))
--- a/src/Pure/General/symbol.scala	Mon Jan 11 20:50:03 2010 +0100
+++ b/src/Pure/General/symbol.scala	Mon Jan 11 21:37:48 2010 +0100
@@ -23,6 +23,8 @@
       \^? [A-Za-z][A-Za-z0-9_']* |
       \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
 
+  // FIXME cover bad surrogates!?
+  // FIXME check wrt. ML version
   private val bad_symbol = new Regex("(?xs) (?!" + symbol + ")" +
     """ \\ < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
 
@@ -32,14 +34,10 @@
 
   /* basic matching */
 
-  def is_closed(c: Char): Boolean =
-    !(c == '\\' || Character.isHighSurrogate(c))
+  def is_plain(c: Char): Boolean = !(c == '\\' || '\ud800' <= c && c <= '\udfff')
 
-  def is_closed(s: CharSequence): Boolean =
-  {
-    if (s.length == 1) is_closed(s.charAt(0))
-    else !bad_symbol.pattern.matcher(s).matches
-  }
+  def is_wellformed(s: CharSequence): Boolean =
+    s.length == 1 && is_plain(s.charAt(0)) || !bad_symbol.pattern.matcher(s).matches
 
   class Matcher(text: CharSequence)
   {
@@ -47,7 +45,7 @@
     def apply(start: Int, end: Int): Int =
     {
       require(0 <= start && start < end && end <= text.length)
-      if (is_closed(text.charAt(start))) 1
+      if (is_plain(text.charAt(start))) 1
       else {
         matcher.region(start, end).lookingAt
         matcher.group.length
@@ -177,7 +175,7 @@
         }
       }
       decl.split("\\s+").toList match {
-        case sym :: props if sym.length > 1 && is_closed(sym) => (sym, read_props(props))
+        case sym :: props if sym.length > 1 && is_wellformed(sym) => (sym, read_props(props))
         case _ => err()
       }
     }