# HG changeset patch # User wenzelm # Date 1260132983 -3600 # Node ID fc56cfc6906e800543776b86e3f44b5e151daf67 # Parent 9b95b0025ea5faeb7fb85abdaada1ee4e9ff2c7a added elements: Interator; first isHighSurrogate, not isLowSurrogate; misc tuning and generalization; diff -r 9b95b0025ea5 -r fc56cfc6906e src/Pure/General/symbol.scala --- a/src/Pure/General/symbol.scala Sat Dec 05 19:08:56 2009 +0100 +++ b/src/Pure/General/symbol.scala Sun Dec 06 21:56:23 2009 +0100 @@ -13,8 +13,7 @@ object Symbol { - - /** Symbol regexps **/ + /* Symbol regexps */ private val plain = new Regex("""(?xs) [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """) @@ -31,35 +30,57 @@ val regex = new Regex(plain + "|" + symbol + "|" + bad_symbol + "| .") // prefix of another symbol - def is_open(s: String): Boolean = + def is_open(s: CharSequence): Boolean = { val len = s.length - len == 1 && Character.isLowSurrogate(s(0)) || + len == 1 && Character.isHighSurrogate(s.charAt(0)) || s == "\\" || s == "\\<" || - len > 2 && s(len - 1) != '>' + len > 2 && s.charAt(len - 1) != '>' } - /** Decoding offsets **/ + /* elements */ + + private def could_open(c: Char): Boolean = + c == '\\' || Character.isHighSurrogate(c) - class Index(text: String) + def elements(text: CharSequence) = new Iterator[CharSequence] { + private val matcher = regex.pattern.matcher(text) + private var i = 0 + def hasNext = i < text.length + def next = { + val len = + if (could_open(text.charAt(i))) { + matcher.region(i, text.length).lookingAt + matcher.group.length + } + else 1 + val s = text.subSequence(i, i + len) + i += len + s + } + } + + + /* decoding offsets */ + + class Index(text: CharSequence) { case class Entry(chr: Int, sym: Int) val index: Array[Entry] = { - val length = text.length val matcher = regex.pattern.matcher(text) val buf = new mutable.ArrayBuffer[Entry] var chr = 0 var sym = 0 - while (chr < length) { - val c = text(chr) + while (chr < text.length) { val len = - if (c == '\\' || Character.isHighSurrogate(c)) { - matcher.region(chr, length).lookingAt + if (could_open(text.charAt(chr))) { + matcher.region(chr, text.length).lookingAt matcher.group.length - } else 1 + } + else 1 chr += len sym += 1 if (len > 1) buf += Entry(chr, sym) @@ -86,7 +107,7 @@ } - /** Recoding text **/ + /* recoding text */ private class Recoder(list: List[(String, String)]) { @@ -195,6 +216,5 @@ def decode(text: String) = decoder.recode(text) def encode(text: String) = encoder.recode(text) - } }