--- a/src/Pure/General/yxml.scala Thu Dec 17 19:30:12 2009 +0100
+++ b/src/Pure/General/yxml.scala Thu Dec 17 20:09:19 2009 +0100
@@ -19,7 +19,8 @@
/* iterate over chunks (resembles space_explode in ML) */
- private def chunks(sep: Char, source: CharSequence) = new Iterator[CharSequence] {
+ private def chunks(sep: Char, source: CharSequence) = new Iterator[CharSequence]
+ {
private val end = source.length
private var state = if (end == 0) None else get_chunk(-1)
private def get_chunk(i: Int) =
@@ -39,6 +40,68 @@
}
+ /* decoding pseudo UTF-8 */
+
+ private class Decode_Chars(decode: String => String,
+ buffer: Array[Byte], start: Int, end: Int) extends CharSequence
+ {
+ def length: Int = end - start
+ def charAt(i: Int): Char = (buffer(start + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
+ def subSequence(i: Int, j: Int): CharSequence =
+ new Decode_Chars(decode, buffer, start + i, start + j)
+
+ // toString with adhoc decoding: abuse of CharSequence interface
+ // see also http://en.wikipedia.org/wiki/UTF-8#Description
+ override def toString: String =
+ {
+ val buf = new java.lang.StringBuilder(length)
+ var code = -1
+ var rest = 0
+ def flush()
+ {
+ if (code != -1) {
+ if (rest == 0) buf.appendCodePoint(code)
+ else buf.append('\uFFFD')
+ code = -1
+ rest = 0
+ }
+ }
+ def init(x: Int, n: Int)
+ {
+ flush()
+ code = x
+ rest = n
+ }
+ def push(x: Int)
+ {
+ if (rest <= 0) init(x, -1)
+ else {
+ code <<= 6
+ code += x
+ rest -= 1
+ }
+ }
+ for (i <- 0 until length) {
+ val c = charAt(i)
+ if (c < 128) { flush(); buf.append(c) }
+ else if ((c & 0xC0) == 0x80) push(c & 0x3F)
+ else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
+ else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
+ else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
+ }
+ flush()
+ decode(buf.toString)
+ }
+ }
+
+ def decode_chars(decode: String => String,
+ buffer: Array[Byte], start: Int, end: Int): CharSequence =
+ {
+ require(0 <= start && start <= end && end <= buffer.length)
+ new Decode_Chars(decode, buffer, start, end)
+ }
+
+
/* parsing */
private def err(msg: String) = error("Malformed YXML: " + msg)
@@ -80,11 +143,12 @@
/* parse chunks */
stack = List((("", Nil), Nil))
- for (chunk <- chunks(X, source) if chunk != "") {
- if (chunk == Y_string) pop()
+ for (chunk <- chunks(X, source) if chunk.length != 0) {
+ if (chunk.length == 1 && chunk.charAt(0) == Y) pop()
else {
chunks(Y, chunk).toList match {
- case "" :: name :: atts => push(name.toString.intern, atts.map(parse_attrib))
+ case ch :: name :: atts if ch.length == 0 =>
+ push(name.toString.intern, atts.map(parse_attrib))
case txts => for (txt <- txts) add(XML.Text(txt.toString))
}
}