diff -r dfcf667bbfed -r b6960fc09ef3 src/Pure/library.scala --- a/src/Pure/library.scala Sun Dec 27 22:36:47 2009 +0100 +++ b/src/Pure/library.scala Sun Dec 27 23:09:16 2009 +0100 @@ -36,6 +36,52 @@ } + /* permissive UTF-8 decoding */ + + // see also http://en.wikipedia.org/wiki/UTF-8#Description + def decode_permissive_utf8(text: CharSequence): String = + { + val buf = new java.lang.StringBuilder(text.length) + var code = -1 + var rest = 0 + def flush() + { + if (code != -1) { + if (rest == 0 && Character.isValidCodePoint(code)) + buf.appendCodePoint(code) + else buf.append('\uFFFD') + code = -1 + rest = 0 + } + } + def init(x: Int, n: Int) + { + flush() + code = x + rest = n + } + def push(x: Int) + { + if (rest <= 0) init(x, -1) + else { + code <<= 6 + code += x + rest -= 1 + } + } + for (i <- 0 until text.length) { + val c = text.charAt(i) + if (c < 128) { flush(); buf.append(c) } + else if ((c & 0xC0) == 0x80) push(c & 0x3F) + else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1) + else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2) + else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3) + } + flush() + buf.toString + } + + /* timing */ def timeit[A](e: => A) =