--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/General/utf8.scala Wed Dec 21 11:21:46 2016 +0100
@@ -0,0 +1,99 @@
+/* Title: Pure/General/utf8.scala
+ Author: Makarius
+
+Variations on UTF-8.
+*/
+
+package isabelle
+
+
+import java.nio.charset.Charset
+import scala.io.Codec
+
+
+object UTF8
+{
+ /* charset */
+
+ val charset_name: String = "UTF-8"
+ val charset: Charset = Charset.forName(charset_name)
+ def codec(): Codec = Codec(charset)
+
+ def bytes(s: String): Array[Byte] = s.getBytes(charset)
+
+ object Length extends Codepoint.Length
+ {
+ override def codepoint_length(c: Int): Int =
+ if (c < 0x80) 1
+ else if (c < 0x800) 2
+ else if (c < 0x10000) 3
+ else 4
+ }
+
+
+ /* permissive UTF-8 decoding */
+
+ // see also http://en.wikipedia.org/wiki/UTF-8#Description
+ // overlong encodings enable byte-stuffing of low-ASCII
+
+ def decode_permissive(text: CharSequence): String =
+ {
+ val buf = new java.lang.StringBuilder(text.length)
+ var code = -1
+ var rest = 0
+ def flush()
+ {
+ if (code != -1) {
+ if (rest == 0 && Character.isValidCodePoint(code))
+ buf.appendCodePoint(code)
+ else buf.append('\uFFFD')
+ code = -1
+ rest = 0
+ }
+ }
+ def init(x: Int, n: Int)
+ {
+ flush()
+ code = x
+ rest = n
+ }
+ def push(x: Int)
+ {
+ if (rest <= 0) init(x, -1)
+ else {
+ code <<= 6
+ code += x
+ rest -= 1
+ }
+ }
+ for (i <- 0 until text.length) {
+ val c = text.charAt(i)
+ if (c < 128) { flush(); buf.append(c) }
+ else if ((c & 0xC0) == 0x80) push(c & 0x3F)
+ else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
+ else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
+ else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
+ }
+ flush()
+ buf.toString
+ }
+
+ private class Decode_Chars(decode: String => String,
+ buffer: Array[Byte], start: Int, end: Int) extends CharSequence
+ {
+ def length: Int = end - start
+ def charAt(i: Int): Char = (buffer(start + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
+ def subSequence(i: Int, j: Int): CharSequence =
+ new Decode_Chars(decode, buffer, start + i, start + j)
+
+ // toString with adhoc decoding: abuse of CharSequence interface
+ override def toString: String = decode(decode_permissive(this))
+ }
+
+ def decode_chars(decode: String => String,
+ buffer: Array[Byte], start: Int, end: Int): CharSequence =
+ {
+ require(0 <= start && start <= end && end <= buffer.length)
+ new Decode_Chars(decode, buffer, start, end)
+ }
+}
--- a/src/Pure/System/utf8.scala Tue Dec 20 22:32:04 2016 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-/* Title: Pure/System/utf8.scala
- Author: Makarius
-
-Variations on UTF-8.
-*/
-
-package isabelle
-
-
-import java.nio.charset.Charset
-import scala.io.Codec
-
-
-object UTF8
-{
- /* charset */
-
- val charset_name: String = "UTF-8"
- val charset: Charset = Charset.forName(charset_name)
- def codec(): Codec = Codec(charset)
-
- def bytes(s: String): Array[Byte] = s.getBytes(charset)
-
- object Length extends Codepoint.Length
- {
- override def codepoint_length(c: Int): Int =
- if (c < 0x80) 1
- else if (c < 0x800) 2
- else if (c < 0x10000) 3
- else 4
- }
-
-
- /* permissive UTF-8 decoding */
-
- // see also http://en.wikipedia.org/wiki/UTF-8#Description
- // overlong encodings enable byte-stuffing of low-ASCII
-
- def decode_permissive(text: CharSequence): String =
- {
- val buf = new java.lang.StringBuilder(text.length)
- var code = -1
- var rest = 0
- def flush()
- {
- if (code != -1) {
- if (rest == 0 && Character.isValidCodePoint(code))
- buf.appendCodePoint(code)
- else buf.append('\uFFFD')
- code = -1
- rest = 0
- }
- }
- def init(x: Int, n: Int)
- {
- flush()
- code = x
- rest = n
- }
- def push(x: Int)
- {
- if (rest <= 0) init(x, -1)
- else {
- code <<= 6
- code += x
- rest -= 1
- }
- }
- for (i <- 0 until text.length) {
- val c = text.charAt(i)
- if (c < 128) { flush(); buf.append(c) }
- else if ((c & 0xC0) == 0x80) push(c & 0x3F)
- else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
- else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
- else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
- }
- flush()
- buf.toString
- }
-
- private class Decode_Chars(decode: String => String,
- buffer: Array[Byte], start: Int, end: Int) extends CharSequence
- {
- def length: Int = end - start
- def charAt(i: Int): Char = (buffer(start + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
- def subSequence(i: Int, j: Int): CharSequence =
- new Decode_Chars(decode, buffer, start + i, start + j)
-
- // toString with adhoc decoding: abuse of CharSequence interface
- override def toString: String = decode(decode_permissive(this))
- }
-
- def decode_chars(decode: String => String,
- buffer: Array[Byte], start: Int, end: Int): CharSequence =
- {
- require(0 <= start && start <= end && end <= buffer.length)
- new Decode_Chars(decode, buffer, start, end)
- }
-}
--- a/src/Pure/build-jars Tue Dec 20 22:32:04 2016 +0100
+++ b/src/Pure/build-jars Wed Dec 21 11:21:46 2016 +0100
@@ -70,6 +70,7 @@
General/timing.scala
General/untyped.scala
General/url.scala
+ General/utf8.scala
General/value.scala
General/word.scala
General/xz.scala
@@ -118,7 +119,6 @@
System/process_result.scala
System/progress.scala
System/system_channel.scala
- System/utf8.scala
Thy/html.scala
Thy/present.scala
Thy/sessions.scala