explicit module UTF8;
authorwenzelm
Sun, 25 Nov 2012 20:17:04 +0100
changeset 50203 00d8ad713e32
parent 50202 ec0f2f8dbeb9
child 50204 daeb1674fb91
explicit module UTF8;
src/Pure/General/file.scala
src/Pure/General/sha1.scala
src/Pure/System/build.scala
src/Pure/System/isabelle_charset.scala
src/Pure/System/isabelle_process.scala
src/Pure/System/isabelle_system.scala
src/Pure/System/standard_system.scala
src/Pure/System/utf8.scala
src/Pure/build-jars
src/Tools/jEdit/src/isabelle_encoding.scala
src/Tools/jEdit/src/scala_console.scala
--- a/src/Pure/General/file.scala	Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/General/file.scala	Sun Nov 25 20:17:04 2012 +0100
@@ -53,8 +53,7 @@
     buf
   }
 
-  def read(file: JFile): String =
-    new String(read_bytes(file), Standard_System.charset)
+  def read(file: JFile): String = new String(read_bytes(file), UTF8.charset)
 
   def read(path: Path): String = read(path.file)
 
@@ -68,7 +67,7 @@
   }
 
   def read(stream: InputStream): String =
-    read(new BufferedReader(new InputStreamReader(stream, Standard_System.charset)))
+    read(new BufferedReader(new InputStreamReader(stream, UTF8.charset)))
 
 
   /* try_read */
@@ -90,7 +89,7 @@
   {
     val stream1 = new FileOutputStream(file)
     val stream2 = if (gzip) new GZIPOutputStream(new BufferedOutputStream(stream1)) else stream1
-    val writer = new BufferedWriter(new OutputStreamWriter(stream2, Standard_System.charset))
+    val writer = new BufferedWriter(new OutputStreamWriter(stream2, UTF8.charset))
     try { writer.append(text) }
     finally { writer.close }
   }
--- a/src/Pure/General/sha1.scala	Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/General/sha1.scala	Sun Nov 25 20:17:04 2012 +0100
@@ -56,6 +56,6 @@
     make_result(digest)
   }
 
-  def digest(string: String): Digest = digest(Standard_System.string_bytes(string))
+  def digest(string: String): Digest = digest(UTF8.string_bytes(string))
 }
 
--- a/src/Pure/System/build.scala	Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/System/build.scala	Sun Nov 25 20:17:04 2012 +0100
@@ -522,7 +522,7 @@
   private def read_stamps(path: Path): Option[(String, String, String)] =
     if (path.is_file) {
       val stream = new GZIPInputStream (new BufferedInputStream(new FileInputStream(path.file)))
-      val reader = new BufferedReader(new InputStreamReader(stream, Standard_System.charset))
+      val reader = new BufferedReader(new InputStreamReader(stream, UTF8.charset))
       val (s, h1, h2) =
         try { (reader.readLine, reader.readLine, reader.readLine) }
         finally { reader.close }
--- a/src/Pure/System/isabelle_charset.scala	Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/System/isabelle_charset.scala	Sun Nov 25 20:17:04 2012 +0100
@@ -22,14 +22,11 @@
 class Isabelle_Charset extends Charset(Isabelle_Charset.name, null)
 {
   override def contains(cs: Charset): Boolean =
-    cs.name.equalsIgnoreCase(Standard_System.charset_name) ||
-    Standard_System.charset.contains(cs)
+    cs.name.equalsIgnoreCase(UTF8.charset_name) || UTF8.charset.contains(cs)
 
-  override def newDecoder(): CharsetDecoder =
-    Standard_System.charset.newDecoder
+  override def newDecoder(): CharsetDecoder = UTF8.charset.newDecoder
 
-  override def newEncoder(): CharsetEncoder =
-    Standard_System.charset.newEncoder
+  override def newEncoder(): CharsetEncoder = UTF8.charset.newEncoder
 }
 
 
--- a/src/Pure/System/isabelle_process.scala	Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/System/isabelle_process.scala	Sun Nov 25 20:17:04 2012 +0100
@@ -292,9 +292,8 @@
           //{{{
           receive {
             case Input_Chunks(chunks) =>
-              stream.write(Standard_System.string_bytes(
-                  chunks.map(_.length).mkString("", ",", "\n")));
-              chunks.foreach(stream.write(_));
+              stream.write(UTF8.string_bytes(chunks.map(_.length).mkString("", ",", "\n")))
+              chunks.foreach(stream.write(_))
               stream.flush
             case Close =>
               stream.close
@@ -350,8 +349,8 @@
         if (i != n) throw new Protocol_Error("bad message chunk content")
 
         if (do_decode)
-          YXML.parse_body_failsafe(Standard_System.decode_chars(decode, buf, 0, n))
-        else List(XML.Text(Standard_System.decode_chars(s => s, buf, 0, n).toString))
+          YXML.parse_body_failsafe(UTF8.decode_chars(decode, buf, 0, n))
+        else List(XML.Text(UTF8.decode_chars(s => s, buf, 0, n).toString))
         //}}}
       }
 
@@ -390,12 +389,12 @@
   def input_raw(text: String): Unit = standard_input._2 ! Input_Text(text)
 
   def input_bytes(name: String, args: Array[Byte]*): Unit =
-    command_input._2 ! Input_Chunks(Standard_System.string_bytes(name) :: args.toList)
+    command_input._2 ! Input_Chunks(UTF8.string_bytes(name) :: args.toList)
 
   def input(name: String, args: String*)
   {
     receiver(new Input(name, args.toList))
-    input_bytes(name, args.map(Standard_System.string_bytes): _*)
+    input_bytes(name, args.map(UTF8.string_bytes): _*)
   }
 
   def options(opts: Options): Unit =
--- a/src/Pure/System/isabelle_system.scala	Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/System/isabelle_system.scala	Sun Nov 25 20:17:04 2012 +0100
@@ -168,13 +168,13 @@
     // channels
 
     val stdin: BufferedWriter =
-      new BufferedWriter(new OutputStreamWriter(proc.getOutputStream, Standard_System.charset))
+      new BufferedWriter(new OutputStreamWriter(proc.getOutputStream, UTF8.charset))
 
     val stdout: BufferedReader =
-      new BufferedReader(new InputStreamReader(proc.getInputStream, Standard_System.charset))
+      new BufferedReader(new InputStreamReader(proc.getInputStream, UTF8.charset))
 
     val stderr: BufferedReader =
-      new BufferedReader(new InputStreamReader(proc.getErrorStream, Standard_System.charset))
+      new BufferedReader(new InputStreamReader(proc.getErrorStream, UTF8.charset))
 
 
     // signals
--- a/src/Pure/System/standard_system.scala	Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/System/standard_system.scala	Sun Nov 25 20:17:04 2012 +0100
@@ -12,90 +12,12 @@
 import java.util.Locale
 import java.net.URL
 import java.io.{File => JFile}
-import java.nio.charset.Charset
 
-import scala.io.Codec
 import scala.util.matching.Regex
 
 
 object Standard_System
 {
-  /* UTF-8 charset */
-
-  val charset_name: String = "UTF-8"
-  val charset: Charset = Charset.forName(charset_name)
-  def codec(): Codec = Codec(charset)
-
-  def string_bytes(s: String): Array[Byte] = s.getBytes(charset)
-
-
-  /* permissive UTF-8 decoding */
-
-  // see also http://en.wikipedia.org/wiki/UTF-8#Description
-  // overlong encodings enable byte-stuffing
-
-  def decode_permissive_utf8(text: CharSequence): String =
-  {
-    val buf = new java.lang.StringBuilder(text.length)
-    var code = -1
-    var rest = 0
-    def flush()
-    {
-      if (code != -1) {
-        if (rest == 0 && Character.isValidCodePoint(code))
-          buf.appendCodePoint(code)
-        else buf.append('\uFFFD')
-        code = -1
-        rest = 0
-      }
-    }
-    def init(x: Int, n: Int)
-    {
-      flush()
-      code = x
-      rest = n
-    }
-    def push(x: Int)
-    {
-      if (rest <= 0) init(x, -1)
-      else {
-        code <<= 6
-        code += x
-        rest -= 1
-      }
-    }
-    for (i <- 0 until text.length) {
-      val c = text.charAt(i)
-      if (c < 128) { flush(); buf.append(c) }
-      else if ((c & 0xC0) == 0x80) push(c & 0x3F)
-      else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
-      else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
-      else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
-    }
-    flush()
-    buf.toString
-  }
-
-  private class Decode_Chars(decode: String => String,
-    buffer: Array[Byte], start: Int, end: Int) extends CharSequence
-  {
-    def length: Int = end - start
-    def charAt(i: Int): Char = (buffer(start + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
-    def subSequence(i: Int, j: Int): CharSequence =
-      new Decode_Chars(decode, buffer, start + i, start + j)
-
-    // toString with adhoc decoding: abuse of CharSequence interface
-    override def toString: String = decode(decode_permissive_utf8(this))
-  }
-
-  def decode_chars(decode: String => String,
-    buffer: Array[Byte], start: Int, end: Int): CharSequence =
-  {
-    require(0 <= start && start <= end && end <= buffer.length)
-    new Decode_Chars(decode, buffer, start, end)
-  }
-
-
   /* shell processes */
 
   def raw_execute(cwd: JFile, env: Map[String, String], redirect: Boolean, args: String*): Process =
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/System/utf8.scala	Sun Nov 25 20:17:04 2012 +0100
@@ -0,0 +1,92 @@
+/*  Title:      Pure/System/utf8.scala
+    Module:     PIDE
+    Author:     Makarius
+
+Variations on UTF-8.
+*/
+
+package isabelle
+
+
+import java.nio.charset.Charset
+import scala.io.Codec
+
+
+object UTF8
+{
+  /* charset */
+
+  val charset_name: String = "UTF-8"
+  val charset: Charset = Charset.forName(charset_name)
+  def codec(): Codec = Codec(charset)
+
+  def string_bytes(s: String): Array[Byte] = s.getBytes(charset)
+
+
+  /* permissive UTF-8 decoding */
+
+  // see also http://en.wikipedia.org/wiki/UTF-8#Description
+  // overlong encodings enable byte-stuffing
+
+  def decode_permissive(text: CharSequence): String =
+  {
+    val buf = new java.lang.StringBuilder(text.length)
+    var code = -1
+    var rest = 0
+    def flush()
+    {
+      if (code != -1) {
+        if (rest == 0 && Character.isValidCodePoint(code))
+          buf.appendCodePoint(code)
+        else buf.append('\uFFFD')
+        code = -1
+        rest = 0
+      }
+    }
+    def init(x: Int, n: Int)
+    {
+      flush()
+      code = x
+      rest = n
+    }
+    def push(x: Int)
+    {
+      if (rest <= 0) init(x, -1)
+      else {
+        code <<= 6
+        code += x
+        rest -= 1
+      }
+    }
+    for (i <- 0 until text.length) {
+      val c = text.charAt(i)
+      if (c < 128) { flush(); buf.append(c) }
+      else if ((c & 0xC0) == 0x80) push(c & 0x3F)
+      else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
+      else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
+      else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
+    }
+    flush()
+    buf.toString
+  }
+
+  private class Decode_Chars(decode: String => String,
+    buffer: Array[Byte], start: Int, end: Int) extends CharSequence
+  {
+    def length: Int = end - start
+    def charAt(i: Int): Char = (buffer(start + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
+    def subSequence(i: Int, j: Int): CharSequence =
+      new Decode_Chars(decode, buffer, start + i, start + j)
+
+    // toString with adhoc decoding: abuse of CharSequence interface
+    override def toString: String = decode(decode_permissive(this))
+  }
+
+  def decode_chars(decode: String => String,
+    buffer: Array[Byte], start: Int, end: Int): CharSequence =
+  {
+    require(0 <= start && start <= end && end <= buffer.length)
+    new Decode_Chars(decode, buffer, start, end)
+  }
+}
+
--- a/src/Pure/build-jars	Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/build-jars	Sun Nov 25 20:17:04 2012 +0100
@@ -56,6 +56,7 @@
   System/standard_system.scala
   System/swing_thread.scala
   System/system_channel.scala
+  System/utf8.scala
   Thy/completion.scala
   Thy/html.scala
   Thy/thy_header.scala
--- a/src/Tools/jEdit/src/isabelle_encoding.scala	Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Tools/jEdit/src/isabelle_encoding.scala	Sun Nov 25 20:17:04 2012 +0100
@@ -37,12 +37,11 @@
     new CharArrayReader(Symbol.decode(source.mkString).toArray)
   }
 
-  override def getTextReader(in: InputStream): Reader =
-    text_reader(in, Standard_System.codec())
+  override def getTextReader(in: InputStream): Reader = text_reader(in, UTF8.codec())
 
   override def getPermissiveTextReader(in: InputStream): Reader =
   {
-    val codec = Standard_System.codec()
+    val codec = UTF8.codec()
     codec.onMalformedInput(CodingErrorAction.REPLACE)
     codec.onUnmappableCharacter(CodingErrorAction.REPLACE)
     text_reader(in, codec)
@@ -53,12 +52,12 @@
     val buffer = new ByteArrayOutputStream(BUFSIZE) {
       override def flush()
       {
-        val text = Symbol.encode(toString(Standard_System.charset_name))
-        out.write(text.getBytes(Standard_System.charset))
+        val text = Symbol.encode(toString(UTF8.charset_name))
+        out.write(text.getBytes(UTF8.charset))
         out.flush()
       }
       override def close() { out.close() }
     }
-    new OutputStreamWriter(buffer, Standard_System.charset.newEncoder())
+    new OutputStreamWriter(buffer, UTF8.charset.newEncoder())
   }
 }
--- a/src/Tools/jEdit/src/scala_console.scala	Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Tools/jEdit/src/scala_console.scala	Sun Nov 25 20:17:04 2012 +0100
@@ -64,7 +64,7 @@
     val buf = new StringBuilder
     override def flush()
     {
-      val str = Standard_System.decode_permissive_utf8(buf.toString)
+      val str = UTF8.decode_permissive(buf.toString)
       buf.clear
       if (global_out == null) System.out.print(str)
       else Swing_Thread.now { global_out.writeAttrs(null, str) }