--- a/src/Pure/General/file.scala Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/General/file.scala Sun Nov 25 20:17:04 2012 +0100
@@ -53,8 +53,7 @@
buf
}
- def read(file: JFile): String =
- new String(read_bytes(file), Standard_System.charset)
+ def read(file: JFile): String = new String(read_bytes(file), UTF8.charset)
def read(path: Path): String = read(path.file)
@@ -68,7 +67,7 @@
}
def read(stream: InputStream): String =
- read(new BufferedReader(new InputStreamReader(stream, Standard_System.charset)))
+ read(new BufferedReader(new InputStreamReader(stream, UTF8.charset)))
/* try_read */
@@ -90,7 +89,7 @@
{
val stream1 = new FileOutputStream(file)
val stream2 = if (gzip) new GZIPOutputStream(new BufferedOutputStream(stream1)) else stream1
- val writer = new BufferedWriter(new OutputStreamWriter(stream2, Standard_System.charset))
+ val writer = new BufferedWriter(new OutputStreamWriter(stream2, UTF8.charset))
try { writer.append(text) }
finally { writer.close }
}
--- a/src/Pure/General/sha1.scala Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/General/sha1.scala Sun Nov 25 20:17:04 2012 +0100
@@ -56,6 +56,6 @@
make_result(digest)
}
- def digest(string: String): Digest = digest(Standard_System.string_bytes(string))
+ def digest(string: String): Digest = digest(UTF8.string_bytes(string))
}
--- a/src/Pure/System/build.scala Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/System/build.scala Sun Nov 25 20:17:04 2012 +0100
@@ -522,7 +522,7 @@
private def read_stamps(path: Path): Option[(String, String, String)] =
if (path.is_file) {
val stream = new GZIPInputStream (new BufferedInputStream(new FileInputStream(path.file)))
- val reader = new BufferedReader(new InputStreamReader(stream, Standard_System.charset))
+ val reader = new BufferedReader(new InputStreamReader(stream, UTF8.charset))
val (s, h1, h2) =
try { (reader.readLine, reader.readLine, reader.readLine) }
finally { reader.close }
--- a/src/Pure/System/isabelle_charset.scala Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/System/isabelle_charset.scala Sun Nov 25 20:17:04 2012 +0100
@@ -22,14 +22,11 @@
class Isabelle_Charset extends Charset(Isabelle_Charset.name, null)
{
override def contains(cs: Charset): Boolean =
- cs.name.equalsIgnoreCase(Standard_System.charset_name) ||
- Standard_System.charset.contains(cs)
+ cs.name.equalsIgnoreCase(UTF8.charset_name) || UTF8.charset.contains(cs)
- override def newDecoder(): CharsetDecoder =
- Standard_System.charset.newDecoder
+ override def newDecoder(): CharsetDecoder = UTF8.charset.newDecoder
- override def newEncoder(): CharsetEncoder =
- Standard_System.charset.newEncoder
+ override def newEncoder(): CharsetEncoder = UTF8.charset.newEncoder
}
--- a/src/Pure/System/isabelle_process.scala Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/System/isabelle_process.scala Sun Nov 25 20:17:04 2012 +0100
@@ -292,9 +292,8 @@
//{{{
receive {
case Input_Chunks(chunks) =>
- stream.write(Standard_System.string_bytes(
- chunks.map(_.length).mkString("", ",", "\n")));
- chunks.foreach(stream.write(_));
+ stream.write(UTF8.string_bytes(chunks.map(_.length).mkString("", ",", "\n")))
+ chunks.foreach(stream.write(_))
stream.flush
case Close =>
stream.close
@@ -350,8 +349,8 @@
if (i != n) throw new Protocol_Error("bad message chunk content")
if (do_decode)
- YXML.parse_body_failsafe(Standard_System.decode_chars(decode, buf, 0, n))
- else List(XML.Text(Standard_System.decode_chars(s => s, buf, 0, n).toString))
+ YXML.parse_body_failsafe(UTF8.decode_chars(decode, buf, 0, n))
+ else List(XML.Text(UTF8.decode_chars(s => s, buf, 0, n).toString))
//}}}
}
@@ -390,12 +389,12 @@
def input_raw(text: String): Unit = standard_input._2 ! Input_Text(text)
def input_bytes(name: String, args: Array[Byte]*): Unit =
- command_input._2 ! Input_Chunks(Standard_System.string_bytes(name) :: args.toList)
+ command_input._2 ! Input_Chunks(UTF8.string_bytes(name) :: args.toList)
def input(name: String, args: String*)
{
receiver(new Input(name, args.toList))
- input_bytes(name, args.map(Standard_System.string_bytes): _*)
+ input_bytes(name, args.map(UTF8.string_bytes): _*)
}
def options(opts: Options): Unit =
--- a/src/Pure/System/isabelle_system.scala Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/System/isabelle_system.scala Sun Nov 25 20:17:04 2012 +0100
@@ -168,13 +168,13 @@
// channels
val stdin: BufferedWriter =
- new BufferedWriter(new OutputStreamWriter(proc.getOutputStream, Standard_System.charset))
+ new BufferedWriter(new OutputStreamWriter(proc.getOutputStream, UTF8.charset))
val stdout: BufferedReader =
- new BufferedReader(new InputStreamReader(proc.getInputStream, Standard_System.charset))
+ new BufferedReader(new InputStreamReader(proc.getInputStream, UTF8.charset))
val stderr: BufferedReader =
- new BufferedReader(new InputStreamReader(proc.getErrorStream, Standard_System.charset))
+ new BufferedReader(new InputStreamReader(proc.getErrorStream, UTF8.charset))
// signals
--- a/src/Pure/System/standard_system.scala Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/System/standard_system.scala Sun Nov 25 20:17:04 2012 +0100
@@ -12,90 +12,12 @@
import java.util.Locale
import java.net.URL
import java.io.{File => JFile}
-import java.nio.charset.Charset
-import scala.io.Codec
import scala.util.matching.Regex
object Standard_System
{
- /* UTF-8 charset */
-
- val charset_name: String = "UTF-8"
- val charset: Charset = Charset.forName(charset_name)
- def codec(): Codec = Codec(charset)
-
- def string_bytes(s: String): Array[Byte] = s.getBytes(charset)
-
-
- /* permissive UTF-8 decoding */
-
- // see also http://en.wikipedia.org/wiki/UTF-8#Description
- // overlong encodings enable byte-stuffing
-
- def decode_permissive_utf8(text: CharSequence): String =
- {
- val buf = new java.lang.StringBuilder(text.length)
- var code = -1
- var rest = 0
- def flush()
- {
- if (code != -1) {
- if (rest == 0 && Character.isValidCodePoint(code))
- buf.appendCodePoint(code)
- else buf.append('\uFFFD')
- code = -1
- rest = 0
- }
- }
- def init(x: Int, n: Int)
- {
- flush()
- code = x
- rest = n
- }
- def push(x: Int)
- {
- if (rest <= 0) init(x, -1)
- else {
- code <<= 6
- code += x
- rest -= 1
- }
- }
- for (i <- 0 until text.length) {
- val c = text.charAt(i)
- if (c < 128) { flush(); buf.append(c) }
- else if ((c & 0xC0) == 0x80) push(c & 0x3F)
- else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
- else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
- else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
- }
- flush()
- buf.toString
- }
-
- private class Decode_Chars(decode: String => String,
- buffer: Array[Byte], start: Int, end: Int) extends CharSequence
- {
- def length: Int = end - start
- def charAt(i: Int): Char = (buffer(start + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
- def subSequence(i: Int, j: Int): CharSequence =
- new Decode_Chars(decode, buffer, start + i, start + j)
-
- // toString with adhoc decoding: abuse of CharSequence interface
- override def toString: String = decode(decode_permissive_utf8(this))
- }
-
- def decode_chars(decode: String => String,
- buffer: Array[Byte], start: Int, end: Int): CharSequence =
- {
- require(0 <= start && start <= end && end <= buffer.length)
- new Decode_Chars(decode, buffer, start, end)
- }
-
-
/* shell processes */
def raw_execute(cwd: JFile, env: Map[String, String], redirect: Boolean, args: String*): Process =
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/System/utf8.scala Sun Nov 25 20:17:04 2012 +0100
@@ -0,0 +1,92 @@
+/* Title: Pure/System/utf8.scala
+ Module: PIDE
+ Author: Makarius
+
+Variations on UTF-8.
+*/
+
+package isabelle
+
+
+import java.nio.charset.Charset
+import scala.io.Codec
+
+
+object UTF8
+{
+ /* charset */
+
+ val charset_name: String = "UTF-8"
+ val charset: Charset = Charset.forName(charset_name)
+ def codec(): Codec = Codec(charset)
+
+ def string_bytes(s: String): Array[Byte] = s.getBytes(charset)
+
+
+ /* permissive UTF-8 decoding */
+
+ // see also http://en.wikipedia.org/wiki/UTF-8#Description
+ // overlong encodings enable byte-stuffing
+
+ def decode_permissive(text: CharSequence): String =
+ {
+ val buf = new java.lang.StringBuilder(text.length)
+ var code = -1
+ var rest = 0
+ def flush()
+ {
+ if (code != -1) {
+ if (rest == 0 && Character.isValidCodePoint(code))
+ buf.appendCodePoint(code)
+ else buf.append('\uFFFD')
+ code = -1
+ rest = 0
+ }
+ }
+ def init(x: Int, n: Int)
+ {
+ flush()
+ code = x
+ rest = n
+ }
+ def push(x: Int)
+ {
+ if (rest <= 0) init(x, -1)
+ else {
+ code <<= 6
+ code += x
+ rest -= 1
+ }
+ }
+ for (i <- 0 until text.length) {
+ val c = text.charAt(i)
+ if (c < 128) { flush(); buf.append(c) }
+ else if ((c & 0xC0) == 0x80) push(c & 0x3F)
+ else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
+ else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
+ else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
+ }
+ flush()
+ buf.toString
+ }
+
+ private class Decode_Chars(decode: String => String,
+ buffer: Array[Byte], start: Int, end: Int) extends CharSequence
+ {
+ def length: Int = end - start
+ def charAt(i: Int): Char = (buffer(start + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
+ def subSequence(i: Int, j: Int): CharSequence =
+ new Decode_Chars(decode, buffer, start + i, start + j)
+
+ // toString with adhoc decoding: abuse of CharSequence interface
+ override def toString: String = decode(decode_permissive(this))
+ }
+
+ def decode_chars(decode: String => String,
+ buffer: Array[Byte], start: Int, end: Int): CharSequence =
+ {
+ require(0 <= start && start <= end && end <= buffer.length)
+ new Decode_Chars(decode, buffer, start, end)
+ }
+}
+
--- a/src/Pure/build-jars Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Pure/build-jars Sun Nov 25 20:17:04 2012 +0100
@@ -56,6 +56,7 @@
System/standard_system.scala
System/swing_thread.scala
System/system_channel.scala
+ System/utf8.scala
Thy/completion.scala
Thy/html.scala
Thy/thy_header.scala
--- a/src/Tools/jEdit/src/isabelle_encoding.scala Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Tools/jEdit/src/isabelle_encoding.scala Sun Nov 25 20:17:04 2012 +0100
@@ -37,12 +37,11 @@
new CharArrayReader(Symbol.decode(source.mkString).toArray)
}
- override def getTextReader(in: InputStream): Reader =
- text_reader(in, Standard_System.codec())
+ override def getTextReader(in: InputStream): Reader = text_reader(in, UTF8.codec())
override def getPermissiveTextReader(in: InputStream): Reader =
{
- val codec = Standard_System.codec()
+ val codec = UTF8.codec()
codec.onMalformedInput(CodingErrorAction.REPLACE)
codec.onUnmappableCharacter(CodingErrorAction.REPLACE)
text_reader(in, codec)
@@ -53,12 +52,12 @@
val buffer = new ByteArrayOutputStream(BUFSIZE) {
override def flush()
{
- val text = Symbol.encode(toString(Standard_System.charset_name))
- out.write(text.getBytes(Standard_System.charset))
+ val text = Symbol.encode(toString(UTF8.charset_name))
+ out.write(text.getBytes(UTF8.charset))
out.flush()
}
override def close() { out.close() }
}
- new OutputStreamWriter(buffer, Standard_System.charset.newEncoder())
+ new OutputStreamWriter(buffer, UTF8.charset.newEncoder())
}
}
--- a/src/Tools/jEdit/src/scala_console.scala Sun Nov 25 19:55:42 2012 +0100
+++ b/src/Tools/jEdit/src/scala_console.scala Sun Nov 25 20:17:04 2012 +0100
@@ -64,7 +64,7 @@
val buf = new StringBuilder
override def flush()
{
- val str = Standard_System.decode_permissive_utf8(buf.toString)
+ val str = UTF8.decode_permissive(buf.toString)
buf.clear
if (global_out == null) System.out.print(str)
else Swing_Thread.now { global_out.writeAttrs(null, str) }