clarified signature: more direct Bytes.raw and subsequent UTF-8 default decoding;
--- a/src/Pure/General/bytes.scala Wed Jul 03 20:18:36 2024 +0200
+++ b/src/Pure/General/bytes.scala Wed Jul 03 20:35:10 2024 +0200
@@ -10,6 +10,7 @@
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, FileInputStream, FileOutputStream,
InputStream, OutputStream, File => JFile}
import java.nio.ByteBuffer
+import java.nio.charset.StandardCharsets.ISO_8859_1
import java.nio.channels.FileChannel
import java.nio.file.StandardOpenOption
import java.util.Arrays
@@ -45,6 +46,10 @@
val empty: Bytes = reuse_array(new Array(0))
+ def raw(s: String): Bytes =
+ if (s.isEmpty) empty
+ else Builder.use(hint = s.length) { builder => builder += s.getBytes(ISO_8859_1) }
+
def apply(s: String): Bytes =
if (s.isEmpty) empty
else Builder.use(hint = s.length) { builder => builder += s }
@@ -131,21 +136,6 @@
def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes)
- /* vector of short unsigned integers */
-
- trait Vec {
- def size: Long
- def apply(i: Long): Char
- }
-
- class Vec_String(string: String) extends Vec {
- override def size: Long = string.length.toLong
- override def apply(i: Long): Char =
- if (0 <= i && i < size) string(i.toInt)
- else throw new IndexOutOfBoundsException
- }
-
-
/* incremental builder: unsynchronized! */
object Builder {
@@ -294,7 +284,7 @@
protected val chunk0: Array[Byte],
protected val offset: Long,
val size: Long
-) extends Bytes.Vec with YXML.Source {
+) extends YXML.Source {
bytes =>
assert(
@@ -499,7 +489,7 @@
}
utf8
- if (utf8) UTF8.decode_permissive_bytes(bytes)
+ if (utf8) UTF8.decode_permissive(bytes)
else new String(make_array, UTF8.charset)
}
--- a/src/Pure/General/scan.scala Wed Jul 03 20:18:36 2024 +0200
+++ b/src/Pure/General/scan.scala Wed Jul 03 20:35:10 2024 +0200
@@ -436,7 +436,7 @@
reader.isInstanceOf[Byte_Reader]
def reader_decode_utf8(is_utf8: Boolean, s: String): String =
- if (is_utf8) UTF8.decode_permissive(s) else s
+ if (is_utf8) UTF8.decode_permissive(Bytes.raw(s)) else s
def reader_decode_utf8(reader: Reader[Char], s: String): String =
reader_decode_utf8(reader_is_utf8(reader), s)
--- a/src/Pure/General/utf8.scala Wed Jul 03 20:18:36 2024 +0200
+++ b/src/Pure/General/utf8.scala Wed Jul 03 20:35:10 2024 +0200
@@ -23,7 +23,7 @@
// see also https://en.wikipedia.org/wiki/UTF-8#Description
// overlong encodings enable byte-stuffing of low-ASCII
- def decode_permissive_bytes(bytes: Bytes.Vec): String = {
+ def decode_permissive(bytes: Bytes): String = {
val size = bytes.size
val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt)
var code = -1
@@ -60,19 +60,4 @@
flush()
buf.toString
}
-
- def decode_permissive(text: String): String = {
- val relevant = {
- var i = 0
- val n = text.length
- var found = false
- while (i < n && !found) {
- if (text(i) >= 128) { found = true }
- i += 1
- }
- found
- }
- if (relevant) decode_permissive_bytes(new Bytes.Vec_String(text))
- else text
- }
}
--- a/src/Tools/jEdit/jedit_main/scala_console.scala Wed Jul 03 20:18:36 2024 +0200
+++ b/src/Tools/jEdit/jedit_main/scala_console.scala Wed Jul 03 20:35:10 2024 +0200
@@ -50,7 +50,7 @@
override def flush(): Unit = {
val s = buf.synchronized { val s = buf.toString; buf.clear(); s }
- val str = UTF8.decode_permissive(s)
+ val str = UTF8.decode_permissive(Bytes.raw(s))
GUI_Thread.later {
if (global_out == null) java.lang.System.out.print(str)
else global_out.writeAttrs(null, str)