# HG changeset patch # User wenzelm # Date 1720031710 -7200 # Node ID 43323d886ea319e63f0eb348018d25ca85cae610 # Parent b439582efc8ac30535e7bfae07ef550ac323d4b7 clarified signature: more direct Bytes.raw and subsequent UTF-8 default decoding; diff -r b439582efc8a -r 43323d886ea3 src/Pure/General/bytes.scala --- a/src/Pure/General/bytes.scala Wed Jul 03 20:18:36 2024 +0200 +++ b/src/Pure/General/bytes.scala Wed Jul 03 20:35:10 2024 +0200 @@ -10,6 +10,7 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream, FileInputStream, FileOutputStream, InputStream, OutputStream, File => JFile} import java.nio.ByteBuffer +import java.nio.charset.StandardCharsets.ISO_8859_1 import java.nio.channels.FileChannel import java.nio.file.StandardOpenOption import java.util.Arrays @@ -45,6 +46,10 @@ val empty: Bytes = reuse_array(new Array(0)) + def raw(s: String): Bytes = + if (s.isEmpty) empty + else Builder.use(hint = s.length) { builder => builder += s.getBytes(ISO_8859_1) } + def apply(s: String): Bytes = if (s.isEmpty) empty else Builder.use(hint = s.length) { builder => builder += s } @@ -131,21 +136,6 @@ def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes) - /* vector of short unsigned integers */ - - trait Vec { - def size: Long - def apply(i: Long): Char - } - - class Vec_String(string: String) extends Vec { - override def size: Long = string.length.toLong - override def apply(i: Long): Char = - if (0 <= i && i < size) string(i.toInt) - else throw new IndexOutOfBoundsException - } - - /* incremental builder: unsynchronized! */ object Builder { @@ -294,7 +284,7 @@ protected val chunk0: Array[Byte], protected val offset: Long, val size: Long -) extends Bytes.Vec with YXML.Source { +) extends YXML.Source { bytes => assert( @@ -499,7 +489,7 @@ } utf8 - if (utf8) UTF8.decode_permissive_bytes(bytes) + if (utf8) UTF8.decode_permissive(bytes) else new String(make_array, UTF8.charset) } diff -r b439582efc8a -r 43323d886ea3 src/Pure/General/scan.scala --- a/src/Pure/General/scan.scala Wed Jul 03 20:18:36 2024 +0200 +++ b/src/Pure/General/scan.scala Wed Jul 03 20:35:10 2024 +0200 @@ -436,7 +436,7 @@ reader.isInstanceOf[Byte_Reader] def reader_decode_utf8(is_utf8: Boolean, s: String): String = - if (is_utf8) UTF8.decode_permissive(s) else s + if (is_utf8) UTF8.decode_permissive(Bytes.raw(s)) else s def reader_decode_utf8(reader: Reader[Char], s: String): String = reader_decode_utf8(reader_is_utf8(reader), s) diff -r b439582efc8a -r 43323d886ea3 src/Pure/General/utf8.scala --- a/src/Pure/General/utf8.scala Wed Jul 03 20:18:36 2024 +0200 +++ b/src/Pure/General/utf8.scala Wed Jul 03 20:35:10 2024 +0200 @@ -23,7 +23,7 @@ // see also https://en.wikipedia.org/wiki/UTF-8#Description // overlong encodings enable byte-stuffing of low-ASCII - def decode_permissive_bytes(bytes: Bytes.Vec): String = { + def decode_permissive(bytes: Bytes): String = { val size = bytes.size val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt) var code = -1 @@ -60,19 +60,4 @@ flush() buf.toString } - - def decode_permissive(text: String): String = { - val relevant = { - var i = 0 - val n = text.length - var found = false - while (i < n && !found) { - if (text(i) >= 128) { found = true } - i += 1 - } - found - } - if (relevant) decode_permissive_bytes(new Bytes.Vec_String(text)) - else text - } } diff -r b439582efc8a -r 43323d886ea3 src/Tools/jEdit/jedit_main/scala_console.scala --- a/src/Tools/jEdit/jedit_main/scala_console.scala Wed Jul 03 20:18:36 2024 +0200 +++ b/src/Tools/jEdit/jedit_main/scala_console.scala Wed Jul 03 20:35:10 2024 +0200 @@ -50,7 +50,7 @@ override def flush(): Unit = { val s = buf.synchronized { val s = buf.toString; buf.clear(); s } - val str = UTF8.decode_permissive(s) + val str = UTF8.decode_permissive(Bytes.raw(s)) GUI_Thread.later { if (global_out == null) java.lang.System.out.print(str) else global_out.writeAttrs(null, str)