--- a/src/Pure/General/bytes.scala Sat Jun 15 22:43:01 2024 +0200
+++ b/src/Pure/General/bytes.scala Sat Jun 15 23:24:24 2024 +0200
@@ -410,10 +410,18 @@
def text: String =
if (is_empty) ""
- else if (byte_iterator.forall(_ >= 0)) {
- new String(make_array, UTF8.charset)
+ else {
+ var i = 0L
+ var utf8 = false
+ while (i < size && !utf8) {
+ if (byte_unchecked(i) < 0) { utf8 = true }
+ i += 1
+ }
+ utf8
+
+ if (utf8) UTF8.decode_permissive_bytes(this)
+ else new String(make_array, UTF8.charset)
}
- else UTF8.decode_permissive_bytes(this)
def wellformed_text: Option[String] =
try {
--- a/src/Pure/General/utf8.scala Sat Jun 15 22:43:01 2024 +0200
+++ b/src/Pure/General/utf8.scala Sat Jun 15 23:24:24 2024 +0200
@@ -17,8 +17,16 @@
def bytes(s: String): Array[Byte] = s.getBytes(charset)
- def relevant(c: Char): Boolean = c >= 128
- def relevant(s: CharSequence): Boolean = (0 until s.length).exists(i => relevant(s.charAt(i)))
+ def relevant(s: CharSequence): Boolean = {
+ var i = 0
+ val n = s.length
+ var utf8 = false
+ while (i < n && !utf8) {
+ if (s.charAt(i) >= 128) { utf8 = true }
+ i += 1
+ }
+ utf8
+ }
/* permissive UTF-8 decoding */