clarified signature: more direct Bytes.raw and subsequent UTF-8 default decoding;
authorwenzelm
Wed, 03 Jul 2024 20:35:10 +0200
changeset 80492 43323d886ea3
parent 80491 b439582efc8a
child 80493 d334f158442b
clarified signature: more direct Bytes.raw and subsequent UTF-8 default decoding;
src/Pure/General/bytes.scala
src/Pure/General/scan.scala
src/Pure/General/utf8.scala
src/Tools/jEdit/jedit_main/scala_console.scala
--- a/src/Pure/General/bytes.scala	Wed Jul 03 20:18:36 2024 +0200
+++ b/src/Pure/General/bytes.scala	Wed Jul 03 20:35:10 2024 +0200
@@ -10,6 +10,7 @@
 import java.io.{ByteArrayInputStream, ByteArrayOutputStream, FileInputStream, FileOutputStream,
   InputStream, OutputStream, File => JFile}
 import java.nio.ByteBuffer
+import java.nio.charset.StandardCharsets.ISO_8859_1
 import java.nio.channels.FileChannel
 import java.nio.file.StandardOpenOption
 import java.util.Arrays
@@ -45,6 +46,10 @@
 
   val empty: Bytes = reuse_array(new Array(0))
 
+  def raw(s: String): Bytes =
+    if (s.isEmpty) empty
+    else Builder.use(hint = s.length) { builder => builder += s.getBytes(ISO_8859_1) }
+
   def apply(s: String): Bytes =
     if (s.isEmpty) empty
     else Builder.use(hint = s.length) { builder => builder += s }
@@ -131,21 +136,6 @@
   def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes)
 
 
-  /* vector of short unsigned integers */
-
-  trait Vec {
-    def size: Long
-    def apply(i: Long): Char
-  }
-
-  class Vec_String(string: String) extends Vec {
-    override def size: Long = string.length.toLong
-    override def apply(i: Long): Char =
-      if (0 <= i && i < size) string(i.toInt)
-      else throw new IndexOutOfBoundsException
-  }
-
-
   /* incremental builder: unsynchronized! */
 
   object Builder {
@@ -294,7 +284,7 @@
   protected val chunk0: Array[Byte],
   protected val offset: Long,
   val size: Long
-) extends Bytes.Vec with YXML.Source {
+) extends YXML.Source {
   bytes =>
 
   assert(
@@ -499,7 +489,7 @@
       }
       utf8
 
-      if (utf8) UTF8.decode_permissive_bytes(bytes)
+      if (utf8) UTF8.decode_permissive(bytes)
       else new String(make_array, UTF8.charset)
     }
 
--- a/src/Pure/General/scan.scala	Wed Jul 03 20:18:36 2024 +0200
+++ b/src/Pure/General/scan.scala	Wed Jul 03 20:35:10 2024 +0200
@@ -436,7 +436,7 @@
     reader.isInstanceOf[Byte_Reader]
 
   def reader_decode_utf8(is_utf8: Boolean, s: String): String =
-    if (is_utf8) UTF8.decode_permissive(s) else s
+    if (is_utf8) UTF8.decode_permissive(Bytes.raw(s)) else s
 
   def reader_decode_utf8(reader: Reader[Char], s: String): String =
     reader_decode_utf8(reader_is_utf8(reader), s)
--- a/src/Pure/General/utf8.scala	Wed Jul 03 20:18:36 2024 +0200
+++ b/src/Pure/General/utf8.scala	Wed Jul 03 20:35:10 2024 +0200
@@ -23,7 +23,7 @@
   // see also https://en.wikipedia.org/wiki/UTF-8#Description
   // overlong encodings enable byte-stuffing of low-ASCII
 
-  def decode_permissive_bytes(bytes: Bytes.Vec): String = {
+  def decode_permissive(bytes: Bytes): String = {
     val size = bytes.size
     val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt)
     var code = -1
@@ -60,19 +60,4 @@
     flush()
     buf.toString
   }
-
-  def decode_permissive(text: String): String = {
-    val relevant = {
-      var i = 0
-      val n = text.length
-      var found = false
-      while (i < n && !found) {
-        if (text(i) >= 128) { found = true }
-        i += 1
-      }
-      found
-    }
-    if (relevant) decode_permissive_bytes(new Bytes.Vec_String(text))
-    else text
-  }
 }
--- a/src/Tools/jEdit/jedit_main/scala_console.scala	Wed Jul 03 20:18:36 2024 +0200
+++ b/src/Tools/jEdit/jedit_main/scala_console.scala	Wed Jul 03 20:35:10 2024 +0200
@@ -50,7 +50,7 @@
 
     override def flush(): Unit = {
       val s = buf.synchronized { val s = buf.toString; buf.clear(); s }
-      val str = UTF8.decode_permissive(s)
+      val str = UTF8.decode_permissive(Bytes.raw(s))
       GUI_Thread.later {
         if (global_out == null) java.lang.System.out.print(str)
         else global_out.writeAttrs(null, str)