clarified signature: more accurate types;
authorwenzelm
Tue, 11 Jun 2024 15:49:39 +0200
changeset 80351 dbbe26afc319
parent 80350 96843eb96493
child 80352 7a6cba7c77c9
clarified signature: more accurate types;
src/Pure/General/bytes.scala
src/Pure/General/utf8.scala
--- a/src/Pure/General/bytes.scala	Tue Jun 11 14:18:19 2024 +0200
+++ b/src/Pure/General/bytes.scala	Tue Jun 11 15:49:39 2024 +0200
@@ -112,12 +112,27 @@
     using(new FileOutputStream(file, true))(bytes.write_stream(_))
 
   def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes)
+
+
+  /* vector of unsigned integers */
+
+  trait Vec {
+    def size: Long
+    def apply(i: Long): Int
+  }
+
+  class Vec_String(string: String) extends Vec {
+    override def size: Long = string.length.toLong
+    override def apply(i: Long): Int =
+      if (0 <= i && i < size) string(i.toInt).toInt
+      else throw new IndexOutOfBoundsException
+  }
 }
 
 final class Bytes private(
   protected val bytes: Array[Byte],
   protected val offset: Int,
-  val length: Int) extends CharSequence {
+  val length: Int) extends Bytes.Vec with CharSequence {
   /* equality */
 
   override def equals(that: Any): Boolean = {
@@ -195,11 +210,18 @@
     }
 
 
+  /* Vec operations */
+
+  def size: Long = length.toLong
+
+  def apply(i: Long): Int =
+    if (0 <= i && i < size) bytes((offset + i).toInt).asInstanceOf[Int] & 0xFF
+    else throw new IndexOutOfBoundsException
+
+
   /* CharSequence operations */
 
-  def charAt(i: Int): Char =
-    if (0 <= i && i < length) (bytes(offset + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
-    else throw new IndexOutOfBoundsException
+  def charAt(i: Int): Char = apply(i).asInstanceOf[Char]
 
   def subSequence(i: Int, j: Int): Bytes = {
     if (0 <= i && i <= j && j <= length) new Bytes(bytes, offset + i, j - i)
--- a/src/Pure/General/utf8.scala	Tue Jun 11 14:18:19 2024 +0200
+++ b/src/Pure/General/utf8.scala	Tue Jun 11 15:49:39 2024 +0200
@@ -23,9 +23,9 @@
   // see also https://en.wikipedia.org/wiki/UTF-8#Description
   // overlong encodings enable byte-stuffing of low-ASCII
 
-  def decode_permissive_bytes(bytes: CharSequence): String = {
-    val len = bytes.length
-    val buf = new java.lang.StringBuilder(len)
+  def decode_permissive_bytes(bytes: Bytes.Vec): String = {
+    val size = bytes.size
+    val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt)
     var code = -1
     var rest = 0
     def flush(): Unit = {
@@ -50,18 +50,18 @@
         rest -= 1
       }
     }
-    for (i <- 0 until len) {
-      val c = bytes.charAt(i)
-      if (c < 128) { flush(); buf.append(c) }
-      else if ((c & 0xC0) == 0x80) push(c & 0x3F)
-      else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
-      else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
-      else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
+    for (i <- 0L until size) {
+      val b = bytes(i)
+      if (b < 128) { flush(); buf.append(b.toChar) }
+      else if ((b & 0xC0) == 0x80) push(b & 0x3F)
+      else if ((b & 0xE0) == 0xC0) init(b & 0x1F, 1)
+      else if ((b & 0xF0) == 0xE0) init(b & 0x0F, 2)
+      else if ((b & 0xF8) == 0xF0) init(b & 0x07, 3)
     }
     flush()
     buf.toString
   }
 
   def decode_permissive(text: String): String =
-    decode_permissive_bytes(text)
+    decode_permissive_bytes(new Bytes.Vec_String(text))
 }