--- a/src/Pure/General/bytes.scala Tue Jun 11 14:18:19 2024 +0200
+++ b/src/Pure/General/bytes.scala Tue Jun 11 15:49:39 2024 +0200
@@ -112,12 +112,27 @@
using(new FileOutputStream(file, true))(bytes.write_stream(_))
def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes)
+
+
+ /* vector of unsigned integers */
+
+ trait Vec {
+ def size: Long
+ def apply(i: Long): Int
+ }
+
+ class Vec_String(string: String) extends Vec {
+ override def size: Long = string.length.toLong
+ override def apply(i: Long): Int =
+ if (0 <= i && i < size) string(i.toInt).toInt
+ else throw new IndexOutOfBoundsException
+ }
}
final class Bytes private(
protected val bytes: Array[Byte],
protected val offset: Int,
- val length: Int) extends CharSequence {
+ val length: Int) extends Bytes.Vec with CharSequence {
/* equality */
override def equals(that: Any): Boolean = {
@@ -195,11 +210,18 @@
}
+ /* Vec operations */
+
+ def size: Long = length.toLong
+
+ def apply(i: Long): Int =
+ if (0 <= i && i < size) bytes((offset + i).toInt).asInstanceOf[Int] & 0xFF
+ else throw new IndexOutOfBoundsException
+
+
/* CharSequence operations */
- def charAt(i: Int): Char =
- if (0 <= i && i < length) (bytes(offset + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
- else throw new IndexOutOfBoundsException
+ def charAt(i: Int): Char = apply(i).asInstanceOf[Char]
def subSequence(i: Int, j: Int): Bytes = {
if (0 <= i && i <= j && j <= length) new Bytes(bytes, offset + i, j - i)
--- a/src/Pure/General/utf8.scala Tue Jun 11 14:18:19 2024 +0200
+++ b/src/Pure/General/utf8.scala Tue Jun 11 15:49:39 2024 +0200
@@ -23,9 +23,9 @@
// see also https://en.wikipedia.org/wiki/UTF-8#Description
// overlong encodings enable byte-stuffing of low-ASCII
- def decode_permissive_bytes(bytes: CharSequence): String = {
- val len = bytes.length
- val buf = new java.lang.StringBuilder(len)
+ def decode_permissive_bytes(bytes: Bytes.Vec): String = {
+ val size = bytes.size
+ val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt)
var code = -1
var rest = 0
def flush(): Unit = {
@@ -50,18 +50,18 @@
rest -= 1
}
}
- for (i <- 0 until len) {
- val c = bytes.charAt(i)
- if (c < 128) { flush(); buf.append(c) }
- else if ((c & 0xC0) == 0x80) push(c & 0x3F)
- else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
- else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
- else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
+ for (i <- 0L until size) {
+ val b = bytes(i)
+ if (b < 128) { flush(); buf.append(b.toChar) }
+ else if ((b & 0xC0) == 0x80) push(b & 0x3F)
+ else if ((b & 0xE0) == 0xC0) init(b & 0x1F, 1)
+ else if ((b & 0xF0) == 0xE0) init(b & 0x0F, 2)
+ else if ((b & 0xF8) == 0xF0) init(b & 0x07, 3)
}
flush()
buf.toString
}
def decode_permissive(text: String): String =
- decode_permissive_bytes(text)
+ decode_permissive_bytes(new Bytes.Vec_String(text))
}