merged

author     wenzelm
date       Sun, 16 Jun 2024 21:54:09 +0200
changeset  80396 94875d8cc8bd
parent     80349 2503ff5d29ce (current diff)
parent     80395 46135b44b1a3 (diff)
child      80397 7e0cbc6600b9

files:
  src/Pure/Build/build_manager.scala
  src/Pure/System/web_app.scala
--- a/src/HOL/Tools/Mirabelle/mirabelle.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/HOL/Tools/Mirabelle/mirabelle.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -72,7 +72,7 @@
                   progress.echo(
                     "Mirabelle export " + quote(args.compound_name) + " (in " + session_name + ")",
                     verbose = true)
-                  val yxml = YXML.parse_body(UTF8.decode_permissive(msg.chunk), cache = build_results0.cache)
+                  val yxml = YXML.parse_body(msg.chunk.text, cache = build_results0.cache)
                   val lines = Pretty.string_of(yxml).trim()
                   val prefix =
                     Export.explode_name(args.name) match {
--- a/src/Pure/Build/build.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/Build/build.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -715,7 +715,7 @@
     unicode_symbols: Boolean = false
   ): Option[Document.Snapshot] = {
     def decode_bytes(bytes: Bytes): String =
-      Symbol.output(unicode_symbols, UTF8.decode_permissive(bytes))
+      Symbol.output(unicode_symbols, bytes.text)
 
     def read(name: String): Export.Entry = theory_context(name, permissive = true)
 
--- a/src/Pure/Build/build_manager.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/Build/build_manager.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -1179,7 +1179,7 @@
       val head =
         List(
           HTML.title("Isabelle Build Manager"),
-          Web_App.More_HTML.icon("data:image/x-icon;base64," + logo.encode_base64),
+          Web_App.More_HTML.icon("data:image/x-icon;base64," + logo.encode_base64.text),
           HTML.style_file("https://hawkz.github.io/gdcss/gd.css"),
           HTML.style("html { background-color: white; }"))
     }
--- a/src/Pure/Build/export.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/Build/export.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -194,7 +194,7 @@
 
     def text: String = bytes.text
 
-    def yxml: XML.Body = YXML.parse_body(UTF8.decode_permissive(bytes), cache = cache)
+    def yxml: XML.Body = YXML.parse_body(bytes.text, cache = cache)
   }
 
   def make_regex(pattern: String): Regex = {
--- a/src/Pure/General/base64.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/base64.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -1,26 +1,34 @@
 /*  Title:      Pure/General/base64.scala
     Author:     Makarius
 
-Support for Base64 data encoding.
+Support for Base64 data representation.
 */
 
 package isabelle
 
 
+import java.io.{InputStream, OutputStream}
+
+
 object Base64 {
+  /* main operations */
+
   def encode(a: Array[Byte]): String = java.util.Base64.getEncoder.encodeToString(a)
   def decode(s: String): Array[Byte] = java.util.Base64.getDecoder.decode(s)
 
+  def encode_stream(s: OutputStream): OutputStream = java.util.Base64.getEncoder.wrap(s)
+  def decode_stream(s: InputStream): InputStream = java.util.Base64.getDecoder.wrap(s)
+
 
   /* Scala functions in ML */
 
   object Decode extends Scala.Fun_Bytes("Base64.decode") {
     val here = Scala_Project.here
-    def apply(arg: Bytes): Bytes = Bytes.decode_base64(arg.text)
+    def apply(bytes: Bytes): Bytes = bytes.decode_base64
   }
 
   object Encode extends Scala.Fun_Bytes("Base64.encode") {
     val here = Scala_Project.here
-    def apply(arg: Bytes): Bytes = Bytes(arg.encode_base64)
+    def apply(bytes: Bytes): Bytes = bytes.encode_base64
   }
 }
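
The new encode_stream/decode_stream operations above are thin wrappers around the
streaming codec of java.util.Base64. A minimal standalone round-trip sketch (plain
Scala, independent of the Isabelle classpath; object and value names are illustrative):

    import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

    object Base64_Stream_Demo {
      def main(args: Array[String]): Unit = {
        val data = "Isabelle".getBytes("UTF-8")

        // encode: wrap an OutputStream and close it to flush the final padding
        val buf = new ByteArrayOutputStream
        val enc = java.util.Base64.getEncoder.wrap(buf)
        enc.write(data)
        enc.close()
        val encoded = buf.toByteArray  // "SXNhYmVsbGU="

        // decode: wrap an InputStream and read everything back
        val dec = java.util.Base64.getDecoder.wrap(new ByteArrayInputStream(encoded))
        val decoded = dec.readAllBytes()
        assert(java.util.Arrays.equals(data, decoded))
      }
    }
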
--- a/src/Pure/General/bytes.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/bytes.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -16,80 +16,92 @@
 import org.tukaani.xz
 import com.github.luben.zstd
 
+import scala.collection.mutable.ArrayBuffer
+
 
 object Bytes {
-  val empty: Bytes = new Bytes(Array[Byte](), 0, 0)
+  /* internal sizes */
+
+  private val array_size: Long = Int.MaxValue - 8  // see java.io.InputStream.MAX_BUFFER_SIZE
+  private val block_size: Int = 16384  // see java.io.InputStream.DEFAULT_BUFFER_SIZE
+  private val chunk_size: Long = Space.MiB(100).bytes
+
+  private def make_size(chunks: Array[Array[Byte]], buffer: Array[Byte]): Long =
+    chunks.foldLeft(buffer.length.toLong)((n, chunk) => n + chunk.length)
 
-  def apply(s: CharSequence): Bytes = {
-    val str = s.toString
-    if (str.isEmpty) empty
-    else {
-      val b = UTF8.bytes(str)
-      new Bytes(b, 0, b.length)
-    }
+  class Too_Large(size: Long) extends IndexOutOfBoundsException {
+    override def getMessage: String =
+      "Bytes too large for particular operation: " +
+        Space.bytes(size).print + " > " + Space.bytes(array_size).print
   }
 
+
+  /* main constructors */
+
+  private def reuse_array(bytes: Array[Byte]): Bytes =
+    if (bytes.length <= chunk_size) new Bytes(None, bytes, 0L, bytes.length.toLong)
+    else apply(bytes)
+
+  val empty: Bytes = reuse_array(new Array(0))
+
+  def apply(s: CharSequence): Bytes =
+    if (s.isEmpty) empty
+    else Builder.use(hint = s.length) { builder => builder += s }
+
   def apply(a: Array[Byte]): Bytes = apply(a, 0, a.length)
 
   def apply(a: Array[Byte], offset: Int, length: Int): Bytes =
-    if (length == 0) empty
-    else {
-      val b = new Array[Byte](length)
-      System.arraycopy(a, offset, b, 0, length)
-      new Bytes(b, 0, b.length)
-    }
+    Builder.use(hint = length) { builder => builder += (a, offset, length) }
 
   val newline: Bytes = apply("\n")
 
 
-  /* base64 */
-
-  def decode_base64(s: String): Bytes = {
-    val a = Base64.decode(s)
-    new Bytes(a, 0, a.length)
-  }
-
-
   /* read */
 
-  def read_stream(stream: InputStream, limit: Int = Int.MaxValue, hint: Int = 1024): Bytes =
+  def read_stream(stream: InputStream, limit: Long = -1L, hint: Long = 0L): Bytes = {
     if (limit == 0) empty
     else {
-      val out_size = (if (limit == Int.MaxValue) hint else limit) max 1024
-      val out = new ByteArrayOutputStream(out_size)
-      val buf = new Array[Byte](8192)
-      var m = 0
-
-      while ({
-        m = stream.read(buf, 0, buf.length min (limit - out.size))
-        if (m != -1) out.write(buf, 0, m)
-        m != -1 && limit > out.size
-      }) ()
-
-      new Bytes(out.toByteArray, 0, out.size)
+      Builder.use(hint = if (limit > 0) limit else hint) { builder =>
+        val buf = new Array[Byte](block_size)
+        var m = 0
+        var n = 0L
+        while ({
+          val l = if (limit > 0) ((limit - n) min buf.length).toInt else buf.length
+          m = stream.read(buf, 0, l)
+          if (m != -1) {
+            builder += (buf, 0, m)
+            n += m
+          }
+          m != -1 && (limit < 0 || limit > n)
+        }) ()
+      }
     }
+  }
 
   def read_url(name: String): Bytes = using(Url(name).open_stream())(read_stream(_))
 
-  def read_file(path: Path, offset: Long = 0L, limit: Long = Long.MaxValue): Bytes = {
+  def read_file(path: Path, offset: Long = 0L, limit: Long = -1L): Bytes = {
     val length = File.size(path)
     val start = offset.max(0L)
-    val len = (length - start).max(0L).min(limit)
-    if (len > Int.MaxValue) error("Cannot read large file slice: " + Space.bytes(len).print)
-    else if (len == 0L) empty
+    val stop = (length - start).max(0L).min(if (limit < 0) Long.MaxValue else limit)
+    if (stop == 0L) empty
     else {
-      using(FileChannel.open(path.java_path, StandardOpenOption.READ)) { java_path =>
-        java_path.position(start)
-        val n = len.toInt
-        val buf = ByteBuffer.allocate(n)
-        var i = 0
-        var m = 0
-        while ({
-          m = java_path.read(buf)
-          if (m != -1) i += m
-          m != -1 && n > i
-        }) ()
-        new Bytes(buf.array, 0, i)
+      Builder.use(hint = stop) { builder =>
+        using(FileChannel.open(path.java_path, StandardOpenOption.READ)) { channel =>
+          channel.position(start)
+          val buf = ByteBuffer.allocate(block_size)
+          var m = 0
+          var n = 0L
+          while ({
+            m = channel.read(buf)
+            if (m != -1) {
+              builder += (buf.array(), 0, m)
+              buf.clear()
+              n += m
+            }
+            m != -1 && stop > n
+          }) ()
+        }
       }
     }
   }
@@ -112,147 +124,415 @@
     using(new FileOutputStream(file, true))(bytes.write_stream(_))
 
   def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes)
+
+
+  /* vector of short unsigned integers */
+
+  trait Vec {
+    def size: Long
+    def apply(i: Long): Char
+  }
+
+  class Vec_String(string: String) extends Vec {
+    override def size: Long = string.length.toLong
+    override def apply(i: Long): Char =
+      if (0 <= i && i < size) string(i.toInt)
+      else throw new IndexOutOfBoundsException
+  }
+
+
+  /* incremental builder: unsynchronized! */
+
+  object Builder {
+    def use(hint: Long = 0L)(body: Builder => Unit): Bytes = {
+      val builder = new Builder(hint)
+      body(builder)
+      builder.done()
+    }
+
+    def use_stream(hint: Long = 0L)(body: OutputStream => Unit): Bytes = {
+      val stream = new Stream(hint = hint)
+      body(stream)
+      stream.builder.done()
+    }
+
+    private class Stream(hint: Long = 0L) extends OutputStream {
+      val builder = new Builder(hint)
+
+      override def write(b: Int): Unit =
+        { builder += b.toByte }
+
+      override def write(array: Array[Byte], offset: Int, length: Int): Unit =
+        { builder += (array, offset, length) }
+    }
+  }
+
+  final class Builder private[Bytes](hint: Long) {
+    private var size = 0L
+    private var chunks =
+      new ArrayBuffer[Array[Byte]](if (hint <= 0) 16 else (hint / chunk_size).toInt)
+    private var buffer =
+      new ByteArrayOutputStream(if (hint <= 0) 1024 else (hint min chunk_size min array_size).toInt)
+
+    private def buffer_free(): Int = chunk_size.toInt - buffer.size()
+    private def buffer_check(): Unit =
+      if (buffer_free() == 0) {
+        chunks += buffer.toByteArray
+        buffer = new ByteArrayOutputStream(chunk_size.toInt)
+      }
+
+    def += (b: Byte): Unit = {
+      size += 1
+      buffer.write(b)
+      buffer_check()
+    }
+
+    def += (s: CharSequence): Unit = {
+      val n = s.length
+      if (n > 0) {
+        if (UTF8.relevant(s)) { this += UTF8.bytes(s.toString) }
+        else {
+          var i = 0
+          while (i < n) {
+            buffer.write(s.charAt(i).toByte)
+            size += 1
+            i += 1
+            buffer_check()
+          }
+        }
+      }
+    }
+
+    def += (array: Array[Byte], offset: Int, length: Int): Unit = {
+      if (offset < 0 || length < 0 || offset.toLong + length.toLong > array.length) {
+        throw new IndexOutOfBoundsException
+      }
+      else if (length > 0) {
+        var i = offset
+        var n = length
+        while (n > 0) {
+          val m = buffer_free()
+          if (m > 0) {
+            val l = m min n
+            buffer.write(array, i, l)
+            size += l
+            i += l
+            n -= l
+          }
+          buffer_check()
+        }
+      }
+    }
+
+    def += (array: Array[Byte]): Unit = { this += (array, 0, array.length) }
+
+    def += (a: Subarray): Unit = { this += (a.array, a.offset, a.length) }
+
+    private def done(): Bytes = {
+      val cs = chunks.toArray
+      val b = buffer.toByteArray
+      chunks = null
+      buffer = null
+      new Bytes(if (cs.isEmpty) None else Some(cs), b, 0L, size)
+    }
+  }
+
+
+  /* subarray */
+
+  object Subarray {
+    val empty: Subarray = new Subarray(new Array[Byte](0), 0, 0)
+
+    def apply(array: Array[Byte], offset: Int, length: Int): Subarray = {
+      val n = array.length
+      if (0 <= offset && offset < n && 0 <= length && offset + length <= n) {
+        if (length == 0) empty
+        else new Subarray(array, offset, length)
+      }
+      else throw new IndexOutOfBoundsException
+    }
+  }
+
+  final class Subarray private(
+    val array: Array[Byte],
+    val offset: Int,
+    val length: Int
+  ) {
+    override def toString: String = "Bytes.Subarray(" + Space.bytes(length).print + ")"
+
+    def byte_iterator: Iterator[Byte] =
+      if (length == 0) Iterator.empty
+      else { for (i <- (offset until (offset + length)).iterator) yield array(i) }
+  }
 }
 
 final class Bytes private(
-  protected val bytes: Array[Byte],
-  protected val offset: Int,
-  val length: Int) extends CharSequence {
-  /* equality */
+  protected val chunks: Option[Array[Array[Byte]]],
+  protected val chunk0: Array[Byte],
+  protected val offset: Long,
+  val size: Long
+) extends Bytes.Vec {
+  assert(
+    (chunks.isEmpty ||
+      chunks.get.nonEmpty &&
+      chunks.get.forall(chunk => chunk.length == Bytes.chunk_size)) &&
+    chunk0.length < Bytes.chunk_size)
+
+  def small_size: Int =
+    if (size > Bytes.array_size) throw new Bytes.Too_Large(size)
+    else size.toInt
+
+  def is_empty: Boolean = size == 0
+
+  def proper: Option[Bytes] = if (is_empty) None else Some(this)
+
+  def is_sliced: Boolean =
+    offset != 0L || {
+      chunks match {
+        case None => size != chunk0.length
+        case Some(cs) => size != Bytes.make_size(cs, chunk0)
+      }
+    }
+
+  override def toString: String =
+    if (is_empty) "Bytes.empty"
+    else "Bytes(" + Space.bytes(size).print + if_proper(is_sliced, ", sliced") + ")"
+
+
+  /* elements: signed Byte or unsigned Char */
+
+  protected def byte_unchecked(i: Long): Byte = {
+    val a = offset + i
+    chunks match {
+      case None => chunk0(a.toInt)
+      case Some(cs) =>
+        val b = a % Bytes.chunk_size
+        val c = a / Bytes.chunk_size
+        if (c < cs.length) cs(c.toInt)(b.toInt) else chunk0(b.toInt)
+    }
+  }
+
+  def byte(i: Long): Byte =
+    if (0 <= i && i < size) byte_unchecked(i)
+    else throw new IndexOutOfBoundsException
+
+  def apply(i: Long): Char = (byte(i).toInt & 0xff).toChar
+
+  protected def subarray_iterator: Iterator[Bytes.Subarray] =
+    if (is_empty) Iterator.empty
+    else if (chunks.isEmpty) Iterator(Bytes.Subarray(chunk0, offset.toInt, size.toInt))
+    else {
+      val end_offset = offset + size
+      for ((array, index) <- (chunks.get.iterator ++ Iterator(chunk0)).zipWithIndex) yield {
+        val array_start = Bytes.chunk_size * index
+        val array_stop = array_start + array.length
+        if (offset < array_stop && array_start < end_offset) {
+          val i = (array_start max offset) - array_start
+          val j = (array_stop min end_offset) - array_start
+          Bytes.Subarray(array, i.toInt, (j - i).toInt)
+        }
+        else Bytes.Subarray.empty
+      }
+    }
+
+  def byte_iterator: Iterator[Byte] =
+    for {
+      a <- subarray_iterator
+      b <- a.byte_iterator
+    } yield b
+
+
+  /* slice */
+
+  def slice(i: Long, j: Long): Bytes =
+    if (0 <= i && i <= j && j <= size) {
+      if (i == j) Bytes.empty
+      else new Bytes(chunks, chunk0, offset + i, j - i)
+    }
+    else throw new IndexOutOfBoundsException
+
+  def unslice: Bytes =
+    if (is_sliced) {
+      Bytes.Builder.use(hint = size) { builder =>
+        for (a <- subarray_iterator) { builder += a }
+      }
+    }
+    else this
+
+  def trim_line: Bytes =
+    if (size >= 2 && byte_unchecked(size - 2) == 13 && byte_unchecked(size - 1) == 10) {
+      slice(0, size - 2)
+    }
+    else if (size >= 1 && (byte_unchecked(size - 1) == 13 || byte_unchecked(size - 1) == 10)) {
+      slice(0, size - 1)
+    }
+    else this
+
+
+  /* hash and equality */
+
+  lazy val sha1_digest: SHA1.Digest =
+    if (is_empty) SHA1.digest_empty
+    else {
+      SHA1.make_digest { sha =>
+        for (a <- subarray_iterator if a.length > 0) {
+          sha.update(a.array, a.offset, a.length)
+        }
+      }
+    }
+
+  override def hashCode(): Int = sha1_digest.hashCode()
 
   override def equals(that: Any): Boolean = {
     that match {
       case other: Bytes =>
-        this.eq(other) ||
-        Arrays.equals(bytes, offset, offset + length,
-          other.bytes, other.offset, other.offset + other.length)
+        if (this.eq(other)) true
+        else if (size != other.size) false
+        else {
+          if (chunks.isEmpty && other.chunks.isEmpty) {
+            Arrays.equals(chunk0, offset.toInt, (offset + size).toInt,
+              other.chunk0, other.offset.toInt, (other.offset + other.size).toInt)
+          }
+          else if (!is_sliced && !other.is_sliced) {
+            (subarray_iterator zip other.subarray_iterator)
+              .forall((a, b) => Arrays.equals(a.array, b.array))
+          }
+          else sha1_digest == other.sha1_digest
+        }
       case _ => false
     }
   }
 
-  private lazy val hash: Int = {
-    var h = 0
-    for (i <- offset until offset + length) {
-      val b = bytes(i).asInstanceOf[Int] & 0xFF
-      h = 31 * h + b
-    }
-    h
-  }
-
-  override def hashCode(): Int = hash
-
 
   /* content */
 
-  lazy val sha1_digest: SHA1.Digest = SHA1.digest(bytes)
-
-  def is_empty: Boolean = length == 0
+  def + (other: Bytes): Bytes =
+    if (other.is_empty) this
+    else if (is_empty) other
+    else {
+      Bytes.Builder.use(hint = size + other.size) { builder =>
+        for (a <- subarray_iterator ++ other.subarray_iterator) {
+          builder += a
+        }
+      }
+    }
 
-  def iterator: Iterator[Byte] =
-    for (i <- (offset until (offset + length)).iterator)
-      yield bytes(i)
-
-  def array: Array[Byte] = {
-    val a = new Array[Byte](length)
-    System.arraycopy(bytes, offset, a, 0, length)
-    a
+  def make_array: Array[Byte] = {
+    val buf = new ByteArrayOutputStream(small_size)
+    for (a <- subarray_iterator) { buf.write(a.array, a.offset, a.length) }
+    buf.toByteArray
   }
 
-  def text: String = UTF8.decode_permissive(this)
+  def text: String =
+    if (is_empty) ""
+    else {
+      var i = 0L
+      var utf8 = false
+      while (i < size && !utf8) {
+        if (byte_unchecked(i) < 0) { utf8 = true }
+        i += 1
+      }
+      utf8
 
-  def wellformed_text: Option[String] = {
-    val s = text
-    if (this == Bytes(s)) Some(s) else None
-  }
+      if (utf8) UTF8.decode_permissive_bytes(this)
+      else new String(make_array, UTF8.charset)
+    }
 
-  def encode_base64: String = {
-    val b =
-      if (offset == 0 && length == bytes.length) bytes
-      else Bytes(bytes, offset, length).bytes
-    Base64.encode(b)
-  }
+  def wellformed_text: Option[String] =
+    try {
+      val s = text
+      if (this == Bytes(s)) Some(s) else None
+    }
+    catch { case ERROR(_) => None }
+
+
+  /* Base64 data representation */
+
+  def encode_base64: Bytes =
+    Bytes.Builder.use_stream(hint = (size * 1.5).round) { out =>
+      using(Base64.encode_stream(out))(write_stream(_))
+    }
+
+  def decode_base64: Bytes =
+    using(Base64.decode_stream(stream()))(Bytes.read_stream(_, hint = (size / 1.2).round))
 
   def maybe_encode_base64: (Boolean, String) =
     wellformed_text match {
       case Some(s) => (false, s)
-      case None => (true, encode_base64)
-    }
-
-  override def toString: String =
-    if (is_empty) "Bytes.empty" else "Bytes(" + Space.bytes(length).print + ")"
-
-  def proper: Option[Bytes] = if (is_empty) None else Some(this)
-  def proper_text: Option[String] = if (is_empty) None else Some(text)
-
-  def +(other: Bytes): Bytes =
-    if (other.is_empty) this
-    else if (is_empty) other
-    else {
-      val new_bytes = new Array[Byte](length + other.length)
-      System.arraycopy(bytes, offset, new_bytes, 0, length)
-      System.arraycopy(other.bytes, other.offset, new_bytes, length, other.length)
-      new Bytes(new_bytes, 0, new_bytes.length)
+      case None => (true, encode_base64.text)
     }
 
 
-  /* CharSequence operations */
-
-  def charAt(i: Int): Char =
-    if (0 <= i && i < length) (bytes(offset + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
-    else throw new IndexOutOfBoundsException
-
-  def subSequence(i: Int, j: Int): Bytes = {
-    if (0 <= i && i <= j && j <= length) new Bytes(bytes, offset + i, j - i)
-    else throw new IndexOutOfBoundsException
-  }
-
-  def trim_line: Bytes =
-    if (length >= 2 && charAt(length - 2) == 13 && charAt(length - 1) == 10) {
-      subSequence(0, length - 2)
-    }
-    else if (length >= 1 && (charAt(length - 1) == 13 || charAt(length - 1) == 10)) {
-      subSequence(0, length - 1)
-    }
-    else this
-
-
   /* streams */
 
-  def stream(): ByteArrayInputStream = new ByteArrayInputStream(bytes, offset, length)
+  def stream(): InputStream =
+    if (chunks.isEmpty) new ByteArrayInputStream(chunk0, offset.toInt, size.toInt)
+    else {
+      new InputStream {
+        private var index = 0L
+
+        def read(): Int = {
+          if (index < size) {
+            val res = byte_unchecked(index).toInt & 0xff
+            index += 1
+            res
+          }
+          else -1
+        }
 
-  def write_stream(stream: OutputStream): Unit = stream.write(bytes, offset, length)
+        override def read(buffer: Array[Byte], start: Int, length: Int): Int = {
+          if (length < 16) super.read(buffer, start, length)
+          else {
+            val index0 = index
+            index = (index + length) min size
+            val n = (index - index0).toInt
+            if (n == 0) -1
+            else {
+              var i = start
+              for (a <- slice(index0, index).subarray_iterator) {
+                val l = a.length
+                if (l > 0) {
+                  System.arraycopy(a.array, a.offset, buffer, i, l)
+                  i += l
+                }
+              }
+              n
+            }
+          }
+        }
+      }
+    }
+
+  def write_stream(stream: OutputStream): Unit =
+    for (a <- subarray_iterator if a.length > 0) {
+      stream.write(a.array, a.offset, a.length)
+    }
 
 
   /* XZ / Zstd data compression */
 
   def detect_xz: Boolean =
-    length >= 6 &&
-      bytes(offset)     == 0xFD.toByte &&
-      bytes(offset + 1) == 0x37.toByte &&
-      bytes(offset + 2) == 0x7A.toByte &&
-      bytes(offset + 3) == 0x58.toByte &&
-      bytes(offset + 4) == 0x5A.toByte &&
-      bytes(offset + 5) == 0x00.toByte
+    size >= 6 &&
+      byte_unchecked(0) == 0xFD.toByte &&
+      byte_unchecked(1) == 0x37.toByte &&
+      byte_unchecked(2) == 0x7A.toByte &&
+      byte_unchecked(3) == 0x58.toByte &&
+      byte_unchecked(4) == 0x5A.toByte &&
+      byte_unchecked(5) == 0x00.toByte
 
   def detect_zstd: Boolean =
-    length >= 4 &&
-      bytes(offset)     == 0x28.toByte &&
-      bytes(offset + 1) == 0xB5.toByte &&
-      bytes(offset + 2) == 0x2F.toByte &&
-      bytes(offset + 3) == 0xFD.toByte
+    size >= 4 &&
+      byte_unchecked(0) == 0x28.toByte &&
+      byte_unchecked(1) == 0xB5.toByte &&
+      byte_unchecked(2) == 0x2F.toByte &&
+      byte_unchecked(3) == 0xFD.toByte
 
   def uncompress_xz(cache: Compress.Cache = Compress.Cache.none): Bytes =
-    using(new xz.XZInputStream(stream(), cache.for_xz))(Bytes.read_stream(_, hint = length))
+    using(new xz.XZInputStream(stream(), cache.for_xz))(Bytes.read_stream(_, hint = size))
 
   def uncompress_zstd(cache: Compress.Cache = Compress.Cache.none): Bytes = {
     Zstd.init()
-    val n = zstd.Zstd.decompressedSize(bytes, offset, length)
-    if (n > 0 && n < Int.MaxValue) {
-      Bytes(zstd.Zstd.decompress(array, n.toInt))
-    }
-    else {
-      using(new zstd.ZstdInputStream(stream(), cache.for_zstd))(Bytes.read_stream(_, hint = length))
-    }
+    using(new zstd.ZstdInputStream(stream(), cache.for_zstd))(Bytes.read_stream(_, hint = size))
   }
 
   def uncompress(cache: Compress.Cache = Compress.Cache.none): Bytes =
@@ -264,14 +544,15 @@
     options: Compress.Options = Compress.Options(),
     cache: Compress.Cache = Compress.Cache.none
   ): Bytes = {
-    options match {
-      case options_xz: Compress.Options_XZ =>
-        val result = new ByteArrayOutputStream(length)
-        using(new xz.XZOutputStream(result, options_xz.make, cache.for_xz))(write_stream)
-        new Bytes(result.toByteArray, 0, result.size)
-      case options_zstd: Compress.Options_Zstd =>
-        Zstd.init()
-        Bytes(zstd.Zstd.compress(if (offset == 0) bytes else array, options_zstd.level))
+    Bytes.Builder.use_stream(hint = size) { out =>
+      using(
+        options match {
+          case options_xz: Compress.Options_XZ =>
+            new xz.XZOutputStream(out, options_xz.make, cache.for_xz)
+          case options_zstd: Compress.Options_Zstd =>
+            new zstd.ZstdOutputStream(out, cache.for_zstd, options_zstd.level)
+        }
+      ) { s => for (a <- subarray_iterator) s.write(a.array, a.offset, a.length) }
     }
   }
 
@@ -280,6 +561,6 @@
     cache: Compress.Cache = Compress.Cache.none
   ) : (Boolean, Bytes) = {
     val compressed = compress(options = options, cache = cache)
-    if (compressed.length < length) (true, compressed) else (false, this)
+    if (compressed.size < size) (true, compressed) else (false, this)
   }
 }
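
The reworked Bytes above stores large content as a sequence of fixed-size chunks and
is constructed incrementally via Bytes.Builder. A short usage sketch, assuming the
Isabelle/Scala classpath at this revision and using only operations visible in the
diff (the demo object itself is hypothetical):

    import isabelle._

    object Bytes_Demo {
      def main(args: Array[String]): Unit = {
        // incremental construction, analogous to read_stream/read_file above
        val bytes =
          Bytes.Builder.use(hint = 16L) { builder =>
            builder += "hello, "
            builder += UTF8.bytes("world")
          }

        val hello = bytes.slice(0L, 5L)   // shares the underlying chunks, no copying
        println(hello.text)               // "hello"
        println(bytes.size)               // 12, now a Long instead of an Int length
        println(bytes.encode_base64.text) // Base64 text of the content
      }
    }
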
--- a/src/Pure/General/graphics_file.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/graphics_file.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -51,7 +51,7 @@
       val params = new DefaultFontMapper.BaseFontParameters(File.platform_path(entry.path))
       params.encoding = BaseFont.IDENTITY_H
       params.embedded = true
-      params.ttfAfm = entry.bytes.array
+      params.ttfAfm = entry.bytes.make_array
       mapper.putName(entry.name, params)
     }
     mapper
--- a/src/Pure/General/http.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/http.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -50,7 +50,7 @@
     val encoding: String,
     val elapsed_time: Time
   ) {
-    def text: String = new String(bytes.array, encoding)
+    def text: String = new String(bytes.make_array, encoding)
     def json: JSON.T = JSON.parse(text)
   }
 
@@ -227,7 +227,7 @@
         val encoded_digest = Base64.encode(HexFormat.of().parseHex(SHA1.digest(output).toString))
         http.getResponseHeaders.set("Content-Digest", "sha=:" + encoded_digest + ":")
       }
-      http.sendResponseHeaders(code, if (is_head) -1 else output.length.toLong)
+      http.sendResponseHeaders(code, if (is_head) -1 else output.size)
       if (!is_head) using(http.getResponseBody)(output.write_stream)
     }
   }
--- a/src/Pure/General/sha1.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/sha1.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -33,6 +33,9 @@
     new Digest(Setup_Build.make_digest(digest_body))
   }
 
+  val digest_empty: Digest = make_digest(_ => ())
+  def digest_length: Int = digest_empty.toString.length
+
   def digest(file: JFile): Digest =
     make_digest(sha => using(new FileInputStream(file)) { stream =>
       val buf = new Array[Byte](65536)
@@ -46,11 +49,11 @@
 
   def digest(path: Path): Digest = digest(path.file)
   def digest(bytes: Array[Byte]): Digest = make_digest(_.update(bytes))
+  def digest(bytes: Array[Byte], offset: Int, length: Int): Digest =
+    make_digest(_.update(bytes, offset, length))
   def digest(bytes: Bytes): Digest = bytes.sha1_digest
   def digest(string: String): Digest = digest(Bytes(string))
 
-  val digest_length: Int = digest("").toString.length
-
 
   /* shasum */
 
--- a/src/Pure/General/sql.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/sql.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -329,7 +329,8 @@
     object bytes {
       def update(i: Int, bytes: Bytes): Unit = {
         if (bytes == null) rep.setBytes(i, null)
-        else rep.setBinaryStream(i, bytes.stream(), bytes.length)
+        else if (bytes.size > Int.MaxValue) throw new IllegalArgumentException
+        else rep.setBinaryStream(i, bytes.stream(), bytes.size.toInt)
       }
       def update(i: Int, bytes: Option[Bytes]): Unit = update(i, bytes.orNull)
     }
--- a/src/Pure/General/utf8.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/utf8.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -17,15 +17,26 @@
 
   def bytes(s: String): Array[Byte] = s.getBytes(charset)
 
+  def relevant(s: CharSequence): Boolean = {
+    var i = 0
+    val n = s.length
+    var utf8 = false
+    while (i < n && !utf8) {
+      if (s.charAt(i) >= 128) { utf8 = true }
+      i += 1
+    }
+    utf8
+  }
+
 
   /* permissive UTF-8 decoding */
 
   // see also https://en.wikipedia.org/wiki/UTF-8#Description
   // overlong encodings enable byte-stuffing of low-ASCII
 
-  def decode_permissive(text: CharSequence): String = {
-    val len = text.length
-    val buf = new java.lang.StringBuilder(len)
+  def decode_permissive_bytes(bytes: Bytes.Vec): String = {
+    val size = bytes.size
+    val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt)
     var code = -1
     var rest = 0
     def flush(): Unit = {
@@ -50,8 +61,8 @@
         rest -= 1
       }
     }
-    for (i <- 0 until len) {
-      val c = text.charAt(i)
+    for (i <- 0L until size) {
+      val c: Char = bytes(i)
       if (c < 128) { flush(); buf.append(c) }
       else if ((c & 0xC0) == 0x80) push(c & 0x3F)
       else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
@@ -61,4 +72,8 @@
     flush()
     buf.toString
   }
+
+  def decode_permissive(text: String): String =
+    if (relevant(text)) decode_permissive_bytes(new Bytes.Vec_String(text))
+    else text
 }
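
The permissive UTF-8 decoding above now operates on Bytes.Vec and is applied to a
String only when UTF8.relevant detects non-ASCII characters. A small sketch, assuming
the Isabelle/Scala classpath (the demo object is hypothetical):

    import isabelle._

    object UTF8_Demo {
      def main(args: Array[String]): Unit = {
        println(UTF8.relevant("plain ascii"))           // false: nothing to decode
        println(UTF8.decode_permissive("plain ascii"))  // returned unchanged

        // byte-stuffed string: each char carries one raw UTF-8 byte (0xE2 0x88 0x80 = "∀")
        val raw = Array[Byte](0xE2.toByte, 0x88.toByte, 0x80.toByte)
        val stuffed = new String(raw.map(b => (b & 0xff).toChar))
        println(UTF8.decode_permissive(stuffed))        // "∀"
      }
    }
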
--- a/src/Pure/ML/ml_heap.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/ML/ml_heap.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -16,7 +16,7 @@
   def read_file_digest(heap: Path): Option[SHA1.Digest] = {
     if (heap.is_file) {
       val bs = Bytes.read_file(heap, offset = File.size(heap) - sha1_length)
-      if (bs.length == sha1_length) {
+      if (bs.size == sha1_length) {
         val s = bs.text
         if (s.startsWith(sha1_prefix)) Some(SHA1.fake_digest(s.substring(sha1_prefix.length)))
         else None
--- a/src/Pure/PIDE/byte_message.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/PIDE/byte_message.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -6,7 +6,7 @@
 
 package isabelle
 
-import java.io.{ByteArrayOutputStream, OutputStream, InputStream, IOException}
+import java.io.{OutputStream, InputStream, IOException}
 
 
 object Byte_Message {
@@ -27,45 +27,44 @@
 
   /* input operations */
 
-  def read(stream: InputStream, n: Int): Bytes =
+  def read(stream: InputStream, n: Long): Bytes =
     Bytes.read_stream(stream, limit = n)
 
-  def read_block(stream: InputStream, n: Int): (Option[Bytes], Int) = {
+  def read_block(stream: InputStream, n: Long): (Option[Bytes], Long) = {
     val msg = read(stream, n)
-    val len = msg.length
+    val len = msg.size
     (if (len == n) Some(msg) else None, len)
   }
 
   def read_line(stream: InputStream): Option[Bytes] = {
-    val line = new ByteArrayOutputStream(100)
     var c = 0
-    while ({ c = stream.read; c != -1 && c != 10 }) line.write(c)
-
-    if (c == -1 && line.size == 0) None
-    else {
-      val a = line.toByteArray
-      val n = a.length
-      val len = if (n > 0 && a(n - 1) == 13) n - 1 else n
-      Some(Bytes(a, 0, len))
-    }
+    val line =
+      Bytes.Builder.use(hint = 100) { builder =>
+        while ({ c = stream.read; c != -1 && c != 10 }) {
+          builder += c.toByte
+        }
+      }
+    if (c == -1 && line.size == 0) None else Some(line.trim_line)
   }
 
 
   /* messages with multiple chunks (arbitrary content) */
 
-  private def make_header(ns: List[Int]): List[Bytes] =
+  private def make_header(ns: List[Long]): List[Bytes] =
     List(Bytes(ns.mkString(",")), Bytes.newline)
 
   def write_message(stream: OutputStream, chunks: List[Bytes]): Unit = {
-    write(stream, make_header(chunks.map(_.length)) ::: chunks)
+    write(stream, make_header(chunks.map(_.size)) ::: chunks)
     flush(stream)
   }
 
-  private def parse_header(line: String): List[Int] =
-    try { space_explode(',', line).map(Value.Nat.parse) }
-    catch { case ERROR(_) => error("Malformed message header: " + quote(line)) }
+  private def parse_header(line: String): List[Long] = {
+    val args = space_explode(',', line)
+    if (args.forall(is_length)) args.map(Value.Long.parse)
+    else error("Malformed message header: " + quote(line))
+  }
 
-  private def read_chunk(stream: InputStream, n: Int): Bytes =
+  private def read_chunk(stream: InputStream, n: Long): Bytes =
     read_block(stream, n) match {
       case (Some(chunk), _) => chunk
       case (None, len) =>
@@ -78,32 +77,39 @@
 
   /* hybrid messages: line or length+block (restricted content) */
 
+  private def is_length(str: String): Boolean =
+    !str.isEmpty && str.iterator.forall(Symbol.is_ascii_digit) &&
+      Value.Long.unapply(str).isDefined
+
   private def is_length(msg: Bytes): Boolean =
-    !msg.is_empty && msg.iterator.forall(b => Symbol.is_ascii_digit(b.toChar))
+    !msg.is_empty && msg.byte_iterator.forall(b => Symbol.is_ascii_digit(b.toChar)) &&
+      Value.Long.unapply(msg.text).isDefined
 
-  private def is_terminated(msg: Bytes): Boolean = {
-    val len = msg.length
-    len > 0 && Symbol.is_ascii_line_terminator(msg.charAt(len - 1))
-  }
+  private def is_terminated(msg: Bytes): Boolean =
+    msg.size match {
+      case size if size > 0 =>
+        val c = msg(size - 1)
+        c <= Char.MaxValue && Symbol.is_ascii_line_terminator(c.toChar)
+      case _ => false
+    }
 
   def write_line_message(stream: OutputStream, msg: Bytes): Unit = {
-    if (is_length(msg) || is_terminated(msg))
+    if (is_length(msg) || is_terminated(msg)) {
       error ("Bad content for line message:\n" ++ msg.text.take(100))
+    }
 
-    val n = msg.length
+    val n = msg.size
     write(stream,
-      (if (n > 100 || msg.iterator.contains(10)) make_header(List(n + 1)) else Nil) :::
+      (if (n > 100 || msg.byte_iterator.contains(10)) make_header(List(n + 1)) else Nil) :::
         List(msg, Bytes.newline))
     flush(stream)
   }
 
   def read_line_message(stream: InputStream): Option[Bytes] =
     read_line(stream) match {
-      case None => None
-      case Some(line) =>
-        Value.Nat.unapply(line.text) match {
-          case None => Some(line)
-          case Some(n) => read_block(stream, n)._1.map(_.trim_line)
-        }
+      case Some(line) if is_length(line) =>
+        val n = Value.Long.parse(line.text)
+        read_block(stream, n)._1.map(_.trim_line)
+      case res => res
     }
 }
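
The widened Long lengths above keep the hybrid line/block protocol intact: a short
message without a line terminator travels as a plain line, while anything longer than
100 bytes (or containing a newline) gets a "length\n" header. A round-trip sketch,
assuming the Isabelle/Scala classpath (the demo object is hypothetical):

    import isabelle._
    import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

    object Byte_Message_Demo {
      def main(args: Array[String]): Unit = {
        val out = new ByteArrayOutputStream

        // short message without newline: written as a bare line
        Byte_Message.write_line_message(out, Bytes("ping"))

        // long message: prefixed by a "length\n" header (here "201\n")
        Byte_Message.write_line_message(out, Bytes("a" * 200))

        val in = new ByteArrayInputStream(out.toByteArray)
        println(Byte_Message.read_line_message(in).map(_.text))  // Some(ping)
        println(Byte_Message.read_line_message(in).map(_.size))  // Some(200)
      }
    }
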
--- a/src/Pure/PIDE/prover.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/PIDE/prover.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -194,7 +194,7 @@
             {
               case chunks =>
                 try {
-                  Bytes(chunks.map(_.length).mkString("", ",", "\n")).write_stream(stream)
+                  Bytes(chunks.map(_.size).mkString("", ",", "\n")).write_stream(stream)
                   chunks.foreach(_.write_stream(stream))
                   stream.flush
                   true
@@ -288,7 +288,7 @@
     command_input match {
       case Some(thread) if thread.is_active() =>
         if (trace) {
-          val payload = args.foldLeft(0) { case (n, b) => n + b.length }
+          val payload = args.foldLeft(0L) { case (n, b) => n + b.size }
           Output.writeln(
             "protocol_command " + name + ", args = " + args.length + ", payload = " + payload)
         }
--- a/src/Pure/ROOT.ML	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/ROOT.ML	Sun Jun 16 21:54:09 2024 +0200
@@ -372,3 +372,4 @@
 ML_file "Tools/jedit.ML";
 ML_file "Tools/ghc.ML";
 ML_file "Tools/generated_files.ML";
+
--- a/src/Pure/System/bash.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/System/bash.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -36,7 +36,7 @@
   }
 
   def string(s: String): String =
-    if (s == "") "\"\""
+    if (s.isEmpty) "\"\""
     else UTF8.bytes(s).iterator.map(bash_chr).mkString
 
   def strings(ss: Iterable[String]): String =
--- a/src/Pure/System/isabelle_system.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/System/isabelle_system.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -474,7 +474,7 @@
               else {
                 val bytes = using(zip_file.getInputStream(entry))(Bytes.read_stream(_))
                 Files.createDirectories(res.getParent)
-                Files.write(res, bytes.array)
+                Files.write(res, bytes.make_array)
               }
             }
             (entry, result)
--- a/src/Pure/System/web_app.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/System/web_app.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -120,12 +120,12 @@
 
             // text might be shorter than bytes because of misinterpreted characters
             var found = false
-            var bytes_i = 0
-            while (!found && bytes_i < bytes.length) {
+            var bytes_i = 0L
+            while (!found && bytes_i < bytes.size) {
               var sep_ok = true
               var sep_i = 0
               while (sep_ok && sep_i < sep.length) {
-                if (bytes.charAt(bytes_i + sep_i) == sep.charAt(sep_i)) {
+                if (bytes(bytes_i + sep_i) == sep(sep_i)) {
                   sep_i += 1
                 } else {
                   bytes_i += 1
@@ -135,8 +135,8 @@
               if (sep_ok) found = true
             }
 
-            val before_bytes = bytes.subSequence(0, bytes_i)
-            val after_bytes = bytes.subSequence(bytes_i + sep.length, bytes.length)
+            val before_bytes = bytes.slice(0, bytes_i)
+            val after_bytes = bytes.slice(bytes_i + sep.length, bytes.size)
 
             Some(Seq(before_text, before_bytes), Seq(after_text, after_bytes))
           } else None
@@ -147,7 +147,7 @@
 
       def perhaps_unprefix(pfx: String, s: Seq): Seq =
         Library.try_unprefix(pfx, s.text) match {
-          case Some(text) => Seq(text, s.bytes.subSequence(pfx.length, s.bytes.length))
+          case Some(text) => Seq(text, s.bytes.slice(pfx.length, s.bytes.size))
           case None => s
         }
 
--- a/src/Tools/VSCode/src/channel.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Tools/VSCode/src/channel.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -9,7 +9,7 @@
 
 import isabelle._
 
-import java.io.{InputStream, OutputStream, FileOutputStream, ByteArrayOutputStream, File => JFile}
+import java.io.{InputStream, OutputStream, FileOutputStream, File => JFile}
 
 import scala.collection.mutable
 
@@ -39,8 +39,8 @@
 
   private def read_content(n: Int): String = {
     val bytes = Bytes.read_stream(in, limit = n)
-    if (bytes.length == n) bytes.text
-    else error("Bad message content (unexpected EOF after " + bytes.length + " of " + n + " bytes)")
+    if (bytes.size == n) bytes.text
+    else error("Bad message content (unexpected EOF after " + bytes.size + " of " + n + " bytes)")
   }
 
   def read(): Option[JSON.T] = {
--- a/src/Tools/VSCode/src/component_vscodium.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Tools/VSCode/src/component_vscodium.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -229,7 +229,7 @@
   // function computeChecksum(filename)
   private def file_checksum(path: Path): String = {
     val digest = MessageDigest.getInstance("MD5")
-    digest.update(Bytes.read(path).array)
+    digest.update(Bytes.read(path).make_array)
     Bytes(Base64.getEncoder.encode(digest.digest()))
       .text.replaceAll("=", "")
   }
--- a/src/Tools/jEdit/src/isabelle_export.scala	Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Tools/jEdit/src/isabelle_export.scala	Sun Jun 16 21:54:09 2024 +0200
@@ -56,7 +56,7 @@
                 } yield {
                   val is_dir = entry.name_elems.length > depth
                   val path = Export.implode_name(theory :: entry.name_elems.take(depth))
-                  val file_size = if (is_dir) None else Some(entry.bytes.length.toLong)
+                  val file_size = if (is_dir) None else Some(entry.bytes.size)
                   (path, file_size)
                 }).toSet.toList
               (for ((path, file_size) <- entries.iterator) yield {