# HG changeset patch # User wenzelm # Date 1718567649 -7200 # Node ID 94875d8cc8bd2e9b58cf077b7c8cca331f612e0c # Parent 2503ff5d29ce9e819895832bce463a8356f85c86# Parent 46135b44b1a37a940be67360efdf346cdceb4dbc merged diff -r 2503ff5d29ce -r 94875d8cc8bd src/HOL/Tools/Mirabelle/mirabelle.scala --- a/src/HOL/Tools/Mirabelle/mirabelle.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/HOL/Tools/Mirabelle/mirabelle.scala Sun Jun 16 21:54:09 2024 +0200 @@ -72,7 +72,7 @@ progress.echo( "Mirabelle export " + quote(args.compound_name) + " (in " + session_name + ")", verbose = true) - val yxml = YXML.parse_body(UTF8.decode_permissive(msg.chunk), cache = build_results0.cache) + val yxml = YXML.parse_body(msg.chunk.text, cache = build_results0.cache) val lines = Pretty.string_of(yxml).trim() val prefix = Export.explode_name(args.name) match { diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/Build/build.scala --- a/src/Pure/Build/build.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/Build/build.scala Sun Jun 16 21:54:09 2024 +0200 @@ -715,7 +715,7 @@ unicode_symbols: Boolean = false ): Option[Document.Snapshot] = { def decode_bytes(bytes: Bytes): String = - Symbol.output(unicode_symbols, UTF8.decode_permissive(bytes)) + Symbol.output(unicode_symbols, bytes.text) def read(name: String): Export.Entry = theory_context(name, permissive = true) diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/Build/build_manager.scala --- a/src/Pure/Build/build_manager.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/Build/build_manager.scala Sun Jun 16 21:54:09 2024 +0200 @@ -1179,7 +1179,7 @@ val head = List( HTML.title("Isabelle Build Manager"), - Web_App.More_HTML.icon("data:image/x-icon;base64," + logo.encode_base64), + Web_App.More_HTML.icon("data:image/x-icon;base64," + logo.encode_base64.text), HTML.style_file("https://hawkz.github.io/gdcss/gd.css"), HTML.style("html { background-color: white; }")) } diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/Build/export.scala --- a/src/Pure/Build/export.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/Build/export.scala Sun Jun 16 21:54:09 2024 +0200 @@ -194,7 +194,7 @@ def text: String = bytes.text - def yxml: XML.Body = YXML.parse_body(UTF8.decode_permissive(bytes), cache = cache) + def yxml: XML.Body = YXML.parse_body(bytes.text, cache = cache) } def make_regex(pattern: String): Regex = { diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/General/base64.scala --- a/src/Pure/General/base64.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/General/base64.scala Sun Jun 16 21:54:09 2024 +0200 @@ -1,26 +1,34 @@ /* Title: Pure/General/base64.scala Author: Makarius -Support for Base64 data encoding. +Support for Base64 data representation. */ package isabelle +import java.io.{InputStream, OutputStream} + + object Base64 { + /* main operations */ + def encode(a: Array[Byte]): String = java.util.Base64.getEncoder.encodeToString(a) def decode(s: String): Array[Byte] = java.util.Base64.getDecoder.decode(s) + def encode_stream(s: OutputStream): OutputStream = java.util.Base64.getEncoder.wrap(s) + def decode_stream(s: InputStream): InputStream = java.util.Base64.getDecoder.wrap(s) + /* Scala functions in ML */ object Decode extends Scala.Fun_Bytes("Base64.decode") { val here = Scala_Project.here - def apply(arg: Bytes): Bytes = Bytes.decode_base64(arg.text) + def apply(bytes: Bytes): Bytes = bytes.decode_base64 } object Encode extends Scala.Fun_Bytes("Base64.encode") { val here = Scala_Project.here - def apply(arg: Bytes): Bytes = Bytes(arg.encode_base64) + def apply(bytes: Bytes): Bytes = bytes.encode_base64 } } diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/General/bytes.scala --- a/src/Pure/General/bytes.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/General/bytes.scala Sun Jun 16 21:54:09 2024 +0200 @@ -16,80 +16,92 @@ import org.tukaani.xz import com.github.luben.zstd +import scala.collection.mutable.ArrayBuffer + object Bytes { - val empty: Bytes = new Bytes(Array[Byte](), 0, 0) + /* internal sizes */ + + private val array_size: Long = Int.MaxValue - 8 // see java.io.InputStream.MAX_BUFFER_SIZE + private val block_size: Int = 16384 // see java.io.InputStream.DEFAULT_BUFFER_SIZE + private val chunk_size: Long = Space.MiB(100).bytes + + private def make_size(chunks: Array[Array[Byte]], buffer: Array[Byte]): Long = + chunks.foldLeft(buffer.length.toLong)((n, chunk) => n + chunk.length) - def apply(s: CharSequence): Bytes = { - val str = s.toString - if (str.isEmpty) empty - else { - val b = UTF8.bytes(str) - new Bytes(b, 0, b.length) - } + class Too_Large(size: Long) extends IndexOutOfBoundsException { + override def getMessage: String = + "Bytes too large for particular operation: " + + Space.bytes(size).print + " > " + Space.bytes(array_size).print } + + /* main constructors */ + + private def reuse_array(bytes: Array[Byte]): Bytes = + if (bytes.length <= chunk_size) new Bytes(None, bytes, 0L, bytes.length.toLong) + else apply(bytes) + + val empty: Bytes = reuse_array(new Array(0)) + + def apply(s: CharSequence): Bytes = + if (s.isEmpty) empty + else Builder.use(hint = s.length) { builder => builder += s } + def apply(a: Array[Byte]): Bytes = apply(a, 0, a.length) def apply(a: Array[Byte], offset: Int, length: Int): Bytes = - if (length == 0) empty - else { - val b = new Array[Byte](length) - System.arraycopy(a, offset, b, 0, length) - new Bytes(b, 0, b.length) - } + Builder.use(hint = length) { builder => builder += (a, offset, length) } val newline: Bytes = apply("\n") - /* base64 */ - - def decode_base64(s: String): Bytes = { - val a = Base64.decode(s) - new Bytes(a, 0, a.length) - } - - /* read */ - def read_stream(stream: InputStream, limit: Int = Int.MaxValue, hint: Int = 1024): Bytes = + def read_stream(stream: InputStream, limit: Long = -1L, hint: Long = 0L): Bytes = { if (limit == 0) empty else { - val out_size = (if (limit == Int.MaxValue) hint else limit) max 1024 - val out = new ByteArrayOutputStream(out_size) - val buf = new Array[Byte](8192) - var m = 0 - - while ({ - m = stream.read(buf, 0, buf.length min (limit - out.size)) - if (m != -1) out.write(buf, 0, m) - m != -1 && limit > out.size - }) () - - new Bytes(out.toByteArray, 0, out.size) + Builder.use(hint = if (limit > 0) limit else hint) { builder => + val buf = new Array[Byte](block_size) + var m = 0 + var n = 0L + while ({ + val l = if (limit > 0) ((limit - n) min buf.length).toInt else buf.length + m = stream.read(buf, 0, l) + if (m != -1) { + builder += (buf, 0, m) + n += m + } + m != -1 && (limit < 0 || limit > n) + }) () + } } + } def read_url(name: String): Bytes = using(Url(name).open_stream())(read_stream(_)) - def read_file(path: Path, offset: Long = 0L, limit: Long = Long.MaxValue): Bytes = { + def read_file(path: Path, offset: Long = 0L, limit: Long = -1L): Bytes = { val length = File.size(path) val start = offset.max(0L) - val len = (length - start).max(0L).min(limit) - if (len > Int.MaxValue) error("Cannot read large file slice: " + Space.bytes(len).print) - else if (len == 0L) empty + val stop = (length - start).max(0L).min(if (limit < 0) Long.MaxValue else limit) + if (stop == 0L) empty else { - using(FileChannel.open(path.java_path, StandardOpenOption.READ)) { java_path => - java_path.position(start) - val n = len.toInt - val buf = ByteBuffer.allocate(n) - var i = 0 - var m = 0 - while ({ - m = java_path.read(buf) - if (m != -1) i += m - m != -1 && n > i - }) () - new Bytes(buf.array, 0, i) + Builder.use(hint = stop) { builder => + using(FileChannel.open(path.java_path, StandardOpenOption.READ)) { channel => + channel.position(start) + val buf = ByteBuffer.allocate(block_size) + var m = 0 + var n = 0L + while ({ + m = channel.read(buf) + if (m != -1) { + builder += (buf.array(), 0, m) + buf.clear() + n += m + } + m != -1 && stop > n + }) () + } } } } @@ -112,147 +124,415 @@ using(new FileOutputStream(file, true))(bytes.write_stream(_)) def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes) + + + /* vector of short unsigned integers */ + + trait Vec { + def size: Long + def apply(i: Long): Char + } + + class Vec_String(string: String) extends Vec { + override def size: Long = string.length.toLong + override def apply(i: Long): Char = + if (0 <= i && i < size) string(i.toInt) + else throw new IndexOutOfBoundsException + } + + + /* incremental builder: unsynchronized! */ + + object Builder { + def use(hint: Long = 0L)(body: Builder => Unit): Bytes = { + val builder = new Builder(hint) + body(builder) + builder.done() + } + + def use_stream(hint: Long = 0L)(body: OutputStream => Unit): Bytes = { + val stream = new Stream(hint = hint) + body(stream) + stream.builder.done() + } + + private class Stream(hint: Long = 0L) extends OutputStream { + val builder = new Builder(hint) + + override def write(b: Int): Unit = + { builder += b.toByte } + + override def write(array: Array[Byte], offset: Int, length: Int): Unit = + { builder += (array, offset, length) } + } + } + + final class Builder private[Bytes](hint: Long) { + private var size = 0L + private var chunks = + new ArrayBuffer[Array[Byte]](if (hint <= 0) 16 else (hint / chunk_size).toInt) + private var buffer = + new ByteArrayOutputStream(if (hint <= 0) 1024 else (hint min chunk_size min array_size).toInt) + + private def buffer_free(): Int = chunk_size.toInt - buffer.size() + private def buffer_check(): Unit = + if (buffer_free() == 0) { + chunks += buffer.toByteArray + buffer = new ByteArrayOutputStream(chunk_size.toInt) + } + + def += (b: Byte): Unit = { + size += 1 + buffer.write(b) + buffer_check() + } + + def += (s: CharSequence): Unit = { + val n = s.length + if (n > 0) { + if (UTF8.relevant(s)) { this += UTF8.bytes(s.toString) } + else { + var i = 0 + while (i < n) { + buffer.write(s.charAt(i).toByte) + size += 1 + i += 1 + buffer_check() + } + } + } + } + + def += (array: Array[Byte], offset: Int, length: Int): Unit = { + if (offset < 0 || length < 0 || offset.toLong + length.toLong > array.length) { + throw new IndexOutOfBoundsException + } + else if (length > 0) { + var i = offset + var n = length + while (n > 0) { + val m = buffer_free() + if (m > 0) { + val l = m min n + buffer.write(array, i, l) + size += l + i += l + n -= l + } + buffer_check() + } + } + } + + def += (array: Array[Byte]): Unit = { this += (array, 0, array.length) } + + def += (a: Subarray): Unit = { this += (a.array, a.offset, a.length) } + + private def done(): Bytes = { + val cs = chunks.toArray + val b = buffer.toByteArray + chunks = null + buffer = null + new Bytes(if (cs.isEmpty) None else Some(cs), b, 0L, size) + } + } + + + /* subarray */ + + object Subarray { + val empty: Subarray = new Subarray(new Array[Byte](0), 0, 0) + + def apply(array: Array[Byte], offset: Int, length: Int): Subarray = { + val n = array.length + if (0 <= offset && offset < n && 0 <= length && offset + length <= n) { + if (length == 0) empty + else new Subarray(array, offset, length) + } + else throw new IndexOutOfBoundsException + } + } + + final class Subarray private( + val array: Array[Byte], + val offset: Int, + val length: Int + ) { + override def toString: String = "Bytes.Subarray(" + Space.bytes(length).print + ")" + + def byte_iterator: Iterator[Byte] = + if (length == 0) Iterator.empty + else { for (i <- (offset until (offset + length)).iterator) yield array(i) } + } } final class Bytes private( - protected val bytes: Array[Byte], - protected val offset: Int, - val length: Int) extends CharSequence { - /* equality */ + protected val chunks: Option[Array[Array[Byte]]], + protected val chunk0: Array[Byte], + protected val offset: Long, + val size: Long +) extends Bytes.Vec { + assert( + (chunks.isEmpty || + chunks.get.nonEmpty && + chunks.get.forall(chunk => chunk.length == Bytes.chunk_size)) && + chunk0.length < Bytes.chunk_size) + + def small_size: Int = + if (size > Bytes.array_size) throw new Bytes.Too_Large(size) + else size.toInt + + def is_empty: Boolean = size == 0 + + def proper: Option[Bytes] = if (is_empty) None else Some(this) + + def is_sliced: Boolean = + offset != 0L || { + chunks match { + case None => size != chunk0.length + case Some(cs) => size != Bytes.make_size(cs, chunk0) + } + } + + override def toString: String = + if (is_empty) "Bytes.empty" + else "Bytes(" + Space.bytes(size).print + if_proper(is_sliced, ", sliced") + ")" + + + /* elements: signed Byte or unsigned Char */ + + protected def byte_unchecked(i: Long): Byte = { + val a = offset + i + chunks match { + case None => chunk0(a.toInt) + case Some(cs) => + val b = a % Bytes.chunk_size + val c = a / Bytes.chunk_size + if (c < cs.length) cs(c.toInt)(b.toInt) else chunk0(b.toInt) + } + } + + def byte(i: Long): Byte = + if (0 <= i && i < size) byte_unchecked(i) + else throw new IndexOutOfBoundsException + + def apply(i: Long): Char = (byte(i).toInt & 0xff).toChar + + protected def subarray_iterator: Iterator[Bytes.Subarray] = + if (is_empty) Iterator.empty + else if (chunks.isEmpty) Iterator(Bytes.Subarray(chunk0, offset.toInt, size.toInt)) + else { + val end_offset = offset + size + for ((array, index) <- (chunks.get.iterator ++ Iterator(chunk0)).zipWithIndex) yield { + val array_start = Bytes.chunk_size * index + val array_stop = array_start + array.length + if (offset < array_stop && array_start < end_offset) { + val i = (array_start max offset) - array_start + val j = (array_stop min end_offset) - array_start + Bytes.Subarray(array, i.toInt, (j - i).toInt) + } + else Bytes.Subarray.empty + } + } + + def byte_iterator: Iterator[Byte] = + for { + a <- subarray_iterator + b <- a.byte_iterator + } yield b + + + /* slice */ + + def slice(i: Long, j: Long): Bytes = + if (0 <= i && i <= j && j <= size) { + if (i == j) Bytes.empty + else new Bytes(chunks, chunk0, offset + i, j - i) + } + else throw new IndexOutOfBoundsException + + def unslice: Bytes = + if (is_sliced) { + Bytes.Builder.use(hint = size) { builder => + for (a <- subarray_iterator) { builder += a } + } + } + else this + + def trim_line: Bytes = + if (size >= 2 && byte_unchecked(size - 2) == 13 && byte_unchecked(size - 1) == 10) { + slice(0, size - 2) + } + else if (size >= 1 && (byte_unchecked(size - 1) == 13 || byte_unchecked(size - 1) == 10)) { + slice(0, size - 1) + } + else this + + + /* hash and equality */ + + lazy val sha1_digest: SHA1.Digest = + if (is_empty) SHA1.digest_empty + else { + SHA1.make_digest { sha => + for (a <- subarray_iterator if a.length > 0) { + sha.update(a.array, a.offset, a.length) + } + } + } + + override def hashCode(): Int = sha1_digest.hashCode() override def equals(that: Any): Boolean = { that match { case other: Bytes => - this.eq(other) || - Arrays.equals(bytes, offset, offset + length, - other.bytes, other.offset, other.offset + other.length) + if (this.eq(other)) true + else if (size != other.size) false + else { + if (chunks.isEmpty && other.chunks.isEmpty) { + Arrays.equals(chunk0, offset.toInt, (offset + size).toInt, + other.chunk0, other.offset.toInt, (other.offset + other.size).toInt) + } + else if (!is_sliced && !other.is_sliced) { + (subarray_iterator zip other.subarray_iterator) + .forall((a, b) => Arrays.equals(a.array, b.array)) + } + else sha1_digest == other.sha1_digest + } case _ => false } } - private lazy val hash: Int = { - var h = 0 - for (i <- offset until offset + length) { - val b = bytes(i).asInstanceOf[Int] & 0xFF - h = 31 * h + b - } - h - } - - override def hashCode(): Int = hash - /* content */ - lazy val sha1_digest: SHA1.Digest = SHA1.digest(bytes) - - def is_empty: Boolean = length == 0 + def + (other: Bytes): Bytes = + if (other.is_empty) this + else if (is_empty) other + else { + Bytes.Builder.use(hint = size + other.size) { builder => + for (a <- subarray_iterator ++ other.subarray_iterator) { + builder += a + } + } + } - def iterator: Iterator[Byte] = - for (i <- (offset until (offset + length)).iterator) - yield bytes(i) - - def array: Array[Byte] = { - val a = new Array[Byte](length) - System.arraycopy(bytes, offset, a, 0, length) - a + def make_array: Array[Byte] = { + val buf = new ByteArrayOutputStream(small_size) + for (a <- subarray_iterator) { buf.write(a.array, a.offset, a.length) } + buf.toByteArray } - def text: String = UTF8.decode_permissive(this) + def text: String = + if (is_empty) "" + else { + var i = 0L + var utf8 = false + while (i < size && !utf8) { + if (byte_unchecked(i) < 0) { utf8 = true } + i += 1 + } + utf8 - def wellformed_text: Option[String] = { - val s = text - if (this == Bytes(s)) Some(s) else None - } + if (utf8) UTF8.decode_permissive_bytes(this) + else new String(make_array, UTF8.charset) + } - def encode_base64: String = { - val b = - if (offset == 0 && length == bytes.length) bytes - else Bytes(bytes, offset, length).bytes - Base64.encode(b) - } + def wellformed_text: Option[String] = + try { + val s = text + if (this == Bytes(s)) Some(s) else None + } + catch { case ERROR(_) => None } + + + /* Base64 data representation */ + + def encode_base64: Bytes = + Bytes.Builder.use_stream(hint = (size * 1.5).round) { out => + using(Base64.encode_stream(out))(write_stream(_)) + } + + def decode_base64: Bytes = + using(Base64.decode_stream(stream()))(Bytes.read_stream(_, hint = (size / 1.2).round)) def maybe_encode_base64: (Boolean, String) = wellformed_text match { case Some(s) => (false, s) - case None => (true, encode_base64) - } - - override def toString: String = - if (is_empty) "Bytes.empty" else "Bytes(" + Space.bytes(length).print + ")" - - def proper: Option[Bytes] = if (is_empty) None else Some(this) - def proper_text: Option[String] = if (is_empty) None else Some(text) - - def +(other: Bytes): Bytes = - if (other.is_empty) this - else if (is_empty) other - else { - val new_bytes = new Array[Byte](length + other.length) - System.arraycopy(bytes, offset, new_bytes, 0, length) - System.arraycopy(other.bytes, other.offset, new_bytes, length, other.length) - new Bytes(new_bytes, 0, new_bytes.length) + case None => (true, encode_base64.text) } - /* CharSequence operations */ - - def charAt(i: Int): Char = - if (0 <= i && i < length) (bytes(offset + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char] - else throw new IndexOutOfBoundsException - - def subSequence(i: Int, j: Int): Bytes = { - if (0 <= i && i <= j && j <= length) new Bytes(bytes, offset + i, j - i) - else throw new IndexOutOfBoundsException - } - - def trim_line: Bytes = - if (length >= 2 && charAt(length - 2) == 13 && charAt(length - 1) == 10) { - subSequence(0, length - 2) - } - else if (length >= 1 && (charAt(length - 1) == 13 || charAt(length - 1) == 10)) { - subSequence(0, length - 1) - } - else this - - /* streams */ - def stream(): ByteArrayInputStream = new ByteArrayInputStream(bytes, offset, length) + def stream(): InputStream = + if (chunks.isEmpty) new ByteArrayInputStream(chunk0, offset.toInt, size.toInt) + else { + new InputStream { + private var index = 0L + + def read(): Int = { + if (index < size) { + val res = byte_unchecked(index).toInt & 0xff + index += 1 + res + } + else -1 + } - def write_stream(stream: OutputStream): Unit = stream.write(bytes, offset, length) + override def read(buffer: Array[Byte], start: Int, length: Int): Int = { + if (length < 16) super.read(buffer, start, length) + else { + val index0 = index + index = (index + length) min size + val n = (index - index0).toInt + if (n == 0) -1 + else { + var i = start + for (a <- slice(index0, index).subarray_iterator) { + val l = a.length + if (l > 0) { + System.arraycopy(a.array, a.offset, buffer, i, l) + i += l + } + } + n + } + } + } + } + } + + def write_stream(stream: OutputStream): Unit = + for (a <- subarray_iterator if a.length > 0) { + stream.write(a.array, a.offset, a.length) + } /* XZ / Zstd data compression */ def detect_xz: Boolean = - length >= 6 && - bytes(offset) == 0xFD.toByte && - bytes(offset + 1) == 0x37.toByte && - bytes(offset + 2) == 0x7A.toByte && - bytes(offset + 3) == 0x58.toByte && - bytes(offset + 4) == 0x5A.toByte && - bytes(offset + 5) == 0x00.toByte + size >= 6 && + byte_unchecked(0) == 0xFD.toByte && + byte_unchecked(1) == 0x37.toByte && + byte_unchecked(2) == 0x7A.toByte && + byte_unchecked(3) == 0x58.toByte && + byte_unchecked(4) == 0x5A.toByte && + byte_unchecked(5) == 0x00.toByte def detect_zstd: Boolean = - length >= 4 && - bytes(offset) == 0x28.toByte && - bytes(offset + 1) == 0xB5.toByte && - bytes(offset + 2) == 0x2F.toByte && - bytes(offset + 3) == 0xFD.toByte + size >= 4 && + byte_unchecked(0) == 0x28.toByte && + byte_unchecked(1) == 0xB5.toByte && + byte_unchecked(2) == 0x2F.toByte && + byte_unchecked(3) == 0xFD.toByte def uncompress_xz(cache: Compress.Cache = Compress.Cache.none): Bytes = - using(new xz.XZInputStream(stream(), cache.for_xz))(Bytes.read_stream(_, hint = length)) + using(new xz.XZInputStream(stream(), cache.for_xz))(Bytes.read_stream(_, hint = size)) def uncompress_zstd(cache: Compress.Cache = Compress.Cache.none): Bytes = { Zstd.init() - val n = zstd.Zstd.decompressedSize(bytes, offset, length) - if (n > 0 && n < Int.MaxValue) { - Bytes(zstd.Zstd.decompress(array, n.toInt)) - } - else { - using(new zstd.ZstdInputStream(stream(), cache.for_zstd))(Bytes.read_stream(_, hint = length)) - } + using(new zstd.ZstdInputStream(stream(), cache.for_zstd))(Bytes.read_stream(_, hint = size)) } def uncompress(cache: Compress.Cache = Compress.Cache.none): Bytes = @@ -264,14 +544,15 @@ options: Compress.Options = Compress.Options(), cache: Compress.Cache = Compress.Cache.none ): Bytes = { - options match { - case options_xz: Compress.Options_XZ => - val result = new ByteArrayOutputStream(length) - using(new xz.XZOutputStream(result, options_xz.make, cache.for_xz))(write_stream) - new Bytes(result.toByteArray, 0, result.size) - case options_zstd: Compress.Options_Zstd => - Zstd.init() - Bytes(zstd.Zstd.compress(if (offset == 0) bytes else array, options_zstd.level)) + Bytes.Builder.use_stream(hint = size) { out => + using( + options match { + case options_xz: Compress.Options_XZ => + new xz.XZOutputStream(out, options_xz.make, cache.for_xz) + case options_zstd: Compress.Options_Zstd => + new zstd.ZstdOutputStream(out, cache.for_zstd, options_zstd.level) + } + ) { s => for (a <- subarray_iterator) s.write(a.array, a.offset, a.length) } } } @@ -280,6 +561,6 @@ cache: Compress.Cache = Compress.Cache.none ) : (Boolean, Bytes) = { val compressed = compress(options = options, cache = cache) - if (compressed.length < length) (true, compressed) else (false, this) + if (compressed.size < size) (true, compressed) else (false, this) } } diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/General/file.scala diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/General/graphics_file.scala --- a/src/Pure/General/graphics_file.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/General/graphics_file.scala Sun Jun 16 21:54:09 2024 +0200 @@ -51,7 +51,7 @@ val params = new DefaultFontMapper.BaseFontParameters(File.platform_path(entry.path)) params.encoding = BaseFont.IDENTITY_H params.embedded = true - params.ttfAfm = entry.bytes.array + params.ttfAfm = entry.bytes.make_array mapper.putName(entry.name, params) } mapper diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/General/http.scala --- a/src/Pure/General/http.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/General/http.scala Sun Jun 16 21:54:09 2024 +0200 @@ -50,7 +50,7 @@ val encoding: String, val elapsed_time: Time ) { - def text: String = new String(bytes.array, encoding) + def text: String = new String(bytes.make_array, encoding) def json: JSON.T = JSON.parse(text) } @@ -227,7 +227,7 @@ val encoded_digest = Base64.encode(HexFormat.of().parseHex(SHA1.digest(output).toString)) http.getResponseHeaders.set("Content-Digest", "sha=:" + encoded_digest + ":") } - http.sendResponseHeaders(code, if (is_head) -1 else output.length.toLong) + http.sendResponseHeaders(code, if (is_head) -1 else output.size) if (!is_head) using(http.getResponseBody)(output.write_stream) } } diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/General/sha1.scala --- a/src/Pure/General/sha1.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/General/sha1.scala Sun Jun 16 21:54:09 2024 +0200 @@ -33,6 +33,9 @@ new Digest(Setup_Build.make_digest(digest_body)) } + val digest_empty: Digest = make_digest(_ => ()) + def digest_length: Int = digest_empty.toString.length + def digest(file: JFile): Digest = make_digest(sha => using(new FileInputStream(file)) { stream => val buf = new Array[Byte](65536) @@ -46,11 +49,11 @@ def digest(path: Path): Digest = digest(path.file) def digest(bytes: Array[Byte]): Digest = make_digest(_.update(bytes)) + def digest(bytes: Array[Byte], offset: Int, length: Int): Digest = + make_digest(_.update(bytes, offset, length)) def digest(bytes: Bytes): Digest = bytes.sha1_digest def digest(string: String): Digest = digest(Bytes(string)) - val digest_length: Int = digest("").toString.length - /* shasum */ diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/General/sql.scala --- a/src/Pure/General/sql.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/General/sql.scala Sun Jun 16 21:54:09 2024 +0200 @@ -329,7 +329,8 @@ object bytes { def update(i: Int, bytes: Bytes): Unit = { if (bytes == null) rep.setBytes(i, null) - else rep.setBinaryStream(i, bytes.stream(), bytes.length) + else if (bytes.size > Int.MaxValue) throw new IllegalArgumentException + else rep.setBinaryStream(i, bytes.stream(), bytes.size.toInt) } def update(i: Int, bytes: Option[Bytes]): Unit = update(i, bytes.orNull) } diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/General/utf8.scala --- a/src/Pure/General/utf8.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/General/utf8.scala Sun Jun 16 21:54:09 2024 +0200 @@ -17,15 +17,26 @@ def bytes(s: String): Array[Byte] = s.getBytes(charset) + def relevant(s: CharSequence): Boolean = { + var i = 0 + val n = s.length + var utf8 = false + while (i < n && !utf8) { + if (s.charAt(i) >= 128) { utf8 = true } + i += 1 + } + utf8 + } + /* permissive UTF-8 decoding */ // see also https://en.wikipedia.org/wiki/UTF-8#Description // overlong encodings enable byte-stuffing of low-ASCII - def decode_permissive(text: CharSequence): String = { - val len = text.length - val buf = new java.lang.StringBuilder(len) + def decode_permissive_bytes(bytes: Bytes.Vec): String = { + val size = bytes.size + val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt) var code = -1 var rest = 0 def flush(): Unit = { @@ -50,8 +61,8 @@ rest -= 1 } } - for (i <- 0 until len) { - val c = text.charAt(i) + for (i <- 0L until size) { + val c: Char = bytes(i) if (c < 128) { flush(); buf.append(c) } else if ((c & 0xC0) == 0x80) push(c & 0x3F) else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1) @@ -61,4 +72,8 @@ flush() buf.toString } + + def decode_permissive(text: String): String = + if (relevant(text)) decode_permissive_bytes(new Bytes.Vec_String(text)) + else text } diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/ML/ml_heap.scala --- a/src/Pure/ML/ml_heap.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/ML/ml_heap.scala Sun Jun 16 21:54:09 2024 +0200 @@ -16,7 +16,7 @@ def read_file_digest(heap: Path): Option[SHA1.Digest] = { if (heap.is_file) { val bs = Bytes.read_file(heap, offset = File.size(heap) - sha1_length) - if (bs.length == sha1_length) { + if (bs.size == sha1_length) { val s = bs.text if (s.startsWith(sha1_prefix)) Some(SHA1.fake_digest(s.substring(sha1_prefix.length))) else None diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/PIDE/byte_message.scala --- a/src/Pure/PIDE/byte_message.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/PIDE/byte_message.scala Sun Jun 16 21:54:09 2024 +0200 @@ -6,7 +6,7 @@ package isabelle -import java.io.{ByteArrayOutputStream, OutputStream, InputStream, IOException} +import java.io.{OutputStream, InputStream, IOException} object Byte_Message { @@ -27,45 +27,44 @@ /* input operations */ - def read(stream: InputStream, n: Int): Bytes = + def read(stream: InputStream, n: Long): Bytes = Bytes.read_stream(stream, limit = n) - def read_block(stream: InputStream, n: Int): (Option[Bytes], Int) = { + def read_block(stream: InputStream, n: Long): (Option[Bytes], Long) = { val msg = read(stream, n) - val len = msg.length + val len = msg.size (if (len == n) Some(msg) else None, len) } def read_line(stream: InputStream): Option[Bytes] = { - val line = new ByteArrayOutputStream(100) var c = 0 - while ({ c = stream.read; c != -1 && c != 10 }) line.write(c) - - if (c == -1 && line.size == 0) None - else { - val a = line.toByteArray - val n = a.length - val len = if (n > 0 && a(n - 1) == 13) n - 1 else n - Some(Bytes(a, 0, len)) - } + val line = + Bytes.Builder.use(hint = 100) { builder => + while ({ c = stream.read; c != -1 && c != 10 }) { + builder += c.toByte + } + } + if (c == -1 && line.size == 0) None else Some(line.trim_line) } /* messages with multiple chunks (arbitrary content) */ - private def make_header(ns: List[Int]): List[Bytes] = + private def make_header(ns: List[Long]): List[Bytes] = List(Bytes(ns.mkString(",")), Bytes.newline) def write_message(stream: OutputStream, chunks: List[Bytes]): Unit = { - write(stream, make_header(chunks.map(_.length)) ::: chunks) + write(stream, make_header(chunks.map(_.size)) ::: chunks) flush(stream) } - private def parse_header(line: String): List[Int] = - try { space_explode(',', line).map(Value.Nat.parse) } - catch { case ERROR(_) => error("Malformed message header: " + quote(line)) } + private def parse_header(line: String): List[Long] = { + val args = space_explode(',', line) + if (args.forall(is_length)) args.map(Value.Long.parse) + else error("Malformed message header: " + quote(line)) + } - private def read_chunk(stream: InputStream, n: Int): Bytes = + private def read_chunk(stream: InputStream, n: Long): Bytes = read_block(stream, n) match { case (Some(chunk), _) => chunk case (None, len) => @@ -78,32 +77,39 @@ /* hybrid messages: line or length+block (restricted content) */ + private def is_length(str: String): Boolean = + !str.isEmpty && str.iterator.forall(Symbol.is_ascii_digit) && + Value.Long.unapply(str).isDefined + private def is_length(msg: Bytes): Boolean = - !msg.is_empty && msg.iterator.forall(b => Symbol.is_ascii_digit(b.toChar)) + !msg.is_empty && msg.byte_iterator.forall(b => Symbol.is_ascii_digit(b.toChar)) && + Value.Long.unapply(msg.text).isDefined - private def is_terminated(msg: Bytes): Boolean = { - val len = msg.length - len > 0 && Symbol.is_ascii_line_terminator(msg.charAt(len - 1)) - } + private def is_terminated(msg: Bytes): Boolean = + msg.size match { + case size if size > 0 => + val c = msg(size - 1) + c <= Char.MaxValue && Symbol.is_ascii_line_terminator(c.toChar) + case _ => false + } def write_line_message(stream: OutputStream, msg: Bytes): Unit = { - if (is_length(msg) || is_terminated(msg)) + if (is_length(msg) || is_terminated(msg)) { error ("Bad content for line message:\n" ++ msg.text.take(100)) + } - val n = msg.length + val n = msg.size write(stream, - (if (n > 100 || msg.iterator.contains(10)) make_header(List(n + 1)) else Nil) ::: + (if (n > 100 || msg.byte_iterator.contains(10)) make_header(List(n + 1)) else Nil) ::: List(msg, Bytes.newline)) flush(stream) } def read_line_message(stream: InputStream): Option[Bytes] = read_line(stream) match { - case None => None - case Some(line) => - Value.Nat.unapply(line.text) match { - case None => Some(line) - case Some(n) => read_block(stream, n)._1.map(_.trim_line) - } + case Some(line) if is_length(line) => + val n = Value.Long.parse(line.text) + read_block(stream, n)._1.map(_.trim_line) + case res => res } } diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/PIDE/prover.scala --- a/src/Pure/PIDE/prover.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/PIDE/prover.scala Sun Jun 16 21:54:09 2024 +0200 @@ -194,7 +194,7 @@ { case chunks => try { - Bytes(chunks.map(_.length).mkString("", ",", "\n")).write_stream(stream) + Bytes(chunks.map(_.size).mkString("", ",", "\n")).write_stream(stream) chunks.foreach(_.write_stream(stream)) stream.flush true @@ -288,7 +288,7 @@ command_input match { case Some(thread) if thread.is_active() => if (trace) { - val payload = args.foldLeft(0) { case (n, b) => n + b.length } + val payload = args.foldLeft(0L) { case (n, b) => n + b.size } Output.writeln( "protocol_command " + name + ", args = " + args.length + ", payload = " + payload) } diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/ROOT.ML --- a/src/Pure/ROOT.ML Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/ROOT.ML Sun Jun 16 21:54:09 2024 +0200 @@ -372,3 +372,4 @@ ML_file "Tools/jedit.ML"; ML_file "Tools/ghc.ML"; ML_file "Tools/generated_files.ML"; + diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/System/bash.scala --- a/src/Pure/System/bash.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/System/bash.scala Sun Jun 16 21:54:09 2024 +0200 @@ -36,7 +36,7 @@ } def string(s: String): String = - if (s == "") "\"\"" + if (s.isEmpty) "\"\"" else UTF8.bytes(s).iterator.map(bash_chr).mkString def strings(ss: Iterable[String]): String = diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/System/isabelle_system.scala --- a/src/Pure/System/isabelle_system.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/System/isabelle_system.scala Sun Jun 16 21:54:09 2024 +0200 @@ -474,7 +474,7 @@ else { val bytes = using(zip_file.getInputStream(entry))(Bytes.read_stream(_)) Files.createDirectories(res.getParent) - Files.write(res, bytes.array) + Files.write(res, bytes.make_array) } } (entry, result) diff -r 2503ff5d29ce -r 94875d8cc8bd src/Pure/System/web_app.scala --- a/src/Pure/System/web_app.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Pure/System/web_app.scala Sun Jun 16 21:54:09 2024 +0200 @@ -120,12 +120,12 @@ // text might be shorter than bytes because of misinterpreted characters var found = false - var bytes_i = 0 - while (!found && bytes_i < bytes.length) { + var bytes_i = 0L + while (!found && bytes_i < bytes.size) { var sep_ok = true var sep_i = 0 while (sep_ok && sep_i < sep.length) { - if (bytes.charAt(bytes_i + sep_i) == sep.charAt(sep_i)) { + if (bytes(bytes_i + sep_i) == sep(sep_i)) { sep_i += 1 } else { bytes_i += 1 @@ -135,8 +135,8 @@ if (sep_ok) found = true } - val before_bytes = bytes.subSequence(0, bytes_i) - val after_bytes = bytes.subSequence(bytes_i + sep.length, bytes.length) + val before_bytes = bytes.slice(0, bytes_i) + val after_bytes = bytes.slice(bytes_i + sep.length, bytes.size) Some(Seq(before_text, before_bytes), Seq(after_text, after_bytes)) } else None @@ -147,7 +147,7 @@ def perhaps_unprefix(pfx: String, s: Seq): Seq = Library.try_unprefix(pfx, s.text) match { - case Some(text) => Seq(text, s.bytes.subSequence(pfx.length, s.bytes.length)) + case Some(text) => Seq(text, s.bytes.slice(pfx.length, s.bytes.size)) case None => s } diff -r 2503ff5d29ce -r 94875d8cc8bd src/Tools/VSCode/src/channel.scala --- a/src/Tools/VSCode/src/channel.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Tools/VSCode/src/channel.scala Sun Jun 16 21:54:09 2024 +0200 @@ -9,7 +9,7 @@ import isabelle._ -import java.io.{InputStream, OutputStream, FileOutputStream, ByteArrayOutputStream, File => JFile} +import java.io.{InputStream, OutputStream, FileOutputStream, File => JFile} import scala.collection.mutable @@ -39,8 +39,8 @@ private def read_content(n: Int): String = { val bytes = Bytes.read_stream(in, limit = n) - if (bytes.length == n) bytes.text - else error("Bad message content (unexpected EOF after " + bytes.length + " of " + n + " bytes)") + if (bytes.size == n) bytes.text + else error("Bad message content (unexpected EOF after " + bytes.size + " of " + n + " bytes)") } def read(): Option[JSON.T] = { diff -r 2503ff5d29ce -r 94875d8cc8bd src/Tools/VSCode/src/component_vscodium.scala --- a/src/Tools/VSCode/src/component_vscodium.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Tools/VSCode/src/component_vscodium.scala Sun Jun 16 21:54:09 2024 +0200 @@ -229,7 +229,7 @@ // function computeChecksum(filename) private def file_checksum(path: Path): String = { val digest = MessageDigest.getInstance("MD5") - digest.update(Bytes.read(path).array) + digest.update(Bytes.read(path).make_array) Bytes(Base64.getEncoder.encode(digest.digest())) .text.replaceAll("=", "") } diff -r 2503ff5d29ce -r 94875d8cc8bd src/Tools/jEdit/src/isabelle_export.scala --- a/src/Tools/jEdit/src/isabelle_export.scala Wed Jun 12 10:47:53 2024 +0200 +++ b/src/Tools/jEdit/src/isabelle_export.scala Sun Jun 16 21:54:09 2024 +0200 @@ -56,7 +56,7 @@ } yield { val is_dir = entry.name_elems.length > depth val path = Export.implode_name(theory :: entry.name_elems.take(depth)) - val file_size = if (is_dir) None else Some(entry.bytes.length.toLong) + val file_size = if (is_dir) None else Some(entry.bytes.size) (path, file_size) }).toSet.toList (for ((path, file_size) <- entries.iterator) yield {