--- a/src/HOL/Tools/Mirabelle/mirabelle.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/HOL/Tools/Mirabelle/mirabelle.scala Sun Jun 16 21:54:09 2024 +0200
@@ -72,7 +72,7 @@
progress.echo(
"Mirabelle export " + quote(args.compound_name) + " (in " + session_name + ")",
verbose = true)
- val yxml = YXML.parse_body(UTF8.decode_permissive(msg.chunk), cache = build_results0.cache)
+ val yxml = YXML.parse_body(msg.chunk.text, cache = build_results0.cache)
val lines = Pretty.string_of(yxml).trim()
val prefix =
Export.explode_name(args.name) match {
--- a/src/Pure/Build/build.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/Build/build.scala Sun Jun 16 21:54:09 2024 +0200
@@ -715,7 +715,7 @@
unicode_symbols: Boolean = false
): Option[Document.Snapshot] = {
def decode_bytes(bytes: Bytes): String =
- Symbol.output(unicode_symbols, UTF8.decode_permissive(bytes))
+ Symbol.output(unicode_symbols, bytes.text)
def read(name: String): Export.Entry = theory_context(name, permissive = true)
--- a/src/Pure/Build/build_manager.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/Build/build_manager.scala Sun Jun 16 21:54:09 2024 +0200
@@ -1179,7 +1179,7 @@
val head =
List(
HTML.title("Isabelle Build Manager"),
- Web_App.More_HTML.icon("data:image/x-icon;base64," + logo.encode_base64),
+ Web_App.More_HTML.icon("data:image/x-icon;base64," + logo.encode_base64.text),
HTML.style_file("https://hawkz.github.io/gdcss/gd.css"),
HTML.style("html { background-color: white; }"))
}
--- a/src/Pure/Build/export.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/Build/export.scala Sun Jun 16 21:54:09 2024 +0200
@@ -194,7 +194,7 @@
def text: String = bytes.text
- def yxml: XML.Body = YXML.parse_body(UTF8.decode_permissive(bytes), cache = cache)
+ def yxml: XML.Body = YXML.parse_body(bytes.text, cache = cache)
}
def make_regex(pattern: String): Regex = {
--- a/src/Pure/General/base64.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/base64.scala Sun Jun 16 21:54:09 2024 +0200
@@ -1,26 +1,34 @@
/* Title: Pure/General/base64.scala
Author: Makarius
-Support for Base64 data encoding.
+Support for Base64 data representation.
*/
package isabelle
+import java.io.{InputStream, OutputStream}
+
+
object Base64 {
+ /* main operations */
+
def encode(a: Array[Byte]): String = java.util.Base64.getEncoder.encodeToString(a)
def decode(s: String): Array[Byte] = java.util.Base64.getDecoder.decode(s)
+ def encode_stream(s: OutputStream): OutputStream = java.util.Base64.getEncoder.wrap(s)
+ def decode_stream(s: InputStream): InputStream = java.util.Base64.getDecoder.wrap(s)
+
/* Scala functions in ML */
object Decode extends Scala.Fun_Bytes("Base64.decode") {
val here = Scala_Project.here
- def apply(arg: Bytes): Bytes = Bytes.decode_base64(arg.text)
+ def apply(bytes: Bytes): Bytes = bytes.decode_base64
}
object Encode extends Scala.Fun_Bytes("Base64.encode") {
val here = Scala_Project.here
- def apply(arg: Bytes): Bytes = Bytes(arg.encode_base64)
+ def apply(bytes: Bytes): Bytes = bytes.encode_base64
}
}
--- a/src/Pure/General/bytes.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/bytes.scala Sun Jun 16 21:54:09 2024 +0200
@@ -16,80 +16,92 @@
import org.tukaani.xz
import com.github.luben.zstd
+import scala.collection.mutable.ArrayBuffer
+
object Bytes {
- val empty: Bytes = new Bytes(Array[Byte](), 0, 0)
+ /* internal sizes */
+
+ private val array_size: Long = Int.MaxValue - 8 // see java.io.InputStream.MAX_BUFFER_SIZE
+ private val block_size: Int = 16384 // see java.io.InputStream.DEFAULT_BUFFER_SIZE
+ private val chunk_size: Long = Space.MiB(100).bytes
+
+ private def make_size(chunks: Array[Array[Byte]], buffer: Array[Byte]): Long =
+ chunks.foldLeft(buffer.length.toLong)((n, chunk) => n + chunk.length)
- def apply(s: CharSequence): Bytes = {
- val str = s.toString
- if (str.isEmpty) empty
- else {
- val b = UTF8.bytes(str)
- new Bytes(b, 0, b.length)
- }
+ class Too_Large(size: Long) extends IndexOutOfBoundsException {
+ override def getMessage: String =
+ "Bytes too large for particular operation: " +
+ Space.bytes(size).print + " > " + Space.bytes(array_size).print
}
+
+ /* main constructors */
+
+ private def reuse_array(bytes: Array[Byte]): Bytes =
+ if (bytes.length <= chunk_size) new Bytes(None, bytes, 0L, bytes.length.toLong)
+ else apply(bytes)
+
+ val empty: Bytes = reuse_array(new Array(0))
+
+ def apply(s: CharSequence): Bytes =
+ if (s.isEmpty) empty
+ else Builder.use(hint = s.length) { builder => builder += s }
+
def apply(a: Array[Byte]): Bytes = apply(a, 0, a.length)
def apply(a: Array[Byte], offset: Int, length: Int): Bytes =
- if (length == 0) empty
- else {
- val b = new Array[Byte](length)
- System.arraycopy(a, offset, b, 0, length)
- new Bytes(b, 0, b.length)
- }
+ Builder.use(hint = length) { builder => builder += (a, offset, length) }
val newline: Bytes = apply("\n")
- /* base64 */
-
- def decode_base64(s: String): Bytes = {
- val a = Base64.decode(s)
- new Bytes(a, 0, a.length)
- }
-
-
/* read */
- def read_stream(stream: InputStream, limit: Int = Int.MaxValue, hint: Int = 1024): Bytes =
+ def read_stream(stream: InputStream, limit: Long = -1L, hint: Long = 0L): Bytes = {
if (limit == 0) empty
else {
- val out_size = (if (limit == Int.MaxValue) hint else limit) max 1024
- val out = new ByteArrayOutputStream(out_size)
- val buf = new Array[Byte](8192)
- var m = 0
-
- while ({
- m = stream.read(buf, 0, buf.length min (limit - out.size))
- if (m != -1) out.write(buf, 0, m)
- m != -1 && limit > out.size
- }) ()
-
- new Bytes(out.toByteArray, 0, out.size)
+ Builder.use(hint = if (limit > 0) limit else hint) { builder =>
+ val buf = new Array[Byte](block_size)
+ var m = 0
+ var n = 0L
+ while ({
+ val l = if (limit > 0) ((limit - n) min buf.length).toInt else buf.length
+ m = stream.read(buf, 0, l)
+ if (m != -1) {
+ builder += (buf, 0, m)
+ n += m
+ }
+ m != -1 && (limit < 0 || limit > n)
+ }) ()
+ }
}
+ }
def read_url(name: String): Bytes = using(Url(name).open_stream())(read_stream(_))
- def read_file(path: Path, offset: Long = 0L, limit: Long = Long.MaxValue): Bytes = {
+ def read_file(path: Path, offset: Long = 0L, limit: Long = -1L): Bytes = {
val length = File.size(path)
val start = offset.max(0L)
- val len = (length - start).max(0L).min(limit)
- if (len > Int.MaxValue) error("Cannot read large file slice: " + Space.bytes(len).print)
- else if (len == 0L) empty
+ val stop = (length - start).max(0L).min(if (limit < 0) Long.MaxValue else limit)
+ if (stop == 0L) empty
else {
- using(FileChannel.open(path.java_path, StandardOpenOption.READ)) { java_path =>
- java_path.position(start)
- val n = len.toInt
- val buf = ByteBuffer.allocate(n)
- var i = 0
- var m = 0
- while ({
- m = java_path.read(buf)
- if (m != -1) i += m
- m != -1 && n > i
- }) ()
- new Bytes(buf.array, 0, i)
+ Builder.use(hint = stop) { builder =>
+ using(FileChannel.open(path.java_path, StandardOpenOption.READ)) { channel =>
+ channel.position(start)
+ val buf = ByteBuffer.allocate(block_size)
+ var m = 0
+ var n = 0L
+ while ({
+ m = channel.read(buf)
+ if (m != -1) {
+ builder += (buf.array(), 0, m)
+ buf.clear()
+ n += m
+ }
+ m != -1 && stop > n
+ }) ()
+ }
}
}
}
@@ -112,147 +124,415 @@
using(new FileOutputStream(file, true))(bytes.write_stream(_))
def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes)
+
+
+ /* vector of short unsigned integers */
+
+ trait Vec {
+ def size: Long
+ def apply(i: Long): Char
+ }
+
+ class Vec_String(string: String) extends Vec {
+ override def size: Long = string.length.toLong
+ override def apply(i: Long): Char =
+ if (0 <= i && i < size) string(i.toInt)
+ else throw new IndexOutOfBoundsException
+ }
+
+
+ /* incremental builder: unsynchronized! */
+
+ object Builder {
+ def use(hint: Long = 0L)(body: Builder => Unit): Bytes = {
+ val builder = new Builder(hint)
+ body(builder)
+ builder.done()
+ }
+
+ def use_stream(hint: Long = 0L)(body: OutputStream => Unit): Bytes = {
+ val stream = new Stream(hint = hint)
+ body(stream)
+ stream.builder.done()
+ }
+
+ private class Stream(hint: Long = 0L) extends OutputStream {
+ val builder = new Builder(hint)
+
+ override def write(b: Int): Unit =
+ { builder += b.toByte }
+
+ override def write(array: Array[Byte], offset: Int, length: Int): Unit =
+ { builder += (array, offset, length) }
+ }
+ }
+
+ final class Builder private[Bytes](hint: Long) {
+ private var size = 0L
+ private var chunks =
+ new ArrayBuffer[Array[Byte]](if (hint <= 0) 16 else (hint / chunk_size).toInt)
+ private var buffer =
+ new ByteArrayOutputStream(if (hint <= 0) 1024 else (hint min chunk_size min array_size).toInt)
+
+ private def buffer_free(): Int = chunk_size.toInt - buffer.size()
+ private def buffer_check(): Unit =
+ if (buffer_free() == 0) {
+ chunks += buffer.toByteArray
+ buffer = new ByteArrayOutputStream(chunk_size.toInt)
+ }
+
+ def += (b: Byte): Unit = {
+ size += 1
+ buffer.write(b)
+ buffer_check()
+ }
+
+ def += (s: CharSequence): Unit = {
+ val n = s.length
+ if (n > 0) {
+ if (UTF8.relevant(s)) { this += UTF8.bytes(s.toString) }
+ else {
+ var i = 0
+ while (i < n) {
+ buffer.write(s.charAt(i).toByte)
+ size += 1
+ i += 1
+ buffer_check()
+ }
+ }
+ }
+ }
+
+ def += (array: Array[Byte], offset: Int, length: Int): Unit = {
+ if (offset < 0 || length < 0 || offset.toLong + length.toLong > array.length) {
+ throw new IndexOutOfBoundsException
+ }
+ else if (length > 0) {
+ var i = offset
+ var n = length
+ while (n > 0) {
+ val m = buffer_free()
+ if (m > 0) {
+ val l = m min n
+ buffer.write(array, i, l)
+ size += l
+ i += l
+ n -= l
+ }
+ buffer_check()
+ }
+ }
+ }
+
+ def += (array: Array[Byte]): Unit = { this += (array, 0, array.length) }
+
+ def += (a: Subarray): Unit = { this += (a.array, a.offset, a.length) }
+
+ private def done(): Bytes = {
+ val cs = chunks.toArray
+ val b = buffer.toByteArray
+ chunks = null
+ buffer = null
+ new Bytes(if (cs.isEmpty) None else Some(cs), b, 0L, size)
+ }
+ }
+
+
+ /* subarray */
+
+ object Subarray {
+ val empty: Subarray = new Subarray(new Array[Byte](0), 0, 0)
+
+ def apply(array: Array[Byte], offset: Int, length: Int): Subarray = {
+ val n = array.length
+ if (0 <= offset && offset < n && 0 <= length && offset + length <= n) {
+ if (length == 0) empty
+ else new Subarray(array, offset, length)
+ }
+ else throw new IndexOutOfBoundsException
+ }
+ }
+
+ final class Subarray private(
+ val array: Array[Byte],
+ val offset: Int,
+ val length: Int
+ ) {
+ override def toString: String = "Bytes.Subarray(" + Space.bytes(length).print + ")"
+
+ def byte_iterator: Iterator[Byte] =
+ if (length == 0) Iterator.empty
+ else { for (i <- (offset until (offset + length)).iterator) yield array(i) }
+ }
}
final class Bytes private(
- protected val bytes: Array[Byte],
- protected val offset: Int,
- val length: Int) extends CharSequence {
- /* equality */
+ protected val chunks: Option[Array[Array[Byte]]],
+ protected val chunk0: Array[Byte],
+ protected val offset: Long,
+ val size: Long
+) extends Bytes.Vec {
+ assert(
+ (chunks.isEmpty ||
+ chunks.get.nonEmpty &&
+ chunks.get.forall(chunk => chunk.length == Bytes.chunk_size)) &&
+ chunk0.length < Bytes.chunk_size)
+
+ def small_size: Int =
+ if (size > Bytes.array_size) throw new Bytes.Too_Large(size)
+ else size.toInt
+
+ def is_empty: Boolean = size == 0
+
+ def proper: Option[Bytes] = if (is_empty) None else Some(this)
+
+ def is_sliced: Boolean =
+ offset != 0L || {
+ chunks match {
+ case None => size != chunk0.length
+ case Some(cs) => size != Bytes.make_size(cs, chunk0)
+ }
+ }
+
+ override def toString: String =
+ if (is_empty) "Bytes.empty"
+ else "Bytes(" + Space.bytes(size).print + if_proper(is_sliced, ", sliced") + ")"
+
+
+ /* elements: signed Byte or unsigned Char */
+
+ protected def byte_unchecked(i: Long): Byte = {
+ val a = offset + i
+ chunks match {
+ case None => chunk0(a.toInt)
+ case Some(cs) =>
+ val b = a % Bytes.chunk_size
+ val c = a / Bytes.chunk_size
+ if (c < cs.length) cs(c.toInt)(b.toInt) else chunk0(b.toInt)
+ }
+ }
+
+ def byte(i: Long): Byte =
+ if (0 <= i && i < size) byte_unchecked(i)
+ else throw new IndexOutOfBoundsException
+
+ def apply(i: Long): Char = (byte(i).toInt & 0xff).toChar
+
+ protected def subarray_iterator: Iterator[Bytes.Subarray] =
+ if (is_empty) Iterator.empty
+ else if (chunks.isEmpty) Iterator(Bytes.Subarray(chunk0, offset.toInt, size.toInt))
+ else {
+ val end_offset = offset + size
+ for ((array, index) <- (chunks.get.iterator ++ Iterator(chunk0)).zipWithIndex) yield {
+ val array_start = Bytes.chunk_size * index
+ val array_stop = array_start + array.length
+ if (offset < array_stop && array_start < end_offset) {
+ val i = (array_start max offset) - array_start
+ val j = (array_stop min end_offset) - array_start
+ Bytes.Subarray(array, i.toInt, (j - i).toInt)
+ }
+ else Bytes.Subarray.empty
+ }
+ }
+
+ def byte_iterator: Iterator[Byte] =
+ for {
+ a <- subarray_iterator
+ b <- a.byte_iterator
+ } yield b
+
+
+ /* slice */
+
+ def slice(i: Long, j: Long): Bytes =
+ if (0 <= i && i <= j && j <= size) {
+ if (i == j) Bytes.empty
+ else new Bytes(chunks, chunk0, offset + i, j - i)
+ }
+ else throw new IndexOutOfBoundsException
+
+ def unslice: Bytes =
+ if (is_sliced) {
+ Bytes.Builder.use(hint = size) { builder =>
+ for (a <- subarray_iterator) { builder += a }
+ }
+ }
+ else this
+
+ def trim_line: Bytes =
+ if (size >= 2 && byte_unchecked(size - 2) == 13 && byte_unchecked(size - 1) == 10) {
+ slice(0, size - 2)
+ }
+ else if (size >= 1 && (byte_unchecked(size - 1) == 13 || byte_unchecked(size - 1) == 10)) {
+ slice(0, size - 1)
+ }
+ else this
+
+
+ /* hash and equality */
+
+ lazy val sha1_digest: SHA1.Digest =
+ if (is_empty) SHA1.digest_empty
+ else {
+ SHA1.make_digest { sha =>
+ for (a <- subarray_iterator if a.length > 0) {
+ sha.update(a.array, a.offset, a.length)
+ }
+ }
+ }
+
+ override def hashCode(): Int = sha1_digest.hashCode()
override def equals(that: Any): Boolean = {
that match {
case other: Bytes =>
- this.eq(other) ||
- Arrays.equals(bytes, offset, offset + length,
- other.bytes, other.offset, other.offset + other.length)
+ if (this.eq(other)) true
+ else if (size != other.size) false
+ else {
+ if (chunks.isEmpty && other.chunks.isEmpty) {
+ Arrays.equals(chunk0, offset.toInt, (offset + size).toInt,
+ other.chunk0, other.offset.toInt, (other.offset + other.size).toInt)
+ }
+ else if (!is_sliced && !other.is_sliced) {
+ (subarray_iterator zip other.subarray_iterator)
+ .forall((a, b) => Arrays.equals(a.array, b.array))
+ }
+ else sha1_digest == other.sha1_digest
+ }
case _ => false
}
}
- private lazy val hash: Int = {
- var h = 0
- for (i <- offset until offset + length) {
- val b = bytes(i).asInstanceOf[Int] & 0xFF
- h = 31 * h + b
- }
- h
- }
-
- override def hashCode(): Int = hash
-
/* content */
- lazy val sha1_digest: SHA1.Digest = SHA1.digest(bytes)
-
- def is_empty: Boolean = length == 0
+ def + (other: Bytes): Bytes =
+ if (other.is_empty) this
+ else if (is_empty) other
+ else {
+ Bytes.Builder.use(hint = size + other.size) { builder =>
+ for (a <- subarray_iterator ++ other.subarray_iterator) {
+ builder += a
+ }
+ }
+ }
- def iterator: Iterator[Byte] =
- for (i <- (offset until (offset + length)).iterator)
- yield bytes(i)
-
- def array: Array[Byte] = {
- val a = new Array[Byte](length)
- System.arraycopy(bytes, offset, a, 0, length)
- a
+ def make_array: Array[Byte] = {
+ val buf = new ByteArrayOutputStream(small_size)
+ for (a <- subarray_iterator) { buf.write(a.array, a.offset, a.length) }
+ buf.toByteArray
}
- def text: String = UTF8.decode_permissive(this)
+ def text: String =
+ if (is_empty) ""
+ else {
+ var i = 0L
+ var utf8 = false
+ while (i < size && !utf8) {
+ if (byte_unchecked(i) < 0) { utf8 = true }
+ i += 1
+ }
+ utf8
- def wellformed_text: Option[String] = {
- val s = text
- if (this == Bytes(s)) Some(s) else None
- }
+ if (utf8) UTF8.decode_permissive_bytes(this)
+ else new String(make_array, UTF8.charset)
+ }
- def encode_base64: String = {
- val b =
- if (offset == 0 && length == bytes.length) bytes
- else Bytes(bytes, offset, length).bytes
- Base64.encode(b)
- }
+ def wellformed_text: Option[String] =
+ try {
+ val s = text
+ if (this == Bytes(s)) Some(s) else None
+ }
+ catch { case ERROR(_) => None }
+
+
+ /* Base64 data representation */
+
+ def encode_base64: Bytes =
+ Bytes.Builder.use_stream(hint = (size * 1.5).round) { out =>
+ using(Base64.encode_stream(out))(write_stream(_))
+ }
+
+ def decode_base64: Bytes =
+ using(Base64.decode_stream(stream()))(Bytes.read_stream(_, hint = (size / 1.2).round))
def maybe_encode_base64: (Boolean, String) =
wellformed_text match {
case Some(s) => (false, s)
- case None => (true, encode_base64)
- }
-
- override def toString: String =
- if (is_empty) "Bytes.empty" else "Bytes(" + Space.bytes(length).print + ")"
-
- def proper: Option[Bytes] = if (is_empty) None else Some(this)
- def proper_text: Option[String] = if (is_empty) None else Some(text)
-
- def +(other: Bytes): Bytes =
- if (other.is_empty) this
- else if (is_empty) other
- else {
- val new_bytes = new Array[Byte](length + other.length)
- System.arraycopy(bytes, offset, new_bytes, 0, length)
- System.arraycopy(other.bytes, other.offset, new_bytes, length, other.length)
- new Bytes(new_bytes, 0, new_bytes.length)
+ case None => (true, encode_base64.text)
}
- /* CharSequence operations */
-
- def charAt(i: Int): Char =
- if (0 <= i && i < length) (bytes(offset + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
- else throw new IndexOutOfBoundsException
-
- def subSequence(i: Int, j: Int): Bytes = {
- if (0 <= i && i <= j && j <= length) new Bytes(bytes, offset + i, j - i)
- else throw new IndexOutOfBoundsException
- }
-
- def trim_line: Bytes =
- if (length >= 2 && charAt(length - 2) == 13 && charAt(length - 1) == 10) {
- subSequence(0, length - 2)
- }
- else if (length >= 1 && (charAt(length - 1) == 13 || charAt(length - 1) == 10)) {
- subSequence(0, length - 1)
- }
- else this
-
-
/* streams */
- def stream(): ByteArrayInputStream = new ByteArrayInputStream(bytes, offset, length)
+ def stream(): InputStream =
+ if (chunks.isEmpty) new ByteArrayInputStream(chunk0, offset.toInt, size.toInt)
+ else {
+ new InputStream {
+ private var index = 0L
+
+ def read(): Int = {
+ if (index < size) {
+ val res = byte_unchecked(index).toInt & 0xff
+ index += 1
+ res
+ }
+ else -1
+ }
- def write_stream(stream: OutputStream): Unit = stream.write(bytes, offset, length)
+ override def read(buffer: Array[Byte], start: Int, length: Int): Int = {
+ if (length < 16) super.read(buffer, start, length)
+ else {
+ val index0 = index
+ index = (index + length) min size
+ val n = (index - index0).toInt
+ if (n == 0) -1
+ else {
+ var i = start
+ for (a <- slice(index0, index).subarray_iterator) {
+ val l = a.length
+ if (l > 0) {
+ System.arraycopy(a.array, a.offset, buffer, i, l)
+ i += l
+ }
+ }
+ n
+ }
+ }
+ }
+ }
+ }
+
+ def write_stream(stream: OutputStream): Unit =
+ for (a <- subarray_iterator if a.length > 0) {
+ stream.write(a.array, a.offset, a.length)
+ }
/* XZ / Zstd data compression */
def detect_xz: Boolean =
- length >= 6 &&
- bytes(offset) == 0xFD.toByte &&
- bytes(offset + 1) == 0x37.toByte &&
- bytes(offset + 2) == 0x7A.toByte &&
- bytes(offset + 3) == 0x58.toByte &&
- bytes(offset + 4) == 0x5A.toByte &&
- bytes(offset + 5) == 0x00.toByte
+ size >= 6 &&
+ byte_unchecked(0) == 0xFD.toByte &&
+ byte_unchecked(1) == 0x37.toByte &&
+ byte_unchecked(2) == 0x7A.toByte &&
+ byte_unchecked(3) == 0x58.toByte &&
+ byte_unchecked(4) == 0x5A.toByte &&
+ byte_unchecked(5) == 0x00.toByte
def detect_zstd: Boolean =
- length >= 4 &&
- bytes(offset) == 0x28.toByte &&
- bytes(offset + 1) == 0xB5.toByte &&
- bytes(offset + 2) == 0x2F.toByte &&
- bytes(offset + 3) == 0xFD.toByte
+ size >= 4 &&
+ byte_unchecked(0) == 0x28.toByte &&
+ byte_unchecked(1) == 0xB5.toByte &&
+ byte_unchecked(2) == 0x2F.toByte &&
+ byte_unchecked(3) == 0xFD.toByte
def uncompress_xz(cache: Compress.Cache = Compress.Cache.none): Bytes =
- using(new xz.XZInputStream(stream(), cache.for_xz))(Bytes.read_stream(_, hint = length))
+ using(new xz.XZInputStream(stream(), cache.for_xz))(Bytes.read_stream(_, hint = size))
def uncompress_zstd(cache: Compress.Cache = Compress.Cache.none): Bytes = {
Zstd.init()
- val n = zstd.Zstd.decompressedSize(bytes, offset, length)
- if (n > 0 && n < Int.MaxValue) {
- Bytes(zstd.Zstd.decompress(array, n.toInt))
- }
- else {
- using(new zstd.ZstdInputStream(stream(), cache.for_zstd))(Bytes.read_stream(_, hint = length))
- }
+ using(new zstd.ZstdInputStream(stream(), cache.for_zstd))(Bytes.read_stream(_, hint = size))
}
def uncompress(cache: Compress.Cache = Compress.Cache.none): Bytes =
@@ -264,14 +544,15 @@
options: Compress.Options = Compress.Options(),
cache: Compress.Cache = Compress.Cache.none
): Bytes = {
- options match {
- case options_xz: Compress.Options_XZ =>
- val result = new ByteArrayOutputStream(length)
- using(new xz.XZOutputStream(result, options_xz.make, cache.for_xz))(write_stream)
- new Bytes(result.toByteArray, 0, result.size)
- case options_zstd: Compress.Options_Zstd =>
- Zstd.init()
- Bytes(zstd.Zstd.compress(if (offset == 0) bytes else array, options_zstd.level))
+ Bytes.Builder.use_stream(hint = size) { out =>
+ using(
+ options match {
+ case options_xz: Compress.Options_XZ =>
+ new xz.XZOutputStream(out, options_xz.make, cache.for_xz)
+ case options_zstd: Compress.Options_Zstd =>
+ new zstd.ZstdOutputStream(out, cache.for_zstd, options_zstd.level)
+ }
+ ) { s => for (a <- subarray_iterator) s.write(a.array, a.offset, a.length) }
}
}
@@ -280,6 +561,6 @@
cache: Compress.Cache = Compress.Cache.none
) : (Boolean, Bytes) = {
val compressed = compress(options = options, cache = cache)
- if (compressed.length < length) (true, compressed) else (false, this)
+ if (compressed.size < size) (true, compressed) else (false, this)
}
}
--- a/src/Pure/General/graphics_file.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/graphics_file.scala Sun Jun 16 21:54:09 2024 +0200
@@ -51,7 +51,7 @@
val params = new DefaultFontMapper.BaseFontParameters(File.platform_path(entry.path))
params.encoding = BaseFont.IDENTITY_H
params.embedded = true
- params.ttfAfm = entry.bytes.array
+ params.ttfAfm = entry.bytes.make_array
mapper.putName(entry.name, params)
}
mapper
--- a/src/Pure/General/http.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/http.scala Sun Jun 16 21:54:09 2024 +0200
@@ -50,7 +50,7 @@
val encoding: String,
val elapsed_time: Time
) {
- def text: String = new String(bytes.array, encoding)
+ def text: String = new String(bytes.make_array, encoding)
def json: JSON.T = JSON.parse(text)
}
@@ -227,7 +227,7 @@
val encoded_digest = Base64.encode(HexFormat.of().parseHex(SHA1.digest(output).toString))
http.getResponseHeaders.set("Content-Digest", "sha=:" + encoded_digest + ":")
}
- http.sendResponseHeaders(code, if (is_head) -1 else output.length.toLong)
+ http.sendResponseHeaders(code, if (is_head) -1 else output.size)
if (!is_head) using(http.getResponseBody)(output.write_stream)
}
}
--- a/src/Pure/General/sha1.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/sha1.scala Sun Jun 16 21:54:09 2024 +0200
@@ -33,6 +33,9 @@
new Digest(Setup_Build.make_digest(digest_body))
}
+ val digest_empty: Digest = make_digest(_ => ())
+ def digest_length: Int = digest_empty.toString.length
+
def digest(file: JFile): Digest =
make_digest(sha => using(new FileInputStream(file)) { stream =>
val buf = new Array[Byte](65536)
@@ -46,11 +49,11 @@
def digest(path: Path): Digest = digest(path.file)
def digest(bytes: Array[Byte]): Digest = make_digest(_.update(bytes))
+ def digest(bytes: Array[Byte], offset: Int, length: Int): Digest =
+ make_digest(_.update(bytes, offset, length))
def digest(bytes: Bytes): Digest = bytes.sha1_digest
def digest(string: String): Digest = digest(Bytes(string))
- val digest_length: Int = digest("").toString.length
-
/* shasum */
--- a/src/Pure/General/sql.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/sql.scala Sun Jun 16 21:54:09 2024 +0200
@@ -329,7 +329,8 @@
object bytes {
def update(i: Int, bytes: Bytes): Unit = {
if (bytes == null) rep.setBytes(i, null)
- else rep.setBinaryStream(i, bytes.stream(), bytes.length)
+ else if (bytes.size > Int.MaxValue) throw new IllegalArgumentException
+ else rep.setBinaryStream(i, bytes.stream(), bytes.size.toInt)
}
def update(i: Int, bytes: Option[Bytes]): Unit = update(i, bytes.orNull)
}
--- a/src/Pure/General/utf8.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/General/utf8.scala Sun Jun 16 21:54:09 2024 +0200
@@ -17,15 +17,26 @@
def bytes(s: String): Array[Byte] = s.getBytes(charset)
+ def relevant(s: CharSequence): Boolean = {
+ var i = 0
+ val n = s.length
+ var utf8 = false
+ while (i < n && !utf8) {
+ if (s.charAt(i) >= 128) { utf8 = true }
+ i += 1
+ }
+ utf8
+ }
+
/* permissive UTF-8 decoding */
// see also https://en.wikipedia.org/wiki/UTF-8#Description
// overlong encodings enable byte-stuffing of low-ASCII
- def decode_permissive(text: CharSequence): String = {
- val len = text.length
- val buf = new java.lang.StringBuilder(len)
+ def decode_permissive_bytes(bytes: Bytes.Vec): String = {
+ val size = bytes.size
+ val buf = new java.lang.StringBuilder((size min Space.GiB(1).bytes).toInt)
var code = -1
var rest = 0
def flush(): Unit = {
@@ -50,8 +61,8 @@
rest -= 1
}
}
- for (i <- 0 until len) {
- val c = text.charAt(i)
+ for (i <- 0L until size) {
+ val c: Char = bytes(i)
if (c < 128) { flush(); buf.append(c) }
else if ((c & 0xC0) == 0x80) push(c & 0x3F)
else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
@@ -61,4 +72,8 @@
flush()
buf.toString
}
+
+ def decode_permissive(text: String): String =
+ if (relevant(text)) decode_permissive_bytes(new Bytes.Vec_String(text))
+ else text
}
--- a/src/Pure/ML/ml_heap.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/ML/ml_heap.scala Sun Jun 16 21:54:09 2024 +0200
@@ -16,7 +16,7 @@
def read_file_digest(heap: Path): Option[SHA1.Digest] = {
if (heap.is_file) {
val bs = Bytes.read_file(heap, offset = File.size(heap) - sha1_length)
- if (bs.length == sha1_length) {
+ if (bs.size == sha1_length) {
val s = bs.text
if (s.startsWith(sha1_prefix)) Some(SHA1.fake_digest(s.substring(sha1_prefix.length)))
else None
--- a/src/Pure/PIDE/byte_message.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/PIDE/byte_message.scala Sun Jun 16 21:54:09 2024 +0200
@@ -6,7 +6,7 @@
package isabelle
-import java.io.{ByteArrayOutputStream, OutputStream, InputStream, IOException}
+import java.io.{OutputStream, InputStream, IOException}
object Byte_Message {
@@ -27,45 +27,44 @@
/* input operations */
- def read(stream: InputStream, n: Int): Bytes =
+ def read(stream: InputStream, n: Long): Bytes =
Bytes.read_stream(stream, limit = n)
- def read_block(stream: InputStream, n: Int): (Option[Bytes], Int) = {
+ def read_block(stream: InputStream, n: Long): (Option[Bytes], Long) = {
val msg = read(stream, n)
- val len = msg.length
+ val len = msg.size
(if (len == n) Some(msg) else None, len)
}
def read_line(stream: InputStream): Option[Bytes] = {
- val line = new ByteArrayOutputStream(100)
var c = 0
- while ({ c = stream.read; c != -1 && c != 10 }) line.write(c)
-
- if (c == -1 && line.size == 0) None
- else {
- val a = line.toByteArray
- val n = a.length
- val len = if (n > 0 && a(n - 1) == 13) n - 1 else n
- Some(Bytes(a, 0, len))
- }
+ val line =
+ Bytes.Builder.use(hint = 100) { builder =>
+ while ({ c = stream.read; c != -1 && c != 10 }) {
+ builder += c.toByte
+ }
+ }
+ if (c == -1 && line.size == 0) None else Some(line.trim_line)
}
/* messages with multiple chunks (arbitrary content) */
- private def make_header(ns: List[Int]): List[Bytes] =
+ private def make_header(ns: List[Long]): List[Bytes] =
List(Bytes(ns.mkString(",")), Bytes.newline)
def write_message(stream: OutputStream, chunks: List[Bytes]): Unit = {
- write(stream, make_header(chunks.map(_.length)) ::: chunks)
+ write(stream, make_header(chunks.map(_.size)) ::: chunks)
flush(stream)
}
- private def parse_header(line: String): List[Int] =
- try { space_explode(',', line).map(Value.Nat.parse) }
- catch { case ERROR(_) => error("Malformed message header: " + quote(line)) }
+ private def parse_header(line: String): List[Long] = {
+ val args = space_explode(',', line)
+ if (args.forall(is_length)) args.map(Value.Long.parse)
+ else error("Malformed message header: " + quote(line))
+ }
- private def read_chunk(stream: InputStream, n: Int): Bytes =
+ private def read_chunk(stream: InputStream, n: Long): Bytes =
read_block(stream, n) match {
case (Some(chunk), _) => chunk
case (None, len) =>
@@ -78,32 +77,39 @@
/* hybrid messages: line or length+block (restricted content) */
+ private def is_length(str: String): Boolean =
+ !str.isEmpty && str.iterator.forall(Symbol.is_ascii_digit) &&
+ Value.Long.unapply(str).isDefined
+
private def is_length(msg: Bytes): Boolean =
- !msg.is_empty && msg.iterator.forall(b => Symbol.is_ascii_digit(b.toChar))
+ !msg.is_empty && msg.byte_iterator.forall(b => Symbol.is_ascii_digit(b.toChar)) &&
+ Value.Long.unapply(msg.text).isDefined
- private def is_terminated(msg: Bytes): Boolean = {
- val len = msg.length
- len > 0 && Symbol.is_ascii_line_terminator(msg.charAt(len - 1))
- }
+ private def is_terminated(msg: Bytes): Boolean =
+ msg.size match {
+ case size if size > 0 =>
+ val c = msg(size - 1)
+ c <= Char.MaxValue && Symbol.is_ascii_line_terminator(c.toChar)
+ case _ => false
+ }
def write_line_message(stream: OutputStream, msg: Bytes): Unit = {
- if (is_length(msg) || is_terminated(msg))
+ if (is_length(msg) || is_terminated(msg)) {
error ("Bad content for line message:\n" ++ msg.text.take(100))
+ }
- val n = msg.length
+ val n = msg.size
write(stream,
- (if (n > 100 || msg.iterator.contains(10)) make_header(List(n + 1)) else Nil) :::
+ (if (n > 100 || msg.byte_iterator.contains(10)) make_header(List(n + 1)) else Nil) :::
List(msg, Bytes.newline))
flush(stream)
}
def read_line_message(stream: InputStream): Option[Bytes] =
read_line(stream) match {
- case None => None
- case Some(line) =>
- Value.Nat.unapply(line.text) match {
- case None => Some(line)
- case Some(n) => read_block(stream, n)._1.map(_.trim_line)
- }
+ case Some(line) if is_length(line) =>
+ val n = Value.Long.parse(line.text)
+ read_block(stream, n)._1.map(_.trim_line)
+ case res => res
}
}
--- a/src/Pure/PIDE/prover.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/PIDE/prover.scala Sun Jun 16 21:54:09 2024 +0200
@@ -194,7 +194,7 @@
{
case chunks =>
try {
- Bytes(chunks.map(_.length).mkString("", ",", "\n")).write_stream(stream)
+ Bytes(chunks.map(_.size).mkString("", ",", "\n")).write_stream(stream)
chunks.foreach(_.write_stream(stream))
stream.flush
true
@@ -288,7 +288,7 @@
command_input match {
case Some(thread) if thread.is_active() =>
if (trace) {
- val payload = args.foldLeft(0) { case (n, b) => n + b.length }
+ val payload = args.foldLeft(0L) { case (n, b) => n + b.size }
Output.writeln(
"protocol_command " + name + ", args = " + args.length + ", payload = " + payload)
}
--- a/src/Pure/ROOT.ML Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/ROOT.ML Sun Jun 16 21:54:09 2024 +0200
@@ -372,3 +372,4 @@
ML_file "Tools/jedit.ML";
ML_file "Tools/ghc.ML";
ML_file "Tools/generated_files.ML";
+
--- a/src/Pure/System/bash.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/System/bash.scala Sun Jun 16 21:54:09 2024 +0200
@@ -36,7 +36,7 @@
}
def string(s: String): String =
- if (s == "") "\"\""
+ if (s.isEmpty) "\"\""
else UTF8.bytes(s).iterator.map(bash_chr).mkString
def strings(ss: Iterable[String]): String =
--- a/src/Pure/System/isabelle_system.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/System/isabelle_system.scala Sun Jun 16 21:54:09 2024 +0200
@@ -474,7 +474,7 @@
else {
val bytes = using(zip_file.getInputStream(entry))(Bytes.read_stream(_))
Files.createDirectories(res.getParent)
- Files.write(res, bytes.array)
+ Files.write(res, bytes.make_array)
}
}
(entry, result)
--- a/src/Pure/System/web_app.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Pure/System/web_app.scala Sun Jun 16 21:54:09 2024 +0200
@@ -120,12 +120,12 @@
// text might be shorter than bytes because of misinterpreted characters
var found = false
- var bytes_i = 0
- while (!found && bytes_i < bytes.length) {
+ var bytes_i = 0L
+ while (!found && bytes_i < bytes.size) {
var sep_ok = true
var sep_i = 0
while (sep_ok && sep_i < sep.length) {
- if (bytes.charAt(bytes_i + sep_i) == sep.charAt(sep_i)) {
+ if (bytes(bytes_i + sep_i) == sep(sep_i)) {
sep_i += 1
} else {
bytes_i += 1
@@ -135,8 +135,8 @@
if (sep_ok) found = true
}
- val before_bytes = bytes.subSequence(0, bytes_i)
- val after_bytes = bytes.subSequence(bytes_i + sep.length, bytes.length)
+ val before_bytes = bytes.slice(0, bytes_i)
+ val after_bytes = bytes.slice(bytes_i + sep.length, bytes.size)
Some(Seq(before_text, before_bytes), Seq(after_text, after_bytes))
} else None
@@ -147,7 +147,7 @@
def perhaps_unprefix(pfx: String, s: Seq): Seq =
Library.try_unprefix(pfx, s.text) match {
- case Some(text) => Seq(text, s.bytes.subSequence(pfx.length, s.bytes.length))
+ case Some(text) => Seq(text, s.bytes.slice(pfx.length, s.bytes.size))
case None => s
}
--- a/src/Tools/VSCode/src/channel.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Tools/VSCode/src/channel.scala Sun Jun 16 21:54:09 2024 +0200
@@ -9,7 +9,7 @@
import isabelle._
-import java.io.{InputStream, OutputStream, FileOutputStream, ByteArrayOutputStream, File => JFile}
+import java.io.{InputStream, OutputStream, FileOutputStream, File => JFile}
import scala.collection.mutable
@@ -39,8 +39,8 @@
private def read_content(n: Int): String = {
val bytes = Bytes.read_stream(in, limit = n)
- if (bytes.length == n) bytes.text
- else error("Bad message content (unexpected EOF after " + bytes.length + " of " + n + " bytes)")
+ if (bytes.size == n) bytes.text
+ else error("Bad message content (unexpected EOF after " + bytes.size + " of " + n + " bytes)")
}
def read(): Option[JSON.T] = {
--- a/src/Tools/VSCode/src/component_vscodium.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Tools/VSCode/src/component_vscodium.scala Sun Jun 16 21:54:09 2024 +0200
@@ -229,7 +229,7 @@
// function computeChecksum(filename)
private def file_checksum(path: Path): String = {
val digest = MessageDigest.getInstance("MD5")
- digest.update(Bytes.read(path).array)
+ digest.update(Bytes.read(path).make_array)
Bytes(Base64.getEncoder.encode(digest.digest()))
.text.replaceAll("=", "")
}
--- a/src/Tools/jEdit/src/isabelle_export.scala Wed Jun 12 10:47:53 2024 +0200
+++ b/src/Tools/jEdit/src/isabelle_export.scala Sun Jun 16 21:54:09 2024 +0200
@@ -56,7 +56,7 @@
} yield {
val is_dir = entry.name_elems.length > depth
val path = Export.implode_name(theory :: entry.name_elems.take(depth))
- val file_size = if (is_dir) None else Some(entry.bytes.length.toLong)
+ val file_size = if (is_dir) None else Some(entry.bytes.size)
(path, file_size)
}).toSet.toList
(for ((path, file_size) <- entries.iterator) yield {