src/Pure/General/bytes.scala
author wenzelm
Sat, 15 Jun 2024 17:12:49 +0200
changeset 80366 ac4d53bc8f6b
parent 80364 d5c48fe601b6
child 80367 a6c1526600b3
permissions -rw-r--r--
support large byte arrays, using multiple "chunks"; support incremental builder; clarified "limit" (valid >= 0) vs. "hint" (valid > 0); clarified byte access: prefer unchecked acces and iterators internally;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/bytes.scala
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     3
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     4
Immutable byte vectors versus UTF8 strings.
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     5
*/
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     6
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     7
package isabelle
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     8
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     9
78855
6fdcd6c8c97a prefer old-style import "=>";
wenzelm
parents: 78243
diff changeset
    10
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, FileInputStream, FileOutputStream,
6fdcd6c8c97a prefer old-style import "=>";
wenzelm
parents: 78243
diff changeset
    11
  InputStream, OutputStream, File => JFile}
77711
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
    12
import java.nio.ByteBuffer
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
    13
import java.nio.channels.FileChannel
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
    14
import java.nio.file.StandardOpenOption
77712
dd4bb80dbc3a tuned performance: much faster low-level operation;
wenzelm
parents: 77711
diff changeset
    15
import java.util.Arrays
76353
3698d0f3da18 clarified signature;
wenzelm
parents: 76351
diff changeset
    16
import org.tukaani.xz
3698d0f3da18 clarified signature;
wenzelm
parents: 76351
diff changeset
    17
import com.github.luben.zstd
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    18
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    19
import scala.collection.mutable.ArrayBuffer
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    20
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    21
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75382
diff changeset
    22
object Bytes {
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    23
  /* internal sizes */
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    24
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    25
  val array_size: Long = Int.MaxValue - 8  // see java.io.InputStream.MAX_BUFFER_SIZE
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    26
  val chunk_size: Long = Space.MiB(100).bytes
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    27
  val block_size: Int = 8192
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    28
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    29
  class Too_Large(size: Long) extends IndexOutOfBoundsException {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    30
    override def getMessage: String =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    31
      "Bytes too large for particular operation: " +
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    32
        Space.bytes(size).print + " > " + Space.bytes(array_size).print
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    33
  }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    34
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
    35
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    36
  /* main constructors */
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    37
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    38
  private def reuse_array(bytes: Array[Byte]): Bytes =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    39
    if (bytes.length <= chunk_size) new Bytes(None, bytes, 0L, bytes.length.toLong)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    40
    else apply(bytes)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    41
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    42
  val empty: Bytes = reuse_array(new Array(0))
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    43
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    44
  def apply(s: CharSequence): Bytes = {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    45
    val str = s.toString
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    46
    if (str.isEmpty) empty
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    47
    else Builder.use(hint = str.length) { builder => builder += str }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    48
  }
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    49
63779
9da65bc75610 more operations;
wenzelm
parents: 62527
diff changeset
    50
  def apply(a: Array[Byte]): Bytes = apply(a, 0, a.length)
9da65bc75610 more operations;
wenzelm
parents: 62527
diff changeset
    51
54442
c39972ddd672 more specific Protocol_Output: empty message.body, main content via bytes/text;
wenzelm
parents: 54440
diff changeset
    52
  def apply(a: Array[Byte], offset: Int, length: Int): Bytes =
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    53
    Builder.use(hint = length) { builder => builder += (a, offset, length) }
54442
c39972ddd672 more specific Protocol_Output: empty message.body, main content via bytes/text;
wenzelm
parents: 54440
diff changeset
    54
69454
ef051edd4d10 more uniform multi-language operations;
wenzelm
parents: 69448
diff changeset
    55
  val newline: Bytes = apply("\n")
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    56
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    57
  def decode_base64(s: String): Bytes = Bytes.reuse_array(Base64.decode(s))
68108
2277fe496d78 more operations;
wenzelm
parents: 68106
diff changeset
    58
75579
3362b6a5d697 support XZ compression in Isabelle/ML;
wenzelm
parents: 75393
diff changeset
    59
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    60
  /* read */
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    61
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    62
  def read_stream(stream: InputStream, limit: Long = -1L, hint: Long = 0L): Bytes = {
64004
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
    63
    if (limit == 0) empty
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
    64
    else {
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    65
      Builder.use(hint = if (limit > 0) limit else hint) { builder =>
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    66
        val buf = new Array[Byte](Bytes.block_size)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    67
        var m = 0
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    68
        var n = 0L
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    69
        while ({
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    70
          val l = if (limit > 0) ((limit - n) min buf.length).toInt else buf.length
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    71
          m = stream.read(buf, 0, l)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    72
          if (m != -1) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    73
            builder += (buf, 0, m)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    74
            n += m
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    75
          }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    76
          m != -1 && (limit < 0 || limit > n)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    77
        }) ()
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    78
      }
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    79
    }
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    80
  }
64001
7ecb22be8f03 more general read_stream: return actual byte count;
wenzelm
parents: 63779
diff changeset
    81
79510
d8330439823a clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents: 79509
diff changeset
    82
  def read_url(name: String): Bytes = using(Url(name).open_stream())(read_stream(_))
77717
6a2daddc238c tuned signature;
wenzelm
parents: 77716
diff changeset
    83
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    84
  def read_file(path: Path, offset: Long = 0L, limit: Long = -1L): Bytes = {
78956
12abaffb0346 tuned signature: more operations;
wenzelm
parents: 78954
diff changeset
    85
    val length = File.size(path)
77711
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
    86
    val start = offset.max(0L)
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    87
    val len = (length - start).max(0L).min(if (limit < 0) Long.MaxValue else limit)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    88
    if (len == 0L) empty
77711
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
    89
    else {
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    90
      Builder.use(hint = len) { builder =>
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    91
        using(FileChannel.open(path.java_path, StandardOpenOption.READ)) { channel =>
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    92
          channel.position(start)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    93
          val buf = ByteBuffer.allocate(Bytes.block_size)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    94
          var m = 0
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    95
          var n = 0L
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    96
          while ({
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    97
            m = channel.read(buf)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    98
            if (m != -1) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    99
              builder += (buf.array(), 0, m)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   100
              buf.clear()
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   101
              n += m
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   102
            }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   103
            m != -1 && len > n
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   104
          }) ()
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   105
        }
77711
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
   106
      }
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
   107
    }
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
   108
  }
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
   109
78953
b6116a86d2ac clarified signature;
wenzelm
parents: 78855
diff changeset
   110
  def read(path: Path): Bytes = read_file(path)
b6116a86d2ac clarified signature;
wenzelm
parents: 78855
diff changeset
   111
  def read(file: JFile): Bytes = read_file(File.path(file))
77718
6ad3a412ed97 clarified signature;
wenzelm
parents: 77717
diff changeset
   112
64229
12aa3980f65c more operations;
wenzelm
parents: 64224
diff changeset
   113
12aa3980f65c more operations;
wenzelm
parents: 64224
diff changeset
   114
  /* write */
12aa3980f65c more operations;
wenzelm
parents: 64224
diff changeset
   115
69393
ed0824ef337e static type for Library.using: avoid Java 11 warnings on "Illegal reflective access";
wenzelm
parents: 69365
diff changeset
   116
  def write(file: JFile, bytes: Bytes): Unit =
ed0824ef337e static type for Library.using: avoid Java 11 warnings on "Illegal reflective access";
wenzelm
parents: 69365
diff changeset
   117
    using(new FileOutputStream(file))(bytes.write_stream(_))
64229
12aa3980f65c more operations;
wenzelm
parents: 64224
diff changeset
   118
12aa3980f65c more operations;
wenzelm
parents: 64224
diff changeset
   119
  def write(path: Path, bytes: Bytes): Unit = write(path.file, bytes)
78194
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   120
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   121
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   122
  /* append */
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   123
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   124
  def append(file: JFile, bytes: Bytes): Unit =
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   125
    using(new FileOutputStream(file, true))(bytes.write_stream(_))
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   126
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   127
  def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes)
80351
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   128
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   129
80355
5a555acad203 clarified signature (again);
wenzelm
parents: 80353
diff changeset
   130
  /* vector of short unsigned integers */
80351
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   131
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   132
  trait Vec {
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   133
    def size: Long
80355
5a555acad203 clarified signature (again);
wenzelm
parents: 80353
diff changeset
   134
    def apply(i: Long): Char
80351
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   135
  }
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   136
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   137
  class Vec_String(string: String) extends Vec {
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   138
    override def size: Long = string.length.toLong
80355
5a555acad203 clarified signature (again);
wenzelm
parents: 80353
diff changeset
   139
    override def apply(i: Long): Char =
5a555acad203 clarified signature (again);
wenzelm
parents: 80353
diff changeset
   140
      if (0 <= i && i < size) string(i.toInt)
80351
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   141
      else throw new IndexOutOfBoundsException
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   142
  }
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   143
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   144
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   145
  /* incremental builder: synchronized */
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   146
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   147
  private def make_size(chunks: Array[Array[Byte]], buffer: Array[Byte]): Long =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   148
    chunks.foldLeft(buffer.length.toLong)((n, chunk) => n + chunk.length)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   149
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   150
  object Builder {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   151
    def use(hint: Long = 0L)(body: Builder => Unit): Bytes = {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   152
      val chunks_size = if (hint <= 0) 16 else (hint / chunk_size).toInt
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   153
      val buffer_size = if (hint <= 0) 1024 else (hint min chunk_size min array_size).toInt
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   154
      val builder = new Builder(chunks_size, buffer_size)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   155
      body(builder)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   156
      builder.done()
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   157
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   158
  }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   159
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   160
  final class Builder private[Bytes](chunks_size: Int, buffer_size: Int) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   161
    var chunks = new ArrayBuffer[Array[Byte]](chunks_size)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   162
    var buffer = new ByteArrayOutputStream(buffer_size)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   163
    def buffer_free(): Int = chunk_size.toInt - buffer.size()
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   164
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   165
    def += (array: Array[Byte], offset: Int, length: Int): Unit = {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   166
      if (offset < 0 || length < 0 || offset.toLong + length.toLong > array.length) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   167
        throw new IndexOutOfBoundsException
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   168
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   169
      else if (length > 0) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   170
        synchronized {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   171
          var i = offset
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   172
          var n = length
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   173
          while (n > 0) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   174
            val m = buffer_free()
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   175
            if (m > 0) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   176
              val l = m min n
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   177
              buffer.write(array, i, l)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   178
              i += l
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   179
              n -= l
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   180
            }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   181
            if (buffer_free() == 0) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   182
              chunks += buffer.toByteArray
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   183
              buffer = new ByteArrayOutputStream
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   184
            }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   185
          }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   186
        }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   187
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   188
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   189
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   190
    def += (array: Array[Byte]): Unit = { this += (array, 0, array.length) }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   191
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   192
    def += (a: Subarray): Unit = { this += (a.array, a.offset, a.length) }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   193
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   194
    def += (string: String): Unit = if (string.nonEmpty) { this += UTF8.bytes(string) }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   195
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   196
    private def done(): Bytes = synchronized {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   197
      val cs = chunks.toArray
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   198
      val b = buffer.toByteArray
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   199
      chunks = null
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   200
      buffer = null
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   201
      new Bytes(if (cs.isEmpty) None else Some(cs), b, 0L, make_size(cs, b))
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   202
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   203
  }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   204
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   205
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   206
  /* subarray */
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   207
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   208
  object Subarray {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   209
    val empty: Subarray = new Subarray(new Array[Byte](0), 0, 0)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   210
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   211
    def apply(array: Array[Byte], offset: Int, length: Int): Subarray = {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   212
      val n = array.length
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   213
      if (0 <= offset && offset < n && 0 <= length && offset + length <= n) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   214
        if (length == 0) empty
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   215
        else new Subarray(array, offset, length)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   216
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   217
      else throw new IndexOutOfBoundsException
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   218
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   219
  }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   220
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   221
  final class Subarray private(
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   222
    val array: Array[Byte],
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   223
    val offset: Int,
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   224
    val length: Int
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   225
  ) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   226
    override def toString: String = "Bytes.Subarray(" + Space.bytes(length).print + ")"
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   227
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   228
    def byte_iterator: Iterator[Byte] =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   229
      if (length == 0) Iterator.empty
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   230
      else { for (i <- (offset until (offset + length)).iterator) yield array(i) }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   231
  }
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   232
}
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   233
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   234
final class Bytes private(
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   235
  protected val chunks: Option[Array[Array[Byte]]],
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   236
  protected val chunk0: Array[Byte],
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   237
  protected val offset: Long,
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   238
  val size: Long
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   239
) extends Bytes.Vec {
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   240
  assert(
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   241
    (chunks.isEmpty ||
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   242
      chunks.get.nonEmpty &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   243
      chunks.get.forall(chunk => chunk.length == Bytes.chunk_size)) &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   244
    chunk0.length < Bytes.chunk_size)
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   245
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   246
  def is_empty: Boolean = size == 0
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   247
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   248
  def is_sliced: Boolean =
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   249
    offset != 0L || {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   250
      chunks match {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   251
        case None => size != chunk0.length
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   252
        case Some(cs) => size != Bytes.make_size(cs, chunk0)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   253
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   254
    }
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   255
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   256
  override def toString: String =
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   257
    if (is_empty) "Bytes.empty"
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   258
    else "Bytes(" + Space.bytes(size).print + if_proper(is_sliced, ", sliced") + ")"
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   259
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   260
  def small_size: Int =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   261
    if (size > Bytes.array_size) throw new Bytes.Too_Large(size)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   262
    else size.toInt
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   263
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   264
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   265
  /* slice */
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   266
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   267
  def slice(i: Long, j: Long): Bytes =
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   268
    if (0 <= i && i <= j && j <= size) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   269
      if (i == j) Bytes.empty
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   270
      else new Bytes(chunks, chunk0, offset + i, j - i)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   271
    }
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   272
    else throw new IndexOutOfBoundsException
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   273
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   274
  def unslice: Bytes =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   275
    if (is_sliced) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   276
      Bytes.Builder.use(hint = size) { builder =>
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   277
        for (a <- subarray_iterator) { builder += a }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   278
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   279
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   280
    else this
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   281
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   282
  def trim_line: Bytes =
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   283
    if (size >= 2 && byte_unchecked(size - 2) == 13 && byte_unchecked(size - 1) == 10) {
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   284
      slice(0, size - 2)
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   285
    }
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   286
    else if (size >= 1 && (byte_unchecked(size - 1) == 13 || byte_unchecked(size - 1) == 10)) {
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   287
      slice(0, size - 1)
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   288
    }
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   289
    else this
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   290
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   291
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   292
  /* elements: signed Byte or unsigned Char */
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   293
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   294
  protected def byte_unchecked(i: Long): Byte = {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   295
    val a = offset + i
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   296
    chunks match {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   297
      case None => chunk0(a.toInt)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   298
      case Some(cs) =>
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   299
        val b = a % Bytes.chunk_size
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   300
        val c = a / Bytes.chunk_size
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   301
        if (c < cs.length) cs(c.toInt)(b.toInt) else chunk0(b.toInt)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   302
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   303
  }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   304
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   305
  def byte(i: Long): Byte =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   306
    if (0 <= i && i < size) byte_unchecked(i)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   307
    else throw new IndexOutOfBoundsException
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   308
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   309
  def apply(i: Long): Char = (byte(i).toInt & 0xff).toChar
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   310
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   311
  protected def subarray_iterator: Iterator[Bytes.Subarray] =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   312
    if (is_empty) Iterator.empty
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   313
    else if (chunks.isEmpty) Iterator(Bytes.Subarray(chunk0, offset.toInt, size.toInt))
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   314
    else {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   315
      val end_offset = offset + size
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   316
      for ((array, index) <- (chunks.get.iterator ++ Iterator(chunk0)).zipWithIndex) yield {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   317
        val array_start = Bytes.chunk_size * index
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   318
        val array_stop = array_start + array.length
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   319
        if (offset < array_stop && array_start < end_offset) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   320
          val i = (array_start max offset) - array_start
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   321
          val j = (array_stop min end_offset) - array_start
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   322
          Bytes.Subarray(array, i.toInt, (j - i).toInt)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   323
        }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   324
        else Bytes.Subarray.empty
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   325
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   326
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   327
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   328
  def byte_iterator: Iterator[Byte] =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   329
    for {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   330
      a <- subarray_iterator
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   331
      b <- a.byte_iterator
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   332
    } yield b
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   333
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   334
80363
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   335
  /* hash and equality */
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   336
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   337
  lazy val sha1_digest: SHA1.Digest =
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   338
    if (is_empty) SHA1.digest_empty
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   339
    else {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   340
      SHA1.make_digest { sha =>
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   341
        for (a <- subarray_iterator if a.length > 0) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   342
          sha.update(a.array, a.offset, a.length)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   343
        }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   344
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   345
    }
80363
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   346
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   347
  override def hashCode(): Int = sha1_digest.hashCode()
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   348
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75382
diff changeset
   349
  override def equals(that: Any): Boolean = {
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   350
    that match {
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   351
      case other: Bytes =>
80363
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   352
        if (this.eq(other)) true
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   353
        else if (size != other.size) false
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   354
        else if (chunks.isEmpty && size <= 10 * SHA1.digest_length) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   355
          Arrays.equals(chunk0, offset.toInt, (offset + size).toInt,
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   356
            other.chunk0, other.offset.toInt, (other.offset + other.size).toInt)
80363
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   357
        }
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   358
        else sha1_digest == other.sha1_digest
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   359
      case _ => false
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   360
    }
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   361
  }
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   362
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   363
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   364
  /* content */
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   365
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75382
diff changeset
   366
  def array: Array[Byte] = {
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   367
    val buf = new ByteArrayOutputStream(small_size)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   368
    for (a <- subarray_iterator) { buf.write(a.array, a.offset, a.length) }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   369
    buf.toByteArray
69365
c5b3860d29ef avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
wenzelm
parents: 68167
diff changeset
   370
  }
c5b3860d29ef avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
wenzelm
parents: 68167
diff changeset
   371
80352
7a6cba7c77c9 minor performance tuning;
wenzelm
parents: 80351
diff changeset
   372
  def text: String =
7a6cba7c77c9 minor performance tuning;
wenzelm
parents: 80351
diff changeset
   373
    if (is_empty) ""
80362
d395b7e14102 clarified signature;
wenzelm
parents: 80361
diff changeset
   374
    else if (byte_iterator.forall(_ >= 0)) {
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   375
      new String(array, UTF8.charset)
80352
7a6cba7c77c9 minor performance tuning;
wenzelm
parents: 80351
diff changeset
   376
    }
7a6cba7c77c9 minor performance tuning;
wenzelm
parents: 80351
diff changeset
   377
    else UTF8.decode_permissive_bytes(this)
65279
fa62e095d8f1 clarified signature (again, see also 3ed43cfc8b14);
wenzelm
parents: 65070
diff changeset
   378
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   379
  def wellformed_text: Option[String] =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   380
    try {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   381
      val s = text
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   382
      if (this == Bytes(s)) Some(s) else None
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   383
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   384
    catch { case ERROR(_) => None }
76236
03dd2f19f1d7 clarified signature: more operations;
wenzelm
parents: 75709
diff changeset
   385
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   386
  def encode_base64: String = Base64.encode(array)
68094
0b66aca9c965 more operations;
wenzelm
parents: 68087
diff changeset
   387
76236
03dd2f19f1d7 clarified signature: more operations;
wenzelm
parents: 75709
diff changeset
   388
  def maybe_encode_base64: (Boolean, String) =
03dd2f19f1d7 clarified signature: more operations;
wenzelm
parents: 75709
diff changeset
   389
    wellformed_text match {
03dd2f19f1d7 clarified signature: more operations;
wenzelm
parents: 75709
diff changeset
   390
      case Some(s) => (false, s)
03dd2f19f1d7 clarified signature: more operations;
wenzelm
parents: 75709
diff changeset
   391
      case None => (true, encode_base64)
03dd2f19f1d7 clarified signature: more operations;
wenzelm
parents: 75709
diff changeset
   392
    }
68106
a514e29db980 return exports as result for Isabelle server;
wenzelm
parents: 68094
diff changeset
   393
72885
1b0f81e556a2 accommodate OpenJDK 15;
wenzelm
parents: 71152
diff changeset
   394
  def proper: Option[Bytes] = if (is_empty) None else Some(this)
1b0f81e556a2 accommodate OpenJDK 15;
wenzelm
parents: 71152
diff changeset
   395
  def proper_text: Option[String] = if (is_empty) None else Some(text)
65630
c41bbf657310 more operations;
wenzelm
parents: 65279
diff changeset
   396
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   397
  def +(other: Bytes): Bytes =
72885
1b0f81e556a2 accommodate OpenJDK 15;
wenzelm
parents: 71152
diff changeset
   398
    if (other.is_empty) this
1b0f81e556a2 accommodate OpenJDK 15;
wenzelm
parents: 71152
diff changeset
   399
    else if (is_empty) other
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   400
    else {
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   401
      Bytes.Builder.use(hint = size + other.size) { builder =>
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   402
        for (a <- subarray_iterator ++ other.subarray_iterator) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   403
          builder += a
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   404
        }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   405
      }
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   406
    }
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   407
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   408
64004
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   409
  /* streams */
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   410
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   411
  def stream(): InputStream =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   412
    if (chunks.isEmpty) new ByteArrayInputStream(chunk0, offset.toInt, size.toInt)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   413
    else {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   414
      new InputStream {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   415
        private val it = byte_iterator
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   416
        def read(): Int = if (it.hasNext) it.next().toInt & 0xff else -1
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   417
        override def readAllBytes(): Array[Byte] = array
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   418
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   419
    }
64004
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   420
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   421
  def write_stream(stream: OutputStream): Unit =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   422
    for (a <- subarray_iterator if a.length > 0) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   423
      stream.write(a.array, a.offset, a.length)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   424
    }
64004
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   425
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   426
76351
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   427
  /* XZ / Zstd data compression */
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   428
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   429
  def detect_xz: Boolean =
80357
fe123d033e76 clarified signature: pro-forma support for Bytes with size: Long;
wenzelm
parents: 80356
diff changeset
   430
    size >= 6 &&
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   431
      byte_unchecked(0) == 0xFD.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   432
      byte_unchecked(1) == 0x37.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   433
      byte_unchecked(2) == 0x7A.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   434
      byte_unchecked(3) == 0x58.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   435
      byte_unchecked(4) == 0x5A.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   436
      byte_unchecked(5) == 0x00.toByte
76351
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   437
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   438
  def detect_zstd: Boolean =
80357
fe123d033e76 clarified signature: pro-forma support for Bytes with size: Long;
wenzelm
parents: 80356
diff changeset
   439
    size >= 4 &&
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   440
      byte_unchecked(0) == 0x28.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   441
      byte_unchecked(1) == 0xB5.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   442
      byte_unchecked(2) == 0x2F.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   443
      byte_unchecked(3) == 0xFD.toByte
76351
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   444
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   445
  def uncompress_xz(cache: Compress.Cache = Compress.Cache.none): Bytes =
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   446
    using(new xz.XZInputStream(stream(), cache.for_xz))(Bytes.read_stream(_, hint = size))
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   447
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   448
  def uncompress_zstd(cache: Compress.Cache = Compress.Cache.none): Bytes = {
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   449
    Zstd.init()
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   450
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   451
    def uncompress_stream(hint: Long): Bytes =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   452
      using(new zstd.ZstdInputStream(stream(), cache.for_zstd)) { inp =>
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   453
        Bytes.read_stream(inp, hint = hint)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   454
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   455
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   456
    if (chunks.isEmpty) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   457
      zstd.Zstd.decompressedSize(chunk0, offset.toInt, size.toInt) match {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   458
        case 0 => Bytes.empty
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   459
        case n if n <= Bytes.array_size && !is_sliced =>
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   460
          Bytes.reuse_array(zstd.Zstd.decompress(chunk0, n.toInt))
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   461
        case n => uncompress_stream(n)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   462
      }
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   463
    }
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   464
    else uncompress_stream(size / 2)
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   465
  }
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   466
76351
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   467
  def uncompress(cache: Compress.Cache = Compress.Cache.none): Bytes =
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   468
    if (detect_xz) uncompress_xz(cache = cache)
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   469
    else if (detect_zstd) uncompress_zstd(cache = cache)
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   470
    else error("Cannot detect compression scheme")
64004
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   471
76351
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   472
  def compress(
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   473
    options: Compress.Options = Compress.Options(),
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   474
    cache: Compress.Cache = Compress.Cache.none
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   475
  ): Bytes = {
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   476
    options match {
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   477
      case options_xz: Compress.Options_XZ =>
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   478
        val out = new ByteArrayOutputStream((size min Bytes.array_size).toInt)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   479
        using(new xz.XZOutputStream(out, options_xz.make, cache.for_xz))(write_stream)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   480
        Bytes(out.toByteArray)
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   481
      case options_zstd: Compress.Options_Zstd =>
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   482
        Zstd.init()
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   483
        val inp = if (chunks.isEmpty && !is_sliced) chunk0 else array
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   484
        Bytes(zstd.Zstd.compress(inp, options_zstd.level))
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   485
    }
64004
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   486
  }
68167
327bb0f5f768 clarified implicit compression;
wenzelm
parents: 68150
diff changeset
   487
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75382
diff changeset
   488
  def maybe_compress(
76351
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   489
    options: Compress.Options = Compress.Options(),
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   490
    cache: Compress.Cache = Compress.Cache.none
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75382
diff changeset
   491
  ) : (Boolean, Bytes) = {
68167
327bb0f5f768 clarified implicit compression;
wenzelm
parents: 68150
diff changeset
   492
    val compressed = compress(options = options, cache = cache)
80357
fe123d033e76 clarified signature: pro-forma support for Bytes with size: Long;
wenzelm
parents: 80356
diff changeset
   493
    if (compressed.size < size) (true, compressed) else (false, this)
68167
327bb0f5f768 clarified implicit compression;
wenzelm
parents: 68150
diff changeset
   494
  }
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   495
}