src/Pure/General/bytes.scala
author wenzelm
Sun, 16 Jun 2024 11:59:16 +0200
changeset 80387 afaac8c6048e
parent 80385 605e19319343
child 80388 52f71e3816d8
permissions -rw-r--r--
minor performance tuning;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/bytes.scala
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     3
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     4
Immutable byte vectors versus UTF8 strings.
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     5
*/
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     6
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     7
package isabelle
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     8
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
     9
78855
6fdcd6c8c97a prefer old-style import "=>";
wenzelm
parents: 78243
diff changeset
    10
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, FileInputStream, FileOutputStream,
6fdcd6c8c97a prefer old-style import "=>";
wenzelm
parents: 78243
diff changeset
    11
  InputStream, OutputStream, File => JFile}
77711
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
    12
import java.nio.ByteBuffer
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
    13
import java.nio.channels.FileChannel
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
    14
import java.nio.file.StandardOpenOption
77712
dd4bb80dbc3a tuned performance: much faster low-level operation;
wenzelm
parents: 77711
diff changeset
    15
import java.util.Arrays
76353
3698d0f3da18 clarified signature;
wenzelm
parents: 76351
diff changeset
    16
import org.tukaani.xz
3698d0f3da18 clarified signature;
wenzelm
parents: 76351
diff changeset
    17
import com.github.luben.zstd
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    18
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    19
import scala.collection.mutable.ArrayBuffer
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    20
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    21
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75382
diff changeset
    22
object Bytes {
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    23
  /* internal sizes */
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    24
80385
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
    25
  private val array_size: Long = Int.MaxValue - 8  // see java.io.InputStream.MAX_BUFFER_SIZE
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
    26
  private val chunk_size: Long = Space.MiB(100).bytes
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
    27
  private val block_size: Int = 8192
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
    28
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
    29
  private def make_size(chunks: Array[Array[Byte]], buffer: Array[Byte]): Long =
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
    30
    chunks.foldLeft(buffer.length.toLong)((n, chunk) => n + chunk.length)
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    31
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    32
  class Too_Large(size: Long) extends IndexOutOfBoundsException {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    33
    override def getMessage: String =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    34
      "Bytes too large for particular operation: " +
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    35
        Space.bytes(size).print + " > " + Space.bytes(array_size).print
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    36
  }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    37
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
    38
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    39
  /* main constructors */
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    40
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    41
  private def reuse_array(bytes: Array[Byte]): Bytes =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    42
    if (bytes.length <= chunk_size) new Bytes(None, bytes, 0L, bytes.length.toLong)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    43
    else apply(bytes)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    44
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    45
  val empty: Bytes = reuse_array(new Array(0))
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    46
80373
dc220d47b85e minor performance tuning;
wenzelm
parents: 80372
diff changeset
    47
  def apply(s: CharSequence): Bytes =
dc220d47b85e minor performance tuning;
wenzelm
parents: 80372
diff changeset
    48
    if (s.isEmpty) empty
dc220d47b85e minor performance tuning;
wenzelm
parents: 80372
diff changeset
    49
    else Builder.use(hint = s.length) { builder => builder += s }
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    50
63779
9da65bc75610 more operations;
wenzelm
parents: 62527
diff changeset
    51
  def apply(a: Array[Byte]): Bytes = apply(a, 0, a.length)
9da65bc75610 more operations;
wenzelm
parents: 62527
diff changeset
    52
54442
c39972ddd672 more specific Protocol_Output: empty message.body, main content via bytes/text;
wenzelm
parents: 54440
diff changeset
    53
  def apply(a: Array[Byte], offset: Int, length: Int): Bytes =
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    54
    Builder.use(hint = length) { builder => builder += (a, offset, length) }
54442
c39972ddd672 more specific Protocol_Output: empty message.body, main content via bytes/text;
wenzelm
parents: 54440
diff changeset
    55
69454
ef051edd4d10 more uniform multi-language operations;
wenzelm
parents: 69448
diff changeset
    56
  val newline: Bytes = apply("\n")
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    57
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    58
  def decode_base64(s: String): Bytes = Bytes.reuse_array(Base64.decode(s))
68108
2277fe496d78 more operations;
wenzelm
parents: 68106
diff changeset
    59
75579
3362b6a5d697 support XZ compression in Isabelle/ML;
wenzelm
parents: 75393
diff changeset
    60
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    61
  /* read */
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    62
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    63
  def read_stream(stream: InputStream, limit: Long = -1L, hint: Long = 0L): Bytes = {
64004
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
    64
    if (limit == 0) empty
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
    65
    else {
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    66
      Builder.use(hint = if (limit > 0) limit else hint) { builder =>
80384
061d672570f5 tuned names;
wenzelm
parents: 80383
diff changeset
    67
        val buf = new Array[Byte](block_size)
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    68
        var m = 0
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    69
        var n = 0L
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    70
        while ({
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    71
          val l = if (limit > 0) ((limit - n) min buf.length).toInt else buf.length
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    72
          m = stream.read(buf, 0, l)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    73
          if (m != -1) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    74
            builder += (buf, 0, m)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    75
            n += m
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    76
          }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    77
          m != -1 && (limit < 0 || limit > n)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    78
        }) ()
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    79
      }
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
    80
    }
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    81
  }
64001
7ecb22be8f03 more general read_stream: return actual byte count;
wenzelm
parents: 63779
diff changeset
    82
79510
d8330439823a clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents: 79509
diff changeset
    83
  def read_url(name: String): Bytes = using(Url(name).open_stream())(read_stream(_))
77717
6a2daddc238c tuned signature;
wenzelm
parents: 77716
diff changeset
    84
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    85
  def read_file(path: Path, offset: Long = 0L, limit: Long = -1L): Bytes = {
78956
12abaffb0346 tuned signature: more operations;
wenzelm
parents: 78954
diff changeset
    86
    val length = File.size(path)
77711
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
    87
    val start = offset.max(0L)
80384
061d672570f5 tuned names;
wenzelm
parents: 80383
diff changeset
    88
    val stop = (length - start).max(0L).min(if (limit < 0) Long.MaxValue else limit)
061d672570f5 tuned names;
wenzelm
parents: 80383
diff changeset
    89
    if (stop == 0L) empty
77711
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
    90
    else {
80384
061d672570f5 tuned names;
wenzelm
parents: 80383
diff changeset
    91
      Builder.use(hint = stop) { builder =>
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    92
        using(FileChannel.open(path.java_path, StandardOpenOption.READ)) { channel =>
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    93
          channel.position(start)
80384
061d672570f5 tuned names;
wenzelm
parents: 80383
diff changeset
    94
          val buf = ByteBuffer.allocate(block_size)
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    95
          var m = 0
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    96
          var n = 0L
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    97
          while ({
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    98
            m = channel.read(buf)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
    99
            if (m != -1) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   100
              builder += (buf.array(), 0, m)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   101
              buf.clear()
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   102
              n += m
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   103
            }
80384
061d672570f5 tuned names;
wenzelm
parents: 80383
diff changeset
   104
            m != -1 && stop > n
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   105
          }) ()
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   106
        }
77711
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
   107
      }
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
   108
    }
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
   109
  }
25fd62cba347 clarified signature: more general operation Bytes.read_slice;
wenzelm
parents: 76361
diff changeset
   110
78953
b6116a86d2ac clarified signature;
wenzelm
parents: 78855
diff changeset
   111
  def read(path: Path): Bytes = read_file(path)
b6116a86d2ac clarified signature;
wenzelm
parents: 78855
diff changeset
   112
  def read(file: JFile): Bytes = read_file(File.path(file))
77718
6ad3a412ed97 clarified signature;
wenzelm
parents: 77717
diff changeset
   113
64229
12aa3980f65c more operations;
wenzelm
parents: 64224
diff changeset
   114
12aa3980f65c more operations;
wenzelm
parents: 64224
diff changeset
   115
  /* write */
12aa3980f65c more operations;
wenzelm
parents: 64224
diff changeset
   116
69393
ed0824ef337e static type for Library.using: avoid Java 11 warnings on "Illegal reflective access";
wenzelm
parents: 69365
diff changeset
   117
  def write(file: JFile, bytes: Bytes): Unit =
ed0824ef337e static type for Library.using: avoid Java 11 warnings on "Illegal reflective access";
wenzelm
parents: 69365
diff changeset
   118
    using(new FileOutputStream(file))(bytes.write_stream(_))
64229
12aa3980f65c more operations;
wenzelm
parents: 64224
diff changeset
   119
12aa3980f65c more operations;
wenzelm
parents: 64224
diff changeset
   120
  def write(path: Path, bytes: Bytes): Unit = write(path.file, bytes)
78194
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   121
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   122
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   123
  /* append */
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   124
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   125
  def append(file: JFile, bytes: Bytes): Unit =
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   126
    using(new FileOutputStream(file, true))(bytes.write_stream(_))
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   127
da721ba809a4 more operations;
wenzelm
parents: 77718
diff changeset
   128
  def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes)
80351
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   129
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   130
80355
5a555acad203 clarified signature (again);
wenzelm
parents: 80353
diff changeset
   131
  /* vector of short unsigned integers */
80351
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   132
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   133
  trait Vec {
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   134
    def size: Long
80355
5a555acad203 clarified signature (again);
wenzelm
parents: 80353
diff changeset
   135
    def apply(i: Long): Char
80351
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   136
  }
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   137
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   138
  class Vec_String(string: String) extends Vec {
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   139
    override def size: Long = string.length.toLong
80355
5a555acad203 clarified signature (again);
wenzelm
parents: 80353
diff changeset
   140
    override def apply(i: Long): Char =
5a555acad203 clarified signature (again);
wenzelm
parents: 80353
diff changeset
   141
      if (0 <= i && i < size) string(i.toInt)
80351
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   142
      else throw new IndexOutOfBoundsException
dbbe26afc319 clarified signature: more accurate types;
wenzelm
parents: 80350
diff changeset
   143
  }
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   144
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   145
80380
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   146
  /* incremental builder: unsynchronized! */
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   147
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   148
  object Builder {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   149
    def use(hint: Long = 0L)(body: Builder => Unit): Bytes = {
80375
wenzelm
parents: 80374
diff changeset
   150
      val builder = new Builder(hint)
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   151
      body(builder)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   152
      builder.done()
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   153
    }
80377
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   154
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   155
    class Stream(hint: Long = 0L) extends OutputStream {
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   156
      val builder = new Builder(hint)
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   157
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   158
      override def write(b: Int): Unit =
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   159
        { builder += b.toByte }
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   160
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   161
      override def write(array: Array[Byte], offset: Int, length: Int): Unit =
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   162
        { builder += (array, offset, length) }
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   163
    }
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   164
  }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   165
80375
wenzelm
parents: 80374
diff changeset
   166
  final class Builder private[Bytes](hint: Long) {
80374
4f1374f56c0b minor performance tuning;
wenzelm
parents: 80373
diff changeset
   167
    private var size = 0L
80375
wenzelm
parents: 80374
diff changeset
   168
    private var chunks =
wenzelm
parents: 80374
diff changeset
   169
      new ArrayBuffer[Array[Byte]](if (hint <= 0) 16 else (hint / chunk_size).toInt)
wenzelm
parents: 80374
diff changeset
   170
    private var buffer =
wenzelm
parents: 80374
diff changeset
   171
      new ByteArrayOutputStream(if (hint <= 0) 1024 else (hint min chunk_size min array_size).toInt)
80370
wenzelm
parents: 80369
diff changeset
   172
80369
wenzelm
parents: 80368
diff changeset
   173
    private def buffer_free(): Int = chunk_size.toInt - buffer.size()
80370
wenzelm
parents: 80369
diff changeset
   174
    private def buffer_check(): Unit =
wenzelm
parents: 80369
diff changeset
   175
      if (buffer_free() == 0) {
wenzelm
parents: 80369
diff changeset
   176
        chunks += buffer.toByteArray
80383
224cdaaaf0cd imitate internal policy of ByteArrayOutputStream: capacity is doubled after first push;
wenzelm
parents: 80382
diff changeset
   177
        buffer = new ByteArrayOutputStream(chunk_size.toInt)
80370
wenzelm
parents: 80369
diff changeset
   178
      }
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   179
80380
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   180
    def += (b: Byte): Unit = {
80377
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   181
      size += 1
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   182
      buffer.write(b)
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   183
      buffer_check()
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   184
    }
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   185
80373
dc220d47b85e minor performance tuning;
wenzelm
parents: 80372
diff changeset
   186
    def += (s: CharSequence): Unit = {
dc220d47b85e minor performance tuning;
wenzelm
parents: 80372
diff changeset
   187
      val n = s.length
dc220d47b85e minor performance tuning;
wenzelm
parents: 80372
diff changeset
   188
      if (n > 0) {
dc220d47b85e minor performance tuning;
wenzelm
parents: 80372
diff changeset
   189
        if (UTF8.relevant(s)) { this += UTF8.bytes(s.toString) }
80372
6e74f0fc8a52 minor performance tuning;
wenzelm
parents: 80370
diff changeset
   190
        else {
80387
afaac8c6048e minor performance tuning;
wenzelm
parents: 80385
diff changeset
   191
          var i = 0
afaac8c6048e minor performance tuning;
wenzelm
parents: 80385
diff changeset
   192
          while (i < n) {
afaac8c6048e minor performance tuning;
wenzelm
parents: 80385
diff changeset
   193
            buffer.write(s.charAt(i).toByte)
80380
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   194
            size += 1
80387
afaac8c6048e minor performance tuning;
wenzelm
parents: 80385
diff changeset
   195
            i += 1
80380
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   196
            buffer_check()
80372
6e74f0fc8a52 minor performance tuning;
wenzelm
parents: 80370
diff changeset
   197
          }
6e74f0fc8a52 minor performance tuning;
wenzelm
parents: 80370
diff changeset
   198
        }
6e74f0fc8a52 minor performance tuning;
wenzelm
parents: 80370
diff changeset
   199
      }
80373
dc220d47b85e minor performance tuning;
wenzelm
parents: 80372
diff changeset
   200
    }
80372
6e74f0fc8a52 minor performance tuning;
wenzelm
parents: 80370
diff changeset
   201
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   202
    def += (array: Array[Byte], offset: Int, length: Int): Unit = {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   203
      if (offset < 0 || length < 0 || offset.toLong + length.toLong > array.length) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   204
        throw new IndexOutOfBoundsException
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   205
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   206
      else if (length > 0) {
80380
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   207
        var i = offset
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   208
        var n = length
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   209
        while (n > 0) {
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   210
          val m = buffer_free()
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   211
          if (m > 0) {
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   212
            val l = m min n
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   213
            buffer.write(array, i, l)
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   214
            size += l
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   215
            i += l
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   216
            n -= l
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   217
          }
80380
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   218
          buffer_check()
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   219
        }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   220
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   221
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   222
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   223
    def += (array: Array[Byte]): Unit = { this += (array, 0, array.length) }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   224
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   225
    def += (a: Subarray): Unit = { this += (a.array, a.offset, a.length) }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   226
80380
94d903234f6b Bytes.Builder is unsynchronized, like java.io.OutputBuffer;
wenzelm
parents: 80379
diff changeset
   227
    private[Bytes] def done(): Bytes = {
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   228
      val cs = chunks.toArray
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   229
      val b = buffer.toByteArray
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   230
      chunks = null
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   231
      buffer = null
80374
4f1374f56c0b minor performance tuning;
wenzelm
parents: 80373
diff changeset
   232
      new Bytes(if (cs.isEmpty) None else Some(cs), b, 0L, size)
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   233
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   234
  }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   235
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   236
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   237
  /* subarray */
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   238
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   239
  object Subarray {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   240
    val empty: Subarray = new Subarray(new Array[Byte](0), 0, 0)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   241
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   242
    def apply(array: Array[Byte], offset: Int, length: Int): Subarray = {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   243
      val n = array.length
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   244
      if (0 <= offset && offset < n && 0 <= length && offset + length <= n) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   245
        if (length == 0) empty
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   246
        else new Subarray(array, offset, length)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   247
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   248
      else throw new IndexOutOfBoundsException
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   249
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   250
  }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   251
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   252
  final class Subarray private(
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   253
    val array: Array[Byte],
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   254
    val offset: Int,
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   255
    val length: Int
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   256
  ) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   257
    override def toString: String = "Bytes.Subarray(" + Space.bytes(length).print + ")"
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   258
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   259
    def byte_iterator: Iterator[Byte] =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   260
      if (length == 0) Iterator.empty
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   261
      else { for (i <- (offset until (offset + length)).iterator) yield array(i) }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   262
  }
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   263
}
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   264
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   265
final class Bytes private(
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   266
  protected val chunks: Option[Array[Array[Byte]]],
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   267
  protected val chunk0: Array[Byte],
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   268
  protected val offset: Long,
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   269
  val size: Long
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   270
) extends Bytes.Vec {
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   271
  assert(
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   272
    (chunks.isEmpty ||
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   273
      chunks.get.nonEmpty &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   274
      chunks.get.forall(chunk => chunk.length == Bytes.chunk_size)) &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   275
    chunk0.length < Bytes.chunk_size)
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   276
80385
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   277
  def small_size: Int =
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   278
    if (size > Bytes.array_size) throw new Bytes.Too_Large(size)
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   279
    else size.toInt
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   280
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   281
  def is_empty: Boolean = size == 0
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   282
80385
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   283
  def proper: Option[Bytes] = if (is_empty) None else Some(this)
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   284
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   285
  def is_sliced: Boolean =
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   286
    offset != 0L || {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   287
      chunks match {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   288
        case None => size != chunk0.length
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   289
        case Some(cs) => size != Bytes.make_size(cs, chunk0)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   290
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   291
    }
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   292
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   293
  override def toString: String =
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   294
    if (is_empty) "Bytes.empty"
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   295
    else "Bytes(" + Space.bytes(size).print + if_proper(is_sliced, ", sliced") + ")"
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   296
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   297
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   298
  /* elements: signed Byte or unsigned Char */
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   299
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   300
  protected def byte_unchecked(i: Long): Byte = {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   301
    val a = offset + i
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   302
    chunks match {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   303
      case None => chunk0(a.toInt)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   304
      case Some(cs) =>
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   305
        val b = a % Bytes.chunk_size
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   306
        val c = a / Bytes.chunk_size
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   307
        if (c < cs.length) cs(c.toInt)(b.toInt) else chunk0(b.toInt)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   308
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   309
  }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   310
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   311
  def byte(i: Long): Byte =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   312
    if (0 <= i && i < size) byte_unchecked(i)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   313
    else throw new IndexOutOfBoundsException
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   314
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   315
  def apply(i: Long): Char = (byte(i).toInt & 0xff).toChar
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   316
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   317
  protected def subarray_iterator: Iterator[Bytes.Subarray] =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   318
    if (is_empty) Iterator.empty
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   319
    else if (chunks.isEmpty) Iterator(Bytes.Subarray(chunk0, offset.toInt, size.toInt))
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   320
    else {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   321
      val end_offset = offset + size
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   322
      for ((array, index) <- (chunks.get.iterator ++ Iterator(chunk0)).zipWithIndex) yield {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   323
        val array_start = Bytes.chunk_size * index
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   324
        val array_stop = array_start + array.length
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   325
        if (offset < array_stop && array_start < end_offset) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   326
          val i = (array_start max offset) - array_start
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   327
          val j = (array_stop min end_offset) - array_start
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   328
          Bytes.Subarray(array, i.toInt, (j - i).toInt)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   329
        }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   330
        else Bytes.Subarray.empty
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   331
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   332
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   333
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   334
  def byte_iterator: Iterator[Byte] =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   335
    for {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   336
      a <- subarray_iterator
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   337
      b <- a.byte_iterator
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   338
    } yield b
80361
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   339
54a83e8e447b tuned source structure;
wenzelm
parents: 80360
diff changeset
   340
80385
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   341
  /* slice */
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   342
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   343
  def slice(i: Long, j: Long): Bytes =
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   344
    if (0 <= i && i <= j && j <= size) {
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   345
      if (i == j) Bytes.empty
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   346
      else new Bytes(chunks, chunk0, offset + i, j - i)
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   347
    }
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   348
    else throw new IndexOutOfBoundsException
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   349
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   350
  def unslice: Bytes =
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   351
    if (is_sliced) {
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   352
      Bytes.Builder.use(hint = size) { builder =>
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   353
        for (a <- subarray_iterator) { builder += a }
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   354
      }
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   355
    }
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   356
    else this
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   357
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   358
  def trim_line: Bytes =
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   359
    if (size >= 2 && byte_unchecked(size - 2) == 13 && byte_unchecked(size - 1) == 10) {
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   360
      slice(0, size - 2)
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   361
    }
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   362
    else if (size >= 1 && (byte_unchecked(size - 1) == 13 || byte_unchecked(size - 1) == 10)) {
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   363
      slice(0, size - 1)
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   364
    }
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   365
    else this
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   366
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   367
80363
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   368
  /* hash and equality */
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   369
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   370
  lazy val sha1_digest: SHA1.Digest =
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   371
    if (is_empty) SHA1.digest_empty
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   372
    else {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   373
      SHA1.make_digest { sha =>
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   374
        for (a <- subarray_iterator if a.length > 0) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   375
          sha.update(a.array, a.offset, a.length)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   376
        }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   377
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   378
    }
80363
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   379
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   380
  override def hashCode(): Int = sha1_digest.hashCode()
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   381
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75382
diff changeset
   382
  override def equals(that: Any): Boolean = {
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   383
    that match {
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   384
      case other: Bytes =>
80363
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   385
        if (this.eq(other)) true
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   386
        else if (size != other.size) false
80378
ab4badc7db7f more efficient equals: avoid somewhat slow sha1_digest (see also 29b761e290c5, 306f273c91ec);
wenzelm
parents: 80377
diff changeset
   387
        else {
ab4badc7db7f more efficient equals: avoid somewhat slow sha1_digest (see also 29b761e290c5, 306f273c91ec);
wenzelm
parents: 80377
diff changeset
   388
          if (chunks.isEmpty && other.chunks.isEmpty) {
ab4badc7db7f more efficient equals: avoid somewhat slow sha1_digest (see also 29b761e290c5, 306f273c91ec);
wenzelm
parents: 80377
diff changeset
   389
            Arrays.equals(chunk0, offset.toInt, (offset + size).toInt,
ab4badc7db7f more efficient equals: avoid somewhat slow sha1_digest (see also 29b761e290c5, 306f273c91ec);
wenzelm
parents: 80377
diff changeset
   390
              other.chunk0, other.offset.toInt, (other.offset + other.size).toInt)
ab4badc7db7f more efficient equals: avoid somewhat slow sha1_digest (see also 29b761e290c5, 306f273c91ec);
wenzelm
parents: 80377
diff changeset
   391
          }
ab4badc7db7f more efficient equals: avoid somewhat slow sha1_digest (see also 29b761e290c5, 306f273c91ec);
wenzelm
parents: 80377
diff changeset
   392
          else if (!is_sliced && !other.is_sliced) {
ab4badc7db7f more efficient equals: avoid somewhat slow sha1_digest (see also 29b761e290c5, 306f273c91ec);
wenzelm
parents: 80377
diff changeset
   393
            (subarray_iterator zip other.subarray_iterator)
ab4badc7db7f more efficient equals: avoid somewhat slow sha1_digest (see also 29b761e290c5, 306f273c91ec);
wenzelm
parents: 80377
diff changeset
   394
              .forall((a, b) => Arrays.equals(a.array, b.array))
ab4badc7db7f more efficient equals: avoid somewhat slow sha1_digest (see also 29b761e290c5, 306f273c91ec);
wenzelm
parents: 80377
diff changeset
   395
          }
ab4badc7db7f more efficient equals: avoid somewhat slow sha1_digest (see also 29b761e290c5, 306f273c91ec);
wenzelm
parents: 80377
diff changeset
   396
          else sha1_digest == other.sha1_digest
80363
306f273c91ec clarified hash and equality: depend on sha1 digest to be collision-free;
wenzelm
parents: 80362
diff changeset
   397
        }
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   398
      case _ => false
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   399
    }
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   400
  }
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   401
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   402
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   403
  /* content */
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   404
80385
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   405
  def + (other: Bytes): Bytes =
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   406
    if (other.is_empty) this
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   407
    else if (is_empty) other
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   408
    else {
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   409
      Bytes.Builder.use(hint = size + other.size) { builder =>
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   410
        for (a <- subarray_iterator ++ other.subarray_iterator) {
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   411
          builder += a
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   412
        }
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   413
      }
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   414
    }
605e19319343 tuned module structure;
wenzelm
parents: 80384
diff changeset
   415
80368
9db395953106 clarified signature;
wenzelm
parents: 80367
diff changeset
   416
  def make_array: Array[Byte] = {
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   417
    val buf = new ByteArrayOutputStream(small_size)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   418
    for (a <- subarray_iterator) { buf.write(a.array, a.offset, a.length) }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   419
    buf.toByteArray
69365
c5b3860d29ef avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
wenzelm
parents: 68167
diff changeset
   420
  }
c5b3860d29ef avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
wenzelm
parents: 68167
diff changeset
   421
80352
7a6cba7c77c9 minor performance tuning;
wenzelm
parents: 80351
diff changeset
   422
  def text: String =
7a6cba7c77c9 minor performance tuning;
wenzelm
parents: 80351
diff changeset
   423
    if (is_empty) ""
80379
1f1ce661c71c notable performance tuning: avoid overhead of higher-order functions;
wenzelm
parents: 80378
diff changeset
   424
    else {
1f1ce661c71c notable performance tuning: avoid overhead of higher-order functions;
wenzelm
parents: 80378
diff changeset
   425
      var i = 0L
1f1ce661c71c notable performance tuning: avoid overhead of higher-order functions;
wenzelm
parents: 80378
diff changeset
   426
      var utf8 = false
1f1ce661c71c notable performance tuning: avoid overhead of higher-order functions;
wenzelm
parents: 80378
diff changeset
   427
      while (i < size && !utf8) {
1f1ce661c71c notable performance tuning: avoid overhead of higher-order functions;
wenzelm
parents: 80378
diff changeset
   428
        if (byte_unchecked(i) < 0) { utf8 = true }
1f1ce661c71c notable performance tuning: avoid overhead of higher-order functions;
wenzelm
parents: 80378
diff changeset
   429
        i += 1
1f1ce661c71c notable performance tuning: avoid overhead of higher-order functions;
wenzelm
parents: 80378
diff changeset
   430
      }
1f1ce661c71c notable performance tuning: avoid overhead of higher-order functions;
wenzelm
parents: 80378
diff changeset
   431
      utf8
1f1ce661c71c notable performance tuning: avoid overhead of higher-order functions;
wenzelm
parents: 80378
diff changeset
   432
1f1ce661c71c notable performance tuning: avoid overhead of higher-order functions;
wenzelm
parents: 80378
diff changeset
   433
      if (utf8) UTF8.decode_permissive_bytes(this)
1f1ce661c71c notable performance tuning: avoid overhead of higher-order functions;
wenzelm
parents: 80378
diff changeset
   434
      else new String(make_array, UTF8.charset)
80352
7a6cba7c77c9 minor performance tuning;
wenzelm
parents: 80351
diff changeset
   435
    }
65279
fa62e095d8f1 clarified signature (again, see also 3ed43cfc8b14);
wenzelm
parents: 65070
diff changeset
   436
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   437
  def wellformed_text: Option[String] =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   438
    try {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   439
      val s = text
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   440
      if (this == Bytes(s)) Some(s) else None
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   441
    }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   442
    catch { case ERROR(_) => None }
76236
03dd2f19f1d7 clarified signature: more operations;
wenzelm
parents: 75709
diff changeset
   443
80368
9db395953106 clarified signature;
wenzelm
parents: 80367
diff changeset
   444
  def encode_base64: String = Base64.encode(make_array)
68094
0b66aca9c965 more operations;
wenzelm
parents: 68087
diff changeset
   445
76236
03dd2f19f1d7 clarified signature: more operations;
wenzelm
parents: 75709
diff changeset
   446
  def maybe_encode_base64: (Boolean, String) =
03dd2f19f1d7 clarified signature: more operations;
wenzelm
parents: 75709
diff changeset
   447
    wellformed_text match {
03dd2f19f1d7 clarified signature: more operations;
wenzelm
parents: 75709
diff changeset
   448
      case Some(s) => (false, s)
03dd2f19f1d7 clarified signature: more operations;
wenzelm
parents: 75709
diff changeset
   449
      case None => (true, encode_base64)
03dd2f19f1d7 clarified signature: more operations;
wenzelm
parents: 75709
diff changeset
   450
    }
68106
a514e29db980 return exports as result for Isabelle server;
wenzelm
parents: 68094
diff changeset
   451
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   452
64004
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   453
  /* streams */
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   454
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   455
  def stream(): InputStream =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   456
    if (chunks.isEmpty) new ByteArrayInputStream(chunk0, offset.toInt, size.toInt)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   457
    else {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   458
      new InputStream {
80367
a6c1526600b3 minor performance tuning;
wenzelm
parents: 80366
diff changeset
   459
        private var index = 0L
a6c1526600b3 minor performance tuning;
wenzelm
parents: 80366
diff changeset
   460
a6c1526600b3 minor performance tuning;
wenzelm
parents: 80366
diff changeset
   461
        def read(): Int = {
a6c1526600b3 minor performance tuning;
wenzelm
parents: 80366
diff changeset
   462
          if (index < size) {
a6c1526600b3 minor performance tuning;
wenzelm
parents: 80366
diff changeset
   463
            val res = byte_unchecked(index).toInt & 0xff
a6c1526600b3 minor performance tuning;
wenzelm
parents: 80366
diff changeset
   464
            index += 1
a6c1526600b3 minor performance tuning;
wenzelm
parents: 80366
diff changeset
   465
            res
a6c1526600b3 minor performance tuning;
wenzelm
parents: 80366
diff changeset
   466
          }
a6c1526600b3 minor performance tuning;
wenzelm
parents: 80366
diff changeset
   467
          else -1
a6c1526600b3 minor performance tuning;
wenzelm
parents: 80366
diff changeset
   468
        }
a6c1526600b3 minor performance tuning;
wenzelm
parents: 80366
diff changeset
   469
80368
9db395953106 clarified signature;
wenzelm
parents: 80367
diff changeset
   470
        override def readAllBytes(): Array[Byte] = make_array
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   471
      }
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   472
    }
64004
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   473
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   474
  def write_stream(stream: OutputStream): Unit =
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   475
    for (a <- subarray_iterator if a.length > 0) {
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   476
      stream.write(a.array, a.offset, a.length)
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   477
    }
64004
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   478
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   479
76351
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   480
  /* XZ / Zstd data compression */
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   481
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   482
  def detect_xz: Boolean =
80357
fe123d033e76 clarified signature: pro-forma support for Bytes with size: Long;
wenzelm
parents: 80356
diff changeset
   483
    size >= 6 &&
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   484
      byte_unchecked(0) == 0xFD.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   485
      byte_unchecked(1) == 0x37.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   486
      byte_unchecked(2) == 0x7A.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   487
      byte_unchecked(3) == 0x58.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   488
      byte_unchecked(4) == 0x5A.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   489
      byte_unchecked(5) == 0x00.toByte
76351
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   490
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   491
  def detect_zstd: Boolean =
80357
fe123d033e76 clarified signature: pro-forma support for Bytes with size: Long;
wenzelm
parents: 80356
diff changeset
   492
    size >= 4 &&
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   493
      byte_unchecked(0) == 0x28.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   494
      byte_unchecked(1) == 0xB5.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   495
      byte_unchecked(2) == 0x2F.toByte &&
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   496
      byte_unchecked(3) == 0xFD.toByte
76351
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   497
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   498
  def uncompress_xz(cache: Compress.Cache = Compress.Cache.none): Bytes =
80366
ac4d53bc8f6b support large byte arrays, using multiple "chunks";
wenzelm
parents: 80364
diff changeset
   499
    using(new xz.XZInputStream(stream(), cache.for_xz))(Bytes.read_stream(_, hint = size))
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   500
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   501
  def uncompress_zstd(cache: Compress.Cache = Compress.Cache.none): Bytes = {
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   502
    Zstd.init()
80376
201af0b45e57 tuned: more uniform, less ambitious;
wenzelm
parents: 80375
diff changeset
   503
    using(new zstd.ZstdInputStream(stream(), cache.for_zstd))(Bytes.read_stream(_, hint = size))
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   504
  }
54440
2c4940d2edf7 tuned signature;
wenzelm
parents: 54439
diff changeset
   505
76351
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   506
  def uncompress(cache: Compress.Cache = Compress.Cache.none): Bytes =
76358
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   507
    if (detect_xz) uncompress_xz(cache = cache)
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   508
    else if (detect_zstd) uncompress_zstd(cache = cache)
cff0828c374f clarified signature;
wenzelm
parents: 76353
diff changeset
   509
    else error("Cannot detect compression scheme")
64004
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   510
76351
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   511
  def compress(
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   512
    options: Compress.Options = Compress.Options(),
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   513
    cache: Compress.Cache = Compress.Cache.none
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   514
  ): Bytes = {
80377
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   515
    val bytes = new Bytes.Builder.Stream(hint = size)
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   516
    using(
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   517
      options match {
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   518
        case options_xz: Compress.Options_XZ =>
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   519
          new xz.XZOutputStream(bytes, options_xz.make, cache.for_xz)
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   520
        case options_zstd: Compress.Options_Zstd =>
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   521
          new zstd.ZstdOutputStream(bytes, cache.for_zstd, options_zstd.level)
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   522
      }
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   523
    ) { s => for (a <- subarray_iterator) s.write(a.array, a.offset, a.length) }
28dd9b91dfe5 more scalable compression, using Bytes.Builder.Stream;
wenzelm
parents: 80376
diff changeset
   524
    bytes.builder.done()
64004
b4ece7a3f2ca clarified stream operations;
wenzelm
parents: 64001
diff changeset
   525
  }
68167
327bb0f5f768 clarified implicit compression;
wenzelm
parents: 68150
diff changeset
   526
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75382
diff changeset
   527
  def maybe_compress(
76351
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   528
    options: Compress.Options = Compress.Options(),
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 76350
diff changeset
   529
    cache: Compress.Cache = Compress.Cache.none
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75382
diff changeset
   530
  ) : (Boolean, Bytes) = {
68167
327bb0f5f768 clarified implicit compression;
wenzelm
parents: 68150
diff changeset
   531
    val compressed = compress(options = options, cache = cache)
80357
fe123d033e76 clarified signature: pro-forma support for Bytes with size: Long;
wenzelm
parents: 80356
diff changeset
   532
    if (compressed.size < size) (true, compressed) else (false, this)
68167
327bb0f5f768 clarified implicit compression;
wenzelm
parents: 68150
diff changeset
   533
  }
54439
621a155c7715 immutable byte vectors versus UTF8 strings;
wenzelm
parents:
diff changeset
   534
}