| author | wenzelm | 
| Thu, 21 Dec 2023 11:58:19 +0100 | |
| changeset 79326 | 8a2921053511 | 
| parent 78956 | 12abaffb0346 | 
| child 79509 | e82448aacf48 | 
| permissions | -rw-r--r-- | 
| 54439 | 1  | 
/* Title: Pure/General/bytes.scala  | 
2  | 
Author: Makarius  | 
|
3  | 
||
4  | 
Immutable byte vectors versus UTF8 strings.  | 
|
5  | 
*/  | 
|
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
9  | 
||
| 78855 | 10  | 
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, FileInputStream, FileOutputStream,
 | 
11  | 
InputStream, OutputStream, File => JFile}  | 
|
| 65070 | 12  | 
import java.net.URL  | 
| 
77711
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
13  | 
import java.nio.ByteBuffer  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
14  | 
import java.nio.channels.FileChannel  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
15  | 
import java.nio.file.StandardOpenOption  | 
| 
77712
 
dd4bb80dbc3a
tuned performance: much faster low-level operation;
 
wenzelm 
parents: 
77711 
diff
changeset
 | 
16  | 
import java.util.Arrays  | 
| 76353 | 17  | 
import org.tukaani.xz  | 
18  | 
import com.github.luben.zstd  | 
|
| 54440 | 19  | 
|
20  | 
||
| 75393 | 21  | 
object Bytes {
 | 
| 54439 | 22  | 
val empty: Bytes = new Bytes(Array[Byte](), 0, 0)  | 
23  | 
||
| 75393 | 24  | 
  def apply(s: CharSequence): Bytes = {
 | 
| 54439 | 25  | 
val str = s.toString  | 
26  | 
if (str.isEmpty) empty  | 
|
27  | 
    else {
 | 
|
| 62527 | 28  | 
val b = UTF8.bytes(str)  | 
| 
54442
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
29  | 
new Bytes(b, 0, b.length)  | 
| 54439 | 30  | 
}  | 
31  | 
}  | 
|
| 54440 | 32  | 
|
| 63779 | 33  | 
def apply(a: Array[Byte]): Bytes = apply(a, 0, a.length)  | 
34  | 
||
| 
54442
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
35  | 
def apply(a: Array[Byte], offset: Int, length: Int): Bytes =  | 
| 
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
36  | 
if (length == 0) empty  | 
| 
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
37  | 
    else {
 | 
| 
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
38  | 
val b = new Array[Byte](length)  | 
| 55618 | 39  | 
System.arraycopy(a, offset, b, 0, length)  | 
| 
54442
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
40  | 
new Bytes(b, 0, b.length)  | 
| 
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
41  | 
}  | 
| 
 
c39972ddd672
more specific Protocol_Output: empty message.body, main content via bytes/text;
 
wenzelm 
parents: 
54440 
diff
changeset
 | 
42  | 
|
| 69454 | 43  | 
  val newline: Bytes = apply("\n")
 | 
| 54440 | 44  | 
|
| 73576 | 45  | 
|
46  | 
/* base64 */  | 
|
47  | 
||
| 75587 | 48  | 
  def decode_base64(s: String): Bytes = {
 | 
| 75620 | 49  | 
val a = Base64.decode(s)  | 
| 68108 | 50  | 
new Bytes(a, 0, a.length)  | 
51  | 
}  | 
|
52  | 
||
| 75579 | 53  | 
|
| 54440 | 54  | 
/* read */  | 
55  | 
||
| 78243 | 56  | 
def read_stream(stream: InputStream, limit: Int = Int.MaxValue, hint: Int = 1024): Bytes =  | 
| 64004 | 57  | 
if (limit == 0) empty  | 
58  | 
    else {
 | 
|
| 78243 | 59  | 
val out_size = (if (limit == Int.MaxValue) hint else limit) max 1024  | 
| 73414 | 60  | 
val out = new ByteArrayOutputStream(out_size)  | 
| 
64005
 
f6e965cf1617
clarified magic values (see also java/io/BufferedInputStream.java);
 
wenzelm 
parents: 
64004 
diff
changeset
 | 
61  | 
val buf = new Array[Byte](8192)  | 
| 64004 | 62  | 
var m = 0  | 
| 54440 | 63  | 
|
| 75709 | 64  | 
      while ({
 | 
| 73554 | 65  | 
m = stream.read(buf, 0, buf.length min (limit - out.size))  | 
| 64004 | 66  | 
if (m != -1) out.write(buf, 0, m)  | 
| 75709 | 67  | 
m != -1 && limit > out.size  | 
68  | 
}) ()  | 
|
| 64004 | 69  | 
|
70  | 
new Bytes(out.toByteArray, 0, out.size)  | 
|
| 54440 | 71  | 
}  | 
| 
64001
 
7ecb22be8f03
more general read_stream: return actual byte count;
 
wenzelm 
parents: 
63779 
diff
changeset
 | 
72  | 
|
| 77717 | 73  | 
def read_url(name: String): Bytes = using(Url(name).openStream)(read_stream(_))  | 
74  | 
||
| 78953 | 75  | 
  def read_file(path: Path, offset: Long = 0L, limit: Long = Long.MaxValue): Bytes = {
 | 
| 78956 | 76  | 
val length = File.size(path)  | 
| 
77711
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
77  | 
val start = offset.max(0L)  | 
| 78953 | 78  | 
val len = (length - start).max(0L).min(limit)  | 
| 78243 | 79  | 
    if (len > Int.MaxValue) error("Cannot read large file slice: " + Space.bytes(len).print)
 | 
| 
77711
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
80  | 
else if (len == 0L) empty  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
81  | 
    else {
 | 
| 78953 | 82  | 
      using(FileChannel.open(path.java_path, StandardOpenOption.READ)) { java_path =>
 | 
| 77718 | 83  | 
java_path.position(start)  | 
| 
77711
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
84  | 
val n = len.toInt  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
85  | 
val buf = ByteBuffer.allocate(n)  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
86  | 
var i = 0  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
87  | 
var m = 0  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
88  | 
        while ({
 | 
| 77718 | 89  | 
m = java_path.read(buf)  | 
| 
77711
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
90  | 
if (m != -1) i += m  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
91  | 
m != -1 && n > i  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
92  | 
}) ()  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
93  | 
new Bytes(buf.array, 0, i)  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
94  | 
}  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
95  | 
}  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
96  | 
}  | 
| 
 
25fd62cba347
clarified signature: more general operation Bytes.read_slice;
 
wenzelm 
parents: 
76361 
diff
changeset
 | 
97  | 
|
| 78953 | 98  | 
def read(path: Path): Bytes = read_file(path)  | 
99  | 
def read(file: JFile): Bytes = read_file(File.path(file))  | 
|
| 77718 | 100  | 
|
| 64229 | 101  | 
|
102  | 
/* write */  | 
|
103  | 
||
| 
69393
 
ed0824ef337e
static type for Library.using: avoid Java 11 warnings on "Illegal reflective access";
 
wenzelm 
parents: 
69365 
diff
changeset
 | 
104  | 
def write(file: JFile, bytes: Bytes): Unit =  | 
| 
 
ed0824ef337e
static type for Library.using: avoid Java 11 warnings on "Illegal reflective access";
 
wenzelm 
parents: 
69365 
diff
changeset
 | 
105  | 
using(new FileOutputStream(file))(bytes.write_stream(_))  | 
| 64229 | 106  | 
|
107  | 
def write(path: Path, bytes: Bytes): Unit = write(path.file, bytes)  | 
|
| 78194 | 108  | 
|
109  | 
||
110  | 
/* append */  | 
|
111  | 
||
112  | 
def append(file: JFile, bytes: Bytes): Unit =  | 
|
113  | 
using(new FileOutputStream(file, true))(bytes.write_stream(_))  | 
|
114  | 
||
115  | 
def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes)  | 
|
| 54439 | 116  | 
}  | 
117  | 
||
118  | 
final class Bytes private(  | 
|
119  | 
protected val bytes: Array[Byte],  | 
|
120  | 
protected val offset: Int,  | 
|
| 75393 | 121  | 
  val length: Int) extends CharSequence {
 | 
| 54439 | 122  | 
/* equality */  | 
123  | 
||
| 75393 | 124  | 
  override def equals(that: Any): Boolean = {
 | 
| 54440 | 125  | 
    that match {
 | 
126  | 
case other: Bytes =>  | 
|
| 
77712
 
dd4bb80dbc3a
tuned performance: much faster low-level operation;
 
wenzelm 
parents: 
77711 
diff
changeset
 | 
127  | 
this.eq(other) ||  | 
| 
 
dd4bb80dbc3a
tuned performance: much faster low-level operation;
 
wenzelm 
parents: 
77711 
diff
changeset
 | 
128  | 
Arrays.equals(bytes, offset, offset + length,  | 
| 
 
dd4bb80dbc3a
tuned performance: much faster low-level operation;
 
wenzelm 
parents: 
77711 
diff
changeset
 | 
129  | 
other.bytes, other.offset, other.offset + other.length)  | 
| 54440 | 130  | 
case _ => false  | 
131  | 
}  | 
|
132  | 
}  | 
|
133  | 
||
| 75393 | 134  | 
  private lazy val hash: Int = {
 | 
| 54439 | 135  | 
var h = 0  | 
136  | 
    for (i <- offset until offset + length) {
 | 
|
137  | 
val b = bytes(i).asInstanceOf[Int] & 0xFF  | 
|
138  | 
h = 31 * h + b  | 
|
139  | 
}  | 
|
140  | 
h  | 
|
141  | 
}  | 
|
142  | 
||
143  | 
override def hashCode(): Int = hash  | 
|
144  | 
||
145  | 
||
146  | 
/* content */  | 
|
147  | 
||
| 54512 | 148  | 
lazy val sha1_digest: SHA1.Digest = SHA1.digest(bytes)  | 
| 54440 | 149  | 
|
| 69448 | 150  | 
def is_empty: Boolean = length == 0  | 
151  | 
||
152  | 
def iterator: Iterator[Byte] =  | 
|
153  | 
for (i <- (offset until (offset + length)).iterator)  | 
|
154  | 
yield bytes(i)  | 
|
155  | 
||
| 75393 | 156  | 
  def array: Array[Byte] = {
 | 
| 
69365
 
c5b3860d29ef
avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
 
wenzelm 
parents: 
68167 
diff
changeset
 | 
157  | 
val a = new Array[Byte](length)  | 
| 
 
c5b3860d29ef
avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
 
wenzelm 
parents: 
68167 
diff
changeset
 | 
158  | 
System.arraycopy(bytes, offset, a, 0, length)  | 
| 
 
c5b3860d29ef
avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
 
wenzelm 
parents: 
68167 
diff
changeset
 | 
159  | 
a  | 
| 
 
c5b3860d29ef
avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
 
wenzelm 
parents: 
68167 
diff
changeset
 | 
160  | 
}  | 
| 
 
c5b3860d29ef
avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray;
 
wenzelm 
parents: 
68167 
diff
changeset
 | 
161  | 
|
| 
73561
 
c83152933579
clarified signature: Bytes extends CharSequence already (see d201996f72a8);
 
wenzelm 
parents: 
73559 
diff
changeset
 | 
162  | 
def text: String = UTF8.decode_permissive(this)  | 
| 
65279
 
fa62e095d8f1
clarified signature (again, see also 3ed43cfc8b14);
 
wenzelm 
parents: 
65070 
diff
changeset
 | 
163  | 
|
| 76236 | 164  | 
  def wellformed_text: Option[String] = {
 | 
165  | 
val s = text  | 
|
166  | 
if (this == Bytes(s)) Some(s) else None  | 
|
167  | 
}  | 
|
168  | 
||
| 75587 | 169  | 
  def encode_base64: String = {
 | 
| 68094 | 170  | 
val b =  | 
171  | 
if (offset == 0 && length == bytes.length) bytes  | 
|
172  | 
else Bytes(bytes, offset, length).bytes  | 
|
| 75620 | 173  | 
Base64.encode(b)  | 
| 68094 | 174  | 
}  | 
175  | 
||
| 76236 | 176  | 
def maybe_encode_base64: (Boolean, String) =  | 
177  | 
    wellformed_text match {
 | 
|
178  | 
case Some(s) => (false, s)  | 
|
179  | 
case None => (true, encode_base64)  | 
|
180  | 
}  | 
|
| 68106 | 181  | 
|
| 77716 | 182  | 
override def toString: String =  | 
183  | 
    if (is_empty) "Bytes.empty" else "Bytes(" + Space.bytes(length).print + ")"
 | 
|
| 54439 | 184  | 
|
| 72885 | 185  | 
def proper: Option[Bytes] = if (is_empty) None else Some(this)  | 
186  | 
def proper_text: Option[String] = if (is_empty) None else Some(text)  | 
|
| 65630 | 187  | 
|
| 54439 | 188  | 
def +(other: Bytes): Bytes =  | 
| 72885 | 189  | 
if (other.is_empty) this  | 
190  | 
else if (is_empty) other  | 
|
| 54439 | 191  | 
    else {
 | 
192  | 
val new_bytes = new Array[Byte](length + other.length)  | 
|
| 55618 | 193  | 
System.arraycopy(bytes, offset, new_bytes, 0, length)  | 
194  | 
System.arraycopy(other.bytes, other.offset, new_bytes, length, other.length)  | 
|
| 54439 | 195  | 
new Bytes(new_bytes, 0, new_bytes.length)  | 
196  | 
}  | 
|
| 54440 | 197  | 
|
198  | 
||
| 60833 | 199  | 
/* CharSequence operations */  | 
200  | 
||
201  | 
def charAt(i: Int): Char =  | 
|
202  | 
if (0 <= i && i < length) (bytes(offset + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]  | 
|
203  | 
else throw new IndexOutOfBoundsException  | 
|
204  | 
||
| 75393 | 205  | 
  def subSequence(i: Int, j: Int): Bytes = {
 | 
| 60833 | 206  | 
if (0 <= i && i <= j && j <= length) new Bytes(bytes, offset + i, j - i)  | 
207  | 
else throw new IndexOutOfBoundsException  | 
|
208  | 
}  | 
|
209  | 
||
| 69448 | 210  | 
def trim_line: Bytes =  | 
| 77714 | 211  | 
    if (length >= 2 && charAt(length - 2) == 13 && charAt(length - 1) == 10) {
 | 
| 69448 | 212  | 
subSequence(0, length - 2)  | 
| 77714 | 213  | 
}  | 
214  | 
    else if (length >= 1 && (charAt(length - 1) == 13 || charAt(length - 1) == 10)) {
 | 
|
| 69448 | 215  | 
subSequence(0, length - 1)  | 
| 77714 | 216  | 
}  | 
| 69448 | 217  | 
else this  | 
218  | 
||
| 60833 | 219  | 
|
| 64004 | 220  | 
/* streams */  | 
221  | 
||
222  | 
def stream(): ByteArrayInputStream = new ByteArrayInputStream(bytes, offset, length)  | 
|
223  | 
||
224  | 
def write_stream(stream: OutputStream): Unit = stream.write(bytes, offset, length)  | 
|
225  | 
||
226  | 
||
| 
76351
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
227  | 
/* XZ / Zstd data compression */  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
228  | 
|
| 76358 | 229  | 
def detect_xz: Boolean =  | 
| 
76351
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
230  | 
length >= 6 &&  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
231  | 
bytes(offset) == 0xFD.toByte &&  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
232  | 
bytes(offset + 1) == 0x37.toByte &&  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
233  | 
bytes(offset + 2) == 0x7A.toByte &&  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
234  | 
bytes(offset + 3) == 0x58.toByte &&  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
235  | 
bytes(offset + 4) == 0x5A.toByte &&  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
236  | 
bytes(offset + 5) == 0x00.toByte  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
237  | 
|
| 76358 | 238  | 
def detect_zstd: Boolean =  | 
| 
76351
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
239  | 
length >= 4 &&  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
240  | 
bytes(offset) == 0x28.toByte &&  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
241  | 
bytes(offset + 1) == 0xB5.toByte &&  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
242  | 
bytes(offset + 2) == 0x2F.toByte &&  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
243  | 
bytes(offset + 3) == 0xFD.toByte  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
244  | 
|
| 76358 | 245  | 
def uncompress_xz(cache: Compress.Cache = Compress.Cache.none): Bytes =  | 
246  | 
using(new xz.XZInputStream(stream(), cache.for_xz))(Bytes.read_stream(_, hint = length))  | 
|
247  | 
||
248  | 
  def uncompress_zstd(cache: Compress.Cache = Compress.Cache.none): Bytes = {
 | 
|
249  | 
Zstd.init()  | 
|
250  | 
val n = zstd.Zstd.decompressedSize(bytes, offset, length)  | 
|
| 78243 | 251  | 
    if (n > 0 && n < Int.MaxValue) {
 | 
| 76358 | 252  | 
Bytes(zstd.Zstd.decompress(array, n.toInt))  | 
253  | 
}  | 
|
254  | 
    else {
 | 
|
255  | 
using(new zstd.ZstdInputStream(stream(), cache.for_zstd))(Bytes.read_stream(_, hint = length))  | 
|
256  | 
}  | 
|
257  | 
}  | 
|
| 54440 | 258  | 
|
| 
76351
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
259  | 
def uncompress(cache: Compress.Cache = Compress.Cache.none): Bytes =  | 
| 76358 | 260  | 
if (detect_xz) uncompress_xz(cache = cache)  | 
261  | 
else if (detect_zstd) uncompress_zstd(cache = cache)  | 
|
262  | 
    else error("Cannot detect compression scheme")
 | 
|
| 64004 | 263  | 
|
| 
76351
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
264  | 
def compress(  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
265  | 
options: Compress.Options = Compress.Options(),  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
266  | 
cache: Compress.Cache = Compress.Cache.none  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
267  | 
  ): Bytes = {
 | 
| 76358 | 268  | 
    options match {
 | 
269  | 
case options_xz: Compress.Options_XZ =>  | 
|
270  | 
val result = new ByteArrayOutputStream(length)  | 
|
271  | 
using(new xz.XZOutputStream(result, options_xz.make, cache.for_xz))(write_stream)  | 
|
272  | 
new Bytes(result.toByteArray, 0, result.size)  | 
|
273  | 
case options_zstd: Compress.Options_Zstd =>  | 
|
274  | 
Zstd.init()  | 
|
| 
76361
 
3b9f36ef7365
tuned: avoid redundant copy of potentially large array;
 
wenzelm 
parents: 
76358 
diff
changeset
 | 
275  | 
Bytes(zstd.Zstd.compress(if (offset == 0) bytes else array, options_zstd.level))  | 
| 76358 | 276  | 
}  | 
| 64004 | 277  | 
}  | 
| 68167 | 278  | 
|
| 75393 | 279  | 
def maybe_compress(  | 
| 
76351
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
280  | 
options: Compress.Options = Compress.Options(),  | 
| 
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
76350 
diff
changeset
 | 
281  | 
cache: Compress.Cache = Compress.Cache.none  | 
| 75393 | 282  | 
  ) : (Boolean, Bytes) = {
 | 
| 68167 | 283  | 
val compressed = compress(options = options, cache = cache)  | 
284  | 
if (compressed.length < length) (true, compressed) else (false, this)  | 
|
285  | 
}  | 
|
| 54439 | 286  | 
}  |