isabelle: src/Pure/General/bytes.scala@8a2921053511 (annotated)

54439 621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	1	/* Title: Pure/General/bytes.scala
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	2	Author: Makarius
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	3
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	4	Immutable byte vectors versus UTF8 strings.
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	5	*/
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	6
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	7	package isabelle
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	8
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	9
78855 6fdcd6c8c97a prefer old-style import "=>"; wenzelm parents: 78243 diff changeset	10	import java.io.{ByteArrayInputStream, ByteArrayOutputStream, FileInputStream, FileOutputStream,
6fdcd6c8c97a prefer old-style import "=>"; wenzelm parents: 78243 diff changeset	11	InputStream, OutputStream, File => JFile}
65070 1222c010bff7 more operations; wenzelm parents: 64370 diff changeset	12	import java.net.URL
77711 25fd62cba347 clarified signature: more general operation Bytes.read_slice; wenzelm parents: 76361 diff changeset	13	import java.nio.ByteBuffer
25fd62cba347 clarified signature: more general operation Bytes.read_slice; wenzelm parents: 76361 diff changeset	14	import java.nio.channels.FileChannel
25fd62cba347 clarified signature: more general operation Bytes.read_slice; wenzelm parents: 76361 diff changeset	15	import java.nio.file.StandardOpenOption
77712 dd4bb80dbc3a tuned performance: much faster low-level operation; wenzelm parents: 77711 diff changeset	16	import java.util.Arrays
76353 3698d0f3da18 clarified signature; wenzelm parents: 76351 diff changeset	17	import org.tukaani.xz
3698d0f3da18 clarified signature; wenzelm parents: 76351 diff changeset	18	import com.github.luben.zstd
54440 2c4940d2edf7 tuned signature; wenzelm parents: 54439 diff changeset	19
2c4940d2edf7 tuned signature; wenzelm parents: 54439 diff changeset	20
75393 87ebf5a50283 clarified formatting, for the sake of scala3; wenzelm parents: 75382 diff changeset	21	object Bytes {
/* canonical empty byte vector: zero-length array, offset 0, length 0 */
val empty: Bytes = new Bytes(new Array[Byte](0), 0, 0)
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	23
/* bytes of the given text as produced by UTF8.bytes; empty text yields Bytes.empty */
def apply(s: CharSequence): Bytes =
  s.toString match {
    case "" => empty
    case text =>
      val encoded = UTF8.bytes(text)
      new Bytes(encoded, 0, encoded.length)
  }
54440 2c4940d2edf7 tuned signature; wenzelm parents: 54439 diff changeset	32
/* private copy of the whole array */
def apply(a: Array[Byte]): Bytes = apply(a, 0, a.length)

/* private copy of the array slice starting at offset; a zero-length slice yields Bytes.empty */
def apply(a: Array[Byte], offset: Int, length: Int): Bytes =
  if (length != 0) {
    // copy, so that later mutation of the argument array cannot affect the result
    val copy = new Array[Byte](length)
    System.arraycopy(a, offset, copy, 0, length)
    new Bytes(copy, 0, copy.length)
  }
  else empty
c39972ddd672 more specific Protocol_Output: empty message.body, main content via bytes/text; wenzelm parents: 54440 diff changeset	42
/* the single line-feed character as Bytes */
val newline: Bytes = Bytes("\n")
54440 2c4940d2edf7 tuned signature; wenzelm parents: 54439 diff changeset	44
73576 b50f8cc8c08e support for base64 via Isabelle/Scala/ML; wenzelm parents: 73561 diff changeset	45
b50f8cc8c08e support for base64 via Isabelle/Scala/ML; wenzelm parents: 73561 diff changeset	46	/* base64 */
b50f8cc8c08e support for base64 via Isabelle/Scala/ML; wenzelm parents: 73561 diff changeset	47
/* wrap the array returned by Base64.decode directly, without further copying */
def decode_base64(s: String): Bytes = {
  val decoded = Base64.decode(s)
  new Bytes(decoded, 0, decoded.length)
}
2277fe496d78 more operations; wenzelm parents: 68106 diff changeset	52
75579 3362b6a5d697 support XZ compression in Isabelle/ML; wenzelm parents: 75393 diff changeset	53
54440 2c4940d2edf7 tuned signature; wenzelm parents: 54439 diff changeset	54	/* read */
2c4940d2edf7 tuned signature; wenzelm parents: 54439 diff changeset	55
/* read from stream until end-of-stream or until limit bytes have been collected;
   hint is the initial buffer capacity used when no explicit limit is given */
def read_stream(stream: InputStream, limit: Int = Int.MaxValue, hint: Int = 1024): Bytes =
  if (limit == 0) empty
  else {
    val initial_size = (if (limit == Int.MaxValue) hint else limit) max 1024
    val out = new ByteArrayOutputStream(initial_size)
    val chunk = new Array[Byte](8192)

    var finished = false
    while (!finished) {
      // never request more than what is still missing up to the limit
      val m = stream.read(chunk, 0, chunk.length min (limit - out.size))
      if (m == -1) finished = true
      else {
        out.write(chunk, 0, m)
        if (out.size >= limit) finished = true
      }
    }

    new Bytes(out.toByteArray, 0, out.size)
  }
64001 7ecb22be8f03 more general read_stream: return actual byte count; wenzelm parents: 63779 diff changeset	72
/* read the stream opened for the given URL name via read_stream (default limit and hint);
   the stream is managed by using */
def read_url(name: String): Bytes =
  using(Url(name).openStream) { stream => read_stream(stream) }
6a2daddc238c tuned signature; wenzelm parents: 77716 diff changeset	74
/* read a slice of a file: at most limit bytes, starting at offset.

   A negative offset is treated as 0 and a negative limit as 0 (result Bytes.empty).
   Slices larger than Int.MaxValue bytes are rejected via error. The result may be
   shorter than requested if the channel reports end-of-file early. */
def read_file(path: Path, offset: Long = 0L, limit: Long = Long.MaxValue): Bytes = {
  val length = File.size(path)
  val start = offset.max(0L)
  // clamp to 0 last: otherwise a negative limit would produce a negative slice length
  // and fail below in ByteBuffer.allocate
  val len = (length - start).min(limit).max(0L)
  if (len > Int.MaxValue) error("Cannot read large file slice: " + Space.bytes(len).print)
  else if (len == 0L) empty
  else {
    using(FileChannel.open(path.java_path, StandardOpenOption.READ)) { channel =>
      channel.position(start)
      val n = len.toInt
      val buf = ByteBuffer.allocate(n)
      var i = 0  // total number of bytes read so far
      var m = 0  // result of the last read: byte count, or -1 at end-of-file
      while ({
        m = channel.read(buf)
        if (m != -1) i += m
        m != -1 && n > i
      }) ()
      new Bytes(buf.array, 0, i)
    }
  }
}
25fd62cba347 clarified signature: more general operation Bytes.read_slice; wenzelm parents: 76361 diff changeset	97
/* read an entire file: read_file with its default offset and limit */
def read(path: Path): Bytes = read_file(path)

def read(file: JFile): Bytes = {
  val path = File.path(file)
  read_file(path)
}
77718 6ad3a412ed97 clarified signature; wenzelm parents: 77717 diff changeset	100
64229 12aa3980f65c more operations; wenzelm parents: 64224 diff changeset	101
12aa3980f65c more operations; wenzelm parents: 64224 diff changeset	102	/* write */
12aa3980f65c more operations; wenzelm parents: 64224 diff changeset	103
/* write bytes to a file through a fresh FileOutputStream (stream managed by using) */
def write(file: JFile, bytes: Bytes): Unit =
  using(new FileOutputStream(file)) { out => bytes.write_stream(out) }

def write(path: Path, bytes: Bytes): Unit = {
  val file = path.file
  write(file, bytes)
}
78194 da721ba809a4 more operations; wenzelm parents: 77718 diff changeset	108
da721ba809a4 more operations; wenzelm parents: 77718 diff changeset	109
da721ba809a4 more operations; wenzelm parents: 77718 diff changeset	110	/* append */
da721ba809a4 more operations; wenzelm parents: 77718 diff changeset	111
/* append bytes to a file: FileOutputStream is opened with its append flag set to true */
def append(file: JFile, bytes: Bytes): Unit =
  using(new FileOutputStream(file, true)) { out => bytes.write_stream(out) }

def append(path: Path, bytes: Bytes): Unit = {
  val file = path.file
  append(file, bytes)
}
54439 621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	116	}
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	117
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	118	final class Bytes private(
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	119	protected val bytes: Array[Byte],
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	120	protected val offset: Int,
75393 87ebf5a50283 clarified formatting, for the sake of scala3; wenzelm parents: 75382 diff changeset	121	val length: Int) extends CharSequence {
54439 621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	122	/* equality */
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	123
/* content equality: two Bytes are equal iff their visible slices hold the same byte
   sequence, independently of the backing arrays and offsets */
override def equals(that: Any): Boolean =
  that match {
    case other: Bytes =>
      // identity check first, as a cheap shortcut before comparing content
      this.eq(other) ||
      Arrays.equals(bytes, offset, offset + length,
        other.bytes, other.offset, other.offset + other.length)
    case _ => false
  }
2c4940d2edf7 tuned signature; wenzelm parents: 54439 diff changeset	133
/* polynomial hash (factor 31) over the unsigned byte values of the visible slice;
   computed on first use and then kept */
private lazy val hash: Int = {
  val end = offset + length
  var h = 0
  var i = offset
  while (i < end) {
    h = 31 * h + (bytes(i) & 0xFF)
    i += 1
  }
  h
}

override def hashCode(): Int = hash
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	144
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	145
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	146	/* content */
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	147
/* SHA1 digest of the visible content, computed on first use.

   Only the slice [offset, offset + length) is digested: the backing array may be larger
   (e.g. after subSequence, or a short read in Bytes.read_file), and equal Bytes must
   have equal digests. */
lazy val sha1_digest: SHA1.Digest = {
  val content: Array[Byte] =
    if (offset == 0 && length == bytes.length) bytes  // avoid copy in the common case
    else array
  SHA1.digest(content)
}
54440 2c4940d2edf7 tuned signature; wenzelm parents: 54439 diff changeset	149
def is_empty: Boolean = length == 0

/* iterate over the bytes of the visible slice, in order */
def iterator: Iterator[Byte] =
  Iterator.range(offset, offset + length).map(i => bytes(i))
51e696887b81 more uniform multi-language operations; wenzelm parents: 69393 diff changeset	155
/* fresh copy of the visible slice: the caller may modify the result freely */
def array: Array[Byte] =
  Arrays.copyOfRange(bytes, offset, offset + length)
c5b3860d29ef avoid loading of font file, to eliminate "Illegal reflective access by com.lowagie.text.pdf.MappedRandomAccessFile$1 (iText-2.1.5.jar) to method java.nio.DirectByteBuffer.cleaner()" -- due to com.lowagie.text.pdf.TrueTypeFont.process() / RandomAccessFileOrArray; wenzelm parents: 68167 diff changeset	161
/* content decoded via UTF8.decode_permissive */
def text: String = UTF8.decode_permissive(this)

/* the decoded text, provided that encoding it again reproduces exactly these bytes */
def wellformed_text: Option[String] =
  Some(text).filter(s => this == Bytes(s))
03dd2f19f1d7 clarified signature: more operations; wenzelm parents: 75709 diff changeset	168
/* Base64 encoding of the visible slice; the backing array is passed directly when the
   slice covers it completely, otherwise a copy of the slice is encoded */
def encode_base64: String = {
  val covers_all = offset == 0 && length == bytes.length
  Base64.encode(if (covers_all) bytes else array)
}
0b66aca9c965 more operations; wenzelm parents: 68087 diff changeset	175
/* (false, text) for wellformed text content, otherwise (true, base64 encoding) */
def maybe_encode_base64: (Boolean, String) =
  wellformed_text.fold((true, encode_base64))(s => (false, s))
68106 a514e29db980 return exports as result for Isabelle server; wenzelm parents: 68094 diff changeset	181
/* short description showing only the size, not the content */
override def toString: String =
  if (!is_empty) s"Bytes(${Space.bytes(length).print})"
  else "Bytes.empty"
54439 621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	184
/* non-empty content as Some, empty content as None */
def proper: Option[Bytes] = Option.when(!is_empty)(this)
def proper_text: Option[String] = Option.when(!is_empty)(text)
65630 c41bbf657310 more operations; wenzelm parents: 65279 diff changeset	187
/* concatenation; an empty operand returns the other one unchanged, without copying */
def +(other: Bytes): Bytes = {
  def concat: Bytes = {
    val joined = new Array[Byte](length + other.length)
    System.arraycopy(bytes, offset, joined, 0, length)
    System.arraycopy(other.bytes, other.offset, joined, length, other.length)
    new Bytes(joined, 0, joined.length)
  }

  if (other.is_empty) this
  else if (is_empty) other
  else concat
}
54440 2c4940d2edf7 tuned signature; wenzelm parents: 54439 diff changeset	197
2c4940d2edf7 tuned signature; wenzelm parents: 54439 diff changeset	198
60833 d201996f72a8 provide CharSequence operations as well; wenzelm parents: 55618 diff changeset	199	/* CharSequence operations */
d201996f72a8 provide CharSequence operations as well; wenzelm parents: 55618 diff changeset	200
/* byte at position i of the visible slice, as a Char in the range 0..255 */
def charAt(i: Int): Char =
  if (i < 0 || i >= length) throw new IndexOutOfBoundsException
  else (bytes(offset + i) & 0xFF).toChar
d201996f72a8 provide CharSequence operations as well; wenzelm parents: 55618 diff changeset	204
/* slice [i, j) of this content; shares the backing array instead of copying it */
def subSequence(i: Int, j: Int): Bytes = {
  val valid = 0 <= i && i <= j && j <= length
  if (!valid) throw new IndexOutOfBoundsException
  else new Bytes(bytes, offset + i, j - i)
}
d201996f72a8 provide CharSequence operations as well; wenzelm parents: 55618 diff changeset	209
/* remove one trailing line terminator, if present: CR LF (13, 10), or a single
   CR (13) or LF (10); otherwise the content is returned unchanged */
def trim_line: Bytes =
  if (length >= 2 && charAt(length - 2) == 13 && charAt(length - 1) == 10) {
    subSequence(0, length - 2)
  }
  else if (length >= 1 && (charAt(length - 1) == 13 || charAt(length - 1) == 10)) {
    subSequence(0, length - 1)
  }
  else this
51e696887b81 more uniform multi-language operations; wenzelm parents: 69393 diff changeset	218
60833 d201996f72a8 provide CharSequence operations as well; wenzelm parents: 55618 diff changeset	219
64004 b4ece7a3f2ca clarified stream operations; wenzelm parents: 64001 diff changeset	220	/* streams */
b4ece7a3f2ca clarified stream operations; wenzelm parents: 64001 diff changeset	221
/* input stream over the visible slice of the backing array (no copy) */
def stream(): ByteArrayInputStream =
  new ByteArrayInputStream(bytes, offset, length)

/* write the visible slice to the given output stream */
def write_stream(stream: OutputStream): Unit =
  stream.write(bytes, offset, length)
b4ece7a3f2ca clarified stream operations; wenzelm parents: 64001 diff changeset	225
b4ece7a3f2ca clarified stream operations; wenzelm parents: 64001 diff changeset	226
76351 2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala; wenzelm parents: 76350 diff changeset	227	/* XZ / Zstd data compression */
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala; wenzelm parents: 76350 diff changeset	228
/* check whether the content starts with the 6 bytes FD 37 7A 58 5A 00 */
def detect_xz: Boolean = {
  val magic =
    Array[Byte](0xFD.toByte, 0x37.toByte, 0x7A.toByte, 0x58.toByte, 0x5A.toByte, 0x00.toByte)
  length >= magic.length &&
  Arrays.equals(bytes, offset, offset + magic.length, magic, 0, magic.length)
}
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala; wenzelm parents: 76350 diff changeset	237
/* check whether the content starts with the 4 bytes 28 B5 2F FD */
def detect_zstd: Boolean = {
  val magic = Array[Byte](0x28.toByte, 0xB5.toByte, 0x2F.toByte, 0xFD.toByte)
  length >= magic.length &&
  Arrays.equals(bytes, offset, offset + magic.length, magic, 0, magic.length)
}
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala; wenzelm parents: 76350 diff changeset	244
/* decode the content through an XZInputStream; the compressed length serves as
   initial buffer size hint for the result */
def uncompress_xz(cache: Compress.Cache = Compress.Cache.none): Bytes =
  using(new xz.XZInputStream(stream(), cache.for_xz)) { in =>
    Bytes.read_stream(in, hint = length)
  }
cff0828c374f clarified signature; wenzelm parents: 76353 diff changeset	247
/* decode Zstd content.

   If the decompressed size reported for the frame is usable (positive and below
   Int.MaxValue), decompress in one step into an array of that size; otherwise fall
   back on streaming decompression. */
def uncompress_zstd(cache: Compress.Cache = Compress.Cache.none): Bytes = {
  Zstd.init()
  val n = zstd.Zstd.decompressedSize(bytes, offset, length)
  if (n > 0 && n < Int.MaxValue) {
    // avoid redundant copies of potentially large arrays: pass the backing array
    // directly when the slice covers it, and wrap the fresh result array as it is
    val src = if (offset == 0 && length == bytes.length) bytes else array
    val result = zstd.Zstd.decompress(src, n.toInt)
    new Bytes(result, 0, result.length)
  }
  else {
    using(new zstd.ZstdInputStream(stream(), cache.for_zstd))(Bytes.read_stream(_, hint = length))
  }
}
54440 2c4940d2edf7 tuned signature; wenzelm parents: 54439 diff changeset	258
/* uncompress XZ or Zstd content, according to the detected leading bytes;
   anything else is an error */
def uncompress(cache: Compress.Cache = Compress.Cache.none): Bytes = {
  if (detect_xz) uncompress_xz(cache = cache)
  else {
    if (detect_zstd) uncompress_zstd(cache = cache)
    else error("Cannot detect compression scheme")
  }
}
64004 b4ece7a3f2ca clarified stream operations; wenzelm parents: 64001 diff changeset	263
/* compress the visible content with XZ or Zstd, depending on the kind of options */
def compress(
  options: Compress.Options = Compress.Options(),
  cache: Compress.Cache = Compress.Cache.none
): Bytes = {
  options match {
    case options_xz: Compress.Options_XZ =>
      val result = new ByteArrayOutputStream(length)
      using(new xz.XZOutputStream(result, options_xz.make, cache.for_xz))(write_stream)
      new Bytes(result.toByteArray, 0, result.size)
    case options_zstd: Compress.Options_Zstd =>
      Zstd.init()
      // The backing array may only be passed directly if the slice covers it completely:
      // with offset == 0 but length < bytes.length (e.g. after subSequence(0, k)),
      // trailing bytes outside of the slice would be compressed as well.
      val src = if (offset == 0 && length == bytes.length) bytes else array
      Bytes(zstd.Zstd.compress(src, options_zstd.level))
  }
}
68167 327bb0f5f768 clarified implicit compression; wenzelm parents: 68150 diff changeset	278
/* compress, but keep the original content unless the result is strictly smaller;
   the Boolean tells whether the returned Bytes are compressed */
def maybe_compress(
  options: Compress.Options = Compress.Options(),
  cache: Compress.Cache = Compress.Cache.none
) : (Boolean, Bytes) = {
  val candidate = compress(options = options, cache = cache)
  val smaller = candidate.length < length
  if (smaller) (true, candidate) else (false, this)
}
54439 621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	286	}

author	wenzelm
	Thu, 21 Dec 2023 11:58:19 +0100
changeset 79326	8a2921053511
parent 78956	12abaffb0346
child 79509	e82448aacf48
permissions	-rw-r--r--