isabelle: src/Pure/General/bytes.scala@d8330439823a (annotated)

54439 621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	1	/* Title: Pure/General/bytes.scala
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	2	Author: Makarius
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	3
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	4	Immutable byte vectors versus UTF8 strings.
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	5	*/
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	6
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	7	package isabelle
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	8
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	9
78855 6fdcd6c8c97a prefer old-style import "=>"; wenzelm parents: 78243 diff changeset	10	import java.io.{ByteArrayInputStream, ByteArrayOutputStream, FileInputStream, FileOutputStream,
6fdcd6c8c97a prefer old-style import "=>"; wenzelm parents: 78243 diff changeset	11	InputStream, OutputStream, File => JFile}
77711 25fd62cba347 clarified signature: more general operation Bytes.read_slice; wenzelm parents: 76361 diff changeset	12	import java.nio.ByteBuffer
25fd62cba347 clarified signature: more general operation Bytes.read_slice; wenzelm parents: 76361 diff changeset	13	import java.nio.channels.FileChannel
25fd62cba347 clarified signature: more general operation Bytes.read_slice; wenzelm parents: 76361 diff changeset	14	import java.nio.file.StandardOpenOption
77712 dd4bb80dbc3a tuned performance: much faster low-level operation; wenzelm parents: 77711 diff changeset	15	import java.util.Arrays
76353 3698d0f3da18 clarified signature; wenzelm parents: 76351 diff changeset	16	import org.tukaani.xz
3698d0f3da18 clarified signature; wenzelm parents: 76351 diff changeset	17	import com.github.luben.zstd
54440 2c4940d2edf7 tuned signature; wenzelm parents: 54439 diff changeset	18
2c4940d2edf7 tuned signature; wenzelm parents: 54439 diff changeset	19
object Bytes {
  // Shared value for all empty results.
  val empty: Bytes = new Bytes(Array[Byte](), 0, 0)

  // Bytes of the given text, as produced by UTF8.bytes; empty text yields "empty".
  def apply(s: CharSequence): Bytes = {
    val str = s.toString
    if (str.isEmpty) empty
    else {
      val b = UTF8.bytes(str)
      new Bytes(b, 0, b.length)
    }
  }

  // Copy of the whole array.
  def apply(a: Array[Byte]): Bytes = apply(a, 0, a.length)

  // Copy of the slice a(offset) .. a(offset + length - 1): the result does not
  // share storage with the argument.
  def apply(a: Array[Byte], offset: Int, length: Int): Bytes =
    if (length == 0) empty
    else {
      val b = new Array[Byte](length)
      System.arraycopy(a, offset, b, 0, length)
      new Bytes(b, 0, b.length)
    }

  val newline: Bytes = apply("\n")


  /* base64 */

  // The array returned by Base64.decode is wrapped directly, without copying.
  def decode_base64(s: String): Bytes = {
    val a = Base64.decode(s)
    new Bytes(a, 0, a.length)
  }


  /* read */

  // Read at most "limit" bytes from the stream, stopping early at end-of-stream.
  // A non-positive limit yields "empty" without touching the stream. The "hint"
  // only determines the initial buffer capacity for unlimited reads.
  def read_stream(stream: InputStream, limit: Int = Int.MaxValue, hint: Int = 1024): Bytes =
    if (limit <= 0) empty  // negative limit would make stream.read fail on a negative length
    else {
      val out_size = (if (limit == Int.MaxValue) hint else limit) max 1024
      val out = new ByteArrayOutputStream(out_size)
      val buf = new Array[Byte](8192)
      var m = 0

      // do-while loop: read a chunk (never beyond the remaining limit), then test
      while ({
        m = stream.read(buf, 0, buf.length min (limit - out.size))
        if (m != -1) out.write(buf, 0, m)
        m != -1 && limit > out.size
      }) ()

      new Bytes(out.toByteArray, 0, out.size)
    }

  def read_url(name: String): Bytes = using(Url(name).open_stream())(read_stream(_))

  // Read a slice of the file: at most "limit" bytes, starting at "offset".
  // Negative offset or limit count as 0; the slice is truncated at the file
  // size reported by File.size. Slices larger than Int.MaxValue are an error.
  def read_file(path: Path, offset: Long = 0L, limit: Long = Long.MaxValue): Bytes = {
    val length = File.size(path)
    val start = offset.max(0L)
    val len = (length - start).max(0L).min(limit.max(0L))
    if (len > Int.MaxValue) error("Cannot read large file slice: " + Space.bytes(len).print)
    else if (len == 0L) empty
    else {
      using(FileChannel.open(path.java_path, StandardOpenOption.READ)) { channel =>
        channel.position(start)
        val n = len.toInt
        val buf = ByteBuffer.allocate(n)
        var i = 0  // total number of bytes read so far
        var m = 0  // result of the last read operation
        while ({
          m = channel.read(buf)
          if (m != -1) i += m
          m != -1 && n > i
        }) ()
        // "i" may be smaller than "n", if end-of-file was reached prematurely
        new Bytes(buf.array, 0, i)
      }
    }
  }

  def read(path: Path): Bytes = read_file(path)
  def read(file: JFile): Bytes = read_file(File.path(file))


  /* write */

  // Write the bytes via FileOutputStream in its default (non-append) mode.
  def write(file: JFile, bytes: Bytes): Unit =
    using(new FileOutputStream(file))(bytes.write_stream(_))

  def write(path: Path, bytes: Bytes): Unit = write(path.file, bytes)


  /* append */

  // Write the bytes via FileOutputStream in append mode.
  def append(file: JFile, bytes: Bytes): Unit =
    using(new FileOutputStream(file, true))(bytes.write_stream(_))

  def append(path: Path, bytes: Bytes): Unit = append(path.file, bytes)
}
621a155c7715 immutable byte vectors versus UTF8 strings; wenzelm parents: diff changeset	116
// Byte vector given as the slice bytes(offset) .. bytes(offset + length - 1) of a
// backing array. The slice may be smaller than the backing array (e.g. after
// subSequence), so every operation must respect "offset" and "length".
final class Bytes private(
  protected val bytes: Array[Byte],
  protected val offset: Int,
  val length: Int) extends CharSequence {
  /* equality */

  // Equality of slice content, independently of the backing arrays.
  override def equals(that: Any): Boolean = {
    that match {
      case other: Bytes =>
        this.eq(other) ||
        Arrays.equals(bytes, offset, offset + length,
          other.bytes, other.offset, other.offset + other.length)
      case _ => false
    }
  }

  // Hash over the slice content (bytes taken as unsigned values), computed once.
  private lazy val hash: Int = {
    var h = 0
    for (i <- offset until offset + length) {
      val b = bytes(i).asInstanceOf[Int] & 0xFF
      h = 31 * h + b
    }
    h
  }

  override def hashCode(): Int = hash


  /* content */

  // Array that holds exactly the slice content: the backing array itself if the
  // slice covers it completely, otherwise a fresh copy. The result may be the
  // internal array, so it must be neither modified nor handed out.
  private def exact_array: Array[Byte] =
    if (offset == 0 && length == bytes.length) bytes else array

  // Digest of the slice content only, not of the whole backing array.
  lazy val sha1_digest: SHA1.Digest = SHA1.digest(exact_array)

  def is_empty: Boolean = length == 0

  def iterator: Iterator[Byte] =
    for (i <- (offset until (offset + length)).iterator)
      yield bytes(i)

  // Fresh copy of the slice content.
  def array: Array[Byte] = {
    val a = new Array[Byte](length)
    System.arraycopy(bytes, offset, a, 0, length)
    a
  }

  // Text obtained via UTF8.decode_permissive.
  def text: String = UTF8.decode_permissive(this)

  // Some(text), if converting the text back via Bytes(_) reproduces these bytes.
  def wellformed_text: Option[String] = {
    val s = text
    if (this == Bytes(s)) Some(s) else None
  }

  def encode_base64: String = Base64.encode(exact_array)

  // Plain text if wellformed (flag false), otherwise base64 encoding (flag true).
  def maybe_encode_base64: (Boolean, String) =
    wellformed_text match {
      case Some(s) => (false, s)
      case None => (true, encode_base64)
    }

  override def toString: String =
    if (is_empty) "Bytes.empty" else "Bytes(" + Space.bytes(length).print + ")"

  def proper: Option[Bytes] = if (is_empty) None else Some(this)
  def proper_text: Option[String] = if (is_empty) None else Some(text)

  // Concatenation: an empty operand returns the other one without copying.
  def +(other: Bytes): Bytes =
    if (other.is_empty) this
    else if (is_empty) other
    else {
      val new_bytes = new Array[Byte](length + other.length)
      System.arraycopy(bytes, offset, new_bytes, 0, length)
      System.arraycopy(other.bytes, other.offset, new_bytes, length, other.length)
      new Bytes(new_bytes, 0, new_bytes.length)
    }


  /* CharSequence operations */

  // Byte at position i as unsigned value in the range 0 .. 255.
  def charAt(i: Int): Char =
    if (0 <= i && i < length) (bytes(offset + i).asInstanceOf[Int] & 0xFF).asInstanceOf[Char]
    else throw new IndexOutOfBoundsException

  // Slice i .. j - 1 that shares the backing array with this value (no copy).
  def subSequence(i: Int, j: Int): Bytes = {
    if (0 <= i && i <= j && j <= length) new Bytes(bytes, offset + i, j - i)
    else throw new IndexOutOfBoundsException
  }

  // Remove one trailing line terminator: CR LF, or a single CR, or a single LF.
  def trim_line: Bytes =
    if (length >= 2 && charAt(length - 2) == 13 && charAt(length - 1) == 10) {
      subSequence(0, length - 2)
    }
    else if (length >= 1 && (charAt(length - 1) == 13 || charAt(length - 1) == 10)) {
      subSequence(0, length - 1)
    }
    else this


  /* streams */

  def stream(): ByteArrayInputStream = new ByteArrayInputStream(bytes, offset, length)

  def write_stream(stream: OutputStream): Unit = stream.write(bytes, offset, length)


  /* XZ / Zstd data compression */

  // Content starts with the 6 bytes FD 37 7A 58 5A 00.
  def detect_xz: Boolean =
    length >= 6 &&
      bytes(offset)     == 0xFD.toByte &&
      bytes(offset + 1) == 0x37.toByte &&
      bytes(offset + 2) == 0x7A.toByte &&
      bytes(offset + 3) == 0x58.toByte &&
      bytes(offset + 4) == 0x5A.toByte &&
      bytes(offset + 5) == 0x00.toByte

  // Content starts with the 4 bytes 28 B5 2F FD.
  def detect_zstd: Boolean =
    length >= 4 &&
      bytes(offset)     == 0x28.toByte &&
      bytes(offset + 1) == 0xB5.toByte &&
      bytes(offset + 2) == 0x2F.toByte &&
      bytes(offset + 3) == 0xFD.toByte

  def uncompress_xz(cache: Compress.Cache = Compress.Cache.none): Bytes =
    using(new xz.XZInputStream(stream(), cache.for_xz))(Bytes.read_stream(_, hint = length))

  // Decompress in one step if decompressedSize reports a size that fits into
  // an array, otherwise fall back on stream decompression.
  def uncompress_zstd(cache: Compress.Cache = Compress.Cache.none): Bytes = {
    Zstd.init()
    val n = zstd.Zstd.decompressedSize(bytes, offset, length)
    if (n > 0 && n < Int.MaxValue) {
      Bytes(zstd.Zstd.decompress(array, n.toInt))
    }
    else {
      using(new zstd.ZstdInputStream(stream(), cache.for_zstd))(Bytes.read_stream(_, hint = length))
    }
  }

  // Uncompress according to the detected header; error if neither scheme matches.
  def uncompress(cache: Compress.Cache = Compress.Cache.none): Bytes =
    if (detect_xz) uncompress_xz(cache = cache)
    else if (detect_zstd) uncompress_zstd(cache = cache)
    else error("Cannot detect compression scheme")

  // Compress the slice content with the scheme selected by "options".
  def compress(
    options: Compress.Options = Compress.Options(),
    cache: Compress.Cache = Compress.Cache.none
  ): Bytes = {
    options match {
      case options_xz: Compress.Options_XZ =>
        val result = new ByteArrayOutputStream(length)
        using(new xz.XZOutputStream(result, options_xz.make, cache.for_xz))(write_stream)
        new Bytes(result.toByteArray, 0, result.size)
      case options_zstd: Compress.Options_Zstd =>
        Zstd.init()
        // exact_array avoids a redundant copy, but only if the slice covers the
        // whole backing array: offset == 0 alone is not sufficient
        Bytes(zstd.Zstd.compress(exact_array, options_zstd.level))
    }
  }

  // Compressed content (flag true) if that is strictly smaller, otherwise this
  // value unchanged (flag false).
  def maybe_compress(
    options: Compress.Options = Compress.Options(),
    cache: Compress.Cache = Compress.Cache.none
  ) : (Boolean, Bytes) = {
    val compressed = compress(options = options, cache = cache)
    if (compressed.length < length) (true, compressed) else (false, this)
  }
}

author	wenzelm
	Sun, 21 Jan 2024 14:05:14 +0100
changeset 79510	d8330439823a
parent 79509	e82448aacf48
child 80350	96843eb96493
permissions	-rw-r--r--