generic support for XZ and Zstd compression in Isabelle/Scala;
support for Zstd compression in Isabelle/ML;
--- a/NEWS Fri Oct 21 14:45:13 2022 +0200
+++ b/NEWS Fri Oct 21 16:39:31 2022 +0200
@@ -50,6 +50,22 @@
- Added lemma wfP_subset_mset[simp].
+*** ML ***
+
+* Operations for Zstd compression (via Isabelle/Scala):
+
+ Zstd.compress: Bytes.T -> Bytes.T
+ Zstd.uncompress: Bytes.T -> Bytes.T
+
+
+*** System ***
+
+* Isabelle/Scala provides generic support for XZ and Zstd compression,
+via Compress.Options and Compress.Cache. Bytes.uncompress automatically
+detects the compression scheme.
+
+
+
New in Isabelle2022 (October 2022)
----------------------------------
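
A minimal usage sketch of the new API (illustrative only, not part of the
changeset; it assumes an Isabelle/Scala context where isabelle.Bytes and
isabelle.Compress are available, with an arbitrary example payload):

  import isabelle._

  val bytes = Bytes("example payload " * 1000)

  // scheme and level are chosen explicitly when compressing ...
  val xz = bytes.compress(options = Compress.Options_XZ(6))
  val zstd = bytes.compress(options = Compress.Options_Zstd(3))

  // ... but not when uncompressing: Bytes.uncompress inspects the leading
  // magic bytes and dispatches to XZ or Zstd accordingly
  assert(xz.uncompress().text == bytes.text)
  assert(zstd.uncompress().text == bytes.text)
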
--- a/etc/build.props Fri Oct 21 14:45:13 2022 +0200
+++ b/etc/build.props Fri Oct 21 16:39:31 2022 +0200
@@ -63,6 +63,7 @@
src/Pure/General/codepoint.scala \
src/Pure/General/comment.scala \
src/Pure/General/completion.scala \
+ src/Pure/General/compress.scala \
src/Pure/General/csv.scala \
src/Pure/General/date.scala \
src/Pure/General/exn.scala \
@@ -100,7 +101,6 @@
src/Pure/General/uuid.scala \
src/Pure/General/value.scala \
src/Pure/General/word.scala \
- src/Pure/General/xz.scala \
src/Pure/General/zstd.scala \
src/Pure/Isar/document_structure.scala \
src/Pure/Isar/keyword.scala \
--- a/src/Pure/Admin/build_history.scala Fri Oct 21 14:45:13 2022 +0200
+++ b/src/Pure/Admin/build_history.scala Fri Oct 21 16:39:31 2022 +0200
@@ -359,7 +359,7 @@
session_build_info :::
ml_statistics.map(Protocol.ML_Statistics_Marker.apply) :::
session_errors.map(Protocol.Error_Message_Marker.apply) :::
- heap_sizes), XZ.options(6))
+ heap_sizes), Compress.Options_XZ(6))
/* next build */
--- a/src/Pure/Admin/build_log.scala Fri Oct 21 14:45:13 2022 +0200
+++ b/src/Pure/Admin/build_log.scala Fri Oct 21 16:39:31 2022 +0200
@@ -616,7 +616,10 @@
errors = log_file.filter(Protocol.Error_Message_Marker))
}
- def compress_errors(errors: List[String], cache: XZ.Cache = XZ.Cache.none): Option[Bytes] =
+ def compress_errors(
+ errors: List[String],
+ cache: Compress.Cache = Compress.Cache.none
+ ): Option[Bytes] =
if (errors.isEmpty) None
else {
Some(Bytes(YXML.string_of_body(XML.Encode.list(XML.Encode.string)(errors))).
@@ -627,7 +630,7 @@
if (bytes.is_empty) Nil
else {
XML.Decode.list(YXML.string_of_body)(
- YXML.parse_body(bytes.uncompress(cache = cache.xz).text, cache = cache))
+ YXML.parse_body(bytes.uncompress(cache = cache.compress).text, cache = cache))
}
@@ -954,7 +957,7 @@
stmt.double(13) = session.ml_timing.factor
stmt.long(14) = session.heap_size
stmt.string(15) = session.status.map(_.toString)
- stmt.bytes(16) = compress_errors(session.errors, cache = cache.xz)
+ stmt.bytes(16) = compress_errors(session.errors, cache = cache.compress)
stmt.string(17) = session.sources
stmt.execute()
}
@@ -988,7 +991,7 @@
db.using_statement(db.insert_permissive(table)) { stmt =>
val ml_stats: List[(String, Option[Bytes])] =
Par_List.map[(String, Session_Entry), (String, Option[Bytes])](
- { case (a, b) => (a, Properties.compress(b.ml_statistics, cache = cache.xz).proper) },
+ { case (a, b) => (a, Properties.compress(b.ml_statistics, cache = cache.compress).proper) },
build_info.sessions.iterator.filter(p => p._2.ml_statistics.nonEmpty).toList)
val entries = if (ml_stats.nonEmpty) ml_stats else List("" -> None)
for ((session_name, ml_statistics) <- entries) {
--- a/src/Pure/Admin/jenkins.scala Fri Oct 21 14:45:13 2022 +0200
+++ b/src/Pure/Admin/jenkins.scala Fri Oct 21 16:39:31 2022 +0200
@@ -112,7 +112,7 @@
File.write_xz(log_path,
terminate_lines(Url.read(main_log) ::
ml_statistics.map(Protocol.ML_Statistics_Marker.apply)),
- XZ.options(6))
+ Compress.Options_XZ(6))
}
}
}
--- a/src/Pure/General/bytes.scala Fri Oct 21 14:45:13 2022 +0200
+++ b/src/Pure/General/bytes.scala Fri Oct 21 16:39:31 2022 +0200
@@ -7,10 +7,10 @@
package isabelle
-import java.io.{File => JFile, ByteArrayOutputStream, ByteArrayInputStream,
- OutputStream, InputStream, FileInputStream, FileOutputStream}
+import com.github.luben.zstd.{ZstdInputStream, ZstdOutputStream}
+
+import java.io.{ByteArrayInputStream, ByteArrayOutputStream, FileInputStream, FileOutputStream, InputStream, OutputStream, File as JFile}
import java.net.URL
-
import org.tukaani.xz.{XZInputStream, XZOutputStream}
@@ -191,20 +191,59 @@
def write_stream(stream: OutputStream): Unit = stream.write(bytes, offset, length)
- /* XZ data compression */
+ /* XZ / Zstd data compression */
+
+ private def detect_xz: Boolean =
+ length >= 6 &&
+ bytes(offset) == 0xFD.toByte &&
+ bytes(offset + 1) == 0x37.toByte &&
+ bytes(offset + 2) == 0x7A.toByte &&
+ bytes(offset + 3) == 0x58.toByte &&
+ bytes(offset + 4) == 0x5A.toByte &&
+ bytes(offset + 5) == 0x00.toByte
+
+ private def detect_zstd: Boolean =
+ length >= 4 &&
+ bytes(offset) == 0x28.toByte &&
+ bytes(offset + 1) == 0xB5.toByte &&
+ bytes(offset + 2) == 0x2F.toByte &&
+ bytes(offset + 3) == 0xFD.toByte
+
+ private def detect_error(name: String = ""): Nothing =
+ error("Cannot detect compression scheme" + (if (name.isEmpty) "" else " " + name))
- def uncompress(cache: XZ.Cache = XZ.Cache.none): Bytes =
- using(new XZInputStream(stream(), cache))(Bytes.read_stream(_, hint = length))
+ def uncompress(cache: Compress.Cache = Compress.Cache.none): Bytes =
+ using(
+ if (detect_xz) new XZInputStream(stream(), cache.xz)
+ else if (detect_zstd) { Zstd.init(); new ZstdInputStream(stream(), cache.zstd) }
+ else detect_error()
+ )(Bytes.read_stream(_, hint = length))
+
+ def uncompress_xz(cache: Compress.Cache = Compress.Cache.none): Bytes =
+ if (detect_xz) uncompress(cache = cache) else detect_error("XZ")
+
+ def uncompress_zstd(cache: Compress.Cache = Compress.Cache.none): Bytes =
+ if (detect_zstd) uncompress(cache = cache) else detect_error("Zstd")
- def compress(options: XZ.Options = XZ.options(), cache: XZ.Cache = XZ.Cache.none): Bytes = {
+ def compress(
+ options: Compress.Options = Compress.Options(),
+ cache: Compress.Cache = Compress.Cache.none
+ ): Bytes = {
val result = new ByteArrayOutputStream(length)
- using(new XZOutputStream(result, options, cache))(write_stream(_))
+ using(
+ options match {
+ case options_xz: Compress.Options_XZ =>
+ new XZOutputStream(result, options_xz.make, cache.xz)
+ case options_zstd: Compress.Options_Zstd =>
+ Zstd.init()
+ new ZstdOutputStream(result, cache.zstd, options_zstd.level)
+ })(write_stream)
new Bytes(result.toByteArray, 0, result.size)
}
def maybe_compress(
- options: XZ.Options = XZ.options(),
- cache: XZ.Cache = XZ.Cache.none
+ options: Compress.Options = Compress.Options(),
+ cache: Compress.Cache = Compress.Cache.none
) : (Boolean, Bytes) = {
val compressed = compress(options = options, cache = cache)
if (compressed.length < length) (true, compressed) else (false, this)
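
The detect_xz and detect_zstd predicates above check the standard magic
numbers of the XZ container (FD 37 7A 58 5A 00) and the Zstd frame
(28 B5 2F FD). A self-contained sketch of the same check on a raw byte
array, outside the Bytes class (illustrative only):

  object Detect_Scheme {
    private val xz_magic = Array(0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00).map(_.toByte)
    private val zstd_magic = Array(0x28, 0xB5, 0x2F, 0xFD).map(_.toByte)

    // yields Some("XZ"), Some("Zstd"), or None for an unrecognized prefix
    def apply(bs: Array[Byte]): Option[String] =
      if (bs.length >= 6 && bs.take(6).sameElements(xz_magic)) Some("XZ")
      else if (bs.length >= 4 && bs.take(4).sameElements(zstd_magic)) Some("Zstd")
      else None
  }

For example, Detect_Scheme(java.nio.file.Files.readAllBytes(path)) classifies
a compressed file on disk without attempting to uncompress it.
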
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/General/compress.scala Fri Oct 21 16:39:31 2022 +0200
@@ -0,0 +1,63 @@
+/* Title: Pure/General/compress.scala
+ Author: Makarius
+
+Support for generic data compression.
+*/
+
+package isabelle
+
+
+import org.tukaani.xz.{LZMA2Options, ArrayCache, BasicArrayCache}
+import com.github.luben.zstd.{BufferPool, NoPool, RecyclingBufferPool,
+ ZstdInputStream, ZstdOutputStream}
+
+
+object Compress {
+ /* options */
+
+ object Options {
+ def apply(): Options = Options_XZ()
+ }
+ sealed abstract class Options
+ case class Options_XZ(level: Int = 3) extends Options {
+ def make: LZMA2Options = {
+ val opts = new LZMA2Options
+ opts.setPreset(level)
+ opts
+ }
+ }
+ case class Options_Zstd(level: Int = 3) extends Options
+
+
+ /* cache */
+
+ class Cache private(val xz: ArrayCache, val zstd: BufferPool)
+
+ object Cache {
+ def none: Cache = { Zstd.init(); new Cache(ArrayCache.getDummyCache(), NoPool.INSTANCE) }
+ def make(): Cache = { Zstd.init(); new Cache(new BasicArrayCache, RecyclingBufferPool.INSTANCE) } // FIXME new pool
+ }
+
+
+ /* Scala functions in ML */
+
+ object XZ_Compress extends Scala.Fun_Bytes("XZ.compress") {
+ val here = Scala_Project.here
+ def apply(arg: Bytes): Bytes = arg.compress(Options_XZ())
+ }
+
+ object XZ_Uncompress extends Scala.Fun_Bytes("XZ.uncompress") {
+ val here = Scala_Project.here
+ def apply(arg: Bytes): Bytes = arg.uncompress_xz()
+ }
+
+ object Zstd_Compress extends Scala.Fun_Bytes("Zstd.compress") {
+ val here = Scala_Project.here
+ def apply(arg: Bytes): Bytes = arg.compress(Options_Zstd())
+ }
+
+ object Zstd_Uncompress extends Scala.Fun_Bytes("Zstd.uncompress") {
+ val here = Scala_Project.here
+ def apply(arg: Bytes): Bytes = arg.uncompress_zstd()
+ }
+}
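
The Scala.Fun_Bytes instances at the end of compress.scala are registered for
invocation from Isabelle/ML (see the src/Pure/System/scala.scala hunk below),
but their apply methods also work directly on the Scala side; a brief sketch,
again assuming an Isabelle/Scala context:

  // direct application of the registered functions
  val compressed = Compress.Zstd_Compress(Bytes("hello"))
  val restored = Compress.Zstd_Uncompress(compressed)

  // long-running tools may share a cache that recycles XZ array buffers and
  // Zstd buffer pools; one-off invocations keep the default Compress.Cache.none
  val cache = Compress.Cache.make()
  val again = Bytes("hello").compress(options = Compress.Options_Zstd(), cache = cache)
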
--- a/src/Pure/General/file.scala Fri Oct 21 14:45:13 2022 +0200
+++ b/src/Pure/General/file.scala Fri Oct 21 16:39:31 2022 +0200
@@ -249,18 +249,23 @@
write_file(file, text, (s: OutputStream) => new GZIPOutputStream(new BufferedOutputStream(s)))
def write_gzip(path: Path, text: String): Unit = write_gzip(path.file, text)
- def write_xz(file: JFile, text: String, options: XZ.Options): Unit =
- File.write_file(file, text, s => new XZOutputStream(new BufferedOutputStream(s), options))
- def write_xz(file: JFile, text: String): Unit = write_xz(file, text, XZ.options())
- def write_xz(path: Path, text: String, options: XZ.Options): Unit =
+ def write_xz(file: JFile, text: String, options: Compress.Options_XZ): Unit =
+ File.write_file(file, text, s => new XZOutputStream(new BufferedOutputStream(s), options.make))
+ def write_xz(file: JFile, text: String): Unit = write_xz(file, text, Compress.Options_XZ())
+ def write_xz(path: Path, text: String, options: Compress.Options_XZ): Unit =
write_xz(path.file, text, options)
- def write_xz(path: Path, text: String): Unit = write_xz(path, text, XZ.options())
+ def write_xz(path: Path, text: String): Unit = write_xz(path, text, Compress.Options_XZ())
- def write_zstd(file: JFile, text: String): Unit = {
+ def write_zstd(file: JFile, text: String, options: Compress.Options_Zstd): Unit = {
Zstd.init()
- write_file(file, text, (s: OutputStream) => new ZstdOutputStream(new BufferedOutputStream(s)))
+ File.write_file(file, text, s => new ZstdOutputStream(new BufferedOutputStream(s), options.level))
}
- def write_zstd(path: Path, text: String): Unit = write_zstd(path.file, text)
+ def write_zstd(file: JFile, text: String): Unit =
+ write_zstd(file, text, Compress.Options_Zstd())
+ def write_zstd(path: Path, text: String, options: Compress.Options_Zstd): Unit =
+ write_zstd(path.file, text, options)
+ def write_zstd(path: Path, text: String): Unit =
+ write_zstd(path, text, Compress.Options_Zstd())
def write_backup(path: Path, text: String): Unit = {
if (path.is_file) Isabelle_System.move_file(path, path.backup)
--- a/src/Pure/General/properties.scala Fri Oct 21 14:45:13 2022 +0200
+++ b/src/Pure/General/properties.scala Fri Oct 21 16:39:31 2022 +0200
@@ -54,8 +54,8 @@
}
def compress(ps: List[T],
- options: XZ.Options = XZ.options(),
- cache: XZ.Cache = XZ.Cache.none
+ options: Compress.Options = Compress.Options(),
+ cache: Compress.Cache = Compress.Cache.none
): Bytes = {
if (ps.isEmpty) Bytes.empty
else {
@@ -69,7 +69,7 @@
else {
val ps =
XML.Decode.list(XML.Decode.properties)(
- YXML.parse_body(bs.uncompress(cache = cache.xz).text))
+ YXML.parse_body(bs.uncompress(cache = cache.compress).text))
if (cache.no_cache) ps else ps.map(cache.props)
}
}
--- a/src/Pure/General/xz.scala Fri Oct 21 14:45:13 2022 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-/* Title: Pure/General/xz.scala
- Author: Makarius
-
-Support for XZ data compression.
-*/
-
-package isabelle
-
-
-import org.tukaani.xz.{LZMA2Options, ArrayCache, BasicArrayCache}
-
-
-object XZ {
- /* options */
-
- type Options = LZMA2Options
-
- def options(preset: Int = 3): Options = {
- val opts = new LZMA2Options
- opts.setPreset(preset)
- opts
- }
-
-
- /* cache */
-
- type Cache = ArrayCache
-
- object Cache {
- def none: ArrayCache = ArrayCache.getDummyCache()
- def make(): ArrayCache = new BasicArrayCache
- }
-
-
- /* Scala functions in ML */
-
- object Compress extends Scala.Fun_Bytes("XZ.compress") {
- val here = Scala_Project.here
- def apply(arg: Bytes): Bytes = arg.compress()
- }
-
- object Uncompress extends Scala.Fun_Bytes("XZ.uncompress") {
- val here = Scala_Project.here
- def apply(arg: Bytes): Bytes = arg.uncompress()
- }
-}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/General/zstd.ML Fri Oct 21 16:39:31 2022 +0200
@@ -0,0 +1,19 @@
+(* Title: Pure/General/zstd.ML
+ Author: Makarius
+
+Support for Zstd compression (via Isabelle/Scala).
+*)
+
+signature ZSTD =
+sig
+ val compress: Bytes.T -> Bytes.T
+ val uncompress: Bytes.T -> Bytes.T
+end;
+
+structure Zstd: ZSTD =
+struct
+
+val compress = \<^scala>\<open>Zstd.compress\<close>;
+val uncompress = \<^scala>\<open>Zstd.uncompress\<close>;
+
+end;
--- a/src/Pure/PIDE/xml.scala Fri Oct 21 14:45:13 2022 +0200
+++ b/src/Pure/PIDE/xml.scala Fri Oct 21 16:39:31 2022 +0200
@@ -189,15 +189,15 @@
object Cache {
def make(
- xz: XZ.Cache = XZ.Cache.make(),
+ compress: Compress.Cache = Compress.Cache.make(),
max_string: Int = isabelle.Cache.default_max_string,
initial_size: Int = isabelle.Cache.default_initial_size): Cache =
- new Cache(xz, max_string, initial_size)
+ new Cache(compress, max_string, initial_size)
- val none: Cache = make(XZ.Cache.none, max_string = 0)
+ val none: Cache = make(Compress.Cache.none, max_string = 0)
}
- class Cache(val xz: XZ.Cache, max_string: Int, initial_size: Int)
+ class Cache(val compress: Compress.Cache, max_string: Int, initial_size: Int)
extends isabelle.Cache(max_string, initial_size) {
protected def cache_props(x: Properties.T): Properties.T = {
if (x.isEmpty) x
--- a/src/Pure/ROOT.ML Fri Oct 21 14:45:13 2022 +0200
+++ b/src/Pure/ROOT.ML Fri Oct 21 16:39:31 2022 +0200
@@ -348,6 +348,7 @@
ML_file "General/base64.ML";
ML_file "General/xz.ML";
+ML_file "General/zstd.ML";
ML_file "Tools/build.ML";
ML_file "Tools/named_thms.ML";
ML_file "Tools/print_operation.ML";
--- a/src/Pure/System/scala.scala Fri Oct 21 14:45:13 2022 +0200
+++ b/src/Pure/System/scala.scala Fri Oct 21 16:39:31 2022 +0200
@@ -368,8 +368,10 @@
Scala_Build.Scala_Fun,
Base64.Decode,
Base64.Encode,
- XZ.Compress,
- XZ.Uncompress,
+ Compress.XZ_Compress,
+ Compress.XZ_Uncompress,
+ Compress.Zstd_Compress,
+ Compress.Zstd_Uncompress,
Doc.Doc_Names,
Bibtex.Check_Database,
Isabelle_System.Make_Directory,
--- a/src/Pure/Thy/export.scala Fri Oct 21 14:45:13 2022 +0200
+++ b/src/Pure/Thy/export.scala Fri Oct 21 16:39:31 2022 +0200
@@ -134,7 +134,7 @@
def uncompressed: Bytes = {
val (compressed, bytes) = body.join
- if (compressed) bytes.uncompress(cache = cache.xz) else bytes
+ if (compressed) bytes.uncompress(cache = cache.compress) else bytes
}
def uncompressed_yxml: XML.Body =
@@ -183,7 +183,7 @@
cache: XML.Cache
): Entry = {
val body =
- if (args.compress) Future.fork(bytes.maybe_compress(cache = cache.xz))
+ if (args.compress) Future.fork(bytes.maybe_compress(cache = cache.compress))
else Future.value((false, bytes))
val entry_name = Entry_Name(session = session_name, theory = args.theory_name, name = args.name)
Entry(entry_name, args.executable, body, cache)
--- a/src/Pure/Thy/sessions.scala Fri Oct 21 14:45:13 2022 +0200
+++ b/src/Pure/Thy/sessions.scala Fri Oct 21 16:39:31 2022 +0200
@@ -1488,11 +1488,11 @@
db.using_statement(table.insert()) { stmt =>
stmt.string(1) = name
stmt.bytes(2) = Properties.encode(build_log.session_timing)
- stmt.bytes(3) = Properties.compress(build_log.command_timings, cache = cache.xz)
- stmt.bytes(4) = Properties.compress(build_log.theory_timings, cache = cache.xz)
- stmt.bytes(5) = Properties.compress(build_log.ml_statistics, cache = cache.xz)
- stmt.bytes(6) = Properties.compress(build_log.task_statistics, cache = cache.xz)
- stmt.bytes(7) = Build_Log.compress_errors(build_log.errors, cache = cache.xz)
+ stmt.bytes(3) = Properties.compress(build_log.command_timings, cache = cache.compress)
+ stmt.bytes(4) = Properties.compress(build_log.theory_timings, cache = cache.compress)
+ stmt.bytes(5) = Properties.compress(build_log.ml_statistics, cache = cache.compress)
+ stmt.bytes(6) = Properties.compress(build_log.task_statistics, cache = cache.compress)
+ stmt.bytes(7) = Build_Log.compress_errors(build_log.errors, cache = cache.compress)
stmt.string(8) = build.sources
stmt.string(9) = cat_lines(build.input_heaps)
stmt.string(10) = build.output_heap getOrElse ""
--- a/src/Pure/term.scala Fri Oct 21 14:45:13 2022 +0200
+++ b/src/Pure/term.scala Fri Oct 21 16:39:31 2022 +0200
@@ -177,16 +177,16 @@
object Cache {
def make(
- xz: XZ.Cache = XZ.Cache.make(),
+ compress: Compress.Cache = Compress.Cache.make(),
max_string: Int = isabelle.Cache.default_max_string,
initial_size: Int = isabelle.Cache.default_initial_size): Cache =
- new Cache(xz, initial_size, max_string)
+ new Cache(compress, initial_size, max_string)
val none: Cache = make(max_string = 0)
}
- class Cache(xz: XZ.Cache, max_string: Int, initial_size: Int)
- extends XML.Cache(xz, max_string, initial_size) {
+ class Cache(compress: Compress.Cache, max_string: Int, initial_size: Int)
+ extends XML.Cache(compress, max_string, initial_size) {
protected def cache_indexname(x: Indexname): Indexname =
lookup(x) getOrElse store(Indexname(cache_string(x.name), x.index))