support for scalable byte strings, with incremental construction;
authorwenzelm
Tue, 21 Jun 2022 13:14:09 +0200
changeset 75567 94321fd25c8a
parent 75566 389b193af8ae
child 75568 a8539b1c8775
support for scalable byte strings, with incremental construction;
src/Pure/General/bytes.ML
src/Pure/ROOT.ML
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/General/bytes.ML	Tue Jun 21 13:14:09 2022 +0200
@@ -0,0 +1,80 @@
+(*  Title:      Pure/General/bytes.ML
+    Author:     Makarius
+
+Scalable byte strings, with incremental construction (add content to the
+end). Note: type string is implicitly limited by String.maxSize (approx. 64
+MB on 64_32 architecture).
+*)
+
+signature BYTES =
+sig
+  val chunk_size: int
+  type T
+  val length: T -> int
+  val contents: T -> string list
+  val content: T -> string
+  val is_empty: T -> bool
+  val empty: T
+  val add: string -> T -> T
+  val build: (T -> T) -> T
+  val string: string -> T
+  val buffer: Buffer.T -> T
+  val read: Path.T -> T
+end;
+
+structure Bytes: BYTES =
+struct
+
+val chunk_size = 1024 * 1024;
+
+abstype T = Bytes of
+  {buffer: string list, chunks: string list, m: int (*buffer size*), n: int (*chunks size*)}
+with
+
+fun length (Bytes {m, n, ...}) = m + n;
+
+val compact = implode o rev;
+
+fun contents (Bytes {buffer, chunks, ...}) =
+  rev (chunks |> not (null buffer) ? cons (compact buffer));
+
+val content = implode o contents;
+
+fun is_empty bytes = length bytes = 0;
+
+val empty = Bytes {buffer = [], chunks = [], m = 0, n = 0};
+
+fun add "" bytes = bytes
+  | add s (Bytes {buffer, chunks, m, n}) =
+      let val l = size s in
+        if l + m < chunk_size
+        then Bytes {buffer = s :: buffer, chunks = chunks, m = l + m, n = n}
+        else
+          let
+            val k = chunk_size - m;
+            val chunk = compact (String.substring (s, 0, k) :: buffer);
+            val s' = String.substring (s, k, l - k);
+            val bytes' = Bytes {buffer = [], chunks = chunk :: chunks, m = 0, n = chunk_size + n};
+          in add s' bytes' end
+      end;
+
+end;
+
+fun build (f: T -> T) = f empty;
+
+val string = build o add;
+
+val buffer = build o fold add o Buffer.contents;
+
+fun read_chunk stream =
+  Byte.bytesToString (BinIO.inputN (stream, chunk_size));
+
+val read = File.open_input (fn stream =>
+  let
+    fun read_bytes bytes =
+      (case read_chunk stream of
+        "" => bytes
+      | s => read_bytes (add s bytes))
+  in read_bytes empty end);
+
+end;
--- a/src/Pure/ROOT.ML	Mon Jun 20 16:15:07 2022 +0200
+++ b/src/Pure/ROOT.ML	Tue Jun 21 13:14:09 2022 +0200
@@ -79,6 +79,7 @@
 ML_file "General/url.ML";
 ML_file "System/bash.ML";
 ML_file "General/file.ML";
+ML_file "General/bytes.ML";
 ML_file "General/long_name.ML";
 ML_file "General/binding.ML";
 ML_file "General/seq.ML";