src/Pure/General/bytes.ML
author nipkow
Tue, 17 Jun 2025 14:11:40 +0200
changeset 82733 8b537e1af2ec
parent 82088 555c95138e26
permissions -rw-r--r--
reinstated intersection of lists as inter_list_set
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/General/bytes.ML
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
     3
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
     4
Scalable byte strings, with incremental construction (add content to the
75572
833f26c3a3af tuned comments;
wenzelm
parents: 75571
diff changeset
     5
end).
833f26c3a3af tuned comments;
wenzelm
parents: 75571
diff changeset
     6
833f26c3a3af tuned comments;
wenzelm
parents: 75571
diff changeset
     7
Note: type string is implicitly limited by String.maxSize (approx. 64 MB on
833f26c3a3af tuned comments;
wenzelm
parents: 75571
diff changeset
     8
64_32 architecture).
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
     9
*)
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    10
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    11
signature BYTES =
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    12
sig
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    13
  val chunk_size: int
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    14
  type T
75603
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
    15
  val eq: T * T -> bool
80565
2acdaa0a7d29 clarified signature, following Isabelle/Scala;
wenzelm
parents: 77847
diff changeset
    16
  val size: T -> int
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    17
  val contents: T -> string list
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    18
  val contents_blob: T -> XML.body
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    19
  val content: T -> string
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    20
  val is_empty: T -> bool
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    21
  val empty: T
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    22
  val build: (T -> T) -> T
75568
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    23
  val add_substring: substring -> T -> T
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    24
  val add: string -> T -> T
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    25
  val beginning: int -> T -> string
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    26
  val exists_string: (string -> bool) -> T -> bool
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    27
  val forall_string: (string -> bool) -> T -> bool
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    28
  val last_string: T -> string option
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    29
  val trim_line: T -> T
75574
5945c6f5126a clarified signature: more operations;
wenzelm
parents: 75573
diff changeset
    30
  val append: T -> T -> T
75603
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
    31
  val appends: T list -> T
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    32
  val string: string -> T
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    33
  val newline: T
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    34
  val buffer: Buffer.T -> T
75594
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
    35
  val space_explode: string -> T -> string list
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
    36
  val split_lines: T -> string list
75596
7ff9745609d7 more operations;
wenzelm
parents: 75594
diff changeset
    37
  val trim_split_lines: T -> string list
75594
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
    38
  val cat_lines: string list -> T
82088
555c95138e26 more operations;
wenzelm
parents: 80877
diff changeset
    39
  val terminate_lines: string list -> T
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    40
  val read_block: int -> BinIO.instream -> string
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    41
  val read_stream: int -> BinIO.instream -> T
75570
ad957ab06f72 tuned signature: more operations;
wenzelm
parents: 75569
diff changeset
    42
  val write_stream: BinIO.outstream -> T -> unit
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    43
  val read: Path.T -> T
75570
ad957ab06f72 tuned signature: more operations;
wenzelm
parents: 75569
diff changeset
    44
  val write: Path.T -> T -> unit
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    45
end;
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    46
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    47
structure Bytes: BYTES =
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    48
struct
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    49
75576
8c5eedb6c983 tuned comments;
wenzelm
parents: 75575
diff changeset
    50
(* primitive operations *)
8c5eedb6c983 tuned comments;
wenzelm
parents: 75575
diff changeset
    51
80877
e55723709fab less ambitious Bytes.chunk_size for potentially more stable Poly/ML GC (see f0d8f659b19a, but also java.io.InputStream.DEFAULT_BUFFER_SIZE);
wenzelm
parents: 80803
diff changeset
    52
val chunk_size = 16384;
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    53
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    54
abstype T = Bytes of
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    55
  {buffer: string list, chunks: string list, m: int (*buffer size*), n: int (*chunks size*)}
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    56
with
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    57
80565
2acdaa0a7d29 clarified signature, following Isabelle/Scala;
wenzelm
parents: 77847
diff changeset
    58
fun size (Bytes {m, n, ...}) = m + n;
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    59
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    60
val compact = implode o rev;
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    61
75603
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
    62
fun eq (Bytes {buffer, chunks, m, n}, Bytes {buffer = buffer', chunks = chunks', m = m', n = n'}) =
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
    63
  m = m' andalso n = n' andalso chunks = chunks' andalso
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
    64
  (buffer = buffer' orelse compact buffer = compact buffer);
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
    65
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    66
fun contents (Bytes {buffer, chunks, ...}) =
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    67
  rev (chunks |> not (null buffer) ? cons (compact buffer));
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    68
80794
d4c489401844 avoid redundant XML.blob: Bytes.contents consist of larger chunks;
wenzelm
parents: 80567
diff changeset
    69
val contents_blob = contents #> map XML.Text;
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    70
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    71
val content = implode o contents;
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    72
80565
2acdaa0a7d29 clarified signature, following Isabelle/Scala;
wenzelm
parents: 77847
diff changeset
    73
fun is_empty bytes = size bytes = 0;
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    74
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    75
val empty = Bytes {buffer = [], chunks = [], m = 0, n = 0};
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    76
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    77
fun build (f: T -> T) = f empty;
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    78
75568
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    79
fun add_substring s (bytes as Bytes {buffer, chunks, m, n}) =
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    80
  if Substring.isEmpty s then bytes
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    81
  else
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    82
    let val l = Substring.size s in
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    83
      if l + m < chunk_size
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    84
      then Bytes {buffer = Substring.string s :: buffer, chunks = chunks, m = l + m, n = n}
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    85
      else
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    86
        let
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    87
          val k = chunk_size - m;
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    88
          val chunk = compact (Substring.string (Substring.slice (s, 0, SOME k)) :: buffer);
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    89
          val s' = Substring.slice (s, k, SOME (l - k));
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    90
          val bytes' = Bytes {buffer = [], chunks = chunk :: chunks, m = 0, n = chunk_size + n};
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    91
        in add_substring s' bytes' end
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    92
    end;
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    93
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    94
val add = add_substring o Substring.full;
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    95
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    96
fun exists_string pred (Bytes {buffer, chunks, ...}) =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    97
  let val ex = (exists o Library.exists_string) pred
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    98
  in ex buffer orelse ex chunks end;
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    99
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   100
fun forall_string pred = not o exists_string (not o pred);
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   101
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   102
fun last_string (Bytes {buffer, chunks, ...}) =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   103
  (case buffer of
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   104
    s :: _ => Library.last_string s
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   105
  | [] =>
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   106
      (case chunks of
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   107
        s :: _ => Library.last_string s
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   108
      | [] => NONE));
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   109
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   110
fun trim_line (bytes as Bytes {buffer, chunks, ...}) =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   111
  let
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   112
    val is_line =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   113
      (case last_string bytes of
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   114
        SOME s => Symbol.is_ascii_line_terminator s
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   115
      | NONE => false);
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   116
  in
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   117
    if is_line then
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   118
      let
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   119
        val (last_chunk, chunks') =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   120
          (case chunks of
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   121
            [] => ("", [])
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   122
          | c :: cs => (c, cs));
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   123
        val trimed = Library.trim_line (last_chunk ^ compact buffer);
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   124
      in build (fold_rev add chunks' #> add trimed) end
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   125
    else bytes
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   126
  end;
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   127
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   128
end;
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   129
75576
8c5eedb6c983 tuned comments;
wenzelm
parents: 75575
diff changeset
   130
8c5eedb6c983 tuned comments;
wenzelm
parents: 75575
diff changeset
   131
(* derived operations *)
8c5eedb6c983 tuned comments;
wenzelm
parents: 75575
diff changeset
   132
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   133
fun beginning n bytes =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   134
  let
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   135
    val dots = " ...";
80565
2acdaa0a7d29 clarified signature, following Isabelle/Scala;
wenzelm
parents: 77847
diff changeset
   136
    val m = (String.maxSize - String.size dots) div chunk_size;
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   137
    val a = implode (take m (contents bytes));
80565
2acdaa0a7d29 clarified signature, following Isabelle/Scala;
wenzelm
parents: 77847
diff changeset
   138
    val b = String.substring (a, 0, Int.min (n, String.size a));
2acdaa0a7d29 clarified signature, following Isabelle/Scala;
wenzelm
parents: 77847
diff changeset
   139
  in if String.size b < size bytes then b ^ dots else b end;
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   140
75574
5945c6f5126a clarified signature: more operations;
wenzelm
parents: 75573
diff changeset
   141
fun append bytes1 bytes2 =  (*left-associative*)
5945c6f5126a clarified signature: more operations;
wenzelm
parents: 75573
diff changeset
   142
  if is_empty bytes1 then bytes2
5945c6f5126a clarified signature: more operations;
wenzelm
parents: 75573
diff changeset
   143
  else if is_empty bytes2 then bytes1
5945c6f5126a clarified signature: more operations;
wenzelm
parents: 75573
diff changeset
   144
  else bytes1 |> fold add (contents bytes2);
5945c6f5126a clarified signature: more operations;
wenzelm
parents: 75573
diff changeset
   145
75603
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
   146
val appends = build o fold (fn b => fn a => append a b);
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
   147
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   148
val string = build o add;
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   149
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   150
val newline = string "\n";
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   151
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   152
val buffer = build o fold add o Buffer.contents;
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   153
75592
f72b88f2842c tuned comments;
wenzelm
parents: 75581
diff changeset
   154
75594
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   155
(* space_explode *)
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   156
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   157
fun space_explode sep bytes =
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   158
  if is_empty bytes then []
80565
2acdaa0a7d29 clarified signature, following Isabelle/Scala;
wenzelm
parents: 77847
diff changeset
   159
  else if String.size sep <> 1 then [content bytes]
75594
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   160
  else
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   161
    let
77847
3bb6468d202e more operations, following Isabelle/ML conventions;
wenzelm
parents: 76277
diff changeset
   162
      val sep_char = String.nth sep 0;
75594
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   163
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   164
      fun add_part s part =
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   165
        Buffer.add (Substring.string s) (the_default Buffer.empty part);
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   166
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   167
      fun explode head tail part res =
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   168
        if Substring.isEmpty head then
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   169
          (case tail of
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   170
            [] =>
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   171
              (case part of
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   172
                NONE => rev res
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   173
              | SOME buf => rev (Buffer.content buf :: res))
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   174
          | chunk :: chunks => explode (Substring.full chunk) chunks part res)
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   175
        else
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   176
          (case Substring.fields (fn c => c = sep_char) head of
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   177
            [a] => explode Substring.empty tail (SOME (add_part a part)) res
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   178
          | a :: rest =>
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   179
              let
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   180
                val (bs, c) = split_last rest;
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   181
                val res' = res
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   182
                  |> cons (Buffer.content (add_part a part))
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   183
                  |> fold (cons o Substring.string) bs;
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   184
                val part' = SOME (add_part c NONE);
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   185
              in explode Substring.empty tail part' res' end)
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   186
    in explode Substring.empty (contents bytes) NONE [] end;
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   187
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   188
val split_lines = space_explode "\n";
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   189
75596
7ff9745609d7 more operations;
wenzelm
parents: 75594
diff changeset
   190
val trim_split_lines = trim_line #> split_lines #> map Library.trim_line;
7ff9745609d7 more operations;
wenzelm
parents: 75594
diff changeset
   191
75594
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   192
fun cat_lines lines = build (fold add (separate "\n" lines));
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   193
82088
555c95138e26 more operations;
wenzelm
parents: 80877
diff changeset
   194
fun terminate_lines lines = build (fold (fn line => add line #> add "\n") lines);
555c95138e26 more operations;
wenzelm
parents: 80877
diff changeset
   195
75594
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   196
75592
f72b88f2842c tuned comments;
wenzelm
parents: 75581
diff changeset
   197
(* IO *)
f72b88f2842c tuned comments;
wenzelm
parents: 75581
diff changeset
   198
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   199
fun read_block limit =
75615
4494cd69f97f clarified modules;
wenzelm
parents: 75603
diff changeset
   200
  File_Stream.input_size (if limit < 0 then chunk_size else Int.min (chunk_size, limit));
75573
4fc8a35579b2 tuned signature;
wenzelm
parents: 75572
diff changeset
   201
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   202
fun read_stream limit stream =
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   203
  let
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   204
    fun read bytes =
80565
2acdaa0a7d29 clarified signature, following Isabelle/Scala;
wenzelm
parents: 77847
diff changeset
   205
      (case read_block (limit - size bytes) stream of
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   206
        "" => bytes
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   207
      | s => read (add s bytes))
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   208
  in read empty end;
75570
ad957ab06f72 tuned signature: more operations;
wenzelm
parents: 75569
diff changeset
   209
75602
7a0d4c126f79 proper execution of Bytes.write;
wenzelm
parents: 75596
diff changeset
   210
fun write_stream stream bytes =
75615
4494cd69f97f clarified modules;
wenzelm
parents: 75603
diff changeset
   211
  File_Stream.outputs stream (contents bytes);
75570
ad957ab06f72 tuned signature: more operations;
wenzelm
parents: 75569
diff changeset
   212
75615
4494cd69f97f clarified modules;
wenzelm
parents: 75603
diff changeset
   213
fun read path = File_Stream.open_input (fn stream => read_stream ~1 stream) path;
75602
7a0d4c126f79 proper execution of Bytes.write;
wenzelm
parents: 75596
diff changeset
   214
75615
4494cd69f97f clarified modules;
wenzelm
parents: 75603
diff changeset
   215
fun write path bytes = File_Stream.open_output (fn stream => write_stream stream bytes) path;
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   216
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   217
end;