src/Pure/General/bytes.ML
author wenzelm
Thu, 23 Jun 2022 21:25:56 +0200
changeset 75603 fc8d64a578e4
parent 75602 7a0d4c126f79
child 75615 4494cd69f97f
permissions -rw-r--r--
more operations;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/General/bytes.ML
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
     3
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
     4
Scalable byte strings, with incremental construction (add content to the
75572
833f26c3a3af tuned comments;
wenzelm
parents: 75571
diff changeset
     5
end).
833f26c3a3af tuned comments;
wenzelm
parents: 75571
diff changeset
     6
833f26c3a3af tuned comments;
wenzelm
parents: 75571
diff changeset
     7
Note: type string is implicitly limited by String.maxSize (approx. 64 MB on
833f26c3a3af tuned comments;
wenzelm
parents: 75571
diff changeset
     8
64_32 architecture).
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
     9
*)
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    10
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    11
signature BYTES =
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    12
sig
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    13
  val chunk_size: int
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    14
  type T
75603
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
    15
  val eq: T * T -> bool
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    16
  val length: T -> int
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    17
  val contents: T -> string list
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    18
  val contents_blob: T -> XML.body
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    19
  val content: T -> string
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    20
  val is_empty: T -> bool
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    21
  val empty: T
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    22
  val build: (T -> T) -> T
75568
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    23
  val add_substring: substring -> T -> T
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    24
  val add: string -> T -> T
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    25
  val beginning: int -> T -> string
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    26
  val exists_string: (string -> bool) -> T -> bool
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    27
  val forall_string: (string -> bool) -> T -> bool
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    28
  val last_string: T -> string option
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    29
  val trim_line: T -> T
75574
5945c6f5126a clarified signature: more operations;
wenzelm
parents: 75573
diff changeset
    30
  val append: T -> T -> T
75603
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
    31
  val appends: T list -> T
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    32
  val string: string -> T
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    33
  val newline: T
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    34
  val buffer: Buffer.T -> T
75594
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
    35
  val space_explode: string -> T -> string list
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
    36
  val split_lines: T -> string list
75596
7ff9745609d7 more operations;
wenzelm
parents: 75594
diff changeset
    37
  val trim_split_lines: T -> string list
75594
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
    38
  val cat_lines: string list -> T
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    39
  val read_block: int -> BinIO.instream -> string
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    40
  val read_stream: int -> BinIO.instream -> T
75570
ad957ab06f72 tuned signature: more operations;
wenzelm
parents: 75569
diff changeset
    41
  val write_stream: BinIO.outstream -> T -> unit
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    42
  val read: Path.T -> T
75570
ad957ab06f72 tuned signature: more operations;
wenzelm
parents: 75569
diff changeset
    43
  val write: Path.T -> T -> unit
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    44
end;
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    45
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    46
structure Bytes: BYTES =
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    47
struct
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    48
75576
8c5eedb6c983 tuned comments;
wenzelm
parents: 75575
diff changeset
    49
(* primitive operations *)
8c5eedb6c983 tuned comments;
wenzelm
parents: 75575
diff changeset
    50
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    51
val chunk_size = 1024 * 1024;
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    52
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    53
abstype T = Bytes of
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    54
  {buffer: string list, chunks: string list, m: int (*buffer size*), n: int (*chunks size*)}
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    55
with
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    56
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    57
fun length (Bytes {m, n, ...}) = m + n;
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    58
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    59
val compact = implode o rev;
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    60
75603
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
    61
fun eq (Bytes {buffer, chunks, m, n}, Bytes {buffer = buffer', chunks = chunks', m = m', n = n'}) =
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
    62
  m = m' andalso n = n' andalso chunks = chunks' andalso
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
    63
  (buffer = buffer' orelse compact buffer = compact buffer);
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
    64
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    65
fun contents (Bytes {buffer, chunks, ...}) =
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    66
  rev (chunks |> not (null buffer) ? cons (compact buffer));
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    67
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    68
val contents_blob = contents #> XML.blob;
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    69
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    70
val content = implode o contents;
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    71
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    72
fun is_empty bytes = length bytes = 0;
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    73
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    74
val empty = Bytes {buffer = [], chunks = [], m = 0, n = 0};
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    75
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    76
fun build (f: T -> T) = f empty;
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    77
75568
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    78
fun add_substring s (bytes as Bytes {buffer, chunks, m, n}) =
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    79
  if Substring.isEmpty s then bytes
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    80
  else
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    81
    let val l = Substring.size s in
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    82
      if l + m < chunk_size
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    83
      then Bytes {buffer = Substring.string s :: buffer, chunks = chunks, m = l + m, n = n}
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    84
      else
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    85
        let
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    86
          val k = chunk_size - m;
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    87
          val chunk = compact (Substring.string (Substring.slice (s, 0, SOME k)) :: buffer);
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    88
          val s' = Substring.slice (s, k, SOME (l - k));
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    89
          val bytes' = Bytes {buffer = [], chunks = chunk :: chunks, m = 0, n = chunk_size + n};
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    90
        in add_substring s' bytes' end
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    91
    end;
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    92
a8539b1c8775 clarified signature: avoid repeated string copying via Substring.slice;
wenzelm
parents: 75567
diff changeset
    93
val add = add_substring o Substring.full;
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
    94
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    95
fun exists_string pred (Bytes {buffer, chunks, ...}) =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    96
  let val ex = (exists o Library.exists_string) pred
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    97
  in ex buffer orelse ex chunks end;
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    98
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
    99
fun forall_string pred = not o exists_string (not o pred);
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   100
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   101
fun last_string (Bytes {buffer, chunks, ...}) =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   102
  (case buffer of
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   103
    s :: _ => Library.last_string s
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   104
  | [] =>
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   105
      (case chunks of
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   106
        s :: _ => Library.last_string s
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   107
      | [] => NONE));
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   108
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   109
fun trim_line (bytes as Bytes {buffer, chunks, ...}) =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   110
  let
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   111
    val is_line =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   112
      (case last_string bytes of
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   113
        SOME s => Symbol.is_ascii_line_terminator s
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   114
      | NONE => false);
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   115
  in
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   116
    if is_line then
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   117
      let
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   118
        val (last_chunk, chunks') =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   119
          (case chunks of
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   120
            [] => ("", [])
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   121
          | c :: cs => (c, cs));
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   122
        val trimed = Library.trim_line (last_chunk ^ compact buffer);
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   123
      in build (fold_rev add chunks' #> add trimed) end
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   124
    else bytes
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   125
  end;
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   126
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   127
end;
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   128
75576
8c5eedb6c983 tuned comments;
wenzelm
parents: 75575
diff changeset
   129
8c5eedb6c983 tuned comments;
wenzelm
parents: 75575
diff changeset
   130
(* derived operations *)
8c5eedb6c983 tuned comments;
wenzelm
parents: 75575
diff changeset
   131
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   132
fun beginning n bytes =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   133
  let
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   134
    val dots = " ...";
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   135
    val m = (String.maxSize - size dots) div chunk_size;
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   136
    val a = implode (take m (contents bytes));
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   137
    val b = String.substring (a, 0, Int.min (n, size a));
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   138
  in if size b < length bytes then b ^ dots else b end;
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   139
75574
5945c6f5126a clarified signature: more operations;
wenzelm
parents: 75573
diff changeset
   140
fun append bytes1 bytes2 =  (*left-associative*)
5945c6f5126a clarified signature: more operations;
wenzelm
parents: 75573
diff changeset
   141
  if is_empty bytes1 then bytes2
5945c6f5126a clarified signature: more operations;
wenzelm
parents: 75573
diff changeset
   142
  else if is_empty bytes2 then bytes1
5945c6f5126a clarified signature: more operations;
wenzelm
parents: 75573
diff changeset
   143
  else bytes1 |> fold add (contents bytes2);
5945c6f5126a clarified signature: more operations;
wenzelm
parents: 75573
diff changeset
   144
75603
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
   145
val appends = build o fold (fn b => fn a => append a b);
fc8d64a578e4 more operations;
wenzelm
parents: 75602
diff changeset
   146
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   147
val string = build o add;
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   148
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   149
val newline = string "\n";
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   150
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   151
val buffer = build o fold add o Buffer.contents;
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   152
75592
f72b88f2842c tuned comments;
wenzelm
parents: 75581
diff changeset
   153
75594
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   154
(* space_explode *)
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   155
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   156
fun space_explode sep bytes =
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   157
  if is_empty bytes then []
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   158
  else if size sep <> 1 then [content bytes]
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   159
  else
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   160
    let
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   161
      val sep_char = String.sub (sep, 0);
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   162
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   163
      fun add_part s part =
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   164
        Buffer.add (Substring.string s) (the_default Buffer.empty part);
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   165
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   166
      fun explode head tail part res =
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   167
        if Substring.isEmpty head then
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   168
          (case tail of
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   169
            [] =>
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   170
              (case part of
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   171
                NONE => rev res
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   172
              | SOME buf => rev (Buffer.content buf :: res))
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   173
          | chunk :: chunks => explode (Substring.full chunk) chunks part res)
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   174
        else
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   175
          (case Substring.fields (fn c => c = sep_char) head of
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   176
            [a] => explode Substring.empty tail (SOME (add_part a part)) res
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   177
          | a :: rest =>
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   178
              let
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   179
                val (bs, c) = split_last rest;
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   180
                val res' = res
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   181
                  |> cons (Buffer.content (add_part a part))
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   182
                  |> fold (cons o Substring.string) bs;
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   183
                val part' = SOME (add_part c NONE);
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   184
              in explode Substring.empty tail part' res' end)
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   185
    in explode Substring.empty (contents bytes) NONE [] end;
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   186
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   187
val split_lines = space_explode "\n";
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   188
75596
7ff9745609d7 more operations;
wenzelm
parents: 75594
diff changeset
   189
val trim_split_lines = trim_line #> split_lines #> map Library.trim_line;
7ff9745609d7 more operations;
wenzelm
parents: 75594
diff changeset
   190
75594
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   191
fun cat_lines lines = build (fold add (separate "\n" lines));
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   192
303f885d4a8c clarified signature: more operations;
wenzelm
parents: 75592
diff changeset
   193
75592
f72b88f2842c tuned comments;
wenzelm
parents: 75581
diff changeset
   194
(* IO *)
f72b88f2842c tuned comments;
wenzelm
parents: 75581
diff changeset
   195
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   196
fun read_block limit =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   197
  File.input_size (if limit < 0 then chunk_size else Int.min (chunk_size, limit));
75573
4fc8a35579b2 tuned signature;
wenzelm
parents: 75572
diff changeset
   198
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   199
fun read_stream limit stream =
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   200
  let
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   201
    fun read bytes =
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   202
      (case read_block (limit - length bytes) stream of
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   203
        "" => bytes
75577
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   204
      | s => read (add s bytes))
c51e1cef1eae more scalable byte messages, notably for Scala functions in ML;
wenzelm
parents: 75576
diff changeset
   205
  in read empty end;
75570
ad957ab06f72 tuned signature: more operations;
wenzelm
parents: 75569
diff changeset
   206
75602
7a0d4c126f79 proper execution of Bytes.write;
wenzelm
parents: 75596
diff changeset
   207
fun write_stream stream bytes =
7a0d4c126f79 proper execution of Bytes.write;
wenzelm
parents: 75596
diff changeset
   208
  File.outputs stream (contents bytes);
75570
ad957ab06f72 tuned signature: more operations;
wenzelm
parents: 75569
diff changeset
   209
75602
7a0d4c126f79 proper execution of Bytes.write;
wenzelm
parents: 75596
diff changeset
   210
fun read path = File.open_input (fn stream => read_stream ~1 stream) path;
7a0d4c126f79 proper execution of Bytes.write;
wenzelm
parents: 75596
diff changeset
   211
7a0d4c126f79 proper execution of Bytes.write;
wenzelm
parents: 75596
diff changeset
   212
fun write path bytes = File.open_output (fn stream => write_stream stream bytes) path;
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   213
75576
8c5eedb6c983 tuned comments;
wenzelm
parents: 75575
diff changeset
   214
8c5eedb6c983 tuned comments;
wenzelm
parents: 75575
diff changeset
   215
(* ML pretty printing *)
8c5eedb6c983 tuned comments;
wenzelm
parents: 75575
diff changeset
   216
75575
06f8b072f28e clarified ML pretty printing;
wenzelm
parents: 75574
diff changeset
   217
val _ =
06f8b072f28e clarified ML pretty printing;
wenzelm
parents: 75574
diff changeset
   218
  ML_system_pp (fn _ => fn _ => fn bytes =>
06f8b072f28e clarified ML pretty printing;
wenzelm
parents: 75574
diff changeset
   219
    PolyML.PrettyString ("Bytes {length = " ^ string_of_int (length bytes) ^ "}"))
06f8b072f28e clarified ML pretty printing;
wenzelm
parents: 75574
diff changeset
   220
75567
94321fd25c8a support for scalable byte strings, with incremental construction;
wenzelm
parents:
diff changeset
   221
end;