| author | wenzelm | 
| Wed, 02 Apr 2025 13:39:12 +0200 | |
| changeset 82413 | a6046b6d23b4 | 
| parent 82088 | 555c95138e26 | 
| permissions | -rw-r--r-- | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
1  | 
(* Title: Pure/General/bytes.ML  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
2  | 
Author: Makarius  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
3  | 
|
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
4  | 
Scalable byte strings, with incremental construction (add content to the  | 
| 75572 | 5  | 
end).  | 
6  | 
||
7  | 
Note: type string is implicitly limited by String.maxSize (approx. 64 MB on  | 
|
8  | 
64_32 architecture).  | 
|
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
9  | 
*)  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
10  | 
|
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
11  | 
signature BYTES =  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
12  | 
sig  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
13  | 
val chunk_size: int  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
14  | 
type T  | 
| 75603 | 15  | 
val eq: T * T -> bool  | 
| 80565 | 16  | 
val size: T -> int  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
17  | 
val contents: T -> string list  | 
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
18  | 
val contents_blob: T -> XML.body  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
19  | 
val content: T -> string  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
20  | 
val is_empty: T -> bool  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
21  | 
val empty: T  | 
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
22  | 
val build: (T -> T) -> T  | 
| 
75568
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
23  | 
val add_substring: substring -> T -> T  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
24  | 
val add: string -> T -> T  | 
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
25  | 
val beginning: int -> T -> string  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
26  | 
val exists_string: (string -> bool) -> T -> bool  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
27  | 
val forall_string: (string -> bool) -> T -> bool  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
28  | 
val last_string: T -> string option  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
29  | 
val trim_line: T -> T  | 
| 75574 | 30  | 
val append: T -> T -> T  | 
| 75603 | 31  | 
val appends: T list -> T  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
32  | 
val string: string -> T  | 
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
33  | 
val newline: T  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
34  | 
val buffer: Buffer.T -> T  | 
| 75594 | 35  | 
val space_explode: string -> T -> string list  | 
36  | 
val split_lines: T -> string list  | 
|
| 75596 | 37  | 
val trim_split_lines: T -> string list  | 
| 75594 | 38  | 
val cat_lines: string list -> T  | 
| 82088 | 39  | 
val terminate_lines: string list -> T  | 
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
40  | 
val read_block: int -> BinIO.instream -> string  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
41  | 
val read_stream: int -> BinIO.instream -> T  | 
| 75570 | 42  | 
val write_stream: BinIO.outstream -> T -> unit  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
43  | 
val read: Path.T -> T  | 
| 75570 | 44  | 
val write: Path.T -> T -> unit  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
45  | 
end;  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
46  | 
|
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
47  | 
structure Bytes: BYTES =  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
48  | 
struct  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
49  | 
|
| 75576 | 50  | 
(* primitive operations *)  | 
51  | 
||
| 
80877
 
e55723709fab
less ambitious Bytes.chunk_size for potentially more stable Poly/ML GC (see f0d8f659b19a, but also java.io.InputStream.DEFAULT_BUFFER_SIZE);
 
wenzelm 
parents: 
80803 
diff
changeset
 | 
52  | 
val chunk_size = 16384;  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
53  | 
|
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
54  | 
abstype T = Bytes of  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
55  | 
  {buffer: string list, chunks: string list, m: int (*buffer size*), n: int (*chunks size*)}
 | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
56  | 
with  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
57  | 
|
| 80565 | 58  | 
fun size (Bytes {m, n, ...}) = m + n;
 | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
59  | 
|
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
60  | 
val compact = implode o rev;  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
61  | 
|
| 75603 | 62  | 
fun eq (Bytes {buffer, chunks, m, n}, Bytes {buffer = buffer', chunks = chunks', m = m', n = n'}) =
 | 
63  | 
m = m' andalso n = n' andalso chunks = chunks' andalso  | 
|
64  | 
(buffer = buffer' orelse compact buffer = compact buffer);  | 
|
65  | 
||
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
66  | 
fun contents (Bytes {buffer, chunks, ...}) =
 | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
67  | 
rev (chunks |> not (null buffer) ? cons (compact buffer));  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
68  | 
|
| 
80794
 
d4c489401844
avoid redundant XML.blob: Bytes.contents consist of larger chunks;
 
wenzelm 
parents: 
80567 
diff
changeset
 | 
69  | 
val contents_blob = contents #> map XML.Text;  | 
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
70  | 
|
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
71  | 
val content = implode o contents;  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
72  | 
|
| 80565 | 73  | 
fun is_empty bytes = size bytes = 0;  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
74  | 
|
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
75  | 
val empty = Bytes {buffer = [], chunks = [], m = 0, n = 0};
 | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
76  | 
|
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
77  | 
fun build (f: T -> T) = f empty;  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
78  | 
|
| 
75568
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
79  | 
fun add_substring s (bytes as Bytes {buffer, chunks, m, n}) =
 | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
80  | 
if Substring.isEmpty s then bytes  | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
81  | 
else  | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
82  | 
let val l = Substring.size s in  | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
83  | 
if l + m < chunk_size  | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
84  | 
      then Bytes {buffer = Substring.string s :: buffer, chunks = chunks, m = l + m, n = n}
 | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
85  | 
else  | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
86  | 
let  | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
87  | 
val k = chunk_size - m;  | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
88  | 
val chunk = compact (Substring.string (Substring.slice (s, 0, SOME k)) :: buffer);  | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
89  | 
val s' = Substring.slice (s, k, SOME (l - k));  | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
90  | 
          val bytes' = Bytes {buffer = [], chunks = chunk :: chunks, m = 0, n = chunk_size + n};
 | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
91  | 
in add_substring s' bytes' end  | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
92  | 
end;  | 
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
93  | 
|
| 
 
a8539b1c8775
clarified signature: avoid repeated string copying via Substring.slice;
 
wenzelm 
parents: 
75567 
diff
changeset
 | 
94  | 
val add = add_substring o Substring.full;  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
95  | 
|
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
96  | 
fun exists_string pred (Bytes {buffer, chunks, ...}) =
 | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
97  | 
let val ex = (exists o Library.exists_string) pred  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
98  | 
in ex buffer orelse ex chunks end;  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
99  | 
|
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
100  | 
fun forall_string pred = not o exists_string (not o pred);  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
101  | 
|
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
102  | 
fun last_string (Bytes {buffer, chunks, ...}) =
 | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
103  | 
(case buffer of  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
104  | 
s :: _ => Library.last_string s  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
105  | 
| [] =>  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
106  | 
(case chunks of  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
107  | 
s :: _ => Library.last_string s  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
108  | 
| [] => NONE));  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
109  | 
|
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
110  | 
fun trim_line (bytes as Bytes {buffer, chunks, ...}) =
 | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
111  | 
let  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
112  | 
val is_line =  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
113  | 
(case last_string bytes of  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
114  | 
SOME s => Symbol.is_ascii_line_terminator s  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
115  | 
| NONE => false);  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
116  | 
in  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
117  | 
if is_line then  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
118  | 
let  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
119  | 
val (last_chunk, chunks') =  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
120  | 
(case chunks of  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
121  | 
            [] => ("", [])
 | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
122  | 
| c :: cs => (c, cs));  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
123  | 
val trimed = Library.trim_line (last_chunk ^ compact buffer);  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
124  | 
in build (fold_rev add chunks' #> add trimed) end  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
125  | 
else bytes  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
126  | 
end;  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
127  | 
|
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
128  | 
end;  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
129  | 
|
| 75576 | 130  | 
|
131  | 
(* derived operations *)  | 
|
132  | 
||
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
133  | 
fun beginning n bytes =  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
134  | 
let  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
135  | 
val dots = " ...";  | 
| 80565 | 136  | 
val m = (String.maxSize - String.size dots) div chunk_size;  | 
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
137  | 
val a = implode (take m (contents bytes));  | 
| 80565 | 138  | 
val b = String.substring (a, 0, Int.min (n, String.size a));  | 
139  | 
in if String.size b < size bytes then b ^ dots else b end;  | 
|
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
140  | 
|
| 75574 | 141  | 
fun append bytes1 bytes2 = (*left-associative*)  | 
142  | 
if is_empty bytes1 then bytes2  | 
|
143  | 
else if is_empty bytes2 then bytes1  | 
|
144  | 
else bytes1 |> fold add (contents bytes2);  | 
|
145  | 
||
| 75603 | 146  | 
val appends = build o fold (fn b => fn a => append a b);  | 
147  | 
||
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
148  | 
val string = build o add;  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
149  | 
|
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
150  | 
val newline = string "\n";  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
151  | 
|
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
152  | 
val buffer = build o fold add o Buffer.contents;  | 
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
153  | 
|
| 75592 | 154  | 
|
| 75594 | 155  | 
(* space_explode *)  | 
156  | 
||
157  | 
fun space_explode sep bytes =  | 
|
158  | 
if is_empty bytes then []  | 
|
| 80565 | 159  | 
else if String.size sep <> 1 then [content bytes]  | 
| 75594 | 160  | 
else  | 
161  | 
let  | 
|
| 
77847
 
3bb6468d202e
more operations, following Isabelle/ML conventions;
 
wenzelm 
parents: 
76277 
diff
changeset
 | 
162  | 
val sep_char = String.nth sep 0;  | 
| 75594 | 163  | 
|
164  | 
fun add_part s part =  | 
|
165  | 
Buffer.add (Substring.string s) (the_default Buffer.empty part);  | 
|
166  | 
||
167  | 
fun explode head tail part res =  | 
|
168  | 
if Substring.isEmpty head then  | 
|
169  | 
(case tail of  | 
|
170  | 
[] =>  | 
|
171  | 
(case part of  | 
|
172  | 
NONE => rev res  | 
|
173  | 
| SOME buf => rev (Buffer.content buf :: res))  | 
|
174  | 
| chunk :: chunks => explode (Substring.full chunk) chunks part res)  | 
|
175  | 
else  | 
|
176  | 
(case Substring.fields (fn c => c = sep_char) head of  | 
|
177  | 
[a] => explode Substring.empty tail (SOME (add_part a part)) res  | 
|
178  | 
| a :: rest =>  | 
|
179  | 
let  | 
|
180  | 
val (bs, c) = split_last rest;  | 
|
181  | 
val res' = res  | 
|
182  | 
|> cons (Buffer.content (add_part a part))  | 
|
183  | 
|> fold (cons o Substring.string) bs;  | 
|
184  | 
val part' = SOME (add_part c NONE);  | 
|
185  | 
in explode Substring.empty tail part' res' end)  | 
|
186  | 
in explode Substring.empty (contents bytes) NONE [] end;  | 
|
187  | 
||
188  | 
val split_lines = space_explode "\n";  | 
|
189  | 
||
| 75596 | 190  | 
val trim_split_lines = trim_line #> split_lines #> map Library.trim_line;  | 
191  | 
||
| 75594 | 192  | 
fun cat_lines lines = build (fold add (separate "\n" lines));  | 
193  | 
||
| 82088 | 194  | 
fun terminate_lines lines = build (fold (fn line => add line #> add "\n") lines);  | 
195  | 
||
| 75594 | 196  | 
|
| 75592 | 197  | 
(* IO *)  | 
198  | 
||
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
199  | 
fun read_block limit =  | 
| 75615 | 200  | 
File_Stream.input_size (if limit < 0 then chunk_size else Int.min (chunk_size, limit));  | 
| 75573 | 201  | 
|
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
202  | 
fun read_stream limit stream =  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
203  | 
let  | 
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
204  | 
fun read bytes =  | 
| 80565 | 205  | 
(case read_block (limit - size bytes) stream of  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
206  | 
"" => bytes  | 
| 
75577
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
207  | 
| s => read (add s bytes))  | 
| 
 
c51e1cef1eae
more scalable byte messages, notably for Scala functions in ML;
 
wenzelm 
parents: 
75576 
diff
changeset
 | 
208  | 
in read empty end;  | 
| 75570 | 209  | 
|
| 75602 | 210  | 
fun write_stream stream bytes =  | 
| 75615 | 211  | 
File_Stream.outputs stream (contents bytes);  | 
| 75570 | 212  | 
|
| 75615 | 213  | 
fun read path = File_Stream.open_input (fn stream => read_stream ~1 stream) path;  | 
| 75602 | 214  | 
|
| 75615 | 215  | 
fun write path bytes = File_Stream.open_output (fn stream => write_stream stream bytes) path;  | 
| 
75567
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
216  | 
|
| 
 
94321fd25c8a
support for scalable byte strings, with incremental construction;
 
wenzelm 
parents:  
diff
changeset
 | 
217  | 
end;  |