| author | haftmann | 
| Fri, 18 Oct 2019 18:41:33 +0000 | |
| changeset 70901 | 94a0c47b8553 | 
| parent 70292 | bc9d02f916c4 | 
| child 70998 | 7926d2fc3c4c | 
| permissions | -rw-r--r-- | 
| 6118 | 1  | 
(* Title: Pure/General/file.ML  | 
| 64698 | 2  | 
Author: Makarius  | 
| 5009 | 3  | 
|
| 64698 | 4  | 
File-system operations.  | 
| 5009 | 5  | 
*)  | 
6  | 
||
7  | 
signature FILE =  | 
|
8  | 
sig  | 
|
| 
60970
 
e08d868ceca9
clarified File.standard_path vs. File.platform_path (like Isabelle/Scala operations);
 
wenzelm 
parents: 
59058 
diff
changeset
 | 
9  | 
val standard_path: Path.T -> string  | 
| 16261 | 10  | 
val platform_path: Path.T -> string  | 
| 
62549
 
9498623b27f0
File.bash_string operations in ML as in Scala -- exclusively for GNU bash, not perl and not user output;
 
wenzelm 
parents: 
62468 
diff
changeset
 | 
11  | 
val bash_path: Path.T -> string  | 
| 69223 | 12  | 
val bash_paths: Path.T list -> string  | 
| 
42003
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
13  | 
val full_path: Path.T -> Path.T -> Path.T  | 
| 6182 | 14  | 
val tmp_path: Path.T -> Path.T  | 
| 17826 | 15  | 
val exists: Path.T -> bool  | 
16  | 
val rm: Path.T -> unit  | 
|
| 40785 | 17  | 
val is_dir: Path.T -> bool  | 
| 
69300
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
18  | 
val is_file: Path.T -> bool  | 
| 
42003
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
19  | 
val check_dir: Path.T -> Path.T  | 
| 
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
20  | 
val check_file: Path.T -> Path.T  | 
| 43848 | 21  | 
val open_dir: (OS.FileSys.dirstream -> 'a) -> Path.T -> 'a  | 
| 
60982
 
67e389f67073
precise BinIO, without newline conversion on Windows;
 
wenzelm 
parents: 
60970 
diff
changeset
 | 
22  | 
val open_input: (BinIO.instream -> 'a) -> Path.T -> 'a  | 
| 
 
67e389f67073
precise BinIO, without newline conversion on Windows;
 
wenzelm 
parents: 
60970 
diff
changeset
 | 
23  | 
val open_output: (BinIO.outstream -> 'a) -> Path.T -> 'a  | 
| 
 
67e389f67073
precise BinIO, without newline conversion on Windows;
 
wenzelm 
parents: 
60970 
diff
changeset
 | 
24  | 
val open_append: (BinIO.outstream -> 'a) -> Path.T -> 'a  | 
| 43848 | 25  | 
val fold_dir: (string -> 'a -> 'a) -> Path.T -> 'a -> 'a  | 
26  | 
val read_dir: Path.T -> string list  | 
|
| 28028 | 27  | 
val fold_lines: (string -> 'a -> 'a) -> Path.T -> 'a -> 'a  | 
| 
43845
 
d89353d17f54
added File.fold_pages for streaming of large files;
 
wenzelm 
parents: 
43616 
diff
changeset
 | 
28  | 
val fold_pages: (string -> 'a -> 'a) -> Path.T -> 'a -> 'a  | 
| 43848 | 29  | 
val read_lines: Path.T -> string list  | 
| 44879 | 30  | 
val read_pages: Path.T -> string list  | 
| 6182 | 31  | 
val read: Path.T -> string  | 
32  | 
val write: Path.T -> string -> unit  | 
|
33  | 
val append: Path.T -> string -> unit  | 
|
| 69223 | 34  | 
val generate: Path.T -> string -> unit  | 
| 
60982
 
67e389f67073
precise BinIO, without newline conversion on Windows;
 
wenzelm 
parents: 
60970 
diff
changeset
 | 
35  | 
val output: BinIO.outstream -> string -> unit  | 
| 16713 | 36  | 
val write_list: Path.T -> string list -> unit  | 
37  | 
val append_list: Path.T -> string list -> unit  | 
|
| 28028 | 38  | 
val write_buffer: Path.T -> Buffer.T -> unit  | 
| 16603 | 39  | 
val eq: Path.T * Path.T -> bool  | 
| 5009 | 40  | 
end;  | 
41  | 
||
42  | 
structure File: FILE =  | 
|
43  | 
struct  | 
|
44  | 
||
| 
26503
 
4dec4460244f
discontinued unused hooks explode_platform_path_fn, platform_path_fn, shell_path_fn;
 
wenzelm 
parents: 
26220 
diff
changeset
 | 
45  | 
(* system path representations *)  | 
| 6224 | 46  | 
|
| 
60970
 
e08d868ceca9
clarified File.standard_path vs. File.platform_path (like Isabelle/Scala operations);
 
wenzelm 
parents: 
59058 
diff
changeset
 | 
47  | 
val standard_path = Path.implode o Path.expand;  | 
| 62468 | 48  | 
val platform_path = ML_System.platform_path o standard_path;  | 
| 
26980
 
f7f48bb9a025
ident: naive caching prevents potentially slow external invocations;
 
wenzelm 
parents: 
26946 
diff
changeset
 | 
49  | 
|
| 64304 | 50  | 
val bash_path = Bash_Syntax.string o standard_path;  | 
| 69223 | 51  | 
val bash_paths = Bash_Syntax.strings o map standard_path;  | 
| 6224 | 52  | 
|
| 70292 | 53  | 
|
| 
42003
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
54  | 
(* full_path *)  | 
| 
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
55  | 
|
| 
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
56  | 
fun full_path dir path =  | 
| 
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
57  | 
let  | 
| 
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
58  | 
val path' = Path.expand path;  | 
| 
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
59  | 
val _ = Path.is_current path' andalso error "Bad file specification";  | 
| 
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
60  | 
val path'' = Path.append dir path';  | 
| 
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
61  | 
in  | 
| 
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
62  | 
if Path.is_absolute path'' then path''  | 
| 62551 | 63  | 
else Path.append (Path.explode (ML_System.standard_path (OS.FileSys.getDir ()))) path''  | 
| 
42003
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
64  | 
end;  | 
| 5009 | 65  | 
|
| 6182 | 66  | 
|
67  | 
(* tmp_path *)  | 
|
68  | 
||
69  | 
fun tmp_path path =  | 
|
70  | 
Path.append (Path.variable "ISABELLE_TMP") (Path.base path);  | 
|
| 5009 | 71  | 
|
72  | 
||
| 
26980
 
f7f48bb9a025
ident: naive caching prevents potentially slow external invocations;
 
wenzelm 
parents: 
26946 
diff
changeset
 | 
73  | 
(* directory entries *)  | 
| 
 
f7f48bb9a025
ident: naive caching prevents potentially slow external invocations;
 
wenzelm 
parents: 
26946 
diff
changeset
 | 
74  | 
|
| 
23861
 
72bb3494746f
replaced info by ident (for full identification, potentially content-based);
 
wenzelm 
parents: 
23826 
diff
changeset
 | 
75  | 
val exists = can OS.FileSys.modTime o platform_path;  | 
| 16261 | 76  | 
|
77  | 
val rm = OS.FileSys.remove o platform_path;  | 
|
78  | 
||
| 
69300
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
79  | 
fun test_dir path = the_default false (try OS.FileSys.isDir (platform_path path));  | 
| 
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
80  | 
fun is_dir path = exists path andalso test_dir path;  | 
| 
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
81  | 
fun is_file path = exists path andalso not (test_dir path);  | 
| 40785 | 82  | 
|
| 
42003
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
83  | 
fun check_dir path =  | 
| 
69300
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
84  | 
if is_dir path then path  | 
| 63668 | 85  | 
  else error ("No such directory: " ^ Path.print (Path.expand path));
 | 
| 
42003
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
86  | 
|
| 
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
87  | 
fun check_file path =  | 
| 
69300
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
88  | 
if is_file path then path  | 
| 63668 | 89  | 
  else error ("No such file: " ^ Path.print (Path.expand path));
 | 
| 
42003
 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 
wenzelm 
parents: 
41944 
diff
changeset
 | 
90  | 
|
| 16261 | 91  | 
|
| 43848 | 92  | 
(* open streams *)  | 
| 6224 | 93  | 
|
| 16261 | 94  | 
local  | 
95  | 
||
| 
69483
 
023d92df3d84
more robust open/close bracket, but with potential danger of blocking indefinitely in uninterruptible state;
 
wenzelm 
parents: 
69427 
diff
changeset
 | 
96  | 
fun with_file open_file close_file f =  | 
| 
 
023d92df3d84
more robust open/close bracket, but with potential danger of blocking indefinitely in uninterruptible state;
 
wenzelm 
parents: 
69427 
diff
changeset
 | 
97  | 
Thread_Attributes.uninterruptible (fn restore_attributes => fn path =>  | 
| 
 
023d92df3d84
more robust open/close bracket, but with potential danger of blocking indefinitely in uninterruptible state;
 
wenzelm 
parents: 
69427 
diff
changeset
 | 
98  | 
let  | 
| 
 
023d92df3d84
more robust open/close bracket, but with potential danger of blocking indefinitely in uninterruptible state;
 
wenzelm 
parents: 
69427 
diff
changeset
 | 
99  | 
val file = open_file path;  | 
| 
 
023d92df3d84
more robust open/close bracket, but with potential danger of blocking indefinitely in uninterruptible state;
 
wenzelm 
parents: 
69427 
diff
changeset
 | 
100  | 
val result = Exn.capture (restore_attributes f) file;  | 
| 
 
023d92df3d84
more robust open/close bracket, but with potential danger of blocking indefinitely in uninterruptible state;
 
wenzelm 
parents: 
69427 
diff
changeset
 | 
101  | 
in close_file file; Exn.release result end);  | 
| 6224 | 102  | 
|
| 16261 | 103  | 
in  | 
| 6218 | 104  | 
|
| 43848 | 105  | 
fun open_dir f = with_file OS.FileSys.openDir OS.FileSys.closeDir f o platform_path;  | 
| 
60982
 
67e389f67073
precise BinIO, without newline conversion on Windows;
 
wenzelm 
parents: 
60970 
diff
changeset
 | 
106  | 
fun open_input f = with_file BinIO.openIn BinIO.closeIn f o platform_path;  | 
| 
 
67e389f67073
precise BinIO, without newline conversion on Windows;
 
wenzelm 
parents: 
60970 
diff
changeset
 | 
107  | 
fun open_output f = with_file BinIO.openOut BinIO.closeOut f o platform_path;  | 
| 
 
67e389f67073
precise BinIO, without newline conversion on Windows;
 
wenzelm 
parents: 
60970 
diff
changeset
 | 
108  | 
fun open_append f = with_file BinIO.openAppend BinIO.closeOut f o platform_path;  | 
| 6224 | 109  | 
|
| 28028 | 110  | 
end;  | 
111  | 
||
112  | 
||
| 43848 | 113  | 
(* directory content *)  | 
114  | 
||
| 
69300
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
115  | 
fun fold_dir f path a =  | 
| 
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
116  | 
check_dir path |> open_dir (fn stream =>  | 
| 
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
117  | 
let  | 
| 
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
118  | 
fun read x =  | 
| 
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
119  | 
(case OS.FileSys.readDir stream of  | 
| 
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
120  | 
NONE => x  | 
| 
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
121  | 
| SOME entry => read (f entry x));  | 
| 
 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 
wenzelm 
parents: 
69223 
diff
changeset
 | 
122  | 
in read a end);  | 
| 43848 | 123  | 
|
| 
69427
 
ff2f39a221d4
clarified operations: uniform sorting of results;
 
wenzelm 
parents: 
69300 
diff
changeset
 | 
124  | 
fun read_dir path = sort_strings (fold_dir cons path []);  | 
| 43848 | 125  | 
|
126  | 
||
| 28028 | 127  | 
(* input *)  | 
128  | 
||
| 44879 | 129  | 
(*  | 
130  | 
scalable iterator:  | 
|
131  | 
. avoid size limit of TextIO.inputAll and overhead of many TextIO.inputLine  | 
|
| 
48911
 
5debc3e4fa81
tuned messages: end-of-input rarely means physical end-of-file from the past;
 
wenzelm 
parents: 
48446 
diff
changeset
 | 
132  | 
. optional terminator at end-of-input  | 
| 44879 | 133  | 
*)  | 
| 
43845
 
d89353d17f54
added File.fold_pages for streaming of large files;
 
wenzelm 
parents: 
43616 
diff
changeset
 | 
134  | 
fun fold_chunks terminator f path a = open_input (fn file =>  | 
| 28028 | 135  | 
let  | 
| 
28510
 
66b95e857bde
fold_lines: more tuning, avoiding extra split_last;
 
wenzelm 
parents: 
28500 
diff
changeset
 | 
136  | 
fun read buf x =  | 
| 
60982
 
67e389f67073
precise BinIO, without newline conversion on Windows;
 
wenzelm 
parents: 
60970 
diff
changeset
 | 
137  | 
(case Byte.bytesToString (BinIO.input file) of  | 
| 
28510
 
66b95e857bde
fold_lines: more tuning, avoiding extra split_last;
 
wenzelm 
parents: 
28500 
diff
changeset
 | 
138  | 
"" => (case Buffer.content buf of "" => x | line => f line x)  | 
| 
 
66b95e857bde
fold_lines: more tuning, avoiding extra split_last;
 
wenzelm 
parents: 
28500 
diff
changeset
 | 
139  | 
| input =>  | 
| 
43845
 
d89353d17f54
added File.fold_pages for streaming of large files;
 
wenzelm 
parents: 
43616 
diff
changeset
 | 
140  | 
(case String.fields (fn c => c = terminator) input of  | 
| 
28510
 
66b95e857bde
fold_lines: more tuning, avoiding extra split_last;
 
wenzelm 
parents: 
28500 
diff
changeset
 | 
141  | 
[rest] => read (Buffer.add rest buf) x  | 
| 
43616
 
9e237a9dc1fd
reverted 782991e4180d: fold_fields was never used
 
noschinl 
parents: 
42329 
diff
changeset
 | 
142  | 
| line :: more => read_lines more (f (Buffer.content (Buffer.add line buf)) x)))  | 
| 
 
9e237a9dc1fd
reverted 782991e4180d: fold_fields was never used
 
noschinl 
parents: 
42329 
diff
changeset
 | 
143  | 
and read_lines [rest] x = read (Buffer.add rest Buffer.empty) x  | 
| 
 
9e237a9dc1fd
reverted 782991e4180d: fold_fields was never used
 
noschinl 
parents: 
42329 
diff
changeset
 | 
144  | 
| read_lines (line :: more) x = read_lines more (f line x);  | 
| 28028 | 145  | 
in read Buffer.empty a end) path;  | 
146  | 
||
| 
43845
 
d89353d17f54
added File.fold_pages for streaming of large files;
 
wenzelm 
parents: 
43616 
diff
changeset
 | 
147  | 
fun fold_lines f = fold_chunks #"\n" f;  | 
| 
 
d89353d17f54
added File.fold_pages for streaming of large files;
 
wenzelm 
parents: 
43616 
diff
changeset
 | 
148  | 
fun fold_pages f = fold_chunks #"\f" f;  | 
| 
 
d89353d17f54
added File.fold_pages for streaming of large files;
 
wenzelm 
parents: 
43616 
diff
changeset
 | 
149  | 
|
| 43848 | 150  | 
fun read_lines path = rev (fold_lines cons path []);  | 
| 44879 | 151  | 
fun read_pages path = rev (fold_pages cons path []);  | 
| 43848 | 152  | 
|
| 
60982
 
67e389f67073
precise BinIO, without newline conversion on Windows;
 
wenzelm 
parents: 
60970 
diff
changeset
 | 
153  | 
val read = open_input (Byte.bytesToString o BinIO.inputAll);  | 
| 5009 | 154  | 
|
| 28028 | 155  | 
|
156  | 
(* output *)  | 
|
157  | 
||
| 
60982
 
67e389f67073
precise BinIO, without newline conversion on Windows;
 
wenzelm 
parents: 
60970 
diff
changeset
 | 
158  | 
fun output file txt = BinIO.output (file, Byte.stringToBytes txt);  | 
| 28028 | 159  | 
|
| 
60982
 
67e389f67073
precise BinIO, without newline conversion on Windows;
 
wenzelm 
parents: 
60970 
diff
changeset
 | 
160  | 
fun output_list txts file = List.app (output file) txts;  | 
| 
 
67e389f67073
precise BinIO, without newline conversion on Windows;
 
wenzelm 
parents: 
60970 
diff
changeset
 | 
161  | 
fun write_list path txts = open_output (output_list txts) path;  | 
| 
 
67e389f67073
precise BinIO, without newline conversion on Windows;
 
wenzelm 
parents: 
60970 
diff
changeset
 | 
162  | 
fun append_list path txts = open_append (output_list txts) path;  | 
| 16713 | 163  | 
|
164  | 
fun write path txt = write_list path [txt];  | 
|
165  | 
fun append path txt = append_list path [txt];  | 
|
| 69223 | 166  | 
fun generate path txt = if try read path = SOME txt then () else write path txt;  | 
| 6182 | 167  | 
|
| 28028 | 168  | 
fun write_buffer path buf = open_output (Buffer.output buf) path;  | 
169  | 
||
170  | 
||
| 
56533
 
cd8b6d849b6a
explicit 'document_files' in session ROOT specifications;
 
wenzelm 
parents: 
48911 
diff
changeset
 | 
171  | 
(* eq *)  | 
| 5009 | 172  | 
|
| 16603 | 173  | 
fun eq paths =  | 
| 
59058
 
a78612c67ec0
renamed "pairself" to "apply2", in accordance to @{apply 2};
 
wenzelm 
parents: 
56533 
diff
changeset
 | 
174  | 
(case try (apply2 (OS.FileSys.fileId o platform_path)) paths of  | 
| 26656 | 175  | 
SOME ids => is_equal (OS.FileSys.compare ids)  | 
| 16603 | 176  | 
| NONE => false);  | 
177  | 
||
| 5009 | 178  | 
end;  |