| author | wenzelm | 
| Wed, 18 Dec 2019 15:10:13 +0100 | |
| changeset 71305 | 2f7da37bab52 | 
| parent 70998 | 7926d2fc3c4c | 
| child 72278 | 199dc903131b | 
| permissions | -rw-r--r-- | 
| 6118 | 1 | (* Title: Pure/General/file.ML | 
| 64698 | 2 | Author: Makarius | 
| 5009 | 3 | |
| 64698 | 4 | File-system operations. | 
| 5009 | 5 | *) | 
| 6 | ||
| 7 | signature FILE = | |
| 8 | sig | |
| 60970 
e08d868ceca9
clarified File.standard_path vs. File.platform_path (like Isabelle/Scala operations);
 wenzelm parents: 
59058diff
changeset | 9 | val standard_path: Path.T -> string | 
| 16261 | 10 | val platform_path: Path.T -> string | 
| 62549 
9498623b27f0
File.bash_string operations in ML as in Scala -- exclusively for GNU bash, not perl and not user output;
 wenzelm parents: 
62468diff
changeset | 11 | val bash_path: Path.T -> string | 
| 69223 | 12 | val bash_paths: Path.T list -> string | 
| 42003 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 13 | val full_path: Path.T -> Path.T -> Path.T | 
| 6182 | 14 | val tmp_path: Path.T -> Path.T | 
| 17826 | 15 | val exists: Path.T -> bool | 
| 16 | val rm: Path.T -> unit | |
| 40785 | 17 | val is_dir: Path.T -> bool | 
| 69300 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 18 | val is_file: Path.T -> bool | 
| 42003 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 19 | val check_dir: Path.T -> Path.T | 
| 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 20 | val check_file: Path.T -> Path.T | 
| 43848 | 21 | val open_dir: (OS.FileSys.dirstream -> 'a) -> Path.T -> 'a | 
| 60982 
67e389f67073
precise BinIO, without newline conversion on Windows;
 wenzelm parents: 
60970diff
changeset | 22 | val open_input: (BinIO.instream -> 'a) -> Path.T -> 'a | 
| 
67e389f67073
precise BinIO, without newline conversion on Windows;
 wenzelm parents: 
60970diff
changeset | 23 | val open_output: (BinIO.outstream -> 'a) -> Path.T -> 'a | 
| 
67e389f67073
precise BinIO, without newline conversion on Windows;
 wenzelm parents: 
60970diff
changeset | 24 | val open_append: (BinIO.outstream -> 'a) -> Path.T -> 'a | 
| 43848 | 25 | val fold_dir: (string -> 'a -> 'a) -> Path.T -> 'a -> 'a | 
| 26 | val read_dir: Path.T -> string list | |
| 28028 | 27 | val fold_lines: (string -> 'a -> 'a) -> Path.T -> 'a -> 'a | 
| 43845 
d89353d17f54
added File.fold_pages for streaming of large files;
 wenzelm parents: 
43616diff
changeset | 28 | val fold_pages: (string -> 'a -> 'a) -> Path.T -> 'a -> 'a | 
| 43848 | 29 | val read_lines: Path.T -> string list | 
| 44879 | 30 | val read_pages: Path.T -> string list | 
| 6182 | 31 | val read: Path.T -> string | 
| 32 | val write: Path.T -> string -> unit | |
| 33 | val append: Path.T -> string -> unit | |
| 69223 | 34 | val generate: Path.T -> string -> unit | 
| 60982 
67e389f67073
precise BinIO, without newline conversion on Windows;
 wenzelm parents: 
60970diff
changeset | 35 | val output: BinIO.outstream -> string -> unit | 
| 16713 | 36 | val write_list: Path.T -> string list -> unit | 
| 37 | val append_list: Path.T -> string list -> unit | |
| 28028 | 38 | val write_buffer: Path.T -> Buffer.T -> unit | 
| 16603 | 39 | val eq: Path.T * Path.T -> bool | 
| 5009 | 40 | end; | 
| 41 | ||
| 42 | structure File: FILE = | |
| 43 | struct | |
| 44 | ||
| 26503 
4dec4460244f
discontinued unused hooks explode_platform_path_fn, platform_path_fn, shell_path_fn;
 wenzelm parents: 
26220diff
changeset | 45 | (* system path representations *) | 
| 6224 | 46 | |
| 60970 
e08d868ceca9
clarified File.standard_path vs. File.platform_path (like Isabelle/Scala operations);
 wenzelm parents: 
59058diff
changeset | 47 | val standard_path = Path.implode o Path.expand; | 
| 62468 | 48 | val platform_path = ML_System.platform_path o standard_path; | 
| 26980 
f7f48bb9a025
ident: naive caching prevents potentially slow external invocations;
 wenzelm parents: 
26946diff
changeset | 49 | |
| 64304 | 50 | val bash_path = Bash_Syntax.string o standard_path; | 
| 69223 | 51 | val bash_paths = Bash_Syntax.strings o map standard_path; | 
| 6224 | 52 | |
| 70292 | 53 | |
| 42003 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 54 | (* full_path *) | 
| 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 55 | |
| 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 56 | fun full_path dir path = | 
| 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 57 | let | 
| 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 58 | val path' = Path.expand path; | 
| 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 59 | val _ = Path.is_current path' andalso error "Bad file specification"; | 
| 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 60 | val path'' = Path.append dir path'; | 
| 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 61 | in | 
| 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 62 | if Path.is_absolute path'' then path'' | 
| 62551 | 63 | else Path.append (Path.explode (ML_System.standard_path (OS.FileSys.getDir ()))) path'' | 
| 42003 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 64 | end; | 
| 5009 | 65 | |
| 6182 | 66 | |
| 67 | (* tmp_path *) | |
| 68 | ||
| 69 | fun tmp_path path = | |
| 70 | Path.append (Path.variable "ISABELLE_TMP") (Path.base path); | |
| 5009 | 71 | |
| 72 | ||
| 26980 
f7f48bb9a025
ident: naive caching prevents potentially slow external invocations;
 wenzelm parents: 
26946diff
changeset | 73 | (* directory entries *) | 
| 
f7f48bb9a025
ident: naive caching prevents potentially slow external invocations;
 wenzelm parents: 
26946diff
changeset | 74 | |
| 23861 
72bb3494746f
replaced info by ident (for full identification, potentially content-based);
 wenzelm parents: 
23826diff
changeset | 75 | val exists = can OS.FileSys.modTime o platform_path; | 
| 16261 | 76 | |
| 77 | val rm = OS.FileSys.remove o platform_path; | |
| 78 | ||
| 69300 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 79 | fun test_dir path = the_default false (try OS.FileSys.isDir (platform_path path)); | 
| 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 80 | fun is_dir path = exists path andalso test_dir path; | 
| 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 81 | fun is_file path = exists path andalso not (test_dir path); | 
| 40785 | 82 | |
| 42003 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 83 | fun check_dir path = | 
| 69300 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 84 | if is_dir path then path | 
| 63668 | 85 |   else error ("No such directory: " ^ Path.print (Path.expand path));
 | 
| 42003 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 86 | |
| 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 87 | fun check_file path = | 
| 69300 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 88 | if is_file path then path | 
| 63668 | 89 |   else error ("No such file: " ^ Path.print (Path.expand path));
 | 
| 42003 
6e45dc518ebb
replaced File.check by specific File.check_file, File.check_dir;
 wenzelm parents: 
41944diff
changeset | 90 | |
| 16261 | 91 | |
| 43848 | 92 | (* open streams *) | 
| 6224 | 93 | |
| 16261 | 94 | local | 
| 95 | ||
| 69483 
023d92df3d84
more robust open/close bracket, but with potential danger of blocking indefinitely in uninterruptible state;
 wenzelm parents: 
69427diff
changeset | 96 | fun with_file open_file close_file f = | 
| 
023d92df3d84
more robust open/close bracket, but with potential danger of blocking indefinitely in uninterruptible state;
 wenzelm parents: 
69427diff
changeset | 97 | Thread_Attributes.uninterruptible (fn restore_attributes => fn path => | 
| 
023d92df3d84
more robust open/close bracket, but with potential danger of blocking indefinitely in uninterruptible state;
 wenzelm parents: 
69427diff
changeset | 98 | let | 
| 
023d92df3d84
more robust open/close bracket, but with potential danger of blocking indefinitely in uninterruptible state;
 wenzelm parents: 
69427diff
changeset | 99 | val file = open_file path; | 
| 
023d92df3d84
more robust open/close bracket, but with potential danger of blocking indefinitely in uninterruptible state;
 wenzelm parents: 
69427diff
changeset | 100 | val result = Exn.capture (restore_attributes f) file; | 
| 
023d92df3d84
more robust open/close bracket, but with potential danger of blocking indefinitely in uninterruptible state;
 wenzelm parents: 
69427diff
changeset | 101 | in close_file file; Exn.release result end); | 
| 6224 | 102 | |
| 16261 | 103 | in | 
| 6218 | 104 | |
| 43848 | 105 | fun open_dir f = with_file OS.FileSys.openDir OS.FileSys.closeDir f o platform_path; | 
| 60982 
67e389f67073
precise BinIO, without newline conversion on Windows;
 wenzelm parents: 
60970diff
changeset | 106 | fun open_input f = with_file BinIO.openIn BinIO.closeIn f o platform_path; | 
| 
67e389f67073
precise BinIO, without newline conversion on Windows;
 wenzelm parents: 
60970diff
changeset | 107 | fun open_output f = with_file BinIO.openOut BinIO.closeOut f o platform_path; | 
| 
67e389f67073
precise BinIO, without newline conversion on Windows;
 wenzelm parents: 
60970diff
changeset | 108 | fun open_append f = with_file BinIO.openAppend BinIO.closeOut f o platform_path; | 
| 6224 | 109 | |
| 28028 | 110 | end; | 
| 111 | ||
| 112 | ||
| 43848 | 113 | (* directory content *) | 
| 114 | ||
| 69300 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 115 | fun fold_dir f path a = | 
| 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 116 | check_dir path |> open_dir (fn stream => | 
| 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 117 | let | 
| 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 118 | fun read x = | 
| 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 119 | (case OS.FileSys.readDir stream of | 
| 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 120 | NONE => x | 
| 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 121 | | SOME entry => read (f entry x)); | 
| 
8b6ab9989bcd
is_file/is_dir/read_dir: more uniform treatment of errors and boundary cases, notably for symlinks in ssh;
 wenzelm parents: 
69223diff
changeset | 122 | in read a end); | 
| 43848 | 123 | |
| 69427 
ff2f39a221d4
clarified operations: uniform sorting of results;
 wenzelm parents: 
69300diff
changeset | 124 | fun read_dir path = sort_strings (fold_dir cons path []); | 
| 43848 | 125 | |
| 126 | ||
| 28028 | 127 | (* input *) | 
| 128 | ||
| 44879 | 129 | (* | 
| 130 | scalable iterator: | |
| 131 | . avoid size limit of TextIO.inputAll and overhead of many TextIO.inputLine | |
| 48911 
5debc3e4fa81
tuned messages: end-of-input rarely means physical end-of-file from the past;
 wenzelm parents: 
48446diff
changeset | 132 | . optional terminator at end-of-input | 
| 44879 | 133 | *) | 
| 43845 
d89353d17f54
added File.fold_pages for streaming of large files;
 wenzelm parents: 
43616diff
changeset | 134 | fun fold_chunks terminator f path a = open_input (fn file => | 
| 28028 | 135 | let | 
| 28510 
66b95e857bde
fold_lines: more tuning, avoiding extra split_last;
 wenzelm parents: 
28500diff
changeset | 136 | fun read buf x = | 
| 60982 
67e389f67073
precise BinIO, without newline conversion on Windows;
 wenzelm parents: 
60970diff
changeset | 137 | (case Byte.bytesToString (BinIO.input file) of | 
| 28510 
66b95e857bde
fold_lines: more tuning, avoiding extra split_last;
 wenzelm parents: 
28500diff
changeset | 138 | "" => (case Buffer.content buf of "" => x | line => f line x) | 
| 
66b95e857bde
fold_lines: more tuning, avoiding extra split_last;
 wenzelm parents: 
28500diff
changeset | 139 | | input => | 
| 43845 
d89353d17f54
added File.fold_pages for streaming of large files;
 wenzelm parents: 
43616diff
changeset | 140 | (case String.fields (fn c => c = terminator) input of | 
| 28510 
66b95e857bde
fold_lines: more tuning, avoiding extra split_last;
 wenzelm parents: 
28500diff
changeset | 141 | [rest] => read (Buffer.add rest buf) x | 
| 43616 
9e237a9dc1fd
reverted 782991e4180d: fold_fields was never used
 noschinl parents: 
42329diff
changeset | 142 | | line :: more => read_lines more (f (Buffer.content (Buffer.add line buf)) x))) | 
| 
9e237a9dc1fd
reverted 782991e4180d: fold_fields was never used
 noschinl parents: 
42329diff
changeset | 143 | and read_lines [rest] x = read (Buffer.add rest Buffer.empty) x | 
| 
9e237a9dc1fd
reverted 782991e4180d: fold_fields was never used
 noschinl parents: 
42329diff
changeset | 144 | | read_lines (line :: more) x = read_lines more (f line x); | 
| 28028 | 145 | in read Buffer.empty a end) path; | 
| 146 | ||
| 43845 
d89353d17f54
added File.fold_pages for streaming of large files;
 wenzelm parents: 
43616diff
changeset | 147 | fun fold_lines f = fold_chunks #"\n" f; | 
| 
d89353d17f54
added File.fold_pages for streaming of large files;
 wenzelm parents: 
43616diff
changeset | 148 | fun fold_pages f = fold_chunks #"\f" f; | 
| 
d89353d17f54
added File.fold_pages for streaming of large files;
 wenzelm parents: 
43616diff
changeset | 149 | |
| 43848 | 150 | fun read_lines path = rev (fold_lines cons path []); | 
| 44879 | 151 | fun read_pages path = rev (fold_pages cons path []); | 
| 43848 | 152 | |
| 60982 
67e389f67073
precise BinIO, without newline conversion on Windows;
 wenzelm parents: 
60970diff
changeset | 153 | val read = open_input (Byte.bytesToString o BinIO.inputAll); | 
| 5009 | 154 | |
| 28028 | 155 | |
| 156 | (* output *) | |
| 157 | ||
| 60982 
67e389f67073
precise BinIO, without newline conversion on Windows;
 wenzelm parents: 
60970diff
changeset | 158 | fun output file txt = BinIO.output (file, Byte.stringToBytes txt); | 
| 28028 | 159 | |
| 60982 
67e389f67073
precise BinIO, without newline conversion on Windows;
 wenzelm parents: 
60970diff
changeset | 160 | fun output_list txts file = List.app (output file) txts; | 
| 
67e389f67073
precise BinIO, without newline conversion on Windows;
 wenzelm parents: 
60970diff
changeset | 161 | fun write_list path txts = open_output (output_list txts) path; | 
| 
67e389f67073
precise BinIO, without newline conversion on Windows;
 wenzelm parents: 
60970diff
changeset | 162 | fun append_list path txts = open_append (output_list txts) path; | 
| 16713 | 163 | |
| 164 | fun write path txt = write_list path [txt]; | |
| 165 | fun append path txt = append_list path [txt]; | |
| 69223 | 166 | fun generate path txt = if try read path = SOME txt then () else write path txt; | 
| 6182 | 167 | |
| 70998 
7926d2fc3c4c
back to more elementary Buffer.T -- less intermediate garbage;
 wenzelm parents: 
70292diff
changeset | 168 | fun write_buffer path buf = open_output (Buffer.output buf o output) path; | 
| 28028 | 169 | |
| 170 | ||
| 56533 
cd8b6d849b6a
explicit 'document_files' in session ROOT specifications;
 wenzelm parents: 
48911diff
changeset | 171 | (* eq *) | 
| 5009 | 172 | |
| 16603 | 173 | fun eq paths = | 
| 59058 
a78612c67ec0
renamed "pairself" to "apply2", in accordance to @{apply 2};
 wenzelm parents: 
56533diff
changeset | 174 | (case try (apply2 (OS.FileSys.fileId o platform_path)) paths of | 
| 26656 | 175 | SOME ids => is_equal (OS.FileSys.compare ids) | 
| 16603 | 176 | | NONE => false); | 
| 177 | ||
| 5009 | 178 | end; |