| author | paulson <lp15@cam.ac.uk> | 
| Fri, 28 Feb 2025 13:50:18 +0000 | |
| changeset 82218 | cbf9f856d3e0 | 
| parent 82090 | 023845c29d48 | 
| permissions | -rw-r--r-- | 
| 
35942
 
667fd8553cd5
use internal SHA1 digest implementation for generating hash keys
 
boehmes 
parents: 
35941 
diff
changeset
 | 
1  | 
(* Title: Tools/cache_io.ML  | 
| 35151 | 2  | 
Author: Sascha Boehme, TU Muenchen  | 
3  | 
||
4  | 
Cache for output of external processes.  | 
|
5  | 
*)  | 
|
6  | 
||
7  | 
signature CACHE_IO =  | 
|
8  | 
sig  | 
|
9  | 
type cache  | 
|
| 
50317
 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 
wenzelm 
parents: 
50316 
diff
changeset
 | 
10  | 
val unsynchronized_init: Path.T -> cache  | 
| 35151 | 11  | 
val cache_path_of: cache -> Path.T  | 
| 
82087
 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 
wenzelm 
parents: 
82076 
diff
changeset
 | 
12  | 
val lookup: cache -> string -> Process_Result.T option * string  | 
| 
82090
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
13  | 
val run: Bash.params -> string -> Process_Result.T  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
14  | 
val run_and_cache: cache -> string -> Bash.params -> string -> Process_Result.T  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
15  | 
val run_cached: cache -> Bash.params -> string -> Process_Result.T  | 
| 35151 | 16  | 
end  | 
17  | 
||
18  | 
structure Cache_IO : CACHE_IO =  | 
|
19  | 
struct  | 
|
20  | 
||
21  | 
abstype cache = Cache of {
 | 
|
22  | 
path: Path.T,  | 
|
23  | 
table: (int * (int * int * int) Symtab.table) Synchronized.var }  | 
|
24  | 
with  | 
|
25  | 
||
26  | 
fun cache_path_of (Cache {path, ...}) = path
 | 
|
27  | 
||
| 
50317
 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 
wenzelm 
parents: 
50316 
diff
changeset
 | 
28  | 
fun unsynchronized_init cache_path =  | 
| 35151 | 29  | 
let  | 
| 50316 | 30  | 
val table =  | 
31  | 
if File.exists cache_path then  | 
|
32  | 
let  | 
|
| 
82076
 
265aec8a81e4
proper Path.print for user output (amending 9498623b27f0);
 
wenzelm 
parents: 
78629 
diff
changeset
 | 
33  | 
          fun err () = error ("Cache IO: corrupted cache file: " ^ Path.print cache_path)
 | 
| 35151 | 34  | 
|
| 50316 | 35  | 
fun int_of_string s =  | 
36  | 
(case read_int (raw_explode s) of  | 
|
37  | 
(i, []) => i  | 
|
38  | 
| _ => err ())  | 
|
| 35151 | 39  | 
|
| 50316 | 40  | 
fun split line =  | 
41  | 
(case space_explode " " line of  | 
|
42  | 
[key, len1, len2] => (key, int_of_string len1, int_of_string len2)  | 
|
43  | 
| _ => err ())  | 
|
| 35151 | 44  | 
|
| 50316 | 45  | 
fun parse line ((i, l), tab) =  | 
46  | 
if i = l  | 
|
47  | 
then  | 
|
48  | 
let val (key, l1, l2) = split line  | 
|
49  | 
in ((i+1, l+l1+l2+1), Symtab.update (key, (i+1, l1, l2)) tab) end  | 
|
50  | 
else ((i+1, l), tab)  | 
|
| 
75616
 
986506233812
clarified signature: File.read_lines is based on scalable Bytes.T;
 
wenzelm 
parents: 
75614 
diff
changeset
 | 
51  | 
in apfst fst (fold parse (File.read_lines cache_path) ((1, 1), Symtab.empty)) end  | 
| 50316 | 52  | 
else (1, Symtab.empty)  | 
53  | 
  in Cache {path = cache_path, table = Synchronized.var "Cache_IO" table} end
 | 
|
| 35151 | 54  | 
|
| 50316 | 55  | 
fun lookup (Cache {path = cache_path, table}) str =
 | 
| 41954 | 56  | 
let val key = SHA1.rep (SHA1.digest str)  | 
| 35151 | 57  | 
in  | 
| 
50317
 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 
wenzelm 
parents: 
50316 
diff
changeset
 | 
58  | 
Synchronized.change_result table (fn tab =>  | 
| 
 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 
wenzelm 
parents: 
50316 
diff
changeset
 | 
59  | 
(case Symtab.lookup (snd tab) key of  | 
| 
 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 
wenzelm 
parents: 
50316 
diff
changeset
 | 
60  | 
NONE => ((NONE, key), tab)  | 
| 
 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 
wenzelm 
parents: 
50316 
diff
changeset
 | 
61  | 
| SOME (p, len1, len2) =>  | 
| 
 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 
wenzelm 
parents: 
50316 
diff
changeset
 | 
62  | 
let  | 
| 
 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 
wenzelm 
parents: 
50316 
diff
changeset
 | 
63  | 
fun load line (i, xsp) =  | 
| 
 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 
wenzelm 
parents: 
50316 
diff
changeset
 | 
64  | 
if i < p then (i+1, xsp)  | 
| 
 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 
wenzelm 
parents: 
50316 
diff
changeset
 | 
65  | 
else if i < p + len1 then (i+1, apfst (cons line) xsp)  | 
| 
 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 
wenzelm 
parents: 
50316 
diff
changeset
 | 
66  | 
else if i < p + len2 then (i+1, apsnd (cons line) xsp)  | 
| 
 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 
wenzelm 
parents: 
50316 
diff
changeset
 | 
67  | 
else (i, xsp)  | 
| 
 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 
wenzelm 
parents: 
50316 
diff
changeset
 | 
68  | 
val (out, err) =  | 
| 
75616
 
986506233812
clarified signature: File.read_lines is based on scalable Bytes.T;
 
wenzelm 
parents: 
75614 
diff
changeset
 | 
69  | 
apply2 rev (snd (fold load (File.read_lines cache_path) (1, ([], []))))  | 
| 
82087
 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 
wenzelm 
parents: 
82076 
diff
changeset
 | 
70  | 
val result =  | 
| 
 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 
wenzelm 
parents: 
82076 
diff
changeset
 | 
71  | 
              Process_Result.make {rc = 0, out_lines = out, err_lines = err, timing = Timing.zero}
 | 
| 
 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 
wenzelm 
parents: 
82076 
diff
changeset
 | 
72  | 
in ((SOME result, key), tab) end))  | 
| 35151 | 73  | 
end  | 
74  | 
||
| 
82090
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
75  | 
fun run cmd input =  | 
| 
36086
 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 
boehmes 
parents: 
35942 
diff
changeset
 | 
76  | 
let  | 
| 
82090
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
77  | 
val result = cmd  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
78  | 
|> Bash.input (Bytes.string input)  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
79  | 
|> Bash.redirect  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
80  | 
|> Isabelle_System.bash_process  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
81  | 
val rc = Process_Result.rc result  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
82  | 
in  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
83  | 
if rc = Process_Result.startup_failure_rc then  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
84  | 
Process_Result.make  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
85  | 
       {rc = rc,
 | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
86  | 
err_lines = Process_Result.out_lines result,  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
87  | 
out_lines = [],  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
88  | 
timing = Process_Result.timing result}  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
89  | 
else result  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
90  | 
end  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
91  | 
|
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
92  | 
fun run_and_cache (Cache {path = cache_path, table}) key cmd input =
 | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
93  | 
let  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
94  | 
val result = run cmd input  | 
| 
82087
 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 
wenzelm 
parents: 
82076 
diff
changeset
 | 
95  | 
val out_lines = Process_Result.out_lines result  | 
| 
 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 
wenzelm 
parents: 
82076 
diff
changeset
 | 
96  | 
val err_lines = Process_Result.err_lines result  | 
| 
 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 
wenzelm 
parents: 
82076 
diff
changeset
 | 
97  | 
val (l1, l2) = apply2 length (out_lines, err_lines)  | 
| 
36086
 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 
boehmes 
parents: 
35942 
diff
changeset
 | 
98  | 
val header = key ^ " " ^ string_of_int l1 ^ " " ^ string_of_int l2  | 
| 
82087
 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 
wenzelm 
parents: 
82076 
diff
changeset
 | 
99  | 
val lines = map (suffix "\n") (header :: out_lines @ err_lines)  | 
| 
36086
 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 
boehmes 
parents: 
35942 
diff
changeset
 | 
100  | 
|
| 
 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 
boehmes 
parents: 
35942 
diff
changeset
 | 
101  | 
val _ = Synchronized.change table (fn (p, tab) =>  | 
| 
 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 
boehmes 
parents: 
35942 
diff
changeset
 | 
102  | 
if Symtab.defined tab key then (p, tab)  | 
| 
 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 
boehmes 
parents: 
35942 
diff
changeset
 | 
103  | 
else  | 
| 
 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 
boehmes 
parents: 
35942 
diff
changeset
 | 
104  | 
let val _ = File.append_list cache_path lines  | 
| 
 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 
boehmes 
parents: 
35942 
diff
changeset
 | 
105  | 
in (p+l1+l2+1, Symtab.update (key, (p+1, l1, l2)) tab) end)  | 
| 
82087
 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 
wenzelm 
parents: 
82076 
diff
changeset
 | 
106  | 
in result end  | 
| 
36086
 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 
boehmes 
parents: 
35942 
diff
changeset
 | 
107  | 
|
| 
82090
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
108  | 
fun run_cached cache cmd input =  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
109  | 
(case lookup cache input of  | 
| 
 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 
wenzelm 
parents: 
82087 
diff
changeset
 | 
110  | 
(NONE, key) => run_and_cache cache key cmd input  | 
| 
36086
 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 
boehmes 
parents: 
35942 
diff
changeset
 | 
111  | 
| (SOME output, _) => output)  | 
| 35151 | 112  | 
|
113  | 
end  | 
|
| 
40425
 
c9b5e0fcee31
return the process return code along with the process outputs
 
boehmes 
parents: 
37740 
diff
changeset
 | 
114  | 
|
| 35151 | 115  | 
end  |