author | nipkow |
Tue, 17 Jun 2025 14:11:40 +0200 | |
changeset 82733 | 8b537e1af2ec |
parent 82090 | 023845c29d48 |
permissions | -rw-r--r-- |
35942
667fd8553cd5
use internal SHA1 digest implementation for generating hash keys
boehmes
parents:
35941
diff
changeset
|
1 |
(* Title: Tools/cache_io.ML |
35151 | 2 |
Author: Sascha Boehme, TU Muenchen |
3 |
||
4 |
Cache for output of external processes. |
|
5 |
*) |
|
6 |
||
7 |
signature CACHE_IO = |
|
8 |
sig |
|
9 |
type cache |
|
50317
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents:
50316
diff
changeset
|
10 |
val unsynchronized_init: Path.T -> cache |
35151 | 11 |
val cache_path_of: cache -> Path.T |
82087
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents:
82076
diff
changeset
|
12 |
val lookup: cache -> string -> Process_Result.T option * string |
82090
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
13 |
val run: Bash.params -> string -> Process_Result.T |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
14 |
val run_and_cache: cache -> string -> Bash.params -> string -> Process_Result.T |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
15 |
val run_cached: cache -> Bash.params -> string -> Process_Result.T |
35151 | 16 |
end |
17 |
||
18 |
structure Cache_IO : CACHE_IO = |
|
19 |
struct |
|
20 |
||
21 |
abstype cache = Cache of { |
|
22 |
path: Path.T, |
|
23 |
table: (int * (int * int * int) Symtab.table) Synchronized.var } |
|
24 |
with |
|
25 |
||
26 |
fun cache_path_of (Cache {path, ...}) = path |
|
27 |
||
50317
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents:
50316
diff
changeset
|
28 |
fun unsynchronized_init cache_path = |
35151 | 29 |
let |
50316 | 30 |
val table = |
31 |
if File.exists cache_path then |
|
32 |
let |
|
82076
265aec8a81e4
proper Path.print for user output (amending 9498623b27f0);
wenzelm
parents:
78629
diff
changeset
|
33 |
fun err () = error ("Cache IO: corrupted cache file: " ^ Path.print cache_path) |
35151 | 34 |
|
50316 | 35 |
fun int_of_string s = |
36 |
(case read_int (raw_explode s) of |
|
37 |
(i, []) => i |
|
38 |
| _ => err ()) |
|
35151 | 39 |
|
50316 | 40 |
fun split line = |
41 |
(case space_explode " " line of |
|
42 |
[key, len1, len2] => (key, int_of_string len1, int_of_string len2) |
|
43 |
| _ => err ()) |
|
35151 | 44 |
|
50316 | 45 |
fun parse line ((i, l), tab) = |
46 |
if i = l |
|
47 |
then |
|
48 |
let val (key, l1, l2) = split line |
|
49 |
in ((i+1, l+l1+l2+1), Symtab.update (key, (i+1, l1, l2)) tab) end |
|
50 |
else ((i+1, l), tab) |
|
75616
986506233812
clarified signature: File.read_lines is based on scalable Bytes.T;
wenzelm
parents:
75614
diff
changeset
|
51 |
in apfst fst (fold parse (File.read_lines cache_path) ((1, 1), Symtab.empty)) end |
50316 | 52 |
else (1, Symtab.empty) |
53 |
in Cache {path = cache_path, table = Synchronized.var "Cache_IO" table} end |
|
35151 | 54 |
|
50316 | 55 |
fun lookup (Cache {path = cache_path, table}) str = |
41954 | 56 |
let val key = SHA1.rep (SHA1.digest str) |
35151 | 57 |
in |
50317
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents:
50316
diff
changeset
|
58 |
Synchronized.change_result table (fn tab => |
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents:
50316
diff
changeset
|
59 |
(case Symtab.lookup (snd tab) key of |
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents:
50316
diff
changeset
|
60 |
NONE => ((NONE, key), tab) |
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents:
50316
diff
changeset
|
61 |
| SOME (p, len1, len2) => |
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents:
50316
diff
changeset
|
62 |
let |
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents:
50316
diff
changeset
|
63 |
fun load line (i, xsp) = |
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents:
50316
diff
changeset
|
64 |
if i < p then (i+1, xsp) |
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents:
50316
diff
changeset
|
65 |
else if i < p + len1 then (i+1, apfst (cons line) xsp) |
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents:
50316
diff
changeset
|
66 |
else if i < p + len2 then (i+1, apsnd (cons line) xsp) |
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents:
50316
diff
changeset
|
67 |
else (i, xsp) |
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents:
50316
diff
changeset
|
68 |
val (out, err) = |
75616
986506233812
clarified signature: File.read_lines is based on scalable Bytes.T;
wenzelm
parents:
75614
diff
changeset
|
69 |
apply2 rev (snd (fold load (File.read_lines cache_path) (1, ([], [])))) |
82087
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents:
82076
diff
changeset
|
70 |
val result = |
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents:
82076
diff
changeset
|
71 |
Process_Result.make {rc = 0, out_lines = out, err_lines = err, timing = Timing.zero} |
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents:
82076
diff
changeset
|
72 |
in ((SOME result, key), tab) end)) |
35151 | 73 |
end |
74 |
||
82090
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
75 |
fun run cmd input = |
36086
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents:
35942
diff
changeset
|
76 |
let |
82090
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
77 |
val result = cmd |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
78 |
|> Bash.input (Bytes.string input) |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
79 |
|> Bash.redirect |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
80 |
|> Isabelle_System.bash_process |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
81 |
val rc = Process_Result.rc result |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
82 |
in |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
83 |
if rc = Process_Result.startup_failure_rc then |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
84 |
Process_Result.make |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
85 |
{rc = rc, |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
86 |
err_lines = Process_Result.out_lines result, |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
87 |
out_lines = [], |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
88 |
timing = Process_Result.timing result} |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
89 |
else result |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
90 |
end |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
91 |
|
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
92 |
fun run_and_cache (Cache {path = cache_path, table}) key cmd input = |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
93 |
let |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
94 |
val result = run cmd input |
82087
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents:
82076
diff
changeset
|
95 |
val out_lines = Process_Result.out_lines result |
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents:
82076
diff
changeset
|
96 |
val err_lines = Process_Result.err_lines result |
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents:
82076
diff
changeset
|
97 |
val (l1, l2) = apply2 length (out_lines, err_lines) |
36086
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents:
35942
diff
changeset
|
98 |
val header = key ^ " " ^ string_of_int l1 ^ " " ^ string_of_int l2 |
82087
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents:
82076
diff
changeset
|
99 |
val lines = map (suffix "\n") (header :: out_lines @ err_lines) |
36086
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents:
35942
diff
changeset
|
100 |
|
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents:
35942
diff
changeset
|
101 |
val _ = Synchronized.change table (fn (p, tab) => |
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents:
35942
diff
changeset
|
102 |
if Symtab.defined tab key then (p, tab) |
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents:
35942
diff
changeset
|
103 |
else |
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents:
35942
diff
changeset
|
104 |
let val _ = File.append_list cache_path lines |
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents:
35942
diff
changeset
|
105 |
in (p+l1+l2+1, Symtab.update (key, (p+1, l1, l2)) tab) end) |
82087
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents:
82076
diff
changeset
|
106 |
in result end |
36086
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents:
35942
diff
changeset
|
107 |
|
82090
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
108 |
fun run_cached cache cmd input = |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
109 |
(case lookup cache input of |
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents:
82087
diff
changeset
|
110 |
(NONE, key) => run_and_cache cache key cmd input |
36086
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents:
35942
diff
changeset
|
111 |
| (SOME output, _) => output) |
35151 | 112 |
|
113 |
end |
|
40425
c9b5e0fcee31
return the process return code along with the process outputs
boehmes
parents:
37740
diff
changeset
|
114 |
|
35151 | 115 |
end |