| author | wenzelm | 
| Thu, 13 Feb 2025 14:10:50 +0100 | |
| changeset 82153 | 3c2451d482bd | 
| parent 82090 | 023845c29d48 | 
| permissions | -rw-r--r-- | 
| 35942 
667fd8553cd5
use internal SHA1 digest implementation for generating hash keys
 boehmes parents: 
35941diff
changeset | 1 | (* Title: Tools/cache_io.ML | 
| 35151 | 2 | Author: Sascha Boehme, TU Muenchen | 
| 3 | ||
| 4 | Cache for output of external processes. | |
| 5 | *) | |
| 6 | ||
| 7 | signature CACHE_IO = | |
| 8 | sig | |
| 9 | type cache | |
| 50317 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 wenzelm parents: 
50316diff
changeset | 10 | val unsynchronized_init: Path.T -> cache | 
| 35151 | 11 | val cache_path_of: cache -> Path.T | 
| 82087 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 wenzelm parents: 
82076diff
changeset | 12 | val lookup: cache -> string -> Process_Result.T option * string | 
| 82090 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 13 | val run: Bash.params -> string -> Process_Result.T | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 14 | val run_and_cache: cache -> string -> Bash.params -> string -> Process_Result.T | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 15 | val run_cached: cache -> Bash.params -> string -> Process_Result.T | 
| 35151 | 16 | end | 
| 17 | ||
| 18 | structure Cache_IO : CACHE_IO = | |
| 19 | struct | |
| 20 | ||
| 21 | abstype cache = Cache of {
 | |
| 22 | path: Path.T, | |
| 23 | table: (int * (int * int * int) Symtab.table) Synchronized.var } | |
| 24 | with | |
| 25 | ||
| 26 | fun cache_path_of (Cache {path, ...}) = path
 | |
| 27 | ||
| 50317 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 wenzelm parents: 
50316diff
changeset | 28 | fun unsynchronized_init cache_path = | 
| 35151 | 29 | let | 
| 50316 | 30 | val table = | 
| 31 | if File.exists cache_path then | |
| 32 | let | |
| 82076 
265aec8a81e4
proper Path.print for user output (amending 9498623b27f0);
 wenzelm parents: 
78629diff
changeset | 33 |           fun err () = error ("Cache IO: corrupted cache file: " ^ Path.print cache_path)
 | 
| 35151 | 34 | |
| 50316 | 35 | fun int_of_string s = | 
| 36 | (case read_int (raw_explode s) of | |
| 37 | (i, []) => i | |
| 38 | | _ => err ()) | |
| 35151 | 39 | |
| 50316 | 40 | fun split line = | 
| 41 | (case space_explode " " line of | |
| 42 | [key, len1, len2] => (key, int_of_string len1, int_of_string len2) | |
| 43 | | _ => err ()) | |
| 35151 | 44 | |
| 50316 | 45 | fun parse line ((i, l), tab) = | 
| 46 | if i = l | |
| 47 | then | |
| 48 | let val (key, l1, l2) = split line | |
| 49 | in ((i+1, l+l1+l2+1), Symtab.update (key, (i+1, l1, l2)) tab) end | |
| 50 | else ((i+1, l), tab) | |
| 75616 
986506233812
clarified signature: File.read_lines is based on scalable Bytes.T;
 wenzelm parents: 
75614diff
changeset | 51 | in apfst fst (fold parse (File.read_lines cache_path) ((1, 1), Symtab.empty)) end | 
| 50316 | 52 | else (1, Symtab.empty) | 
| 53 |   in Cache {path = cache_path, table = Synchronized.var "Cache_IO" table} end
 | |
| 35151 | 54 | |
| 50316 | 55 | fun lookup (Cache {path = cache_path, table}) str =
 | 
| 41954 | 56 | let val key = SHA1.rep (SHA1.digest str) | 
| 35151 | 57 | in | 
| 50317 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 wenzelm parents: 
50316diff
changeset | 58 | Synchronized.change_result table (fn tab => | 
| 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 wenzelm parents: 
50316diff
changeset | 59 | (case Symtab.lookup (snd tab) key of | 
| 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 wenzelm parents: 
50316diff
changeset | 60 | NONE => ((NONE, key), tab) | 
| 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 wenzelm parents: 
50316diff
changeset | 61 | | SOME (p, len1, len2) => | 
| 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 wenzelm parents: 
50316diff
changeset | 62 | let | 
| 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 wenzelm parents: 
50316diff
changeset | 63 | fun load line (i, xsp) = | 
| 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 wenzelm parents: 
50316diff
changeset | 64 | if i < p then (i+1, xsp) | 
| 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 wenzelm parents: 
50316diff
changeset | 65 | else if i < p + len1 then (i+1, apfst (cons line) xsp) | 
| 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 wenzelm parents: 
50316diff
changeset | 66 | else if i < p + len2 then (i+1, apsnd (cons line) xsp) | 
| 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 wenzelm parents: 
50316diff
changeset | 67 | else (i, xsp) | 
| 
4d1590544b91
synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
 wenzelm parents: 
50316diff
changeset | 68 | val (out, err) = | 
| 75616 
986506233812
clarified signature: File.read_lines is based on scalable Bytes.T;
 wenzelm parents: 
75614diff
changeset | 69 | apply2 rev (snd (fold load (File.read_lines cache_path) (1, ([], [])))) | 
| 82087 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 wenzelm parents: 
82076diff
changeset | 70 | val result = | 
| 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 wenzelm parents: 
82076diff
changeset | 71 |               Process_Result.make {rc = 0, out_lines = out, err_lines = err, timing = Timing.zero}
 | 
| 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 wenzelm parents: 
82076diff
changeset | 72 | in ((SOME result, key), tab) end)) | 
| 35151 | 73 | end | 
| 74 | ||
| 82090 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 75 | fun run cmd input = | 
| 36086 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 boehmes parents: 
35942diff
changeset | 76 | let | 
| 82090 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 77 | val result = cmd | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 78 | |> Bash.input (Bytes.string input) | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 79 | |> Bash.redirect | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 80 | |> Isabelle_System.bash_process | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 81 | val rc = Process_Result.rc result | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 82 | in | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 83 | if rc = Process_Result.startup_failure_rc then | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 84 | Process_Result.make | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 85 |        {rc = rc,
 | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 86 | err_lines = Process_Result.out_lines result, | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 87 | out_lines = [], | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 88 | timing = Process_Result.timing result} | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 89 | else result | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 90 | end | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 91 | |
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 92 | fun run_and_cache (Cache {path = cache_path, table}) key cmd input =
 | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 93 | let | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 94 | val result = run cmd input | 
| 82087 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 wenzelm parents: 
82076diff
changeset | 95 | val out_lines = Process_Result.out_lines result | 
| 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 wenzelm parents: 
82076diff
changeset | 96 | val err_lines = Process_Result.err_lines result | 
| 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 wenzelm parents: 
82076diff
changeset | 97 | val (l1, l2) = apply2 length (out_lines, err_lines) | 
| 36086 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 boehmes parents: 
35942diff
changeset | 98 | val header = key ^ " " ^ string_of_int l1 ^ " " ^ string_of_int l2 | 
| 82087 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 wenzelm parents: 
82076diff
changeset | 99 | val lines = map (suffix "\n") (header :: out_lines @ err_lines) | 
| 36086 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 boehmes parents: 
35942diff
changeset | 100 | |
| 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 boehmes parents: 
35942diff
changeset | 101 | val _ = Synchronized.change table (fn (p, tab) => | 
| 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 boehmes parents: 
35942diff
changeset | 102 | if Symtab.defined tab key then (p, tab) | 
| 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 boehmes parents: 
35942diff
changeset | 103 | else | 
| 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 boehmes parents: 
35942diff
changeset | 104 | let val _ = File.append_list cache_path lines | 
| 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 boehmes parents: 
35942diff
changeset | 105 | in (p+l1+l2+1, Symtab.update (key, (p+1, l1, l2)) tab) end) | 
| 82087 
aee15b076916
clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
 wenzelm parents: 
82076diff
changeset | 106 | in result end | 
| 36086 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 boehmes parents: 
35942diff
changeset | 107 | |
| 82090 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 108 | fun run_cached cache cmd input = | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 109 | (case lookup cache input of | 
| 
023845c29d48
avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
 wenzelm parents: 
82087diff
changeset | 110 | (NONE, key) => run_and_cache cache key cmd input | 
| 36086 
8e5454761f26
simplified Cache_IO interface (input is just a string and not already stored in a file)
 boehmes parents: 
35942diff
changeset | 111 | | (SOME output, _) => output) | 
| 35151 | 112 | |
| 113 | end | |
| 40425 
c9b5e0fcee31
return the process return code along with the process outputs
 boehmes parents: 
37740diff
changeset | 114 | |
| 35151 | 115 | end |