src/Tools/cache_io.ML
author nipkow
Tue, 17 Jun 2025 14:11:40 +0200
changeset 82733 8b537e1af2ec
parent 82090 023845c29d48
permissions -rw-r--r--
reinstated intersection of lists as inter_list_set
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
35942
667fd8553cd5 use internal SHA1 digest implementation for generating hash keys
boehmes
parents: 35941
diff changeset
     1
(*  Title:      Tools/cache_io.ML
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     2
    Author:     Sascha Boehme, TU Muenchen
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     3
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     4
Cache for output of external processes.
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     5
*)
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     6
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     7
signature CACHE_IO =
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     8
sig
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     9
  type cache
50317
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    10
  val unsynchronized_init: Path.T -> cache
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    11
  val cache_path_of: cache -> Path.T
82087
aee15b076916 clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents: 82076
diff changeset
    12
  val lookup: cache -> string -> Process_Result.T option * string
82090
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    13
  val run: Bash.params -> string -> Process_Result.T
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    14
  val run_and_cache: cache -> string -> Bash.params -> string -> Process_Result.T
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    15
  val run_cached: cache -> Bash.params -> string -> Process_Result.T
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    16
end
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    17
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    18
structure Cache_IO : CACHE_IO =
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    19
struct
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    20
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    21
abstype cache = Cache of {
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    22
  path: Path.T,
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    23
  table: (int * (int * int * int) Symtab.table) Synchronized.var }
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    24
with
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    25
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    26
fun cache_path_of (Cache {path, ...}) = path
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    27
50317
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    28
fun unsynchronized_init cache_path =
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    29
  let
50316
wenzelm
parents: 43850
diff changeset
    30
    val table =
wenzelm
parents: 43850
diff changeset
    31
      if File.exists cache_path then
wenzelm
parents: 43850
diff changeset
    32
        let
82076
265aec8a81e4 proper Path.print for user output (amending 9498623b27f0);
wenzelm
parents: 78629
diff changeset
    33
          fun err () = error ("Cache IO: corrupted cache file: " ^ Path.print cache_path)
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    34
50316
wenzelm
parents: 43850
diff changeset
    35
          fun int_of_string s =
wenzelm
parents: 43850
diff changeset
    36
            (case read_int (raw_explode s) of
wenzelm
parents: 43850
diff changeset
    37
              (i, []) => i
wenzelm
parents: 43850
diff changeset
    38
            | _ => err ())
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    39
50316
wenzelm
parents: 43850
diff changeset
    40
          fun split line =
wenzelm
parents: 43850
diff changeset
    41
            (case space_explode " " line of
wenzelm
parents: 43850
diff changeset
    42
              [key, len1, len2] => (key, int_of_string len1, int_of_string len2)
wenzelm
parents: 43850
diff changeset
    43
            | _ => err ())
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    44
50316
wenzelm
parents: 43850
diff changeset
    45
          fun parse line ((i, l), tab) =
wenzelm
parents: 43850
diff changeset
    46
            if i = l
wenzelm
parents: 43850
diff changeset
    47
            then
wenzelm
parents: 43850
diff changeset
    48
              let val (key, l1, l2) = split line
wenzelm
parents: 43850
diff changeset
    49
              in ((i+1, l+l1+l2+1), Symtab.update (key, (i+1, l1, l2)) tab) end
wenzelm
parents: 43850
diff changeset
    50
            else ((i+1, l), tab)
75616
986506233812 clarified signature: File.read_lines is based on scalable Bytes.T;
wenzelm
parents: 75614
diff changeset
    51
        in apfst fst (fold parse (File.read_lines cache_path) ((1, 1), Symtab.empty)) end
50316
wenzelm
parents: 43850
diff changeset
    52
      else (1, Symtab.empty)
wenzelm
parents: 43850
diff changeset
    53
  in Cache {path = cache_path, table = Synchronized.var "Cache_IO" table} end
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    54
50316
wenzelm
parents: 43850
diff changeset
    55
fun lookup (Cache {path = cache_path, table}) str =
41954
fb94df4505a0 explicit type SHA1.digest;
wenzelm
parents: 41945
diff changeset
    56
  let val key = SHA1.rep (SHA1.digest str)
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    57
  in
50317
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    58
    Synchronized.change_result table (fn tab =>
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    59
      (case Symtab.lookup (snd tab) key of
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    60
        NONE => ((NONE, key), tab)
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    61
      | SOME (p, len1, len2) =>
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    62
          let
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    63
            fun load line (i, xsp) =
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    64
              if i < p then (i+1, xsp)
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    65
              else if i < p + len1 then (i+1, apfst (cons line) xsp)
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    66
              else if i < p + len2 then (i+1, apsnd (cons line) xsp)
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    67
              else (i, xsp)
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    68
            val (out, err) =
75616
986506233812 clarified signature: File.read_lines is based on scalable Bytes.T;
wenzelm
parents: 75614
diff changeset
    69
              apply2 rev (snd (fold load (File.read_lines cache_path) (1, ([], []))))
82087
aee15b076916 clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents: 82076
diff changeset
    70
            val result =
aee15b076916 clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents: 82076
diff changeset
    71
              Process_Result.make {rc = 0, out_lines = out, err_lines = err, timing = Timing.zero}
aee15b076916 clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents: 82076
diff changeset
    72
          in ((SOME result, key), tab) end))
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    73
  end
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    74
82090
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    75
fun run cmd input =
36086
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
    76
  let
82090
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    77
    val result = cmd
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    78
      |> Bash.input (Bytes.string input)
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    79
      |> Bash.redirect
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    80
      |> Isabelle_System.bash_process
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    81
    val rc = Process_Result.rc result
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    82
  in
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    83
    if rc = Process_Result.startup_failure_rc then
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    84
      Process_Result.make
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    85
       {rc = rc,
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    86
        err_lines = Process_Result.out_lines result,
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    87
        out_lines = [],
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    88
        timing = Process_Result.timing result}
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    89
    else result
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    90
  end
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    91
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    92
fun run_and_cache (Cache {path = cache_path, table}) key cmd input =
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    93
  let
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
    94
    val result = run cmd input
82087
aee15b076916 clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents: 82076
diff changeset
    95
    val out_lines = Process_Result.out_lines result
aee15b076916 clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents: 82076
diff changeset
    96
    val err_lines = Process_Result.err_lines result
aee15b076916 clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents: 82076
diff changeset
    97
    val (l1, l2) = apply2 length (out_lines, err_lines)
36086
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
    98
    val header = key ^ " " ^ string_of_int l1 ^ " " ^ string_of_int l2
82087
aee15b076916 clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents: 82076
diff changeset
    99
    val lines = map (suffix "\n") (header :: out_lines @ err_lines)
36086
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
   100
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
   101
    val _ = Synchronized.change table (fn (p, tab) =>
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
   102
      if Symtab.defined tab key then (p, tab)
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
   103
      else
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
   104
        let val _ = File.append_list cache_path lines
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
   105
        in (p+l1+l2+1, Symtab.update (key, (p+1, l1, l2)) tab) end)
82087
aee15b076916 clarified signature: re-use existing Process_Result.T (NB: err_lines are de-facto program startup errors, anything else is redirected to out_lines);
wenzelm
parents: 82076
diff changeset
   106
  in result end
36086
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
   107
82090
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
   108
fun run_cached cache cmd input =
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
   109
  (case lookup cache input of
023845c29d48 avoid tmp files to improve robustness on Windows, where ERROR_PATH_NOT_FOUND has been seen (see also 569135d7352a);
wenzelm
parents: 82087
diff changeset
   110
    (NONE, key) => run_and_cache cache key cmd input
36086
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
   111
  | (SOME output, _) => output)
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
   112
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
   113
end
40425
c9b5e0fcee31 return the process return code along with the process outputs
boehmes
parents: 37740
diff changeset
   114
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
   115
end