src/Tools/cache_io.ML
author wenzelm
Fri, 26 Apr 2024 13:25:44 +0200
changeset 80150 96f60533ec1d
parent 78629 569135d7352a
permissions -rw-r--r--
update Windows test machines;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
35942
667fd8553cd5 use internal SHA1 digest implementation for generating hash keys
boehmes
parents: 35941
diff changeset
     1
(*  Title:      Tools/cache_io.ML
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     2
    Author:     Sascha Boehme, TU Muenchen
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     3
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     4
Cache for output of external processes.
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     5
*)
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     6
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
     7
signature CACHE_IO =
sig
  (*IO wrapper*)

  (*outcome of one external command: "output" is what the bash invocation
    itself returned, "redirected_output" is what was read back from the
    output file, "return_code" is the process return code*)
  type result =
   {output: string list,
    redirected_output: string list,
    return_code: int}

  (*raw_run make_cmd input in_path out_path: write input to in_path, run the
    command built by make_cmd, read the result back from out_path*)
  val raw_run: (Path.T -> Path.T -> string) -> string -> Path.T -> Path.T -> result

  (*like raw_run, but on temporary input/output files*)
  val run: (Path.T -> Path.T -> string) -> string -> result

  (*cache*)

  type cache

  (*create a cache from a cache file; an existing file is indexed*)
  val unsynchronized_init: Path.T -> cache

  (*the cache file behind a cache*)
  val cache_path_of: cache -> Path.T

  (*lookup cache input: the cached result (if any) together with the
    hash key of the input*)
  val lookup: cache -> string -> result option * string

  (*run_and_cache cache key make_cmd input: run the command and store its
    output under the given key*)
  val run_and_cache: cache -> string -> (Path.T -> Path.T -> string) -> string -> result

  (*lookup first, run_and_cache on a miss*)
  val run_cached: cache -> (Path.T -> Path.T -> string) -> string -> result
end
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    25
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    26
structure Cache_IO : CACHE_IO =
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    27
struct
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    28
40425
c9b5e0fcee31 return the process return code along with the process outputs
boehmes
parents: 37740
diff changeset
    29
(* IO wrapper *)
c9b5e0fcee31 return the process return code along with the process outputs
boehmes
parents: 37740
diff changeset
    30
36086
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
    31
(*name prefix of the temporary files created by "run"*)
val cache_io_prefix = "cache-io-"

(*outcome of one external command*)
type result =
 {output: string list,             (*lines returned by the bash invocation*)
  redirected_output: string list,  (*lines read back from the output file*)
  return_code: int}                (*process return code*)
b8482ff0bc92 check the return code of the SMT solver and raise an exception if the prover failed
boehmes
parents: 40425
diff changeset
    37
78629
569135d7352a more robust access to output file of external smt, notably for Windows 11, where transient ERROR_SHARING_VIOLATION has been seen;
wenzelm
parents: 75616
diff changeset
    38
(*Read the lines of a file, tolerating transient read failures.

  A missing file yields [].  On Windows a failed read is retried up to 20
  times with a pause of 0.05s in between (transient sharing violations have
  been seen there); on other platforms there is a single attempt.  If every
  attempt fails the result is [].*)
fun try_read_lines path =
  let
    val max_retries = if ML_System.platform_is_windows then 20 else 0
    val pause = seconds 0.05

    fun attempt retries_left =
      (case try File.read_lines path of
        SOME content => content
      | NONE =>
          if retries_left <= 0 then []
          else (OS.Process.sleep pause; attempt (retries_left - 1)))
  in
    if not (File.exists path) then [] else attempt max_retries
  end
569135d7352a more robust access to output file of external smt, notably for Windows 11, where transient ERROR_SHARING_VIOLATION has been seen;
wenzelm
parents: 75616
diff changeset
    45
40578
2b098a549450 keep input and output files used to communicate with the SMT solver (for debugging purposes mainly)
boehmes
parents: 40538
diff changeset
    46
(*Run an external command on explicit input/output files.

  make_cmd: builds the bash command line from the input and output path
  str: text written to in_path before the command is started
  Result: "output" holds the lines returned by Isabelle_System.bash_output,
  "redirected_output" the lines read from out_path afterwards (empty if the
  file is absent or unreadable), "return_code" the return code of the command.*)
fun raw_run make_cmd str in_path out_path =
  let
    val () = File.write in_path str
    val command = make_cmd in_path out_path
    val (bash_text, exit_code) = Isabelle_System.bash_output command
    val file_lines = try_read_lines out_path
  in
    {output = split_lines bash_text,
     redirected_output = file_lines,
     return_code = exit_code}
  end
40578
2b098a549450 keep input and output files used to communicate with the SMT solver (for debugging purposes mainly)
boehmes
parents: 40538
diff changeset
    52
36086
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
    53
(*Run an external command via raw_run, using two temporary files (input and
  output) that are managed by Isabelle_System.with_tmp_file.*)
fun run make_cmd str =
  let
    fun with_tmp body = Isabelle_System.with_tmp_file cache_io_prefix "" body
  in
    with_tmp (fn in_path =>
      with_tmp (fn out_path =>
        raw_run make_cmd str in_path out_path))
  end
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    57
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    58
40425
c9b5e0fcee31 return the process return code along with the process outputs
boehmes
parents: 37740
diff changeset
    59
(* cache *)
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    60
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    61
abstype cache = Cache of {
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    62
  path: Path.T,
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    63
  table: (int * (int * int * int) Symtab.table) Synchronized.var }
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    64
with
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    65
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    66
(*the cache file that backs the given cache*)
fun cache_path_of (Cache args) = #path args
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    67
50317
4d1590544b91 synchronized read access to cache file -- avoid potential conflict with ongoing write (which is non-atomic);
wenzelm
parents: 50316
diff changeset
    68
(*Create a cache backed by the file cache_path.

  An existing file is scanned once to build the in-memory index.  The file is
  a sequence of entries, each consisting of a header line "key len1 len2"
  followed by len1 + len2 payload lines.  The index maps each key to
  (line number of first payload line, len1, len2); the integer stored next to
  the index is the line number following the last line read (line numbers
  start at 1).  A malformed header raises ERROR.  A missing file yields the
  empty index with next line 1.*)
fun unsynchronized_init cache_path =
  let
    fun corrupted () =
      error ("Cache IO: corrupted cache file: " ^ File.bash_path cache_path)

    (*the whole text must be consumed by read_int*)
    fun parse_count text =
      (case read_int (raw_explode text) of
        (n, []) => n
      | _ => corrupted ())

    fun parse_header text =
      (case space_explode " " text of
        [key, count1, count2] => (key, parse_count count1, parse_count count2)
      | _ => corrupted ())

    (*line_no: number of the current line; next_header: line number where
      the next header is expected -- all other lines are payload and skipped*)
    fun scan text ((line_no, next_header), entries) =
      if line_no <> next_header then ((line_no + 1, next_header), entries)
      else
        let
          val (key, n1, n2) = parse_header text
          val first_payload = line_no + 1
        in
          ((first_payload, next_header + n1 + n2 + 1),
            Symtab.update (key, (first_payload, n1, n2)) entries)
        end

    fun load_index () =
      let
        val ((next_line, _), entries) =
          fold scan (File.read_lines cache_path) ((1, 1), Symtab.empty)
      in (next_line, entries) end

    val initial =
      if File.exists cache_path then load_index () else (1, Symtab.empty)
  in Cache {path = cache_path, table = Synchronized.var "Cache_IO" initial} end
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
    94
50316
wenzelm
parents: 43850
diff changeset
    95
(*Look up the cached result for the input str.

  Returns the cached result (if any) paired with the SHA1 key of str, so that
  a caller can pass the key on to run_and_cache after a miss.

  An index entry (p, len1, len2) means: the cache file holds len1 lines of
  redirected output starting at line p, immediately followed by len2 lines of
  regular output, i.e. the second segment occupies lines p + len1 up to
  p + len1 + len2 - 1.  Cached results always carry return_code = 0, since
  the return code is not stored in the cache file.

  The file is read inside Synchronized.change_result on the index variable,
  which is the same variable that guards appending to the cache file.*)
fun lookup (Cache {path = cache_path, table}) str =
  let val key = SHA1.rep (SHA1.digest str)
  in
    Synchronized.change_result table (fn tab =>
      (case Symtab.lookup (snd tab) key of
        NONE => ((NONE, key), tab)
      | SOME (p, len1, len2) =>
          let
            val start2 = p + len1       (*first line of the second segment*)
            val stop = start2 + len2    (*first line after the entry*)

            (*i is the current line number; it stops advancing once the
              entry has been passed, so all later lines are ignored*)
            fun load line (i, xsp) =
              if i < p then (i+1, xsp)
              else if i < start2 then (i+1, apfst (cons line) xsp)
              else if i < stop then (i+1, apsnd (cons line) xsp)
              else (i, xsp)
            val (out, err) =
              apply2 rev (snd (fold load (File.read_lines cache_path) (1, ([], []))))
          in ((SOME {output = err, redirected_output = out, return_code = 0}, key), tab) end))
  end
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
   112
50316
wenzelm
parents: 43850
diff changeset
   113
(*Run the external command via "run" and store its output under key.

  The entry appended to the cache file is the header "key len1 len2",
  followed by the len1 lines of redirected output and the len2 lines of
  regular output.  Nothing is written if the key is already present in the
  index.  The return code is not stored.  The result of the run is returned
  unchanged in either case.*)
fun run_and_cache (Cache {path = cache_path, table}) key make_cmd str =
  let
    val res as {output = err_lines, redirected_output = out_lines, return_code = _} =
      run make_cmd str
    val n_out = length out_lines
    val n_err = length err_lines
    val header = space_implode " " [key, string_of_int n_out, string_of_int n_err]
    val entry_lines = map (fn line => line ^ "\n") (header :: out_lines @ err_lines)

    (*next_line: line number the header of a new entry will get*)
    fun store (next_line, entries) =
      if Symtab.defined entries key then (next_line, entries)
      else
        (File.append_list cache_path entry_lines;
         (next_line + n_out + n_err + 1,
           Symtab.update (key, (next_line + 1, n_out, n_err)) entries))

    val () = Synchronized.change table store
  in res end
36086
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
   126
8e5454761f26 simplified Cache_IO interface (input is just a string and not already stored in a file)
boehmes
parents: 35942
diff changeset
   127
(*Return the cached result for str if there is one; otherwise run the
  command and cache its output under the key computed by lookup.*)
fun run_cached cache make_cmd str =
  let val (cached, key) = lookup cache str in
    (case cached of
      SOME res => res
    | NONE => run_and_cache cache key make_cmd str)
  end
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
   131
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
   132
end
40425
c9b5e0fcee31 return the process return code along with the process outputs
boehmes
parents: 37740
diff changeset
   133
35151
117247018b54 added Cache_IO: cache for output of external tools,
boehmes
parents:
diff changeset
   134
end