added Cache_IO: cache for output of external tools,
authorboehmes
Tue, 16 Feb 2010 15:25:36 +0100
changeset 35151 117247018b54
parent 35150 082fa4bd403d
child 35152 6007909a28bc
added Cache_IO: cache for output of external tools, changed SMT solver interface to use Cache_IO
etc/components
src/HOL/IsaMakefile
src/HOL/SMT/SMT_Base.thy
src/HOL/SMT/Tools/smt_solver.ML
src/HOL/SMT/lib/scripts/run_smt_solver
src/Tools/Cache_IO/cache_io.ML
src/Tools/Cache_IO/etc/settings
src/Tools/Cache_IO/lib/scripts/compute_hash_key
--- a/etc/components	Tue Feb 16 15:16:33 2010 +0100
+++ b/etc/components	Tue Feb 16 15:25:36 2010 +0100
@@ -13,6 +13,7 @@
 #misc components
 src/Tools/Code
 src/Tools/WWW_Find
+src/Tools/Cache_IO
 src/HOL/Tools/ATP_Manager
 src/HOL/Mirabelle
 src/HOL/Library/Sum_Of_Squares
--- a/src/HOL/IsaMakefile	Tue Feb 16 15:16:33 2010 +0100
+++ b/src/HOL/IsaMakefile	Tue Feb 16 15:25:36 2010 +0100
@@ -1226,7 +1226,8 @@
   SMT/Tools/smt_solver.ML SMT/Tools/cvc3_solver.ML                      \
   SMT/Tools/yices_solver.ML SMT/Tools/z3_proof_terms.ML                 \
   SMT/Tools/z3_proof_rules.ML SMT/Tools/z3_proof.ML                     \
-  SMT/Tools/z3_model.ML SMT/Tools/z3_interface.ML SMT/Tools/z3_solver.ML
+  SMT/Tools/z3_model.ML SMT/Tools/z3_interface.ML			\
+  SMT/Tools/z3_solver.ML $(SRC)/Tools/Cache_IO/cache_io.ML
 	@cd SMT; $(ISABELLE_TOOL) usedir -b -g true $(OUT)/HOL-Word HOL-SMT
 
 
--- a/src/HOL/SMT/SMT_Base.thy	Tue Feb 16 15:16:33 2010 +0100
+++ b/src/HOL/SMT/SMT_Base.thy	Tue Feb 16 15:25:36 2010 +0100
@@ -8,6 +8,7 @@
 imports Real "~~/src/HOL/Word/Word"
   "~~/src/HOL/Decision_Procs/Dense_Linear_Order"
 uses
+  "~~/src/Tools/Cache_IO/cache_io.ML"
   ("Tools/smt_normalize.ML")
   ("Tools/smt_monomorph.ML")
   ("Tools/smt_translate.ML")
--- a/src/HOL/SMT/Tools/smt_solver.ML	Tue Feb 16 15:16:33 2010 +0100
+++ b/src/HOL/SMT/Tools/smt_solver.ML	Tue Feb 16 15:25:36 2010 +0100
@@ -28,8 +28,10 @@
   val with_timeout: Proof.context -> ('a -> 'b) -> 'a -> 'b
   val trace: bool Config.T
   val trace_msg: Proof.context -> ('a -> string) -> 'a -> unit
+
+  (*certificates*)
   val record: bool Config.T
-  val certificates: string Config.T
+  val select_certificates: string -> Context.generic -> Context.generic
 
   (*solvers*)
   type solver = Proof.context -> thm list -> thm
@@ -88,30 +90,29 @@
 fun trace_msg ctxt f x =
   if Config.get ctxt trace then tracing (f x) else ()
 
+
+(* SMT certificates *)
+
 val (record, setup_record) = Attrib.config_bool "smt_record" true
-val no_certificates = ""
-val (certificates, setup_certificates) =
-  Attrib.config_string "smt_certificates" no_certificates
 
+structure Certificates = Generic_Data
+(
+  type T = Cache_IO.cache option
+  val empty = NONE
+  val extend = I
+  fun merge (s, _) = s
+)
+
+fun select_certificates name = Certificates.put (
+  if name = "" then NONE
+  else SOME (Cache_IO.make (Path.explode name)))
 
 
 (* interface to external solvers *)
 
 local
 
-fun with_files ctxt f =
-  let
-    val paths as (problem_path, proof_path) =
-      "smt-" ^ serial_string ()
-      |> (fn n => (n, n ^ ".proof"))
-      |> pairself (File.tmp_path o Path.explode)
-
-    val y = Exn.capture f (problem_path, proof_path)
-
-    val _ = pairself (try File.rm) paths
-  in Exn.release y end
-
-fun invoke ctxt output f (paths as (problem_path, proof_path)) =
+fun invoke ctxt output f problem_path =
   let
     fun pretty tag ls = Pretty.string_of (Pretty.big_list tag
       (map Pretty.str ls))
@@ -120,11 +121,10 @@
     val _ = trace_msg ctxt (pretty "SMT problem:" o split_lines o File.read)
       problem_path
 
-    val (s, _) = with_timeout ctxt f paths
-    val _ = trace_msg ctxt (pretty "SMT solver:") (split_lines s)
+    val (res, err) = with_timeout ctxt f problem_path
+    val _ = trace_msg ctxt (pretty "SMT solver:") err
 
-    fun lines_of path = the_default [] (try (File.fold_lines cons path) [])
-    val ls = rev (dropwhile (equal "") (lines_of proof_path))
+    val ls = rev (dropwhile (equal "") (rev res))
     val _ = trace_msg ctxt (pretty "SMT result:") ls
   in (x, ls) end
 
@@ -135,37 +135,40 @@
     val remote_url = getenv "REMOTE_SMT_URL"
   in
     if local_solver <> ""
-    then (["local", local_solver],
-      "Invoking local SMT solver " ^ quote local_solver ^ " ...")
-    else if remote_solver <> "" andalso remote_url <> ""
-    then (["remote", remote_solver],
-      "Invoking remote SMT solver " ^ quote remote_solver ^ " at " ^
-      quote remote_url ^ " ...")
+    then 
+     (tracing ("Invoking local SMT solver " ^ quote local_solver ^ " ...");
+      [local_solver])
+    else if remote_solver <> ""
+    then
+     (tracing ("Invoking remote SMT solver " ^ quote remote_solver ^ " at " ^
+        quote remote_url ^ " ...");
+      [getenv "REMOTE_SMT", remote_solver])
     else error ("Undefined Isabelle environment variable: " ^ quote env_var)
   end
 
-fun run ctxt cmd args (problem_path, proof_path) =
-  let
-    val certs = Config.get ctxt certificates
-    val certs' = 
-      if certs = no_certificates then "-"
-      else File.shell_path (Path.explode certs)
-    val (solver, msg) =
-      if certs = no_certificates orelse Config.get ctxt record
-      then choose cmd
-      else (["certificate"], "Using certificate from " ^ quote certs' ^ " ...")
-    val _ = tracing msg
+fun make_cmd solver args problem_path proof_path = space_implode " " (
+  map File.shell_quote (solver @ args) @
+  [File.shell_path problem_path, "2>&1", ">", File.shell_path proof_path])
+
+fun no_cmd _ _ = error ("Bad certificates cache: missing certificate")
+
+fun run ctxt cmd args problem_path =
+  let val certs = Certificates.get (Context.Proof ctxt)
   in
-    bash_output (space_implode " " (
-      File.shell_path (Path.explode (getenv "RUN_SMT_SOLVER")) :: certs' ::
-      map File.shell_quote (solver @ args) @
-      map File.shell_path [problem_path, proof_path]) ^ " 2>&1")
+    if is_none certs 
+    then Cache_IO.run' (make_cmd (choose cmd) args) problem_path
+    else if Config.get ctxt record
+    then Cache_IO.cached' (the certs) (make_cmd (choose cmd) args) problem_path
+    else
+     (tracing ("Using cached certificate from " ^
+        File.shell_path (Cache_IO.cache_path_of (the certs)) ^ " ...");
+      Cache_IO.cached' (the certs) no_cmd problem_path)
   end
 
 in
 
 fun run_solver ctxt cmd args output =
-  with_files ctxt (invoke ctxt output (run ctxt cmd args))
+  Cache_IO.with_tmp_file "smt-" (invoke ctxt output (run ctxt cmd args))
 
 end
 
@@ -278,7 +281,10 @@
   setup_timeout #>
   setup_trace #>
   setup_record #>
-  setup_certificates #>
+  Attrib.setup (Binding.name "smt_certificates")
+    (Scan.lift (OuterParse.$$$ "=" |-- Args.name) >>
+      (Thm.declaration_attribute o K o select_certificates))
+    "SMT certificates" #>
   Method.setup (Binding.name "smt") smt_method
     "Applies an SMT solver to the current goal."
 
--- a/src/HOL/SMT/lib/scripts/run_smt_solver	Tue Feb 16 15:16:33 2010 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,93 +0,0 @@
-#!/usr/bin/env perl
-#
-# Author: Sascha Boehme, TU Muenchen
-#
-# Cache for prover results, based on discriminating problems using MD5.
-
-use strict;
-use warnings;
-use Digest::MD5;
-
-
-# arguments
-
-my $certs_file = shift @ARGV;
-my $location = shift @ARGV;
-my $result_file = pop @ARGV;
-my $problem_file = $ARGV[-1];
-
-my $use_certs = not ($certs_file eq "-");
-
-
-# create MD5 problem digest
-
-my $problem_digest = "";
-if ($use_certs) {
-  my $md5 = Digest::MD5->new;
-  open FILE, "<$problem_file" or
-    die "ERROR: Failed to open '$problem_file' ($!)";
-  $md5->addfile(*FILE);
-  close FILE;
-  $problem_digest = $md5->b64digest;
-}
-
-
-# lookup problem digest among existing certificates and
-# return a cached result (if there is a certificate for the problem)
-
-if ($use_certs and -e $certs_file) {
-  my $cached = 0;
-  open CERTS, "<$certs_file" or die "ERROR: Failed to open '$certs_file' ($!)";
-  while (<CERTS>) {
-    if (m/(\S+) (\d+)/) {
-      if ($1 eq $problem_digest) {
-        my $start = tell CERTS;
-        open FILE, ">$result_file" or
-          die "ERROR: Failed to open '$result_file ($!)";
-        while ((tell CERTS) - $start < $2) {
-          my $line = <CERTS>;
-          print FILE $line;
-        }
-        close FILE;
-        $cached = 1;
-        last;
-      }
-      else { seek CERTS, $2, 1; }
-    }
-    else { die "ERROR: Invalid file format in '$certs_file'"; }
-  }
-  close CERTS;
-  if ($cached) { exit 0; }
-}
-
-
-# invoke (remote or local) prover
-
-my $result;
-
-if ($location eq "remote") {
-  $result = system "$ENV{'REMOTE_SMT'} @ARGV >$result_file 2>&1";
-}
-elsif ($location eq "local") {
-  $result = system "@ARGV >$result_file 2>&1";
-}
-else { die "ERROR: No SMT solver invoked"; }
-
-
-# cache result
-
-if ($use_certs) {
-  open CERTS, ">>$certs_file" or
-    die "ERROR: Failed to open '$certs_file' ($!)";
-  print CERTS
-    ("$problem_digest " . ((-s $result_file) + (length "\n")) . "\n");
-
-  open FILE, "<$result_file" or
-    die "ERROR: Failed to open '$result_file' ($!)";
-  foreach (<FILE>) { print CERTS $_; }
-  close FILE; 
-
-  print CERTS "\n";
-  close CERTS;
-}
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Tools/Cache_IO/cache_io.ML	Tue Feb 16 15:25:36 2010 +0100
@@ -0,0 +1,121 @@
+(*  Title:      Tools/Cache_IO/cache_io.ML
+    Author:     Sascha Boehme, TU Muenchen
+
+Cache for output of external processes.
+*)
+
+signature CACHE_IO =
+sig
+  val with_tmp_file: string -> (Path.T -> 'a) -> 'a
+  val run: (Path.T -> string) -> Path.T -> string list
+  val run': (Path.T -> Path.T -> string) -> Path.T -> string list * string list
+
+  type cache
+  val make: Path.T -> cache
+  val cache_path_of: cache -> Path.T
+  val cached: cache -> (Path.T -> string) -> Path.T -> string list
+  val cached': cache -> (Path.T -> Path.T -> string) -> Path.T ->
+    string list * string list
+end
+
+structure Cache_IO : CACHE_IO =
+struct
+
+fun with_tmp_file name f =
+  let
+    val path = File.tmp_path (Path.explode (name ^ serial_string ()))
+    val x = Exn.capture f path
+    val _ = try File.rm path
+  in Exn.release x end
+
+fun run' make_cmd in_path =
+  with_tmp_file "cache-io-" (fn out_path =>
+    let
+      val (out2, _) = bash_output (make_cmd in_path out_path)
+      val out1 = the_default [] (try (rev o File.fold_lines cons out_path) [])
+    in (out1, split_lines out2) end)
+
+fun run make_cmd = snd o run' (fn in_path => fn _ => make_cmd in_path)
+
+
+
+abstype cache = Cache of {
+  path: Path.T,
+  table: (int * (int * int * int) Symtab.table) Synchronized.var }
+with
+
+
+fun cache_path_of (Cache {path, ...}) = path
+
+
+fun load cache_path =
+  let
+    fun err () = error ("Cache IO: corrupted cache file: " ^
+      File.shell_path cache_path)
+
+    fun int_of_string s =
+      (case read_int (explode s) of
+        (i, []) => i
+      | _ => err ())    
+
+    fun split line =
+      (case space_explode " " line of
+        [key, len1, len2] => (key, int_of_string len1, int_of_string len2)
+      | _ => err ())
+
+    fun parse line ((i, l), tab) =
+      if i = l
+      then
+        let val (key, l1, l2) = split line
+        in ((i+1, l+l1+l2+1), Symtab.update (key, (i+1, l1, l2)) tab) end
+      else ((i+1, l), tab)
+  in apfst fst (File.fold_lines parse cache_path ((1, 1), Symtab.empty)) end 
+
+fun make path =
+  let val table = if File.exists path then load path else (1, Symtab.empty)
+  in Cache {path=path, table=Synchronized.var (Path.implode path) table} end
+
+
+fun get_hash_key path =
+  let
+    val arg = File.shell_path path
+    val (out, res) = bash_output (getenv "COMPUTE_HASH_KEY" ^ " " ^ arg)
+  in
+    if res = 0 then hd (split_lines out)
+    else error ("Cache IO: failed to generate hash key for file " ^ arg)
+  end
+
+fun load_cached_result cache_path (p, len1, len2) =
+  let
+    fun load line (i, xsp) =
+      if i < p then (i+1, xsp)
+      else if i < p + len1 then (i+1, apfst (cons line) xsp)
+      else if i < p + len2 then (i+1, apsnd (cons line) xsp)
+      else (i, xsp)
+  in pairself rev (snd (File.fold_lines load cache_path (1, ([], [])))) end
+
+fun cached' (Cache {path=cache_path, table}) make_cmd in_path =
+  let val key = get_hash_key in_path
+  in
+    (case Symtab.lookup (snd (Synchronized.value table)) key of
+      SOME pos => load_cached_result cache_path pos
+    | NONE =>
+        let
+          val res as (out, err) = run' make_cmd in_path
+          val (l1, l2) = pairself length res
+          val header = key ^ " " ^ string_of_int l1 ^ " " ^ string_of_int l2
+          val lines = map (suffix "\n") (header :: out @ err)
+
+          val _ = Synchronized.change table (fn (p, tab) =>
+            if Symtab.defined tab key then (p, tab)
+            else
+              let val _ = File.append_list cache_path lines
+              in (p+l1+l2+1, Symtab.update (key, (p+1, l1, l2)) tab) end)
+        in res end)
+  end
+
+fun cached cache make_cmd =
+  snd o cached' cache (fn in_path => fn _ => make_cmd in_path)
+
+end
+end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Tools/Cache_IO/etc/settings	Tue Feb 16 15:25:36 2010 +0100
@@ -0,0 +1,4 @@
+ISABELLE_CACHE_IO="$COMPONENT"
+
+COMPUTE_HASH_KEY="$ISABELLE_CACHE_IO/lib/scripts/compute_hash_key"
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Tools/Cache_IO/lib/scripts/compute_hash_key	Tue Feb 16 15:25:36 2010 +0100
@@ -0,0 +1,24 @@
+#!/usr/bin/env perl
+#
+# Author: Sascha Boehme, TU Muenchen
+#
+# Compute MD5 hash key.
+
+use strict;
+use warnings;
+use Digest::MD5;
+
+
+# argument
+
+my $file = $ARGV[0];
+
+
+# compute MD5 hash key
+
+my $md5 = Digest::MD5->new;
+open FILE, "<$file" or die "ERROR: Failed to open '$file' ($!)";
+$md5->addfile(*FILE);
+close FILE;
+print $md5->b64digest . "\n";
+