handle ATP proof delimiters in a cleaner, more extensible fashion
authorblanchet
Fri, 23 Apr 2010 13:16:50 +0200
changeset 36369 d2cd0d04b8e6
parent 36294 59a55dfa76d5
child 36370 a4f601daa175
handle ATP proof delimiters in a cleaner, more extensible fashion
src/HOL/Tools/ATP_Manager/atp_manager.ML
src/HOL/Tools/ATP_Manager/atp_minimal.ML
src/HOL/Tools/ATP_Manager/atp_wrapper.ML
src/HOL/Tools/Sledgehammer/sledgehammer_proof_reconstruct.ML
--- a/src/HOL/Tools/ATP_Manager/atp_manager.ML	Fri Apr 23 12:24:30 2010 +0200
+++ b/src/HOL/Tools/ATP_Manager/atp_manager.ML	Fri Apr 23 13:16:50 2010 +0200
@@ -39,6 +39,7 @@
      message: string,
      relevant_thm_names: string list,
      atp_run_time_in_msecs: int,
+     output: string,
      proof: string,
      internal_thm_names: string Vector.vector,
      filtered_clauses: (thm * (string * int)) list}
@@ -99,6 +100,7 @@
    message: string,
    relevant_thm_names: string list,
    atp_run_time_in_msecs: int,
+   output: string,
    proof: string,
    internal_thm_names: string Vector.vector,
    filtered_clauses: (thm * (string * int)) list};
--- a/src/HOL/Tools/ATP_Manager/atp_minimal.ML	Fri Apr 23 12:24:30 2010 +0200
+++ b/src/HOL/Tools/ATP_Manager/atp_minimal.ML	Fri Apr 23 13:16:50 2010 +0200
@@ -45,6 +45,7 @@
    | Timeout => "Timeout"
    | Error => "Error"
 
+(* FIXME: move to "atp_wrapper.ML" *)
 val failure_strings =
   [("SPASS beiseite: Ran out of time.", Timeout),
    ("Timeout", Timeout),
@@ -52,11 +53,11 @@
    ("# Cannot determine problem status within resource limit", Timeout),
    ("Error", Error)]
 
-fun outcome_of_result (result as {success, proof, ...} : prover_result) =
+fun outcome_of_result (result as {success, output, ...} : prover_result) =
   if success then
     Success
   else case get_first (fn (s, outcome) =>
-                          if String.isSubstring s proof then SOME outcome
+                          if String.isSubstring s output then SOME outcome
                           else NONE) failure_strings of
     SOME outcome => outcome
   | NONE => Failure
--- a/src/HOL/Tools/ATP_Manager/atp_wrapper.ML	Fri Apr 23 12:24:30 2010 +0200
+++ b/src/HOL/Tools/ATP_Manager/atp_wrapper.ML	Fri Apr 23 13:16:50 2010 +0200
@@ -49,6 +49,7 @@
  {home: string,
   executable: string,
   arguments: Time.time -> string,
+  proof_delims: (string * string) list,
   known_failures: (string list * string) list,
   max_new_clauses: int,
   prefers_theory_relevant: bool};
@@ -60,21 +61,31 @@
   Exn.capture f path
   |> tap (fn _ => cleanup path)
   |> Exn.release
-  |> tap (after path);
+  |> tap (after path)
+
+(* Splits by the first possible of a list of delimiters. *)
+fun extract_proof delims output =
+  case pairself (find_first (fn s => String.isSubstring s output))
+                (ListPair.unzip delims) of
+    (SOME begin_delim, SOME end_delim) =>
+    output |> first_field begin_delim |> the |> snd
+           |> first_field end_delim |> the |> fst
+  | _ => ""
 
-fun find_known_failure known_failures proof =
-  case map_filter (fn (patterns, message) =>
-                      if exists (fn pattern => String.isSubstring pattern proof)
-                                patterns then
-                        SOME message
-                      else
-                        NONE) known_failures of
-    [] => if is_proof_well_formed proof then ""
-          else "Error: The ATP output is ill-formed."
-  | (message :: _) => message
+fun extract_proof_or_failure proof_delims known_failures output =
+  case map_filter
+           (fn (patterns, message) =>
+               if exists (fn p => String.isSubstring p output) patterns then
+                 SOME message
+               else
+                 NONE) known_failures of
+    [] => (case extract_proof proof_delims output of
+             "" => ("", "Error: The ATP output is malformed.")
+           | proof => (proof, ""))
+  | (message :: _) => ("", message)
 
 fun generic_prover overlord get_facts prepare write_file home executable args
-        known_failures name
+        proof_delims known_failures name
         ({debug, full_types, explicit_apply, isar_proof, modulus, sorts, ...}
          : params) minimize_command
         ({subgoal, goal, relevance_override, axiom_clauses, filtered_clauses}
@@ -135,14 +146,14 @@
     fun split_time s =
       let
         val split = String.tokens (fn c => str c = "\n");
-        val (proof, t) = s |> split |> split_last |> apfst cat_lines;
+        val (output, t) = s |> split |> split_last |> apfst cat_lines;
         fun as_num f = f >> (fst o read_int);
         val num = as_num (Scan.many1 Symbol.is_ascii_digit);
         val digit = Scan.one Symbol.is_ascii_digit;
         val num3 = as_num (digit ::: digit ::: (digit >> single));
         val time = num --| Scan.$$ "." -- num3 >> (fn (a, b) => a * 1000 + b);
         val as_time = the_default 0 o Scan.read Symbol.stopper time o explode;
-      in (proof, as_time t) end;
+      in (output, as_time t) end;
     fun split_time' s =
       if Config.get ctxt measure_runtime then split_time s else (s, 0)
     fun run_on probfile =
@@ -154,7 +165,7 @@
     (* If the problem file has not been exported, remove it; otherwise, export
        the proof file too. *)
     fun cleanup probfile = if destdir' = "" then try File.rm probfile else NONE;
-    fun export probfile (((proof, _), _), _) =
+    fun export probfile (((output, _), _), _) =
       if destdir' = "" then
         ()
       else
@@ -163,14 +174,15 @@
                        "% " ^ command_line probfile ^ "\n% " ^ timestamp () ^
                        "\n"
                      else
-                        "") ^ proof)
+                        "") ^ output)
 
-    val (((proof, atp_run_time_in_msecs), rc), _) =
+    val (((output, atp_run_time_in_msecs), rc), _) =
       with_path cleanup export run_on (prob_pathname subgoal);
 
     (* Check for success and print out some information on failure. *)
-    val failure = find_known_failure known_failures proof;
-    val success = rc = 0 andalso failure = "";
+    val (proof, failure) =
+      extract_proof_or_failure proof_delims known_failures output
+    val success = (rc = 0 andalso failure = "")
     val (message, relevant_thm_names) =
       if success then
         proof_text isar_proof debug modulus sorts ctxt
@@ -178,12 +190,12 @@
       else if failure <> "" then
         (failure ^ "\n", [])
       else
-        ("Unknown ATP error: " ^ proof ^ ".\n", [])
+        ("Unknown ATP error: " ^ output ^ ".\n", [])
   in
     {success = success, message = message,
      relevant_thm_names = relevant_thm_names,
-     atp_run_time_in_msecs = atp_run_time_in_msecs, proof = proof,
-     internal_thm_names = internal_thm_names,
+     atp_run_time_in_msecs = atp_run_time_in_msecs, output = output,
+     proof = proof, internal_thm_names = internal_thm_names,
      filtered_clauses = the_filtered_clauses}
   end;
 
@@ -191,8 +203,8 @@
 (* generic TPTP-based provers *)
 
 fun generic_tptp_prover
-        (name, {home, executable, arguments, known_failures, max_new_clauses,
-                prefers_theory_relevant})
+        (name, {home, executable, arguments, proof_delims, known_failures,
+                max_new_clauses, prefers_theory_relevant})
         (params as {debug, overlord, respect_no_atp, relevance_threshold,
                     convergence, theory_relevant, higher_order, follow_defs,
                     isar_proof, ...})
@@ -203,7 +215,8 @@
                           (the_default prefers_theory_relevant theory_relevant))
       (prepare_clauses higher_order false)
       (write_tptp_file (debug andalso overlord andalso not isar_proof)) home
-      executable (arguments timeout) known_failures name params minimize_command
+      executable (arguments timeout) proof_delims known_failures name params
+      minimize_command
 
 fun tptp_prover name p = (name, generic_tptp_prover (name, p));
 
@@ -221,6 +234,8 @@
    executable = "vampire",
    arguments = (fn timeout => "--output_syntax tptp --mode casc -t " ^
                               string_of_int (generous_to_secs timeout)),
+   proof_delims = [("=========== Refutation ==========",
+                    "======= End of refutation =======")],
    known_failures =
      [(["Satisfiability detected", "CANNOT PROVE"],
        "The ATP problem is unprovable."),
@@ -233,12 +248,16 @@
 
 (* E prover *)
 
+val tstp_proof_delims =
+  ("# SZS output start CNFRefutation.", "# SZS output end CNFRefutation")
+
 val e_config : prover_config =
   {home = getenv "E_HOME",
    executable = "eproof",
    arguments = (fn timeout => "--tstp-in --tstp-out -l5 -xAutoDev \
                               \-tAutoDev --silent --cpu-limit=" ^
                               string_of_int (generous_to_secs timeout)),
+   proof_delims = [tstp_proof_delims],
    known_failures =
        [(["SZS status: Satisfiable", "SZS status Satisfiable"],
          "The ATP problem is unprovable."),
@@ -254,8 +273,8 @@
 (* SPASS *)
 
 fun generic_dfg_prover
-        (name, {home, executable, arguments, known_failures, max_new_clauses,
-                prefers_theory_relevant})
+        (name, {home, executable, arguments, proof_delims, known_failures,
+                max_new_clauses, prefers_theory_relevant})
         (params as {overlord, respect_no_atp, relevance_threshold, convergence,
                     theory_relevant, higher_order, follow_defs, ...})
         minimize_command timeout =
@@ -264,7 +283,8 @@
                           higher_order follow_defs max_new_clauses
                           (the_default prefers_theory_relevant theory_relevant))
       (prepare_clauses higher_order true) write_dfg_file home executable
-      (arguments timeout) known_failures name params minimize_command
+      (arguments timeout) proof_delims known_failures name params
+      minimize_command
 
 fun dfg_prover name p = (name, generic_dfg_prover (name, p))
 
@@ -276,6 +296,7 @@
    arguments = (fn timeout => "-Auto -SOS=1 -PGiven=0 -PProblem=0 -Splits=0" ^
      " -FullRed=0 -DocProof -VarWeight=3 -TimeLimit=" ^
      string_of_int (generous_to_secs timeout)),
+   proof_delims = [("Here is a proof", "Formulae used in the proof")],
    known_failures =
      [(["SPASS beiseite: Completion found."], "The ATP problem is unprovable."),
       (["SPASS beiseite: Ran out of time."], "The ATP timed out."),
@@ -297,6 +318,7 @@
   {home = #home spass_config,
    executable = #executable spass_config,
    arguments = prefix "-TPTP " o #arguments spass_config,
+   proof_delims = #proof_delims spass_config,
    known_failures =
      #known_failures spass_config @
      [(["unrecognized option `-TPTP'", "Unrecognized option TPTP"],
@@ -343,13 +365,14 @@
     "Error: The remote ATP proof is ill-formed.")]
 
 fun remote_prover_config prover_prefix args
-        ({known_failures, max_new_clauses, prefers_theory_relevant, ...}
-         : prover_config) : prover_config =
+        ({proof_delims, known_failures, max_new_clauses,
+          prefers_theory_relevant, ...} : prover_config) : prover_config =
   {home = getenv "ISABELLE_ATP_MANAGER",
    executable = "SystemOnTPTP",
    arguments = (fn timeout =>
      args ^ " -t " ^ string_of_int (generous_to_secs timeout) ^ " -s " ^
      the_system prover_prefix),
+   proof_delims = insert (op =) tstp_proof_delims proof_delims,
    known_failures = remote_known_failures @ known_failures,
    max_new_clauses = max_new_clauses,
    prefers_theory_relevant = prefers_theory_relevant}
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_proof_reconstruct.ML	Fri Apr 23 12:24:30 2010 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_proof_reconstruct.ML	Fri Apr 23 13:16:50 2010 +0200
@@ -14,7 +14,6 @@
   val num_typargs: theory -> string -> int
   val make_tvar: string -> typ
   val strip_prefix: string -> string -> string option
-  val is_proof_well_formed: string -> bool
   val metis_line: int -> int -> string list -> string
   val metis_proof_text:
     minimize_command * string * string vector * thm * int
@@ -69,7 +68,7 @@
 fun is_digit s = Char.isDigit (String.sub(s,0));
 val integer = Scan.many1 is_digit >> (the o Int.fromString o implode);
 
-(* needed for SPASS's nonstandard output format *)
+(* needed for SPASS's output format *)
 fun fix_symbol "equal" = "c_equal"
   | fix_symbol s = s
 
@@ -521,47 +520,22 @@
   handle STREE _ => raise Fail "Cannot parse ATP output";
 
 
-(*=== EXTRACTING PROOF-TEXT === *)
-
-val begin_proof_strs = ["# SZS output start CNFRefutation.",
-  "=========== Refutation ==========",
-  "Here is a proof"];
-
-val end_proof_strs = ["# SZS output end CNFRefutation",
-  "======= End of refutation =======",
-  "Formulae used in the proof"];
-
-fun get_proof_extract proof =
-  (* Splits by the first possible of a list of splitters. *)
-  case pairself (find_first (fn s => String.isSubstring s proof))
-                (begin_proof_strs, end_proof_strs) of
-    (SOME begin_string, SOME end_string) =>
-    proof |> first_field begin_string |> the |> snd
-          |> first_field end_string |> the |> fst
-  | _ => raise Fail "Cannot extract proof"
-
-(* ==== CHECK IF PROOF WAS SUCCESSFUL === *)
-
-fun is_proof_well_formed proof =
-  forall (exists (fn s => String.isSubstring s proof))
-         [begin_proof_strs, end_proof_strs]
-
 (* === EXTRACTING LEMMAS === *)
 (* A list consisting of the first number in each line is returned.
    TPTP: Interesting lines have the form "cnf(108, axiom, ...)", where the
    number (108) is extracted.
    DFG: Lines have the form "108[0:Inp] ...", where the first number (108) is
    extracted. *)
-fun get_step_nums proof_extract =
+fun get_step_nums proof =
   let
     val toks = String.tokens (not o is_ident_char)
     fun inputno ("cnf" :: ntok :: "axiom" :: _) = Int.fromString ntok
       | inputno ("cnf" :: ntok :: "negated_conjecture" :: _) =
         Int.fromString ntok
-      | inputno (ntok::"0"::"Inp"::_) = Int.fromString ntok  (* DFG format *)
+      | inputno (ntok :: "0" :: "Inp" :: _) =
+        Int.fromString ntok  (* SPASS's output format *)
       | inputno _ = NONE
-    val lines = split_lines proof_extract
-  in map_filter (inputno o toks) lines end
+  in map_filter (inputno o toks) (split_lines proof) end
   
 (*Used to label theorems chained into the sledgehammer call*)
 val chained_hint = "CHAINED";
@@ -588,8 +562,7 @@
 fun metis_proof_text (minimize_command, proof, thm_names, goal, i) =
   let
     val lemmas =
-      proof |> get_proof_extract
-            |> get_step_nums
+      proof |> get_step_nums
             |> filter (is_axiom thm_names)
             |> map (fn i => Vector.sub (thm_names, i - 1))
             |> filter (fn x => x <> "??.unknown")
@@ -625,8 +598,7 @@
 fun isar_proof_text debug modulus sorts ctxt
                     (minimize_command, proof, thm_names, goal, i) =
   let
-    val cnfs = proof |> get_proof_extract |> split_lines |> map strip_spaces
-                     |> filter is_proof_line
+    val cnfs = proof |> split_lines |> map strip_spaces |> filter is_proof_line
     val (one_line_proof, lemma_names) =
       metis_proof_text (minimize_command, proof, thm_names, goal, i)
     val tokens = String.tokens (fn c => c = #" ") one_line_proof