handle ATP proof delimiters in a cleaner, more extensible fashion
authorblanchet
Fri Apr 23 13:16:50 2010 +0200 (2010-04-23)
changeset 36369d2cd0d04b8e6
parent 36294 59a55dfa76d5
child 36370 a4f601daa175
handle ATP proof delimiters in a cleaner, more extensible fashion
src/HOL/Tools/ATP_Manager/atp_manager.ML
src/HOL/Tools/ATP_Manager/atp_minimal.ML
src/HOL/Tools/ATP_Manager/atp_wrapper.ML
src/HOL/Tools/Sledgehammer/sledgehammer_proof_reconstruct.ML
     1.1 --- a/src/HOL/Tools/ATP_Manager/atp_manager.ML	Fri Apr 23 12:24:30 2010 +0200
     1.2 +++ b/src/HOL/Tools/ATP_Manager/atp_manager.ML	Fri Apr 23 13:16:50 2010 +0200
     1.3 @@ -39,6 +39,7 @@
     1.4       message: string,
     1.5       relevant_thm_names: string list,
     1.6       atp_run_time_in_msecs: int,
     1.7 +     output: string,
     1.8       proof: string,
     1.9       internal_thm_names: string Vector.vector,
    1.10       filtered_clauses: (thm * (string * int)) list}
    1.11 @@ -99,6 +100,7 @@
    1.12     message: string,
    1.13     relevant_thm_names: string list,
    1.14     atp_run_time_in_msecs: int,
    1.15 +   output: string,
    1.16     proof: string,
    1.17     internal_thm_names: string Vector.vector,
    1.18     filtered_clauses: (thm * (string * int)) list};
     2.1 --- a/src/HOL/Tools/ATP_Manager/atp_minimal.ML	Fri Apr 23 12:24:30 2010 +0200
     2.2 +++ b/src/HOL/Tools/ATP_Manager/atp_minimal.ML	Fri Apr 23 13:16:50 2010 +0200
     2.3 @@ -45,6 +45,7 @@
     2.4     | Timeout => "Timeout"
     2.5     | Error => "Error"
     2.6  
     2.7 +(* FIXME: move to "atp_wrapper.ML" *)
     2.8  val failure_strings =
     2.9    [("SPASS beiseite: Ran out of time.", Timeout),
    2.10     ("Timeout", Timeout),
    2.11 @@ -52,11 +53,11 @@
    2.12     ("# Cannot determine problem status within resource limit", Timeout),
    2.13     ("Error", Error)]
    2.14  
    2.15 -fun outcome_of_result (result as {success, proof, ...} : prover_result) =
    2.16 +fun outcome_of_result (result as {success, output, ...} : prover_result) =
    2.17    if success then
    2.18      Success
    2.19    else case get_first (fn (s, outcome) =>
    2.20 -                          if String.isSubstring s proof then SOME outcome
    2.21 +                          if String.isSubstring s output then SOME outcome
    2.22                            else NONE) failure_strings of
    2.23      SOME outcome => outcome
    2.24    | NONE => Failure
     3.1 --- a/src/HOL/Tools/ATP_Manager/atp_wrapper.ML	Fri Apr 23 12:24:30 2010 +0200
     3.2 +++ b/src/HOL/Tools/ATP_Manager/atp_wrapper.ML	Fri Apr 23 13:16:50 2010 +0200
     3.3 @@ -49,6 +49,7 @@
     3.4   {home: string,
     3.5    executable: string,
     3.6    arguments: Time.time -> string,
     3.7 +  proof_delims: (string * string) list,
     3.8    known_failures: (string list * string) list,
     3.9    max_new_clauses: int,
    3.10    prefers_theory_relevant: bool};
    3.11 @@ -60,21 +61,31 @@
    3.12    Exn.capture f path
    3.13    |> tap (fn _ => cleanup path)
    3.14    |> Exn.release
    3.15 -  |> tap (after path);
    3.16 +  |> tap (after path)
    3.17 +
    3.18 +(* Splits by the first possible of a list of delimiters. *)
    3.19 +fun extract_proof delims output =
    3.20 +  case pairself (find_first (fn s => String.isSubstring s output))
    3.21 +                (ListPair.unzip delims) of
    3.22 +    (SOME begin_delim, SOME end_delim) =>
    3.23 +    output |> first_field begin_delim |> the |> snd
    3.24 +           |> first_field end_delim |> the |> fst
    3.25 +  | _ => ""
    3.26  
    3.27 -fun find_known_failure known_failures proof =
    3.28 -  case map_filter (fn (patterns, message) =>
    3.29 -                      if exists (fn pattern => String.isSubstring pattern proof)
    3.30 -                                patterns then
    3.31 -                        SOME message
    3.32 -                      else
    3.33 -                        NONE) known_failures of
    3.34 -    [] => if is_proof_well_formed proof then ""
    3.35 -          else "Error: The ATP output is ill-formed."
    3.36 -  | (message :: _) => message
    3.37 +fun extract_proof_or_failure proof_delims known_failures output =
    3.38 +  case map_filter
    3.39 +           (fn (patterns, message) =>
    3.40 +               if exists (fn p => String.isSubstring p output) patterns then
    3.41 +                 SOME message
    3.42 +               else
    3.43 +                 NONE) known_failures of
    3.44 +    [] => (case extract_proof proof_delims output of
    3.45 +             "" => ("", "Error: The ATP output is malformed.")
    3.46 +           | proof => (proof, ""))
    3.47 +  | (message :: _) => ("", message)
    3.48  
    3.49  fun generic_prover overlord get_facts prepare write_file home executable args
    3.50 -        known_failures name
    3.51 +        proof_delims known_failures name
    3.52          ({debug, full_types, explicit_apply, isar_proof, modulus, sorts, ...}
    3.53           : params) minimize_command
    3.54          ({subgoal, goal, relevance_override, axiom_clauses, filtered_clauses}
    3.55 @@ -135,14 +146,14 @@
    3.56      fun split_time s =
    3.57        let
    3.58          val split = String.tokens (fn c => str c = "\n");
    3.59 -        val (proof, t) = s |> split |> split_last |> apfst cat_lines;
    3.60 +        val (output, t) = s |> split |> split_last |> apfst cat_lines;
    3.61          fun as_num f = f >> (fst o read_int);
    3.62          val num = as_num (Scan.many1 Symbol.is_ascii_digit);
    3.63          val digit = Scan.one Symbol.is_ascii_digit;
    3.64          val num3 = as_num (digit ::: digit ::: (digit >> single));
    3.65          val time = num --| Scan.$$ "." -- num3 >> (fn (a, b) => a * 1000 + b);
    3.66          val as_time = the_default 0 o Scan.read Symbol.stopper time o explode;
    3.67 -      in (proof, as_time t) end;
    3.68 +      in (output, as_time t) end;
    3.69      fun split_time' s =
    3.70        if Config.get ctxt measure_runtime then split_time s else (s, 0)
    3.71      fun run_on probfile =
    3.72 @@ -154,7 +165,7 @@
    3.73      (* If the problem file has not been exported, remove it; otherwise, export
    3.74         the proof file too. *)
    3.75      fun cleanup probfile = if destdir' = "" then try File.rm probfile else NONE;
    3.76 -    fun export probfile (((proof, _), _), _) =
    3.77 +    fun export probfile (((output, _), _), _) =
    3.78        if destdir' = "" then
    3.79          ()
    3.80        else
    3.81 @@ -163,14 +174,15 @@
    3.82                         "% " ^ command_line probfile ^ "\n% " ^ timestamp () ^
    3.83                         "\n"
    3.84                       else
    3.85 -                        "") ^ proof)
    3.86 +                        "") ^ output)
    3.87  
    3.88 -    val (((proof, atp_run_time_in_msecs), rc), _) =
    3.89 +    val (((output, atp_run_time_in_msecs), rc), _) =
    3.90        with_path cleanup export run_on (prob_pathname subgoal);
    3.91  
    3.92      (* Check for success and print out some information on failure. *)
    3.93 -    val failure = find_known_failure known_failures proof;
    3.94 -    val success = rc = 0 andalso failure = "";
    3.95 +    val (proof, failure) =
    3.96 +      extract_proof_or_failure proof_delims known_failures output
    3.97 +    val success = (rc = 0 andalso failure = "")
    3.98      val (message, relevant_thm_names) =
    3.99        if success then
   3.100          proof_text isar_proof debug modulus sorts ctxt
   3.101 @@ -178,12 +190,12 @@
   3.102        else if failure <> "" then
   3.103          (failure ^ "\n", [])
   3.104        else
   3.105 -        ("Unknown ATP error: " ^ proof ^ ".\n", [])
   3.106 +        ("Unknown ATP error: " ^ output ^ ".\n", [])
   3.107    in
   3.108      {success = success, message = message,
   3.109       relevant_thm_names = relevant_thm_names,
   3.110 -     atp_run_time_in_msecs = atp_run_time_in_msecs, proof = proof,
   3.111 -     internal_thm_names = internal_thm_names,
   3.112 +     atp_run_time_in_msecs = atp_run_time_in_msecs, output = output,
   3.113 +     proof = proof, internal_thm_names = internal_thm_names,
   3.114       filtered_clauses = the_filtered_clauses}
   3.115    end;
   3.116  
   3.117 @@ -191,8 +203,8 @@
   3.118  (* generic TPTP-based provers *)
   3.119  
   3.120  fun generic_tptp_prover
   3.121 -        (name, {home, executable, arguments, known_failures, max_new_clauses,
   3.122 -                prefers_theory_relevant})
   3.123 +        (name, {home, executable, arguments, proof_delims, known_failures,
   3.124 +                max_new_clauses, prefers_theory_relevant})
   3.125          (params as {debug, overlord, respect_no_atp, relevance_threshold,
   3.126                      convergence, theory_relevant, higher_order, follow_defs,
   3.127                      isar_proof, ...})
   3.128 @@ -203,7 +215,8 @@
   3.129                            (the_default prefers_theory_relevant theory_relevant))
   3.130        (prepare_clauses higher_order false)
   3.131        (write_tptp_file (debug andalso overlord andalso not isar_proof)) home
   3.132 -      executable (arguments timeout) known_failures name params minimize_command
   3.133 +      executable (arguments timeout) proof_delims known_failures name params
   3.134 +      minimize_command
   3.135  
   3.136  fun tptp_prover name p = (name, generic_tptp_prover (name, p));
   3.137  
   3.138 @@ -221,6 +234,8 @@
   3.139     executable = "vampire",
   3.140     arguments = (fn timeout => "--output_syntax tptp --mode casc -t " ^
   3.141                                string_of_int (generous_to_secs timeout)),
   3.142 +   proof_delims = [("=========== Refutation ==========",
   3.143 +                    "======= End of refutation =======")],
   3.144     known_failures =
   3.145       [(["Satisfiability detected", "CANNOT PROVE"],
   3.146         "The ATP problem is unprovable."),
   3.147 @@ -233,12 +248,16 @@
   3.148  
   3.149  (* E prover *)
   3.150  
   3.151 +val tstp_proof_delims =
   3.152 +  ("# SZS output start CNFRefutation.", "# SZS output end CNFRefutation")
   3.153 +
   3.154  val e_config : prover_config =
   3.155    {home = getenv "E_HOME",
   3.156     executable = "eproof",
   3.157     arguments = (fn timeout => "--tstp-in --tstp-out -l5 -xAutoDev \
   3.158                                \-tAutoDev --silent --cpu-limit=" ^
   3.159                                string_of_int (generous_to_secs timeout)),
   3.160 +   proof_delims = [tstp_proof_delims],
   3.161     known_failures =
   3.162         [(["SZS status: Satisfiable", "SZS status Satisfiable"],
   3.163           "The ATP problem is unprovable."),
   3.164 @@ -254,8 +273,8 @@
   3.165  (* SPASS *)
   3.166  
   3.167  fun generic_dfg_prover
   3.168 -        (name, {home, executable, arguments, known_failures, max_new_clauses,
   3.169 -                prefers_theory_relevant})
   3.170 +        (name, {home, executable, arguments, proof_delims, known_failures,
   3.171 +                max_new_clauses, prefers_theory_relevant})
   3.172          (params as {overlord, respect_no_atp, relevance_threshold, convergence,
   3.173                      theory_relevant, higher_order, follow_defs, ...})
   3.174          minimize_command timeout =
   3.175 @@ -264,7 +283,8 @@
   3.176                            higher_order follow_defs max_new_clauses
   3.177                            (the_default prefers_theory_relevant theory_relevant))
   3.178        (prepare_clauses higher_order true) write_dfg_file home executable
   3.179 -      (arguments timeout) known_failures name params minimize_command
   3.180 +      (arguments timeout) proof_delims known_failures name params
   3.181 +      minimize_command
   3.182  
   3.183  fun dfg_prover name p = (name, generic_dfg_prover (name, p))
   3.184  
   3.185 @@ -276,6 +296,7 @@
   3.186     arguments = (fn timeout => "-Auto -SOS=1 -PGiven=0 -PProblem=0 -Splits=0" ^
   3.187       " -FullRed=0 -DocProof -VarWeight=3 -TimeLimit=" ^
   3.188       string_of_int (generous_to_secs timeout)),
   3.189 +   proof_delims = [("Here is a proof", "Formulae used in the proof")],
   3.190     known_failures =
   3.191       [(["SPASS beiseite: Completion found."], "The ATP problem is unprovable."),
   3.192        (["SPASS beiseite: Ran out of time."], "The ATP timed out."),
   3.193 @@ -297,6 +318,7 @@
   3.194    {home = #home spass_config,
   3.195     executable = #executable spass_config,
   3.196     arguments = prefix "-TPTP " o #arguments spass_config,
   3.197 +   proof_delims = #proof_delims spass_config,
   3.198     known_failures =
   3.199       #known_failures spass_config @
   3.200       [(["unrecognized option `-TPTP'", "Unrecognized option TPTP"],
   3.201 @@ -343,13 +365,14 @@
   3.202      "Error: The remote ATP proof is ill-formed.")]
   3.203  
   3.204  fun remote_prover_config prover_prefix args
   3.205 -        ({known_failures, max_new_clauses, prefers_theory_relevant, ...}
   3.206 -         : prover_config) : prover_config =
   3.207 +        ({proof_delims, known_failures, max_new_clauses,
   3.208 +          prefers_theory_relevant, ...} : prover_config) : prover_config =
   3.209    {home = getenv "ISABELLE_ATP_MANAGER",
   3.210     executable = "SystemOnTPTP",
   3.211     arguments = (fn timeout =>
   3.212       args ^ " -t " ^ string_of_int (generous_to_secs timeout) ^ " -s " ^
   3.213       the_system prover_prefix),
   3.214 +   proof_delims = insert (op =) tstp_proof_delims proof_delims,
   3.215     known_failures = remote_known_failures @ known_failures,
   3.216     max_new_clauses = max_new_clauses,
   3.217     prefers_theory_relevant = prefers_theory_relevant}
     4.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_proof_reconstruct.ML	Fri Apr 23 12:24:30 2010 +0200
     4.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_proof_reconstruct.ML	Fri Apr 23 13:16:50 2010 +0200
     4.3 @@ -14,7 +14,6 @@
     4.4    val num_typargs: theory -> string -> int
     4.5    val make_tvar: string -> typ
     4.6    val strip_prefix: string -> string -> string option
     4.7 -  val is_proof_well_formed: string -> bool
     4.8    val metis_line: int -> int -> string list -> string
     4.9    val metis_proof_text:
    4.10      minimize_command * string * string vector * thm * int
    4.11 @@ -69,7 +68,7 @@
    4.12  fun is_digit s = Char.isDigit (String.sub(s,0));
    4.13  val integer = Scan.many1 is_digit >> (the o Int.fromString o implode);
    4.14  
    4.15 -(* needed for SPASS's nonstandard output format *)
    4.16 +(* needed for SPASS's output format *)
    4.17  fun fix_symbol "equal" = "c_equal"
    4.18    | fix_symbol s = s
    4.19  
    4.20 @@ -521,47 +520,22 @@
    4.21    handle STREE _ => raise Fail "Cannot parse ATP output";
    4.22  
    4.23  
    4.24 -(*=== EXTRACTING PROOF-TEXT === *)
    4.25 -
    4.26 -val begin_proof_strs = ["# SZS output start CNFRefutation.",
    4.27 -  "=========== Refutation ==========",
    4.28 -  "Here is a proof"];
    4.29 -
    4.30 -val end_proof_strs = ["# SZS output end CNFRefutation",
    4.31 -  "======= End of refutation =======",
    4.32 -  "Formulae used in the proof"];
    4.33 -
    4.34 -fun get_proof_extract proof =
    4.35 -  (* Splits by the first possible of a list of splitters. *)
    4.36 -  case pairself (find_first (fn s => String.isSubstring s proof))
    4.37 -                (begin_proof_strs, end_proof_strs) of
    4.38 -    (SOME begin_string, SOME end_string) =>
    4.39 -    proof |> first_field begin_string |> the |> snd
    4.40 -          |> first_field end_string |> the |> fst
    4.41 -  | _ => raise Fail "Cannot extract proof"
    4.42 -
    4.43 -(* ==== CHECK IF PROOF WAS SUCCESSFUL === *)
    4.44 -
    4.45 -fun is_proof_well_formed proof =
    4.46 -  forall (exists (fn s => String.isSubstring s proof))
    4.47 -         [begin_proof_strs, end_proof_strs]
    4.48 -
    4.49  (* === EXTRACTING LEMMAS === *)
    4.50  (* A list consisting of the first number in each line is returned.
    4.51     TPTP: Interesting lines have the form "cnf(108, axiom, ...)", where the
    4.52     number (108) is extracted.
    4.53     DFG: Lines have the form "108[0:Inp] ...", where the first number (108) is
    4.54     extracted. *)
    4.55 -fun get_step_nums proof_extract =
    4.56 +fun get_step_nums proof =
    4.57    let
    4.58      val toks = String.tokens (not o is_ident_char)
    4.59      fun inputno ("cnf" :: ntok :: "axiom" :: _) = Int.fromString ntok
    4.60        | inputno ("cnf" :: ntok :: "negated_conjecture" :: _) =
    4.61          Int.fromString ntok
    4.62 -      | inputno (ntok::"0"::"Inp"::_) = Int.fromString ntok  (* DFG format *)
    4.63 +      | inputno (ntok :: "0" :: "Inp" :: _) =
    4.64 +        Int.fromString ntok  (* SPASS's output format *)
    4.65        | inputno _ = NONE
    4.66 -    val lines = split_lines proof_extract
    4.67 -  in map_filter (inputno o toks) lines end
    4.68 +  in map_filter (inputno o toks) (split_lines proof) end
    4.69    
    4.70  (*Used to label theorems chained into the sledgehammer call*)
    4.71  val chained_hint = "CHAINED";
    4.72 @@ -588,8 +562,7 @@
    4.73  fun metis_proof_text (minimize_command, proof, thm_names, goal, i) =
    4.74    let
    4.75      val lemmas =
    4.76 -      proof |> get_proof_extract
    4.77 -            |> get_step_nums
    4.78 +      proof |> get_step_nums
    4.79              |> filter (is_axiom thm_names)
    4.80              |> map (fn i => Vector.sub (thm_names, i - 1))
    4.81              |> filter (fn x => x <> "??.unknown")
    4.82 @@ -625,8 +598,7 @@
    4.83  fun isar_proof_text debug modulus sorts ctxt
    4.84                      (minimize_command, proof, thm_names, goal, i) =
    4.85    let
    4.86 -    val cnfs = proof |> get_proof_extract |> split_lines |> map strip_spaces
    4.87 -                     |> filter is_proof_line
    4.88 +    val cnfs = proof |> split_lines |> map strip_spaces |> filter is_proof_line
    4.89      val (one_line_proof, lemma_names) =
    4.90        metis_proof_text (minimize_command, proof, thm_names, goal, i)
    4.91      val tokens = String.tokens (fn c => c = #" ") one_line_proof