src/HOL/Tools/ATP_Manager/atp_wrapper.ML
changeset 36369 d2cd0d04b8e6
parent 36289 f75b6a3e1450
child 36370 a4f601daa175
     1.1 --- a/src/HOL/Tools/ATP_Manager/atp_wrapper.ML	Fri Apr 23 12:24:30 2010 +0200
     1.2 +++ b/src/HOL/Tools/ATP_Manager/atp_wrapper.ML	Fri Apr 23 13:16:50 2010 +0200
     1.3 @@ -49,6 +49,7 @@
     1.4   {home: string,
     1.5    executable: string,
     1.6    arguments: Time.time -> string,
     1.7 +  proof_delims: (string * string) list,
     1.8    known_failures: (string list * string) list,
     1.9    max_new_clauses: int,
    1.10    prefers_theory_relevant: bool};
    1.11 @@ -60,21 +61,31 @@
    1.12    Exn.capture f path
    1.13    |> tap (fn _ => cleanup path)
    1.14    |> Exn.release
    1.15 -  |> tap (after path);
    1.16 +  |> tap (after path)
    1.17 +
    1.18 +(* Splits by the first possible of a list of delimiters. *)
    1.19 +fun extract_proof delims output =
    1.20 +  case pairself (find_first (fn s => String.isSubstring s output))
    1.21 +                (ListPair.unzip delims) of
    1.22 +    (SOME begin_delim, SOME end_delim) =>
    1.23 +    output |> first_field begin_delim |> the |> snd
    1.24 +           |> first_field end_delim |> the |> fst
    1.25 +  | _ => ""
    1.26  
    1.27 -fun find_known_failure known_failures proof =
    1.28 -  case map_filter (fn (patterns, message) =>
    1.29 -                      if exists (fn pattern => String.isSubstring pattern proof)
    1.30 -                                patterns then
    1.31 -                        SOME message
    1.32 -                      else
    1.33 -                        NONE) known_failures of
    1.34 -    [] => if is_proof_well_formed proof then ""
    1.35 -          else "Error: The ATP output is ill-formed."
    1.36 -  | (message :: _) => message
    1.37 +fun extract_proof_or_failure proof_delims known_failures output =
    1.38 +  case map_filter
    1.39 +           (fn (patterns, message) =>
    1.40 +               if exists (fn p => String.isSubstring p output) patterns then
    1.41 +                 SOME message
    1.42 +               else
    1.43 +                 NONE) known_failures of
    1.44 +    [] => (case extract_proof proof_delims output of
    1.45 +             "" => ("", "Error: The ATP output is malformed.")
    1.46 +           | proof => (proof, ""))
    1.47 +  | (message :: _) => ("", message)
    1.48  
    1.49  fun generic_prover overlord get_facts prepare write_file home executable args
    1.50 -        known_failures name
    1.51 +        proof_delims known_failures name
    1.52          ({debug, full_types, explicit_apply, isar_proof, modulus, sorts, ...}
    1.53           : params) minimize_command
    1.54          ({subgoal, goal, relevance_override, axiom_clauses, filtered_clauses}
    1.55 @@ -135,14 +146,14 @@
    1.56      fun split_time s =
    1.57        let
    1.58          val split = String.tokens (fn c => str c = "\n");
    1.59 -        val (proof, t) = s |> split |> split_last |> apfst cat_lines;
    1.60 +        val (output, t) = s |> split |> split_last |> apfst cat_lines;
    1.61          fun as_num f = f >> (fst o read_int);
    1.62          val num = as_num (Scan.many1 Symbol.is_ascii_digit);
    1.63          val digit = Scan.one Symbol.is_ascii_digit;
    1.64          val num3 = as_num (digit ::: digit ::: (digit >> single));
    1.65          val time = num --| Scan.$$ "." -- num3 >> (fn (a, b) => a * 1000 + b);
    1.66          val as_time = the_default 0 o Scan.read Symbol.stopper time o explode;
    1.67 -      in (proof, as_time t) end;
    1.68 +      in (output, as_time t) end;
    1.69      fun split_time' s =
    1.70        if Config.get ctxt measure_runtime then split_time s else (s, 0)
    1.71      fun run_on probfile =
    1.72 @@ -154,7 +165,7 @@
    1.73      (* If the problem file has not been exported, remove it; otherwise, export
    1.74         the proof file too. *)
    1.75      fun cleanup probfile = if destdir' = "" then try File.rm probfile else NONE;
    1.76 -    fun export probfile (((proof, _), _), _) =
    1.77 +    fun export probfile (((output, _), _), _) =
    1.78        if destdir' = "" then
    1.79          ()
    1.80        else
    1.81 @@ -163,14 +174,15 @@
    1.82                         "% " ^ command_line probfile ^ "\n% " ^ timestamp () ^
    1.83                         "\n"
    1.84                       else
    1.85 -                        "") ^ proof)
    1.86 +                        "") ^ output)
    1.87  
    1.88 -    val (((proof, atp_run_time_in_msecs), rc), _) =
    1.89 +    val (((output, atp_run_time_in_msecs), rc), _) =
    1.90        with_path cleanup export run_on (prob_pathname subgoal);
    1.91  
    1.92      (* Check for success and print out some information on failure. *)
    1.93 -    val failure = find_known_failure known_failures proof;
    1.94 -    val success = rc = 0 andalso failure = "";
    1.95 +    val (proof, failure) =
    1.96 +      extract_proof_or_failure proof_delims known_failures output
    1.97 +    val success = (rc = 0 andalso failure = "")
    1.98      val (message, relevant_thm_names) =
    1.99        if success then
   1.100          proof_text isar_proof debug modulus sorts ctxt
   1.101 @@ -178,12 +190,12 @@
   1.102        else if failure <> "" then
   1.103          (failure ^ "\n", [])
   1.104        else
   1.105 -        ("Unknown ATP error: " ^ proof ^ ".\n", [])
   1.106 +        ("Unknown ATP error: " ^ output ^ ".\n", [])
   1.107    in
   1.108      {success = success, message = message,
   1.109       relevant_thm_names = relevant_thm_names,
   1.110 -     atp_run_time_in_msecs = atp_run_time_in_msecs, proof = proof,
   1.111 -     internal_thm_names = internal_thm_names,
   1.112 +     atp_run_time_in_msecs = atp_run_time_in_msecs, output = output,
   1.113 +     proof = proof, internal_thm_names = internal_thm_names,
   1.114       filtered_clauses = the_filtered_clauses}
   1.115    end;
   1.116  
   1.117 @@ -191,8 +203,8 @@
   1.118  (* generic TPTP-based provers *)
   1.119  
   1.120  fun generic_tptp_prover
   1.121 -        (name, {home, executable, arguments, known_failures, max_new_clauses,
   1.122 -                prefers_theory_relevant})
   1.123 +        (name, {home, executable, arguments, proof_delims, known_failures,
   1.124 +                max_new_clauses, prefers_theory_relevant})
   1.125          (params as {debug, overlord, respect_no_atp, relevance_threshold,
   1.126                      convergence, theory_relevant, higher_order, follow_defs,
   1.127                      isar_proof, ...})
   1.128 @@ -203,7 +215,8 @@
   1.129                            (the_default prefers_theory_relevant theory_relevant))
   1.130        (prepare_clauses higher_order false)
   1.131        (write_tptp_file (debug andalso overlord andalso not isar_proof)) home
   1.132 -      executable (arguments timeout) known_failures name params minimize_command
   1.133 +      executable (arguments timeout) proof_delims known_failures name params
   1.134 +      minimize_command
   1.135  
   1.136  fun tptp_prover name p = (name, generic_tptp_prover (name, p));
   1.137  
   1.138 @@ -221,6 +234,8 @@
   1.139     executable = "vampire",
   1.140     arguments = (fn timeout => "--output_syntax tptp --mode casc -t " ^
   1.141                                string_of_int (generous_to_secs timeout)),
   1.142 +   proof_delims = [("=========== Refutation ==========",
   1.143 +                    "======= End of refutation =======")],
   1.144     known_failures =
   1.145       [(["Satisfiability detected", "CANNOT PROVE"],
   1.146         "The ATP problem is unprovable."),
   1.147 @@ -233,12 +248,16 @@
   1.148  
   1.149  (* E prover *)
   1.150  
   1.151 +val tstp_proof_delims =
   1.152 +  ("# SZS output start CNFRefutation.", "# SZS output end CNFRefutation")
   1.153 +
   1.154  val e_config : prover_config =
   1.155    {home = getenv "E_HOME",
   1.156     executable = "eproof",
   1.157     arguments = (fn timeout => "--tstp-in --tstp-out -l5 -xAutoDev \
   1.158                                \-tAutoDev --silent --cpu-limit=" ^
   1.159                                string_of_int (generous_to_secs timeout)),
   1.160 +   proof_delims = [tstp_proof_delims],
   1.161     known_failures =
   1.162         [(["SZS status: Satisfiable", "SZS status Satisfiable"],
   1.163           "The ATP problem is unprovable."),
   1.164 @@ -254,8 +273,8 @@
   1.165  (* SPASS *)
   1.166  
   1.167  fun generic_dfg_prover
   1.168 -        (name, {home, executable, arguments, known_failures, max_new_clauses,
   1.169 -                prefers_theory_relevant})
   1.170 +        (name, {home, executable, arguments, proof_delims, known_failures,
   1.171 +                max_new_clauses, prefers_theory_relevant})
   1.172          (params as {overlord, respect_no_atp, relevance_threshold, convergence,
   1.173                      theory_relevant, higher_order, follow_defs, ...})
   1.174          minimize_command timeout =
   1.175 @@ -264,7 +283,8 @@
   1.176                            higher_order follow_defs max_new_clauses
   1.177                            (the_default prefers_theory_relevant theory_relevant))
   1.178        (prepare_clauses higher_order true) write_dfg_file home executable
   1.179 -      (arguments timeout) known_failures name params minimize_command
   1.180 +      (arguments timeout) proof_delims known_failures name params
   1.181 +      minimize_command
   1.182  
   1.183  fun dfg_prover name p = (name, generic_dfg_prover (name, p))
   1.184  
   1.185 @@ -276,6 +296,7 @@
   1.186     arguments = (fn timeout => "-Auto -SOS=1 -PGiven=0 -PProblem=0 -Splits=0" ^
   1.187       " -FullRed=0 -DocProof -VarWeight=3 -TimeLimit=" ^
   1.188       string_of_int (generous_to_secs timeout)),
   1.189 +   proof_delims = [("Here is a proof", "Formulae used in the proof")],
   1.190     known_failures =
   1.191       [(["SPASS beiseite: Completion found."], "The ATP problem is unprovable."),
   1.192        (["SPASS beiseite: Ran out of time."], "The ATP timed out."),
   1.193 @@ -297,6 +318,7 @@
   1.194    {home = #home spass_config,
   1.195     executable = #executable spass_config,
   1.196     arguments = prefix "-TPTP " o #arguments spass_config,
   1.197 +   proof_delims = #proof_delims spass_config,
   1.198     known_failures =
   1.199       #known_failures spass_config @
   1.200       [(["unrecognized option `-TPTP'", "Unrecognized option TPTP"],
   1.201 @@ -343,13 +365,14 @@
   1.202      "Error: The remote ATP proof is ill-formed.")]
   1.203  
   1.204  fun remote_prover_config prover_prefix args
   1.205 -        ({known_failures, max_new_clauses, prefers_theory_relevant, ...}
   1.206 -         : prover_config) : prover_config =
   1.207 +        ({proof_delims, known_failures, max_new_clauses,
   1.208 +          prefers_theory_relevant, ...} : prover_config) : prover_config =
   1.209    {home = getenv "ISABELLE_ATP_MANAGER",
   1.210     executable = "SystemOnTPTP",
   1.211     arguments = (fn timeout =>
   1.212       args ^ " -t " ^ string_of_int (generous_to_secs timeout) ^ " -s " ^
   1.213       the_system prover_prefix),
   1.214 +   proof_delims = insert (op =) tstp_proof_delims proof_delims,
   1.215     known_failures = remote_known_failures @ known_failures,
   1.216     max_new_clauses = max_new_clauses,
   1.217     prefers_theory_relevant = prefers_theory_relevant}