# HG changeset patch # User wenzelm # Date 1344371534 -7200 # Node ID 9775c2957000a4ad51f2aa5524ea8f45f01b2b50 # Parent 1d2a12bb0640e37dcae63af419a43ac92f64dbbf# Parent 73e6c22e2d949a283fe9f23bed47efb0a575884c merged diff -r 73e6c22e2d94 -r 9775c2957000 doc-src/Sledgehammer/sledgehammer.tex --- a/doc-src/Sledgehammer/sledgehammer.tex Tue Aug 07 22:25:17 2012 +0200 +++ b/doc-src/Sledgehammer/sledgehammer.tex Tue Aug 07 22:32:14 2012 +0200 @@ -871,15 +871,15 @@ \item[\labelitemi] \textbf{\textit{iprover}:} iProver is a pure instantiation-based prover developed by Konstantin Korovin \cite{korovin-2009}. To use iProver, set the environment variable \texttt{IPROVER\_HOME} to the -directory that contains the \texttt{iprover} executable. Sledgehammer has been -tested with version 0.99. +directory that contains the \texttt{iprover} and \texttt{vclausify\_rel} +executables. Sledgehammer has been tested with version 0.99. \item[\labelitemi] \textbf{\textit{iprover\_eq}:} iProver-Eq is an instantiation-based prover with native support for equality developed by Konstantin Korovin and Christoph Sticksel \cite{korovin-sticksel-2010}. To use iProver-Eq, set the environment variable \texttt{IPROVER\_EQ\_HOME} to the -directory that contains the \texttt{iprover-eq} executable. Sledgehammer has -been tested with version 0.8. +directory that contains the \texttt{iprover-eq} and \texttt{vclausify\_rel} +executables. Sledgehammer has been tested with version 0.8. \item[\labelitemi] \textbf{\textit{leo2}:} LEO-II is an automatic higher-order prover developed by Christoph Benzm\"uller et al.\ \cite{leo2}, diff -r 73e6c22e2d94 -r 9775c2957000 src/HOL/TPTP/atp_theory_export.ML --- a/src/HOL/TPTP/atp_theory_export.ML Tue Aug 07 22:25:17 2012 +0200 +++ b/src/HOL/TPTP/atp_theory_export.ML Tue Aug 07 22:32:14 2012 +0200 @@ -74,7 +74,7 @@ in TimeLimit.timeLimit (seconds 0.3) Isabelle_System.bash_output command |> fst - |> extract_tstplike_proof_and_outcome false true proof_delims known_failures + |> extract_tstplike_proof_and_outcome false proof_delims known_failures |> snd end handle TimeLimit.TimeOut => SOME TimedOut diff -r 73e6c22e2d94 -r 9775c2957000 src/HOL/Tools/ATP/atp_proof.ML --- a/src/HOL/Tools/ATP/atp_proof.ML Tue Aug 07 22:25:17 2012 +0200 +++ b/src/HOL/Tools/ATP/atp_proof.ML Tue Aug 07 22:32:14 2012 +0200 @@ -48,7 +48,7 @@ val extract_known_failure : (failure * string) list -> string -> failure option val extract_tstplike_proof_and_outcome : - bool -> bool -> (string * string) list -> (failure * string) list -> string + bool -> (string * string) list -> (failure * string) list -> string -> string * failure option val is_same_atp_step : step_name -> step_name -> bool val scan_general_id : string list -> string * string list @@ -182,16 +182,14 @@ |> find_first (fn (_, pattern) => String.isSubstring pattern output) |> Option.map fst -fun extract_tstplike_proof_and_outcome verbose complete proof_delims - known_failures output = +fun extract_tstplike_proof_and_outcome verbose proof_delims known_failures + output = case (extract_tstplike_proof proof_delims output, extract_known_failure known_failures output) of (_, SOME ProofIncomplete) => ("", NONE) | ("", SOME ProofMissing) => ("", NONE) - | ("", SOME failure) => - ("", SOME (if failure = GaveUp andalso complete then Unprovable - else failure)) | ("", NONE) => ("", SOME (UnknownError (short_output verbose output))) + | res as ("", _) => res | (tstplike_proof, _) => (tstplike_proof, NONE) type step_name = string * string list diff -r 73e6c22e2d94 -r 9775c2957000 src/HOL/Tools/ATP/atp_systems.ML --- a/src/HOL/Tools/ATP/atp_systems.ML Tue Aug 07 22:25:17 2012 +0200 +++ b/src/HOL/Tools/ATP/atp_systems.ML Tue Aug 07 22:32:14 2012 +0200 @@ -22,8 +22,7 @@ proof_delims : (string * string) list, known_failures : (failure * string) list, prem_role : formula_role, - best_slices : - Proof.context -> (real * (bool * (slice_spec * string))) list, + best_slices : Proof.context -> (real * (slice_spec * string)) list, best_max_mono_iters : int, best_max_new_mono_instances : int} @@ -95,19 +94,18 @@ proof_delims : (string * string) list, known_failures : (failure * string) list, prem_role : formula_role, - best_slices : Proof.context -> (real * (bool * (slice_spec * string))) list, + best_slices : Proof.context -> (real * (slice_spec * string)) list, best_max_mono_iters : int, best_max_new_mono_instances : int} (* "best_slices" must be found empirically, taking a wholistic approach since the ATPs are run in parallel. The "real" component gives the faction of the - time available given to the slice and should add up to 1.0. The first "bool" - component indicates whether the slice's strategy is complete; the "int", the - preferred number of facts to pass; the first "string", the preferred type - system (which should be sound or quasi-sound); the second "string", the - preferred lambda translation scheme; the second "bool", whether uncurried - aliased should be generated; the third "string", extra information to - the prover (e.g., SOS or no SOS). + time available given to the slice and should add up to 1.0. The "int" + component indicates the preferred number of facts to pass; the first + "string", the preferred type system (which should be sound or quasi-sound); + the second "string", the preferred lambda translation scheme; the "bool", + whether uncurried aliased should be generated; the third "string", extra + information to the prover (e.g., SOS or no SOS). The last slice should be the most "normal" one, because it will get all the time available if the other slices fail early and also because it is used if @@ -202,7 +200,7 @@ prem_role = Hypothesis, best_slices = fn _ => (* FUDGE *) - [(1.0, (false, ((100, alt_ergo_tff1, "poly_native", liftingN, false), "")))], + [(1.0, ((100, alt_ergo_tff1, "poly_native", liftingN, false), ""))], best_max_mono_iters = default_max_mono_iters, best_max_new_mono_instances = default_max_new_mono_instances} @@ -320,11 +318,11 @@ let val heuristic = effective_e_selection_heuristic ctxt in (* FUDGE *) if heuristic = e_smartN then - [(0.333, (true, ((500, FOF, "mono_tags??", combsN, false), e_fun_weightN))), - (0.334, (true, ((50, FOF, "mono_guards??", combsN, false), e_fun_weightN))), - (0.333, (true, ((1000, FOF, "mono_tags??", combsN, false), e_sym_offset_weightN)))] + [(0.333, ((500, FOF, "mono_tags??", combsN, false), e_fun_weightN)), + (0.334, ((50, FOF, "mono_guards??", combsN, false), e_fun_weightN)), + (0.333, ((1000, FOF, "mono_tags??", combsN, false), e_sym_offset_weightN))] else - [(1.0, (true, ((500, FOF, "mono_tags??", combsN, false), heuristic)))] + [(1.0, ((500, FOF, "mono_tags??", combsN, false), heuristic))] end, best_max_mono_iters = default_max_mono_iters, best_max_new_mono_instances = default_max_new_mono_instances} @@ -343,7 +341,7 @@ prem_role = Conjecture, best_slices = (* FUDGE *) - K [(1.0, (true, ((1000, FOF, "mono_tags??", combsN, false), "")))], + K [(1.0, ((1000, FOF, "mono_tags??", combsN, false), ""))], best_max_mono_iters = default_max_mono_iters, best_max_new_mono_instances = default_max_new_mono_instances} @@ -355,7 +353,7 @@ val iprover_config : atp_config = {exec = (["IPROVER_HOME"], ["iprover"]), arguments = fn _ => fn _ => fn _ => fn timeout => fn _ => - "--clausifier vclausify_rel --time_out_real " ^ + "--clausifier $IPROVER_HOME/vclausify_rel --time_out_real " ^ string_of_real (Time.toReal timeout), proof_delims = tstp_proof_delims, known_failures = @@ -364,7 +362,7 @@ prem_role = Hypothesis, best_slices = (* FUDGE *) - K [(1.0, (true, ((150, FOF, "mono_guards??", liftingN, false), "")))], + K [(1.0, ((150, FOF, "mono_guards??", liftingN, false), ""))], best_max_mono_iters = default_max_mono_iters, best_max_new_mono_instances = default_max_new_mono_instances} @@ -407,7 +405,7 @@ prem_role = Hypothesis, best_slices = (* FUDGE *) - K [(1.0, (true, ((40, leo2_thf0, "mono_native_higher", keep_lamsN, false), "")))], + K [(1.0, ((40, leo2_thf0, "mono_native_higher", keep_lamsN, false), ""))], best_max_mono_iters = default_max_mono_iters - 1 (* FUDGE *), best_max_new_mono_instances = default_max_new_mono_instances div 2 (* FUDGE *)} @@ -429,7 +427,7 @@ prem_role = Hypothesis, best_slices = (* FUDGE *) - K [(1.0, (true, ((60, satallax_thf0, "mono_native_higher", keep_lamsN, false), "")))], + K [(1.0, ((60, satallax_thf0, "mono_native_higher", keep_lamsN, false), ""))], best_max_mono_iters = default_max_mono_iters - 1 (* FUDGE *), best_max_new_mono_instances = default_max_new_mono_instances div 2 (* FUDGE *)} @@ -464,14 +462,14 @@ prem_role = Conjecture, best_slices = fn _ => (* FUDGE *) - [(0.1667, (false, ((150, DFG Monomorphic, "mono_native", combsN, true), ""))), - (0.1667, (false, ((500, DFG Monomorphic, "mono_native", liftingN, true), spass_H2SOS))), - (0.1666, (false, ((50, DFG Monomorphic, "mono_native", liftingN, true), spass_H2LR0LT0))), - (0.1000, (false, ((250, DFG Monomorphic, "mono_native", combsN, true), spass_H2NuVS0))), - (0.1000, (false, ((1000, DFG Monomorphic, "mono_native", liftingN, true), spass_H1SOS))), - (0.1000, (false, ((150, DFG Monomorphic, "poly_guards??", liftingN, false), spass_H2NuVS0Red2))), - (0.1000, (false, ((300, DFG Monomorphic, "mono_native", combsN, true), spass_H2SOS))), - (0.1000, (false, ((100, DFG Monomorphic, "mono_native", combs_and_liftingN, true), "")))], + [(0.1667, ((150, DFG Monomorphic, "mono_native", combsN, true), "")), + (0.1667, ((500, DFG Monomorphic, "mono_native", liftingN, true), spass_H2SOS)), + (0.1666, ((50, DFG Monomorphic, "mono_native", liftingN, true), spass_H2LR0LT0)), + (0.1000, ((250, DFG Monomorphic, "mono_native", combsN, true), spass_H2NuVS0)), + (0.1000, ((1000, DFG Monomorphic, "mono_native", liftingN, true), spass_H1SOS)), + (0.1000, ((150, DFG Monomorphic, "poly_guards??", liftingN, false), spass_H2NuVS0Red2)), + (0.1000, ((300, DFG Monomorphic, "mono_native", combsN, true), spass_H2SOS)), + (0.1000, ((100, DFG Monomorphic, "mono_native", combs_and_liftingN, true), ""))], best_max_mono_iters = default_max_mono_iters, best_max_new_mono_instances = default_max_new_mono_instances} @@ -515,13 +513,13 @@ best_slices = fn ctxt => (* FUDGE *) (if is_vampire_beyond_1_8 () then - [(0.333, (false, ((500, vampire_tff0, "mono_native", combs_or_liftingN, false), sosN))), - (0.333, (false, ((150, vampire_tff0, "poly_guards??", combs_or_liftingN, false), sosN))), - (0.334, (true, ((50, vampire_tff0, "mono_native", combs_or_liftingN, false), no_sosN)))] + [(0.333, ((500, vampire_tff0, "mono_native", combs_or_liftingN, false), sosN)), + (0.333, ((150, vampire_tff0, "poly_guards??", combs_or_liftingN, false), sosN)), + (0.334, ((50, vampire_tff0, "mono_native", combs_or_liftingN, false), no_sosN))] else - [(0.333, (false, ((150, FOF, "poly_guards??", combs_or_liftingN, false), sosN))), - (0.333, (false, ((500, FOF, "mono_tags??", combs_or_liftingN, false), sosN))), - (0.334, (true, ((50, FOF, "mono_guards??", combs_or_liftingN, false), no_sosN)))]) + [(0.333, ((150, FOF, "poly_guards??", combs_or_liftingN, false), sosN)), + (0.333, ((500, FOF, "mono_tags??", combs_or_liftingN, false), sosN)), + (0.334, ((50, FOF, "mono_guards??", combs_or_liftingN, false), no_sosN))]) |> (if Config.get ctxt force_sos then hd #> apfst (K 1.0) #> single else I), best_max_mono_iters = default_max_mono_iters, @@ -544,10 +542,10 @@ prem_role = Hypothesis, best_slices = (* FUDGE *) - K [(0.5, (false, ((250, z3_tff0, "mono_native", combsN, false), ""))), - (0.25, (false, ((125, z3_tff0, "mono_native", combsN, false), ""))), - (0.125, (false, ((62, z3_tff0, "mono_native", combsN, false), ""))), - (0.125, (false, ((31, z3_tff0, "mono_native", combsN, false), "")))], + K [(0.5, ((250, z3_tff0, "mono_native", combsN, false), "")), + (0.25, ((125, z3_tff0, "mono_native", combsN, false), "")), + (0.125, ((62, z3_tff0, "mono_native", combsN, false), "")), + (0.125, ((31, z3_tff0, "mono_native", combsN, false), ""))], best_max_mono_iters = default_max_mono_iters, best_max_new_mono_instances = default_max_new_mono_instances} @@ -563,9 +561,9 @@ known_failures = known_szs_status_failures, prem_role = Hypothesis, best_slices = - K [(1.0, (false, ((200, format, type_enc, - if is_format_higher_order format then keep_lamsN - else combsN, false), "")))], + K [(1.0, ((200, format, type_enc, + if is_format_higher_order format then keep_lamsN + else combsN, false), ""))], best_max_mono_iters = default_max_mono_iters, best_max_new_mono_instances = default_max_new_mono_instances} @@ -628,7 +626,7 @@ proof_delims = union (op =) tstp_proof_delims proof_delims, known_failures = known_failures @ known_perl_failures @ known_says_failures, prem_role = prem_role, - best_slices = fn ctxt => [(1.0, (false, best_slice ctxt))], + best_slices = fn ctxt => [(1.0, best_slice ctxt)], best_max_mono_iters = default_max_mono_iters, best_max_new_mono_instances = default_max_new_mono_instances} diff -r 73e6c22e2d94 -r 9775c2957000 src/HOL/Tools/Sledgehammer/sledgehammer_provers.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_provers.ML Tue Aug 07 22:25:17 2012 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_provers.ML Tue Aug 07 22:32:14 2012 +0200 @@ -170,7 +170,7 @@ let val thy = Proof_Context.theory_of ctxt in case try (get_atp thy) name of SOME config => - exists (fn (_, (_, ((_, format, _, _, _), _))) => is_format format) + exists (fn (_, ((_, format, _, _, _), _)) => is_format format) (#best_slices (config ()) ctxt) | NONE => false end @@ -197,7 +197,7 @@ if is_reconstructor name then reconstructor_default_max_facts else if is_atp thy name then - fold (Integer.max o #1 o fst o snd o snd o snd) + fold (Integer.max o #1 o fst o snd o snd) (get_slices slice (#best_slices (get_atp thy name ()) ctxt)) 0 else (* is_smt_prover ctxt name *) SMT_Solver.default_max_relevant ctxt name @@ -709,10 +709,10 @@ map (pair name o zero_var_indexes o snd) rths) end fun run_slice time_left (cache_key, cache_value) - (slice, (time_frac, (complete, + (slice, (time_frac, (key as (best_max_facts, format, best_type_enc, best_lam_trans, best_uncurried_aliases), - extra)))) = + extra))) = let val num_facts = length facts |> is_none max_facts ? Integer.min best_max_facts @@ -787,8 +787,8 @@ |> fst |> split_time |> (fn accum as (output, _) => (accum, - extract_tstplike_proof_and_outcome verbose complete - proof_delims known_failures output + extract_tstplike_proof_and_outcome verbose proof_delims + known_failures output |>> atp_proof_from_tstplike_proof atp_problem handle UNRECOGNIZED_ATP_PROOF () => ([], SOME ProofIncomplete)))