55205
|
1 |
(* Title: HOL/Tools/Sledgehammer/sledgehammer_prover_atp.ML
|
|
2 |
Author: Fabian Immler, TU Muenchen
|
|
3 |
Author: Makarius
|
|
4 |
Author: Jasmin Blanchette, TU Muenchen
|
|
5 |
|
|
6 |
ATPs as Sledgehammer provers.
|
|
7 |
*)
|
|
8 |
|
|
9 |
signature SLEDGEHAMMER_PROVER_ATP =
|
|
10 |
sig
|
|
11 |
type mode = Sledgehammer_Prover.mode
|
|
12 |
type prover = Sledgehammer_Prover.prover
|
|
13 |
|
55212
|
14 |
val atp_dest_dir : string Config.T
|
|
15 |
val atp_problem_prefix : string Config.T
|
|
16 |
val atp_completish : bool Config.T
|
|
17 |
val atp_full_names : bool Config.T
|
|
18 |
|
|
19 |
val is_ho_atp : Proof.context -> string -> bool
|
|
20 |
|
55205
|
21 |
val run_atp : mode -> string -> prover
|
|
22 |
end;
|
|
23 |
|
|
24 |
structure Sledgehammer_Prover_ATP : SLEDGEHAMMER_PROVER_ATP =
|
|
25 |
struct
|
|
26 |
|
|
27 |
open ATP_Util
|
|
28 |
open ATP_Problem
|
|
29 |
open ATP_Proof
|
|
30 |
open ATP_Problem_Generate
|
|
31 |
open ATP_Proof_Reconstruct
|
|
32 |
open ATP_Systems
|
|
33 |
open Sledgehammer_Util
|
|
34 |
open Sledgehammer_Reconstructor
|
|
35 |
open Sledgehammer_Isar
|
|
36 |
open Sledgehammer_Prover
|
|
37 |
|
55212
|
38 |
(* Empty string means create files in Isabelle's temporary files directory. *)
|
|
39 |
val atp_dest_dir = Attrib.setup_config_string @{binding sledgehammer_atp_dest_dir} (K "")
|
|
40 |
val atp_problem_prefix =
|
|
41 |
Attrib.setup_config_string @{binding sledgehammer_atp_problem_prefix} (K "prob")
|
|
42 |
val atp_completish = Attrib.setup_config_bool @{binding sledgehammer_atp_completish} (K false)
|
|
43 |
(* In addition to being easier to read, readable names are often much shorter, especially if types
|
|
44 |
are mangled in names. This makes a difference for some provers (e.g., E). For these reason, short
|
|
45 |
names are enabled by default. *)
|
|
46 |
val atp_full_names = Attrib.setup_config_bool @{binding sledgehammer_atp_full_names} (K false)
|
|
47 |
|
|
48 |
fun is_atp_of_format is_format ctxt name =
|
|
49 |
let val thy = Proof_Context.theory_of ctxt in
|
|
50 |
(case try (get_atp thy) name of
|
|
51 |
SOME config =>
|
|
52 |
exists (fn (_, ((_, format, _, _, _), _)) => is_format format) (#best_slices (config ()) ctxt)
|
|
53 |
| NONE => false)
|
|
54 |
end
|
|
55 |
|
|
56 |
val is_ho_atp = is_atp_of_format is_format_higher_order
|
|
57 |
|
55205
|
58 |
fun choose_type_enc strictness best_type_enc format =
|
|
59 |
the_default best_type_enc
|
|
60 |
#> type_enc_of_string strictness
|
|
61 |
#> adjust_type_enc format
|
|
62 |
|
|
63 |
fun has_bound_or_var_of_type pred =
|
|
64 |
exists_subterm (fn Var (_, T as Type _) => pred T
|
|
65 |
| Abs (_, T as Type _, _) => pred T
|
|
66 |
| _ => false)
|
|
67 |
|
|
68 |
(* Unwanted equalities are those between a (bound or schematic) variable that does not properly
|
|
69 |
occur in the second operand. *)
|
|
70 |
val is_exhaustive_finite =
|
|
71 |
let
|
|
72 |
fun is_bad_equal (Var z) t =
|
|
73 |
not (exists_subterm (fn Var z' => z = z' | _ => false) t)
|
|
74 |
| is_bad_equal (Bound j) t = not (loose_bvar1 (t, j))
|
|
75 |
| is_bad_equal _ _ = false
|
|
76 |
fun do_equals t1 t2 = is_bad_equal t1 t2 orelse is_bad_equal t2 t1
|
|
77 |
fun do_formula pos t =
|
55208
|
78 |
(case (pos, t) of
|
55205
|
79 |
(_, @{const Trueprop} $ t1) => do_formula pos t1
|
55208
|
80 |
| (true, Const (@{const_name all}, _) $ Abs (_, _, t')) => do_formula pos t'
|
|
81 |
| (true, Const (@{const_name All}, _) $ Abs (_, _, t')) => do_formula pos t'
|
|
82 |
| (false, Const (@{const_name Ex}, _) $ Abs (_, _, t')) => do_formula pos t'
|
55205
|
83 |
| (_, @{const "==>"} $ t1 $ t2) =>
|
55208
|
84 |
do_formula (not pos) t1 andalso (t2 = @{prop False} orelse do_formula pos t2)
|
55205
|
85 |
| (_, @{const HOL.implies} $ t1 $ t2) =>
|
55208
|
86 |
do_formula (not pos) t1 andalso (t2 = @{const False} orelse do_formula pos t2)
|
55205
|
87 |
| (_, @{const Not} $ t1) => do_formula (not pos) t1
|
|
88 |
| (true, @{const HOL.disj} $ t1 $ t2) => forall (do_formula pos) [t1, t2]
|
|
89 |
| (false, @{const HOL.conj} $ t1 $ t2) => forall (do_formula pos) [t1, t2]
|
|
90 |
| (true, Const (@{const_name HOL.eq}, _) $ t1 $ t2) => do_equals t1 t2
|
|
91 |
| (true, Const (@{const_name "=="}, _) $ t1 $ t2) => do_equals t1 t2
|
55208
|
92 |
| _ => false)
|
55205
|
93 |
in do_formula true end
|
|
94 |
|
|
95 |
(* Facts containing variables of finite types such as "unit" or "bool" or of the form
|
|
96 |
"ALL x. x = A | x = B | x = C" are likely to lead to untypable proofs for unsound type
|
|
97 |
encodings. *)
|
|
98 |
fun is_dangerous_prop ctxt =
|
|
99 |
transform_elim_prop
|
|
100 |
#> (has_bound_or_var_of_type (is_type_surely_finite ctxt) orf is_exhaustive_finite)
|
|
101 |
|
|
102 |
fun get_slices slice slices =
|
|
103 |
(0 upto length slices - 1) ~~ slices |> not slice ? (List.last #> single)
|
|
104 |
|
|
105 |
fun get_facts_of_filter _ [(_, facts)] = facts
|
|
106 |
| get_facts_of_filter fact_filter factss =
|
|
107 |
(case AList.lookup (op =) factss fact_filter of
|
|
108 |
SOME facts => facts
|
|
109 |
| NONE => snd (hd factss))
|
|
110 |
|
|
111 |
(* For low values of "max_facts", this fudge value ensures that most slices are invoked with a
|
|
112 |
nontrivial amount of facts. *)
|
|
113 |
val max_fact_factor_fudge = 5
|
|
114 |
|
|
115 |
val mono_max_privileged_facts = 10
|
|
116 |
|
|
117 |
fun suffix_of_mode Auto_Try = "_try"
|
|
118 |
| suffix_of_mode Try = "_try"
|
|
119 |
| suffix_of_mode Normal = ""
|
|
120 |
| suffix_of_mode MaSh = ""
|
|
121 |
| suffix_of_mode Auto_Minimize = "_min"
|
|
122 |
| suffix_of_mode Minimize = "_min"
|
|
123 |
|
|
124 |
(* Give the ATPs some slack before interrupting them the hard way. "z3_tptp" on Linux appears to be
|
|
125 |
the only ATP that does not honor its time limit. *)
|
|
126 |
val atp_timeout_slack = seconds 1.0
|
|
127 |
|
|
128 |
(* Important messages are important but not so important that users want to see
|
|
129 |
them each time. *)
|
|
130 |
val atp_important_message_keep_quotient = 25
|
|
131 |
|
|
132 |
fun run_atp mode name
|
|
133 |
(params as {debug, verbose, overlord, type_enc, strict, lam_trans, uncurried_aliases,
|
|
134 |
fact_filter, max_facts, max_mono_iters, max_new_mono_instances, isar_proofs, compress_isar,
|
|
135 |
try0_isar, slice, timeout, preplay_timeout, ...})
|
|
136 |
minimize_command
|
|
137 |
({comment, state, goal, subgoal, subgoal_count, factss, ...} : prover_problem) =
|
|
138 |
let
|
|
139 |
val thy = Proof.theory_of state
|
|
140 |
val ctxt = Proof.context_of state
|
|
141 |
|
|
142 |
val {exec, arguments, proof_delims, known_failures, prem_role, best_slices, best_max_mono_iters,
|
|
143 |
best_max_new_mono_instances, ...} = get_atp thy name ()
|
|
144 |
|
55212
|
145 |
val atp_mode = if Config.get ctxt atp_completish then Sledgehammer_Completish else Sledgehammer
|
55205
|
146 |
val (_, hyp_ts, concl_t) = strip_subgoal goal subgoal ctxt
|
|
147 |
val (dest_dir, problem_prefix) =
|
|
148 |
if overlord then overlord_file_location_of_prover name
|
55212
|
149 |
else (Config.get ctxt atp_dest_dir, Config.get ctxt atp_problem_prefix)
|
55205
|
150 |
val problem_file_name =
|
|
151 |
Path.basic (problem_prefix ^ (if overlord then "" else serial_string ()) ^
|
|
152 |
suffix_of_mode mode ^ "_" ^ string_of_int subgoal)
|
|
153 |
val prob_path =
|
|
154 |
if dest_dir = "" then
|
|
155 |
File.tmp_path problem_file_name
|
|
156 |
else if File.exists (Path.explode dest_dir) then
|
|
157 |
Path.append (Path.explode dest_dir) problem_file_name
|
|
158 |
else
|
|
159 |
error ("No such directory: " ^ quote dest_dir ^ ".")
|
|
160 |
val exec = exec ()
|
|
161 |
val command0 =
|
55208
|
162 |
(case find_first (fn var => getenv var <> "") (fst exec) of
|
55205
|
163 |
SOME var =>
|
|
164 |
let
|
|
165 |
val pref = getenv var ^ "/"
|
|
166 |
val paths = map (Path.explode o prefix pref) (snd exec)
|
|
167 |
in
|
55208
|
168 |
(case find_first File.exists paths of
|
55205
|
169 |
SOME path => path
|
55208
|
170 |
| NONE => error ("Bad executable: " ^ Path.print (hd paths) ^ "."))
|
55205
|
171 |
end
|
55208
|
172 |
| NONE => error ("The environment variable " ^ quote (List.last (fst exec)) ^ " is not set."))
|
55205
|
173 |
|
|
174 |
fun split_time s =
|
|
175 |
let
|
|
176 |
val split = String.tokens (fn c => str c = "\n")
|
|
177 |
val (output, t) =
|
|
178 |
s |> split |> (try split_last #> the_default ([], "0"))
|
|
179 |
|>> cat_lines
|
|
180 |
fun as_num f = f >> (fst o read_int)
|
|
181 |
val num = as_num (Scan.many1 Symbol.is_ascii_digit)
|
|
182 |
val digit = Scan.one Symbol.is_ascii_digit
|
|
183 |
val num3 = as_num (digit ::: digit ::: (digit >> single))
|
|
184 |
val time = num --| Scan.$$ "." -- num3 >> (fn (a, b) => a * 1000 + b)
|
|
185 |
val as_time =
|
|
186 |
raw_explode #> Scan.read Symbol.stopper time #> the_default 0
|
|
187 |
in (output, as_time t |> Time.fromMilliseconds) end
|
|
188 |
|
|
189 |
fun run () =
|
|
190 |
let
|
55208
|
191 |
(* If slicing is disabled, we expand the last slice to fill the entire time available. *)
|
55205
|
192 |
val all_slices = best_slices ctxt
|
|
193 |
val actual_slices = get_slices slice all_slices
|
|
194 |
fun max_facts_of_slices f slices = fold (Integer.max o fst o #1 o fst o snd o f) slices 0
|
|
195 |
val num_actual_slices = length actual_slices
|
|
196 |
val max_fact_factor =
|
|
197 |
Real.fromInt (case max_facts of
|
|
198 |
NONE => max_facts_of_slices I all_slices
|
|
199 |
| SOME max => max)
|
|
200 |
/ Real.fromInt (max_facts_of_slices snd actual_slices)
|
55212
|
201 |
|
55205
|
202 |
fun monomorphize_facts facts =
|
|
203 |
let
|
|
204 |
val ctxt =
|
|
205 |
ctxt
|
|
206 |
|> repair_monomorph_context max_mono_iters best_max_mono_iters max_new_mono_instances
|
|
207 |
best_max_new_mono_instances
|
|
208 |
(* pseudo-theorem involving the same constants as the subgoal *)
|
|
209 |
val subgoal_th =
|
|
210 |
Logic.list_implies (hyp_ts, concl_t) |> Skip_Proof.make_thm thy
|
|
211 |
val rths =
|
|
212 |
facts |> chop mono_max_privileged_facts
|
|
213 |
|>> map (pair 1 o snd)
|
|
214 |
||> map (pair 2 o snd)
|
|
215 |
|> op @
|
|
216 |
|> cons (0, subgoal_th)
|
|
217 |
in
|
|
218 |
Monomorph.monomorph atp_schematic_consts_of ctxt rths
|
|
219 |
|> tl |> curry ListPair.zip (map fst facts)
|
55208
|
220 |
|> maps (fn (name, rths) => map (pair name o zero_var_indexes o snd) rths)
|
55205
|
221 |
end
|
|
222 |
|
55208
|
223 |
fun run_slice time_left (cache_key, cache_value) (slice, (time_frac,
|
|
224 |
(key as ((best_max_facts, best_fact_filter), format, best_type_enc, best_lam_trans,
|
|
225 |
best_uncurried_aliases),
|
|
226 |
extra))) =
|
55205
|
227 |
let
|
55208
|
228 |
val effective_fact_filter = fact_filter |> the_default best_fact_filter
|
55205
|
229 |
val facts = get_facts_of_filter effective_fact_filter factss
|
|
230 |
val num_facts =
|
|
231 |
Real.ceil (max_fact_factor * Real.fromInt best_max_facts) + max_fact_factor_fudge
|
|
232 |
|> Integer.min (length facts)
|
|
233 |
val strictness = if strict then Strict else Non_Strict
|
|
234 |
val type_enc = type_enc |> choose_type_enc strictness best_type_enc format
|
|
235 |
val sound = is_type_enc_sound type_enc
|
|
236 |
val real_ms = Real.fromInt o Time.toMilliseconds
|
|
237 |
val slice_timeout =
|
|
238 |
(real_ms time_left
|
|
239 |
|> (if slice < num_actual_slices - 1 then
|
|
240 |
curry Real.min (time_frac * real_ms timeout)
|
|
241 |
else
|
|
242 |
I))
|
|
243 |
* 0.001
|
|
244 |
|> seconds
|
|
245 |
val generous_slice_timeout =
|
|
246 |
if mode = MaSh then one_day else Time.+ (slice_timeout, atp_timeout_slack)
|
|
247 |
val _ =
|
|
248 |
if debug then
|
|
249 |
quote name ^ " slice #" ^ string_of_int (slice + 1) ^
|
|
250 |
" with " ^ string_of_int num_facts ^ " fact" ^
|
|
251 |
plural_s num_facts ^ " for " ^ string_of_time slice_timeout ^ "..."
|
|
252 |
|> Output.urgent_message
|
|
253 |
else
|
|
254 |
()
|
|
255 |
val readable_names = not (Config.get ctxt atp_full_names)
|
55208
|
256 |
val lam_trans = lam_trans |> the_default best_lam_trans
|
|
257 |
val uncurried_aliases = uncurried_aliases |> the_default best_uncurried_aliases
|
55205
|
258 |
val value as (atp_problem, _, fact_names, _, _) =
|
|
259 |
if cache_key = SOME key then
|
|
260 |
cache_value
|
|
261 |
else
|
|
262 |
facts
|
|
263 |
|> not sound ? filter_out (is_dangerous_prop ctxt o prop_of o snd)
|
|
264 |
|> take num_facts
|
|
265 |
|> not (is_type_enc_polymorphic type_enc) ? monomorphize_facts
|
|
266 |
|> map (apsnd prop_of)
|
55208
|
267 |
|> prepare_atp_problem ctxt format prem_role type_enc atp_mode lam_trans
|
|
268 |
uncurried_aliases readable_names true hyp_ts concl_t
|
55205
|
269 |
|
|
270 |
fun sel_weights () = atp_problem_selection_weights atp_problem
|
|
271 |
fun ord_info () = atp_problem_term_order_info atp_problem
|
|
272 |
|
|
273 |
val ord = effective_term_order ctxt name
|
|
274 |
val full_proof = isar_proofs |> the_default (mode = Minimize)
|
|
275 |
val args =
|
|
276 |
arguments ctxt full_proof extra slice_timeout (File.shell_path prob_path)
|
|
277 |
(ord, ord_info, sel_weights)
|
|
278 |
val command =
|
|
279 |
"(exec 2>&1; " ^ File.shell_path command0 ^ " " ^ args ^ " " ^ ")"
|
|
280 |
|> enclose "TIMEFORMAT='%3R'; { time " " ; }"
|
|
281 |
val _ =
|
|
282 |
atp_problem
|
|
283 |
|> lines_of_atp_problem format ord ord_info
|
|
284 |
|> cons ("% " ^ command ^ "\n" ^ (if comment = "" then "" else "% " ^ comment ^ "\n"))
|
|
285 |
|> File.write_list prob_path
|
|
286 |
val ((output, run_time), (atp_proof, outcome)) =
|
|
287 |
TimeLimit.timeLimit generous_slice_timeout Isabelle_System.bash_output command
|
|
288 |
|>> (if overlord then prefix ("% " ^ command ^ "\n% " ^ timestamp () ^ "\n") else I)
|
|
289 |
|> fst |> split_time
|
|
290 |
|> (fn accum as (output, _) =>
|
|
291 |
(accum,
|
|
292 |
extract_tstplike_proof_and_outcome verbose proof_delims known_failures output
|
|
293 |
|>> atp_proof_of_tstplike_proof atp_problem
|
|
294 |
handle UNRECOGNIZED_ATP_PROOF () => ([], SOME ProofIncomplete)))
|
|
295 |
handle TimeLimit.TimeOut => (("", slice_timeout), ([], SOME TimedOut))
|
|
296 |
val outcome =
|
|
297 |
(case outcome of
|
|
298 |
NONE =>
|
|
299 |
(case used_facts_in_unsound_atp_proof ctxt fact_names atp_proof
|
|
300 |
|> Option.map (sort string_ord) of
|
55208
|
301 |
SOME facts =>
|
|
302 |
let val failure = UnsoundProof (is_type_enc_sound type_enc, facts) in
|
|
303 |
if debug then (warning (string_of_atp_failure failure); NONE) else SOME failure
|
|
304 |
end
|
|
305 |
| NONE => NONE)
|
55205
|
306 |
| _ => outcome)
|
|
307 |
in
|
|
308 |
((SOME key, value), (output, run_time, facts, atp_proof, outcome))
|
|
309 |
end
|
|
310 |
|
|
311 |
val timer = Timer.startRealTimer ()
|
|
312 |
|
55208
|
313 |
fun maybe_run_slice slice (result as (cache, (_, run_time0, _, _, SOME _))) =
|
|
314 |
let val time_left = Time.- (timeout, Timer.checkRealTimer timer) in
|
55205
|
315 |
if Time.<= (time_left, Time.zeroTime) then
|
|
316 |
result
|
|
317 |
else
|
|
318 |
run_slice time_left cache slice
|
|
319 |
|> (fn (cache, (output, run_time, used_from, atp_proof, outcome)) =>
|
|
320 |
(cache, (output, Time.+ (run_time0, run_time), used_from, atp_proof, outcome)))
|
|
321 |
end
|
|
322 |
| maybe_run_slice _ result = result
|
|
323 |
in
|
|
324 |
((NONE, ([], Symtab.empty, Vector.fromList [], [], Symtab.empty)),
|
|
325 |
("", Time.zeroTime, [], [], SOME InternalError))
|
|
326 |
|> fold maybe_run_slice actual_slices
|
|
327 |
end
|
|
328 |
|
|
329 |
(* If the problem file has not been exported, remove it; otherwise, export
|
|
330 |
the proof file too. *)
|
55208
|
331 |
fun clean_up () = if dest_dir = "" then (try File.rm prob_path; ()) else ()
|
55205
|
332 |
fun export (_, (output, _, _, _, _)) =
|
|
333 |
if dest_dir = "" then ()
|
|
334 |
else File.write (Path.explode (Path.implode prob_path ^ "_proof")) output
|
55208
|
335 |
|
55205
|
336 |
val ((_, (_, pool, fact_names, lifted, sym_tab)),
|
|
337 |
(output, run_time, used_from, atp_proof, outcome)) =
|
|
338 |
with_cleanup clean_up run () |> tap export
|
55208
|
339 |
|
55205
|
340 |
val important_message =
|
55208
|
341 |
if mode = Normal andalso random_range 0 (atp_important_message_keep_quotient - 1) = 0 then
|
55205
|
342 |
extract_important_message output
|
|
343 |
else
|
|
344 |
""
|
|
345 |
|
|
346 |
val (used_facts, preplay, message, message_tail) =
|
|
347 |
(case outcome of
|
|
348 |
NONE =>
|
|
349 |
let
|
|
350 |
val used_facts = used_facts_in_atp_proof ctxt fact_names atp_proof
|
|
351 |
val needs_full_types = is_typed_helper_used_in_atp_proof atp_proof
|
|
352 |
val reconstrs =
|
|
353 |
bunch_of_reconstructors needs_full_types (lam_trans_of_atp_proof atp_proof
|
|
354 |
o (fn desperate => if desperate then hide_lamsN else default_metis_lam_trans))
|
|
355 |
in
|
|
356 |
(used_facts,
|
|
357 |
Lazy.lazy (fn () =>
|
|
358 |
let val used_pairs = used_from |> filter_used_facts false used_facts in
|
|
359 |
play_one_line_proof mode debug verbose preplay_timeout used_pairs state subgoal
|
|
360 |
(hd reconstrs) reconstrs
|
|
361 |
end),
|
|
362 |
fn preplay =>
|
|
363 |
let
|
|
364 |
val _ = if verbose then Output.urgent_message "Generating proof text..." else ()
|
|
365 |
fun isar_params () =
|
|
366 |
let
|
|
367 |
val metis_type_enc =
|
|
368 |
if is_typed_helper_used_in_atp_proof atp_proof then full_typesN
|
|
369 |
else partial_typesN
|
|
370 |
val metis_lam_trans = lam_trans_of_atp_proof atp_proof default_metis_lam_trans
|
|
371 |
val atp_proof =
|
|
372 |
atp_proof
|
|
373 |
|> termify_atp_proof ctxt pool lifted sym_tab
|
|
374 |
|> introduce_spass_skolem
|
|
375 |
|> factify_atp_proof fact_names hyp_ts concl_t
|
|
376 |
in
|
|
377 |
(verbose, metis_type_enc, metis_lam_trans, preplay_timeout, compress_isar,
|
|
378 |
try0_isar, atp_proof, goal)
|
|
379 |
end
|
|
380 |
val one_line_params =
|
|
381 |
(preplay, proof_banner mode name, used_facts,
|
|
382 |
choose_minimize_command thy params minimize_command name preplay, subgoal,
|
|
383 |
subgoal_count)
|
|
384 |
val num_chained = length (#facts (Proof.goal state))
|
|
385 |
in
|
|
386 |
proof_text ctxt debug isar_proofs isar_params num_chained one_line_params
|
|
387 |
end,
|
|
388 |
(if verbose then "\nATP real CPU time: " ^ string_of_time run_time ^ "." else "") ^
|
|
389 |
(if important_message <> "" then
|
|
390 |
"\n\nImportant message from Dr. Geoff Sutcliffe:\n" ^ important_message
|
|
391 |
else
|
|
392 |
""))
|
|
393 |
end
|
|
394 |
| SOME failure =>
|
|
395 |
([], Lazy.value (plain_metis, Play_Failed), fn _ => string_of_atp_failure failure, ""))
|
|
396 |
in
|
|
397 |
{outcome = outcome, used_facts = used_facts, used_from = used_from, run_time = run_time,
|
|
398 |
preplay = preplay, message = message, message_tail = message_tail}
|
|
399 |
end
|
|
400 |
|
|
401 |
end;
|