|
1 (* Title: HOL/Tools/Sledgehammer/sledgehammer_prover_atp.ML |
|
2 Author: Fabian Immler, TU Muenchen |
|
3 Author: Makarius |
|
4 Author: Jasmin Blanchette, TU Muenchen |
|
5 |
|
6 ATPs as Sledgehammer provers. |
|
7 *) |
|
8 |
|
9 signature SLEDGEHAMMER_PROVER_ATP = |
|
10 sig |
|
11 type mode = Sledgehammer_Prover.mode |
|
12 type prover = Sledgehammer_Prover.prover |
|
13 |
|
14 val run_atp : mode -> string -> prover |
|
15 end; |
|
16 |
|
17 structure Sledgehammer_Prover_ATP : SLEDGEHAMMER_PROVER_ATP = |
|
18 struct |
|
19 |
|
20 open ATP_Util |
|
21 open ATP_Problem |
|
22 open ATP_Proof |
|
23 open ATP_Problem_Generate |
|
24 open ATP_Proof_Reconstruct |
|
25 open ATP_Systems |
|
26 open Sledgehammer_Util |
|
27 open Sledgehammer_Reconstructor |
|
28 open Sledgehammer_Isar |
|
29 open Sledgehammer_Prover |
|
30 |
|
31 fun choose_type_enc strictness best_type_enc format = |
|
32 the_default best_type_enc |
|
33 #> type_enc_of_string strictness |
|
34 #> adjust_type_enc format |
|
35 |
|
36 fun has_bound_or_var_of_type pred = |
|
37 exists_subterm (fn Var (_, T as Type _) => pred T |
|
38 | Abs (_, T as Type _, _) => pred T |
|
39 | _ => false) |
|
40 |
|
41 (* Unwanted equalities are those between a (bound or schematic) variable that does not properly |
|
42 occur in the second operand. *) |
|
43 val is_exhaustive_finite = |
|
44 let |
|
45 fun is_bad_equal (Var z) t = |
|
46 not (exists_subterm (fn Var z' => z = z' | _ => false) t) |
|
47 | is_bad_equal (Bound j) t = not (loose_bvar1 (t, j)) |
|
48 | is_bad_equal _ _ = false |
|
49 fun do_equals t1 t2 = is_bad_equal t1 t2 orelse is_bad_equal t2 t1 |
|
50 fun do_formula pos t = |
|
51 case (pos, t) of |
|
52 (_, @{const Trueprop} $ t1) => do_formula pos t1 |
|
53 | (true, Const (@{const_name all}, _) $ Abs (_, _, t')) => |
|
54 do_formula pos t' |
|
55 | (true, Const (@{const_name All}, _) $ Abs (_, _, t')) => |
|
56 do_formula pos t' |
|
57 | (false, Const (@{const_name Ex}, _) $ Abs (_, _, t')) => |
|
58 do_formula pos t' |
|
59 | (_, @{const "==>"} $ t1 $ t2) => |
|
60 do_formula (not pos) t1 andalso |
|
61 (t2 = @{prop False} orelse do_formula pos t2) |
|
62 | (_, @{const HOL.implies} $ t1 $ t2) => |
|
63 do_formula (not pos) t1 andalso |
|
64 (t2 = @{const False} orelse do_formula pos t2) |
|
65 | (_, @{const Not} $ t1) => do_formula (not pos) t1 |
|
66 | (true, @{const HOL.disj} $ t1 $ t2) => forall (do_formula pos) [t1, t2] |
|
67 | (false, @{const HOL.conj} $ t1 $ t2) => forall (do_formula pos) [t1, t2] |
|
68 | (true, Const (@{const_name HOL.eq}, _) $ t1 $ t2) => do_equals t1 t2 |
|
69 | (true, Const (@{const_name "=="}, _) $ t1 $ t2) => do_equals t1 t2 |
|
70 | _ => false |
|
71 in do_formula true end |
|
72 |
|
73 (* Facts containing variables of finite types such as "unit" or "bool" or of the form |
|
74 "ALL x. x = A | x = B | x = C" are likely to lead to untypable proofs for unsound type |
|
75 encodings. *) |
|
76 fun is_dangerous_prop ctxt = |
|
77 transform_elim_prop |
|
78 #> (has_bound_or_var_of_type (is_type_surely_finite ctxt) orf is_exhaustive_finite) |
|
79 |
|
80 fun get_slices slice slices = |
|
81 (0 upto length slices - 1) ~~ slices |> not slice ? (List.last #> single) |
|
82 |
|
83 fun get_facts_of_filter _ [(_, facts)] = facts |
|
84 | get_facts_of_filter fact_filter factss = |
|
85 (case AList.lookup (op =) factss fact_filter of |
|
86 SOME facts => facts |
|
87 | NONE => snd (hd factss)) |
|
88 |
|
89 (* For low values of "max_facts", this fudge value ensures that most slices are invoked with a |
|
90 nontrivial amount of facts. *) |
|
91 val max_fact_factor_fudge = 5 |
|
92 |
|
93 val mono_max_privileged_facts = 10 |
|
94 |
|
95 fun suffix_of_mode Auto_Try = "_try" |
|
96 | suffix_of_mode Try = "_try" |
|
97 | suffix_of_mode Normal = "" |
|
98 | suffix_of_mode MaSh = "" |
|
99 | suffix_of_mode Auto_Minimize = "_min" |
|
100 | suffix_of_mode Minimize = "_min" |
|
101 |
|
102 (* Give the ATPs some slack before interrupting them the hard way. "z3_tptp" on Linux appears to be |
|
103 the only ATP that does not honor its time limit. *) |
|
104 val atp_timeout_slack = seconds 1.0 |
|
105 |
|
106 (* Important messages are important but not so important that users want to see |
|
107 them each time. *) |
|
108 val atp_important_message_keep_quotient = 25 |
|
109 |
|
110 fun run_atp mode name |
|
111 (params as {debug, verbose, overlord, type_enc, strict, lam_trans, uncurried_aliases, |
|
112 fact_filter, max_facts, max_mono_iters, max_new_mono_instances, isar_proofs, compress_isar, |
|
113 try0_isar, slice, timeout, preplay_timeout, ...}) |
|
114 minimize_command |
|
115 ({comment, state, goal, subgoal, subgoal_count, factss, ...} : prover_problem) = |
|
116 let |
|
117 val thy = Proof.theory_of state |
|
118 val ctxt = Proof.context_of state |
|
119 |
|
120 val {exec, arguments, proof_delims, known_failures, prem_role, best_slices, best_max_mono_iters, |
|
121 best_max_new_mono_instances, ...} = get_atp thy name () |
|
122 |
|
123 val atp_mode = if Config.get ctxt completish then Sledgehammer_Completish else Sledgehammer |
|
124 val (_, hyp_ts, concl_t) = strip_subgoal goal subgoal ctxt |
|
125 val (dest_dir, problem_prefix) = |
|
126 if overlord then overlord_file_location_of_prover name |
|
127 else (Config.get ctxt dest_dir, Config.get ctxt problem_prefix) |
|
128 val problem_file_name = |
|
129 Path.basic (problem_prefix ^ (if overlord then "" else serial_string ()) ^ |
|
130 suffix_of_mode mode ^ "_" ^ string_of_int subgoal) |
|
131 val prob_path = |
|
132 if dest_dir = "" then |
|
133 File.tmp_path problem_file_name |
|
134 else if File.exists (Path.explode dest_dir) then |
|
135 Path.append (Path.explode dest_dir) problem_file_name |
|
136 else |
|
137 error ("No such directory: " ^ quote dest_dir ^ ".") |
|
138 val exec = exec () |
|
139 val command0 = |
|
140 case find_first (fn var => getenv var <> "") (fst exec) of |
|
141 SOME var => |
|
142 let |
|
143 val pref = getenv var ^ "/" |
|
144 val paths = map (Path.explode o prefix pref) (snd exec) |
|
145 in |
|
146 case find_first File.exists paths of |
|
147 SOME path => path |
|
148 | NONE => error ("Bad executable: " ^ Path.print (hd paths) ^ ".") |
|
149 end |
|
150 | NONE => error ("The environment variable " ^ quote (List.last (fst exec)) ^ |
|
151 " is not set.") |
|
152 |
|
153 fun split_time s = |
|
154 let |
|
155 val split = String.tokens (fn c => str c = "\n") |
|
156 val (output, t) = |
|
157 s |> split |> (try split_last #> the_default ([], "0")) |
|
158 |>> cat_lines |
|
159 fun as_num f = f >> (fst o read_int) |
|
160 val num = as_num (Scan.many1 Symbol.is_ascii_digit) |
|
161 val digit = Scan.one Symbol.is_ascii_digit |
|
162 val num3 = as_num (digit ::: digit ::: (digit >> single)) |
|
163 val time = num --| Scan.$$ "." -- num3 >> (fn (a, b) => a * 1000 + b) |
|
164 val as_time = |
|
165 raw_explode #> Scan.read Symbol.stopper time #> the_default 0 |
|
166 in (output, as_time t |> Time.fromMilliseconds) end |
|
167 |
|
168 fun run () = |
|
169 let |
|
170 (* If slicing is disabled, we expand the last slice to fill the entire |
|
171 time available. *) |
|
172 val all_slices = best_slices ctxt |
|
173 val actual_slices = get_slices slice all_slices |
|
174 fun max_facts_of_slices f slices = fold (Integer.max o fst o #1 o fst o snd o f) slices 0 |
|
175 val num_actual_slices = length actual_slices |
|
176 val max_fact_factor = |
|
177 Real.fromInt (case max_facts of |
|
178 NONE => max_facts_of_slices I all_slices |
|
179 | SOME max => max) |
|
180 / Real.fromInt (max_facts_of_slices snd actual_slices) |
|
181 fun monomorphize_facts facts = |
|
182 let |
|
183 val ctxt = |
|
184 ctxt |
|
185 |> repair_monomorph_context max_mono_iters best_max_mono_iters max_new_mono_instances |
|
186 best_max_new_mono_instances |
|
187 (* pseudo-theorem involving the same constants as the subgoal *) |
|
188 val subgoal_th = |
|
189 Logic.list_implies (hyp_ts, concl_t) |> Skip_Proof.make_thm thy |
|
190 val rths = |
|
191 facts |> chop mono_max_privileged_facts |
|
192 |>> map (pair 1 o snd) |
|
193 ||> map (pair 2 o snd) |
|
194 |> op @ |
|
195 |> cons (0, subgoal_th) |
|
196 in |
|
197 Monomorph.monomorph atp_schematic_consts_of ctxt rths |
|
198 |> tl |> curry ListPair.zip (map fst facts) |
|
199 |> maps (fn (name, rths) => |
|
200 map (pair name o zero_var_indexes o snd) rths) |
|
201 end |
|
202 |
|
203 fun run_slice time_left (cache_key, cache_value) |
|
204 (slice, (time_frac, |
|
205 (key as ((best_max_facts, best_fact_filter), format, |
|
206 best_type_enc, best_lam_trans, |
|
207 best_uncurried_aliases), |
|
208 extra))) = |
|
209 let |
|
210 val effective_fact_filter = |
|
211 fact_filter |> the_default best_fact_filter |
|
212 val facts = get_facts_of_filter effective_fact_filter factss |
|
213 val num_facts = |
|
214 Real.ceil (max_fact_factor * Real.fromInt best_max_facts) + max_fact_factor_fudge |
|
215 |> Integer.min (length facts) |
|
216 val strictness = if strict then Strict else Non_Strict |
|
217 val type_enc = type_enc |> choose_type_enc strictness best_type_enc format |
|
218 val sound = is_type_enc_sound type_enc |
|
219 val real_ms = Real.fromInt o Time.toMilliseconds |
|
220 val slice_timeout = |
|
221 (real_ms time_left |
|
222 |> (if slice < num_actual_slices - 1 then |
|
223 curry Real.min (time_frac * real_ms timeout) |
|
224 else |
|
225 I)) |
|
226 * 0.001 |
|
227 |> seconds |
|
228 val generous_slice_timeout = |
|
229 if mode = MaSh then one_day else Time.+ (slice_timeout, atp_timeout_slack) |
|
230 val _ = |
|
231 if debug then |
|
232 quote name ^ " slice #" ^ string_of_int (slice + 1) ^ |
|
233 " with " ^ string_of_int num_facts ^ " fact" ^ |
|
234 plural_s num_facts ^ " for " ^ string_of_time slice_timeout ^ "..." |
|
235 |> Output.urgent_message |
|
236 else |
|
237 () |
|
238 val readable_names = not (Config.get ctxt atp_full_names) |
|
239 val lam_trans = |
|
240 case lam_trans of |
|
241 SOME s => s |
|
242 | NONE => best_lam_trans |
|
243 val uncurried_aliases = |
|
244 case uncurried_aliases of |
|
245 SOME b => b |
|
246 | NONE => best_uncurried_aliases |
|
247 val value as (atp_problem, _, fact_names, _, _) = |
|
248 if cache_key = SOME key then |
|
249 cache_value |
|
250 else |
|
251 facts |
|
252 |> not sound ? filter_out (is_dangerous_prop ctxt o prop_of o snd) |
|
253 |> take num_facts |
|
254 |> not (is_type_enc_polymorphic type_enc) ? monomorphize_facts |
|
255 |> map (apsnd prop_of) |
|
256 |> prepare_atp_problem ctxt format prem_role type_enc atp_mode |
|
257 lam_trans uncurried_aliases |
|
258 readable_names true hyp_ts concl_t |
|
259 |
|
260 fun sel_weights () = atp_problem_selection_weights atp_problem |
|
261 fun ord_info () = atp_problem_term_order_info atp_problem |
|
262 |
|
263 val ord = effective_term_order ctxt name |
|
264 val full_proof = isar_proofs |> the_default (mode = Minimize) |
|
265 val args = |
|
266 arguments ctxt full_proof extra slice_timeout (File.shell_path prob_path) |
|
267 (ord, ord_info, sel_weights) |
|
268 val command = |
|
269 "(exec 2>&1; " ^ File.shell_path command0 ^ " " ^ args ^ " " ^ ")" |
|
270 |> enclose "TIMEFORMAT='%3R'; { time " " ; }" |
|
271 val _ = |
|
272 atp_problem |
|
273 |> lines_of_atp_problem format ord ord_info |
|
274 |> cons ("% " ^ command ^ "\n" ^ (if comment = "" then "" else "% " ^ comment ^ "\n")) |
|
275 |> File.write_list prob_path |
|
276 val ((output, run_time), (atp_proof, outcome)) = |
|
277 TimeLimit.timeLimit generous_slice_timeout Isabelle_System.bash_output command |
|
278 |>> (if overlord then prefix ("% " ^ command ^ "\n% " ^ timestamp () ^ "\n") else I) |
|
279 |> fst |> split_time |
|
280 |> (fn accum as (output, _) => |
|
281 (accum, |
|
282 extract_tstplike_proof_and_outcome verbose proof_delims known_failures output |
|
283 |>> atp_proof_of_tstplike_proof atp_problem |
|
284 handle UNRECOGNIZED_ATP_PROOF () => ([], SOME ProofIncomplete))) |
|
285 handle TimeLimit.TimeOut => (("", slice_timeout), ([], SOME TimedOut)) |
|
286 val outcome = |
|
287 (case outcome of |
|
288 NONE => |
|
289 (case used_facts_in_unsound_atp_proof ctxt fact_names atp_proof |
|
290 |> Option.map (sort string_ord) of |
|
291 SOME facts => |
|
292 let val failure = UnsoundProof (is_type_enc_sound type_enc, facts) in |
|
293 if debug then (warning (string_of_atp_failure failure); NONE) else SOME failure |
|
294 end |
|
295 | NONE => NONE) |
|
296 | _ => outcome) |
|
297 in |
|
298 ((SOME key, value), (output, run_time, facts, atp_proof, outcome)) |
|
299 end |
|
300 |
|
301 val timer = Timer.startRealTimer () |
|
302 |
|
303 fun maybe_run_slice slice |
|
304 (result as (cache, (_, run_time0, _, _, SOME _))) = |
|
305 let |
|
306 val time_left = Time.- (timeout, Timer.checkRealTimer timer) |
|
307 in |
|
308 if Time.<= (time_left, Time.zeroTime) then |
|
309 result |
|
310 else |
|
311 run_slice time_left cache slice |
|
312 |> (fn (cache, (output, run_time, used_from, atp_proof, outcome)) => |
|
313 (cache, (output, Time.+ (run_time0, run_time), used_from, atp_proof, outcome))) |
|
314 end |
|
315 | maybe_run_slice _ result = result |
|
316 in |
|
317 ((NONE, ([], Symtab.empty, Vector.fromList [], [], Symtab.empty)), |
|
318 ("", Time.zeroTime, [], [], SOME InternalError)) |
|
319 |> fold maybe_run_slice actual_slices |
|
320 end |
|
321 |
|
322 (* If the problem file has not been exported, remove it; otherwise, export |
|
323 the proof file too. *) |
|
324 fun clean_up () = |
|
325 if dest_dir = "" then (try File.rm prob_path; ()) else () |
|
326 fun export (_, (output, _, _, _, _)) = |
|
327 if dest_dir = "" then () |
|
328 else File.write (Path.explode (Path.implode prob_path ^ "_proof")) output |
|
329 val ((_, (_, pool, fact_names, lifted, sym_tab)), |
|
330 (output, run_time, used_from, atp_proof, outcome)) = |
|
331 with_cleanup clean_up run () |> tap export |
|
332 val important_message = |
|
333 if mode = Normal andalso random_range 0 (atp_important_message_keep_quotient - 1) = 0 |
|
334 then |
|
335 extract_important_message output |
|
336 else |
|
337 "" |
|
338 |
|
339 val (used_facts, preplay, message, message_tail) = |
|
340 (case outcome of |
|
341 NONE => |
|
342 let |
|
343 val used_facts = used_facts_in_atp_proof ctxt fact_names atp_proof |
|
344 val needs_full_types = is_typed_helper_used_in_atp_proof atp_proof |
|
345 val reconstrs = |
|
346 bunch_of_reconstructors needs_full_types (lam_trans_of_atp_proof atp_proof |
|
347 o (fn desperate => if desperate then hide_lamsN else default_metis_lam_trans)) |
|
348 in |
|
349 (used_facts, |
|
350 Lazy.lazy (fn () => |
|
351 let val used_pairs = used_from |> filter_used_facts false used_facts in |
|
352 play_one_line_proof mode debug verbose preplay_timeout used_pairs state subgoal |
|
353 (hd reconstrs) reconstrs |
|
354 end), |
|
355 fn preplay => |
|
356 let |
|
357 val _ = if verbose then Output.urgent_message "Generating proof text..." else () |
|
358 fun isar_params () = |
|
359 let |
|
360 val metis_type_enc = |
|
361 if is_typed_helper_used_in_atp_proof atp_proof then full_typesN |
|
362 else partial_typesN |
|
363 val metis_lam_trans = lam_trans_of_atp_proof atp_proof default_metis_lam_trans |
|
364 val atp_proof = |
|
365 atp_proof |
|
366 |> termify_atp_proof ctxt pool lifted sym_tab |
|
367 |> introduce_spass_skolem |
|
368 |> factify_atp_proof fact_names hyp_ts concl_t |
|
369 in |
|
370 (verbose, metis_type_enc, metis_lam_trans, preplay_timeout, compress_isar, |
|
371 try0_isar, atp_proof, goal) |
|
372 end |
|
373 val one_line_params = |
|
374 (preplay, proof_banner mode name, used_facts, |
|
375 choose_minimize_command thy params minimize_command name preplay, subgoal, |
|
376 subgoal_count) |
|
377 val num_chained = length (#facts (Proof.goal state)) |
|
378 in |
|
379 proof_text ctxt debug isar_proofs isar_params num_chained one_line_params |
|
380 end, |
|
381 (if verbose then "\nATP real CPU time: " ^ string_of_time run_time ^ "." else "") ^ |
|
382 (if important_message <> "" then |
|
383 "\n\nImportant message from Dr. Geoff Sutcliffe:\n" ^ important_message |
|
384 else |
|
385 "")) |
|
386 end |
|
387 | SOME failure => |
|
388 ([], Lazy.value (plain_metis, Play_Failed), fn _ => string_of_atp_failure failure, "")) |
|
389 in |
|
390 {outcome = outcome, used_facts = used_facts, used_from = used_from, run_time = run_time, |
|
391 preplay = preplay, message = message, message_tail = message_tail} |
|
392 end |
|
393 |
|
394 end; |