(*  Title:      HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML
    Author:     Jasmin Blanchette and Sascha Boehme and Tobias Nipkow, TU Munich
*)
|
4 |
|
5 structure Mirabelle_Sledgehammer : MIRABELLE_ACTION = |
|
6 struct |
|
7 |
|
(* Keys under which this action looks up its settings in the argument list. *)

(* prover selection, limits and output *)
val proverK = "prover"
val prover_timeoutK = "prover_timeout"
val preplay_timeoutK = "preplay_timeout"
val max_callsK = "max_calls"
val keepK = "keep"
val check_trivialK = "check_trivial" (*false by default*)

(* problem encoding *)
val type_encK = "type_enc"
val strictK = "strict"
val sliceK = "slice"
val lam_transK = "lam_trans"
val uncurried_aliasesK = "uncurried_aliases"
val max_relevantK = "max_relevant"
val max_new_mono_instancesK = "max_new_mono_instances"
val max_mono_itersK = "max_mono_iters"

(* prover-specific configuration options *)
val e_selection_heuristicK = "e_selection_heuristic"
val term_orderK = "term_order"
val force_sosK = "force_sos"

(* minimization and proof reconstruction *)
val minimizeK = "minimize" (*refers to minimization attempted by Mirabelle*)
val minimize_timeoutK = "minimize_timeout"
val sh_minimizeK = "sh_minimize" (*refers to minimizer run within Sledgehammer*)
val metis_ftK = "metis_ft"
val reconstructorK = "reconstructor"
|
30 |
|
(* Log-line prefixes; each starts with "#" followed by the action id. *)
fun sh_tag id = String.concat ["#", string_of_int id, " sledgehammer: "]

fun minimize_tag id = String.concat ["#", string_of_int id, " minimize (sledgehammer): "]

(* The reconstructor name is read from the reference each time a tag is built. *)
fun reconstructor_tag reconstructor id =
  String.concat ["#", string_of_int id, " ", !reconstructor, " (sledgehammer): "]

(* Line logged between the individual phases of one action run. *)
val separator = "-----"

(* Value passed as "preplay_timeout" when the arguments do not supply one. *)
val preplay_timeout_default = "4"
(*FIXME sensible to have Mirabelle-level Sledgehammer defaults?*)
|
40 |
|
(*Prepend (label, value) to the list when args binds key; otherwise return the
  list unchanged.  This keeps default values out of the Mirabelle level, so that
  the defaults of the tool itself (Sledgehammer in this case) apply.*)
fun available_parameter args key label list =
  (case AList.lookup (op =) args key of
    SOME value => (label, value) :: list
  | NONE => list)
|
48 |
|
49 |
|
(* Each statistics record below comes with a constructor from a flat tuple, a
   destructor back to that tuple (same component order) and an all-zero value. *)

(* Counters for the Sledgehammer runs themselves.  The three time_* fields are
   divided by 1000 and reported as totals/averages when logged. *)
datatype sh_data = ShData of {
  calls: int,
  success: int,
  nontriv_calls: int,
  nontriv_success: int,
  lemmas: int,
  max_lems: int,
  time_isa: int,
  time_prover: int,
  time_prover_fail: int}

fun make_sh_data
    (calls, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa,
     time_prover, time_prover_fail) =
  ShData {
    calls = calls,
    success = success,
    nontriv_calls = nontriv_calls,
    nontriv_success = nontriv_success,
    lemmas = lemmas,
    max_lems = max_lems,
    time_isa = time_isa,
    time_prover = time_prover,
    time_prover_fail = time_prover_fail}

fun tuple_of_sh_data (ShData r) =
  (#calls r, #success r, #nontriv_calls r, #nontriv_success r, #lemmas r,
   #max_lems r, #time_isa r, #time_prover r, #time_prover_fail r)

val empty_sh_data = make_sh_data (0, 0, 0, 0, 0, 0, 0, 0, 0)


(* Counters for proof reconstruction.  "lemmas" is logged as
   (total, sum of squares, maximum); "posns" pairs each successfully replayed
   position with its "trivial" flag. *)
datatype re_data = ReData of {
  calls: int,
  success: int,
  nontriv_calls: int,
  nontriv_success: int,
  proofs: int,
  time: int,
  timeout: int,
  lemmas: int * int * int,
  posns: (Position.T * bool) list
  }

fun make_re_data
    (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout,
     lemmas, posns) =
  ReData {
    calls = calls,
    success = success,
    nontriv_calls = nontriv_calls,
    nontriv_success = nontriv_success,
    proofs = proofs,
    time = time,
    timeout = timeout,
    lemmas = lemmas,
    posns = posns}

fun tuple_of_re_data (ReData r) =
  (#calls r, #success r, #nontriv_calls r, #nontriv_success r, #proofs r,
   #time r, #timeout r, #lemmas r, #posns r)

val empty_re_data = make_re_data (0, 0, 0, 0, 0, 0, 0, (0, 0, 0), [])


(* Counters for the Mirabelle-level minimization step. *)
datatype min_data = MinData of {
  succs: int,
  ab_ratios: int
  }

fun make_min_data (succs, ab_ratios) =
  MinData {succs = succs, ab_ratios = ab_ratios}

fun tuple_of_min_data (MinData r) = (#succs r, #ab_ratios r)

val empty_min_data = make_min_data (0, 0)
|
109 |
|
110 |
|
(* Selects which of the four reconstruction records of "data" is addressed. *)
datatype reconstructor_mode =
  Unminimized | Minimized | UnminimizedFT | MinimizedFT

(* All statistics collected for one action id. *)
datatype data = Data of {
  sh: sh_data,
  min: min_data,
  re_u: re_data, (* reconstructor with unminimized set of lemmas *)
  re_m: re_data, (* reconstructor with minimized set of lemmas *)
  re_uft: re_data, (* reconstructor with unminimized set of lemmas and fully-typed *)
  re_mft: re_data, (* reconstructor with minimized set of lemmas and fully-typed *)
  mini: bool (* with minimization *)
  }

fun make_data (sh, min, re_u, re_m, re_uft, re_mft, mini) =
  Data {
    sh = sh,
    min = min,
    re_u = re_u,
    re_m = re_m,
    re_uft = re_uft,
    re_mft = re_mft,
    mini = mini}

val empty_data =
  make_data (empty_sh_data, empty_min_data,
    empty_re_data, empty_re_data, empty_re_data, empty_re_data, false)

(* Apply a tuple-level update f to the Sledgehammer counters only. *)
fun map_sh_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
  make_data (make_sh_data (f (tuple_of_sh_data sh)), min, re_u, re_m, re_uft, re_mft, mini)

(* Apply a tuple-level update f to the minimization counters only. *)
fun map_min_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
  make_data (sh, make_min_data (f (tuple_of_min_data min)), re_u, re_m, re_uft, re_mft, mini)

(* Apply a tuple-level update f to the reconstruction record selected by m. *)
fun map_re_data f m (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
  let
    val update = make_re_data o f o tuple_of_re_data
  in
    (case m of
      Unminimized =>
        make_data (sh, min, update re_u, re_m, re_uft, re_mft, mini)
    | Minimized =>
        make_data (sh, min, re_u, update re_m, re_uft, re_mft, mini)
    | UnminimizedFT =>
        make_data (sh, min, re_u, re_m, update re_uft, re_mft, mini)
    | MinimizedFT =>
        make_data (sh, min, re_u, re_m, re_uft, update re_mft, mini))
  end

(* Replace the "mini" flag, keeping every counter. *)
fun set_mini mini (Data {sh, min, re_u, re_m, re_uft, re_mft, ...}) =
  make_data (sh, min, re_u, re_m, re_uft, re_mft, mini)
|
154 |
|
(* Fold one sample n into a (sum, sum of squares, maximum) triple. *)
fun inc_max (n: int) (sum, sum_sq, maximum) =
  let
    val maximum' = if n > maximum then n else maximum
  in
    (sum + n, sum_sq + n * n, maximum')
  end
|
156 |
|
(* Updates of single Sledgehammer counters.  The tuple components are, in order:
   c = calls, s = success, nc = nontriv_calls, ns = nontriv_success,
   l = lemmas, ml = max_lems, ti = time_isa, tp = time_prover,
   tf = time_prover_fail. *)

val inc_sh_calls = map_sh_data
  (fn (c, s, nc, ns, l, ml, ti, tp, tf) => (c + 1, s, nc, ns, l, ml, ti, tp, tf))

val inc_sh_success = map_sh_data
  (fn (c, s, nc, ns, l, ml, ti, tp, tf) => (c, s + 1, nc, ns, l, ml, ti, tp, tf))

val inc_sh_nontriv_calls = map_sh_data
  (fn (c, s, nc, ns, l, ml, ti, tp, tf) => (c, s, nc + 1, ns, l, ml, ti, tp, tf))

val inc_sh_nontriv_success = map_sh_data
  (fn (c, s, nc, ns, l, ml, ti, tp, tf) => (c, s, nc, ns + 1, l, ml, ti, tp, tf))

fun inc_sh_lemmas n = map_sh_data
  (fn (c, s, nc, ns, l, ml, ti, tp, tf) => (c, s, nc, ns, l + n, ml, ti, tp, tf))

(* Despite the name this keeps the larger of the stored maximum and n. *)
fun inc_sh_max_lems n = map_sh_data
  (fn (c, s, nc, ns, l, ml, ti, tp, tf) => (c, s, nc, ns, l, Int.max (ml, n), ti, tp, tf))

fun inc_sh_time_isa t = map_sh_data
  (fn (c, s, nc, ns, l, ml, ti, tp, tf) => (c, s, nc, ns, l, ml, ti + t, tp, tf))

fun inc_sh_time_prover t = map_sh_data
  (fn (c, s, nc, ns, l, ml, ti, tp, tf) => (c, s, nc, ns, l, ml, ti, tp + t, tf))

fun inc_sh_time_prover_fail t = map_sh_data
  (fn (c, s, nc, ns, l, ml, ti, tp, tf) => (c, s, nc, ns, l, ml, ti, tp, tf + t))
|
192 |
|
(* Count one more successful minimization. *)
val inc_min_succs = map_min_data (fn (n, ratios) => (n + 1, ratios))

(* Add r to the accumulated after/before ratio. *)
fun inc_min_ab_ratios r = map_min_data (fn (n, ratios) => (n, ratios + r))
|
198 |
|
(* Updates of single reconstruction counters; m selects the record to update.
   The tuple components are, in order: calls, ok (success), nt_calls
   (nontriv_calls), nt_ok (nontriv_success), proofs, time, timeouts (timeout),
   lems (lemmas) and posns. *)

fun inc_reconstructor_calls m = map_re_data
  (fn (calls, ok, nt_calls, nt_ok, proofs, time, timeouts, lems, posns) =>
    (calls + 1, ok, nt_calls, nt_ok, proofs, time, timeouts, lems, posns)) m

fun inc_reconstructor_success m = map_re_data
  (fn (calls, ok, nt_calls, nt_ok, proofs, time, timeouts, lems, posns) =>
    (calls, ok + 1, nt_calls, nt_ok, proofs, time, timeouts, lems, posns)) m

fun inc_reconstructor_nontriv_calls m = map_re_data
  (fn (calls, ok, nt_calls, nt_ok, proofs, time, timeouts, lems, posns) =>
    (calls, ok, nt_calls + 1, nt_ok, proofs, time, timeouts, lems, posns)) m

fun inc_reconstructor_nontriv_success m = map_re_data
  (fn (calls, ok, nt_calls, nt_ok, proofs, time, timeouts, lems, posns) =>
    (calls, ok, nt_calls, nt_ok + 1, proofs, time, timeouts, lems, posns)) m

fun inc_reconstructor_proofs m = map_re_data
  (fn (calls, ok, nt_calls, nt_ok, proofs, time, timeouts, lems, posns) =>
    (calls, ok, nt_calls, nt_ok, proofs + 1, time, timeouts, lems, posns)) m

fun inc_reconstructor_time m t = map_re_data
  (fn (calls, ok, nt_calls, nt_ok, proofs, time, timeouts, lems, posns) =>
    (calls, ok, nt_calls, nt_ok, proofs, time + t, timeouts, lems, posns)) m

fun inc_reconstructor_timeout m = map_re_data
  (fn (calls, ok, nt_calls, nt_ok, proofs, time, timeouts, lems, posns) =>
    (calls, ok, nt_calls, nt_ok, proofs, time, timeouts + 1, lems, posns)) m

(* Folds n into the (sum, sum of squares, maximum) lemma triple. *)
fun inc_reconstructor_lemmas m n = map_re_data
  (fn (calls, ok, nt_calls, nt_ok, proofs, time, timeouts, lems, posns) =>
    (calls, ok, nt_calls, nt_ok, proofs, time, timeouts, inc_max n lems, posns)) m

(* Newest position first. *)
fun inc_reconstructor_posns m pos = map_re_data
  (fn (calls, ok, nt_calls, nt_ok, proofs, time, timeouts, lems, posns) =>
    (calls, ok, nt_calls, nt_ok, proofs, time, timeouts, lems, pos :: posns)) m
|
234 |
|
(* Render an optional integer, printing 0 when it is absent. *)
fun str0 opt = string_of_int (the_default 0 opt)
|
236 |
|
237 local |
|
238 |
|
(* Formatting helpers for the report. *)

val str = string_of_int

(* Reals are printed with exactly three digits after the decimal point. *)
val str3 = Real.fmt (StringCvt.FIX (SOME 3))

(* Integer percentage of a relative to b (truncating division).  A zero
   denominator yields "0" instead of raising Div, mirroring the guard that
   avg_time applies to its divisor. *)
fun percentage a b =
  if b = 0 then string_of_int 0
  else string_of_int (a * 100 div b)

(* Scale an integer time by 1/1000. *)
fun time t = Real.fromInt t / 1000.0

(* Scaled time per item; 0.0 when there are no items. *)
fun avg_time t n =
  if n > 0 then (Real.fromInt t / 1000.0) / Real.fromInt n else 0.0
|
245 |
|
(* Write the Sledgehammer counters to log, one line per figure. *)
fun log_sh_data log
    (calls, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa,
     time_prover, time_prover_fail) =
  let
    fun count label n = log (label ^ str n)
    fun secs label t = log (label ^ str3 t)
  in
    count "Total number of sledgehammer calls: " calls;
    count "Number of successful sledgehammer calls: " success;
    count "Number of sledgehammer lemmas: " lemmas;
    count "Max number of sledgehammer lemmas: " max_lems;
    log ("Success rate: " ^ percentage success calls ^ "%");
    count "Total number of nontrivial sledgehammer calls: " nontriv_calls;
    count "Number of successful nontrivial sledgehammer calls: " nontriv_success;
    secs "Total time for sledgehammer calls (Isabelle): " (time time_isa);
    secs "Total time for successful sledgehammer calls (ATP): " (time time_prover);
    secs "Total time for failed sledgehammer calls (ATP): " (time time_prover_fail);
    secs "Average time for sledgehammer calls (Isabelle): " (avg_time time_isa calls);
    secs "Average time for successful sledgehammer calls (ATP): "
      (avg_time time_prover success);
    (* every call that did not succeed counts as failed here *)
    secs "Average time for failed sledgehammer calls (ATP): "
      (avg_time time_prover_fail (calls - success))
  end
|
265 |
|
(* Line number of a position, followed by "[T]" when the goal was flagged as
   trivial.  (The offset is deliberately not printed.) *)
fun str_of_pos (pos, triv) =
  let
    val line = str0 (Position.line_of pos)
  in
    if triv then line ^ "[T]" else line
  end
|
269 |
|
(* Write one reconstruction record to log.  tag is spliced in front of the word
   "reconstructor" in every label; the success rate is taken relative to
   sh_calls.  The list of proved positions is printed only for the empty tag. *)
fun log_re_data log tag sh_calls (re_calls, re_success, re_nontriv_calls,
    re_nontriv_success, re_proofs, re_time, re_timeout,
    (lemmas, lems_sos, lems_max), re_posns) =
  let
    (* <before> <tag>reconstructor <after><value> *)
    fun entry before after value = log (before ^ tag ^ "reconstructor " ^ after ^ value)
    val proof_note = " (proof: " ^ str re_proofs ^ ")"
  in
    entry "Total number of " "calls: " (str re_calls);
    entry "Number of successful " "calls: " (str re_success ^ proof_note);
    entry "Number of " "timeouts: " (str re_timeout);
    log ("Success rate: " ^ percentage re_success sh_calls ^ "%");
    entry "Total number of nontrivial " "calls: " (str re_nontriv_calls);
    entry "Number of successful nontrivial " "calls: " (str re_nontriv_success ^ proof_note);
    entry "Number of successful " "lemmas: " (str lemmas);
    entry "SOS of successful " "lemmas: " (str lems_sos);
    entry "Max number of successful " "lemmas: " (str lems_max);
    entry "Total time for successful " "calls: " (str3 (time re_time));
    entry "Average time for successful " "calls: " (str3 (avg_time re_time re_success));
    (case tag of
      "" => log ("Proved: " ^ space_implode " " (map str_of_pos re_posns))
    | _ => ())
  end
|
291 |
|
(* Write the minimization counters to log. *)
fun log_min_data log (succs, ab_ratios) =
  let
    fun line label n = log (label ^ string_of_int n)
  in
    line "Number of successful minimizations: " succs;
    line "After/before ratios: " ab_ratios
  end
|
296 |
|
297 in |
|
298 |
|
(* Write the final report for action id.  Nothing is logged when Sledgehammer
   was never called; reconstruction records without any call are skipped. *)
fun log_data id log (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
  let
    val sh_stats = tuple_of_sh_data sh
    val sh_calls = #1 sh_stats

    fun used (ReData {calls, ...}) = calls > 0

    fun report tag stats = log_re_data log tag sh_calls (tuple_of_re_data stats)

    (* primary record, blank line, then the secondary record if it was used;
       nothing at all if the primary record was never used *)
    fun report_pair (tag1, stats1) (tag2, stats2) =
      if used stats1 then
        (report tag1 stats1;
         log "";
         if used stats2 then report tag2 stats2 else ())
      else ()

    fun report_minimized () =
      if used re_m then
        (log_min_data log (tuple_of_min_data min);
         log "";
         report_pair ("", re_m) ("fully-typed ", re_mft))
      else ()
  in
    if sh_calls <= 0 then ()
    else
      (log ("\n\n\nReport #" ^ string_of_int id ^ ":\n");
       log_sh_data log sh_stats;
       log "";
       if mini then
         (if used re_u then
            (report_pair ("unminimized ", re_u) ("unminimized fully-typed ", re_uft);
             log "";
             report_minimized ())
          else ())
       else report_pair ("", re_u) ("fully-typed ", re_uft))
  end
|
325 |
|
326 end |
|
327 |
|
328 |
|
(* Warning: we implicitly assume single-threaded execution here! *)
(* Statistics per action id, kept as an association list. *)
val data = Unsynchronized.ref ([] : (int * data) list)

(* Put a fresh, empty statistics entry for id at the front of the list. *)
fun init id thy =
  let
    val _ = Unsynchronized.change data (fn entries => (id, empty_data) :: entries)
  in thy end

(* Log the report for id, if an entry for it exists. *)
fun done id ({log, ...}: Mirabelle.done_args) =
  (case AList.lookup (op =) (!data) id of
    SOME stats => log_data id log stats
  | NONE => ())

(* Apply f to the statistics stored for id. *)
fun change_data id f =
  let
    val _ = Unsynchronized.change data (AList.map_entry (op =) id f)
  in () end
|
339 |
|
340 |
|
(* Name and minimizing prover function for the prover named under proverK in
   args, or for the first of Sledgehammer's default provers when args does not
   name one. *)
fun get_prover ctxt args =
  let
    (* only evaluated when no prover was requested explicitly *)
    fun default_prover_name () =
      hd (#provers (Sledgehammer_Isar.default_params ctxt []))
      handle List.Empty => error "No ATP available."
    val name =
      (case AList.lookup (op =) args proverK of
        SOME requested => requested
      | NONE => default_prover_name ())
  in
    (name,
     Sledgehammer_Run.get_minimizing_prover ctxt Sledgehammer_Provers.Normal name)
  end
|
354 |
|
type stature = ATP_Problem_Generate.stature

(* hack *)
(* Choose the reconstructor: an explicit reconstructorK argument wins;
   otherwise the prover message is searched for a metis call (with or without
   parenthesized options), and "smt" is used if none is found. *)
fun reconstructor_from_msg args msg =
  let
    (* text from the first "metis (" up to and including the next ")" *)
    fun metis_with_options () =
      let
        val (_, from_metis) = Substring.position "metis (" (Substring.full msg)
        val (before_paren, _) = Substring.position ")" from_metis
      in
        Substring.string before_paren ^ ")"
      end
  in
    (case AList.lookup (op =) args reconstructorK of
      SOME name => name
    | NONE =>
        if String.isSubstring "metis (" msg then metis_with_options ()
        else if String.isSubstring "metis" msg then "metis"
        else "smt")
  end
|
372 |
|
373 local |
|
374 |
|
(* Outcome of one Sledgehammer run, as produced by run_sh:
     SH_OK (time_isa, time_prover, used_facts)  -- the prover reported no failure
     SH_FAIL (time_isa, time_prover)            -- the prover reported a failure
     SH_ERROR                                   -- an ERROR was raised during the run *)
datatype sh_result =
  SH_OK of int * int * (string * stature) list |
  SH_FAIL of int * int |
  SH_ERROR

(* Run the given prover on the first subgoal of proof state st and return
   (message, sh_result).

   The string-valued settings (type_enc, strict, max_relevant, slice, timeout,
   preplay_timeout, ...) are turned into Sledgehammer parameters; the *LST
   arguments are functions that may add further (name, value) pairs to that
   parameter list.  e_selection_heuristic, term_order and force_sos are
   optional values written into the proof context.  If dir is given, problem
   files are directed to that directory.  hard_timeout (seconds, optional)
   bounds the whole timed computation. *)
fun run_sh prover_name prover type_enc strict max_relevant slice lam_trans
    uncurried_aliases e_selection_heuristic term_order force_sos
    hard_timeout timeout preplay_timeout sh_minimizeLST
    max_new_mono_instancesLST max_mono_itersLST dir pos st =
  let
    val {context = ctxt, facts = chained_ths, goal} = Proof.goal st
    (* only the first subgoal is attacked *)
    val i = 1
    (* file names are derived from the line of pos, a timestamp and a serial *)
    fun set_file_name (SOME dir) =
        Config.put Sledgehammer_Provers.dest_dir dir
        #> Config.put Sledgehammer_Provers.problem_prefix
          ("prob_" ^ str0 (Position.line_of pos) ^ "__")
        #> Config.put SMT_Config.debug_files
          (dir ^ "/" ^ Name.desymbolize false (ATP_Util.timestamp ()) ^ "_"
          ^ serial_string ())
      | set_file_name NONE = I
    (* proof state whose context carries the file and prover options; options
       that were not supplied leave the context untouched *)
    val st' =
      st
      |> Proof.map_context
           (set_file_name dir
            #> (Option.map (Config.put ATP_Systems.e_selection_heuristic)
                  e_selection_heuristic |> the_default I)
            #> (Option.map (Config.put ATP_Systems.term_order)
                  term_order |> the_default I)
            #> (Option.map (Config.put ATP_Systems.force_sos)
                  force_sos |> the_default I))
    (* NB: from here on max_relevant and slice denote the fields of the
       parameter record, shadowing the string arguments of the same name *)
    val params as {relevance_thresholds, max_relevant, slice, ...} =
      Sledgehammer_Isar.default_params ctxt
          ([("verbose", "true"),
            ("type_enc", type_enc),
            ("strict", strict),
            ("lam_trans", lam_trans |> the_default "smart"),
            ("uncurried_aliases", uncurried_aliases |> the_default "smart"),
            ("max_relevant", max_relevant),
            ("slice", slice),
            ("timeout", string_of_int timeout),
            ("preplay_timeout", preplay_timeout)]
           |> sh_minimizeLST (*don't confuse the two minimization flags*)
           |> max_new_mono_instancesLST
           |> max_mono_itersLST)
    val default_max_relevant =
      Sledgehammer_Provers.default_max_relevant_for_prover ctxt slice
        prover_name
    val is_appropriate_prop =
      Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt prover_name
    val is_built_in_const =
      Sledgehammer_Provers.is_built_in_const_for_prover ctxt prover_name
    val relevance_fudge =
      Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover_name
    val relevance_override = {add = [], del = [], only = false}
    val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal i
    (* wrapper enforcing the hard timeout; the identity when none is given *)
    val time_limit =
      (case hard_timeout of
        NONE => I
      | SOME secs => TimeLimit.timeLimit (Time.fromSeconds secs))
    (* substitute result for runs that produced no prover result; its time
       component is ~1 *)
    fun failed failure =
      ({outcome = SOME failure, used_facts = [], run_time = Time.zeroTime,
        preplay =
          K (ATP_Proof_Reconstruct.Failed_to_Play Sledgehammer_Provers.plain_metis),
        message = K "", message_tail = ""}, ~1)
    (* fact selection and the prover call are timed together; time_isa is the
       second component returned by Mirabelle.cpu_time *)
    val ({outcome, used_facts, run_time, preplay, message, message_tail}
         : Sledgehammer_Provers.prover_result,
        time_isa) = time_limit (Mirabelle.cpu_time (fn () =>
      let
        (* conclusions the prover cannot handle abort the run at once *)
        val _ = if is_appropriate_prop concl_t then ()
                else raise Fail "inappropriate"
        val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover_name
        val facts =
          Sledgehammer_Filter.nearly_all_facts ctxt ho_atp relevance_override
            chained_ths hyp_ts concl_t
          |> filter (is_appropriate_prop o prop_of o snd)
          |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds
                 (the_default default_max_relevant max_relevant)
                 is_built_in_const relevance_fudge relevance_override
                 chained_ths hyp_ts concl_t
        val problem =
          {state = st', goal = goal, subgoal = i,
           subgoal_count = Sledgehammer_Util.subgoal_count st,
           facts = facts |> map Sledgehammer_Provers.Untranslated_Fact,
           smt_filter = NONE}
      in prover params (K (K (K ""))) problem end)) ()
      handle TimeLimit.TimeOut => failed ATP_Proof.TimedOut
           | Fail "inappropriate" => failed ATP_Proof.Inappropriate
    val time_prover = run_time |> Time.toMilliseconds
    val msg = message (preplay ()) ^ message_tail
  in
    (* an absent outcome means success *)
    case outcome of
      NONE => (msg, SH_OK (time_isa, time_prover, used_facts))
    | SOME _ => (msg, SH_FAIL (time_isa, time_prover))
  end
  (* any ERROR raised above is reported as SH_ERROR, with its text as message *)
  handle ERROR msg => ("error: " ^ msg, SH_ERROR)
|
470 |
|
(* Parse name as a non-empty list of theorem references and look each one up
   in ctxt; returns whatever the exhausted source yields. *)
fun thms_of_name ctxt name =
  let
    val lex = Keyword.get_lexicons
    fun get refs = maps (Proof_Context.get_fact ctxt o fst) refs

    val symbols = Symbol.source (Source.of_string name)
    val tokens = Token.source {do_recover=SOME false} lex Position.start symbols
    val proper_tokens = Token.source_proper tokens
    val parsed =
      Source.source Token.stopper (Parse_Spec.xthms1 >> get) NONE proper_tokens
  in
    Source.exhaust parsed
  end
|
483 |
|
484 in |
|
485 |
|
(* Run Sledgehammer on the proof state of one Mirabelle run, update the
   statistics of action id, and log the outcome.  On success the references
   reconstructor and named_thms are set to the reconstructor name derived from
   the prover message and to the used facts that could be resolved to
   theorems. *)
fun run_sledgehammer trivial args reconstructor named_thms id
    ({pre=st, log, pos, ...}: Mirabelle.run_args) =
  let
    fun arg key = AList.lookup (op =) args key
    fun arg_or default key = the_default default (arg key)
    val record = change_data id

    val triv_str = if trivial then "[T] " else ""
    val _ = record inc_sh_calls
    val _ = if trivial then () else record inc_sh_nontriv_calls
    val (prover_name, prover) = get_prover (Proof.context_of st) args

    val type_enc = arg_or "smart" type_encK
    val strict = arg_or "false" strictK
    val max_relevant = arg_or "smart" max_relevantK
    val slice = arg_or "true" sliceK
    val lam_trans = arg lam_transK
    val uncurried_aliases = arg uncurried_aliasesK
    val e_selection_heuristic = arg e_selection_heuristicK
    val term_order = arg term_orderK
    (* any value other than "false" switches the option on *)
    val force_sos = Option.map (fn value => "false" <> value) (arg force_sosK)
    val dir = arg keepK
    val timeout = Mirabelle.get_int_setting args (prover_timeoutK, 30)
    (* always use a hard timeout, but give some slack so that the automatic
       minimizer has a chance to do its magic *)
    val hard_timeout = SOME (2 * timeout)
    val preplay_timeout = arg_or preplay_timeout_default preplay_timeoutK
    val sh_minimizeLST = available_parameter args sh_minimizeK "minimize"
    val max_new_mono_instancesLST =
      available_parameter args max_new_mono_instancesK max_new_mono_instancesK
    val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK

    val (msg, result) =
      run_sh prover_name prover type_enc strict max_relevant slice lam_trans
        uncurried_aliases e_selection_heuristic term_order force_sos
        hard_timeout timeout preplay_timeout sh_minimizeLST
        max_new_mono_instancesLST max_mono_itersLST dir pos st
  in
    (case result of
      SH_OK (time_isa, time_prover, names) =>
        let
          val n_names = length names
          (* facts whose name cannot be resolved are dropped silently *)
          fun get_thms (name, stature) =
            (case try (thms_of_name (Proof.context_of st)) name of
              SOME thms => SOME ((name, stature), thms)
            | NONE => NONE)
        in
          record inc_sh_success;
          if trivial then () else record inc_sh_nontriv_success;
          record (inc_sh_lemmas n_names);
          record (inc_sh_max_lems n_names);
          record (inc_sh_time_isa time_isa);
          record (inc_sh_time_prover time_prover);
          reconstructor := reconstructor_from_msg args msg;
          named_thms := SOME (map_filter get_thms names);
          log (sh_tag id ^ triv_str ^ "succeeded (" ^ string_of_int time_isa ^ "+" ^
            string_of_int time_prover ^ ") [" ^ prover_name ^ "]:\n" ^ msg)
        end
    | SH_FAIL (time_isa, time_prover) =>
        (record (inc_sh_time_isa time_isa);
         record (inc_sh_time_prover_fail time_prover);
         log (sh_tag id ^ triv_str ^ "failed: " ^ msg))
    | SH_ERROR => log (sh_tag id ^ "failed: " ^ msg))
  end
|
545 |
|
546 end |
|
547 |
|
(* Minimize the facts currently stored in named_thms with Sledgehammer's
   minimizer, update the minimization statistics of action id and log the
   outcome.  When the minimizer returns a strictly smaller fact list, the
   references reconstructor and named_thms are updated accordingly. *)
fun run_minimize args reconstructor named_thms id
    ({pre=st, log, ...}: Mirabelle.run_args) =
  let
    val ctxt = Proof.context_of st
    (* number of facts before minimization *)
    val n0 = length (these (!named_thms))
    val (prover_name, _) = get_prover ctxt args
    val type_enc = AList.lookup (op =) args type_encK |> the_default "smart"
    val strict = AList.lookup (op =) args strictK |> the_default "false"
    val timeout =
      AList.lookup (op =) args minimize_timeoutK
      |> Option.map (fst o read_int o raw_explode)  (* FIXME Symbol.explode (?) *)
      |> the_default 5
    val preplay_timeout = AList.lookup (op =) args preplay_timeoutK
      |> the_default preplay_timeout_default
    val sh_minimizeLST = available_parameter args sh_minimizeK "minimize"
    val max_new_mono_instancesLST =
      available_parameter args max_new_mono_instancesK max_new_mono_instancesK
    val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK
    val params = Sledgehammer_Isar.default_params ctxt
      ([("provers", prover_name),
        ("verbose", "true"),
        ("type_enc", type_enc),
        ("strict", strict),
        ("timeout", string_of_int timeout),
        ("preplay_timeout", preplay_timeout)]
       |> sh_minimizeLST (*don't confuse the two minimization flags*)
       |> max_new_mono_instancesLST
       |> max_mono_itersLST)
    val minimize =
      Sledgehammer_Minimize.minimize_facts prover_name params
          true 1 (Sledgehammer_Util.subgoal_count st)
    val _ = log separator
    val (used_facts, (preplay, message, message_tail)) =
      minimize st (these (!named_thms))
    val msg = message (preplay ()) ^ message_tail
  in
    case used_facts of
      SOME named_thms' =>
        let
          val n = length named_thms'
          (* after/before ratio in percent; with no facts to begin with the
             ratio is taken to be 100 rather than dividing by zero, which
             would raise Div after the success had already been counted *)
          val ratio = if n0 = 0 then 100 else (100 * n) div n0
        in
          change_data id inc_min_succs;
          change_data id (inc_min_ab_ratios ratio);
          if n = n0
          then log (minimize_tag id ^ "already minimal")
          else (reconstructor := reconstructor_from_msg args msg;
                named_thms := SOME named_thms';
                log (minimize_tag id ^ "succeeded:\n" ^ msg))
        end
    | NONE => log (minimize_tag id ^ "failed: " ^ msg)
  end
|
596 |
|
(* Sledgehammer parameter overrides for a single-prover run without relevance
   filtering and slicing; the timeout is passed on in whole seconds. *)
fun override_params prover type_enc timeout =
  let
    val timeout_secs = string_of_int (Time.toSeconds timeout)
  in
    [("provers", prover),
     ("max_relevant", "0"),
     ("type_enc", type_enc),
     ("strict", "true"),
     ("slice", "false"),
     ("timeout", timeout_secs)]
  end
|
604 |
|
(* Try to replay a proof with the reconstructor named in the reference
   reconstructor, using the theorems in named_thms, and update the
   reconstruction statistics of action id for mode m.

   trivial  -- whether the goal was flagged as trivial (affects the nontrivial
               counters and the recorded position)
   full     -- use metis with fully-typed encoding, unless the reconstructor is
               "sledgehammer_tac" or "smt"
   name     -- compared against "proof" to count proofs separately

   The log line is prefixed with the reconstructor tag.  The result is true if
   the timed attempt finished without a timeout or ERROR, and false otherwise;
   note that a finished but unsuccessful attempt ("failed (...)") also yields
   true. *)
fun run_reconstructor trivial full m name reconstructor named_thms id
    ({pre=st, timeout, log, pos, ...}: Mirabelle.run_args) =
  let
    (* select the tactic for the current reconstructor *)
    fun do_reconstructor named_thms ctxt =
      let
        (* parse a fact name back into a theorem reference *)
        val ref_of_str =
          suffix ";" #> Outer_Syntax.scan Position.none #> Parse_Spec.xthm
          #> fst
        val thms = named_thms |> maps snd
        val facts = named_thms |> map (ref_of_str o fst o fst)
        (* restrict Sledgehammer to exactly the given facts *)
        val relevance_override = {add = facts, del = [], only = true}
        (* fraction time_slice of the overall timeout *)
        fun my_timeout time_slice =
          timeout |> Time.toReal |> curry Real.* time_slice |> Time.fromReal
        fun sledge_tac time_slice prover type_enc =
          Sledgehammer_Tactics.sledgehammer_as_oracle_tac ctxt
            (override_params prover type_enc (my_timeout time_slice))
            relevance_override
      in
        if !reconstructor = "sledgehammer_tac" then
          (* four provers with a fifth of the timeout each, then metis *)
          sledge_tac 0.2 ATP_Systems.z3_tptpN "mono_native"
          ORELSE' sledge_tac 0.2 ATP_Systems.eN "mono_guards??"
          ORELSE' sledge_tac 0.2 ATP_Systems.vampireN "mono_guards??"
          ORELSE' sledge_tac 0.2 ATP_Systems.spassN "poly_tags"
          ORELSE' Metis_Tactic.metis_tac [] ATP_Problem_Generate.combsN
            ctxt thms
        else if !reconstructor = "smt" then
          SMT_Solver.smt_tac ctxt thms
        else if full then
          Metis_Tactic.metis_tac [ATP_Proof_Reconstruct.full_typesN]
            ATP_Proof_Reconstruct.metis_default_lam_trans ctxt thms
        else if String.isPrefix "metis (" (!reconstructor) then
          let
            (* options are parsed from the reconstructor string itself, after
               dropping its first proper token; missing options fall back to
               the defaults below *)
            val (type_encs, lam_trans) =
              !reconstructor
              |> Outer_Syntax.scan Position.start
              |> filter Token.is_proper |> tl
              |> Metis_Tactic.parse_metis_options |> fst
              |>> the_default [ATP_Proof_Reconstruct.partial_typesN]
              ||> the_default ATP_Proof_Reconstruct.metis_default_lam_trans
          in Metis_Tactic.metis_tac type_encs lam_trans ctxt thms end
        else if !reconstructor = "metis" then
          Metis_Tactic.metis_tac [] ATP_Proof_Reconstruct.metis_default_lam_trans ctxt
            thms
        else
          (* NOTE(review): any other reconstructor name ends up here; all_tac
             presumably leaves the goal unchanged -- confirm how
             Mirabelle.can_apply treats that *)
          K all_tac
      end
    fun apply_reconstructor named_thms =
      Mirabelle.can_apply timeout (do_reconstructor named_thms) st

    (* turn (succeeded?, time) into a log text; statistics are only updated on
       success *)
    fun with_time (false, t) = "failed (" ^ string_of_int t ^ ")"
      | with_time (true, t) = (change_data id (inc_reconstructor_success m);
          if trivial then ()
          else change_data id (inc_reconstructor_nontriv_success m);
          change_data id (inc_reconstructor_lemmas m (length named_thms));
          change_data id (inc_reconstructor_time m t);
          change_data id (inc_reconstructor_posns m (pos, trivial));
          if name = "proof" then change_data id (inc_reconstructor_proofs m)
          else ();
          "succeeded (" ^ string_of_int t ^ ")")
    (* returns (log text, finished?); timeouts are counted, errors are only
       reported *)
    fun timed_reconstructor named_thms =
      (with_time (Mirabelle.cpu_time apply_reconstructor named_thms), true)
      handle TimeLimit.TimeOut => (change_data id (inc_reconstructor_timeout m);
               ("timeout", false))
           | ERROR msg => ("error: " ^ msg, false)

    val _ = log separator
    val _ = change_data id (inc_reconstructor_calls m)
    val _ = if trivial then ()
            else change_data id (inc_reconstructor_nontriv_calls m)
  in
    named_thms
    |> timed_reconstructor
    |>> log o prefix (reconstructor_tag reconstructor id)
    |> snd
  end
|
680 |
|
(* Time limit for the "is this goal trivial?" check. *)
val try_timeout = seconds 5.0

(* crude hack *)
(* Number of goals considered so far, shared by all action ids. *)
val num_sledgehammer_calls = Unsynchronized.ref 0

(* The Mirabelle action proper: run Sledgehammer on the goal, replay the found
   proof, and optionally minimize the facts and replay again.  Goals whose
   major premise is a conjunction or an equation at the meta level are skipped,
   as is every goal beyond the max_calls limit. *)
fun sledgehammer_action args id (st as {pre, name, ...}: Mirabelle.run_args) =
  let
    val goal = Thm.major_prem_of (#goal (Proof.goal pre))
    val skipped = can Logic.dest_conjunction goal orelse can Logic.dest_equals goal

    fun process () =
      let
        (* state shared between the Sledgehammer, minimize and replay phases *)
        val reconstructor = Unsynchronized.ref ""
        val named_thms =
          Unsynchronized.ref (NONE : ((string * stature) * thm list) list option)
        val minimize = AList.defined (op =) args minimizeK
        val metis_ft = AList.defined (op =) args metis_ftK
        val check_trivial =
          AList.lookup (op =) args check_trivialK |> the_default "false"
          |> Bool.fromString |> the
        (* a timeout of the check counts as "not trivial" *)
        val trivial =
          check_trivial andalso
            (Try0.try0 (SOME try_timeout) ([], [], [], []) pre
             handle TimeLimit.TimeOut => false)

        (* one replay attempt with the facts currently stored in named_thms *)
        fun replay full mode =
          Mirabelle.catch_result (reconstructor_tag reconstructor) false
            (run_reconstructor trivial full mode name reconstructor
              (these (!named_thms))) id st

        (* with metis_ft, a fully-typed attempt follows only if the first
           attempt returned false *)
        fun apply_reconstructor m1 m2 =
          let
            val finished = replay false m1
          in
            if metis_ft andalso not finished then (replay true m2; ()) else ()
          end
      in
        change_data id (set_mini minimize);
        Mirabelle.catch sh_tag (run_sledgehammer trivial args reconstructor
          named_thms) id st;
        (case !named_thms of
          NONE => ()
        | SOME _ =>
            (apply_reconstructor Unminimized UnminimizedFT;
             if minimize andalso not (null (these (!named_thms))) then
               (Mirabelle.catch minimize_tag
                  (run_minimize args reconstructor named_thms) id st;
                apply_reconstructor Minimized MinimizedFT)
             else ()))
      end
  in
    if skipped then ()
    else
      let
        val max_calls =
          AList.lookup (op =) args max_callsK |> the_default "10000000"
          |> Int.fromString |> the
        (* the counter also advances for goals beyond the limit *)
        val _ = num_sledgehammer_calls := !num_sledgehammer_calls + 1
      in
        if !num_sledgehammer_calls > max_calls then () else process ()
      end
  end
|
742 |
|
(* Register this action with Mirabelle, using args as its settings. *)
fun invoke args =
  let
    val action = sledgehammer_action args
  in
    Mirabelle.register (init, action, done)
  end
|
745 |
|
746 end |
|