58 |
59 |
59 fun with_path cleanup after f path = |
60 fun with_path cleanup after f path = |
60 Exn.capture f path |
61 Exn.capture f path |
61 |> tap (fn _ => cleanup path) |
62 |> tap (fn _ => cleanup path) |
62 |> Exn.release |
63 |> Exn.release |
63 |> tap (after path); |
64 |> tap (after path) |
64 |
65 |
65 fun find_known_failure known_failures proof = |
66 (* Splits by the first possible of a list of delimiters. *) |
66 case map_filter (fn (patterns, message) => |
67 fun extract_proof delims output = |
67 if exists (fn pattern => String.isSubstring pattern proof) |
68 case pairself (find_first (fn s => String.isSubstring s output)) |
68 patterns then |
69 (ListPair.unzip delims) of |
69 SOME message |
70 (SOME begin_delim, SOME end_delim) => |
70 else |
71 output |> first_field begin_delim |> the |> snd |
71 NONE) known_failures of |
72 |> first_field end_delim |> the |> fst |
72 [] => if is_proof_well_formed proof then "" |
73 | _ => "" |
73 else "Error: The ATP output is ill-formed." |
74 |
74 | (message :: _) => message |
75 fun extract_proof_or_failure proof_delims known_failures output = |
|
76 case map_filter |
|
77 (fn (patterns, message) => |
|
78 if exists (fn p => String.isSubstring p output) patterns then |
|
79 SOME message |
|
80 else |
|
81 NONE) known_failures of |
|
82 [] => (case extract_proof proof_delims output of |
|
83 "" => ("", "Error: The ATP output is malformed.") |
|
84 | proof => (proof, "")) |
|
85 | (message :: _) => ("", message) |
75 |
86 |
76 fun generic_prover overlord get_facts prepare write_file home executable args |
87 fun generic_prover overlord get_facts prepare write_file home executable args |
77 known_failures name |
88 proof_delims known_failures name |
78 ({debug, full_types, explicit_apply, isar_proof, modulus, sorts, ...} |
89 ({debug, full_types, explicit_apply, isar_proof, modulus, sorts, ...} |
79 : params) minimize_command |
90 : params) minimize_command |
80 ({subgoal, goal, relevance_override, axiom_clauses, filtered_clauses} |
91 ({subgoal, goal, relevance_override, axiom_clauses, filtered_clauses} |
81 : problem) = |
92 : problem) = |
82 let |
93 let |
133 else |
144 else |
134 "") |
145 "") |
135 fun split_time s = |
146 fun split_time s = |
136 let |
147 let |
137 val split = String.tokens (fn c => str c = "\n"); |
148 val split = String.tokens (fn c => str c = "\n"); |
138 val (proof, t) = s |> split |> split_last |> apfst cat_lines; |
149 val (output, t) = s |> split |> split_last |> apfst cat_lines; |
139 fun as_num f = f >> (fst o read_int); |
150 fun as_num f = f >> (fst o read_int); |
140 val num = as_num (Scan.many1 Symbol.is_ascii_digit); |
151 val num = as_num (Scan.many1 Symbol.is_ascii_digit); |
141 val digit = Scan.one Symbol.is_ascii_digit; |
152 val digit = Scan.one Symbol.is_ascii_digit; |
142 val num3 = as_num (digit ::: digit ::: (digit >> single)); |
153 val num3 = as_num (digit ::: digit ::: (digit >> single)); |
143 val time = num --| Scan.$$ "." -- num3 >> (fn (a, b) => a * 1000 + b); |
154 val time = num --| Scan.$$ "." -- num3 >> (fn (a, b) => a * 1000 + b); |
144 val as_time = the_default 0 o Scan.read Symbol.stopper time o explode; |
155 val as_time = the_default 0 o Scan.read Symbol.stopper time o explode; |
145 in (proof, as_time t) end; |
156 in (output, as_time t) end; |
146 fun split_time' s = |
157 fun split_time' s = |
147 if Config.get ctxt measure_runtime then split_time s else (s, 0) |
158 if Config.get ctxt measure_runtime then split_time s else (s, 0) |
148 fun run_on probfile = |
159 fun run_on probfile = |
149 if File.exists command then |
160 if File.exists command then |
150 write_file full_types explicit_apply probfile clauses |
161 write_file full_types explicit_apply probfile clauses |
152 else error ("Bad executable: " ^ Path.implode command ^ "."); |
163 else error ("Bad executable: " ^ Path.implode command ^ "."); |
153 |
164 |
154 (* If the problem file has not been exported, remove it; otherwise, export |
165 (* If the problem file has not been exported, remove it; otherwise, export |
155 the proof file too. *) |
166 the proof file too. *) |
156 fun cleanup probfile = if destdir' = "" then try File.rm probfile else NONE; |
167 fun cleanup probfile = if destdir' = "" then try File.rm probfile else NONE; |
157 fun export probfile (((proof, _), _), _) = |
168 fun export probfile (((output, _), _), _) = |
158 if destdir' = "" then |
169 if destdir' = "" then |
159 () |
170 () |
160 else |
171 else |
161 File.write (Path.explode (Path.implode probfile ^ "_proof")) |
172 File.write (Path.explode (Path.implode probfile ^ "_proof")) |
162 ((if overlord then |
173 ((if overlord then |
163 "% " ^ command_line probfile ^ "\n% " ^ timestamp () ^ |
174 "% " ^ command_line probfile ^ "\n% " ^ timestamp () ^ |
164 "\n" |
175 "\n" |
165 else |
176 else |
166 "") ^ proof) |
177 "") ^ output) |
167 |
178 |
168 val (((proof, atp_run_time_in_msecs), rc), _) = |
179 val (((output, atp_run_time_in_msecs), rc), _) = |
169 with_path cleanup export run_on (prob_pathname subgoal); |
180 with_path cleanup export run_on (prob_pathname subgoal); |
170 |
181 |
171 (* Check for success and print out some information on failure. *) |
182 (* Check for success and print out some information on failure. *) |
172 val failure = find_known_failure known_failures proof; |
183 val (proof, failure) = |
173 val success = rc = 0 andalso failure = ""; |
184 extract_proof_or_failure proof_delims known_failures output |
|
185 val success = (rc = 0 andalso failure = "") |
174 val (message, relevant_thm_names) = |
186 val (message, relevant_thm_names) = |
175 if success then |
187 if success then |
176 proof_text isar_proof debug modulus sorts ctxt |
188 proof_text isar_proof debug modulus sorts ctxt |
177 (minimize_command, proof, internal_thm_names, th, subgoal) |
189 (minimize_command, proof, internal_thm_names, th, subgoal) |
178 else if failure <> "" then |
190 else if failure <> "" then |
179 (failure ^ "\n", []) |
191 (failure ^ "\n", []) |
180 else |
192 else |
181 ("Unknown ATP error: " ^ proof ^ ".\n", []) |
193 ("Unknown ATP error: " ^ output ^ ".\n", []) |
182 in |
194 in |
183 {success = success, message = message, |
195 {success = success, message = message, |
184 relevant_thm_names = relevant_thm_names, |
196 relevant_thm_names = relevant_thm_names, |
185 atp_run_time_in_msecs = atp_run_time_in_msecs, proof = proof, |
197 atp_run_time_in_msecs = atp_run_time_in_msecs, output = output, |
186 internal_thm_names = internal_thm_names, |
198 proof = proof, internal_thm_names = internal_thm_names, |
187 filtered_clauses = the_filtered_clauses} |
199 filtered_clauses = the_filtered_clauses} |
188 end; |
200 end; |
189 |
201 |
190 |
202 |
191 (* generic TPTP-based provers *) |
203 (* generic TPTP-based provers *) |
192 |
204 |
193 fun generic_tptp_prover |
205 fun generic_tptp_prover |
194 (name, {home, executable, arguments, known_failures, max_new_clauses, |
206 (name, {home, executable, arguments, proof_delims, known_failures, |
195 prefers_theory_relevant}) |
207 max_new_clauses, prefers_theory_relevant}) |
196 (params as {debug, overlord, respect_no_atp, relevance_threshold, |
208 (params as {debug, overlord, respect_no_atp, relevance_threshold, |
197 convergence, theory_relevant, higher_order, follow_defs, |
209 convergence, theory_relevant, higher_order, follow_defs, |
198 isar_proof, ...}) |
210 isar_proof, ...}) |
199 minimize_command timeout = |
211 minimize_command timeout = |
200 generic_prover overlord |
212 generic_prover overlord |
201 (get_relevant_facts respect_no_atp relevance_threshold convergence |
213 (get_relevant_facts respect_no_atp relevance_threshold convergence |
202 higher_order follow_defs max_new_clauses |
214 higher_order follow_defs max_new_clauses |
203 (the_default prefers_theory_relevant theory_relevant)) |
215 (the_default prefers_theory_relevant theory_relevant)) |
204 (prepare_clauses higher_order false) |
216 (prepare_clauses higher_order false) |
205 (write_tptp_file (debug andalso overlord andalso not isar_proof)) home |
217 (write_tptp_file (debug andalso overlord andalso not isar_proof)) home |
206 executable (arguments timeout) known_failures name params minimize_command |
218 executable (arguments timeout) proof_delims known_failures name params |
|
219 minimize_command |
207 |
220 |
208 fun tptp_prover name p = (name, generic_tptp_prover (name, p)); |
221 fun tptp_prover name p = (name, generic_tptp_prover (name, p)); |
209 |
222 |
210 |
223 |
211 (** common provers **) |
224 (** common provers **) |
219 val vampire_config : prover_config = |
232 val vampire_config : prover_config = |
220 {home = getenv "VAMPIRE_HOME", |
233 {home = getenv "VAMPIRE_HOME", |
221 executable = "vampire", |
234 executable = "vampire", |
222 arguments = (fn timeout => "--output_syntax tptp --mode casc -t " ^ |
235 arguments = (fn timeout => "--output_syntax tptp --mode casc -t " ^ |
223 string_of_int (generous_to_secs timeout)), |
236 string_of_int (generous_to_secs timeout)), |
|
237 proof_delims = [("=========== Refutation ==========", |
|
238 "======= End of refutation =======")], |
224 known_failures = |
239 known_failures = |
225 [(["Satisfiability detected", "CANNOT PROVE"], |
240 [(["Satisfiability detected", "CANNOT PROVE"], |
226 "The ATP problem is unprovable."), |
241 "The ATP problem is unprovable."), |
227 (["Refutation not found"], |
242 (["Refutation not found"], |
228 "The ATP failed to determine the problem's status.")], |
243 "The ATP failed to determine the problem's status.")], |
231 val vampire = tptp_prover "vampire" vampire_config |
246 val vampire = tptp_prover "vampire" vampire_config |
232 |
247 |
233 |
248 |
234 (* E prover *) |
249 (* E prover *) |
235 |
250 |
|
251 val tstp_proof_delims = |
|
252 ("# SZS output start CNFRefutation.", "# SZS output end CNFRefutation") |
|
253 |
236 val e_config : prover_config = |
254 val e_config : prover_config = |
237 {home = getenv "E_HOME", |
255 {home = getenv "E_HOME", |
238 executable = "eproof", |
256 executable = "eproof", |
239 arguments = (fn timeout => "--tstp-in --tstp-out -l5 -xAutoDev \ |
257 arguments = (fn timeout => "--tstp-in --tstp-out -l5 -xAutoDev \ |
240 \-tAutoDev --silent --cpu-limit=" ^ |
258 \-tAutoDev --silent --cpu-limit=" ^ |
241 string_of_int (generous_to_secs timeout)), |
259 string_of_int (generous_to_secs timeout)), |
|
260 proof_delims = [tstp_proof_delims], |
242 known_failures = |
261 known_failures = |
243 [(["SZS status: Satisfiable", "SZS status Satisfiable"], |
262 [(["SZS status: Satisfiable", "SZS status Satisfiable"], |
244 "The ATP problem is unprovable."), |
263 "The ATP problem is unprovable."), |
245 (["SZS status: ResourceOut", "SZS status ResourceOut"], |
264 (["SZS status: ResourceOut", "SZS status ResourceOut"], |
246 "The ATP ran out of resources."), |
265 "The ATP ran out of resources."), |
252 |
271 |
253 |
272 |
254 (* SPASS *) |
273 (* SPASS *) |
255 |
274 |
256 fun generic_dfg_prover |
275 fun generic_dfg_prover |
257 (name, {home, executable, arguments, known_failures, max_new_clauses, |
276 (name, {home, executable, arguments, proof_delims, known_failures, |
258 prefers_theory_relevant}) |
277 max_new_clauses, prefers_theory_relevant}) |
259 (params as {overlord, respect_no_atp, relevance_threshold, convergence, |
278 (params as {overlord, respect_no_atp, relevance_threshold, convergence, |
260 theory_relevant, higher_order, follow_defs, ...}) |
279 theory_relevant, higher_order, follow_defs, ...}) |
261 minimize_command timeout = |
280 minimize_command timeout = |
262 generic_prover overlord |
281 generic_prover overlord |
263 (get_relevant_facts respect_no_atp relevance_threshold convergence |
282 (get_relevant_facts respect_no_atp relevance_threshold convergence |
264 higher_order follow_defs max_new_clauses |
283 higher_order follow_defs max_new_clauses |
265 (the_default prefers_theory_relevant theory_relevant)) |
284 (the_default prefers_theory_relevant theory_relevant)) |
266 (prepare_clauses higher_order true) write_dfg_file home executable |
285 (prepare_clauses higher_order true) write_dfg_file home executable |
267 (arguments timeout) known_failures name params minimize_command |
286 (arguments timeout) proof_delims known_failures name params |
|
287 minimize_command |
268 |
288 |
269 fun dfg_prover name p = (name, generic_dfg_prover (name, p)) |
289 fun dfg_prover name p = (name, generic_dfg_prover (name, p)) |
270 |
290 |
271 (* The "-VarWeight=3" option helps the higher-order problems, probably by |
291 (* The "-VarWeight=3" option helps the higher-order problems, probably by |
272 counteracting the presence of "hAPP". *) |
292 counteracting the presence of "hAPP". *) |
274 {home = getenv "SPASS_HOME", |
294 {home = getenv "SPASS_HOME", |
275 executable = "SPASS", |
295 executable = "SPASS", |
276 arguments = (fn timeout => "-Auto -SOS=1 -PGiven=0 -PProblem=0 -Splits=0" ^ |
296 arguments = (fn timeout => "-Auto -SOS=1 -PGiven=0 -PProblem=0 -Splits=0" ^ |
277 " -FullRed=0 -DocProof -VarWeight=3 -TimeLimit=" ^ |
297 " -FullRed=0 -DocProof -VarWeight=3 -TimeLimit=" ^ |
278 string_of_int (generous_to_secs timeout)), |
298 string_of_int (generous_to_secs timeout)), |
|
299 proof_delims = [("Here is a proof", "Formulae used in the proof")], |
279 known_failures = |
300 known_failures = |
280 [(["SPASS beiseite: Completion found."], "The ATP problem is unprovable."), |
301 [(["SPASS beiseite: Completion found."], "The ATP problem is unprovable."), |
281 (["SPASS beiseite: Ran out of time."], "The ATP timed out."), |
302 (["SPASS beiseite: Ran out of time."], "The ATP timed out."), |
282 (["SPASS beiseite: Maximal number of loops exceeded."], |
303 (["SPASS beiseite: Maximal number of loops exceeded."], |
283 "The ATP hit its loop limit.")], |
304 "The ATP hit its loop limit.")], |
295 |
316 |
296 val spass_tptp_config = |
317 val spass_tptp_config = |
297 {home = #home spass_config, |
318 {home = #home spass_config, |
298 executable = #executable spass_config, |
319 executable = #executable spass_config, |
299 arguments = prefix "-TPTP " o #arguments spass_config, |
320 arguments = prefix "-TPTP " o #arguments spass_config, |
|
321 proof_delims = #proof_delims spass_config, |
300 known_failures = |
322 known_failures = |
301 #known_failures spass_config @ |
323 #known_failures spass_config @ |
302 [(["unrecognized option `-TPTP'", "Unrecognized option TPTP"], |
324 [(["unrecognized option `-TPTP'", "Unrecognized option TPTP"], |
303 "Warning: Sledgehammer requires a more recent version of SPASS with \ |
325 "Warning: Sledgehammer requires a more recent version of SPASS with \ |
304 \support for the TPTP syntax. To install it, download and untar the \ |
326 \support for the TPTP syntax. To install it, download and untar the \ |
341 val remote_known_failures = |
363 val remote_known_failures = |
342 [(["Remote-script could not extract proof"], |
364 [(["Remote-script could not extract proof"], |
343 "Error: The remote ATP proof is ill-formed.")] |
365 "Error: The remote ATP proof is ill-formed.")] |
344 |
366 |
345 fun remote_prover_config prover_prefix args |
367 fun remote_prover_config prover_prefix args |
346 ({known_failures, max_new_clauses, prefers_theory_relevant, ...} |
368 ({proof_delims, known_failures, max_new_clauses, |
347 : prover_config) : prover_config = |
369 prefers_theory_relevant, ...} : prover_config) : prover_config = |
348 {home = getenv "ISABELLE_ATP_MANAGER", |
370 {home = getenv "ISABELLE_ATP_MANAGER", |
349 executable = "SystemOnTPTP", |
371 executable = "SystemOnTPTP", |
350 arguments = (fn timeout => |
372 arguments = (fn timeout => |
351 args ^ " -t " ^ string_of_int (generous_to_secs timeout) ^ " -s " ^ |
373 args ^ " -t " ^ string_of_int (generous_to_secs timeout) ^ " -s " ^ |
352 the_system prover_prefix), |
374 the_system prover_prefix), |
|
375 proof_delims = insert (op =) tstp_proof_delims proof_delims, |
353 known_failures = remote_known_failures @ known_failures, |
376 known_failures = remote_known_failures @ known_failures, |
354 max_new_clauses = max_new_clauses, |
377 max_new_clauses = max_new_clauses, |
355 prefers_theory_relevant = prefers_theory_relevant} |
378 prefers_theory_relevant = prefers_theory_relevant} |
356 |
379 |
357 val remote_vampire = |
380 val remote_vampire = |