author | boehmes |
Tue, 08 Sep 2009 09:57:33 +0200 | |
changeset 32541 | cea1716eb106 |
parent 32536 | ac56c62758d3 |
child 32549 | 338ccfd37f67 |
permissions | -rw-r--r-- |
32385
594890623c46
split actions from Mirabelle core (Mirabelle may thus be applied to basic theories in HOL)
boehmes
parents:
diff
changeset
|
1 |
(* Title: mirabelle_sledgehammer.ML |
594890623c46
split actions from Mirabelle core (Mirabelle may thus be applied to basic theories in HOL)
boehmes
parents:
diff
changeset
|
2 |
Author: Jasmin Blanchette and Sascha Boehme |
594890623c46
split actions from Mirabelle core (Mirabelle may thus be applied to basic theories in HOL)
boehmes
parents:
diff
changeset
|
3 |
*) |
594890623c46
split actions from Mirabelle core (Mirabelle may thus be applied to basic theories in HOL)
boehmes
parents:
diff
changeset
|
4 |
|
594890623c46
split actions from Mirabelle core (Mirabelle may thus be applied to basic theories in HOL)
boehmes
parents:
diff
changeset
|
5 |
structure Mirabelle_Sledgehammer : MIRABELLE_ACTION = |
594890623c46
split actions from Mirabelle core (Mirabelle may thus be applied to basic theories in HOL)
boehmes
parents:
diff
changeset
|
6 |
struct |
594890623c46
split actions from Mirabelle core (Mirabelle may thus be applied to basic theories in HOL)
boehmes
parents:
diff
changeset
|
7 |
|
32521 | 8 |
(* Keys under which this action looks up its options in the argument list. *)
val proverK           : string = "prover"
val prover_timeoutK   : string = "prover_timeout"
val keepK             : string = "keep"
val metisK            : string = "metis"
val full_typesK       : string = "full_types"
val minimizeK         : string = "minimize"
val minimize_timeoutK : string = "minimize_timeout"
|
32521 | 15 |
|
16 |
(* Log prefix for the sledgehammer step of action instance `id`. *)
fun sh_tag id = String.concat ["#", string_of_int id, " sledgehammer: "]
|
32525 | 17 |
(* Log prefix for the minimization step of action instance `id`. *)
fun minimize_tag id = String.concat ["#", string_of_int id, " minimize (sledgehammer): "]
32521 | 18 |
(* Log prefix for the metis step of action instance `id`. *)
fun metis_tag id = String.concat ["#", string_of_int id, " metis (sledgehammer): "]
19 |
||
32525 | 20 |
(* Line logged before the minimize and metis steps to separate them visually. *)
val separator : string = "-----"
21 |
||
32521 | 22 |
|
23 |
(* Counters collected per action instance.  The *_time fields are divided
   by 1000 when the report is printed (presumably milliseconds -> seconds;
   confirm against Mirabelle.cpu_time). *)
datatype data = Data of {
  sh_calls: int,          (* sledgehammer invocations *)
  sh_success: int,        (* invocations that produced a proof *)
  sh_time_isa: int,       (* time reported as "(Isabelle)", all calls *)
  sh_time_atp: int,       (* ATP time of successful calls *)
  sh_time_atp_fail: int,  (* ATP time of failed calls *)
  metis_calls: int,       (* metis invocations *)
  metis_success: int,     (* metis invocations that succeeded *)
  metis_time: int,        (* time of successful metis calls *)
  metis_timeout: int      (* metis invocations that timed out *)
}
|
33 |
||
32536 | 34 |
(* Build a Data record from its nine counters, given positionally in the
   order of the datatype's fields. *)
fun make_data (calls, ok, t_isa, t_atp, t_atp_fail, m_calls, m_ok, m_time, m_timeout) =
  Data {
    sh_calls = calls,
    sh_success = ok,
    sh_time_isa = t_isa,
    sh_time_atp = t_atp,
    sh_time_atp_fail = t_atp_fail,
    metis_calls = m_calls,
    metis_success = m_ok,
    metis_time = m_time,
    metis_timeout = m_timeout}
|
40 |
||
32536 | 41 |
(* Apply `f` to the nine counters (as a tuple, in field order) and rebuild
   the record from the tuple it returns. *)
fun map_data f (Data r) =
  make_data (f (#sh_calls r, #sh_success r, #sh_time_isa r, #sh_time_atp r,
    #sh_time_atp_fail r, #metis_calls r, #metis_success r, #metis_time r,
    #metis_timeout r))
45 |
||
32536 | 46 |
(* Statistics with every counter at zero. *)
val empty_data =
  Data {sh_calls = 0, sh_success = 0, sh_time_isa = 0, sh_time_atp = 0,
    sh_time_atp_fail = 0, metis_calls = 0, metis_success = 0, metis_time = 0,
    metis_timeout = 0}
32521 | 47 |
|
32536 | 48 |
(* Count one more sledgehammer call. *)
val inc_sh_calls =
  map_data (fn (calls, ok, isa, atp, atp_fail, m_calls, m_ok, m_time, m_timeouts) =>
    (calls + 1, ok, isa, atp, atp_fail, m_calls, m_ok, m_time, m_timeouts))
32521 | 51 |
|
32536 | 52 |
(* Count one more successful sledgehammer call. *)
val inc_sh_success =
  map_data (fn (calls, ok, isa, atp, atp_fail, m_calls, m_ok, m_time, m_timeouts) =>
    (calls, ok + 1, isa, atp, atp_fail, m_calls, m_ok, m_time, m_timeouts))
32521 | 55 |
|
32536 | 56 |
(* Add `t` to the accumulated "Isabelle" time of sledgehammer calls. *)
fun inc_sh_time_isa t =
  map_data (fn (calls, ok, isa, atp, atp_fail, m_calls, m_ok, m_time, m_timeouts) =>
    (calls, ok, isa + t, atp, atp_fail, m_calls, m_ok, m_time, m_timeouts))
32521 | 59 |
|
32536 | 60 |
(* Add `t` to the accumulated ATP time of successful sledgehammer calls. *)
fun inc_sh_time_atp t =
  map_data (fn (calls, ok, isa, atp, atp_fail, m_calls, m_ok, m_time, m_timeouts) =>
    (calls, ok, isa, atp + t, atp_fail, m_calls, m_ok, m_time, m_timeouts))
32521 | 63 |
|
32536 | 64 |
(* Add `t` to the accumulated ATP time of failed sledgehammer calls. *)
fun inc_sh_time_atp_fail t =
  map_data (fn (calls, ok, isa, atp, atp_fail, m_calls, m_ok, m_time, m_timeouts) =>
    (calls, ok, isa, atp, atp_fail + t, m_calls, m_ok, m_time, m_timeouts))
32533 | 67 |
|
32536 | 68 |
(* Count one more metis call. *)
val inc_metis_calls =
  map_data (fn (calls, ok, isa, atp, atp_fail, m_calls, m_ok, m_time, m_timeouts) =>
    (calls, ok, isa, atp, atp_fail, m_calls + 1, m_ok, m_time, m_timeouts))
|
71 |
||
72 |
(* Count one more successful metis call. *)
val inc_metis_success =
  map_data (fn (calls, ok, isa, atp, atp_fail, m_calls, m_ok, m_time, m_timeouts) =>
    (calls, ok, isa, atp, atp_fail, m_calls, m_ok + 1, m_time, m_timeouts))
76 |
||
32536 | 77 |
(* Add `t` to the accumulated time of successful metis calls. *)
fun inc_metis_time t =
  map_data (fn (calls, ok, isa, atp, atp_fail, m_calls, m_ok, m_time, m_timeouts) =>
    (calls, ok, isa, atp, atp_fail, m_calls, m_ok, m_time + t, m_timeouts))
81 |
||
32536 | 82 |
(* Count one more metis timeout. *)
val inc_metis_timeout =
  map_data (fn (calls, ok, isa, atp, atp_fail, m_calls, m_ok, m_time, m_timeouts) =>
    (calls, ok, isa, atp, atp_fail, m_calls, m_ok, m_time, m_timeouts + 1))
86 |
||
87 |
||
88 |
local |
|
89 |
||
90 |
(* Formatting shorthands for the report: integers, and reals with three
   digits after the decimal point. *)
fun str i = string_of_int i
fun str3 x = Real.fmt (StringCvt.FIX (SOME 3)) x
|
92 |
(* Integer percentage of `a` relative to `b`, rendered as a string.
   A zero denominator yields "0" instead of raising Div, mirroring the
   guard in avg_time. *)
fun percentage a b =
  if b = 0 then "0" else string_of_int (a * 100 div b)
|
93 |
(* Scale an integer time value down by a factor of 1000, as a real. *)
fun time t = (Real.fromInt t) / 1000.0
|
94 |
(* Average of the scaled time `t / 1000` over `n` events; 0.0 when `n` is
   not positive. *)
fun avg_time t n =
  if n <= 0 then 0.0
  else
    let val scaled = Real.fromInt t / 1000.0
    in scaled / Real.fromInt n end
|
96 |
||
32536 | 97 |
(* Write the sledgehammer part of the report through `log`: call counts,
   success rate, then total and average times. *)
fun log_sh_data log sh_calls sh_success sh_time_isa sh_time_atp sh_time_atp_fail =
  let
    fun line label value = log (label ^ value)
    fun total t = str3 (time t)
    fun average t n = str3 (avg_time t n)
  in
    line "Total number of sledgehammer calls: " (str sh_calls);
    line "Number of successful sledgehammer calls: " (str sh_success);
    line "Success rate: " (percentage sh_success sh_calls ^ "%");
    line "Total time for sledgehammer calls (Isabelle): " (total sh_time_isa);
    line "Total time for successful sledgehammer calls (ATP): " (total sh_time_atp);
    line "Total time for failed sledgehammer calls (ATP): " (total sh_time_atp_fail);
    line "Average time for sledgehammer calls (Isabelle): " (average sh_time_isa sh_calls);
    line "Average time for successful sledgehammer calls (ATP): "
      (average sh_time_atp sh_success);
    line "Average time for failed sledgehammer calls (ATP): "
      (average sh_time_atp_fail (sh_calls - sh_success))
  end
32521 | 111 |
|
32533 | 112 |
(* Write the metis part of the report through `log`.  The "exceptions"
   figure is derived as sh_success - metis_success - metis_timeout, and the
   success rate is taken relative to the number of sledgehammer calls. *)
fun log_metis_data log sh_calls sh_success metis_calls metis_success metis_time
    metis_timeout =
  let
    fun line label value = log (label ^ value)
  in
    line "Total number of metis calls: " (str metis_calls);
    line "Number of successful metis calls: " (str metis_success);
    line "Number of metis timeouts: " (str metis_timeout);
    line "Number of metis exceptions: " (str (sh_success - metis_success - metis_timeout));
    line "Success rate: " (percentage metis_success sh_calls ^ "%");
    line "Total time for successful metis calls: " (str3 (time metis_time));
    line "Average time for successful metis calls: "
      (str3 (avg_time metis_time metis_success))
  end
|
123 |
||
124 |
in |
|
125 |
||
32536 | 126 |
(* Print the report for action instance `id`.  Nothing is printed when no
   sledgehammer call was recorded; the metis section is printed only when
   at least one metis call was recorded. *)
fun log_data id log (Data {sh_calls, sh_success, sh_time_isa, sh_time_atp,
    sh_time_atp_fail, metis_calls, metis_success, metis_time, metis_timeout}) =
  if sh_calls <= 0 then ()
  else
    let
      val _ = log ("\n\n\nReport #" ^ string_of_int id ^ ":\n")
      val _ = log_sh_data log sh_calls sh_success sh_time_isa sh_time_atp sh_time_atp_fail
      val _ = log ""
    in
      if metis_calls <= 0 then ()
      else
        log_metis_data log sh_calls sh_success metis_calls metis_success metis_time
          metis_timeout
    end
|
136 |
||
137 |
end |
|
138 |
||
139 |
||
140 |
(* Warning: we implicitly assume single-threaded execution here! *) |
|
141 |
(* Statistics of all registered action instances, keyed by their id. *)
val data : (int * data) list ref = ref []
|
142 |
||
143 |
(* Register fresh (all-zero) statistics for `id`; the theory is returned
   unchanged. *)
fun init id thy =
  let val _ = change data (fn entries => (id, empty_data) :: entries)
  in thy end
|
144 |
(* Print the report for `id` if statistics were registered for it;
   otherwise do nothing. *)
fun done id {log, ...} =
  (case AList.lookup (op =) (!data) id of
    SOME stats => log_data id log stats
  | NONE => ())
|
148 |
||
149 |
(* Apply `f` to the statistics stored under `id`. *)
fun change_data id f =
  let val _ = change data (fn entries => AList.map_entry (op =) id f entries)
  in () end
|
150 |
||
151 |
||
32525 | 152 |
(* Determine the prover to use and return it as (name, prover).

   The name comes from the "prover" option in `args`; only when that option
   is absent is the first entry of AtpManager.get_atps () consulted.  The
   default is computed lazily so that an empty ATP list cannot make an
   explicitly configured prover fail.

   Raises ERROR with a descriptive message if no default is available or
   the prover name is not known to AtpManager. *)
fun get_atp thy args =
  let
    fun default_name () =
      (case space_explode " " (AtpManager.get_atps ()) of
        name :: _ => name
      | [] => error "No ATP available.")
    val name =
      (case AList.lookup (op =) args proverK of
        SOME given => given
      | NONE => default_name ())
  in
    (case AtpManager.get_prover name thy of
      SOME prover => (name, prover)
    | NONE => error ("Bad ATP: " ^ name))
  end
|
156 |
||
32521 | 157 |
local |
158 |
||
159 |
(* Bracket `f`: run `init x`, apply `f` to its result, and run `done` on
   that same result whether or not `f` raised.  The captured outcome of `f`
   (value or exception) is released afterwards. *)
fun safe init done f x =
  let
    val state = init x
    val outcome = Exn.capture f state
  in
    (done state; Exn.release outcome)
  end
|
165 |
||
166 |
(* Optionally redirect AtpWrapper.destdir to `path`; always returns the
   value destdir had on entry so that done_sh can restore it.
   Warning: we implicitly assume single-threaded execution here! *)
fun init_sh dir =
  let
    val old = !AtpWrapper.destdir
    val _ =
      (case dir of
        SOME path => AtpWrapper.destdir := path
      | NONE => ())
  in old end
|
173 |
||
174 |
(* Set AtpWrapper.destdir to `path` (used to restore the value saved by
   init_sh). *)
fun done_sh path = (AtpWrapper.destdir := path)
|
175 |
||
32536 | 176 |
(* Outcome of one sledgehammer run as produced by run_sh. *)
datatype sh_result =
    SH_OK of int * int * string list  (* time_isa, time_atp, theorem names *)
  | SH_FAIL of int * int              (* time_isa, time_atp *)
  | SH_ERROR                          (* an ERROR was raised *)
|
180 |
||
32525 | 181 |
(* Run the prover on the goal of proof state `st` and classify the outcome.
   Returns (message, sh_result).  The last argument is ignored.
   TOO_TRIVIAL is reported as a success with zero times and no theorems;
   an ERROR is reported as SH_ERROR with its message. *)
fun run_sh (prover_name, prover) timeout st _ =
  (let
    val atp = prover timeout NONE NONE prover_name 1
    val ((success, (message, thm_names), time_atp, _, _, _), time_isa) =
      Mirabelle.cpu_time atp (Proof.get_goal st)
    val outcome =
      if success then SH_OK (time_isa, time_atp, thm_names)
      else SH_FAIL (time_isa, time_atp)
  in
    (message, outcome)
  end)
  handle ResHolClause.TOO_TRIVIAL => ("trivial", SH_OK (0, 0, []))
       | ERROR msg => ("error: " ^ msg, SH_ERROR)
|
32521 | 192 |
|
32454
a1a5589207ad
Mirabelle: proper parsing of theorem names found by sledgehammer, respecting test intervals given along with file names
boehmes
parents:
32452
diff
changeset
|
193 |
(* Parse `name` as theorem references (SpecParse.xthms1) using the outer
   lexer, and look each reference up in `ctxt` via ProofContext.get_fact.
   Recovery is switched off in both the symbol and the token source. *)
fun thms_of_name ctxt name =
  let
    fun fetch refs = maps (ProofContext.get_fact ctxt o fst) refs
    val symbols = Symbol.source {do_recover=false} (Source.of_string name)
    val tokens =
      OuterLex.source {do_recover=SOME false} OuterKeyword.get_lexicons Position.start symbols
    val proper_tokens = OuterLex.source_proper tokens
    val parsed =
      Source.source OuterLex.stopper (SpecParse.xthms1 >> fetch) NONE proper_tokens
  in
    Source.exhaust parsed
  end
32452
d84edd022efe
apply metis with found theorems in case sledgehammer was successful
boehmes
parents:
32434
diff
changeset
|
205 |
|
32498
1132c7c13f36
Mirabelle: actions are responsible for handling exceptions,
boehmes
parents:
32496
diff
changeset
|
206 |
in |
1132c7c13f36
Mirabelle: actions are responsible for handling exceptions,
boehmes
parents:
32496
diff
changeset
|
207 |
|
32541 | 208 |
(* Run sledgehammer on proof state `st`, update the statistics of `id`, and
   log the outcome.  On success the theorems named by the prover are stored
   in `named_thms` for the later minimize/metis steps.
   Options read from `args`: the prover (via get_atp), "keep" (passed to
   init_sh) and "prover_timeout" (default 30). *)
fun run_sledgehammer args named_thms id {pre=st, log, ...} =
  let
    val _ = change_data id inc_sh_calls
    val atp as (prover_name, _) = get_atp (Proof.theory_of st) args
    val keep_dir = AList.lookup (op =) args keepK
    val limit = Mirabelle.get_int_setting args (prover_timeoutK, 30)
    val (msg, outcome) = safe init_sh done_sh (run_sh atp limit st) keep_dir

    fun log_failure () = log (sh_tag id ^ "failed: " ^ msg)

    fun on_success (time_isa, time_atp, names) =
      (change_data id inc_sh_success;
       change_data id (inc_sh_time_isa time_isa);
       change_data id (inc_sh_time_atp time_atp);
       named_thms :=
         SOME (map (fn name => (name, thms_of_name (Proof.context_of st) name)) names);
       log (sh_tag id ^ "succeeded (" ^ string_of_int time_isa ^ "+" ^
         string_of_int time_atp ^ ") [" ^ prover_name ^ "]:\n" ^ msg))

    fun on_failure (time_isa, time_atp) =
      (change_data id (inc_sh_time_isa time_isa);
       change_data id (inc_sh_time_atp_fail time_atp);
       log_failure ())
  in
    (case outcome of
      SH_OK result => on_success result
    | SH_FAIL times => on_failure times
    | SH_ERROR => log_failure ())
  end
236 |
||
237 |
end |
|
238 |
||
32521 | 239 |
|
32525 | 240 |
(* Try to shrink the theorem list found by sledgehammer.

   The "minimize_timeout" option (default 5) is read with
   Mirabelle.get_int_setting, the same helper run_sledgehammer uses for
   "prover_timeout", instead of a hand-rolled read_int parse.
   If minimization returns a list of the same length, the result is logged
   as "already minimal"; otherwise `named_thms` is replaced by the smaller
   list.  A failed minimization is only logged. *)
fun run_minimize args named_thms id {pre=st, log, ...} =
  let
    val (prover_name, prover) = get_atp (Proof.theory_of st) args
    val minimize = AtpMinimal.minimalize prover prover_name
    val timeout = Mirabelle.get_int_setting args (minimize_timeoutK, 5)
    val _ = log separator
    (* read the reference once; it is only updated below *)
    val current = these (!named_thms)
  in
    (case minimize timeout st current of
      (SOME named_thms', msg) =>
        if length named_thms' = length current
        then log (minimize_tag id ^ "already minimal")
        else
          (named_thms := SOME named_thms';
           log (minimize_tag id ^ "succeeded:\n" ^ msg))
    | (NONE, msg) => log (minimize_tag id ^ "failed: " ^ msg))
  end
|
259 |
||
260 |
||
261 |
(* Replay the proof with metis, using all theorems in `named_thms`, and log
   the outcome.  Updates the metis counters of `id`: calls always, success
   and time on success, timeouts on TimeLimit.TimeOut.  An ERROR is logged
   but not counted separately.  `args` is not consulted. *)
fun run_metis args named_thms id {pre=st, timeout, log, ...} =
  let
    fun apply_metis thms =
      Mirabelle.can_apply timeout (fn ctxt => MetisTools.metis_tac ctxt thms) st

    fun with_time (ok, t) =
      if ok then
        (change_data id inc_metis_success;
         change_data id (inc_metis_time t);
         "succeeded (" ^ string_of_int t ^ ")")
      else "failed (" ^ string_of_int t ^ ")"

    fun timed_metis thms =
      with_time (Mirabelle.cpu_time apply_metis thms)
      handle TimeLimit.TimeOut => (change_data id inc_metis_timeout; "timeout")
           | ERROR msg => "error: " ^ msg

    val _ = log separator
    val _ = change_data id inc_metis_calls
    val outcome = timed_metis (maps snd named_thms)
  in
    log (metis_tag id ^ outcome)
  end
|
32385
594890623c46
split actions from Mirabelle core (Mirabelle may thus be applied to basic theories in HOL)
boehmes
parents:
diff
changeset
|
281 |
|
32452
d84edd022efe
apply metis with found theorems in case sledgehammer was successful
boehmes
parents:
32434
diff
changeset
|
282 |
|
32521 | 283 |
(* The action proper: run sledgehammer, then (if their option is present in
   `args` and sledgehammer stored theorems) the minimize and metis steps.
   Each step is wrapped in Mirabelle.catch with its own log tag. *)
fun sledgehammer_action args id (st as {log, ...}) =
  let
    (* theorems found by sledgehammer; NONE until it succeeds *)
    val named_thms = ref (NONE : (string * thm list) list option)

    fun enabled key =
      AList.defined (op =) args key andalso is_some (!named_thms)
    fun when_enabled key step =
      if enabled key then step id st else ()
  in
    Mirabelle.catch sh_tag (run_sledgehammer args named_thms) id st;
    when_enabled minimizeK
      (Mirabelle.catch minimize_tag (run_minimize args named_thms));
    (* the reference is read here, i.e. after a possible minimization *)
    when_enabled metisK
      (Mirabelle.catch metis_tag (run_metis args (these (!named_thms))))
  end
|
32385
594890623c46
split actions from Mirabelle core (Mirabelle may thus be applied to basic theories in HOL)
boehmes
parents:
diff
changeset
|
297 |
|
32511 | 298 |
(* Entry point: configure full types from the "full_types" option and
   register this action (init, per-goal action, done) with Mirabelle. *)
fun invoke args =
  let
    (* always enable metis *)
    val args' = (metisK, "yes") :: args
    val use_full_types = AList.defined (op =) args' full_typesK
  in
    AtpManager.set_full_types use_full_types;
    Mirabelle.register (init, sledgehammer_action args', done)
  end
32385
594890623c46
split actions from Mirabelle core (Mirabelle may thus be applied to basic theories in HOL)
boehmes
parents:
diff
changeset
|
303 |
|
594890623c46
split actions from Mirabelle core (Mirabelle may thus be applied to basic theories in HOL)
boehmes
parents:
diff
changeset
|
304 |
end |