| author | wenzelm | 
| Thu, 06 Nov 2014 15:42:34 +0100 | |
| changeset 58922 | 1f500b18c4c6 | 
| parent 58205 | 369513534627 | 
| child 59058 | a78612c67ec0 | 
| permissions | -rw-r--r-- | 
| 48285 | 1  | 
(* Title: HOL/TPTP/mash_eval.ML  | 
| 48234 | 2  | 
Author: Jasmin Blanchette, TU Muenchen  | 
3  | 
Copyright 2012  | 
|
4  | 
||
| 48285 | 5  | 
Evaluate proof suggestions from MaSh (Machine-learning for Sledgehammer).  | 
| 48234 | 6  | 
*)  | 
7  | 
||
| 48285 | 8  | 
signature MASH_EVAL =  | 
| 48234 | 9  | 
sig  | 
| 55201 | 10  | 
type params = Sledgehammer_Prover.params  | 
| 
48251
 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
 
blanchet 
parents: 
48250 
diff
changeset
 | 
11  | 
|
| 57456 | 12  | 
val evaluate_mash_suggestions : Proof.context -> params -> int * int option -> string option ->  | 
13  | 
string list -> string -> unit  | 
|
| 48234 | 14  | 
end;  | 
15  | 
||
| 48285 | 16  | 
structure MaSh_Eval : MASH_EVAL =  | 
| 48234 | 17  | 
struct  | 
| 48235 | 18  | 
|
| 50557 | 19  | 
open Sledgehammer_Util  | 
| 
48315
 
82d6e46c673f
fixed order of accessibles + other tweaks to MaSh
 
blanchet 
parents: 
48313 
diff
changeset
 | 
20  | 
open Sledgehammer_Fact  | 
| 50557 | 21  | 
open Sledgehammer_MePo  | 
| 48381 | 22  | 
open Sledgehammer_MaSh  | 
| 55201 | 23  | 
open Sledgehammer_Prover  | 
| 55212 | 24  | 
open Sledgehammer_Prover_ATP  | 
| 55198 | 25  | 
open Sledgehammer_Commands  | 
| 
57431
 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 
blanchet 
parents: 
57406 
diff
changeset
 | 
26  | 
open MaSh_Export  | 
| 48240 | 27  | 
|
| 
53980
 
7e6a82c593f4
made SML/NJ happy (NB: toplevel ML environment is unmanaged);
 
wenzelm 
parents: 
53139 
diff
changeset
 | 
28  | 
val prefix = Library.prefix  | 
| 
 
7e6a82c593f4
made SML/NJ happy (NB: toplevel ML environment is unmanaged);
 
wenzelm 
parents: 
53139 
diff
changeset
 | 
29  | 
|
| 57456 | 30  | 
fun evaluate_mash_suggestions ctxt params range prob_dir_name file_names report_file_name =  | 
| 48235 | 31  | 
let  | 
| 55205 | 32  | 
val thy = Proof_Context.theory_of ctxt  | 
| 51028 | 33  | 
val zeros = [0, 0, 0, 0, 0, 0]  | 
| 
50448
 
0a740d127187
have MaSh evaluator keep all raw problem/solution files in a directory
 
blanchet 
parents: 
50442 
diff
changeset
 | 
34  | 
val report_path = report_file_name |> Path.explode  | 
| 
 
0a740d127187
have MaSh evaluator keep all raw problem/solution files in a directory
 
blanchet 
parents: 
50442 
diff
changeset
 | 
35  | 
val _ = File.write report_path ""  | 
| 57432 | 36  | 
|
| 50589 | 37  | 
fun print s = File.append report_path (s ^ "\n")  | 
| 57432 | 38  | 
|
| 55205 | 39  | 
    val {provers, max_facts, slice, type_enc, lam_trans, timeout, ...} = default_params thy []
 | 
| 48318 | 40  | 
val prover = hd provers  | 
| 
57108
 
dc0b4f50e288
more generous max number of suggestions, for more safety
 
blanchet 
parents: 
55212 
diff
changeset
 | 
41  | 
val max_suggs = generous_max_suggestions (the max_facts)  | 
| 57456 | 42  | 
|
43  | 
val method_of_file_name =  | 
|
44  | 
perhaps (try (unsuffix "_suggestions")) o List.last o space_explode "/"  | 
|
45  | 
||
46  | 
val methods = "isar" :: map method_of_file_name file_names  | 
|
| 
50964
 
2a990baa09af
use precomputed MaSh/MePo data whenever available
 
blanchet 
parents: 
50953 
diff
changeset
 | 
47  | 
val lines_of = Path.explode #> try File.read_lines #> these  | 
| 57456 | 48  | 
val liness0 = map lines_of file_names  | 
49  | 
val num_lines = fold (Integer.max o length) liness0 0  | 
|
| 57432 | 50  | 
|
| 
50964
 
2a990baa09af
use precomputed MaSh/MePo data whenever available
 
blanchet 
parents: 
50953 
diff
changeset
 | 
51  | 
fun pad lines = lines @ replicate (num_lines - length lines) ""  | 
| 57432 | 52  | 
|
| 58205 | 53  | 
val liness' = Ctr_Sugar_Util.transpose (map pad liness0)  | 
| 57456 | 54  | 
|
| 50557 | 55  | 
val css = clasimpset_rule_table_of ctxt  | 
| 58922 | 56  | 
val facts = all_facts ctxt true false Keyword.empty_keywords [] [] css  | 
| 
50735
 
6b232d76cbc9
refined class handling, to prevent cycles in fact graph
 
blanchet 
parents: 
50633 
diff
changeset
 | 
57  | 
val name_tabs = build_name_tables nickname_of_thm facts  | 
| 57432 | 58  | 
|
| 48289 | 59  | 
fun with_index facts s = (find_index (curry (op =) s) facts + 1, s)  | 
| 50952 | 60  | 
fun index_str (j, s) = s ^ "@" ^ string_of_int j  | 
| 50953 | 61  | 
val str_of_method = enclose " " ": "  | 
| 57432 | 62  | 
|
63  | 
    fun str_of_result method facts ({outcome, run_time, used_facts, ...} : prover_result) =
 | 
|
| 
51004
 
5f2788c38127
distinguish raw and non-raw facts, using raw for 10 000s of facts and non-raw after selection of some hundreds
 
blanchet 
parents: 
50967 
diff
changeset
 | 
64  | 
let val facts = facts |> map (fst o fst) in  | 
| 50953 | 65  | 
str_of_method method ^  | 
| 48289 | 66  | 
(if is_none outcome then  | 
| 
52031
 
9a9238342963
tuning -- renamed '_from_' to '_of_' in Sledgehammer
 
blanchet 
parents: 
51199 
diff
changeset
 | 
67  | 
           "Success (" ^ ATP_Util.string_of_time run_time ^ "): " ^
 | 
| 48289 | 68  | 
(used_facts |> map (with_index facts o fst)  | 
69  | 
|> sort (int_ord o pairself fst)  | 
|
| 50952 | 70  | 
|> map index_str  | 
| 48289 | 71  | 
|> space_implode " ") ^  | 
| 48293 | 72  | 
(if length facts < the max_facts then  | 
| 48289 | 73  | 
" (of " ^ string_of_int (length facts) ^ ")"  | 
74  | 
else  | 
|
75  | 
"")  | 
|
76  | 
else  | 
|
77  | 
"Failure: " ^  | 
|
| 48293 | 78  | 
(facts |> take (the max_facts) |> tag_list 1  | 
| 50952 | 79  | 
|> map index_str  | 
| 48289 | 80  | 
|> space_implode " "))  | 
81  | 
end  | 
|
| 57432 | 82  | 
|
| 57456 | 83  | 
fun solve_goal (j, lines) =  | 
| 58205 | 84  | 
if in_range range j andalso exists (curry (op <>) "") lines then  | 
| 
50559
 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 
blanchet 
parents: 
50557 
diff
changeset
 | 
85  | 
let  | 
| 
57125
 
2f620ef839ee
added another way of invoking Python code, for experiments
 
blanchet 
parents: 
57108 
diff
changeset
 | 
86  | 
val get_suggs = extract_suggestions ##> (take max_suggs #> map fst)  | 
| 57456 | 87  | 
val (names, suggss0) = split_list (map get_suggs lines)  | 
| 58205 | 88  | 
val name =  | 
89  | 
(case names |> filter (curry (op <>) "") |> distinct (op =) of  | 
|
90  | 
[name] => name  | 
|
91  | 
            | names => error ("Input files out of sync: facts " ^ commas (map quote names) ^ "."))
 | 
|
| 
50559
 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 
blanchet 
parents: 
50557 
diff
changeset
 | 
92  | 
val th =  | 
| 
50624
 
4d0997abce79
improved thm order hack, in case the default names are overridden
 
blanchet 
parents: 
50620 
diff
changeset
 | 
93  | 
case find_first (fn (_, th) => nickname_of_thm th = name) facts of  | 
| 
50559
 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 
blanchet 
parents: 
50557 
diff
changeset
 | 
94  | 
SOME (_, th) => th  | 
| 
 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 
blanchet 
parents: 
50557 
diff
changeset
 | 
95  | 
            | NONE => error ("No fact called \"" ^ name ^ "\".")
 | 
| 
 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 
blanchet 
parents: 
50557 
diff
changeset
 | 
96  | 
val goal = goal_of_thm (Proof_Context.theory_of ctxt) th  | 
| 
52196
 
2281f33e8da6
redid rac7830871177 to avoid duplicate fixed variable (e.g. lemma "P (a::nat)" proof - have "!!a::int. Q a" sledgehammer [e])
 
blanchet 
parents: 
52125 
diff
changeset
 | 
97  | 
val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal goal 1 ctxt  | 
| 
57306
 
ff10067b2248
optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
 
blanchet 
parents: 
57150 
diff
changeset
 | 
98  | 
val isar_deps = these (isar_dependencies_of name_tabs th)  | 
| 57456 | 99  | 
val suggss = isar_deps :: suggss0  | 
| 57406 | 100  | 
val facts = facts |> filter (fn (_, th') => thm_less (th', th))  | 
| 57432 | 101  | 
|
| 
50559
 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 
blanchet 
parents: 
50557 
diff
changeset
 | 
102  | 
(* adapted from "mirabelle_sledgehammer.ML" *)  | 
| 50953 | 103  | 
fun set_file_name method (SOME dir) =  | 
| 
50559
 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 
blanchet 
parents: 
50557 
diff
changeset
 | 
104  | 
let  | 
| 57432 | 105  | 
val prob_prefix = "goal_" ^ string_of_int j ^ "__" ^ encode_str name ^ "__" ^ method  | 
| 
50559
 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 
blanchet 
parents: 
50557 
diff
changeset
 | 
106  | 
in  | 
| 55212 | 107  | 
Config.put atp_dest_dir dir  | 
108  | 
#> Config.put atp_problem_prefix (prob_prefix ^ "__")  | 
|
| 58061 | 109  | 
#> Config.put SMT_Config.debug_files (dir ^ "/" ^ prob_prefix)  | 
| 
50559
 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 
blanchet 
parents: 
50557 
diff
changeset
 | 
110  | 
end  | 
| 
 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 
blanchet 
parents: 
50557 
diff
changeset
 | 
111  | 
| set_file_name _ NONE = I  | 
| 57432 | 112  | 
|
| 53139 | 113  | 
fun prove method suggs =  | 
| 57456 | 114  | 
if null facts then  | 
| 50953 | 115  | 
(str_of_method method ^ "Skipped", 0)  | 
| 50952 | 116  | 
else  | 
117  | 
let  | 
|
118  | 
fun nickify ((_, stature), th) =  | 
|
119  | 
((K (encode_str (nickname_of_thm th)), stature), th)  | 
|
| 57432 | 120  | 
|
| 50952 | 121  | 
val facts =  | 
| 53139 | 122  | 
suggs  | 
123  | 
|> find_suggested_facts ctxt facts  | 
|
124  | 
|> map (fact_of_raw_fact #> nickify)  | 
|
| 50952 | 125  | 
|> maybe_instantiate_inducts ctxt hyp_ts concl_t  | 
126  | 
|> take (the max_facts)  | 
|
| 
51004
 
5f2788c38127
distinguish raw and non-raw facts, using raw for 10 000s of facts and non-raw after selection of some hundreds
 
blanchet 
parents: 
50967 
diff
changeset
 | 
127  | 
|> map fact_of_raw_fact  | 
| 50953 | 128  | 
val ctxt = ctxt |> set_file_name method prob_dir_name  | 
| 57434 | 129  | 
                val res as {outcome, ...} = run_prover_for_mash ctxt params prover name facts goal
 | 
| 50952 | 130  | 
val ok = if is_none outcome then 1 else 0  | 
| 57432 | 131  | 
in  | 
132  | 
(str_of_result method facts res, ok)  | 
|
133  | 
end  | 
|
134  | 
||
| 57456 | 135  | 
val ress = map2 prove methods suggss  | 
| 
50559
 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 
blanchet 
parents: 
50557 
diff
changeset
 | 
136  | 
in  | 
| 50591 | 137  | 
"Goal " ^ string_of_int j ^ ": " ^ name :: map fst ress  | 
| 
50587
 
bd6582be1562
synchronize access to shared reference and print proper total
 
blanchet 
parents: 
50563 
diff
changeset
 | 
138  | 
|> cat_lines |> print;  | 
| 50591 | 139  | 
map snd ress  | 
| 
50559
 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 
blanchet 
parents: 
50557 
diff
changeset
 | 
140  | 
end  | 
| 
 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 
blanchet 
parents: 
50557 
diff
changeset
 | 
141  | 
else  | 
| 51028 | 142  | 
zeros  | 
| 57432 | 143  | 
|
| 50557 | 144  | 
val inst_inducts = Config.get ctxt instantiate_inducts  | 
| 48245 | 145  | 
val options =  | 
| 50767 | 146  | 
["prover = " ^ prover,  | 
147  | 
"max_facts = " ^ string_of_int (the max_facts),  | 
|
148  | 
"slice" |> not slice ? prefix "dont_",  | 
|
149  | 
"type_enc = " ^ the_default "smart" type_enc,  | 
|
150  | 
"lam_trans = " ^ the_default "smart" lam_trans,  | 
|
| 
54816
 
10d48c2a3e32
made timeouts in Sledgehammer not be 'option's -- simplified lots of code
 
blanchet 
parents: 
54141 
diff
changeset
 | 
151  | 
"timeout = " ^ ATP_Util.string_of_time timeout,  | 
| 48241 | 152  | 
"instantiate_inducts" |> not inst_inducts ? prefix "dont_"]  | 
| 
50587
 
bd6582be1562
synchronize access to shared reference and print proper total
 
blanchet 
parents: 
50563 
diff
changeset
 | 
153  | 
val _ = print " * * *";  | 
| 
 
bd6582be1562
synchronize access to shared reference and print proper total
 
blanchet 
parents: 
50563 
diff
changeset
 | 
154  | 
    val _ = print ("Options: " ^ commas options);
 | 
| 58205 | 155  | 
val oks = Par_List.map solve_goal (tag_list 1 liness')  | 
| 50620 | 156  | 
val n = length oks  | 
| 57456 | 157  | 
|
158  | 
fun total_of method ok =  | 
|
159  | 
      str_of_method method ^ string_of_int ok ^ " (" ^ Real.fmt (StringCvt.FIX (SOME 1))
 | 
|
160  | 
(100.0 * Real.fromInt ok / Real.fromInt (Int.max (1, n))) ^ "%)"  | 
|
161  | 
||
162  | 
val oks' = if n = 0 then zeros else map Integer.sum (map_transpose I oks)  | 
|
| 48241 | 163  | 
in  | 
| 57456 | 164  | 
"Successes (of " ^ string_of_int n ^ " goals)" ::  | 
165  | 
map2 total_of methods oks'  | 
|
| 50437 | 166  | 
|> cat_lines |> print  | 
| 48241 | 167  | 
end  | 
| 48235 | 168  | 
|
| 48234 | 169  | 
end;  |