author  blanchet 
Sun, 13 Jan 2013 15:04:55 +0100  
changeset 50860  e32a283b8ce0 
parent 50858  42c5fcc6f28f 
child 50861  fa4253914e98 
permissions  rwrr 
48380  1 
(* Title: HOL/Tools/Sledgehammer/sledgehammer_mash.ML 
48248  2 
Author: Jasmin Blanchette, TU Muenchen 
3 

4 
Sledgehammer's machinelearningbased relevance filter (MaSh). 

5 
*) 

6 

48381  7 
signature SLEDGEHAMMER_MASH = 
48248  8 
sig 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

9 
type stature = ATP_Problem_Generate.stature 
48296
e7f01b7e244e
gracefully handle the case of empty theories when going up the accessibility chain
blanchet
parents:
48293
diff
changeset

10 
type fact = Sledgehammer_Fact.fact 
e7f01b7e244e
gracefully handle the case of empty theories when going up the accessibility chain
blanchet
parents:
48293
diff
changeset

11 
type fact_override = Sledgehammer_Fact.fact_override 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

12 
type params = Sledgehammer_Provers.params 
48288
255c6e1fd505
rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents:
48251
diff
changeset

13 
type relevance_fudge = Sledgehammer_Provers.relevance_fudge 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

14 
type prover_result = Sledgehammer_Provers.prover_result 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

15 

48308  16 
val trace : bool Config.T 
48319
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

17 
val MaShN : string 
48379
2b5ad61e2ccc
renamed "iter" fact filter to "MePo" (MengPaulson)
blanchet
parents:
48378
diff
changeset

18 
val mepoN : string 
2b5ad61e2ccc
renamed "iter" fact filter to "MePo" (MengPaulson)
blanchet
parents:
48378
diff
changeset

19 
val mashN : string 
48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

20 
val meshN : string 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

21 
val unlearnN : string 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

22 
val learn_isarN : string 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

23 
val learn_proverN : string 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

24 
val relearn_isarN : string 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

25 
val relearn_proverN : string 
48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

26 
val fact_filters : string list 
50826  27 
val encode_str : string > string 
28 
val encode_strs : string list > string 

29 
val unencode_str : string > string 

30 
val unencode_strs : string > string list 

50356  31 
val encode_features : (string * real) list > string 
50633  32 
val extract_suggestions : string > string * (string * real) list 
50632
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

33 

12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

34 
structure MaSh: 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

35 
sig 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

36 
val unlearn : Proof.context > unit 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

37 
val learn : 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

38 
Proof.context > bool 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

39 
> (string * string list * (string * real) list * string list) list > unit 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

40 
val relearn : 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

41 
Proof.context > bool > (string * string list) list > unit 
50633  42 
val suggest : 
50858  43 
Proof.context > bool > bool > int 
50826  44 
> string list * (string * real) list * string list 
50632
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

45 
> (string * real) list 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

46 
end 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

47 

50311  48 
val mash_unlearn : Proof.context > unit 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

49 
val nickname_of_thm : thm > string 
50412  50 
val find_suggested_facts : 
48406  51 
(string * 'a) list > ('b * thm) list > (('b * thm) * 'a) list 
48321  52 
val mesh_facts : 
50814  53 
('a * 'a > bool) > int > (real * (('a * real) list * 'a list)) list 
54 
> 'a list 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

55 
val theory_ord : theory * theory > order 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

56 
val thm_ord : thm * thm > order 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

57 
val goal_of_thm : theory > thm > thm 
48321  58 
val run_prover_for_mash : 
48318  59 
Proof.context > params > string > fact list > thm > prover_result 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

60 
val features_of : 
50356  61 
Proof.context > string > theory > stature > term list 
62 
> (string * real) list 

50754
74a6adcb96ac
also generate queries for goals with too many Isar dependencies
blanchet
parents:
50751
diff
changeset

63 
val trim_dependencies : thm > string list > string list option 
50735
6b232d76cbc9
refined class handling, to prevent cycles in fact graph
blanchet
parents:
50732
diff
changeset

64 
val isar_dependencies_of : 
50754
74a6adcb96ac
also generate queries for goals with too many Isar dependencies
blanchet
parents:
50751
diff
changeset

65 
string Symtab.table * string Symtab.table > thm > string list 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

66 
val prover_dependencies_of : 
50735
6b232d76cbc9
refined class handling, to prevent cycles in fact graph
blanchet
parents:
50732
diff
changeset

67 
Proof.context > params > string > int > fact list 
6b232d76cbc9
refined class handling, to prevent cycles in fact graph
blanchet
parents:
50732
diff
changeset

68 
> string Symtab.table * string Symtab.table > thm 
50754
74a6adcb96ac
also generate queries for goals with too many Isar dependencies
blanchet
parents:
50751
diff
changeset

69 
> bool * string list 
50608  70 
val weight_mash_facts : 'a list > ('a * real) list 
50412  71 
val find_mash_suggestions : 
72 
int > (Symtab.key * 'a) list > ('b * thm) list > ('b * thm) list 

50440
ca99c269ca3a
don't have MaSh pretend it knows facts it doesn't know
blanchet
parents:
50438
diff
changeset

73 
> ('b * thm) list > ('b * thm) list * ('b * thm) list 
48406  74 
val mash_suggested_facts : 
50383
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

75 
Proof.context > params > string > int > term list > term > fact list 
50440
ca99c269ca3a
don't have MaSh pretend it knows facts it doesn't know
blanchet
parents:
50438
diff
changeset

76 
> fact list * fact list 
48383  77 
val mash_learn_proof : 
48384
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

78 
Proof.context > params > string > term > ('a * thm) list > thm list 
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

79 
> unit 
48395
85a7fb65507a
learning should honor the fact override and the chained facts
blanchet
parents:
48394
diff
changeset

80 
val mash_learn : 
85a7fb65507a
learning should honor the fact override and the chained facts
blanchet
parents:
48394
diff
changeset

81 
Proof.context > params > fact_override > thm list > bool > unit 
50311  82 
val is_mash_enabled : unit > bool 
83 
val mash_can_suggest_facts : Proof.context > bool 

50412  84 
val generous_max_facts : int > int 
50814  85 
val mepo_weight : real 
86 
val mash_weight : real 

48288
255c6e1fd505
rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents:
48251
diff
changeset

87 
val relevant_facts : 
48292  88 
Proof.context > params > string > int > fact_override > term list 
48296
e7f01b7e244e
gracefully handle the case of empty theories when going up the accessibility chain
blanchet
parents:
48293
diff
changeset

89 
> term > fact list > fact list 
48319
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

90 
val kill_learners : unit > unit 
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

91 
val running_learners : unit > unit 
48248  92 
end; 
93 

48381  94 
structure Sledgehammer_MaSh : SLEDGEHAMMER_MASH = 
48248  95 
struct 
48249  96 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

97 
open ATP_Util 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

98 
open ATP_Problem_Generate 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

99 
open Sledgehammer_Util 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

100 
open Sledgehammer_Fact 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

101 
open Sledgehammer_Provers 
48318  102 
open Sledgehammer_Minimize 
48381  103 
open Sledgehammer_MePo 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

104 

48308  105 
val trace = 
48380  106 
Attrib.setup_config_bool @{binding sledgehammer_mash_trace} (K false) 
48308  107 
fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else () 
108 

48319
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

109 
val MaShN = "MaSh" 
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

110 

48379
2b5ad61e2ccc
renamed "iter" fact filter to "MePo" (MengPaulson)
blanchet
parents:
48378
diff
changeset

111 
val mepoN = "mepo" 
48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

112 
val mashN = "mash" 
48379
2b5ad61e2ccc
renamed "iter" fact filter to "MePo" (MengPaulson)
blanchet
parents:
48378
diff
changeset

113 
val meshN = "mesh" 
48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

114 

48379
2b5ad61e2ccc
renamed "iter" fact filter to "MePo" (MengPaulson)
blanchet
parents:
48378
diff
changeset

115 
val fact_filters = [meshN, mepoN, mashN] 
48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

116 

48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

117 
val unlearnN = "unlearn" 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

118 
val learn_isarN = "learn_isar" 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

119 
val learn_proverN = "learn_prover" 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

120 
val relearn_isarN = "relearn_isar" 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

121 
val relearn_proverN = "relearn_prover" 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

122 

48394
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

123 
fun mash_model_dir () = 
50340
72519bf5f135
simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents:
50339
diff
changeset

124 
Path.explode "$ISABELLE_HOME_USER/mash" > tap Isabelle_System.mkdir 
48394
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

125 
val mash_state_dir = mash_model_dir 
50310  126 
fun mash_state_file () = Path.append (mash_state_dir ()) (Path.explode "state") 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

127 

48330  128 

50311  129 
(*** Lowlevel communication with MaSh ***) 
130 

131 
fun wipe_out_file file = (try (File.rm o Path.explode) file; ()) 

132 

50335
b17b05c8d4a4
proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents:
50319
diff
changeset

133 
fun write_file banner (xs, f) path = 
b17b05c8d4a4
proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents:
50319
diff
changeset

134 
(case banner of SOME s => File.write path s  NONE => (); 
b17b05c8d4a4
proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents:
50319
diff
changeset

135 
xs > chunk_list 500 
b17b05c8d4a4
proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents:
50319
diff
changeset

136 
> List.app (File.append path o space_implode "" o map f)) 
50319
dddcaeb92e11
robust writing of MaSh state  better drop learning data than cause other problems in Sledgehammer
blanchet
parents:
50311
diff
changeset

137 
handle IO.Io _ => () 
50311  138 

139 
fun run_mash_tool ctxt overlord save max_suggs write_cmds read_suggs = 

140 
let 

141 
val (temp_dir, serial) = 

142 
if overlord then (getenv "ISABELLE_HOME_USER", "") 

143 
else (getenv "ISABELLE_TMP", serial_string ()) 

144 
val log_file = if overlord then temp_dir ^ "/mash_log" else "/dev/null" 

145 
val err_file = temp_dir ^ "/mash_err" ^ serial 

146 
val sugg_file = temp_dir ^ "/mash_suggs" ^ serial 

50335
b17b05c8d4a4
proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents:
50319
diff
changeset

147 
val sugg_path = Path.explode sugg_file 
50311  148 
val cmd_file = temp_dir ^ "/mash_commands" ^ serial 
50335
b17b05c8d4a4
proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents:
50319
diff
changeset

149 
val cmd_path = Path.explode cmd_file 
50311  150 
val core = 
151 
"inputFile " ^ cmd_file ^ " predictions " ^ sugg_file ^ 

152 
" numberOfPredictions " ^ string_of_int max_suggs ^ 

50623  153 
" learnTheories" ^ 
50311  154 
(if save then " saveModel" else "") 
155 
val command = 

50335
b17b05c8d4a4
proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents:
50319
diff
changeset

156 
"\"$ISABELLE_SLEDGEHAMMER_MASH/src/mash.py\" quiet outputDir " ^ 
b17b05c8d4a4
proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents:
50319
diff
changeset

157 
File.shell_path (mash_model_dir ()) ^ " log " ^ log_file ^ " " ^ core ^ 
50311  158 
" >& " ^ err_file 
159 
fun run_on () = 

50750  160 
(Isabelle_System.bash command 
161 
> tap (fn _ => trace_msg ctxt (fn () => 

162 
case try File.read (Path.explode err_file) of 

163 
NONE => "Done" 

164 
 SOME "" => "Done" 

165 
 SOME s => "Error: " ^ elide_string 1000 s)); 

50335
b17b05c8d4a4
proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents:
50319
diff
changeset

166 
read_suggs (fn () => try File.read_lines sugg_path > these)) 
50311  167 
fun clean_up () = 
168 
if overlord then () 

169 
else List.app wipe_out_file [err_file, sugg_file, cmd_file] 

170 
in 

50335
b17b05c8d4a4
proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents:
50319
diff
changeset

171 
write_file (SOME "") ([], K "") sugg_path; 
b17b05c8d4a4
proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents:
50319
diff
changeset

172 
write_file (SOME "") write_cmds cmd_path; 
50311  173 
trace_msg ctxt (fn () => "Running " ^ command); 
174 
with_cleanup clean_up run_on () 

175 
end 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

176 

48308  177 
fun meta_char c = 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

178 
if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

179 
c = #")" orelse c = #"," then 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

180 
String.str c 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

181 
else 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

182 
(* fixed width, in case more digits follow *) 
48395
85a7fb65507a
learning should honor the fact override and the chained facts
blanchet
parents:
48394
diff
changeset

183 
"%" ^ stringN_of_int 3 (Char.ord c) 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

184 

48308  185 
fun unmeta_chars accum [] = String.implode (rev accum) 
48395
85a7fb65507a
learning should honor the fact override and the chained facts
blanchet
parents:
48394
diff
changeset

186 
 unmeta_chars accum (#"%" :: d1 :: d2 :: d3 :: cs) = 
48308  187 
(case Int.fromString (String.implode [d1, d2, d3]) of 
188 
SOME n => unmeta_chars (Char.chr n :: accum) cs 

189 
 NONE => "" (* error *)) 

48395
85a7fb65507a
learning should honor the fact override and the chained facts
blanchet
parents:
48394
diff
changeset

190 
 unmeta_chars _ (#"%" :: _) = "" (* error *) 
48308  191 
 unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs 
192 

50826  193 
val encode_str = String.translate meta_char 
194 
val encode_strs = map encode_str #> space_implode " " 

195 
val unencode_str = String.explode #> unmeta_chars [] 

196 
val unencode_strs = 

197 
space_explode " " #> filter_out (curry (op =) "") #> map unencode_str 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

198 

50858  199 
fun freshish_name () = 
200 
Date.fmt ".%Y%m%d_%H%M%S__" (Date.fromTimeLocal (Time.now ())) ^ 

201 
serial_string () 

202 

50356  203 
fun encode_feature (name, weight) = 
50826  204 
encode_str name ^ 
50389
ad0ac9112d2c
simplify code now that "mash.py" supports weights
blanchet
parents:
50383
diff
changeset

205 
(if Real.== (weight, 1.0) then "" else "=" ^ Real.toString weight) 
50356  206 

207 
val encode_features = map encode_feature #> space_implode " " 

208 

50631  209 
fun str_of_learn (name, parents, feats, deps) = 
50826  210 
"! " ^ encode_str name ^ ": " ^ encode_strs parents ^ "; " ^ 
211 
encode_features feats ^ "; " ^ encode_strs deps ^ "\n" 

48669
cdcdb0547f29
remember ATP flops to avoid repeating them too quickly
blanchet
parents:
48668
diff
changeset

212 

50631  213 
fun str_of_relearn (name, deps) = 
50826  214 
"p " ^ encode_str name ^ ": " ^ encode_strs deps ^ "\n" 
50311  215 

50858  216 
fun str_of_query learn_hints (parents, feats, hints) = 
217 
(if not learn_hints orelse null hints then "" 

218 
else str_of_learn (freshish_name (), parents, feats, hints)) ^ 

50826  219 
"? " ^ encode_strs parents ^ "; " ^ encode_features feats ^ 
50858  220 
(if learn_hints orelse null hints then "" else "; " ^ encode_strs hints) ^ 
221 
"\n" 

48406  222 

223 
fun extract_suggestion sugg = 

224 
case space_explode "=" sugg of 

225 
[name, weight] => 

50826  226 
SOME (unencode_str name, Real.fromString weight > the_default 1.0) 
227 
 [name] => SOME (unencode_str name, 1.0) 

48406  228 
 _ => NONE 
229 

50633  230 
fun extract_suggestions line = 
48311  231 
case space_explode ":" line of 
48406  232 
[goal, suggs] => 
50826  233 
(unencode_str goal, map_filter extract_suggestion (space_explode " " suggs)) 
48312  234 
 _ => ("", []) 
48311  235 

50632
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

236 
structure MaSh = 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

237 
struct 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

238 

12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

239 
fun unlearn ctxt = 
50311  240 
let val path = mash_model_dir () in 
50632
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

241 
trace_msg ctxt (K "MaSh unlearn"); 
50319
dddcaeb92e11
robust writing of MaSh state  better drop learning data than cause other problems in Sledgehammer
blanchet
parents:
50311
diff
changeset

242 
try (File.fold_dir (fn file => fn _ => 
dddcaeb92e11
robust writing of MaSh state  better drop learning data than cause other problems in Sledgehammer
blanchet
parents:
50311
diff
changeset

243 
try File.rm (Path.append path (Path.basic file))) 
dddcaeb92e11
robust writing of MaSh state  better drop learning data than cause other problems in Sledgehammer
blanchet
parents:
50311
diff
changeset

244 
path) NONE; 
50311  245 
() 
246 
end 

247 

50632
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

248 
fun learn _ _ [] = () 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

249 
 learn ctxt overlord learns = 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

250 
(trace_msg ctxt (fn () => "MaSh learn " ^ 
50631  251 
elide_string 1000 (space_implode " " (map #1 learns))); 
252 
run_mash_tool ctxt overlord true 0 (learns, str_of_learn) (K ())) 

50311  253 

50632
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

254 
fun relearn _ _ [] = () 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

255 
 relearn ctxt overlord relearns = 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

256 
(trace_msg ctxt (fn () => "MaSh relearn " ^ 
50631  257 
elide_string 1000 (space_implode " " (map #1 relearns))); 
258 
run_mash_tool ctxt overlord true 0 (relearns, str_of_relearn) (K ())) 

50311  259 

50858  260 
fun suggest ctxt overlord learn_hints max_suggs (query as (_, feats, hints)) = 
50633  261 
(trace_msg ctxt (fn () => "MaSh suggest " ^ encode_features feats); 
50858  262 
run_mash_tool ctxt overlord (learn_hints andalso not (null hints)) 
263 
max_suggs ([query], str_of_query learn_hints) 

50311  264 
(fn suggs => 
265 
case suggs () of 

266 
[] => [] 

50633  267 
 suggs => snd (extract_suggestions (List.last suggs))) 
50311  268 
handle List.Empty => []) 
269 

50632
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

270 
end; 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

271 

50311  272 

273 
(*** Middlelevel communication with MaSh ***) 

274 

50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

275 
datatype proof_kind = 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

276 
Isar_Proof  Automatic_Proof  Isar_Proof_wegen_Prover_Flop 
50311  277 

278 
fun str_of_proof_kind Isar_Proof = "i" 

50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

279 
 str_of_proof_kind Automatic_Proof = "a" 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

280 
 str_of_proof_kind Isar_Proof_wegen_Prover_Flop = "x" 
50311  281 

282 
fun proof_kind_of_str "i" = Isar_Proof 

50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

283 
 proof_kind_of_str "a" = Automatic_Proof 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

284 
 proof_kind_of_str "x" = Isar_Proof_wegen_Prover_Flop 
50311  285 

286 
(* FIXME: Here a "Graph.update_node" function would be useful *) 

50610  287 
fun update_access_graph_node (name, kind) = 
50311  288 
Graph.default_node (name, Isar_Proof) 
289 
#> kind <> Isar_Proof ? Graph.map_node name (K kind) 

290 

291 
fun try_graph ctxt when def f = 

292 
f () 

293 
handle Graph.CYCLES (cycle :: _) => 

294 
(trace_msg ctxt (fn () => 

295 
"Cycle involving " ^ commas cycle ^ " when " ^ when); def) 

296 
 Graph.DUP name => 

297 
(trace_msg ctxt (fn () => 

298 
"Duplicate fact " ^ quote name ^ " when " ^ when); def) 

299 
 Graph.UNDEF name => 

300 
(trace_msg ctxt (fn () => 

301 
"Unknown fact " ^ quote name ^ " when " ^ when); def) 

302 
 exn => 

303 
if Exn.is_interrupt exn then 

304 
reraise exn 

305 
else 

306 
(trace_msg ctxt (fn () => 

307 
"Internal error when " ^ when ^ ":\n" ^ 

308 
ML_Compiler.exn_message exn); def) 

309 

310 
fun graph_info G = 

311 
string_of_int (length (Graph.keys G)) ^ " node(s), " ^ 

312 
string_of_int (fold (Integer.add o length o snd) (Graph.dest G) 0) ^ 

313 
" edge(s), " ^ 

314 
string_of_int (length (Graph.minimals G)) ^ " minimal, " ^ 

315 
string_of_int (length (Graph.maximals G)) ^ " maximal" 

316 

50610  317 
type mash_state = {access_G : unit Graph.T, dirty : string list option} 
50311  318 

50610  319 
val empty_state = {access_G = Graph.empty, dirty = SOME []} 
50311  320 

321 
local 

322 

50840  323 
val version = "*** MaSh version 20130112a ***" 
50357
187ae76a1757
more robustness in the face of MaSh format changes  don't overwrite new versions with old versions
blanchet
parents:
50356
diff
changeset

324 

187ae76a1757
more robustness in the face of MaSh format changes  don't overwrite new versions with old versions
blanchet
parents:
50356
diff
changeset

325 
exception Too_New of unit 
50311  326 

327 
fun extract_node line = 

328 
case space_explode ":" line of 

329 
[head, parents] => 

330 
(case space_explode " " head of 

331 
[kind, name] => 

50826  332 
SOME (unencode_str name, unencode_strs parents, 
50311  333 
try proof_kind_of_str kind > the_default Isar_Proof) 
334 
 _ => NONE) 

335 
 _ => NONE 

336 

337 
fun load _ (state as (true, _)) = state 

338 
 load ctxt _ = 

339 
let val path = mash_state_file () in 

340 
(true, 

341 
case try File.read_lines path of 

342 
SOME (version' :: node_lines) => 

343 
let 

344 
fun add_edge_to name parent = 

345 
Graph.default_node (parent, Isar_Proof) 

346 
#> Graph.add_edge (parent, name) 

347 
fun add_node line = 

348 
case extract_node line of 

349 
NONE => I (* shouldn't happen *) 

350 
 SOME (name, parents, kind) => 

50610  351 
update_access_graph_node (name, kind) 
50311  352 
#> fold (add_edge_to name) parents 
50610  353 
val access_G = 
50357
187ae76a1757
more robustness in the face of MaSh format changes  don't overwrite new versions with old versions
blanchet
parents:
50356
diff
changeset

354 
case string_ord (version', version) of 
187ae76a1757
more robustness in the face of MaSh format changes  don't overwrite new versions with old versions
blanchet
parents:
50356
diff
changeset

355 
EQUAL => 
187ae76a1757
more robustness in the face of MaSh format changes  don't overwrite new versions with old versions
blanchet
parents:
50356
diff
changeset

356 
try_graph ctxt "loading state" Graph.empty (fn () => 
187ae76a1757
more robustness in the face of MaSh format changes  don't overwrite new versions with old versions
blanchet
parents:
50356
diff
changeset

357 
fold add_node node_lines Graph.empty) 
50860  358 
 LESS => 
359 
(MaSh.unlearn ctxt; Graph.empty) (* can't parse old file *) 

50357
187ae76a1757
more robustness in the face of MaSh format changes  don't overwrite new versions with old versions
blanchet
parents:
50356
diff
changeset

360 
 GREATER => raise Too_New () 
50311  361 
in 
362 
trace_msg ctxt (fn () => 

50610  363 
"Loaded fact graph (" ^ graph_info access_G ^ ")"); 
364 
{access_G = access_G, dirty = SOME []} 

50311  365 
end 
366 
 _ => empty_state) 

367 
end 

368 

369 
fun save _ (state as {dirty = SOME [], ...}) = state 

50610  370 
 save ctxt {access_G, dirty} = 
50311  371 
let 
372 
fun str_of_entry (name, parents, kind) = 

50826  373 
str_of_proof_kind kind ^ " " ^ encode_str name ^ ": " ^ 
374 
encode_strs parents ^ "\n" 

50311  375 
fun append_entry (name, (kind, (parents, _))) = 
376 
cons (name, Graph.Keys.dest parents, kind) 

377 
val (banner, entries) = 

378 
case dirty of 

379 
SOME names => 

50610  380 
(NONE, fold (append_entry o Graph.get_entry access_G) names []) 
381 
 NONE => (SOME (version ^ "\n"), Graph.fold append_entry access_G []) 

50311  382 
in 
50335
b17b05c8d4a4
proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents:
50319
diff
changeset

383 
write_file banner (entries, str_of_entry) (mash_state_file ()); 
50311  384 
trace_msg ctxt (fn () => 
50610  385 
"Saved fact graph (" ^ graph_info access_G ^ 
50311  386 
(case dirty of 
387 
SOME dirty => 

388 
"; " ^ string_of_int (length dirty) ^ " dirty fact(s)" 

389 
 _ => "") ^ ")"); 

50610  390 
{access_G = access_G, dirty = SOME []} 
50311  391 
end 
392 

393 
val global_state = 

394 
Synchronized.var "Sledgehammer_MaSh.global_state" (false, empty_state) 

395 

396 
in 

397 

50570  398 
fun map_state ctxt f = 
50311  399 
Synchronized.change global_state (load ctxt ##> (f #> save ctxt)) 
50357
187ae76a1757
more robustness in the face of MaSh format changes  don't overwrite new versions with old versions
blanchet
parents:
50356
diff
changeset

400 
handle Too_New () => () 
50311  401 

50570  402 
fun peek_state ctxt f = 
50357
187ae76a1757
more robustness in the face of MaSh format changes  don't overwrite new versions with old versions
blanchet
parents:
50356
diff
changeset

403 
Synchronized.change_result global_state 
187ae76a1757
more robustness in the face of MaSh format changes  don't overwrite new versions with old versions
blanchet
parents:
50356
diff
changeset

404 
(perhaps (try (load ctxt)) #> `snd #>> f) 
50311  405 

50570  406 
fun clear_state ctxt = 
50311  407 
Synchronized.change global_state (fn _ => 
50632
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

408 
(MaSh.unlearn ctxt; (* also removes the state file *) 
50311  409 
(true, empty_state))) 
410 

411 
end 

412 

50570  413 
val mash_unlearn = clear_state 
414 

50311  415 

416 
(*** Isabelle helpers ***) 

417 

50722  418 
val local_prefix = "local" ^ Long_Name.separator 
48378  419 

50722  420 
fun elided_backquote_thm threshold th = 
421 
elide_string threshold 

422 
(backquote_thm (Proof_Context.init_global (Thm.theory_of_thm th)) th) 

48378  423 

50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

424 
fun nickname_of_thm th = 
48394
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

425 
if Thm.has_name_hint th then 
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

426 
let val hint = Thm.get_name_hint th in 
50722  427 
(* There must be a better way to detect local facts. *) 
48394
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

428 
case try (unprefix local_prefix) hint of 
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

429 
SOME suf => 
50732  430 
Context.theory_name (Thm.theory_of_thm th) ^ Long_Name.separator ^ suf ^ 
431 
Long_Name.separator ^ elided_backquote_thm 50 th 

48394
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

432 
 NONE => hint 
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

433 
end 
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

434 
else 
50722  435 
elided_backquote_thm 200 th 
48378  436 

50412  437 
fun find_suggested_facts suggs facts = 
48330  438 
let 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

439 
fun add_fact (fact as (_, th)) = Symtab.default (nickname_of_thm th, fact) 
48330  440 
val tab = Symtab.empty > fold add_fact facts 
48406  441 
fun find_sugg (name, weight) = 
442 
Symtab.lookup tab name > Option.map (rpair weight) 

443 
in map_filter find_sugg suggs end 

48311  444 

50383
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

445 
fun scaled_avg [] = 0 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

446 
 scaled_avg xs = 
48407  447 
Real.ceil (100000000.0 * fold (curry (op +)) xs 0.0) div length xs 
48328
ca0b7d19dd62
attempt at meshing according to more meaningful factors
blanchet
parents:
48327
diff
changeset

448 

50383
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

449 
fun avg [] = 0.0 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

450 
 avg xs = fold (curry (op +)) xs 0.0 / Real.fromInt (length xs) 
48313
0faafdffa662
mesh facts by taking into consideration whether a fact is known to MeSh
blanchet
parents:
48312
diff
changeset

451 

50383
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

452 
fun normalize_scores _ [] = [] 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

453 
 normalize_scores max_facts xs = 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

454 
let val avg = avg (map snd (take max_facts xs)) in 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

455 
map (apsnd (curry Real.* (1.0 / avg))) xs 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

456 
end 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

457 

50814  458 
fun mesh_facts _ max_facts [(_, (sels, unks))] = 
48406  459 
map fst (take max_facts sels) @ take (max_facts  length sels) unks 
50814  460 
 mesh_facts eq max_facts mess = 
48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

461 
let 
50383
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

462 
val mess = 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

463 
mess > map (apsnd (apfst (normalize_scores max_facts #> `length))) 
50814  464 
val fact_eq = eq 
50383
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

465 
fun score_in fact (global_weight, ((sel_len, sels), unks)) = 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

466 
let 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

467 
fun score_at j = 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

468 
case try (nth sels) j of 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

469 
SOME (_, score) => SOME (global_weight * score) 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

470 
 NONE => NONE 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

471 
in 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

472 
case find_index (curry fact_eq fact o fst) sels of 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

473 
~1 => (case find_index (curry fact_eq fact) unks of 
50438
9bb7868a4c20
fixed embarrassing offbyone bug in MaSh's Mesh
blanchet
parents:
50435
diff
changeset

474 
~1 => score_at (sel_len  1) 
50383
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

475 
 _ => NONE) 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

476 
 rank => score_at rank 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

477 
end 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

478 
fun weight_of fact = mess > map_filter (score_in fact) > scaled_avg 
48406  479 
val facts = 
50383
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

480 
fold (union fact_eq o map fst o take max_facts o snd o fst o snd) mess 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

481 
[] 
48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

482 
in 
48406  483 
facts > map (`weight_of) > sort (int_ord o swap o pairself fst) 
48328
ca0b7d19dd62
attempt at meshing according to more meaningful factors
blanchet
parents:
48327
diff
changeset

484 
> map snd > take max_facts 
48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

485 
end 
48312  486 

50584
4fff0898cc0e
tuned weights  keep same relative values, but use 1.0 as the least weight
blanchet
parents:
50583
diff
changeset

487 
fun thy_feature_of s = ("y" ^ s, 2.0 (* FUDGE *)) 
4fff0898cc0e
tuned weights  keep same relative values, but use 1.0 as the least weight
blanchet
parents:
50583
diff
changeset

488 
fun const_feature_of s = ("c" ^ s, 16.0 (* FUDGE *)) 
4fff0898cc0e
tuned weights  keep same relative values, but use 1.0 as the least weight
blanchet
parents:
50583
diff
changeset

489 
fun free_feature_of s = ("f" ^ s, 20.0 (* FUDGE *)) 
4fff0898cc0e
tuned weights  keep same relative values, but use 1.0 as the least weight
blanchet
parents:
50583
diff
changeset

490 
fun type_feature_of s = ("t" ^ s, 2.0 (* FUDGE *)) 
4fff0898cc0e
tuned weights  keep same relative values, but use 1.0 as the least weight
blanchet
parents:
50583
diff
changeset

491 
fun class_feature_of s = ("s" ^ s, 1.0 (* FUDGE *)) 
4fff0898cc0e
tuned weights  keep same relative values, but use 1.0 as the least weight
blanchet
parents:
50583
diff
changeset

492 
fun status_feature_of status = (string_of_status status, 2.0 (* FUDGE *)) 
4fff0898cc0e
tuned weights  keep same relative values, but use 1.0 as the least weight
blanchet
parents:
50583
diff
changeset

493 
val local_feature = ("local", 8.0 (* FUDGE *)) 
4fff0898cc0e
tuned weights  keep same relative values, but use 1.0 as the least weight
blanchet
parents:
50583
diff
changeset

494 
val lams_feature = ("lams", 2.0 (* FUDGE *)) 
4fff0898cc0e
tuned weights  keep same relative values, but use 1.0 as the least weight
blanchet
parents:
50583
diff
changeset

495 
val skos_feature = ("skos", 2.0 (* FUDGE *)) 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

496 

48324  497 
fun theory_ord p = 
50722  498 
if Theory.subthy p then 
499 
if Theory.eq_thy p then EQUAL else LESS 

48324  500 
else if Theory.subthy (swap p) then 
501 
GREATER 

502 
else case int_ord (pairself (length o Theory.ancestors_of) p) of 

503 
EQUAL => string_ord (pairself Context.theory_name p) 

504 
 order => order 

505 

50382  506 
fun thm_ord p = 
507 
case theory_ord (pairself theory_of_thm p) of 

50359
da395f0e7dea
tweaked order of theorems to avoid forward dependencies (MaSh)
blanchet
parents:
50357
diff
changeset

508 
EQUAL => 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

509 
let val q = pairself nickname_of_thm p in 
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

510 
(* Hack to put "xxx_def" before "xxxI" and "xxxE" *) 
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

511 
case bool_ord (pairself (String.isSuffix "_def") (swap q)) of 
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

512 
EQUAL => string_ord q 
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

513 
 ord => ord 
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

514 
end 
50359
da395f0e7dea
tweaked order of theorems to avoid forward dependencies (MaSh)
blanchet
parents:
50357
diff
changeset

515 
 ord => ord 
48324  516 

48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

517 
val freezeT = Type.legacy_freeze_type 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

518 

ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

519 
fun freeze (t $ u) = freeze t $ freeze u 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

520 
 freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t) 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

521 
 freeze (Var ((s, _), T)) = Free (s, freezeT T) 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

522 
 freeze (Const (s, T)) = Const (s, freezeT T) 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

523 
 freeze (Free (s, T)) = Free (s, freezeT T) 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

524 
 freeze t = t 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

525 

ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

526 
fun goal_of_thm thy = prop_of #> freeze #> cterm_of thy #> Goal.init 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

527 

ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

528 
fun run_prover_for_mash ctxt params prover facts goal = 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

529 
let 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

530 
val problem = 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

531 
{state = Proof.init ctxt, goal = goal, subgoal = 1, subgoal_count = 1, 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

532 
facts = facts > map (apfst (apfst (fn name => name ()))) 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

533 
> map Untranslated_Fact} 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

534 
in 
50668  535 
get_minimizing_isar_prover ctxt MaSh (K (K ())) prover params (K (K (K ""))) 
536 
problem 

48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

537 
end 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

538 

48326  539 
val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}] 
540 

48398  541 
val logical_consts = 
542 
[@{const_name prop}, @{const_name Pure.conjunction}] @ atp_logical_consts 

543 

50585  544 
val max_pattern_breadth = 10 
545 

50841
087e3c531e86
honor filtering out of arguments for builtin constants (e.g. representation of numerals)
blanchet
parents:
50840
diff
changeset

546 
fun term_features_of ctxt prover thy_name term_max_depth type_max_depth ts = 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

547 
let 
50392
190053ee24ed
expand type classes into their ancestors for MaSh
blanchet
parents:
50391
diff
changeset

548 
val thy = Proof_Context.theory_of ctxt 
50857
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

549 
fun is_built_in (x as (s, _)) args = 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

550 
if member (op =) logical_consts s then (true, args) 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

551 
else is_built_in_const_for_prover ctxt prover x args 
50393  552 
val fixes = map snd (Variable.dest_fixes ctxt) 
50392
190053ee24ed
expand type classes into their ancestors for MaSh
blanchet
parents:
50391
diff
changeset

553 
val classes = Sign.classes_of thy 
48304  554 
fun add_classes @{sort type} = I 
50392
190053ee24ed
expand type classes into their ancestors for MaSh
blanchet
parents:
50391
diff
changeset

555 
 add_classes S = 
190053ee24ed
expand type classes into their ancestors for MaSh
blanchet
parents:
50391
diff
changeset

556 
fold (`(Sorts.super_classes classes) 
190053ee24ed
expand type classes into their ancestors for MaSh
blanchet
parents:
50391
diff
changeset

557 
#> swap #> op :: 
190053ee24ed
expand type classes into their ancestors for MaSh
blanchet
parents:
50391
diff
changeset

558 
#> subtract (op =) @{sort type} 
190053ee24ed
expand type classes into their ancestors for MaSh
blanchet
parents:
50391
diff
changeset

559 
#> map class_feature_of 
190053ee24ed
expand type classes into their ancestors for MaSh
blanchet
parents:
50391
diff
changeset

560 
#> union (op = o pairself fst)) S 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

561 
fun do_add_type (Type (s, Ts)) = 
50356  562 
(not (member (op =) bad_types s) 
563 
? insert (op = o pairself fst) (type_feature_of s)) 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

564 
#> fold do_add_type Ts 
48304  565 
 do_add_type (TFree (_, S)) = add_classes S 
566 
 do_add_type (TVar (_, S)) = add_classes S 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

567 
fun add_type T = type_max_depth >= 0 ? do_add_type T 
50857
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

568 
fun patternify_term _ 0 _ = [] 
50339
d8dae91f3107
MaSh improvements: deeper patterns + more respect for chained facts
blanchet
parents:
50338
diff
changeset

569 
 patternify_term args _ (Const (x as (s, _))) = 
50857
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

570 
if fst (is_built_in x args) then [] else [s] 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

571 
 patternify_term _ depth (Free (s, _)) = 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

572 
if depth = term_max_depth andalso member (op =) fixes s then 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

573 
[thy_name ^ Long_Name.separator ^ s] 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

574 
else 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

575 
[] 
50339
d8dae91f3107
MaSh improvements: deeper patterns + more respect for chained facts
blanchet
parents:
50338
diff
changeset

576 
 patternify_term args depth (t $ u) = 
d8dae91f3107
MaSh improvements: deeper patterns + more respect for chained facts
blanchet
parents:
50338
diff
changeset

577 
let 
50857
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

578 
val ps = 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

579 
take max_pattern_breadth (patternify_term (u :: args) depth t) 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

580 
val qs = 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

581 
take max_pattern_breadth ("" :: patternify_term [] (depth  1) u) 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

582 
in map_product (fn p => fn "" => p  q => p ^ "(" ^ q ^ ")") ps qs end 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

583 
 patternify_term _ _ _ = [] 
50393  584 
fun add_term_pattern feature_of = 
50857
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

585 
union (op = o pairself fst) o map feature_of oo patternify_term [] 
50583
681edd111e9b
really honor pattern depth, and use 2 by default
blanchet
parents:
50570
diff
changeset

586 
fun add_term_patterns _ 0 _ = I 
50393  587 
 add_term_patterns feature_of depth t = 
588 
add_term_pattern feature_of depth t 

589 
#> add_term_patterns feature_of (depth  1) t 

590 
fun add_term feature_of = add_term_patterns feature_of term_max_depth 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

591 
fun add_patterns t = 
50857
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

592 
case strip_comb t of 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

593 
(Const (x as (_, T)), args) => 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

594 
let val (built_in, args) = is_built_in x args in 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

595 
(not built_in ? add_term const_feature_of t) 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

596 
#> add_type T 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

597 
#> fold add_patterns args 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

598 
end 
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

599 
 (head, args) => 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

600 
(case head of 
50393  601 
Const (_, T) => add_term const_feature_of t #> add_type T 
602 
 Free (_, T) => add_term free_feature_of t #> add_type T 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

603 
 Var (_, T) => add_type T 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

604 
 Abs (_, T, body) => add_type T #> add_patterns body 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

605 
 _ => I) 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

606 
#> fold add_patterns args 
48326  607 
in [] > fold add_patterns ts end 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

608 

6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

609 
fun is_exists (s, _) = (s = @{const_name Ex} orelse s = @{const_name Ex1}) 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

610 

50583
681edd111e9b
really honor pattern depth, and use 2 by default
blanchet
parents:
50570
diff
changeset

611 
val term_max_depth = 2 
681edd111e9b
really honor pattern depth, and use 2 by default
blanchet
parents:
50570
diff
changeset

612 
val type_max_depth = 2 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

613 

6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

614 
(* TODO: Generate type classes for types? *) 
48385  615 
fun features_of ctxt prover thy (scope, status) ts = 
50393  616 
let val thy_name = Context.theory_name thy in 
617 
thy_feature_of thy_name :: 

50841
087e3c531e86
honor filtering out of arguments for builtin constants (e.g. representation of numerals)
blanchet
parents:
50840
diff
changeset

618 
term_features_of ctxt prover thy_name term_max_depth type_max_depth ts 
50393  619 
> status <> General ? cons (status_feature_of status) 
620 
> scope <> Global ? cons local_feature 

621 
> exists (not o is_lambda_free) ts ? cons lams_feature 

622 
> exists (exists_Const is_exists) ts ? cons skos_feature 

623 
end 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

624 

50434
960a3429615c
more MaSh tweaking  in particular, export the same facts in "MaSh_Export" as are later tried in "MaSh_Eval"
blanchet
parents:
50412
diff
changeset

625 
(* Too many dependencies is a sign that a decision procedure is at work. There 
960a3429615c
more MaSh tweaking  in particular, export the same facts in "MaSh_Export" as are later tried in "MaSh_Eval"
blanchet
parents:
50412
diff
changeset

626 
isn't much to learn from such proofs. *) 
960a3429615c
more MaSh tweaking  in particular, export the same facts in "MaSh_Export" as are later tried in "MaSh_Eval"
blanchet
parents:
50412
diff
changeset

627 
val max_dependencies = 20 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

628 

8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

629 
val prover_dependency_default_max_facts = 50 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

630 

48438
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

631 
(* "type_definition_xxx" facts are characterized by their use of "CollectI". *) 
50755  632 
val typedef_dep = nickname_of_thm @{thm CollectI} 
633 
(* Mysterious parts of the class machinery create lots of proofs that refer 

634 
exclusively to "someI_e" (and to some internal constructions). *) 

635 
val class_some_dep = nickname_of_thm @{thm someI_ex} 

48438
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

636 

50828  637 
val fundef_ths = 
638 
@{thms fundef_ex1_existence fundef_ex1_uniqueness fundef_ex1_iff 

639 
fundef_default_value} 

640 
> map nickname_of_thm 

641 

48438
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

642 
(* "Rep_xxx_inject", "Abs_xxx_inverse", etc., are derived using these facts. *) 
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

643 
val typedef_ths = 
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

644 
@{thms type_definition.Abs_inverse type_definition.Rep_inverse 
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

645 
type_definition.Rep type_definition.Rep_inject 
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

646 
type_definition.Abs_inject type_definition.Rep_cases 
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

647 
type_definition.Abs_cases type_definition.Rep_induct 
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

648 
type_definition.Abs_induct type_definition.Rep_range 
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

649 
type_definition.Abs_image} 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

650 
> map nickname_of_thm 
48438
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

651 

48441  652 
fun is_size_def [dep] th = 
653 
(case first_field ".recs" dep of 

654 
SOME (pref, _) => 

50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

655 
(case first_field ".size" (nickname_of_thm th) of 
48441  656 
SOME (pref', _) => pref = pref' 
657 
 NONE => false) 

658 
 NONE => false) 

659 
 is_size_def _ _ = false 

660 

661 
fun trim_dependencies th deps = 

50755  662 
if length deps > max_dependencies then NONE else SOME deps 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

663 

50755  664 
fun isar_dependencies_of name_tabs th = 
665 
let 

666 
val deps = thms_in_proof (SOME name_tabs) th 

667 
in 

50828  668 
if deps = [typedef_dep] orelse 
669 
deps = [class_some_dep] orelse 

670 
exists (member (op =) fundef_ths) deps orelse 

671 
exists (member (op =) typedef_ths) deps orelse 

672 
is_size_def deps th then 

50755  673 
[] 
674 
else 

675 
deps 

676 
end 

48404  677 

50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

678 
fun prover_dependencies_of ctxt (params as {verbose, max_facts, ...}) prover 
50735
6b232d76cbc9
refined class handling, to prevent cycles in fact graph
blanchet
parents:
50732
diff
changeset

679 
auto_level facts name_tabs th = 
6b232d76cbc9
refined class handling, to prevent cycles in fact graph
blanchet
parents:
50732
diff
changeset

680 
case isar_dependencies_of name_tabs th of 
50754
74a6adcb96ac
also generate queries for goals with too many Isar dependencies
blanchet
parents:
50751
diff
changeset

681 
[] => (false, []) 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

682 
 isar_deps => 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

683 
let 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

684 
val thy = Proof_Context.theory_of ctxt 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

685 
val goal = goal_of_thm thy th 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

686 
val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal 1 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

687 
val facts = facts > filter (fn (_, th') => thm_ord (th', th) = LESS) 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

688 
fun nickify ((_, stature), th) = 
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

689 
((fn () => nickname_of_thm th, stature), th) 
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

690 
fun is_dep dep (_, th) = nickname_of_thm th = dep 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

691 
fun add_isar_dep facts dep accum = 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

692 
if exists (is_dep dep) accum then 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

693 
accum 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

694 
else case find_first (is_dep dep) facts of 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

695 
SOME ((name, status), th) => accum @ [((name, status), th)] 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

696 
 NONE => accum (* shouldn't happen *) 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

697 
val facts = 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

698 
facts 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

699 
> mepo_suggested_facts ctxt params prover 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

700 
(max_facts > the_default prover_dependency_default_max_facts) 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

701 
NONE hyp_ts concl_t 
50754
74a6adcb96ac
also generate queries for goals with too many Isar dependencies
blanchet
parents:
50751
diff
changeset

702 
> fold (add_isar_dep facts) isar_deps 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

703 
> map nickify 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

704 
in 
48404  705 
if verbose andalso auto_level = 0 then 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

706 
let val num_facts = length facts in 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

707 
"MaSh: " ^ quote prover ^ " on " ^ quote (nickname_of_thm th) ^ 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

708 
" with " ^ string_of_int num_facts ^ " fact" ^ plural_s num_facts ^ 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

709 
"." 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

710 
> Output.urgent_message 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

711 
end 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

712 
else 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

713 
(); 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

714 
case run_prover_for_mash ctxt params prover facts goal of 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

715 
{outcome = NONE, used_facts, ...} => 
48404  716 
(if verbose andalso auto_level = 0 then 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

717 
let val num_facts = length used_facts in 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

718 
"Found proof with " ^ string_of_int num_facts ^ " fact" ^ 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

719 
plural_s num_facts ^ "." 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

720 
> Output.urgent_message 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

721 
end 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

722 
else 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

723 
(); 
50754
74a6adcb96ac
also generate queries for goals with too many Isar dependencies
blanchet
parents:
50751
diff
changeset

724 
(true, map fst used_facts)) 
48665  725 
 _ => (false, isar_deps) 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

726 
end 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

727 

6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

728 

6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

729 
(*** Highlevel communication with MaSh ***) 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

730 

48407  731 
fun num_keys keys = Graph.Keys.fold (K (Integer.add 1)) keys 0 
48400  732 

50610  733 
fun maximal_in_graph access_G facts = 
48316
252f45c04042
drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents:
48315
diff
changeset

734 
let 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

735 
val facts = [] > fold (cons o nickname_of_thm o snd) facts 
48407  736 
val tab = Symtab.empty > fold (fn name => Symtab.default (name, ())) facts 
737 
fun insert_new seen name = 

738 
not (Symtab.defined seen name) ? insert (op =) name 

739 
fun find_maxes _ (maxs, []) = map snd maxs 

740 
 find_maxes seen (maxs, new :: news) = 

741 
find_maxes 

50610  742 
(seen > num_keys (Graph.imm_succs access_G new) > 1 
48407  743 
? Symtab.default (new, ())) 
744 
(if Symtab.defined tab new then 

745 
let 

50610  746 
val newp = Graph.all_preds access_G [new] 
48407  747 
fun is_ancestor x yp = member (op =) yp x 
748 
val maxs = 

749 
maxs > filter (fn (_, max) => not (is_ancestor max newp)) 

750 
in 

751 
if exists (is_ancestor new o fst) maxs then 

752 
(maxs, news) 

753 
else 

754 
((newp, new) 

755 
:: filter_out (fn (_, max) => is_ancestor max newp) maxs, 

756 
news) 

757 
end 

758 
else 

759 
(maxs, Graph.Keys.fold (insert_new seen) 

50610  760 
(Graph.imm_preds access_G new) news)) 
761 
in find_maxes Symtab.empty ([], Graph.maximals access_G) end 

48316
252f45c04042
drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents:
48315
diff
changeset

762 

50610  763 
fun is_fact_in_graph access_G (_, th) = 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

764 
can (Graph.get_node access_G) (nickname_of_thm th) 
48320
891a24a48155
improved meshing of MaSh and MengPaulson if some MaSh suggestions are cutoff (the common case)
blanchet
parents:
48319
diff
changeset

765 

50383
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

766 
val weight_raw_mash_facts = weight_mepo_facts 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

767 
val weight_mash_facts = weight_raw_mash_facts 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

768 

4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

769 
(* FUDGE *) 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

770 
fun weight_of_proximity_fact rank = 
50398  771 
Math.pow (1.3, 15.5  0.2 * Real.fromInt rank) + 15.0 
50383
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

772 

4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

773 
fun weight_proximity_facts facts = 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

774 
facts ~~ map weight_of_proximity_fact (0 upto length facts  1) 
50382  775 

50440
ca99c269ca3a
don't have MaSh pretend it knows facts it doesn't know
blanchet
parents:
50438
diff
changeset

776 
val max_proximity_facts = 100 
ca99c269ca3a
don't have MaSh pretend it knows facts it doesn't know
blanchet
parents:
50438
diff
changeset

777 

ca99c269ca3a
don't have MaSh pretend it knows facts it doesn't know
blanchet
parents:
50438
diff
changeset

778 
fun find_mash_suggestions max_facts suggs facts chained raw_unknown = 
50412  779 
let 
780 
val raw_mash = 

781 
facts > find_suggested_facts suggs 

782 
(* The weights currently returned by "mash.py" are too spaced out to 

783 
make any sense. *) 

784 
> map fst 

50440
ca99c269ca3a
don't have MaSh pretend it knows facts it doesn't know
blanchet
parents:
50438
diff
changeset

785 
val proximity = 
ca99c269ca3a
don't have MaSh pretend it knows facts it doesn't know
blanchet
parents:
50438
diff
changeset

786 
facts > sort (thm_ord o pairself snd o swap) 
ca99c269ca3a
don't have MaSh pretend it knows facts it doesn't know
blanchet
parents:
50438
diff
changeset

787 
> take max_proximity_facts 
50412  788 
val mess = 
50858  789 
[(0.8 (* FUDGE *), (weight_raw_mash_facts raw_mash, raw_unknown)), 
790 
(0.2 (* FUDGE *), (weight_proximity_facts proximity, []))] 

50440
ca99c269ca3a
don't have MaSh pretend it knows facts it doesn't know
blanchet
parents:
50438
diff
changeset

791 
val unknown = 
50858  792 
raw_unknown > subtract (Thm.eq_thm_prop o pairself snd) proximity 
50825
aed1d7242050
always compare theorem using the same, weaker function
blanchet
parents:
50814
diff
changeset

793 
in (mesh_facts (Thm.eq_thm_prop o pairself snd) max_facts mess, unknown) end 
50412  794 

50858  795 
fun mash_suggested_facts ctxt ({overlord, learn, ...} : params) prover max_facts 
796 
hyp_ts concl_t facts = 

48301  797 
let 
48302  798 
val thy = Proof_Context.theory_of ctxt 
50826  799 
val chained = facts > filter (fn ((_, (scope, _)), _) => scope = Chained) 
50610  800 
val (access_G, suggs) = 
801 
peek_state ctxt (fn {access_G, ...} => 

802 
if Graph.is_empty access_G then 

803 
(access_G, []) 

48434
aaaec69db3db
ensure all calls to "mash" program are synchronous
blanchet
parents:
48433
diff
changeset

804 
else 
aaaec69db3db
ensure all calls to "mash" program are synchronous
blanchet
parents:
48433
diff
changeset

805 
let 
50610  806 
val parents = maximal_in_graph access_G facts 
48434
aaaec69db3db
ensure all calls to "mash" program are synchronous
blanchet
parents:
48433
diff
changeset

807 
val feats = 
aaaec69db3db
ensure all calls to "mash" program are synchronous
blanchet
parents:
48433
diff
changeset

808 
features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts) 
50826  809 
val hints = map (nickname_of_thm o snd) chained 
48434
aaaec69db3db
ensure all calls to "mash" program are synchronous
blanchet
parents:
48433
diff
changeset

810 
in 
50826  811 
(access_G, 
50858  812 
MaSh.suggest ctxt overlord learn max_facts 
813 
(parents, feats, hints)) 

48434
aaaec69db3db
ensure all calls to "mash" program are synchronous
blanchet
parents:
48433
diff
changeset

814 
end) 
50610  815 
val unknown = facts > filter_out (is_fact_in_graph access_G) 
50412  816 
in find_mash_suggestions max_facts suggs facts chained unknown end 
48249  817 

50631  818 
fun learn_wrt_access_graph ctxt (name, parents, feats, deps) (learns, graph) = 
48316
252f45c04042
drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents:
48315
diff
changeset

819 
let 
50631  820 
fun maybe_learn_from from (accum as (parents, graph)) = 
48321  821 
try_graph ctxt "updating graph" accum (fn () => 
822 
(from :: parents, Graph.add_edge_acyclic (from, name) graph)) 

48669
cdcdb0547f29
remember ATP flops to avoid repeating them too quickly
blanchet
parents:
48668
diff
changeset

823 
val graph = graph > Graph.default_node (name, Isar_Proof) 
50631  824 
val (parents, graph) = ([], graph) > fold maybe_learn_from parents 
825 
val (deps, _) = ([], graph) > fold maybe_learn_from deps 

826 
in ((name, parents, feats, deps) :: learns, graph) end 

48306  827 

50631  828 
fun relearn_wrt_access_graph ctxt (name, deps) (relearns, graph) = 
48668
5d63c23b4042
remember which MaSh proofs were found using ATPs
blanchet
parents:
48667
diff
changeset

829 
let 
50631  830 
fun maybe_relearn_from from (accum as (parents, graph)) = 
48668
5d63c23b4042
remember which MaSh proofs were found using ATPs
blanchet
parents:
48667
diff
changeset

831 
try_graph ctxt "updating graph" accum (fn () => 
5d63c23b4042
remember which MaSh proofs were found using ATPs
blanchet
parents:
48667
diff
changeset

832 
(from :: parents, Graph.add_edge_acyclic (from, name) graph)) 
50610  833 
val graph = graph > update_access_graph_node (name, Automatic_Proof) 
50631  834 
val (deps, _) = ([], graph) > fold maybe_relearn_from deps 
835 
in ((name, deps) :: relearns, graph) end 

48668
5d63c23b4042
remember which MaSh proofs were found using ATPs
blanchet
parents:
48667
diff
changeset

836 

50610  837 
fun flop_wrt_access_graph name = 
838 
update_access_graph_node (name, Isar_Proof_wegen_Prover_Flop) 

48669
cdcdb0547f29
remember ATP flops to avoid repeating them too quickly
blanchet
parents:
48668
diff
changeset

839 

48384
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

840 
val learn_timeout_slack = 2.0 
48318  841 

48384
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

842 
fun launch_thread timeout task = 
48383  843 
let 
48384
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

844 
val hard_timeout = time_mult learn_timeout_slack timeout 
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

845 
val birth_time = Time.now () 
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

846 
val death_time = Time.+ (birth_time, hard_timeout) 
48442  847 
val desc = ("Machine learner for Sledgehammer", "") 
48384
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

848 
in Async_Manager.launch MaShN birth_time death_time desc task end 
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

849 

83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

850 
fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) prover t facts 
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

851 
used_ths = 
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

852 
if is_smt_prover ctxt prover then 
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

853 
() 
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

854 
else 
50557  855 
launch_thread (timeout > the_default one_day) (fn () => 
48403
1f214c653c80
don't store fresh names in fact graph, since these cannot be the parents of any other facts
blanchet
parents:
48401
diff
changeset

856 
let 
1f214c653c80
don't store fresh names in fact graph, since these cannot be the parents of any other facts
blanchet
parents:
48401
diff
changeset

857 
val thy = Proof_Context.theory_of ctxt 
1f214c653c80
don't store fresh names in fact graph, since these cannot be the parents of any other facts
blanchet
parents:
48401
diff
changeset

858 
val name = freshish_name () 
1f214c653c80
don't store fresh names in fact graph, since these cannot be the parents of any other facts
blanchet
parents:
48401
diff
changeset

859 
val feats = features_of ctxt prover thy (Local, General) [t] 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

860 
val deps = used_ths > map nickname_of_thm 
48403
1f214c653c80
don't store fresh names in fact graph, since these cannot be the parents of any other facts
blanchet
parents:
48401
diff
changeset

861 
in 
50610  862 
peek_state ctxt (fn {access_G, ...} => 
863 
let val parents = maximal_in_graph access_G facts in 

50632
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

864 
MaSh.learn ctxt overlord [(name, parents, feats, deps)] 
48434
aaaec69db3db
ensure all calls to "mash" program are synchronous
blanchet
parents:
48433
diff
changeset

865 
end); 
aaaec69db3db
ensure all calls to "mash" program are synchronous
blanchet
parents:
48433
diff
changeset

866 
(true, "") 
48403
1f214c653c80
don't store fresh names in fact graph, since these cannot be the parents of any other facts
blanchet
parents:
48401
diff
changeset

867 
end) 
48383  868 

50450
358b6020f8b6
generalized notion of active area, where sendback is just one application;
wenzelm
parents:
50440
diff
changeset

869 
fun sendback sub = Active.sendback_markup (sledgehammerN ^ " " ^ sub) 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

870 

ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

871 
val commit_timeout = seconds 30.0 
48332
271a4a6af734
optimize parent computation in MaSh + remove temporary files
blanchet
parents:
48330
diff
changeset

872 

50485
3c6ac2da2f45
merge aliased theorems in MaSh dependencies, modulo symmetry of equality
blanchet
parents:
50484
diff
changeset

873 
(* The timeout is understood in a very relaxed fashion. *) 
48404  874 
fun mash_learn_facts ctxt (params as {debug, verbose, overlord, ...}) prover 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

875 
auto_level run_prover learn_timeout facts = 
48304  876 
let 
48318  877 
val timer = Timer.startRealTimer () 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

878 
fun next_commit_time () = 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

879 
Time.+ (Timer.checkRealTimer timer, commit_timeout) 
50610  880 
val {access_G, ...} = peek_state ctxt I 
50485
3c6ac2da2f45
merge aliased theorems in MaSh dependencies, modulo symmetry of equality
blanchet
parents:
50484
diff
changeset

881 
val facts = facts > sort (thm_ord o pairself snd) 
48400  882 
val (old_facts, new_facts) = 
50610  883 
facts > List.partition (is_fact_in_graph access_G) 
48308  884 
in 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

885 
if null new_facts andalso (not run_prover orelse null old_facts) then 
48404  886 
if auto_level < 2 then 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

887 
"No new " ^ (if run_prover then "automatic" else "Isar") ^ 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

888 
" proofs to learn." ^ 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

889 
(if auto_level = 0 andalso not run_prover then 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

890 
"\n\nHint: Try " ^ sendback learn_proverN ^ 
50751  891 
" to learn from an automatic prover." 
48404  892 
else 
893 
"") 

48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

894 
else 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

895 
"" 
48308  896 
else 
48304  897 
let 
50735
6b232d76cbc9
refined class handling, to prevent cycles in fact graph
blanchet
parents:
50732
diff
changeset

898 
val name_tabs = build_name_tables nickname_of_thm facts 
48439
67a6bcbd3587
removed MaSh junk arising from primrec definitions
blanchet
parents:
48438
diff
changeset

899 
fun deps_of status th = 
67a6bcbd3587
removed MaSh junk arising from primrec definitions
blanchet
parents:
48438
diff
changeset

900 
if status = Non_Rec_Def orelse status = Rec_Def then 
67a6bcbd3587
removed MaSh junk arising from primrec definitions
blanchet
parents:
48438
diff
changeset

901 
SOME [] 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

902 
else if run_prover then 
50735
6b232d76cbc9
refined class handling, to prevent cycles in fact graph
blanchet
parents:
50732
diff
changeset

903 
prover_dependencies_of ctxt params prover auto_level facts name_tabs 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

904 
th 
50754
74a6adcb96ac
also generate queries for goals with too many Isar dependencies
blanchet
parents:
50751
diff
changeset

905 
> (fn (false, _) => NONE 
74a6adcb96ac
also generate queries for goals with too many Isar dependencies
blanchet
parents:
50751
diff
changeset

906 
 (true, deps) => trim_dependencies th deps) 
48404  907 
else 
50735
6b232d76cbc9
refined class handling, to prevent cycles in fact graph
blanchet
parents:
50732
diff
changeset

908 
isar_dependencies_of name_tabs th 
50754
74a6adcb96ac
also generate queries for goals with too many Isar dependencies
blanchet
parents:
50751
diff
changeset

909 
> trim_dependencies th 
48669
cdcdb0547f29
remember ATP flops to avoid repeating them too quickly
blanchet
parents:
48668
diff
changeset

910 
fun do_commit [] [] [] state = state 
50631  911 
 do_commit learns relearns flops {access_G, dirty} = 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

912 
let 
50610  913 
val was_empty = Graph.is_empty access_G 
50631  914 
val (learns, access_G) = 
915 
([], access_G) > fold (learn_wrt_access_graph ctxt) learns 

916 
val (relearns, access_G) = 

917 
([], access_G) > fold (relearn_wrt_access_graph ctxt) relearns 

50610  918 
val access_G = access_G > fold flop_wrt_access_graph flops 
48699  919 
val dirty = 
50631  920 
case (was_empty, dirty, relearns) of 
921 
(false, SOME names, []) => SOME (map #1 learns @ names) 

48699  922 
 _ => NONE 
48404  923 
in 
50632
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

924 
MaSh.learn ctxt overlord (rev learns); 
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

925 
MaSh.relearn ctxt overlord relearns; 
50610  926 
{access_G = access_G, dirty = dirty} 
48404  927 
end 
50631  928 
fun commit last learns relearns flops = 
48404  929 
(if debug andalso auto_level = 0 then 
930 
Output.urgent_message "Committing..." 

931 
else 

932 
(); 

50631  933 
map_state ctxt (do_commit (rev learns) relearns flops); 
48404  934 
if not last andalso auto_level = 0 then 
50631  935 
let val num_proofs = length learns + length relearns in 
48404  936 
"Learned " ^ string_of_int num_proofs ^ " " ^ 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

937 
(if run_prover then "automatic" else "Isar") ^ " proof" ^ 
48404  938 
plural_s num_proofs ^ " in the last " ^ 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

939 
string_from_time commit_timeout ^ "." 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

940 
> Output.urgent_message 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

941 
end 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

942 
else 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

943 
()) 
48404  944 
fun learn_new_fact _ (accum as (_, (_, _, _, true))) = accum 
48439
67a6bcbd3587
removed MaSh junk arising from primrec definitions
blanchet
parents:
48438
diff
changeset

945 
 learn_new_fact ((_, stature as (_, status)), th) 
50631  946 
(learns, (parents, n, next_commit, _)) = 
48318  947 
let 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

948 
val name = nickname_of_thm th 
48332
271a4a6af734
optimize parent computation in MaSh + remove temporary files
blanchet
parents:
48330
diff
changeset

949 
val feats = 
48385  950 
features_of ctxt prover (theory_of_thm th) stature [prop_of th] 
48439
67a6bcbd3587
removed MaSh junk arising from primrec definitions
blanchet
parents:
48438
diff
changeset

951 
val deps = deps_of status th > these 
48394
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

952 
val n = n > not (null deps) ? Integer.add 1 
50631  953 
val learns = (name, parents, feats, deps) :: learns 
954 
val (learns, next_commit) = 

48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

955 
if Time.> (Timer.checkRealTimer timer, next_commit) then 
50631  956 
(commit false learns [] []; ([], next_commit_time ())) 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

957 
else 
50631  958 
(learns, next_commit) 
50557  959 
val timed_out = 
960 
case learn_timeout of 

961 
SOME timeout => Time.> (Timer.checkRealTimer timer, timeout) 

962 
 NONE => false 

50631  963 
in (learns, ([name], n, next_commit, timed_out)) end 
48404  964 
val n = 
965 
if null new_facts then 

966 
0 

967 
else 

968 
let 

969 
val last_th = new_facts > List.last > snd 

970 
(* crude approximation *) 

971 
val ancestors = 

972 
old_facts 

973 
> filter (fn (_, th) => thm_ord (th, last_th) <> GREATER) 

50610  974 
val parents = maximal_in_graph access_G ancestors 
50631  975 
val (learns, (_, n, _, _)) = 
48404  976 
([], (parents, 0, next_commit_time (), false)) 
977 
> fold learn_new_fact new_facts 

50631  978 
in commit true learns [] []; n end 
48404  979 
fun relearn_old_fact _ (accum as (_, (_, _, true))) = accum 
48439
67a6bcbd3587
removed MaSh junk arising from primrec definitions
blanchet
parents:
48438
diff
changeset

980 
 relearn_old_fact ((_, (_, status)), th) 
50631  981 
((relearns, flops), (n, next_commit, _)) = 
48404  982 
let 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

983 
val name = nickname_of_thm th 
50631  984 
val (n, relearns, flops) = 
48439
67a6bcbd3587
removed MaSh junk arising from primrec definitions
blanchet
parents:
48438
diff
changeset

985 
case deps_of status th of 
50631  986 
SOME deps => (n + 1, (name, deps) :: relearns, flops) 
987 
 NONE => (n, relearns, name :: flops) 

988 
val (relearns, flops, next_commit) = 

48404  989 
if Time.> (Timer.checkRealTimer timer, next_commit) then 
50631  990 
(commit false [] relearns flops; 
991 
([], [], next_commit_time ())) 

48404  992 
else 
50631  993 
(relearns, flops, next_commit) 
50557  994 
val timed_out = 
995 
case learn_timeout of 

996 
SOME timeout => Time.> (Timer.checkRealTimer timer, timeout) 

997 
 NONE => false 

50631  998 
in ((relearns, flops), (n, next_commit, timed_out)) end 
48404  999 
val n = 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

1000 
if not run_prover orelse null old_facts then 
48404  1001 
n 
1002 
else 

1003 
let 

48668
5d63c23b4042
remember which MaSh proofs were found using ATPs
blanchet
parents:
48667
diff
changeset

1004 
val max_isar = 1000 * max_dependencies 
48669
cdcdb0547f29
remember ATP flops to avoid repeating them too quickly
blanchet
parents:
48668
diff
changeset

1005 
fun kind_of_proof th = 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

1006 
try (Graph.get_node access_G) (nickname_of_thm th) 
48669
cdcdb0547f29
remember ATP flops to avoid repeating them too quickly
blanchet
parents:
48668
diff
changeset

1007 
> the_default Isar_Proof 
48406  1008 
fun priority_of (_, th) = 
48668
5d63c23b4042
remember which MaSh proofs were found using ATPs
blanchet
parents:
48667
diff
changeset

1009 
random_range 0 max_isar 
48669
cdcdb0547f29
remember ATP flops to avoid repeating them too quickly
blanchet
parents:
48668
diff
changeset

1010 
+ (case kind_of_proof th of 
cdcdb0547f29
remember ATP flops to avoid repeating them too quickly
blanchet
parents:
48668
diff
changeset

1011 
Isar_Proof => 0 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

1012 
 Automatic_Proof => 2 * max_isar 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

1013 
 Isar_Proof_wegen_Prover_Flop => max_isar) 
50754
74a6adcb96ac
also generate queries for goals with too many Isar dependencies
blanchet
parents:
50751
diff
changeset

1014 
 500 * length (isar_dependencies_of name_tabs th) 
48404  1015 
val old_facts = 
48406  1016 
old_facts > map (`priority_of) 
48404  1017 
> sort (int_ord o pairself fst) 
1018 
> map snd 

50631  1019 
val ((relearns, flops), (n, _, _)) = 
48669
cdcdb0547f29
remember ATP flops to avoid repeating them too quickly
blanchet
parents:
48668
diff
changeset

1020 
(([], []), (n, next_commit_time (), false)) 
48404  1021 
> fold relearn_old_fact old_facts 
50631  1022 
in commit true [] relearns flops; n end 
48318  1023 
in 
48404  1024 
if verbose orelse auto_level < 2 then 
1025 
"Learned " ^ string_of_int n ^ " nontrivial " ^ 

50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

1026 
(if run_prover then "automatic" else "Isar") ^ " proof" ^ plural_s n ^ 
48319
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

1027 
(if verbose then 
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

1028 
" in " ^ string_from_time (Timer.checkRealTimer timer) 
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

1029 
else 
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

1030 
"") ^ "." 
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

1031 
else 
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

1032 
"" 
48318  1033 
end 
48308  1034 
end 
48304  1035 

48404  1036 
fun mash_learn ctxt (params as {provers, timeout, ...}) fact_override chained 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

1037 
run_prover = 
48316
252f45c04042
drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents:
48315
diff
changeset

1038 
let 
48396  1039 
val css = Sledgehammer_Fact.clasimpset_rule_table_of ctxt 
48395
85a7fb65507a
learning should honor the fact override and the chained facts
blanchet
parents:
48394
diff
changeset

1040 
val ctxt = ctxt > Config.put instantiate_inducts false 
85a7fb65507a
learning should honor the fact override and the chained facts
blanchet
parents:
48394
diff
changeset

1041 
val facts = 
48396  1042 
nearly_all_facts ctxt false fact_override Symtab.empty css chained [] 
1043 
@{prop True} 

48404  1044 
val num_facts = length facts 
1045 
val prover = hd provers 

50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

1046 
fun learn auto_level run_prover = 
50557  1047 
mash_learn_facts ctxt params prover auto_level run_prover NONE facts 
48404  1048 
> Output.urgent_message 
48316
252f45c04042
drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents:
48315
diff
changeset

1049 
in 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

1050 
if run_prover then 
50340
72519bf5f135
simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents:
50339
diff
changeset

1051 
("MaShing through " ^ string_of_int num_facts ^ " fact" ^ 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

1052 
plural_s num_facts ^ " for automatic proofs (" ^ quote prover ^ 
50557  1053 
(case timeout of 
1054 
SOME timeout => " timeout: " ^ string_from_time timeout 

1055 
 NONE => "") ^ ").\n\nCollecting Isar proofs first..." 

50340
72519bf5f135
simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents:
50339
diff
changeset

1056 
> Output.urgent_message; 
72519bf5f135
simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents:
50339
diff
changeset

1057 
learn 1 false; 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

1058 
"Now collecting automatic proofs. This may take several hours. You can \ 
50340
72519bf5f135
simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents:
50339
diff
changeset

1059 
\safely stop the learning process at any point." 
72519bf5f135
simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents:
50339
diff
changeset

1060 
> Output.urgent_message; 
72519bf5f135
simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents:
50339
diff
changeset

1061 
learn 0 true) 
72519bf5f135
simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents:
50339
diff
changeset

1062 
else 
72519bf5f135
simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents:
50339
diff
changeset

1063 
("MaShing through " ^ string_of_int num_facts ^ " fact" ^ 
72519bf5f135
simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents:
50339
diff
changeset

1064 
plural_s num_facts ^ " for Isar proofs..." 
72519bf5f135
simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents:
50339
diff
changeset

1065 
> Output.urgent_message; 
72519bf5f135
simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents:
50339
diff
changeset

1066 
learn 0 false) 
48316
252f45c04042
drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents:
48315
diff
changeset

1067 
end 
48249  1068 

50311  1069 
fun is_mash_enabled () = (getenv "MASH" = "yes") 
50570  1070 
fun mash_can_suggest_facts ctxt = 
50610  1071 
not (Graph.is_empty (#access_G (peek_state ctxt I))) 
50311  1072 

50383
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

1073 
(* Generate more suggestions than requested, because some might be thrown out 
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

1074 
later for various reasons. *) 
50718  1075 
fun generous_max_facts max_facts = max_facts + Int.min (50, max_facts div 2) 
50383
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset

1076 

50814  1077 
val mepo_weight = 0.5 
1078 
val mash_weight = 0.5 

1079 

48318  1080 
(* The threshold should be large enough so that MaSh doesn't kick in for Auto 
1081 
Sledgehammer and Try. *) 

1082 
val min_secs_for_learning = 15 

1083 

48321  1084 
fun relevant_facts ctxt (params as {learn, fact_filter, timeout, ...}) prover 
1085 
max_facts ({add, only, ...} : fact_override) hyp_ts concl_t facts = 

48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

1086 
if not (subset (op =) (the_list fact_filter, fact_filters)) then 
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

1087 
error ("Unknown fact filter: " ^ quote (the fact_filter) ^ ".") 
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

1088 
else if only then 
48289  1089 
facts 
48321  1090 
else if max_facts <= 0 orelse null facts then 
48288
255c6e1fd505
rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents:
48251
diff
changeset

1091 
[] 
255c6e1fd505
rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents:
48251
diff
changeset

1092 
else 
255c6e1fd505
rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents:
48251
diff
changeset

1093 
let 
48327
568b3193e53e
don't include hidden facts in relevance filter + tweak MaSh learning
blanchet
parents:
48326
diff
changeset

1094 
fun maybe_learn () = 
48384
83dc102041e6
learn on explicit "min" command but do the learning in a thread, since it may take a couple of seconds
blanchet
parents:
48383
diff
changeset

1095 
if learn andalso not (Async_Manager.has_running_threads MaShN) andalso 
50557  1096 
(timeout = NONE orelse 
1097 
Time.toSeconds (the timeout) >= min_secs_for_learning) then 

1098 
let 

1099 
val timeout = Option.map (time_mult learn_timeout_slack) timeout 

1100 
in 

1101 
launch_thread (timeout > the_default one_day) 

48404  1102 
(fn () => (true, mash_learn_facts ctxt params prover 2 false 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

1103 
timeout facts)) 
48319
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

1104 
end 
48318  1105 
else 
1106 
() 

48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

1107 
val fact_filter = 
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

1108 
case fact_filter of 
48379
2b5ad61e2ccc
renamed "iter" fact filter to "MePo" (MengPaulson)
blanchet
parents:
48378
diff
changeset

1109 
SOME ff => (() > ff <> mepoN ? maybe_learn; ff) 
48318  1110 
 NONE => 
48407  1111 
if is_smt_prover ctxt prover then 
1112 
mepoN 

50229  1113 
else if is_mash_enabled () then 
48407  1114 
(maybe_learn (); 
1115 
if mash_can_suggest_facts ctxt then meshN else mepoN) 

1116 
else 

1117 
mepoN 

48288
255c6e1fd505
rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents:
48251
diff
changeset

1118 
val add_ths = Attrib.eval_thms ctxt add 
48292  1119 
fun prepend_facts ths accepts = 
48288
255c6e1fd505
rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents:
48251
diff
changeset

1120 
((facts > filter (member Thm.eq_thm_prop ths o snd)) @ 
48292  1121 
(accepts > filter_out (member Thm.eq_thm_prop ths o snd))) 
48293  1122 
> take max_facts 
48406  1123 
fun mepo () = 
50382  1124 
mepo_suggested_facts ctxt params prover max_facts NONE hyp_ts concl_t 
1125 
facts 

1126 
> weight_mepo_facts 

48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

1127 
fun mash () = 
50383
4274b25ff4e7
take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents:
50382
diff
changeset
