author  blanchet 
Tue, 01 Jul 2014 16:47:10 +0200  
changeset 57461  29efe682335b 
parent 57460  9cc802a8ab06 
child 57462  dabd4516450d 
permissions  rwrr 
48380  1 
(* Title: HOL/Tools/Sledgehammer/sledgehammer_mash.ML 
48248  2 
Author: Jasmin Blanchette, TU Muenchen 
57009  3 
Author: Cezary Kaliszyk, University of Innsbruck 
48248  4 

5 
Sledgehammer's machinelearningbased relevance filter (MaSh). 

6 
*) 

7 

48381  8 
signature SLEDGEHAMMER_MASH = 
48248  9 
sig 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

10 
type stature = ATP_Problem_Generate.stature 
51004
5f2788c38127
distinguish raw and nonraw facts, using raw for 10 000s of facts and nonraw after selection of some hundreds
blanchet
parents:
51003
diff
changeset

11 
type raw_fact = Sledgehammer_Fact.raw_fact 
48296
e7f01b7e244e
gracefully handle the case of empty theories when going up the accessibility chain
blanchet
parents:
48293
diff
changeset

12 
type fact = Sledgehammer_Fact.fact 
e7f01b7e244e
gracefully handle the case of empty theories when going up the accessibility chain
blanchet
parents:
48293
diff
changeset

13 
type fact_override = Sledgehammer_Fact.fact_override 
55201  14 
type params = Sledgehammer_Prover.params 
15 
type prover_result = Sledgehammer_Prover.prover_result 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

16 

48308  17 
val trace : bool Config.T 
57150
f591096a9c94
add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents:
57133
diff
changeset

18 
val duplicates : bool Config.T 
51008  19 
val MePoN : string 
48319
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

20 
val MaShN : string 
51008  21 
val MeShN : string 
48379
2b5ad61e2ccc
renamed "iter" fact filter to "MePo" (MengPaulson)
blanchet
parents:
48378
diff
changeset

22 
val mepoN : string 
2b5ad61e2ccc
renamed "iter" fact filter to "MePo" (MengPaulson)
blanchet
parents:
48378
diff
changeset

23 
val mashN : string 
48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

24 
val meshN : string 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

25 
val unlearnN : string 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

26 
val learn_isarN : string 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

27 
val learn_proverN : string 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

28 
val relearn_isarN : string 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

29 
val relearn_proverN : string 
48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

30 
val fact_filters : string list 
50826  31 
val encode_str : string > string 
32 
val encode_strs : string list > string 

57005
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

33 
val decode_str : string > string 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

34 
val decode_strs : string > string list 
50632
12c097ff3241
slightly more elegant naming convention (to keep lowlevel and highlevel APIs separated)
blanchet
parents:
50631
diff
changeset

35 

57278  36 
datatype mash_engine = 
57458  37 
MaSh_NB 
57459  38 
 MaSh_kNN 
39 
 MaSh_NB_kNN 

57458  40 
 MaSh_NB_Ext 
57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

41 
 MaSh_kNN_Ext 
57106  42 

57120  43 
val is_mash_enabled : unit > bool 
44 
val the_mash_engine : unit > mash_engine 

45 

57459  46 
val mesh_facts : ('a * 'a > bool) > int > (real * (('a * real) list * 'a list)) list > 'a list 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

47 
val nickname_of_thm : thm > string 
57006  48 
val find_suggested_facts : Proof.context > ('b * thm) list > string list > ('b * thm) list 
51135  49 
val crude_thm_ord : thm * thm > order 
51182
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

50 
val thm_less : thm * thm > bool 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

51 
val goal_of_thm : theory > thm > thm 
57006  52 
val run_prover_for_mash : Proof.context > params > string > string > fact list > thm > 
53 
prover_result 

57406  54 
val features_of : Proof.context > theory > stature > term list > string list 
51177  55 
val trim_dependencies : string list > string list option 
57306
ff10067b2248
optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents:
57305
diff
changeset

56 
val isar_dependencies_of : string Symtab.table * string Symtab.table > thm > string list option 
57006  57 
val prover_dependencies_of : Proof.context > params > string > int > raw_fact list > 
58 
string Symtab.table * string Symtab.table > thm > bool * string list 

59 
val attach_parents_to_facts : ('a * thm) list > ('a * thm) list > 

60 
(string list * ('a * thm)) list 

53140
a1235e90da5f
pour extra features from proximate facts into goal, in exporter
blanchet
parents:
53137
diff
changeset

61 
val num_extra_feature_facts : int 
53141
d27e99a6a679
take chained and proximate facts into consideration when computing MaSh features
blanchet
parents:
53140
diff
changeset

62 
val extra_feature_factor : real 
53140
a1235e90da5f
pour extra features from proximate facts into goal, in exporter
blanchet
parents:
53137
diff
changeset

63 
val weight_facts_smoothly : 'a list > ('a * real) list 
a1235e90da5f
pour extra features from proximate facts into goal, in exporter
blanchet
parents:
53137
diff
changeset

64 
val weight_facts_steeply : 'a list > ('a * real) list 
57432  65 
val find_mash_suggestions : Proof.context > int > string list > ('a * thm) list > 
66 
('a * thm) list > ('a * thm) list > ('a * thm) list * ('a * thm) list 

57407
140e16bc2a40
use right theory name for theorems in evaluation driver
blanchet
parents:
57406
diff
changeset

67 
val mash_suggested_facts : Proof.context > theory > params > int > term list > term > 
140e16bc2a40
use right theory name for theorems in evaluation driver
blanchet
parents:
57406
diff
changeset

68 
raw_fact list > fact list * fact list 
57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

69 

02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

70 
val mash_unlearn : unit > unit 
54503  71 
val mash_learn_proof : Proof.context > params > term > ('a * thm) list > thm list > unit 
57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

72 
val mash_learn_facts : Proof.context > params > string > int > bool > Time.time > 
57120  73 
raw_fact list > string 
57006  74 
val mash_learn : Proof.context > params > fact_override > thm list > bool > unit 
57432  75 
val mash_can_suggest_facts : Proof.context > bool 
53819
e55d641d0a70
honor MaSh's zerooverhead policy  no learning if the tool is disabled
blanchet
parents:
53790
diff
changeset

76 

57108
dc0b4f50e288
more generous max number of suggestions, for more safety
blanchet
parents:
57107
diff
changeset

77 
val generous_max_suggestions : int > int 
50814  78 
val mepo_weight : real 
79 
val mash_weight : real 

57006  80 
val relevant_facts : Proof.context > params > string > int > fact_override > term list > 
81 
term > raw_fact list > (string * fact list) list 

57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

82 
val kill_learners : unit > unit 
48319
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

83 
val running_learners : unit > unit 
48248  84 
end; 
85 

48381  86 
structure Sledgehammer_MaSh : SLEDGEHAMMER_MASH = 
48248  87 
struct 
48249  88 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

89 
open ATP_Util 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

90 
open ATP_Problem_Generate 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

91 
open Sledgehammer_Util 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

92 
open Sledgehammer_Fact 
55201  93 
open Sledgehammer_Prover 
55202
824c48a539c9
renamed many Sledgehammer ML files to clarify structure
blanchet
parents:
55201
diff
changeset

94 
open Sledgehammer_Prover_Minimize 
48381  95 
open Sledgehammer_MePo 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

96 

57005
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

97 
val trace = Attrib.setup_config_bool @{binding sledgehammer_mash_trace} (K false) 
57150
f591096a9c94
add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents:
57133
diff
changeset

98 
val duplicates = Attrib.setup_config_bool @{binding sledgehammer_fact_duplicates} (K false) 
51032
69da236d7838
added option to use SNoW as machine learning algo
blanchet
parents:
51029
diff
changeset

99 

48308  100 
fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else () 
101 

57150
f591096a9c94
add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents:
57133
diff
changeset

102 
fun gen_eq_thm ctxt = if Config.get ctxt duplicates then Thm.eq_thm_strict else Thm.eq_thm_prop 
f591096a9c94
add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents:
57133
diff
changeset

103 

51008  104 
val MePoN = "MePo" 
48319
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

105 
val MaShN = "MaSh" 
51008  106 
val MeShN = "MeSh" 
48319
340187063d84
use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents:
48318
diff
changeset

107 

48379
2b5ad61e2ccc
renamed "iter" fact filter to "MePo" (MengPaulson)
blanchet
parents:
48378
diff
changeset

108 
val mepoN = "mepo" 
48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

109 
val mashN = "mash" 
48379
2b5ad61e2ccc
renamed "iter" fact filter to "MePo" (MengPaulson)
blanchet
parents:
48378
diff
changeset

110 
val meshN = "mesh" 
48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

111 

48379
2b5ad61e2ccc
renamed "iter" fact filter to "MePo" (MengPaulson)
blanchet
parents:
48378
diff
changeset

112 
val fact_filters = [meshN, mepoN, mashN] 
48314
ee33ba3c0e05
added option to control which fact filter is used
blanchet
parents:
48313
diff
changeset

113 

48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

114 
val unlearnN = "unlearn" 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

115 
val learn_isarN = "learn_isar" 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

116 
val learn_proverN = "learn_prover" 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

117 
val relearn_isarN = "relearn_isar" 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

118 
val relearn_proverN = "relearn_prover" 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

119 

57368  120 
fun map_array_at ary f i = Array.update (ary, i, f (Array.sub (ary, i))) 
121 

57371  122 
type xtab = int * int Symtab.table 
123 

124 
val empty_xtab = (0, Symtab.empty) 

125 

126 
fun add_to_xtab key (next, tab) = (next + 1, Symtab.update_new (key, next) tab) 

127 
fun maybe_add_to_xtab key = perhaps (try (add_to_xtab key)) 

128 

57386  129 
fun mash_state_dir () = 
130 
Path.expand (Path.explode "$ISABELLE_HOME_USER/mash" > tap Isabelle_System.mkdir) 

131 

50310  132 
fun mash_state_file () = Path.append (mash_state_dir ()) (Path.explode "state") 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

133 

57007  134 
fun wipe_out_mash_state_dir () = 
135 
let val path = mash_state_dir () in 

136 
try (File.fold_dir (fn file => fn _ => try File.rm (Path.append path (Path.basic file))) path) 

137 
NONE; 

138 
() 

139 
end 

48330  140 

57278  141 
datatype mash_engine = 
57458  142 
MaSh_NB 
57459  143 
 MaSh_kNN 
144 
 MaSh_NB_kNN 

57458  145 
 MaSh_NB_Ext 
57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

146 
 MaSh_kNN_Ext 
57278  147 

57028  148 
fun mash_engine () = 
57089  149 
let val flag1 = Options.default_string @{system_option MaSh} in 
57028  150 
(case if flag1 <> "none" (* default *) then flag1 else getenv "MASH" of 
57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

151 
"yes" => SOME MaSh_NB 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

152 
 "sml" => SOME MaSh_NB 
57458  153 
 "nb" => SOME MaSh_NB 
57459  154 
 "knn" => SOME MaSh_kNN 
155 
 "nb_knn" => SOME MaSh_NB_kNN 

57458  156 
 "nb_ext" => SOME MaSh_NB_Ext 
57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

157 
 "knn_ext" => SOME MaSh_kNN_Ext 
57028  158 
 _ => NONE) 
159 
end 

57018  160 

57028  161 
val is_mash_enabled = is_some o mash_engine 
57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

162 
val the_mash_engine = the_default MaSh_NB o mash_engine 
57018  163 

57459  164 
fun scaled_avg [] = 0 
165 
 scaled_avg xs = Real.ceil (100000000.0 * fold (curry (op +)) xs 0.0) div length xs 

166 

167 
fun avg [] = 0.0 

168 
 avg xs = fold (curry (op +)) xs 0.0 / Real.fromInt (length xs) 

169 

170 
fun normalize_scores _ [] = [] 

171 
 normalize_scores max_facts xs = 

172 
map (apsnd (curry Real.* (1.0 / avg (map snd (take max_facts xs))))) xs 

173 

174 
fun mesh_facts fact_eq max_facts [(_, (sels, unks))] = 

175 
distinct fact_eq (map fst (take max_facts sels) @ take (max_facts  length sels) unks) 

176 
 mesh_facts fact_eq max_facts mess = 

177 
let 

178 
val mess = mess > map (apsnd (apfst (normalize_scores max_facts))) 

179 

180 
fun score_in fact (global_weight, (sels, unks)) = 

181 
let val score_at = try (nth sels) #> Option.map (fn (_, score) => global_weight * score) in 

182 
(case find_index (curry fact_eq fact o fst) sels of 

183 
~1 => if member fact_eq unks fact then NONE else SOME 0.0 

184 
 rank => score_at rank) 

185 
end 

186 

187 
fun weight_of fact = mess > map_filter (score_in fact) > scaled_avg 

188 
in 

189 
fold (union fact_eq o map fst o take max_facts o fst o snd) mess [] 

57460  190 
> map (`weight_of) > sort (int_ord o pairself fst o swap) 
57459  191 
> map snd > take max_facts 
192 
end 

193 

194 
fun smooth_weight_of_fact rank = Math.pow (1.3, 15.5  0.2 * Real.fromInt rank) + 15.0 (* FUDGE *) 

195 
fun steep_weight_of_fact rank = Math.pow (0.62, log2 (Real.fromInt (rank + 1))) (* FUDGE *) 

196 

197 
fun weight_facts_smoothly facts = facts ~~ map smooth_weight_of_fact (0 upto length facts  1) 

198 
fun weight_facts_steeply facts = facts ~~ map steep_weight_of_fact (0 upto length facts  1) 

199 

57460  200 
fun rev_sort_array_prefix cmp bnd a = 
57009  201 
let 
57406  202 
exception BOTTOM of int 
203 

57460  204 
val al = Array.length a 
205 

57009  206 
fun maxson l i = 
57017  207 
let val i31 = i + i + i + 1 in 
57009  208 
if i31 + 2 < l then 
57017  209 
let val x = Unsynchronized.ref i31 in 
210 
if cmp (Array.sub (a, i31), Array.sub (a, i31 + 1)) = LESS then x := i31 + 1 else (); 

211 
if cmp (Array.sub (a, !x), Array.sub (a, i31 + 2)) = LESS then x := i31 + 2 else (); 

57009  212 
!x 
213 
end 

214 
else 

215 
if i31 + 1 < l andalso cmp (Array.sub (a, i31), Array.sub (a, i31 + 1)) = LESS 

216 
then i31 + 1 else if i31 < l then i31 else raise BOTTOM i 

217 
end 

218 

219 
fun trickledown l i e = 

57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

220 
let val j = maxson l i in 
57009  221 
if cmp (Array.sub (a, j), e) = GREATER then 
57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

222 
(Array.update (a, i, Array.sub (a, j)); trickledown l j e) 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

223 
else 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

224 
Array.update (a, i, e) 
57009  225 
end 
226 

227 
fun trickle l i e = trickledown l i e handle BOTTOM i => Array.update (a, i, e) 

228 

229 
fun bubbledown l i = 

57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

230 
let val j = maxson l i in 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

231 
Array.update (a, i, Array.sub (a, j)); 
57009  232 
bubbledown l j 
233 
end 

234 

235 
fun bubble l i = bubbledown l i handle BOTTOM i => i 

236 

237 
fun trickleup i e = 

57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

238 
let val father = (i  1) div 3 in 
57009  239 
if cmp (Array.sub (a, father), e) = LESS then 
57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

240 
(Array.update (a, i, Array.sub (a, father)); 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

241 
if father > 0 then trickleup father e else Array.update (a, 0, e)) 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

242 
else 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

243 
Array.update (a, i, e) 
57009  244 
end 
245 

57355  246 
fun for i = if i < 0 then () else (trickle al i (Array.sub (a, i)); for (i  1)) 
57009  247 

248 
fun for2 i = 

57355  249 
if i < Integer.max 2 (al  bnd) then 
57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

250 
() 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

251 
else 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

252 
let val e = Array.sub (a, i) in 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

253 
Array.update (a, i, Array.sub (a, 0)); 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

254 
trickleup (bubble i 0) e; 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

255 
for2 (i  1) 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

256 
end 
57009  257 
in 
57355  258 
for (((al + 1) div 3)  1); 
259 
for2 (al  1); 

260 
if al > 1 then 

57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

261 
let val e = Array.sub (a, 1) in 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

262 
Array.update (a, 1, Array.sub (a, 0)); 
57009  263 
Array.update (a, 0, e) 
264 
end 

57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

265 
else 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

266 
() 
57009  267 
end 
268 

57460  269 
fun rev_sort_list_prefix cmp bnd xs = 
270 
let val ary = Array.fromList xs in 

271 
rev_sort_array_prefix cmp bnd ary; 

272 
Array.foldr (op ::) [] ary 

273 
end 

274 

275 

276 
(*** Isabelleagnostic machine learning ***) 

277 

278 
structure MaSh = 

279 
struct 

280 

57371  281 
fun select_visible_facts big_number recommends = 
57364  282 
List.app (fn at => 
283 
let val (j, ov) = Array.sub (recommends, at) in 

57371  284 
Array.update (recommends, at, (j, big_number + ov)) 
57364  285 
end) 
286 

57374  287 
fun wider_array_of_vector init vec = 
288 
let val ary = Array.array init in 

289 
Array.copyVec {src = vec, dst = ary, di = 0}; 

290 
ary 

291 
end 

292 

57132
4ddf5a8f8f38
make SML code closer to Python code when 'nb_kuehlwein_style' is true
blanchet
parents:
57130
diff
changeset

293 
val nb_def_prior_weight = 21 (* FUDGE *) 
57095  294 

57374  295 
fun learn_facts (tfreq0, sfreq0, dffreq0) num_facts0 num_facts num_feats depss featss = 
57354  296 
let 
57374  297 
val tfreq = wider_array_of_vector (num_facts, 0) tfreq0 
298 
val sfreq = wider_array_of_vector (num_facts, Inttab.empty) sfreq0 

299 
val dffreq = wider_array_of_vector (num_feats, 0) dffreq0 

300 

301 
fun learn_one th feats deps = 

57354  302 
let 
57355  303 
fun add_th weight t = 
304 
let 

305 
val im = Array.sub (sfreq, t) 

57374  306 
fun fold_fn s = Inttab.map_default (s, 0) (Integer.add weight) 
57355  307 
in 
57368  308 
map_array_at tfreq (Integer.add weight) t; 
57355  309 
Array.update (sfreq, t, fold fold_fn feats im) 
310 
end 

311 

57368  312 
val add_sym = map_array_at dffreq (Integer.add 1) 
57354  313 
in 
57355  314 
add_th nb_def_prior_weight th; 
315 
List.app (add_th 1) deps; 

316 
List.app add_sym feats 

57354  317 
end 
318 

57102  319 
fun for i = 
57383  320 
if i = num_facts then () 
321 
else (learn_one i (Vector.sub (featss, i)) (Vector.sub (depss, i)); for (i + 1)) 

57102  322 
in 
57383  323 
for num_facts0; 
57374  324 
(Array.vector tfreq, Array.vector sfreq, Array.vector dffreq) 
57102  325 
end 
57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

326 

57376  327 
fun naive_bayes (tfreq, sfreq, dffreq) num_facts max_suggs visible_facts goal_feats = 
57278  328 
let 
57362  329 
val tau = 0.05 (* FUDGE *) 
330 
val pos_weight = 10.0 (* FUDGE *) 

57278  331 
val def_val = ~15.0 (* FUDGE *) 
57124  332 

57366  333 
val ln_afreq = Math.ln (Real.fromInt num_facts) 
57374  334 
val idf = Vector.map (fn i => ln_afreq  Math.ln (Real.fromInt i)) dffreq 
57366  335 

57359  336 
fun tfidf feat = Vector.sub (idf, feat) 
57102  337 

338 
fun log_posterior i = 

57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

339 
let 
57374  340 
val tfreq = Real.fromInt (Vector.sub (tfreq, i)) 
57097  341 

57401
02f56126b4e4
reintroduced 'extra features' but with lower weight than before (to account for tfidf)
blanchet
parents:
57387
diff
changeset

342 
fun fold_feats (f, fw0) (res, sfh) = 
57102  343 
(case Inttab.lookup sfh f of 
344 
SOME sf => 

57401
02f56126b4e4
reintroduced 'extra features' but with lower weight than before (to account for tfidf)
blanchet
parents:
57387
diff
changeset

345 
(res + fw0 * tfidf f * Math.ln (pos_weight * Real.fromInt sf / tfreq), 
57102  346 
Inttab.delete f sfh) 
57409  347 
 NONE => (res + fw0 * tfidf f * def_val, sfh)) 
57097  348 

57376  349 
val (res, sfh) = fold fold_feats goal_feats (Math.ln tfreq, Vector.sub (sfreq, i)) 
57097  350 

57409  351 
fun fold_sfh (f, sf) sow = 
352 
sow + tfidf f * Math.ln (1.0 + (1.0  Real.fromInt sf) / tfreq) 

57097  353 

57102  354 
val sum_of_weights = Inttab.fold fold_sfh sfh 0.0 
57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

355 
in 
57278  356 
res + tau * sum_of_weights 
57097  357 
end 
57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

358 

57356  359 
val posterior = Array.tabulate (num_facts, (fn j => (j, log_posterior j))) 
57102  360 

57364  361 
fun ret at acc = 
362 
if at = num_facts then acc else ret (at + 1) (Array.sub (posterior, at) :: acc) 

57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

363 
in 
57371  364 
select_visible_facts 100000.0 posterior visible_facts; 
57460  365 
rev_sort_array_prefix (Real.compare o pairself snd) max_suggs posterior; 
57364  366 
ret (Integer.max 0 (num_facts  max_suggs)) [] 
57029
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

367 
end 
75cc30d2b83f
added naive Bayes ML implementation, due to Cezary Kaliszyk (like kNN)
blanchet
parents:
57028
diff
changeset

368 

57458  369 
val number_of_nearest_neighbors = 10 (* FUDGE *) 
370 

57459  371 
fun k_nearest_neighbors dffreq num_facts num_feats depss featss max_suggs visible_facts goal_feats = 
57458  372 
let 
373 
exception EXIT of unit 

374 

375 
val ln_afreq = Math.ln (Real.fromInt num_facts) 

376 
fun tfidf feat = ln_afreq  Math.ln (Real.fromInt (Vector.sub (dffreq, feat))) 

377 

378 
val overlaps_sqr = Array.tabulate (num_facts, rpair 0.0) 

379 

57459  380 
val feat_facts = Array.array (num_feats, []) 
381 
val _ = Vector.foldl (fn (feats, fact) => 

382 
(List.app (map_array_at feat_facts (cons fact)) feats; fact + 1)) 0 featss 

383 

57458  384 
fun do_feat (s, sw0) = 
385 
let 

386 
val sw = sw0 * tfidf s 

387 
val w2 = sw * sw 

388 

389 
fun inc_overlap j = 

390 
let val (_, ov) = Array.sub (overlaps_sqr, j) in 

391 
Array.update (overlaps_sqr, j, (j, w2 + ov)) 

392 
end 

393 
in 

394 
List.app inc_overlap (Array.sub (feat_facts, s)) 

395 
end 

396 

397 
val _ = List.app do_feat goal_feats 

57460  398 
val _ = rev_sort_array_prefix (Real.compare o pairself snd) num_facts overlaps_sqr 
57458  399 
val no_recommends = Unsynchronized.ref 0 
400 
val recommends = Array.tabulate (num_facts, rpair 0.0) 

401 
val age = Unsynchronized.ref 500000000.0 

402 

403 
fun inc_recommend v j = 

404 
let val (_, ov) = Array.sub (recommends, j) in 

405 
if ov <= 0.0 then 

406 
(no_recommends := !no_recommends + 1; Array.update (recommends, j, (j, !age + ov))) 

407 
else if ov < !age + 1000.0 then 

408 
Array.update (recommends, j, (j, v + ov)) 

409 
else 

410 
() 

411 
end 

412 

413 
val k = Unsynchronized.ref 0 

414 
fun do_k k = 

415 
if k >= num_facts then 

416 
raise EXIT () 

417 
else 

418 
let 

419 
val (j, o2) = Array.sub (overlaps_sqr, num_facts  k  1) 

420 
val o1 = Math.sqrt o2 

421 
val _ = inc_recommend o1 j 

422 
val ds = Vector.sub (depss, j) 

423 
val l = Real.fromInt (length ds) 

424 
in 

425 
List.app (inc_recommend (o1 / l)) ds 

426 
end 

427 

428 
fun while1 () = 

429 
if !k = number_of_nearest_neighbors then () else (do_k (!k); k := !k + 1; while1 ()) 

430 
handle EXIT () => () 

431 

432 
fun while2 () = 

433 
if !no_recommends >= max_suggs then () 

434 
else (do_k (!k); k := !k + 1; age := !age  10000.0; while2 ()) 

435 
handle EXIT () => () 

436 

437 
fun ret acc at = 

438 
if at = num_facts then acc else ret (Array.sub (recommends, at) :: acc) (at + 1) 

439 
in 

440 
while1 (); 

441 
while2 (); 

442 
select_visible_facts 1000000000.0 recommends visible_facts; 

57460  443 
rev_sort_array_prefix (Real.compare o pairself snd) max_suggs recommends; 
57458  444 
ret [] (Integer.max 0 (num_facts  max_suggs)) 
445 
end 

446 

57125
2f620ef839ee
added another way of invoking Python code, for experiments
blanchet
parents:
57124
diff
changeset

447 
(* experimental *) 
57376  448 
fun external_tool tool max_suggs learns goal_feats = 
57291  449 
let 
57297  450 
val ser = string_of_int (serial ()) (* poor person's attempt at threadsafety *) 
451 
val ocs = TextIO.openOut ("adv_syms" ^ ser) 

452 
val ocd = TextIO.openOut ("adv_deps" ^ ser) 

453 
val ocq = TextIO.openOut ("adv_seq" ^ ser) 

454 
val occ = TextIO.openOut ("adv_conj" ^ ser) 

57296  455 

57291  456 
fun os oc s = TextIO.output (oc, s) 
57296  457 

57297  458 
fun ol _ _ _ [] = () 
459 
 ol _ f _ [e] = f e 

57291  460 
 ol oc f sep (h :: t) = (f h; os oc sep; ol oc f sep t) 
57296  461 

57294
ef9d4e1ceb00
use strings to communicate with external process, to ease debugging
blanchet
parents:
57291
diff
changeset

462 
fun do_learn (name, feats, deps) = 
57357  463 
(os ocs name; os ocs ":"; ol ocs (os ocs o quote) ", " feats; os ocs "\n"; 
57297  464 
os ocd name; os ocd ":"; ol ocd (os ocd) " " deps; os ocd "\n"; os ocq name; os ocq "\n") 
57296  465 

57291  466 
fun forkexec no = 
467 
let 

468 
val cmd = 

57297  469 
"~/misc/" ^ tool ^ " adv_syms" ^ ser ^ " adv_deps" ^ ser ^ " " ^ string_of_int no ^ 
470 
" adv_seq" ^ ser ^ " < adv_conj" ^ ser 

57291  471 
in 
472 
fst (Isabelle_System.bash_output cmd) 

473 
> space_explode " " 

57294
ef9d4e1ceb00
use strings to communicate with external process, to ease debugging
blanchet
parents:
57291
diff
changeset

474 
> filter_out (curry (op =) "") 
57291  475 
end 
476 
in 

57401
02f56126b4e4
reintroduced 'extra features' but with lower weight than before (to account for tfidf)
blanchet
parents:
57387
diff
changeset

477 
(List.app do_learn learns; ol occ (os occ o quote) ", " (map fst goal_feats); 
57297  478 
TextIO.closeOut ocs; TextIO.closeOut ocd; TextIO.closeOut ocq; TextIO.closeOut occ; 
57294
ef9d4e1ceb00
use strings to communicate with external process, to ease debugging
blanchet
parents:
57291
diff
changeset

479 
forkexec max_suggs) 
57291  480 
end 
481 

57375  482 
val k_nearest_neighbors_ext = 
57376  483 
external_tool ("newknn/knn" ^ " " ^ string_of_int number_of_nearest_neighbors) 
484 
val naive_bayes_ext = external_tool "predict/nbayes" 

57374  485 

57376  486 
fun query_external ctxt engine max_suggs learns goal_feats = 
57432  487 
(trace_msg ctxt (fn () => "MaSh query external " ^ commas (map fst goal_feats)); 
57376  488 
(case engine of 
57458  489 
MaSh_NB_Ext => naive_bayes_ext max_suggs learns goal_feats 
490 
 MaSh_kNN_Ext => k_nearest_neighbors_ext max_suggs learns goal_feats)) 

57371  491 

57378
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

492 
fun query_internal ctxt engine num_facts num_feats (fact_names, featss, depss) 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

493 
(freqs as (_, _, dffreq)) visible_facts max_suggs goal_feats int_goal_feats = 
57459  494 
let 
495 
fun nb () = 

496 
naive_bayes freqs num_facts max_suggs visible_facts int_goal_feats 

497 
> map fst 

498 
fun knn () = 

499 
k_nearest_neighbors dffreq num_facts num_feats depss featss max_suggs visible_facts 

500 
int_goal_feats 

501 
> map fst 

502 
in 

503 
(trace_msg ctxt (fn () => "MaSh query internal " ^ commas (map fst goal_feats) ^ " from {" ^ 

504 
elide_string 1000 (space_implode " " (Vector.foldr (op ::) [] fact_names)) ^ "}"); 

505 
(case engine of 

506 
MaSh_NB => nb () 

507 
 MaSh_kNN => knn () 

508 
 MaSh_NB_kNN => 

509 
let 

510 
val mess = 

511 
[(0.5 (* FUDGE *), (weight_facts_steeply (nb ()), [])), 

512 
(0.5 (* FUDGE *), (weight_facts_steeply (knn ()), []))] 

513 
in 

514 
mesh_facts (op =) max_suggs mess 

515 
end) 

516 
> map (curry Vector.sub fact_names)) 

517 
end 

57007  518 

519 
end; 

520 

521 

57432  522 
(*** Persistent, stringlytyped state ***) 
523 

524 
fun meta_char c = 

525 
if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse c = #")" orelse 

526 
c = #"," then 

527 
String.str c 

528 
else 

529 
(* fixed width, in case more digits follow *) 

530 
"%" ^ stringN_of_int 3 (Char.ord c) 

531 

532 
fun unmeta_chars accum [] = String.implode (rev accum) 

533 
 unmeta_chars accum (#"%" :: d1 :: d2 :: d3 :: cs) = 

534 
(case Int.fromString (String.implode [d1, d2, d3]) of 

535 
SOME n => unmeta_chars (Char.chr n :: accum) cs 

536 
 NONE => "" (* error *)) 

537 
 unmeta_chars _ (#"%" :: _) = "" (* error *) 

538 
 unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs 

539 

540 
val encode_str = String.translate meta_char 

541 
val decode_str = String.explode #> unmeta_chars [] 

542 

543 
val encode_strs = map encode_str #> space_implode " " 

57461  544 
val decode_strs = space_explode " " #> map decode_str 
50311  545 

57005
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

546 
datatype proof_kind = Isar_Proof  Automatic_Proof  Isar_Proof_wegen_Prover_Flop 
50311  547 

548 
fun str_of_proof_kind Isar_Proof = "i" 

50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

549 
 str_of_proof_kind Automatic_Proof = "a" 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

550 
 str_of_proof_kind Isar_Proof_wegen_Prover_Flop = "x" 
50311  551 

57005
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

552 
fun proof_kind_of_str "a" = Automatic_Proof 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

553 
 proof_kind_of_str "x" = Isar_Proof_wegen_Prover_Flop 
57005
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

554 
 proof_kind_of_str _ (* "i" *) = Isar_Proof 
50311  555 

57011
a4428f517f46
implemented learning of single proofs in SML MaSh
blanchet
parents:
57010
diff
changeset

556 
fun add_edge_to name parent = 
a4428f517f46
implemented learning of single proofs in SML MaSh
blanchet
parents:
57010
diff
changeset

557 
Graph.default_node (parent, (Isar_Proof, [], [])) 
a4428f517f46
implemented learning of single proofs in SML MaSh
blanchet
parents:
57010
diff
changeset

558 
#> Graph.add_edge (parent, name) 
a4428f517f46
implemented learning of single proofs in SML MaSh
blanchet
parents:
57010
diff
changeset

559 

57379  560 
fun add_node kind name parents feats deps (access_G, (fact_xtab, feat_xtab), learns) = 
57371  561 
((Graph.new_node (name, (kind, feats, deps)) access_G 
562 
handle Graph.DUP _ => Graph.map_node name (K (kind, feats, deps)) access_G) 

563 
> fold (add_edge_to name) parents, 

57383  564 
(add_to_xtab name fact_xtab, fold maybe_add_to_xtab feats feat_xtab), 
57379  565 
(name, feats, deps) :: learns) 
57011
a4428f517f46
implemented learning of single proofs in SML MaSh
blanchet
parents:
57010
diff
changeset

566 

50311  567 
fun try_graph ctxt when def f = 
568 
f () 

57005
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

569 
handle 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

570 
Graph.CYCLES (cycle :: _) => 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

571 
(trace_msg ctxt (fn () => "Cycle involving " ^ commas cycle ^ " when " ^ when); def) 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

572 
 Graph.DUP name => 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

573 
(trace_msg ctxt (fn () => "Duplicate fact " ^ quote name ^ " when " ^ when); def) 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

574 
 Graph.UNDEF name => 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

575 
(trace_msg ctxt (fn () => "Unknown fact " ^ quote name ^ " when " ^ when); def) 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

576 
 exn => 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

577 
if Exn.is_interrupt exn then 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

578 
reraise exn 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

579 
else 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

580 
(trace_msg ctxt (fn () => "Internal error when " ^ when ^ ":\n" ^ Runtime.exn_message exn); 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

581 
def) 
50311  582 

583 
fun graph_info G = 

584 
string_of_int (length (Graph.keys G)) ^ " node(s), " ^ 

57006  585 
string_of_int (fold (Integer.add o length o snd) (Graph.dest G) 0) ^ " edge(s), " ^ 
50311  586 
string_of_int (length (Graph.maximals G)) ^ " maximal" 
587 

53095  588 
type mash_state = 
57358  589 
{access_G : (proof_kind * string list * string list) Graph.T, 
57374  590 
xtabs : xtab * xtab, 
57378
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

591 
ffds : string vector * int list vector * int list vector, 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

592 
freqs : int vector * int Inttab.table vector * int vector, 
57365  593 
dirty_facts : string list option} 
50311  594 

57378
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

595 
val empty_xtabs = (empty_xtab, empty_xtab) 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

596 
val empty_ffds = (Vector.fromList [], Vector.fromList [], Vector.fromList []) 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

597 
val empty_freqs = (Vector.fromList [], Vector.fromList [], Vector.fromList []) 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

598 

57371  599 
val empty_state = 
600 
{access_G = Graph.empty, 

57378
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

601 
xtabs = empty_xtabs, 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

602 
ffds = empty_ffds, 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

603 
freqs = empty_freqs, 
57371  604 
dirty_facts = SOME []} : mash_state 
605 

57380  606 
fun recompute_ffds_freqs_from_learns learns ((num_facts, fact_tab), (num_feats, feat_tab)) 
607 
num_facts0 (fact_names0, featss0, depss0) freqs0 = 

57379  608 
let 
57380  609 
val fact_names = Vector.concat [fact_names0, Vector.fromList (map #1 learns)] 
610 
val featss = Vector.concat [featss0, 

611 
Vector.fromList (map (map_filter (Symtab.lookup feat_tab) o #2) learns)] 

612 
val depss = Vector.concat [depss0, 

613 
Vector.fromList (map (map_filter (Symtab.lookup fact_tab) o #3) learns)] 

57379  614 
in 
615 
((fact_names, featss, depss), 

57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

616 
MaSh.learn_facts freqs0 num_facts0 num_facts num_feats depss featss) 
57379  617 
end 
618 

57378
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

619 
fun reorder_learns (num_facts, fact_tab) learns = 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

620 
let val ary = Array.array (num_facts, ("", [], [])) in 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

621 
List.app (fn learn as (fact, _, _) => 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

622 
Array.update (ary, the (Symtab.lookup fact_tab fact), learn)) 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

623 
learns; 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

624 
Array.foldr (op ::) [] ary 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

625 
end 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

626 

57380  627 
fun recompute_ffds_freqs_from_access_G access_G (xtabs as (fact_xtab, _)) = 
57378
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

628 
let 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

629 
val learns = 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

630 
Graph.schedule (fn _ => fn (fact, (_, feats, deps)) => (fact, feats, deps)) access_G 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

631 
> reorder_learns fact_xtab 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

632 
in 
57380  633 
recompute_ffds_freqs_from_learns learns xtabs 0 empty_ffds empty_freqs 
57378
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

634 
end 
50311  635 

636 
local 

637 

57358  638 
val version = "*** MaSh version 20140625 ***" 
50357
187ae76a1757
more robustness in the face of MaSh format changes  don't overwrite new versions with old versions
blanchet
parents:
50356
diff
changeset

639 

53095  640 
exception FILE_VERSION_TOO_NEW of unit 
50311  641 

642 
fun extract_node line = 

55286  643 
(case space_explode ":" line of 
57005
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

644 
[head, tail] => 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

645 
(case (space_explode " " head, map (unprefix " ") (space_explode ";" tail)) of 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

646 
([kind, name], [parents, feats, deps]) => 
57358  647 
SOME (proof_kind_of_str kind, decode_str name, decode_strs parents, decode_strs feats, 
57010  648 
decode_strs deps) 
55286  649 
 _ => NONE) 
650 
 _ => NONE) 

50311  651 

57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

652 
fun load_state ctxt (time_state as (memory_time, _)) = 
57076
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

653 
let val path = mash_state_file () in 
57386  654 
(case try OS.FileSys.modTime (Path.implode path) of 
57076
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

655 
NONE => time_state 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

656 
 SOME disk_time => 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

657 
if Time.>= (memory_time, disk_time) then 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

658 
time_state 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

659 
else 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

660 
(disk_time, 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

661 
(case try File.read_lines path of 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

662 
SOME (version' :: node_lines) => 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

663 
let 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

664 
fun extract_line_and_add_node line = 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

665 
(case extract_node line of 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

666 
NONE => I (* should not happen *) 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

667 
 SOME (kind, name, parents, feats, deps) => add_node kind name parents feats deps) 
57011
a4428f517f46
implemented learning of single proofs in SML MaSh
blanchet
parents:
57010
diff
changeset

668 

57379  669 
val empty_G_etc = (Graph.empty, empty_xtabs, []) 
670 

671 
val (access_G, xtabs, rev_learns) = 

57076
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

672 
(case string_ord (version', version) of 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

673 
EQUAL => 
57379  674 
try_graph ctxt "loading state" empty_G_etc 
675 
(fn () => fold extract_line_and_add_node node_lines empty_G_etc) 

57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

676 
 LESS => (wipe_out_mash_state_dir (); empty_G_etc) (* cannot parse old file *) 
57076
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

677 
 GREATER => raise FILE_VERSION_TOO_NEW ()) 
57378
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

678 

57380  679 
val (ffds, freqs) = 
680 
recompute_ffds_freqs_from_learns (rev rev_learns) xtabs 0 empty_ffds empty_freqs 

57076
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

681 
in 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

682 
trace_msg ctxt (fn () => "Loaded fact graph (" ^ graph_info access_G ^ ")"); 
57378
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

683 
{access_G = access_G, xtabs = xtabs, ffds = ffds, freqs = freqs, dirty_facts = SOME []} 
57076
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

684 
end 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

685 
 _ => empty_state))) 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

686 
end 
50311  687 

57005
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

688 
fun str_of_entry (kind, name, parents, feats, deps) = 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

689 
str_of_proof_kind kind ^ " " ^ encode_str name ^ ": " ^ encode_strs parents ^ "; " ^ 
57358  690 
encode_strs feats ^ "; " ^ encode_strs deps ^ "\n" 
57005
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

691 

57365  692 
fun save_state _ (time_state as (_, {dirty_facts = SOME [], ...})) = time_state 
57378
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

693 
 save_state ctxt (memory_time, {access_G, xtabs, ffds, freqs, dirty_facts}) = 
50311  694 
let 
57005
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

695 
fun append_entry (name, ((kind, feats, deps), (parents, _))) = 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

696 
cons (kind, name, Graph.Keys.dest parents, feats, deps) 
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

697 

57076
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

698 
val path = mash_state_file () 
57365  699 
val dirty_facts' = 
57076
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

700 
(case try OS.FileSys.modTime (Path.implode path) of 
3d4b172d2209
automatically reload state file when it changes on disk
blanchet
parents:
57062
diff
changeset

701 
NONE => NONE 
57365  702 
 SOME disk_time => if Time.< (disk_time, memory_time) then dirty_facts else NONE) 
50311  703 
val (banner, entries) = 
57365  704 
(case dirty_facts' of 
55286  705 
SOME names => (NONE, fold (append_entry o Graph.get_entry access_G) names []) 
706 
 NONE => (SOME (version ^ "\n"), Graph.fold append_entry access_G [])) 

50311  707 
in 
57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

708 
(case banner of SOME s => File.write path s  NONE => (); 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

709 
entries > chunk_list 500 > List.app (File.append path o implode o map str_of_entry)) 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

710 
handle IO.Io _ => (); 
50311  711 
trace_msg ctxt (fn () => 
57005
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

712 
"Saved fact graph (" ^ graph_info access_G ^ 
57365  713 
(case dirty_facts of 
714 
SOME dirty_facts => "; " ^ string_of_int (length dirty_facts) ^ " dirty fact(s)" 

57005
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

715 
 _ => "") ^ ")"); 
57378
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

716 
(Time.now (), 
fe96689f393b
recompute learning data at learning time, not query time
blanchet
parents:
57377
diff
changeset

717 
{access_G = access_G, xtabs = xtabs, ffds = ffds, freqs = freqs, dirty_facts = SOME []}) 
50311  718 
end 
719 

57365  720 
val global_state = Synchronized.var "Sledgehammer_MaSh.global_state" (Time.zeroTime, empty_state) 
50311  721 

722 
in 

723 

57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

724 
fun map_state ctxt f = 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

725 
Synchronized.change global_state (load_state ctxt ##> f #> save_state ctxt) 
53095  726 
handle FILE_VERSION_TOO_NEW () => () 
50311  727 

57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

728 
fun peek_state ctxt = 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

729 
Synchronized.change_result global_state (perhaps (try (load_state ctxt)) #> `snd) 
50311  730 

57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

731 
fun clear_state () = 
57007  732 
Synchronized.change global_state (fn _ => 
57431
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
blanchet
parents:
57409
diff
changeset

733 
(wipe_out_mash_state_dir (); (Time.zeroTime, empty_state))) 
50311  734 

735 
end 

736 

737 

738 
(*** Isabelle helpers ***) 

739 

50722  740 
val local_prefix = "local" ^ Long_Name.separator 
48378  741 

50722  742 
fun elided_backquote_thm threshold th = 
57006  743 
elide_string threshold (backquote_thm (Proof_Context.init_global (Thm.theory_of_thm th)) th) 
48378  744 

51181
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

745 
val thy_name_of_thm = Context.theory_name o Thm.theory_of_thm 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

746 

50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

747 
fun nickname_of_thm th = 
48394
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

748 
if Thm.has_name_hint th then 
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

749 
let val hint = Thm.get_name_hint th in 
50722  750 
(* There must be a better way to detect local facts. *) 
55286  751 
(case try (unprefix local_prefix) hint of 
48394
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

752 
SOME suf => 
55286  753 
thy_name_of_thm th ^ Long_Name.separator ^ suf ^ Long_Name.separator ^ 
754 
elided_backquote_thm 50 th 

755 
 NONE => hint) 

48394
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

756 
end 
82fc8c956cdc
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents:
48392
diff
changeset

757 
else 
50722  758 
elided_backquote_thm 200 th 
48378  759 

51134  760 
fun find_suggested_facts ctxt facts = 
48330  761 
let 
51134  762 
fun add (fact as (_, th)) = Symtab.default (nickname_of_thm th, fact) 
763 
val tab = fold add facts Symtab.empty 

764 
fun lookup nick = 

765 
Symtab.lookup tab nick 

57006  766 
> tap (fn NONE => trace_msg ctxt (fn () => "Cannot find " ^ quote nick)  _ => ()) 
51134  767 
in map_filter lookup end 
48311  768 

57404
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

769 
fun free_feature_of s = "f" ^ s 
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

770 
fun thy_feature_of s = "y" ^ s 
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

771 
fun type_feature_of s = "t" ^ s 
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

772 
fun class_feature_of s = "s" ^ s 
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

773 
val local_feature = "local" 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

774 

51135  775 
fun crude_theory_ord p = 
50722  776 
if Theory.subthy p then 
777 
if Theory.eq_thy p then EQUAL else LESS 

48324  778 
else if Theory.subthy (swap p) then 
779 
GREATER 

55286  780 
else 
781 
(case int_ord (pairself (length o Theory.ancestors_of) p) of 

782 
EQUAL => string_ord (pairself Context.theory_name p) 

783 
 order => order) 

48324  784 

51135  785 
fun crude_thm_ord p = 
55286  786 
(case crude_theory_ord (pairself theory_of_thm p) of 
50359
da395f0e7dea
tweaked order of theorems to avoid forward dependencies (MaSh)
blanchet
parents:
50357
diff
changeset

787 
EQUAL => 
57039  788 
(* The hack below is necessary because of odd dependencies that are not reflected in the theory 
789 
comparison. *) 

50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

790 
let val q = pairself nickname_of_thm p in 
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

791 
(* Hack to put "xxx_def" before "xxxI" and "xxxE" *) 
55286  792 
(case bool_ord (pairself (String.isSuffix "_def") (swap q)) of 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

793 
EQUAL => string_ord q 
55286  794 
 ord => ord) 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

795 
end 
55286  796 
 ord => ord) 
48324  797 

51136  798 
val thm_less_eq = Theory.subthy o pairself theory_of_thm 
799 
fun thm_less p = thm_less_eq p andalso not (thm_less_eq (swap p)) 

800 

48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

801 
val freezeT = Type.legacy_freeze_type 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

802 

ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

803 
fun freeze (t $ u) = freeze t $ freeze u 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

804 
 freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t) 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

805 
 freeze (Var ((s, _), T)) = Free (s, freezeT T) 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

806 
 freeze (Const (s, T)) = Const (s, freezeT T) 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

807 
 freeze (Free (s, T)) = Free (s, freezeT T) 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

808 
 freeze t = t 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

809 

ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

810 
fun goal_of_thm thy = prop_of #> freeze #> cterm_of thy #> Goal.init 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

811 

54141
f57f8e7a879f
generate a comment storing the goal nickname in "learn_prover"
blanchet
parents:
54140
diff
changeset

812 
fun run_prover_for_mash ctxt params prover goal_name facts goal = 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

813 
let 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

814 
val problem = 
54141
f57f8e7a879f
generate a comment storing the goal nickname in "learn_prover"
blanchet
parents:
54140
diff
changeset

815 
{comment = "Goal: " ^ goal_name, state = Proof.init ctxt, goal = goal, subgoal = 1, 
f57f8e7a879f
generate a comment storing the goal nickname in "learn_prover"
blanchet
parents:
54140
diff
changeset

816 
subgoal_count = 1, factss = [("", facts)]} 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

817 
in 
54503  818 
get_minimizing_prover ctxt MaSh (K ()) prover params (K (K (K ""))) problem 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

819 
end 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

820 

48326  821 
val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}] 
822 

53086
15fe0ca088b3
MaSh tweaking: shorter names + killed (broken) SNoW
blanchet
parents:
53085
diff
changeset

823 
val pat_tvar_prefix = "_" 
15fe0ca088b3
MaSh tweaking: shorter names + killed (broken) SNoW
blanchet
parents:
53085
diff
changeset

824 
val pat_var_prefix = "_" 
15fe0ca088b3
MaSh tweaking: shorter names + killed (broken) SNoW
blanchet
parents:
53085
diff
changeset

825 

53089  826 
(* try "Long_Name.base_name" for shorter names *) 
57055
df3a26987a8d
reverted '' features in MaSh  these sounded like a good idea but never really worked
blanchet
parents:
57052
diff
changeset

827 
fun massage_long_name s = s 
53083
019ecbb18e3f
generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents:
53082
diff
changeset

828 

57006  829 
val crude_str_of_sort = space_implode ":" o map massage_long_name o subtract (op =) @{sort type} 
53086
15fe0ca088b3
MaSh tweaking: shorter names + killed (broken) SNoW
blanchet
parents:
53085
diff
changeset

830 

15fe0ca088b3
MaSh tweaking: shorter names + killed (broken) SNoW
blanchet
parents:
53085
diff
changeset

831 
fun crude_str_of_typ (Type (s, [])) = massage_long_name s 
57006  832 
 crude_str_of_typ (Type (s, Ts)) = massage_long_name s ^ implode (map crude_str_of_typ Ts) 
53083
019ecbb18e3f
generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents:
53082
diff
changeset

833 
 crude_str_of_typ (TFree (_, S)) = crude_str_of_sort S 
019ecbb18e3f
generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents:
53082
diff
changeset

834 
 crude_str_of_typ (TVar (_, S)) = crude_str_of_sort S 
019ecbb18e3f
generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents:
53082
diff
changeset

835 

53128  836 
fun maybe_singleton_str _ "" = [] 
837 
 maybe_singleton_str pref s = [pref ^ s] 

838 

53148
c898409d8630
fixed subtle bug with "take" + thread overlord through
blanchet
parents:
53142
diff
changeset

839 
val max_pat_breadth = 10 (* FUDGE *) 
50585  840 

57406  841 
fun term_features_of ctxt thy_name term_max_depth type_max_depth ts = 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

842 
let 
50392
190053ee24ed
expand type classes into their ancestors for MaSh
blanchet
parents:
50391
diff
changeset

843 
val thy = Proof_Context.theory_of ctxt 
53082  844 

50393  845 
val fixes = map snd (Variable.dest_fixes ctxt) 
50392
190053ee24ed
expand type classes into their ancestors for MaSh
blanchet
parents:
50391
diff
changeset

846 
val classes = Sign.classes_of thy 
53082  847 

48304  848 
fun add_classes @{sort type} = I 
50392
190053ee24ed
expand type classes into their ancestors for MaSh
blanchet
parents:
50391
diff
changeset

849 
 add_classes S = 
190053ee24ed
expand type classes into their ancestors for MaSh
blanchet
parents:
50391
diff
changeset

850 
fold (`(Sorts.super_classes classes) 
57006  851 
#> swap #> op :: 
852 
#> subtract (op =) @{sort type} #> map massage_long_name 

853 
#> map class_feature_of 

57404
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

854 
#> union (op =)) S 
53082  855 

856 
fun pattify_type 0 _ = [] 

857 
 pattify_type _ (Type (s, [])) = 

53086
15fe0ca088b3
MaSh tweaking: shorter names + killed (broken) SNoW
blanchet
parents:
53085
diff
changeset

858 
if member (op =) bad_types s then [] else [massage_long_name s] 
53082  859 
 pattify_type depth (Type (s, U :: Ts)) = 
860 
let 

861 
val T = Type (s, Ts) 

57055
df3a26987a8d
reverted '' features in MaSh  these sounded like a good idea but never really worked
blanchet
parents:
57052
diff
changeset

862 
val ps = take max_pat_breadth (pattify_type depth T) 
df3a26987a8d
reverted '' features in MaSh  these sounded like a good idea but never really worked
blanchet
parents:
57052
diff
changeset

863 
val qs = take max_pat_breadth ("" :: pattify_type (depth  1) U) 
57006  864 
in 
865 
map_product (fn p => fn "" => p  q => p ^ "(" ^ q ^ ")") ps qs 

866 
end 

53128  867 
 pattify_type _ (TFree (_, S)) = 
868 
maybe_singleton_str pat_tvar_prefix (crude_str_of_sort S) 

869 
 pattify_type _ (TVar (_, S)) = 

870 
maybe_singleton_str pat_tvar_prefix (crude_str_of_sort S) 

57006  871 

53082  872 
fun add_type_pat depth T = 
57404
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

873 
union (op =) (map type_feature_of (pattify_type depth T)) 
57006  874 

53082  875 
fun add_type_pats 0 _ = I 
57404
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

876 
 add_type_pats depth t = add_type_pat depth t #> add_type_pats (depth  1) t 
57006  877 

53083
019ecbb18e3f
generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents:
53082
diff
changeset

878 
fun add_type T = 
019ecbb18e3f
generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents:
53082
diff
changeset

879 
add_type_pats type_max_depth T 
53156  880 
#> fold_atyps_sorts (add_classes o snd) T 
57006  881 

53084  882 
fun add_subtypes (T as Type (_, Ts)) = add_type T #> fold add_subtypes Ts 
883 
 add_subtypes T = add_type T 

53082  884 

57055
df3a26987a8d
reverted '' features in MaSh  these sounded like a good idea but never really worked
blanchet
parents:
57052
diff
changeset

885 
fun pattify_term _ 0 _ = [] 
df3a26987a8d
reverted '' features in MaSh  these sounded like a good idea but never really worked
blanchet
parents:
57052
diff
changeset

886 
 pattify_term _ _ (Const (s, _)) = 
57404
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

887 
if is_widely_irrelevant_const s then [] else [massage_long_name s] 
54089
b13f6731f873
use same relevance filter for ATP and SMT solvers  attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected sideeffects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents:
54085
diff
changeset

888 
 pattify_term _ _ (Free (s, T)) = 
53128  889 
maybe_singleton_str pat_var_prefix (crude_str_of_typ T) 
53090
1426c97311f2
treat frees as both consts and vars, for more hits
blanchet
parents:
53089
diff
changeset

890 
> (if member (op =) fixes s then 
57404
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

891 
cons (free_feature_of (massage_long_name (thy_name ^ Long_Name.separator ^ s))) 
53090
1426c97311f2
treat frees as both consts and vars, for more hits
blanchet
parents:
53089
diff
changeset

892 
else 
1426c97311f2
treat frees as both consts and vars, for more hits
blanchet
parents:
53089
diff
changeset

893 
I) 
57404
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

894 
 pattify_term _ _ (Var (_, T)) = maybe_singleton_str pat_var_prefix (crude_str_of_typ T) 
54089
b13f6731f873
use same relevance filter for ATP and SMT solvers  attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected sideeffects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents:
54085
diff
changeset

895 
 pattify_term Ts _ (Bound j) = 
54695  896 
maybe_singleton_str pat_var_prefix (crude_str_of_typ (nth Ts j)) 
54089
b13f6731f873
use same relevance filter for ATP and SMT solvers  attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected sideeffects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents:
54085
diff
changeset

897 
 pattify_term Ts depth (t $ u) = 
50339
d8dae91f3107
MaSh improvements: deeper patterns + more respect for chained facts
blanchet
parents:
50338
diff
changeset

898 
let 
57055
df3a26987a8d
reverted '' features in MaSh  these sounded like a good idea but never really worked
blanchet
parents:
57052
diff
changeset

899 
val ps = take max_pat_breadth (pattify_term Ts depth t) 
57404
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

900 
val qs = take max_pat_breadth ("" :: pattify_term Ts (depth  1) u) 
53130  901 
in 
57404
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

902 
map_product (fn p => fn "" => p  q => p ^ "(" ^ q ^ ")") ps qs 
53130  903 
end 
54089
b13f6731f873
use same relevance filter for ATP and SMT solvers  attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected sideeffects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents:
54085
diff
changeset

904 
 pattify_term _ _ _ = [] 
57006  905 

57404
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

906 
fun add_term_pat Ts = union (op =) oo pattify_term Ts 
57006  907 

53130  908 
fun add_term_pats _ 0 _ = I 
57055
df3a26987a8d
reverted '' features in MaSh  these sounded like a good idea but never really worked
blanchet
parents:
57052
diff
changeset

909 
 add_term_pats Ts depth t = add_term_pat Ts depth t #> add_term_pats Ts (depth  1) t 
57006  910 

53130  911 
fun add_term Ts = add_term_pats Ts term_max_depth 
57006  912 

53085  913 
fun add_subterms Ts t = 
55286  914 
(case strip_comb t of 
54089
b13f6731f873
use same relevance filter for ATP and SMT solvers  attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected sideeffects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents:
54085
diff
changeset

915 
(Const (s, T), args) => 
b13f6731f873
use same relevance filter for ATP and SMT solvers  attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected sideeffects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents:
54085
diff
changeset

916 
(not (is_widely_irrelevant_const s) ? add_term Ts t) 
57404
a68ae60c1504
got rid of hardcoded weights, now that we have TFIDF
blanchet
parents:
57403
diff
changeset

917 
#> add_subtypes T #> fold (add_subterms Ts) args 
50857
80768e28c9ee
better handlig of builtins  at the toplevel, not in subterms
blanchet
parents:
50841
diff
changeset

918 
 (head, args) => 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

919 
(case head of 
53130  920 
Free (_, T) => add_term Ts t #> add_subtypes T 
53084  921 
 Var (_, T) => add_subtypes T 
53085  922 
 Abs (_, T, body) => add_subtypes T #> add_subterms (T :: Ts) body 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

923 
 _ => I) 
55286  924 
#> fold (add_subterms Ts) args) 
57006  925 
in 
926 
fold (add_subterms []) ts [] 

927 
end 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

928 

53085  929 
val term_max_depth = 2 
53155  930 
val type_max_depth = 1 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

931 

6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

932 
(* TODO: Generate type classes for types? *) 
57406  933 
fun features_of ctxt thy (scope, _) ts = 
50393  934 
let val thy_name = Context.theory_name thy in 
935 
thy_feature_of thy_name :: 

57406  936 
term_features_of ctxt thy_name term_max_depth type_max_depth ts 
50393  937 
> scope <> Global ? cons local_feature 
938 
end 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

939 

57006  940 
(* Too many dependencies is a sign that a decision procedure is at work. There is not much to learn 
941 
from such proofs. *) 

50434
960a3429615c
more MaSh tweaking  in particular, export the same facts in "MaSh_Export" as are later tried in "MaSh_Eval"
blanchet
parents:
50412
diff
changeset

942 
val max_dependencies = 20 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

943 

54125
420b876ff1e2
if slicing is disabled, don't enforce last slice's "max_facts", but rather the maximum "max_facts"
blanchet
parents:
54123
diff
changeset

944 
val prover_default_max_facts = 25 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

945 

48438
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

946 
(* "type_definition_xxx" facts are characterized by their use of "CollectI". *) 
50755  947 
val typedef_dep = nickname_of_thm @{thm CollectI} 
57006  948 
(* Mysterious parts of the class machinery create lots of proofs that refer exclusively to 
949 
"someI_ex" (and to some internal constructions). *) 

50755  950 
val class_some_dep = nickname_of_thm @{thm someI_ex} 
48438
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

951 

50828  952 
val fundef_ths = 
57006  953 
@{thms fundef_ex1_existence fundef_ex1_uniqueness fundef_ex1_iff fundef_default_value} 
50828  954 
> map nickname_of_thm 
955 

48438
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

956 
(* "Rep_xxx_inject", "Abs_xxx_inverse", etc., are derived using these facts. *) 
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

957 
val typedef_ths = 
57006  958 
@{thms type_definition.Abs_inverse type_definition.Rep_inverse type_definition.Rep 
959 
type_definition.Rep_inject type_definition.Abs_inject type_definition.Rep_cases 

960 
type_definition.Abs_cases type_definition.Rep_induct type_definition.Abs_induct 

961 
type_definition.Rep_range type_definition.Abs_image} 

50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

962 
> map nickname_of_thm 
48438
3e45c98fe127
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents:
48436
diff
changeset

963 

48441  964 
fun is_size_def [dep] th = 
55642
63beb38e9258
adapted to renaming of datatype 'cases' and 'recs' to 'case' and 'rec'
blanchet
parents:
55286
diff
changeset

965 
(case first_field ".rec" dep of 
57006  966 
SOME (pref, _) => 
967 
(case first_field ".size" (nickname_of_thm th) of 

968 
SOME (pref', _) => pref = pref' 

969 
 NONE => false) 

970 
 NONE => false) 

48441  971 
 is_size_def _ _ = false 
972 

51177  973 
fun trim_dependencies deps = 
50755  974 
if length deps > max_dependencies then NONE else SOME deps 
48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

975 

50755  976 
fun isar_dependencies_of name_tabs th = 
57306
ff10067b2248
optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents:
57305
diff
changeset

977 
thms_in_proof max_dependencies (SOME name_tabs) th 
ff10067b2248
optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents:
57305
diff
changeset

978 
> Option.map (fn deps => 
57006  979 
if deps = [typedef_dep] orelse deps = [class_some_dep] orelse 
980 
exists (member (op =) fundef_ths) deps orelse exists (member (op =) typedef_ths) deps orelse 

50828  981 
is_size_def deps th then 
50755  982 
[] 
983 
else 

57306
ff10067b2248
optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents:
57305
diff
changeset

984 
deps) 
48404  985 

57006  986 
fun prover_dependencies_of ctxt (params as {verbose, max_facts, ...}) prover auto_level facts 
987 
name_tabs th = 

55286  988 
(case isar_dependencies_of name_tabs th of 
57306
ff10067b2248
optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents:
57305
diff
changeset

989 
SOME [] => (false, []) 
ff10067b2248
optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents:
57305
diff
changeset

990 
 isar_deps0 => 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

991 
let 
57306
ff10067b2248
optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents:
57305
diff
changeset

992 
val isar_deps = these isar_deps0 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

993 
val thy = Proof_Context.theory_of ctxt 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

994 
val goal = goal_of_thm thy th 
54141
f57f8e7a879f
generate a comment storing the goal nickname in "learn_prover"
blanchet
parents:
54140
diff
changeset

995 
val name = nickname_of_thm th 
52196
2281f33e8da6
redid rac7830871177 to avoid duplicate fixed variable (e.g. lemma "P (a::nat)" proof  have "!!a::int. Q a" sledgehammer [e])
blanchet
parents:
52125
diff
changeset

996 
val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal goal 1 ctxt 
51136  997 
val facts = facts > filter (fn (_, th') => thm_less (th', th)) 
57006  998 

51004
5f2788c38127
distinguish raw and nonraw facts, using raw for 10 000s of facts and nonraw after selection of some hundreds
blanchet
parents:
51003
diff
changeset

999 
fun nickify ((_, stature), th) = ((nickname_of_thm th, stature), th) 
57006  1000 

50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

1001 
fun is_dep dep (_, th) = nickname_of_thm th = dep 
57006  1002 

48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

1003 
fun add_isar_dep facts dep accum = 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

1004 
if exists (is_dep dep) accum then 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

1005 
accum 
55286  1006 
else 
1007 
(case find_first (is_dep dep) facts of 

1008 
SOME ((_, status), th) => accum @ [(("", status), th)] 

57005
33f3d2ea803d
store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents:
56995
diff
changeset

1009 
 NONE => accum (* should not happen *)) 
57006  1010 

54123
271a8377656f
remove overloading of "max_facts"  it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents:
54115
diff
changeset

1011 
val mepo_facts = 
50484
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents:
50450
diff
changeset

1012 
facts 
54095  1013 
> mepo_suggested_facts ctxt params (max_facts > the_default prover_default_max_facts) NONE 
1014 
hyp_ts concl_t 

54123
271a8377656f
remove overloading of "max_facts"  it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents:
54115
diff
changeset

1015 
val facts = 
271a8377656f
remove overloading of "max_facts"  it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents:
54115
diff
changeset

1016 
mepo_facts 
50754
74a6adcb96ac
also generate queries for goals with too many Isar dependencies
blanchet
parents:
50751
diff
changeset

1017 
> fold (add_isar_dep facts) isar_deps 
50624
4d0997abce79
improved thm order hack, in case the default names are overridden
blanchet
parents:
50623
diff
changeset

1018 
> map nickify 
54123
271a8377656f
remove overloading of "max_facts"  it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents:
54115
diff
changeset

1019 
val num_isar_deps = length isar_deps 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

1020 
in 
48404  1021 
if verbose andalso auto_level = 0 then 
57017  1022 
Output.urgent_message ("MaSh: " ^ quote prover ^ " on " ^ quote name ^ " with " ^ 
1023 
string_of_int num_isar_deps ^ " + " ^ string_of_int (length facts  num_isar_deps) ^ 

1024 
" facts.") 

48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

1025 
else 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

1026 
(); 
55286  1027 
(case run_prover_for_mash ctxt params prover name facts goal of 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

1028 
{outcome = NONE, used_facts, ...} => 
48404  1029 
(if verbose andalso auto_level = 0 then 
48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

1030 
let val num_facts = length used_facts in 
57017  1031 
Output.urgent_message ("Found proof with " ^ string_of_int num_facts ^ " fact" ^ 
1032 
plural_s num_facts ^ ".") 

48392
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

1033 
end 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

1034 
else 
ca998fa08cd9
added "learn_from_atp" command to MaSh, for patient users
blanchet
parents:
48390
diff
changeset

1035 
(); 
50754
74a6adcb96ac
also generate queries for goals with too many Isar dependencies
blanchet
parents:
50751
diff
changeset

1036 
(true, map fst used_facts)) 
55286  1037 
 _ => (false, isar_deps)) 
1038 
end) 

48251
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

1039 

6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

1040 

6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

1041 
(*** Highlevel communication with MaSh ***) 
6cdcfbddc077
moved most of MaSh exporter code to Sledgehammer
blanchet
parents:
48249
diff
changeset

1042 

51182
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1043 
(* In the following functions, chunks are risers w.r.t. "thm_less_eq". *) 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1044 

51181
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1045 
fun chunks_and_parents_for chunks th = 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1046 
let 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1047 
fun insert_parent new parents = 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1048 
let val parents = parents > filter_out (fn p => thm_less_eq (p, new)) in 
57096
e4074b91b2a6
always remove duplicates in meshing + use weights for Naive Bayes
blanchet
parents:
57095
diff
changeset

1049 
parents > forall (fn p => not (thm_less_eq (new, p))) parents ? cons new 
51181
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1050 
end 
57006  1051 

51181
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1052 
fun rechunk seen (rest as th' :: ths) = 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1053 
if thm_less_eq (th', th) then (rev seen, rest) 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1054 
else rechunk (th' :: seen) ths 
57006  1055 

51181
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1056 
fun do_chunk [] accum = accum 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1057 
 do_chunk (chunk as hd_chunk :: _) (chunks, parents) = 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1058 
if thm_less_eq (hd_chunk, th) then 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1059 
(chunk :: chunks, insert_parent hd_chunk parents) 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1060 
else if thm_less_eq (List.last chunk, th) then 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1061 
let val (front, back as hd_back :: _) = rechunk [] chunk in 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1062 
(front :: back :: chunks, insert_parent hd_back parents) 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1063 
end 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1064 
else 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1065 
(chunk :: chunks, parents) 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1066 
in 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1067 
fold_rev do_chunk chunks ([], []) 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1068 
>> cons [] 
51182
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1069 
> map nickname_of_thm 
51181
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1070 
end 
d0fa18638478
implement (more) accurate computation of parents
blanchet
parents:
51180
diff
changeset

1071 

51182
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1072 
fun attach_parents_to_facts _ [] = [] 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1073 
 attach_parents_to_facts old_facts (facts as (_, th) :: _) = 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1074 
let 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1075 
fun do_facts _ [] = [] 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1076 
 do_facts (_, parents) [fact] = [(parents, fact)] 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1077 
 do_facts (chunks, parents) 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1078 
((fact as (_, th)) :: (facts as (_, th') :: _)) = 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1079 
let 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1080 
val chunks = app_hd (cons th) chunks 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1081 
val chunks_and_parents' = 
57370  1082 
if thm_less_eq (th, th') andalso thy_name_of_thm th = thy_name_of_thm th' then 
51182
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1083 
(chunks, [nickname_of_thm th]) 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1084 
else 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1085 
chunks_and_parents_for chunks th' 
57006  1086 
in 
1087 
(parents, fact) :: do_facts chunks_and_parents' facts 

1088 
end 

51182
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1089 
in 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1090 
old_facts @ facts 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1091 
> do_facts (chunks_and_parents_for [[]] th) 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1092 
> drop (length old_facts) 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
blanchet
parents:
51181
diff
changeset

1093 
end 
51177  1094 

53095  1095 
fun maximal_wrt_graph G keys = 
1096 
let 

1097 
val tab = Symtab.empty > fold (fn name => Symtab.default (name, ())) keys 

57006  1098 

57012  1099 
fun insert_new seen name = not (Symtab.defined seen name) ? insert (op =) name 
57006  1100 

53095  1101 
fun num_keys keys = Graph.Keys.fold (K (Integer.add 1)) keys 0 
57006  1102 

53095  1103 
fun find_maxes _ (maxs, []) = map snd maxs 
1104 
 find_maxes seen (maxs, new :: news) = 

57012  1105 
find_maxes (seen > num_keys (Graph.imm_succs G new) > 1 ? Symtab.default (new, ())) 
1106 
(if Symtab.defined tab new then 

1107 
let 

1108 
val newp = Graph.all_preds G [new] 

1109 
fun is_ancestor x yp = member (op =) yp x 

1110 
val maxs = maxs > filter (fn (_, max) => not (is_ancestor max newp)) 

1111 
in 

1112 
if exists (is_ancestor new o fst) maxs then (maxs, news) 

1113 
else ((newp, new) :: filter_out (fn (_, max) => is_ancestor max newp) maxs, news) 

1114 
end 

1115 
else 

1116 
(maxs, Graph.Keys.fold (insert_new seen) (Graph.imm_preds G new) news)) 

57006  1117 
in 
1118 
find_maxes Symtab.empty ([], Graph.maximals G) 

1119 
end 

53095  1120 

57460  1121 
fun maximal_wrt_access_graph _ [] = [] 
1122 
 maximal_wrt_access_graph access_G ((fact as (_, th)) :: facts) = 

1123 
let val thy = theory_of_thm th in 

1124 
fact :: filter_out (fn (_, th') => Theory.subthy (theory_of_thm th', thy)) facts 

1125 
> map (nickname_of_thm o snd) 

1126 
> maximal_wrt_graph access_G 

1127 
end 

53095  1128 

1129 
fun is_fact_in_graph access_G = can (Graph.get_node access_G) o nickname_of_thm 

1130 

53197  1131 
val chained_feature_factor = 0.5 (* FUDGE *) 
57405
1f49da059947
reintroduced 'extra features' + only print message in verbose mode
blanchet
parents:
57404
diff
changeset

1132 
val extra_feature_factor = 0.1 (* FUDGE *) 
1f49da059947
reintroduced 'extra features' + only print message in verbose mode
blanchet
parents:
57404
diff
changeset

1133 
val num_extra_feature_facts = 10 (* FUDGE *) 
53095  1134 

57386  1135 
val max_proximity_facts = 100 
53095  1136 

54060  1137 
fun find_mash_suggestions ctxt max_facts suggs facts chained raw_unknown = 
1138 
let 

1139 
val inter_fact = inter (eq_snd Thm.eq_thm_prop) 

1140 
val raw_mash = find_suggested_facts ctxt facts suggs 

1141 
val proximate = take max_proximity_facts facts 

1142 
val unknown_chained = inter_fact raw_unknown chained 

1143 
val unknown_proximate = inter_fact raw_unknown proximate 

1144 
val mess = 

1145 
[(0.9 (* FUDGE *), (map (rpair 1.0) unknown_chained, [])), 

1146 
(0.4 (* FUDGE *), (weight_facts_smoothly unknown_proximate, [])), 

1147 
(0.1 (* FUDGE *), (weight_facts_steeply raw_mash, raw_unknown))] 

57007  1148 
val unknown = raw_unknown 
1149 
> fold (subtract (eq_snd Thm.eq_thm_prop)) [unknown_chained, unknown_proximate] 

57006  1150 
in 
57150
f591096a9c94
add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents:
57133
diff
changeset

1151 
(mesh_facts (eq_snd (gen_eq_thm ctxt)) max_facts mess, unknown) 
57006 