| author | paulson | 
| Tue, 11 Jul 2023 20:22:08 +0100 | |
| changeset 78321 | 021fb1b01de5 | 
| parent 77889 | 5db014c36f42 | 
| child 80306 | c2537860ccf8 | 
| permissions | -rw-r--r-- | 
| 48234 | 1 | (* Title: HOL/TPTP/mash_export.ML | 
| 2 | Author: Jasmin Blanchette, TU Muenchen | |
| 3 | Copyright 2012 | |
| 4 | ||
| 5 | Export Isabelle theory information for MaSh (Machine-learning for Sledgehammer). | |
| 6 | *) | |
| 7 | ||
| 8 | signature MASH_EXPORT = | |
| 9 | sig | |
| 55201 | 10 | type params = Sledgehammer_Prover.params | 
| 48235 | 11 | |
| 57431 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 12 | val in_range : int * int option -> int -> bool | 
| 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 13 | val extract_suggestions : string -> string * (string * real) list | 
| 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 14 | |
| 57121 | 15 | val generate_accessibility : Proof.context -> theory list -> string -> unit | 
| 55212 | 16 | val generate_features : Proof.context -> theory list -> string -> unit | 
| 57121 | 17 | val generate_isar_dependencies : Proof.context -> int * int option -> theory list -> string -> | 
| 18 | unit | |
| 19 | val generate_prover_dependencies : Proof.context -> params -> int * int option -> theory list -> | |
| 55212 | 20 | string -> unit | 
| 21 | val generate_isar_commands : Proof.context -> string -> (int * int option) * int -> theory list -> | |
| 57121 | 22 | int -> string -> unit | 
| 55212 | 23 | val generate_prover_commands : Proof.context -> params -> (int * int option) * int -> | 
| 57121 | 24 | theory list -> int -> string -> unit | 
| 55212 | 25 | val generate_mepo_suggestions : Proof.context -> params -> (int * int option) * int -> | 
| 57121 | 26 | theory list -> int -> string -> unit | 
| 57457 | 27 | val generate_mash_suggestions : string -> Proof.context -> params -> (int * int option) * int -> | 
| 57121 | 28 | theory list -> int -> string -> unit | 
| 50814 | 29 | val generate_mesh_suggestions : int -> string -> string -> string -> unit | 
| 48234 | 30 | end; | 
| 31 | ||
| 32 | structure MaSh_Export : MASH_EXPORT = | |
| 33 | struct | |
| 34 | ||
| 50485 
3c6ac2da2f45
merge aliased theorems in MaSh dependencies, modulo symmetry of equality
 blanchet parents: 
50484diff
changeset | 35 | open Sledgehammer_Fact | 
| 55212 | 36 | open Sledgehammer_Prover_ATP | 
| 48381 | 37 | open Sledgehammer_MePo | 
| 38 | open Sledgehammer_MaSh | |
| 48245 | 39 | |
| 50559 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 40 | fun in_range (from, to) j = | 
| 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 41 | j >= from andalso (to = NONE orelse j <= the to) | 
| 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 42 | |
| 57432 | 43 | fun encode_feature (names, weight) = | 
| 44 | encode_str names ^ (if Real.== (weight, 1.0) then "" else "=" ^ Real.toString weight) | |
| 45 | ||
| 46 | val encode_features = map encode_feature #> space_implode " " | |
| 47 | ||
| 57431 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 48 | (* The suggested weights do not make much sense. *) | 
| 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 49 | fun extract_suggestion sugg = | 
| 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 50 | (case space_explode "=" sugg of | 
| 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 51 | [name, weight] => SOME (decode_str name, Real.fromString weight |> the_default 1.0) | 
| 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 52 | | [name] => SOME (decode_str name, 1.0) | 
| 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 53 | | _ => NONE) | 
| 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 54 | |
| 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 55 | fun extract_suggestions line = | 
| 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 56 | (case space_explode ":" line of | 
| 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 57 | [goal, suggs] => (decode_str goal, map_filter extract_suggestion (space_explode " " suggs)) | 
| 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 58 |   | _ => ("", []))
 | 
| 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 59 | |
| 50349 | 60 | fun has_thm_thy th thy = | 
| 77889 
5db014c36f42
clarified signature: explicitly distinguish theory_base_name vs. theory_long_name;
 wenzelm parents: 
76092diff
changeset | 61 | Context.theory_base_name thy = Thm.theory_base_name th | 
| 48316 
252f45c04042
drastic overhaul of MaSh data structures + fixed a few performance issues
 blanchet parents: 
48315diff
changeset | 62 | |
| 50349 | 63 | fun has_thys thys th = exists (has_thm_thy th) thys | 
| 64 | ||
| 65 | fun all_facts ctxt = | |
| 48531 | 66 | let val css = Sledgehammer_Fact.clasimpset_rule_table_of ctxt in | 
| 73942 | 67 | Sledgehammer_Fact.all_facts ctxt true Keyword.empty_keywords [] [] css | 
| 60641 | 68 | |> sort (crude_thm_ord ctxt o apply2 snd) | 
| 48531 | 69 | end | 
| 70 | ||
| 51182 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
 blanchet parents: 
51177diff
changeset | 71 | fun filter_accessible_from th = filter (fn (_, th') => thm_less (th', th)) | 
| 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
 blanchet parents: 
51177diff
changeset | 72 | |
| 57121 | 73 | fun generate_accessibility ctxt thys file_name = | 
| 48304 | 74 | let | 
| 57121 | 75 | val path = Path.explode file_name | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 76 | |
| 57406 | 77 | fun do_fact (parents, fact) = | 
| 57122 | 78 | let val s = encode_str fact ^ ": " ^ encode_strs parents ^ "\n" in | 
| 57406 | 79 | File.append path s | 
| 57122 | 80 | end | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 81 | |
| 50611 
99af6b652b3a
linearize eval driver, to work around horrible bug in previous implementation
 blanchet parents: 
50582diff
changeset | 82 | val facts = | 
| 50349 | 83 | all_facts ctxt | 
| 51182 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
 blanchet parents: 
51177diff
changeset | 84 | |> filter_out (has_thys thys o snd) | 
| 61321 | 85 | |> attach_parents_to_facts [] | 
| 86 | |> map (apsnd (nickname_of_thm o snd)) | |
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 87 | in | 
| 57122 | 88 | File.write path ""; | 
| 57406 | 89 | List.app do_fact facts | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 90 | end | 
| 48304 | 91 | |
| 55212 | 92 | fun generate_features ctxt thys file_name = | 
| 48304 | 93 | let | 
| 57388 | 94 | val path = Path.explode file_name | 
| 48304 | 95 | val _ = File.write path "" | 
| 51182 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
 blanchet parents: 
51177diff
changeset | 96 | val facts = all_facts ctxt |> filter_out (has_thys thys o snd) | 
| 57122 | 97 | |
| 48385 | 98 | fun do_fact ((_, stature), th) = | 
| 48304 | 99 | let | 
| 61321 | 100 | val name = nickname_of_thm th | 
| 77889 
5db014c36f42
clarified signature: explicitly distinguish theory_base_name vs. theory_long_name;
 wenzelm parents: 
76092diff
changeset | 101 | val feats = features_of ctxt (Thm.theory_base_name th) stature [Thm.prop_of th] | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 102 | val s = encode_str name ^ ": " ^ encode_strs (sort string_ord feats) ^ "\n" | 
| 57122 | 103 | in | 
| 104 | File.append path s | |
| 105 | end | |
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 106 | in | 
| 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 107 | List.app do_fact facts | 
| 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 108 | end | 
| 48304 | 109 | |
| 51034 | 110 | val prover_marker = "$a" | 
| 51033 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 111 | val isar_marker = "$i" | 
| 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 112 | val omitted_marker = "$o" | 
| 51034 | 113 | val unprovable_marker = "$u" (* axiom or definition or characteristic theorem *) | 
| 51033 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 114 | val prover_failed_marker = "$x" | 
| 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 115 | |
| 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 116 | fun smart_dependencies_of ctxt params_opt facts name_tabs th isar_deps_opt = | 
| 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 117 | let | 
| 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 118 | val (marker, deps) = | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 119 | (case params_opt of | 
| 51033 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 120 |         SOME (params as {provers = prover :: _, ...}) =>
 | 
| 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 121 | prover_dependencies_of ctxt params prover 0 facts name_tabs th | 
| 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 122 | |>> (fn true => prover_marker | false => prover_failed_marker) | 
| 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 123 | | NONE => | 
| 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 124 | let | 
| 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 125 | val deps = | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 126 | (case isar_deps_opt of | 
| 61321 | 127 | NONE => isar_dependencies_of name_tabs th | 
| 57306 
ff10067b2248
optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
 blanchet parents: 
57295diff
changeset | 128 | | deps => deps) | 
| 
ff10067b2248
optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
 blanchet parents: 
57295diff
changeset | 129 | in | 
| 57351 | 130 | (case deps of | 
| 131 | NONE => (omitted_marker, []) | |
| 132 | | SOME [] => (unprovable_marker, []) | |
| 133 | | SOME deps => (isar_marker, deps)) | |
| 57306 
ff10067b2248
optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
 blanchet parents: 
57295diff
changeset | 134 | end) | 
| 51033 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 135 | in | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 136 | (case trim_dependencies deps of | 
| 51033 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 137 | SOME deps => (marker, deps) | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 138 | | NONE => (omitted_marker, [])) | 
| 51033 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 139 | end | 
| 50411 | 140 | |
| 57121 | 141 | fun generate_isar_or_prover_dependencies ctxt params_opt range thys file_name = | 
| 48304 | 142 | let | 
| 57121 | 143 | val path = Path.explode file_name | 
| 144 | val facts = filter_out (has_thys thys o snd) (all_facts ctxt) | |
| 61321 | 145 | val name_tabs = build_name_tables nickname_of_thm facts | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 146 | |
| 50559 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 147 | fun do_fact (j, (_, th)) = | 
| 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 148 | if in_range range j then | 
| 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 149 | let | 
| 61321 | 150 | val name = nickname_of_thm th | 
| 50561 | 151 |           val _ = tracing ("Fact " ^ string_of_int j ^ ": " ^ name)
 | 
| 57121 | 152 | val access_facts = filter_accessible_from th facts | 
| 153 | val (marker, deps) = smart_dependencies_of ctxt params_opt access_facts name_tabs th NONE | |
| 154 | in | |
| 155 | encode_str name ^ ": " ^ marker ^ " " ^ encode_strs deps ^ "\n" | |
| 156 | end | |
| 50559 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 157 | else | 
| 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 158 | "" | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 159 | |
| 57352 
9801e9fa9270
avoid parallelism, since it confuses the global state and leads to cheating (with 'sml_xxx' engines)
 blanchet parents: 
57351diff
changeset | 160 | val lines = map do_fact (tag_list 1 facts) | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 161 | in | 
| 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 162 | File.write_list path lines | 
| 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 163 | end | 
| 48304 | 164 | |
| 50411 | 165 | fun generate_isar_dependencies ctxt = | 
| 54118 | 166 | generate_isar_or_prover_dependencies ctxt NONE | 
| 50411 | 167 | |
| 50484 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
 blanchet parents: 
50442diff
changeset | 168 | fun generate_prover_dependencies ctxt params = | 
| 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
 blanchet parents: 
50442diff
changeset | 169 | generate_isar_or_prover_dependencies ctxt (SOME params) | 
| 50411 | 170 | |
| 75044 | 171 | fun is_bad_query ctxt step j th isar_deps = | 
| 50954 | 172 | j mod step <> 0 orelse | 
| 58253 
30e7fecc9e42
reverted 83a8570b44bc, which was a misunderstanding
 blanchet parents: 
58204diff
changeset | 173 | Thm.legacy_get_kind th = "" orelse | 
| 57306 
ff10067b2248
optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
 blanchet parents: 
57295diff
changeset | 174 | null (the_list isar_deps) orelse | 
| 73942 | 175 | is_blacklisted_or_something (Thm.get_name_hint th) | 
| 50515 
c4a27ab89c9b
shared bad MaSh query detection between MePo and MaSh, so that the generated files mirror each other
 blanchet parents: 
50511diff
changeset | 176 | |
| 57121 | 177 | fun generate_isar_or_prover_commands ctxt prover params_opt (range, step) thys max_suggs file_name = | 
| 48234 | 178 | let | 
| 57388 | 179 | val path = Path.explode file_name | 
| 50349 | 180 | val facts = all_facts ctxt | 
| 50485 
3c6ac2da2f45
merge aliased theorems in MaSh dependencies, modulo symmetry of equality
 blanchet parents: 
50484diff
changeset | 181 | val (new_facts, old_facts) = facts |> List.partition (has_thys thys o snd) | 
| 61321 | 182 | val name_tabs = build_name_tables nickname_of_thm facts | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 183 | |
| 57407 
140e16bc2a40
use right theory name for theorems in evaluation driver
 blanchet parents: 
57406diff
changeset | 184 | fun do_fact (j, (name, (parents, ((_, stature), th)))) = | 
| 50559 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 185 | if in_range range j then | 
| 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 186 | let | 
| 50561 | 187 |           val _ = tracing ("Fact " ^ string_of_int j ^ ": " ^ name)
 | 
| 61321 | 188 | val isar_deps = isar_dependencies_of name_tabs th | 
| 75044 | 189 | val do_query = not (is_bad_query ctxt step j th isar_deps) | 
| 77889 
5db014c36f42
clarified signature: explicitly distinguish theory_base_name vs. theory_long_name;
 wenzelm parents: 
76092diff
changeset | 190 | val goal_feats = features_of ctxt (Thm.theory_base_name th) stature [Thm.prop_of th] | 
| 57121 | 191 | val access_facts = filter_accessible_from th new_facts @ old_facts | 
| 51033 
177db6811f11
added markers in proofs identifying origin of proofs, in eval driver
 blanchet parents: 
51020diff
changeset | 192 | val (marker, deps) = | 
| 57306 
ff10067b2248
optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
 blanchet parents: 
57295diff
changeset | 193 | smart_dependencies_of ctxt params_opt access_facts name_tabs th isar_deps | 
| 57122 | 194 | |
| 53140 
a1235e90da5f
pour extra features from proximate facts into goal, in exporter
 blanchet parents: 
53127diff
changeset | 195 | fun extra_features_of (((_, stature), th), weight) = | 
| 59582 | 196 | [Thm.prop_of th] | 
| 77889 
5db014c36f42
clarified signature: explicitly distinguish theory_base_name vs. theory_long_name;
 wenzelm parents: 
76092diff
changeset | 197 | |> features_of ctxt (Thm.theory_base_name th) stature | 
| 57406 | 198 | |> map (rpair (weight * extra_feature_factor)) | 
| 57122 | 199 | |
| 50559 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 200 | val query = | 
| 53127 | 201 | if do_query then | 
| 53140 
a1235e90da5f
pour extra features from proximate facts into goal, in exporter
 blanchet parents: 
53127diff
changeset | 202 | let | 
| 53141 
d27e99a6a679
take chained and proximate facts into consideration when computing MaSh features
 blanchet parents: 
53140diff
changeset | 203 | val query_feats = | 
| 53140 
a1235e90da5f
pour extra features from proximate facts into goal, in exporter
 blanchet parents: 
53127diff
changeset | 204 | new_facts | 
| 
a1235e90da5f
pour extra features from proximate facts into goal, in exporter
 blanchet parents: 
53127diff
changeset | 205 | |> drop (j - num_extra_feature_facts) | 
| 
a1235e90da5f
pour extra features from proximate facts into goal, in exporter
 blanchet parents: 
53127diff
changeset | 206 | |> take num_extra_feature_facts | 
| 
a1235e90da5f
pour extra features from proximate facts into goal, in exporter
 blanchet parents: 
53127diff
changeset | 207 | |> rev | 
| 
a1235e90da5f
pour extra features from proximate facts into goal, in exporter
 blanchet parents: 
53127diff
changeset | 208 | |> weight_facts_steeply | 
| 
a1235e90da5f
pour extra features from proximate facts into goal, in exporter
 blanchet parents: 
53127diff
changeset | 209 | |> map extra_features_of | 
| 57406 | 210 | |> rpair (map (rpair 1.0) goal_feats) | 
| 211 | |-> fold (union (eq_fst (op =))) | |
| 53140 
a1235e90da5f
pour extra features from proximate facts into goal, in exporter
 blanchet parents: 
53127diff
changeset | 212 | in | 
| 57122 | 213 | "? " ^ string_of_int max_suggs ^ " # " ^ encode_str name ^ ": " ^ | 
| 214 | encode_strs parents ^ "; " ^ encode_features query_feats ^ "\n" | |
| 53140 
a1235e90da5f
pour extra features from proximate facts into goal, in exporter
 blanchet parents: 
53127diff
changeset | 215 | end | 
| 53127 | 216 | else | 
| 217 | "" | |
| 50754 
74a6adcb96ac
also generate queries for goals with too many Isar dependencies
 blanchet parents: 
50735diff
changeset | 218 | val update = | 
| 57406 | 219 | "! " ^ encode_str name ^ ": " ^ encode_strs parents ^ "; " ^ encode_strs goal_feats ^ | 
| 220 | "; " ^ marker ^ " " ^ encode_strs deps ^ "\n" | |
| 50559 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 221 | in query ^ update end | 
| 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 222 | else | 
| 
89c0d2f13cca
MaSh exporter can now export subsets of the facts, as consecutive ranges
 blanchet parents: 
50523diff
changeset | 223 | "" | 
| 57122 | 224 | |
| 51182 
962190eab40d
provide two modes for MaSh driver: linearized or real visibility
 blanchet parents: 
51177diff
changeset | 225 | val new_facts = | 
| 60641 | 226 | new_facts | 
| 61321 | 227 | |> attach_parents_to_facts old_facts | 
| 228 | |> map (`(nickname_of_thm o snd o snd)) | |
| 57407 
140e16bc2a40
use right theory name for theorems in evaluation driver
 blanchet parents: 
57406diff
changeset | 229 | val lines = map do_fact (tag_list 1 new_facts) | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 230 | in | 
| 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 231 | File.write_list path lines | 
| 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 232 | end | 
| 48239 
0016290f904c
generate Meng--Paulson facts for evaluation purposes
 blanchet parents: 
48235diff
changeset | 233 | |
| 50411 | 234 | fun generate_isar_commands ctxt prover = | 
| 50954 | 235 | generate_isar_or_prover_commands ctxt prover NONE | 
| 50411 | 236 | |
| 50484 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
 blanchet parents: 
50442diff
changeset | 237 | fun generate_prover_commands ctxt (params as {provers = prover :: _, ...}) =
 | 
| 
8ec31bdb9d36
adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
 blanchet parents: 
50442diff
changeset | 238 | generate_isar_or_prover_commands ctxt prover (SOME params) | 
| 50411 | 239 | |
| 57120 | 240 | fun generate_mepo_or_mash_suggestions mepo_or_mash_suggested_facts ctxt | 
| 57121 | 241 |     (params as {provers = prover :: _, ...}) (range, step) thys max_suggs file_name =
 | 
| 48239 
0016290f904c
generate Meng--Paulson facts for evaluation purposes
 blanchet parents: 
48235diff
changeset | 242 | let | 
| 57388 | 243 | val path = Path.explode file_name | 
| 50349 | 244 | val facts = all_facts ctxt | 
| 50485 
3c6ac2da2f45
merge aliased theorems in MaSh dependencies, modulo symmetry of equality
 blanchet parents: 
50484diff
changeset | 245 | val (new_facts, old_facts) = facts |> List.partition (has_thys thys o snd) | 
| 61321 | 246 | val name_tabs = build_name_tables nickname_of_thm facts | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 247 | |
| 50814 | 248 | fun do_fact (j, ((_, th), old_facts)) = | 
| 50906 | 249 | if in_range range j then | 
| 250 | let | |
| 61321 | 251 | val name = nickname_of_thm th | 
| 50906 | 252 |           val _ = tracing ("Fact " ^ string_of_int j ^ ": " ^ name)
 | 
| 253 | val goal = goal_of_thm (Proof_Context.theory_of ctxt) th | |
| 52196 
2281f33e8da6
redid rac7830871177 to avoid duplicate fixed variable (e.g. lemma "P (a::nat)" proof - have "!!a::int. Q a" sledgehammer [e])
 blanchet parents: 
52125diff
changeset | 254 | val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal goal 1 ctxt | 
| 61321 | 255 | val isar_deps = isar_dependencies_of name_tabs th | 
| 50906 | 256 | in | 
| 75044 | 257 | if is_bad_query ctxt step j th isar_deps then | 
| 50906 | 258 | "" | 
| 259 | else | |
| 260 | let | |
| 261 | val suggs = | |
| 262 | old_facts | |
| 57121 | 263 | |> filter_accessible_from th | 
| 77889 
5db014c36f42
clarified signature: explicitly distinguish theory_base_name vs. theory_long_name;
 wenzelm parents: 
76092diff
changeset | 264 | |> mepo_or_mash_suggested_facts ctxt (Thm.theory_base_name th) | 
| 60649 
e007aa6a8aa2
more explicit use of context and elimination of Thm.theory_of_thm, although unclear (and untested?) situations remain;
 wenzelm parents: 
60641diff
changeset | 265 | params max_suggs hyp_ts concl_t | 
| 61321 | 266 | |> map (nickname_of_thm o snd) | 
| 57120 | 267 | in | 
| 268 | encode_str name ^ ": " ^ encode_strs suggs ^ "\n" | |
| 269 | end | |
| 50906 | 270 | end | 
| 271 | else | |
| 272 | "" | |
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 273 | |
| 50519 | 274 | fun accum x (yss as ys :: _) = (x :: ys) :: yss | 
| 57295 
2ccd6820f74e
changed order of facts so that 'name_tabs' has the same order everywhere (which affects unaliasing)
 blanchet parents: 
57150diff
changeset | 275 | val old_factss = tl (fold accum new_facts [rev old_facts]) | 
| 57352 
9801e9fa9270
avoid parallelism, since it confuses the global state and leads to cheating (with 'sml_xxx' engines)
 blanchet parents: 
57351diff
changeset | 276 | val lines = map do_fact (tag_list 1 (new_facts ~~ rev (map rev old_factss))) | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 277 | in | 
| 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 278 | File.write_list path lines | 
| 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 279 | end | 
| 48234 | 280 | |
| 57120 | 281 | val generate_mepo_suggestions = | 
| 282 | generate_mepo_or_mash_suggestions | |
| 57407 
140e16bc2a40
use right theory name for theorems in evaluation driver
 blanchet parents: 
57406diff
changeset | 283 | (fn ctxt => fn _ => fn params => fn max_suggs => fn hyp_ts => fn concl_t => | 
| 57150 
f591096a9c94
add option to keep duplicates, for more precise evaluation of relevance filters
 blanchet parents: 
57125diff
changeset | 284 | not (Config.get ctxt Sledgehammer_MaSh.duplicates) ? Sledgehammer_Fact.drop_duplicate_facts | 
| 57120 | 285 | #> Sledgehammer_MePo.mepo_suggested_facts ctxt params max_suggs NONE hyp_ts concl_t) | 
| 286 | ||
| 64522 
b66f8caf86b6
made MaSh faster and less likely to hang seemingly forever
 blanchet parents: 
63692diff
changeset | 287 | fun generate_mash_suggestions algorithm ctxt = | 
| 76092 | 288 | (Options.put_default \<^system_option>\<open>MaSh\<close> algorithm; (* FIXME fragile *) | 
| 64522 
b66f8caf86b6
made MaSh faster and less likely to hang seemingly forever
 blanchet parents: 
63692diff
changeset | 289 | Sledgehammer_MaSh.mash_unlearn ctxt; | 
| 57120 | 290 | generate_mepo_or_mash_suggestions | 
| 60649 
e007aa6a8aa2
more explicit use of context and elimination of Thm.theory_of_thm, although unclear (and untested?) situations remain;
 wenzelm parents: 
60641diff
changeset | 291 |      (fn ctxt => fn thy_name => fn params as {provers = prover :: _, ...} =>
 | 
| 
e007aa6a8aa2
more explicit use of context and elimination of Thm.theory_of_thm, although unclear (and untested?) situations remain;
 wenzelm parents: 
60641diff
changeset | 292 | fn max_suggs => fn hyp_ts => fn concl_t => | 
| 57431 
02c408aed5ee
killed Python version of MaSh, now that the SML version works adequately
 blanchet parents: 
57407diff
changeset | 293 | tap (Sledgehammer_MaSh.mash_learn_facts ctxt params prover 2 false | 
| 57120 | 294 | Sledgehammer_Util.one_year) | 
| 60649 
e007aa6a8aa2
more explicit use of context and elimination of Thm.theory_of_thm, although unclear (and untested?) situations remain;
 wenzelm parents: 
60641diff
changeset | 295 | #> Sledgehammer_MaSh.mash_suggested_facts ctxt thy_name params max_suggs hyp_ts concl_t | 
| 64522 
b66f8caf86b6
made MaSh faster and less likely to hang seemingly forever
 blanchet parents: 
63692diff
changeset | 296 | #> fst) ctxt) | 
| 57120 | 297 | |
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 298 | fun generate_mesh_suggestions max_suggs mash_file_name mepo_file_name mesh_file_name = | 
| 50814 | 299 | let | 
| 300 | val mesh_path = Path.explode mesh_file_name | |
| 301 | val _ = File.write mesh_path "" | |
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 302 | |
| 50814 | 303 | fun do_fact (mash_line, mepo_line) = | 
| 304 | let | |
| 50829 | 305 | val (name, mash_suggs) = | 
| 50814 | 306 | extract_suggestions mash_line | 
| 57125 
2f620ef839ee
added another way of invoking Python code, for experiments
 blanchet parents: 
57122diff
changeset | 307 | ||> (map fst #> weight_facts_steeply) | 
| 50829 | 308 | val (name', mepo_suggs) = | 
| 50814 | 309 | extract_suggestions mepo_line | 
| 57125 
2f620ef839ee
added another way of invoking Python code, for experiments
 blanchet parents: 
57122diff
changeset | 310 | ||> (map fst #> weight_facts_steeply) | 
| 63692 | 311 | val _ = if name = name' then () else error "Input files out of sync" | 
| 50814 | 312 | val mess = | 
| 313 | [(mepo_weight, (mepo_suggs, [])), | |
| 314 | (mash_weight, (mash_suggs, []))] | |
| 61322 | 315 | val mesh_suggs = mesh_facts I (op =) max_suggs mess | 
| 50829 | 316 | val mesh_line = encode_str name ^ ": " ^ encode_strs mesh_suggs ^ "\n" | 
| 50814 | 317 | in File.append mesh_path mesh_line end | 
| 57055 
df3a26987a8d
reverted '|' features in MaSh -- these sounded like a good idea but never really worked
 blanchet parents: 
57005diff
changeset | 318 | |
| 75616 
986506233812
clarified signature: File.read_lines is based on scalable Bytes.T;
 wenzelm parents: 
75612diff
changeset | 319 | val mash_lines = Path.explode mash_file_name |> File.read_lines | 
| 
986506233812
clarified signature: File.read_lines is based on scalable Bytes.T;
 wenzelm parents: 
75612diff
changeset | 320 | val mepo_lines = Path.explode mepo_file_name |> File.read_lines | 
| 50907 | 321 | in | 
| 57122 | 322 | if length mash_lines = length mepo_lines then List.app do_fact (mash_lines ~~ mepo_lines) | 
| 63692 | 323 | else warning "Skipped: MaSh file missing or out of sync with MePo file" | 
| 50907 | 324 | end | 
| 50814 | 325 | |
| 48234 | 326 | end; |