weight MaSh constants by frequency
authorblanchet
Wed, 21 Aug 2013 14:54:25 +0200
changeset 53127 60801776d8af
parent 53126 f4d2c64c7aa8
child 53128 ea1b62ed5a54
weight MaSh constants by frequency
src/HOL/TPTP/mash_export.ML
src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
src/HOL/Tools/Sledgehammer/sledgehammer_mepo.ML
--- a/src/HOL/TPTP/mash_export.ML	Wed Aug 21 13:48:25 2013 +0200
+++ b/src/HOL/TPTP/mash_export.ML	Wed Aug 21 14:54:25 2013 +0200
@@ -79,9 +79,11 @@
       let
         val name = nickname_of_thm th
         val feats =
-          features_of ctxt prover (theory_of_thm th) stature [prop_of th]
+          features_of ctxt prover (theory_of_thm th) 0 Symtab.empty stature
+                      [prop_of th]
+          |> map fst
         val s =
-          encode_str name ^ ": " ^ encode_features (sort_wrt fst feats) ^ "\n"
+          encode_str name ^ ": " ^ encode_strs (sort string_ord feats) ^ "\n"
       in File.append path s end
   in List.app do_fact facts end
 
@@ -152,15 +154,19 @@
     val path = file_name |> Path.explode
     val facts = all_facts ctxt
     val (new_facts, old_facts) = facts |> List.partition (has_thys thys o snd)
+    val num_old_facts = length old_facts
     val name_tabs = build_name_tables nickname_of_thm facts
-    fun do_fact (j, ((name, (parents, ((_, stature), th))), prevs)) =
+    fun do_fact (j,
+            (((name, (parents, ((_, stature), th))), prevs), const_tab)) =
       if in_range range j then
         let
           val _ = tracing ("Fact " ^ string_of_int j ^ ": " ^ name)
+          val isar_deps = isar_dependencies_of name_tabs th
+          val do_query = not (is_bad_query ctxt ho_atp step j th isar_deps)
           val feats =
-            features_of ctxt prover (theory_of_thm th) stature [prop_of th]
+            features_of ctxt prover (theory_of_thm th) (num_old_facts + j)
+                        const_tab stature [prop_of th]
             |> sort_wrt fst
-          val isar_deps = isar_dependencies_of name_tabs th
           val access_facts =
             (if linearize then take (j - 1) new_facts
              else new_facts |> filter_accessible_from th) @ old_facts
@@ -169,12 +175,12 @@
                                   (SOME isar_deps)
           val parents = if linearize then prevs else parents
           val query =
-            if is_bad_query ctxt ho_atp step j th isar_deps then
-              ""
-            else
+            if do_query then
               "? " ^ string_of_int max_suggs ^ " # " ^
               encode_str name ^ ": " ^ encode_strs parents ^ "; " ^
               encode_features feats ^ "\n"
+            else
+              ""
           val update =
             "! " ^ encode_str name ^ ": " ^ encode_strs parents ^ "; " ^
             encode_strs (map fst feats) ^ "; " ^ marker ^ " " ^
@@ -187,8 +193,14 @@
                 |> map (`(nickname_of_thm o snd o snd))
     val hd_prevs =
       map (nickname_of_thm o snd) (the_list (try List.last old_facts))
-    val prevss = fst (split_last (hd_prevs :: map (single o fst) new_facts))
-    val lines = Par_List.map do_fact (tag_list 1 (new_facts ~~ prevss))
+    val prevss = hd_prevs :: map (single o fst) new_facts |> split_last |> fst
+    val hd_const_tabs =
+      fold (add_const_counts o prop_of o snd) old_facts Symtab.empty
+    val (const_tabs, _) =
+      fold_map (`I oo add_const_counts o prop_of o snd o snd o snd) new_facts
+               hd_const_tabs
+    val lines =
+      Par_List.map do_fact (tag_list 1 (new_facts ~~ prevss ~~ const_tabs))
   in File.write_list path lines end
 
 fun generate_isar_commands ctxt prover =
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Wed Aug 21 13:48:25 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Wed Aug 21 14:54:25 2013 +0200
@@ -62,8 +62,8 @@
   val run_prover_for_mash :
     Proof.context -> params -> string -> fact list -> thm -> prover_result
   val features_of :
-    Proof.context -> string -> theory -> stature -> term list
-    -> (string * real) list
+    Proof.context -> string -> theory -> int -> int Symtab.table -> stature
+    -> term list -> (string * real) list
   val trim_dependencies : string list -> string list option
   val isar_dependencies_of :
     string Symtab.table * string Symtab.table -> thm -> string list
@@ -78,6 +78,7 @@
   val find_mash_suggestions :
     Proof.context -> int -> string list -> ('b * thm) list -> ('b * thm) list
     -> ('b * thm) list -> ('b * thm) list * ('b * thm) list
+  val add_const_counts : term -> int Symtab.table -> int Symtab.table
   val mash_suggested_facts :
     Proof.context -> params -> string -> int -> term list -> term
     -> raw_fact list -> fact list * fact list
@@ -517,6 +518,15 @@
 val lams_feature = ("lams", 2.0 (* FUDGE *))
 val skos_feature = ("skos", 2.0 (* FUDGE *))
 
+fun weighted_const_feature_of num_facts const_tab const_s s =
+  ("c" ^ s,
+   if num_facts = 0 then
+     0.0
+   else
+     let val count = Symtab.lookup const_tab const_s |> the_default 0 in
+       16.0 + (Real.fromInt num_facts / Real.fromInt count)
+     end)
+
 (* The following "crude" functions should be progressively phased out, since
    they create visibility edges that do not exist in Isabelle, resulting in
    failed lookups later on. *)
@@ -588,7 +598,8 @@
 
 val max_pat_breadth = 10
 
-fun term_features_of ctxt prover thy_name term_max_depth type_max_depth ts =
+fun term_features_of ctxt prover thy_name num_facts const_tab term_max_depth
+                     type_max_depth ts =
   let
     val thy = Proof_Context.theory_of ctxt
 
@@ -657,9 +668,10 @@
     fun add_term Ts feature_of = add_term_pats Ts feature_of term_max_depth
     fun add_subterms Ts t =
       case strip_comb t of
-        (Const (x as (_, T)), args) =>
+        (Const (x as (s, T)), args) =>
         let val (built_in, args) = is_built_in x args in
-          (not built_in ? add_term Ts const_feature_of t)
+          (not built_in
+             ? add_term Ts (weighted_const_feature_of num_facts const_tab s) t)
           #> add_subtypes T
           #> fold (add_subterms Ts) args
         end
@@ -678,10 +690,11 @@
 val type_max_depth = 2
 
 (* TODO: Generate type classes for types? *)
-fun features_of ctxt prover thy (scope, status) ts =
+fun features_of ctxt prover thy num_facts const_tab (scope, status) ts =
   let val thy_name = Context.theory_name thy in
     thy_feature_of thy_name ::
-    term_features_of ctxt prover thy_name term_max_depth type_max_depth ts
+    term_features_of ctxt prover thy_name num_facts const_tab term_max_depth
+                     type_max_depth ts
     |> status <> General ? cons (status_feature_of status)
     |> scope <> Global ? cons local_feature
     |> exists (not o is_lambda_free) ts ? cons lams_feature
@@ -922,22 +935,26 @@
                 [unknown_chained, proximity]
     in (mesh_facts (Thm.eq_thm_prop o pairself snd) max_facts mess, unknown) end
 
-val max_learn_on_query = 500
+fun add_const_counts t =
+  fold (fn s => Symtab.map_default (s, ~1) (Integer.add 1))
+       (Term.add_const_names t [])
 
-fun mash_suggested_facts ctxt ({overlord, learn, ...} : params) prover max_facts
-                         hyp_ts concl_t facts =
+fun mash_suggested_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts
+                         concl_t facts =
   let
     val thy = Proof_Context.theory_of ctxt
     val chained = facts |> filter (fn ((_, (scope, _)), _) => scope = Chained)
+    val const_tab = fold (add_const_counts o prop_of o snd) facts Symtab.empty
     val (access_G, suggs) =
-      peek_state ctxt (fn {access_G, num_known_facts, ...} =>
+      peek_state ctxt (fn {access_G, ...} =>
           if Graph.is_empty access_G then
             (access_G, [])
           else
             let
               val parents = maximal_wrt_access_graph access_G facts
               val feats =
-                features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts)
+                features_of ctxt prover thy (length facts) const_tab
+                            (Local, General) (concl_t :: hyp_ts)
               val hints =
                 chained |> filter (is_fact_in_graph access_G o snd)
                         |> map (nickname_of_thm o snd)
@@ -989,7 +1006,9 @@
       let
         val thy = Proof_Context.theory_of ctxt
         val name = freshish_name ()
-        val feats = features_of ctxt prover thy (Local, General) [t] |> map fst
+        val feats =
+          features_of ctxt prover thy 0 Symtab.empty (Local, General) [t]
+          |> map fst
       in
         peek_state ctxt (fn {access_G, ...} =>
             let
@@ -1086,7 +1105,8 @@
             let
               val name = nickname_of_thm th
               val feats =
-                features_of ctxt prover (theory_of_thm th) stature [prop_of th]
+                features_of ctxt prover (theory_of_thm th) 0 Symtab.empty
+                            stature [prop_of th]
                 |> map fst
               val deps = deps_of status th |> these
               val n = n |> not (null deps) ? Integer.add 1
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_mepo.ML	Wed Aug 21 13:48:25 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mepo.ML	Wed Aug 21 14:54:25 2013 +0200
@@ -16,9 +16,6 @@
   val trace : bool Config.T
   val pseudo_abs_name : string
   val pseudo_skolem_prefix : string
-  val const_names_in_fact :
-    theory -> (string * typ -> term list -> bool * term list) -> term
-    -> string list
   val mepo_suggested_facts :
     Proof.context -> params -> string -> int -> relevance_fudge option
     -> term list -> term -> raw_fact list -> fact list
@@ -182,8 +179,6 @@
               (Symtab.empty |> add_pconsts_in_term thy is_built_in_const true
                                                    (SOME true) t) []
 
-val const_names_in_fact = map fst ooo pconsts_in_fact
-
 (* Inserts a dummy "constant" referring to the theory name, so that relevance
    takes the given theory into account. *)
 fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...}