take chained and proximate facts into consideration when computing MaSh features
authorblanchet
Thu, 22 Aug 2013 12:16:56 +0200
changeset 53141 d27e99a6a679
parent 53140 a1235e90da5f
child 53142 966a251efd16
take chained and proximate facts into consideration when computing MaSh features
src/HOL/TPTP/mash_export.ML
src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
--- a/src/HOL/TPTP/mash_export.ML	Thu Aug 22 12:12:52 2013 +0200
+++ b/src/HOL/TPTP/mash_export.ML	Thu Aug 22 12:16:56 2013 +0200
@@ -163,7 +163,7 @@
           val _ = tracing ("Fact " ^ string_of_int j ^ ": " ^ name)
           val isar_deps = isar_dependencies_of name_tabs th
           val do_query = not (is_bad_query ctxt ho_atp step j th isar_deps)
-          val feats =
+          val goal_feats =
             features_of ctxt prover (theory_of_thm th) (num_old_facts + j)
                         const_tab stature [prop_of th]
             |> sort_wrt fst
@@ -178,20 +178,18 @@
             [prop_of th]
             |> features_of ctxt prover (theory_of_thm th) (num_old_facts + j)
                            const_tab stature
-            |> map (apsnd (fn r => weight * extra_feature_weight_factor * r))
+            |> map (apsnd (fn r => weight * extra_feature_factor * r))
           val query =
             if do_query then
               let
-                val extra_featss =
+                val query_feats =
                   new_facts
                   |> drop (j - num_extra_feature_facts)
                   |> take num_extra_feature_facts
                   |> rev
                   |> weight_facts_steeply
                   |> map extra_features_of
-                val extra_feats =
-                  fold_rev (union (op = o pairself fst)) extra_featss []
-                val query_feats = union (op = o pairself fst) extra_feats feats
+                  |> rpair goal_feats |-> fold (union (op = o pairself fst))
               in
                 "? " ^ string_of_int max_suggs ^ " # " ^
                 encode_str name ^ ": " ^ encode_strs parents ^ "; " ^
@@ -201,7 +199,7 @@
               ""
           val update =
             "! " ^ encode_str name ^ ": " ^ encode_strs parents ^ "; " ^
-            encode_strs (map fst feats) ^ "; " ^ marker ^ " " ^
+            encode_strs (map fst goal_feats) ^ "; " ^ marker ^ " " ^
             encode_strs deps ^ "\n"
         in query ^ update end
       else
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Thu Aug 22 12:12:52 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Thu Aug 22 12:16:56 2013 +0200
@@ -74,7 +74,7 @@
   val attach_parents_to_facts :
     ('a * thm) list -> ('a * thm) list -> (string list * ('a * thm)) list
   val num_extra_feature_facts : int
-  val extra_feature_weight_factor : real
+  val extra_feature_factor : real
   val weight_facts_smoothly : 'a list -> ('a * real) list
   val weight_facts_steeply : 'a list -> ('a * real) list
   val find_mash_suggestions :
@@ -519,10 +519,6 @@
 val lams_feature = ("lams", 2.0 (* FUDGE *))
 val skos_feature = ("skos", 2.0 (* FUDGE *))
 
-(* The following "crude" functions should be progressively phased out, since
-   they create visibility edges that do not exist in Isabelle, resulting in
-   failed lookups later on. *)
-
 fun crude_theory_ord p =
   if Theory.subthy p then
     if Theory.eq_thy p then EQUAL else LESS
@@ -905,8 +901,9 @@
 
 fun is_fact_in_graph access_G = can (Graph.get_node access_G) o nickname_of_thm
 
+val chained_feature_factor = 0.5
+val extra_feature_factor = 0.1
 val num_extra_feature_facts = 10 (* FUDGE *)
-val extra_feature_weight_factor = 0.1
 
 (* FUDGE *)
 fun weight_of_proximity_fact rank =
@@ -930,9 +927,7 @@
       val raw_mash = find_suggested_facts ctxt facts suggs
       val unknown_chained =
         inter (Thm.eq_thm_prop o pairself snd) chained raw_unknown
-      val proximity =
-        facts |> sort (crude_thm_ord o pairself snd o swap)
-              |> take max_proximity_facts
+      val proximity = facts |> take max_proximity_facts
       val mess =
         [(0.90 (* FUDGE *), (map (rpair 1.0) unknown_chained, [])),
          (0.08 (* FUDGE *), (weight_facts_steeply raw_mash, raw_unknown)),
@@ -951,8 +946,14 @@
                          concl_t facts =
   let
     val thy = Proof_Context.theory_of ctxt
+    val facts = facts |> sort (crude_thm_ord o pairself snd o swap)
     val chained = facts |> filter (fn ((_, (scope, _)), _) => scope = Chained)
+    val num_facts = length facts
     val const_tab = fold (add_const_counts o prop_of o snd) facts Symtab.empty
+    fun chained_or_extra_features_of factor (((_, stature), th), weight) =
+      [prop_of th]
+      |> features_of ctxt prover (theory_of_thm th) num_facts const_tab stature
+      |> map (apsnd (fn r => weight * factor * r))
     val (access_G, suggs) =
       peek_state ctxt (fn {access_G, ...} =>
           if Graph.is_empty access_G then
@@ -960,9 +961,23 @@
           else
             let
               val parents = maximal_wrt_access_graph access_G facts
+              val goal_feats =
+                features_of ctxt prover thy num_facts const_tab
+                            (Local, General) (concl_t :: hyp_ts)
+              val chained_feats =
+                chained
+                |> map (rpair 1.0)
+                |> map (chained_or_extra_features_of chained_feature_factor)
+                |> rpair [] |-> fold (union (op = o pairself fst))
+              val extra_feats =
+                facts
+                |> take (num_extra_feature_facts - length chained)
+                |> weight_facts_steeply
+                |> map (chained_or_extra_features_of extra_feature_factor)
+                |> rpair [] |-> fold (union (op = o pairself fst))
               val feats =
-                features_of ctxt prover thy (length facts) const_tab
-                            (Local, General) (concl_t :: hyp_ts)
+                fold (union (op = o pairself fst)) [chained_feats, extra_feats]
+                     goal_feats
               val hints =
                 chained |> filter (is_fact_in_graph access_G o snd)
                         |> map (nickname_of_thm o snd)