# HG changeset patch
# User blanchet
# Date 1404898552 -7200
# Node ID 4d9895d39b590ffa9f9402ff4ee2e21b3b1352d3
# Parent  439f881c8744388b94d73d7556107c5d514ca8d5
improvements to the machine learning algos (due to Cezary K.)

diff -r 439f881c8744 -r 4d9895d39b59 src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Mon Jul 07 17:01:11 2014 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Wed Jul 09 11:35:52 2014 +0200
@@ -290,7 +290,7 @@
     ary
   end
 
-val nb_def_prior_weight = 21 (* FUDGE *)
+val nb_def_prior_weight = 1000 (* FUDGE *)
 
 fun learn_facts (tfreq0, sfreq0, dffreq0) num_facts0 num_facts num_feats depss featss =
   let
@@ -326,9 +326,10 @@
 
 fun naive_bayes (tfreq, sfreq, dffreq) num_facts max_suggs visible_facts goal_feats =
   let
-    val tau = 0.05 (* FUDGE *)
-    val pos_weight = 10.0 (* FUDGE *)
-    val def_val = ~15.0 (* FUDGE *)
+    val tau = 0.2 (* FUDGE *)
+    val pos_weight = 5.0 (* FUDGE *)
+    val def_val = ~18.0 (* FUDGE *)
+    val init_val = 30.0 (* FUDGE *)
 
     val ln_afreq = Math.ln (Real.fromInt num_facts)
     val idf = Vector.map (fn i => ln_afreq - Math.ln (Real.fromInt i)) dffreq
@@ -339,14 +340,14 @@
       let
         val tfreq = Real.fromInt (Vector.sub (tfreq, i))
 
-        fun fold_feats (f, fw0) (res, sfh) =
+        fun add_feat (f, fw0) (res, sfh) =
           (case Inttab.lookup sfh f of
             SOME sf =>
             (res + fw0 * tfidf f * Math.ln (pos_weight * Real.fromInt sf / tfreq),
              Inttab.delete f sfh)
           | NONE => (res + fw0 * tfidf f * def_val, sfh))
 
-        val (res, sfh) = fold fold_feats goal_feats (Math.ln tfreq, Vector.sub (sfreq, i))
+        val (res, sfh) = fold add_feat goal_feats (init_val * Math.ln tfreq, Vector.sub (sfreq, i))
 
         fun fold_sfh (f, sf) sow =
           sow + tfidf f * Math.ln (1.0 + (1.0 - Real.fromInt sf) / tfreq)
@@ -366,7 +367,7 @@
     ret (Integer.max 0 (num_facts - max_suggs)) []
   end
 
-val number_of_nearest_neighbors = 10 (* FUDGE *)
+val number_of_nearest_neighbors = 1 (* FUDGE *)
 
 fun k_nearest_neighbors dffreq num_facts num_feats depss featss max_suggs visible_facts goal_feats =
   let
@@ -384,11 +385,11 @@
     fun do_feat (s, sw0) =
       let
         val sw = sw0 * tfidf s
-        val w2 = sw * sw
+        val w6 = Math.pow (sw, 6.0)
 
         fun inc_overlap j =
           let val (_, ov) = Array.sub (overlaps_sqr, j) in
-            Array.update (overlaps_sqr, j, (j, w2 + ov))
+            Array.update (overlaps_sqr, j, (j, w6 + ov))
           end
       in
         List.app inc_overlap (Array.sub (feat_facts, s))
@@ -404,10 +405,7 @@
       let val (_, ov) = Array.sub (recommends, j) in
         if ov <= 0.0 then
           (no_recommends := !no_recommends + 1; Array.update (recommends, j, (j, !age + ov)))
-        else if ov < !age + 1000.0 then
-          Array.update (recommends, j, (j, v + ov))
-        else
-          ()
+        else Array.update (recommends, j, (j, v + ov))
       end
 
     val k = Unsynchronized.ref 0
@@ -416,13 +414,13 @@
         raise EXIT ()
       else
         let
+          val deps_factor = 2.7 (* FUDGE *)
           val (j, o2) = Array.sub (overlaps_sqr, num_facts - k - 1)
-          val o1 = Math.sqrt o2
-          val _ = inc_recommend o1 j
+          val _ = inc_recommend o2 j
           val ds = Vector.sub (depss, j)
           val l = Real.fromInt (length ds)
         in
-          List.app (inc_recommend (o1 / l)) ds
+          List.app (inc_recommend (deps_factor * o2 / l)) ds
         end
 
     fun while1 () =