# HG changeset patch # User blanchet # Date 1404898552 -7200 # Node ID 4d9895d39b590ffa9f9402ff4ee2e21b3b1352d3 # Parent 439f881c8744388b94d73d7556107c5d514ca8d5 improvements to the machine learning algos (due to Cezary K.) diff -r 439f881c8744 -r 4d9895d39b59 src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML Mon Jul 07 17:01:11 2014 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML Wed Jul 09 11:35:52 2014 +0200 @@ -290,7 +290,7 @@ ary end -val nb_def_prior_weight = 21 (* FUDGE *) +val nb_def_prior_weight = 1000 (* FUDGE *) fun learn_facts (tfreq0, sfreq0, dffreq0) num_facts0 num_facts num_feats depss featss = let @@ -326,9 +326,10 @@ fun naive_bayes (tfreq, sfreq, dffreq) num_facts max_suggs visible_facts goal_feats = let - val tau = 0.05 (* FUDGE *) - val pos_weight = 10.0 (* FUDGE *) - val def_val = ~15.0 (* FUDGE *) + val tau = 0.2 (* FUDGE *) + val pos_weight = 5.0 (* FUDGE *) + val def_val = ~18.0 (* FUDGE *) + val init_val = 30.0 (* FUDGE *) val ln_afreq = Math.ln (Real.fromInt num_facts) val idf = Vector.map (fn i => ln_afreq - Math.ln (Real.fromInt i)) dffreq @@ -339,14 +340,14 @@ let val tfreq = Real.fromInt (Vector.sub (tfreq, i)) - fun fold_feats (f, fw0) (res, sfh) = + fun add_feat (f, fw0) (res, sfh) = (case Inttab.lookup sfh f of SOME sf => (res + fw0 * tfidf f * Math.ln (pos_weight * Real.fromInt sf / tfreq), Inttab.delete f sfh) | NONE => (res + fw0 * tfidf f * def_val, sfh)) - val (res, sfh) = fold fold_feats goal_feats (Math.ln tfreq, Vector.sub (sfreq, i)) + val (res, sfh) = fold add_feat goal_feats (init_val * Math.ln tfreq, Vector.sub (sfreq, i)) fun fold_sfh (f, sf) sow = sow + tfidf f * Math.ln (1.0 + (1.0 - Real.fromInt sf) / tfreq) @@ -366,7 +367,7 @@ ret (Integer.max 0 (num_facts - max_suggs)) [] end -val number_of_nearest_neighbors = 10 (* FUDGE *) +val number_of_nearest_neighbors = 1 (* FUDGE *) fun k_nearest_neighbors dffreq num_facts num_feats depss featss max_suggs visible_facts goal_feats = let @@ -384,11 +385,11 @@ fun do_feat (s, sw0) = let val sw = sw0 * tfidf s - val w2 = sw * sw + val w6 = Math.pow (sw, 6.0) fun inc_overlap j = let val (_, ov) = Array.sub (overlaps_sqr, j) in - Array.update (overlaps_sqr, j, (j, w2 + ov)) + Array.update (overlaps_sqr, j, (j, w6 + ov)) end in List.app inc_overlap (Array.sub (feat_facts, s)) @@ -404,10 +405,7 @@ let val (_, ov) = Array.sub (recommends, j) in if ov <= 0.0 then (no_recommends := !no_recommends + 1; Array.update (recommends, j, (j, !age + ov))) - else if ov < !age + 1000.0 then - Array.update (recommends, j, (j, v + ov)) - else - () + else Array.update (recommends, j, (j, v + ov)) end val k = Unsynchronized.ref 0 @@ -416,13 +414,13 @@ raise EXIT () else let + val deps_factor = 2.7 (* FUDGE *) val (j, o2) = Array.sub (overlaps_sqr, num_facts - k - 1) - val o1 = Math.sqrt o2 - val _ = inc_recommend o1 j + val _ = inc_recommend o2 j val ds = Vector.sub (depss, j) val l = Real.fromInt (length ds) in - List.app (inc_recommend (o1 / l)) ds + List.app (inc_recommend (deps_factor * o2 / l)) ds end fun while1 () =