# HG changeset patch # User blanchet # Date 1403782566 -7200 # Node ID e9d47cd3239b0a4004c26893306406b79b02ee7a # Parent 24738b4f8c6b7b3027f81be0c81d2e62a1b82290 refactoring diff -r 24738b4f8c6b -r e9d47cd3239b src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML Thu Jun 26 13:36:00 2014 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML Thu Jun 26 13:36:06 2014 +0200 @@ -398,16 +398,8 @@ exception EXIT of unit -fun k_nearest_neighbors num_facts get_deps get_sym_ths max_suggs visible_facts num_feats feats = +fun k_nearest_neighbors dffreq num_facts deps_vec get_sym_ths max_suggs visible_facts conj_feats = let - val dffreq = Array.array (num_feats, 0) - - val add_sym = map_array_at dffreq (Integer.add 1) - fun for1 i = - if i = num_feats then () else - (List.app (fn _ => add_sym i) (get_sym_ths i); for1 (i + 1)) - val _ = for1 0 - val ln_afreq = Math.ln (Real.fromInt num_facts) fun tfidf feat = ln_afreq - Math.ln (Real.fromInt (Array.sub (dffreq, feat))) @@ -427,7 +419,7 @@ List.app do_th (get_sym_ths s) end - val _ = List.app do_feat feats + val _ = List.app do_feat conj_feats val _ = heap (Real.compare o pairself snd) num_facts num_facts overlaps_sqr val no_recommends = Unsynchronized.ref 0 val recommends = Array.tabulate (num_facts, rpair 0.0) @@ -447,7 +439,7 @@ val (j, o2) = Array.sub (overlaps_sqr, num_facts - k - 1) val o1 = Math.sqrt o2 val _ = inc_recommend j o1 - val ds = get_deps j + val ds = Vector.sub (deps_vec, j) val l = Real.fromInt (length ds) in List.app (fn d => inc_recommend d (o1 / l)) ds @@ -474,7 +466,7 @@ val nb_def_prior_weight = 21 (* FUDGE *) -fun learn_facts tfreq sfreq dffreq num_facts get_deps get_feats = +fun learn_facts tfreq sfreq dffreq num_facts depss featss = let fun learn_fact th feats deps = let @@ -495,35 +487,26 @@ end fun for i = - if i = num_facts then () else (learn_fact i (get_feats i) (get_deps i); for (i + 1)) + if i = num_facts then () + else (learn_fact i (Vector.sub (featss, i)) (Vector.sub (depss, i)); for (i + 1)) in - for 0; - (Array.vector tfreq, Array.vector sfreq, Array.vector dffreq) + for 0 end -fun learn num_facts get_deps get_feats num_feats = - let - val tfreq = Array.array (num_facts, 0) - val sfreq = Array.array (num_facts, Inttab.empty) - val dffreq = Array.array (num_feats, 0) - in - learn_facts tfreq sfreq dffreq num_facts get_deps get_feats - end - -fun naive_bayes_query num_facts max_suggs visible_facts feats (tfreq, sfreq, dffreq) = +fun naive_bayes_query tfreq sfreq dffreq num_facts max_suggs visible_facts conj_feats = let val tau = 0.05 (* FUDGE *) val pos_weight = 10.0 (* FUDGE *) val def_val = ~15.0 (* FUDGE *) val ln_afreq = Math.ln (Real.fromInt num_facts) - val idf = Vector.map (fn i => ln_afreq - Math.ln (Real.fromInt i)) dffreq + val idf = Vector.map (fn i => ln_afreq - Math.ln (Real.fromInt i)) (Array.vector dffreq) fun tfidf feat = Vector.sub (idf, feat) fun log_posterior i = let - val tfreq = Real.fromInt (Vector.sub (tfreq, i)) + val tfreq = Real.fromInt (Array.sub (tfreq, i)) fun fold_feats f (res, sfh) = (case Inttab.lookup sfh f of @@ -532,7 +515,7 @@ Inttab.delete f sfh) | NONE => (res + tfidf f * def_val, sfh)) - val (res, sfh) = fold fold_feats feats (Math.ln tfreq, Vector.sub (sfreq, i)) + val (res, sfh) = fold fold_feats conj_feats (Math.ln tfreq, Array.sub (sfreq, i)) fun fold_sfh (f, sf) sow = sow + tfidf f * Math.ln (1.0 + (1.0 - Real.fromInt sf) / tfreq) @@ -551,26 +534,23 @@ ret (Integer.max 0 (num_facts - max_suggs)) [] end -fun naive_bayes num_facts get_deps get_feats num_feats max_suggs visible_facts feats = - learn num_facts get_deps get_feats num_feats - |> naive_bayes_query num_facts max_suggs visible_facts feats - (* experimental *) -fun naive_bayes_py ctxt overlord num_facts get_deps get_feats max_suggs feats = +fun naive_bayes_py ctxt overlord num_facts depss featss max_suggs conj_feats = let fun name_of_fact j = "f" ^ string_of_int j fun fact_of_name s = the (Int.fromString (unprefix "f" s)) fun name_of_feature j = "F" ^ string_of_int j fun parents_of j = if j = 0 then [] else [name_of_fact (j - 1)] - val learns = map (fn j => (name_of_fact j, parents_of j, map name_of_feature (get_feats j), - map name_of_fact (get_deps j))) (0 upto num_facts - 1) + val learns = map (fn j => (name_of_fact j, parents_of j, + map name_of_feature (Vector.sub (featss, j)), + map name_of_fact (Vector.sub (depss, j)))) (0 upto num_facts - 1) val parents' = parents_of num_facts - val feats' = map (rpair 1.0 o name_of_feature) feats + val conj_feats' = map (rpair 1.0 o name_of_feature) conj_feats in MaSh_Py.unlearn ctxt overlord; OS.Process.sleep (seconds 2.0); (* hack *) - MaSh_Py.query ctxt overlord max_suggs (learns, [], parents', feats') + MaSh_Py.query ctxt overlord max_suggs (learns, [], parents', conj_feats') |> map (apfst fact_of_name) end @@ -633,9 +613,14 @@ val depss = map (map_filter (Symtab.lookup fact_tab) o #3) learns val fact_vec = Vector.fromList facts + val feats_vec = Vector.fromList featss val deps_vec = Vector.fromList depss - val get_deps = curry Vector.sub deps_vec + val tfreq = Array.array (num_facts, 0) + val sfreq = Array.array (num_facts, Inttab.empty) + val dffreq = Array.array (num_feats, 0) + + val _ = learn_facts tfreq sfreq dffreq num_facts deps_vec feats_vec val int_visible_facts = map_filter (Symtab.lookup fact_tab) visible_facts val int_conj_feats = map_filter (Symtab.lookup feat_tab) conj_feats @@ -652,17 +637,11 @@ featss 0 val get_facts = curry Array.sub facts_ary in - k_nearest_neighbors num_facts get_deps get_facts max_suggs int_visible_facts num_feats + k_nearest_neighbors dffreq num_facts deps_vec get_facts max_suggs int_visible_facts int_conj_feats end | MaSh_SML_NB => - let - val feats_ary = Vector.fromList featss - val get_feats = curry Vector.sub feats_ary - in - naive_bayes num_facts get_deps get_feats num_feats max_suggs int_visible_facts - int_conj_feats - end) + naive_bayes_query tfreq sfreq dffreq num_facts max_suggs int_visible_facts int_conj_feats) |> map (curry Vector.sub fact_vec o fst) end