isabelle: comparison src/HOL/Tools/Sledgehammer/sledgehammer

equal deleted inserted replaced

-:a9e0f9d35125
+:9816f692b0ca
 (string * real) list
 end
 structure MaSh_SML :
 sig
-val k_nearest_neighbors : int -> int -> (int -> int list) -> (int -> (int * real) list) ->
+val k_nearest_neighbors : int -> (int -> int list) -> (int -> (int * real) list) -> int ->
+int list -> (int * real) list -> (int * real) list
+val naive_bayes : (bool * bool) -> int -> (int -> int list) -> (int -> int list) -> int ->
 int -> (int * real) list -> (int * real) list
-val naive_bayes : (bool * bool) -> int -> int -> (int -> int list) -> (int -> int list) ->
+val naive_bayes_py : Proof.context -> bool -> int -> (int -> int list) -> (int -> int list) ->
 int -> int -> (int * real) list -> (int * real) list
-val naive_bayes_py : Proof.context -> bool -> int -> int -> (int -> int list) ->
-(int -> int list) -> int -> int -> (int * real) list -> (int * real) list
 val query : Proof.context -> bool -> mash_engine -> string list -> int ->
 (string * (string * real) list * string list) list * string list * (string * real) list ->
 string list
 end
 (* Signals early termination: do_k (below) raises it once k >= num_facts. *)
 exception EXIT of unit
 (*
 num_facts = maximum number of theorems whose dependencies and symbols are checked
-num_visible_facts = do not return theorems over or equal to this number.
-Must satisfy: num_visible_facts <= num_facts.
 get_deps = returns dependencies of a theorem
 get_sym_ths = get theorems that have this feature
 max_suggs = number of suggestions to return
 feats = features of the goal
 *)
-fun k_nearest_neighbors num_facts num_visible_facts get_deps get_sym_ths max_suggs feats =
+fun k_nearest_neighbors num_facts get_deps get_sym_ths max_suggs visible_facts feats =
 let
 (* Can be later used for TFIDF *)
 fun sym_wght _ = 1.0
 val overlaps_sqr = Array.tabulate (num_facts, rpair 0.0)
 end
 val _ = List.app do_feat feats
 val _ = heap (Real.compare o pairself snd) num_facts num_facts overlaps_sqr
 val no_recommends = Unsynchronized.ref 0
-val recommends = Array.tabulate (num_visible_facts, rpair 0.0)
+val recommends = Array.tabulate (num_facts, rpair 0.0)
 val age = Unsynchronized.ref 1000000000.0
 fun inc_recommend j v =
 let val ov = snd (Array.sub (recommends, j)) in
 if ov <= 0.0 then
 (if ov < !age + 1000.0 then Array.update (recommends, j, (j, v + ov)) else ())
 end
 val k = Unsynchronized.ref 0
 fun do_k k =
-if k >= num_visible_facts then
+if k >= num_facts then
 raise EXIT ()
 else
 let
 val (j, o2) = Array.sub (overlaps_sqr, num_facts - k - 1)
 val o1 = Math.sqrt o2
 fun ret acc at =
 if at = Array.length recommends then acc else ret (Array.sub (recommends, at) :: acc) (at + 1)
 in
 while1 (); while2 ();
-heap (Real.compare o pairself snd) max_suggs num_visible_facts recommends;
+heap (Real.compare o pairself snd) max_suggs num_facts recommends;
-ret [] (Integer.max 0 (num_visible_facts - max_suggs))
+ret [] (Integer.max 0 (num_facts - max_suggs))
 end
 val nb_def_prior_weight = 21 (* FUDGE *)
 fun learn_facts tfreq sfreq dffreq num_facts get_deps get_feats num_feats =
 val dffreq = Array.array (num_feats, 0)
 in
 learn_facts tfreq sfreq dffreq num_facts get_deps get_feats num_feats
 end
-fun naive_bayes_query (kuehlwein_log, kuehlwein_params) num_facts num_visible_facts max_suggs feats
+fun naive_bayes_query (kuehlwein_log, kuehlwein_params) num_facts max_suggs feats
 (tfreq, sfreq, idf) =
 let
 val tau = if kuehlwein_params then 0.05 else 0.02 (* FUDGE *)
 val pos_weight = if kuehlwein_params then 10.0 else 2.0 (* FUDGE *)
 val def_val = ~15.0 (* FUDGE *)
 val sum_of_weights = Inttab.fold fold_sfh sfh 0.0
 in
 res + tau * sum_of_weights
 end
-val posterior = Array.tabulate (num_visible_facts, (fn j => (j, log_posterior j)))
+val posterior = Array.tabulate (num_facts, (fn j => (j, log_posterior j)))
 fun ret acc at =
-if at = num_visible_facts then acc else ret (Array.sub (posterior, at) :: acc) (at + 1)
+if at = num_facts then acc else ret (Array.sub (posterior, at) :: acc) (at + 1)
 in
-heap (Real.compare o pairself snd) max_suggs num_visible_facts posterior;
+heap (Real.compare o pairself snd) max_suggs num_facts posterior;
-ret [] (Integer.max 0 (num_visible_facts - max_suggs))
+ret [] (Integer.max 0 (num_facts - max_suggs))
 end
-fun naive_bayes opts num_facts num_visible_facts get_deps get_feats num_feats max_suggs feats =
+fun naive_bayes opts num_facts get_deps get_feats num_feats max_suggs feats =
 learn num_facts get_deps get_feats num_feats
-|> naive_bayes_query opts num_facts num_visible_facts max_suggs feats
+|> naive_bayes_query opts num_facts max_suggs feats
 (* experimental *)
-fun naive_bayes_py ctxt overlord num_facts num_visible_facts get_deps get_feats num_feats max_suggs
+fun naive_bayes_py ctxt overlord num_facts get_deps get_feats num_feats max_suggs feats =
-feats =
 let
 fun name_of_fact j = "f" ^ string_of_int j
 fun fact_of_name s = the (Int.fromString (unprefix "f" s))
 fun name_of_feature j = "F" ^ string_of_int j
 fun parents_of j = if j = 0 then [] else [name_of_fact (j - 1)]
 val learns = map (fn j => (name_of_fact j, parents_of j, map name_of_feature (get_feats j),
 map name_of_fact (get_deps j))) (0 upto num_facts - 1)
-val parents' = parents_of num_visible_facts
+val parents' = parents_of num_facts
 val feats' = map (apfst name_of_feature) feats
 in
 MaSh_Py.unlearn ctxt overlord;
 OS.Process.sleep (seconds 2.0); (* hack *)
 MaSh_Py.query ctxt overlord max_suggs (learns, [], parents', feats')
 (* In-place update of one array slot: replaces element i of ary by f applied
    to its current value. Raises Subscript if i is out of bounds. *)
 fun map_array_at ary f i =
   let
     val current = Array.sub (ary, i)
   in
     Array.update (ary, i, f current)
   end
 fun query ctxt overlord engine visible_facts max_suggs (learns0, hints, feats) =
 let
-val visible_fact_set = Symtab.make_set visible_facts
+val learns = learns0 @ (if null hints then [] else [(".hints", feats, hints)])
-val learns =
-(learns0 |> List.partition (Symtab.defined visible_fact_set o #1) |> op @) @
-(if null hints then [] else [(".hints", feats, hints)])
 in
 if engine = MaSh_SML_kNN_Cpp then
 k_nearest_neighbors_cpp max_suggs learns (map fst feats)
 else if engine = MaSh_SML_NB_Cpp then
 naive_bayes_cpp max_suggs learns (map fst feats)
 else
 let
-val (rev_depss, rev_featss, (num_facts, _, rev_facts), (num_feats, feat_tab, _)) =
+val (rev_depss, rev_featss, (num_facts, fact_tab, rev_facts), (num_feats, feat_tab, _)) =
 fold (fn (fact, feats, deps) =>
 fn (rev_depss, rev_featss, fact_xtab as (_, fact_tab, _), feat_xtab) =>
 let
 fun add_feat (feat, weight) (xtab as (n, tab, _)) =
 (case Symtab.lookup tab feat of
 val facts = rev rev_facts
 val fact_vec = Vector.fromList facts
 val deps_vec = Vector.fromList (rev rev_depss)
-val num_visible_facts = length visible_facts
 val get_deps = curry Vector.sub deps_vec
+val int_visible_facts = map (Symtab.lookup fact_tab) visible_facts
 in
 trace_msg ctxt (fn () => "MaSh_SML query " ^ encode_features feats ^ " from {" ^
-elide_string 1000 (space_implode " " (take num_visible_facts facts)) ^ "}");
+elide_string 1000 (space_implode " " (take num_facts facts)) ^ "}");
 (if engine = MaSh_SML_kNN then
 let
 val facts_ary = Array.array (num_feats, [])
 val _ =
 fold (fn feats => fn fact =>
 map_array_at facts_ary (cons (fact', weight)) feat) feats;
 fact'
 end)
 rev_featss num_facts
 val get_facts = curry Array.sub facts_ary
-val feats' = map_filter (fn (feat, weight) =>
+val int_feats = map_filter (fn (feat, weight) =>
 Option.map (rpair weight) (Symtab.lookup feat_tab feat)) feats
 in
-k_nearest_neighbors num_facts num_visible_facts get_deps get_facts max_suggs feats'
+k_nearest_neighbors num_facts get_deps get_facts max_suggs int_visible_facts int_feats
 end
 else
 let
 val unweighted_feats_ary = Vector.fromList (map (map fst) (rev rev_featss))
 val get_unweighted_feats = curry Vector.sub unweighted_feats_ary
 val int_feats = map (apfst (the_default ~1 o Symtab.lookup feat_tab)) feats
 in
 (case engine of
 MaSh_SML_NB opts =>
-naive_bayes opts num_facts num_visible_facts get_deps get_unweighted_feats num_feats
+naive_bayes opts num_facts get_deps get_unweighted_feats num_feats max_suggs
-max_suggs int_feats
+int_feats
-| MaSh_SML_NB_Py => naive_bayes_py ctxt overlord num_facts num_visible_facts get_deps
+| MaSh_SML_NB_Py => naive_bayes_py ctxt overlord num_facts get_deps
 get_unweighted_feats num_feats max_suggs int_feats)
 end)
 |> map (curry Vector.sub fact_vec o fst)
 end
 end

changeset 57356	9816f692b0ca
parent 57355	a9e0f9d35125
child 57357	30ee18eb23ac