# HG changeset patch # User wenzelm # Date 1358458989 -3600 # Node ID 157d90cdcef0ec4d49b5fe65bf9246e098c6ebe4 # Parent 7bc58677860ed6d94b6c1bbf3c94210ca9146702# Parent d64bc889f7d60471542501b96fed22fbc78406a6 merged diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/Multivariate_Analysis/Topology_Euclidean_Space.thy --- a/src/HOL/Multivariate_Analysis/Topology_Euclidean_Space.thy Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/Multivariate_Analysis/Topology_Euclidean_Space.thy Thu Jan 17 22:43:09 2013 +0100 @@ -2152,7 +2152,9 @@ unfolding bounded_def dist_real_def apply(rule_tac x=0 in exI) using assms by auto -lemma bounded_empty[simp]: "bounded {}" by (simp add: bounded_def) +lemma bounded_empty [simp]: "bounded {}" + by (simp add: bounded_def) + lemma bounded_subset: "bounded T \ S \ T ==> bounded S" by (metis bounded_def subset_eq) @@ -2188,17 +2190,6 @@ lemma bounded_ball[simp,intro]: "bounded(ball x e)" by (metis ball_subset_cball bounded_cball bounded_subset) -lemma finite_imp_bounded[intro]: - fixes S :: "'a::metric_space set" assumes "finite S" shows "bounded S" -proof- - { fix a and F :: "'a set" assume as:"bounded F" - then obtain x e where "\y\F. dist x y \ e" unfolding bounded_def by auto - hence "\y\(insert a F). dist x y \ max e (dist x a)" by auto - hence "bounded (insert a F)" unfolding bounded_def by (intro exI) - } - thus ?thesis using finite_induct[of S bounded] using bounded_empty assms by auto -qed - lemma bounded_Un[simp]: "bounded (S \ T) \ bounded S \ bounded T" apply (auto simp add: bounded_def) apply (rename_tac x y r s) @@ -2214,6 +2205,16 @@ lemma bounded_Union[intro]: "finite F \ (\S\F. bounded S) \ bounded(\F)" by (induct rule: finite_induct[of F], auto) +lemma bounded_insert [simp]: "bounded (insert x S) \ bounded S" +proof - + have "\y\{x}. dist x y \ 0" by simp + hence "bounded {x}" unfolding bounded_def by fast + thus ?thesis by (metis insert_is_Un bounded_Un) +qed + +lemma finite_imp_bounded [intro]: "finite S \ bounded S" + by (induct set: finite, simp_all) + lemma bounded_pos: "bounded S \ (\b>0. \x\ S. norm x <= b)" apply (simp add: bounded_iff) apply (subgoal_tac "\x (y::real). 0 < 1 + abs y \ (x <= y \ x <= 1 + abs y)") @@ -2226,9 +2227,6 @@ apply (metis Diff_subset bounded_subset) done -lemma bounded_insert[intro]:"bounded(insert x S) \ bounded S" - by (metis Diff_cancel Un_empty_right Un_insert_right bounded_Un bounded_subset finite.emptyI finite_imp_bounded infinite_remove subset_insertI) - lemma not_bounded_UNIV[simp, intro]: "\ bounded (UNIV :: 'a::{real_normed_vector, perfect_space} set)" proof(auto simp add: bounded_pos not_le) @@ -5063,14 +5061,14 @@ qed lemma continuous_attains_sup: - fixes f :: "'a::metric_space \ real" + fixes f :: "'a::topological_space \ real" shows "compact s \ s \ {} \ continuous_on s f ==> (\x \ s. \y \ s. f y \ f x)" using compact_attains_sup[of "f ` s"] using compact_continuous_image[of s f] by auto lemma continuous_attains_inf: - fixes f :: "'a::metric_space \ real" + fixes f :: "'a::topological_space \ real" shows "compact s \ s \ {} \ continuous_on s f \ (\x \ s. \y \ s. f x \ f y)" using compact_attains_inf[of "f ` s"] @@ -5429,24 +5427,54 @@ qed lemma separate_compact_closed: - fixes s t :: "'a::{heine_borel, real_normed_vector} set" - (* TODO: does this generalize to heine_borel? *) + fixes s t :: "'a::heine_borel set" assumes "compact s" and "closed t" and "s \ t = {}" shows "\d>0. \x\s. \y\t. d \ dist x y" -proof- - have "0 \ {x - y |x y. x \ s \ y \ t}" using assms(3) by auto - then obtain d where "d>0" and d:"\x\{x - y |x y. x \ s \ y \ t}. d \ dist 0 x" - using separate_point_closed[OF compact_closed_differences[OF assms(1,2)], of 0] by auto - { fix x y assume "x\s" "y\t" - hence "x - y \ {x - y |x y. x \ s \ y \ t}" by auto - hence "d \ dist (x - y) 0" using d[THEN bspec[where x="x - y"]] using dist_commute - by (auto simp add: dist_commute) - hence "d \ dist x y" unfolding dist_norm by auto } - thus ?thesis using `d>0` by auto +proof - (* FIXME: long proof *) + let ?T = "\x\s. { ball x (d / 2) | d. 0 < d \ (\y\t. d \ dist x y) }" + note `compact s` + moreover have "\t\?T. open t" by auto + moreover have "s \ \?T" + apply auto + apply (rule rev_bexI, assumption) + apply (subgoal_tac "x \ t") + apply (drule separate_point_closed [OF `closed t`]) + apply clarify + apply (rule_tac x="ball x (d / 2)" in exI) + apply simp + apply fast + apply (cut_tac assms(3)) + apply auto + done + ultimately obtain U where "U \ ?T" and "finite U" and "s \ \U" + by (rule compactE) + from `finite U` and `U \ ?T` have "\d>0. \x\\U. \y\t. d \ dist x y" + apply (induct set: finite) + apply simp + apply (rule exI) + apply (rule zero_less_one) + apply clarsimp + apply (rename_tac y e) + apply (rule_tac x="min d (e / 2)" in exI) + apply simp + apply (subst ball_Un) + apply (rule conjI) + apply (intro ballI, rename_tac z) + apply (rule min_max.le_infI2) + apply (simp only: mem_ball) + apply (drule (1) bspec) + apply (cut_tac x=y and y=x and z=z in dist_triangle, arith) + apply simp + apply (intro ballI) + apply (rule min_max.le_infI1) + apply simp + done + with `s \ \U` show ?thesis + by fast qed lemma separate_closed_compact: - fixes s t :: "'a::{heine_borel, real_normed_vector} set" + fixes s t :: "'a::heine_borel set" assumes "closed s" and "compact t" and "s \ t = {}" shows "\d>0. \x\s. \y\t. d \ dist x y" proof- diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/TPTP/MaSh_Eval.thy --- a/src/HOL/TPTP/MaSh_Eval.thy Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/TPTP/MaSh_Eval.thy Thu Jan 17 22:43:09 2013 +0100 @@ -41,8 +41,10 @@ ML {* if do_it then - evaluate_mash_suggestions @{context} params (1, NONE) (SOME prob_dir) - (prefix ^ "mash_suggestions") (prefix ^ "mash_eval") + evaluate_mash_suggestions @{context} params (1, NONE) + [MePoN, MaSh_IsarN, MaSh_ProverN, MeSh_IsarN, MeSh_ProverN, IsarN] + (SOME prob_dir) (prefix ^ "mash_suggestions") + (prefix ^ "mash_prover_suggestions") (prefix ^ "mash_eval") else () *} diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/TPTP/MaSh_Export.thy --- a/src/HOL/TPTP/MaSh_Export.thy Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/TPTP/MaSh_Export.thy Thu Jan 17 22:43:09 2013 +0100 @@ -29,6 +29,8 @@ val thys = [@{theory List}] val params as {provers, ...} = Sledgehammer_Isar.default_params @{context} [] val prover = hd provers +val range = (1, NONE) +val step = 1 val max_suggestions = 1536 val dir = "List" val prefix = "/tmp/" ^ dir ^ "/" @@ -65,15 +67,16 @@ ML {* if do_it then - generate_isar_commands @{context} prover thys (prefix ^ "mash_commands") + generate_isar_commands @{context} prover (range, step) thys + (prefix ^ "mash_commands") else () *} ML {* if do_it then - generate_mepo_suggestions @{context} params (1, NONE) thys max_suggestions - (prefix ^ "mepo_suggestions") + generate_mepo_suggestions @{context} params (range, step) thys + max_suggestions (prefix ^ "mepo_suggestions") else () *} @@ -88,7 +91,7 @@ ML {* if do_it then - generate_prover_dependencies @{context} params (1, NONE) thys false + generate_prover_dependencies @{context} params range thys false (prefix ^ "mash_prover_dependencies") else () @@ -96,7 +99,7 @@ ML {* if do_it then - generate_prover_commands @{context} params (1, NONE) thys + generate_prover_commands @{context} params (range, step) thys (prefix ^ "mash_prover_commands") else () diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/TPTP/mash_eval.ML --- a/src/HOL/TPTP/mash_eval.ML Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/TPTP/mash_eval.ML Thu Jan 17 22:43:09 2013 +0100 @@ -9,9 +9,15 @@ sig type params = Sledgehammer_Provers.params + val MePoN : string + val MaSh_IsarN : string + val MaSh_ProverN : string + val MeSh_IsarN : string + val MeSh_ProverN : string + val IsarN : string val evaluate_mash_suggestions : - Proof.context -> params -> int * int option -> string option -> string - -> string -> unit + Proof.context -> params -> int * int option -> string list -> string option + -> string -> string -> string -> unit end; structure MaSh_Eval : MASH_EVAL = @@ -25,15 +31,17 @@ open Sledgehammer_Isar val MePoN = "MePo" -val MaShN = "MaSh" -val MeShN = "MeSh" +val MaSh_IsarN = "MaSh-Isar" +val MaSh_ProverN = "MaSh-Prover" +val MeSh_IsarN = "MeSh-Isar" +val MeSh_ProverN = "MeSh-Prover" val IsarN = "Isar" fun in_range (from, to) j = j >= from andalso (to = NONE orelse j <= the to) -fun evaluate_mash_suggestions ctxt params range prob_dir_name sugg_file_name - report_file_name = +fun evaluate_mash_suggestions ctxt params range methods prob_dir_name + isar_sugg_file_name prover_sugg_file_name report_file_name = let val report_path = report_file_name |> Path.explode val _ = File.write report_path "" @@ -42,21 +50,27 @@ default_params ctxt [] val prover = hd provers val slack_max_facts = generous_max_facts (the max_facts) - val lines = Path.explode sugg_file_name |> File.read_lines + val mash_isar_lines = Path.explode isar_sugg_file_name |> File.read_lines + val mash_prover_lines = + case Path.explode prover_sugg_file_name |> try File.read_lines of + NONE => replicate (length mash_isar_lines) "" + | SOME lines => lines + val mash_lines = mash_isar_lines ~~ mash_prover_lines val css = clasimpset_rule_table_of ctxt val facts = all_facts ctxt true false Symtab.empty [] [] css val name_tabs = build_name_tables nickname_of_thm facts fun with_index facts s = (find_index (curry (op =) s) facts + 1, s) - fun index_string (j, s) = s ^ "@" ^ string_of_int j - fun str_of_res label facts ({outcome, run_time, used_facts, ...} - : prover_result) = + fun index_str (j, s) = s ^ "@" ^ string_of_int j + val str_of_method = enclose " " ": " + fun str_of_result method facts ({outcome, run_time, used_facts, ...} + : prover_result) = let val facts = facts |> map (fn ((name, _), _) => name ()) in - " " ^ label ^ ": " ^ + str_of_method method ^ (if is_none outcome then "Success (" ^ ATP_Util.string_from_time run_time ^ "): " ^ (used_facts |> map (with_index facts o fst) |> sort (int_ord o pairself fst) - |> map index_string + |> map index_str |> space_implode " ") ^ (if length facts < the max_facts then " (of " ^ string_of_int (length facts) ^ ")" @@ -65,13 +79,14 @@ else "Failure: " ^ (facts |> take (the max_facts) |> tag_list 1 - |> map index_string + |> map index_str |> space_implode " ")) end - fun solve_goal (j, line) = + fun solve_goal (j, (mash_isar_line, mash_prover_line)) = if in_range range j then let - val (name, suggs) = extract_suggestions line + val (name, mash_isar_suggs) = extract_suggestions mash_isar_line + val (_, mash_prover_suggs) = extract_suggestions mash_prover_line val th = case find_first (fn (_, th) => nickname_of_thm th = name) facts of SOME (_, th) => th @@ -84,46 +99,58 @@ mepo_suggested_facts ctxt params prover slack_max_facts NONE hyp_ts concl_t facts |> weight_mepo_facts - val (mash_facts, mash_unks) = + fun mash_of suggs = find_mash_suggestions slack_max_facts suggs facts [] [] |>> weight_mash_facts - val mess = + val (mash_isar_facts, mash_isar_unks) = mash_of mash_isar_suggs + val (mash_prover_facts, mash_prover_unks) = mash_of mash_prover_suggs + fun mess_of mash_facts mash_unks = [(mepo_weight, (mepo_facts, [])), (mash_weight, (mash_facts, mash_unks))] - val mesh_facts = - mesh_facts (Thm.eq_thm_prop o pairself snd) slack_max_facts mess + fun mesh_of [] _ = [] + | mesh_of mash_facts mash_unks = + mesh_facts (Thm.eq_thm_prop o pairself snd) slack_max_facts + (mess_of mash_facts mash_unks) + val mesh_isar_facts = mesh_of mash_isar_facts mash_isar_unks + val mesh_prover_facts = mesh_of mash_prover_facts mash_prover_unks val isar_facts = find_suggested_facts (map (rpair 1.0) isar_deps) facts (* adapted from "mirabelle_sledgehammer.ML" *) - fun set_file_name heading (SOME dir) = + fun set_file_name method (SOME dir) = let val prob_prefix = "goal_" ^ string_of_int j ^ "__" ^ encode_str name ^ "__" ^ - heading + method in Config.put dest_dir dir #> Config.put problem_prefix (prob_prefix ^ "__") #> Config.put SMT_Config.debug_files (dir ^ "/" ^ prob_prefix) end | set_file_name _ NONE = I - fun prove heading get facts = - let - fun nickify ((_, stature), th) = - ((K (encode_str (nickname_of_thm th)), stature), th) - val facts = - facts - |> map (get #> nickify) - |> maybe_instantiate_inducts ctxt hyp_ts concl_t - |> take (the max_facts) - val ctxt = ctxt |> set_file_name heading prob_dir_name - val res as {outcome, ...} = - run_prover_for_mash ctxt params prover facts goal - val ok = if is_none outcome then 1 else 0 - in (str_of_res heading facts res, ok) end + fun prove method get facts = + if not (member (op =) methods method) orelse + (null facts andalso method <> IsarN) then + (str_of_method method ^ "Skipped", 0) + else + let + fun nickify ((_, stature), th) = + ((K (encode_str (nickname_of_thm th)), stature), th) + val facts = + facts + |> map (get #> nickify) + |> maybe_instantiate_inducts ctxt hyp_ts concl_t + |> take (the max_facts) + val ctxt = ctxt |> set_file_name method prob_dir_name + val res as {outcome, ...} = + run_prover_for_mash ctxt params prover facts goal + val ok = if is_none outcome then 1 else 0 + in (str_of_result method facts res, ok) end val ress = [fn () => prove MePoN fst mepo_facts, - fn () => prove MaShN fst mash_facts, - fn () => prove MeShN I mesh_facts, + fn () => prove MaSh_IsarN fst mash_isar_facts, + fn () => prove MaSh_ProverN fst mash_prover_facts, + fn () => prove MeSh_IsarN I mesh_isar_facts, + fn () => prove MeSh_ProverN I mesh_prover_facts, fn () => prove IsarN fst isar_facts] |> (* Par_List. *) map (fn f => f ()) in @@ -132,9 +159,9 @@ map snd ress end else - [0, 0, 0, 0] - fun total_of heading ok n = - " " ^ heading ^ ": " ^ string_of_int ok ^ " (" ^ + [0, 0, 0, 0, 0, 0] + fun total_of method ok n = + str_of_method method ^ string_of_int ok ^ " (" ^ Real.fmt (StringCvt.FIX (SOME 1)) (100.0 * Real.fromInt ok / Real.fromInt n) ^ "%)" val inst_inducts = Config.get ctxt instantiate_inducts @@ -148,15 +175,18 @@ "instantiate_inducts" |> not inst_inducts ? prefix "dont_"] val _ = print " * * *"; val _ = print ("Options: " ^ commas options); - val oks = Par_List.map solve_goal (tag_list 1 lines) + val oks = Par_List.map solve_goal (tag_list 1 mash_lines) val n = length oks - val [mepo_ok, mash_ok, mesh_ok, isar_ok] = + val [mepo_ok, mash_isar_ok, mash_prover_ok, mesh_isar_ok, mesh_prover_ok, + isar_ok] = map Integer.sum (map_transpose I oks) in ["Successes (of " ^ string_of_int n ^ " goals)", total_of MePoN mepo_ok n, - total_of MaShN mash_ok n, - total_of MeShN mesh_ok n, + total_of MaSh_IsarN mash_isar_ok n, + total_of MaSh_ProverN mash_prover_ok n, + total_of MeSh_IsarN mesh_isar_ok n, + total_of MeSh_ProverN mesh_prover_ok n, total_of IsarN isar_ok n] |> cat_lines |> print end diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/TPTP/mash_export.ML --- a/src/HOL/TPTP/mash_export.ML Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/TPTP/mash_export.ML Thu Jan 17 22:43:09 2013 +0100 @@ -19,12 +19,14 @@ Proof.context -> params -> int * int option -> theory list -> bool -> string -> unit val generate_isar_commands : - Proof.context -> string -> theory list -> string -> unit + Proof.context -> string -> (int * int option) * int -> theory list -> string + -> unit val generate_prover_commands : - Proof.context -> params -> int * int option -> theory list -> string -> unit + Proof.context -> params -> (int * int option) * int -> theory list -> string + -> unit val generate_mepo_suggestions : - Proof.context -> params -> int * int option -> theory list -> int -> string - -> unit + Proof.context -> params -> (int * int option) * int -> theory list -> int + -> string -> unit val generate_mesh_suggestions : int -> string -> string -> string -> unit end; @@ -118,12 +120,13 @@ fun generate_prover_dependencies ctxt params = generate_isar_or_prover_dependencies ctxt (SOME params) -fun is_bad_query ctxt ho_atp th isar_deps = +fun is_bad_query ctxt ho_atp step j th isar_deps = + j mod step <> 0 orelse Thm.legacy_get_kind th = "" orelse null (these (trim_dependencies th isar_deps)) orelse is_blacklisted_or_something ctxt ho_atp (Thm.get_name_hint th) -fun generate_isar_or_prover_commands ctxt prover params_opt range thys +fun generate_isar_or_prover_commands ctxt prover params_opt (range, step) thys file_name = let val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover @@ -145,7 +148,7 @@ encode_str name ^ ": " ^ encode_strs prevs ^ "; " ^ encode_features (sort_wrt fst feats) val query = - if is_bad_query ctxt ho_atp th isar_deps then "" + if is_bad_query ctxt ho_atp step j th isar_deps then "" else "? " ^ core ^ "\n" val update = "! " ^ core ^ "; " ^ @@ -161,13 +164,13 @@ in File.write_list path lines end fun generate_isar_commands ctxt prover = - generate_isar_or_prover_commands ctxt prover NONE (1, NONE) + generate_isar_or_prover_commands ctxt prover NONE fun generate_prover_commands ctxt (params as {provers = prover :: _, ...}) = generate_isar_or_prover_commands ctxt prover (SOME params) fun generate_mepo_suggestions ctxt (params as {provers = prover :: _, ...}) - range thys max_suggs file_name = + (range, step) thys max_suggs file_name = let val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover val path = file_name |> Path.explode @@ -183,7 +186,7 @@ val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal 1 val isar_deps = isar_dependencies_of name_tabs th in - if is_bad_query ctxt ho_atp th isar_deps then + if is_bad_query ctxt ho_atp step j th isar_deps then "" else let diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/Tools/ATP/atp_systems.ML --- a/src/HOL/Tools/ATP/atp_systems.ML Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/Tools/ATP/atp_systems.ML Thu Jan 17 22:43:09 2013 +0100 @@ -41,6 +41,13 @@ val e_default_sym_offs_weight : real Config.T val e_sym_offs_weight_base : real Config.T val e_sym_offs_weight_span : real Config.T + val spass_H1SOS : string + val spass_H2 : string + val spass_H2LR0LT0 : string + val spass_H2NuVS0 : string + val spass_H2NuVS0Red2 : string + val spass_H2SOS : string + val spass_extra_options : string Config.T val alt_ergoN : string val dummy_thfN : string val eN : string @@ -465,6 +472,9 @@ val spass_H2NuVS0Red2 = "-Heuristic=2 -RNuV=1 -Sorts=0 -RFRew=2 -RBRew=2 -RTaut=2" val spass_H2SOS = "-Heuristic=2 -SOS" +val spass_extra_options = + Attrib.setup_config_string @{binding atp_spass_extra_options} (K "") + (* FIXME: Make "SPASS_NEW_HOME" legacy. *) val spass_config : atp_config = {exec = (["SPASS_NEW_HOME", "SPASS_HOME"], ["SPASS"]), @@ -485,7 +495,7 @@ (InternalError, "Please report this error")] @ known_perl_failures, prem_role = Conjecture, - best_slices = fn _ => + best_slices = fn ctxt => (* FUDGE *) [(0.1667, ((150, DFG Monomorphic, "mono_native", combsN, true), "")), (0.1667, ((500, DFG Monomorphic, "mono_native", liftingN, true), spass_H2SOS)), @@ -494,7 +504,10 @@ (0.1000, ((1000, DFG Monomorphic, "mono_native", liftingN, true), spass_H1SOS)), (0.1000, ((150, DFG Monomorphic, "poly_guards??", liftingN, false), spass_H2NuVS0Red2)), (0.1000, ((300, DFG Monomorphic, "mono_native", combsN, true), spass_H2SOS)), - (0.1000, ((100, DFG Monomorphic, "mono_native", combs_and_liftingN, true), spass_H2))], + (0.1000, ((100, DFG Monomorphic, "mono_native", combs_and_liftingN, true), spass_H2))] + |> (case Config.get ctxt spass_extra_options of + "" => I + | opts => map (apsnd (apsnd (K opts)))), best_max_mono_iters = default_max_mono_iters, best_max_new_mono_instances = default_max_new_mono_instances} diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py Thu Jan 17 22:43:09 2013 +0100 @@ -32,27 +32,33 @@ self.dependenciesDict = {} self.accessibleDict = {} self.expandedAccessibles = {} - # For SInE-like prior - self.featureCountDict = {} - self.triggerFeatures = {} + # For SInE features + self.useSine = False + self.featureCountDict = {} + self.triggerFeaturesDict = {} + self.featureTriggeredFormulasDict = {} self.changed = True """ - Init functions. Side Effect: nameIdDict, idNameDict, featureIdDict, articleDict get filled! + Init functions. nameIdDict, idNameDict, featureIdDict, articleDict get filled! """ - def init_featureDict(self,featureFile): - self.featureDict,self.maxNameId,self.maxFeatureId, self.featureCountDict,self.triggerFeatures = create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,\ - self.maxFeatureId,self.featureCountDict,self.triggerFeatures,featureFile) + def init_featureDict(self,featureFile,sineFeatures): + self.featureDict,self.maxNameId,self.maxFeatureId,self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict =\ + create_feature_dict(self.nameIdDict,self.idNameDict,self.maxNameId,self.featureIdDict,self.maxFeatureId,self.featureCountDict,\ + self.triggerFeaturesDict,self.featureTriggeredFormulasDict,sineFeatures,featureFile) def init_dependenciesDict(self,depFile): self.dependenciesDict = create_dependencies_dict(self.nameIdDict,depFile) def init_accessibleDict(self,accFile): self.accessibleDict,self.maxNameId = create_accessible_dict(self.nameIdDict,self.idNameDict,self.maxNameId,accFile) - def init_all(self,inputFolder,featureFileName = 'mash_features',depFileName='mash_dependencies',accFileName = 'mash_accessibility'): - featureFile = join(inputFolder,featureFileName) - depFile = join(inputFolder,depFileName) - accFile = join(inputFolder,accFileName) - self.init_featureDict(featureFile) + def init_all(self,args): + self.featureFileName = 'mash_features' + self.accFileName = 'mash_accessibility' + self.useSine = args.sineFeatures + featureFile = join(args.inputDir,self.featureFileName) + depFile = join(args.inputDir,args.depFile) + accFile = join(args.inputDir,self.accFileName) + self.init_featureDict(featureFile,self.useSine) self.init_accessibleDict(accFile) self.init_dependenciesDict(depFile) self.expandedAccessibles = {} @@ -76,11 +82,13 @@ def add_feature(self,featureName): if not self.featureIdDict.has_key(featureName): self.featureIdDict[featureName] = self.maxFeatureId - self.featureCountDict[self.maxFeatureId] = 0 + if self.useSine: + self.featureCountDict[self.maxFeatureId] = 0 self.maxFeatureId += 1 self.changed = True fId = self.featureIdDict[featureName] - self.featureCountDict[fId] += 1 + if self.useSine: + self.featureCountDict[fId] += 1 return fId def get_features(self,line): @@ -128,15 +136,22 @@ line = line.split(':') name = line[0].strip() nameId = self.get_name_id(name) - line = line[1].split(';') # Accessible Ids unExpAcc = [self.nameIdDict[a.strip()] for a in line[0].split()] self.accessibleDict[nameId] = unExpAcc features = self.get_features(line) self.featureDict[nameId] = features - minVal = min([self.featureCountDict[f] for f,_w in features]) - self.triggerFeatures[nameId] = [f for f,_w in features if self.featureCountDict[f] == minVal] + if self.useSine: + # SInE Features + minFeatureCount = min([self.featureCountDict[f] for f,_w in features]) + triggerFeatures = [f for f,_w in features if self.featureCountDict[f] == minFeatureCount] + self.triggerFeaturesDict[nameId] = triggerFeatures + for f in triggerFeatures: + if self.featureTriggeredFormulasDict.has_key(f): + self.featureTriggeredFormulasDict[f].append(nameId) + else: + self.featureTriggeredFormulasDict[f] = [nameId] self.dependenciesDict[nameId] = [self.nameIdDict[d.strip()] for d in line[2].split()] self.changed = True return nameId @@ -197,12 +212,14 @@ if self.changed: dictsStream = open(fileName, 'wb') dump((self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ - self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,self.featureCountDict,self.triggerFeatures),dictsStream) + self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,\ + self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict,self.useSine),dictsStream) self.changed = False dictsStream.close() def load(self,fileName): dictsStream = open(fileName, 'rb') self.accessibleDict,self.dependenciesDict,self.expandedAccessibles,self.featureDict,\ - self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,self.featureCountDict,self.triggerFeatures = load(dictsStream) + self.featureIdDict,self.idNameDict,self.maxFeatureId,self.maxNameId,self.nameIdDict,\ + self.featureCountDict,self.triggerFeaturesDict,self.featureTriggeredFormulasDict,self.useSine = load(dictsStream) self.changed = False dictsStream.close() diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/Tools/Sledgehammer/MaSh/src/mash.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py Thu Jan 17 22:43:09 2013 +0100 @@ -51,46 +51,62 @@ parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.") # Theory Parameters parser.add_argument('--theoryDefValPos',default=-7.5,help="Default value for positive unknown features. Default=-7.5.",type=float) -parser.add_argument('--theoryDefValNeg',default=-15.0,help="Default value for negative unknown features. Default=-15.0.",type=float) -parser.add_argument('--theoryPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float) +parser.add_argument('--theoryDefValNeg',default=-10.0,help="Default value for negative unknown features. Default=-15.0.",type=float) +parser.add_argument('--theoryPosWeight',default=2.0,help="Weight value for positive features. Default=2.0.",type=float) parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.") # NB Parameters parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float) parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float) parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float) -parser.add_argument('--NBSinePrior',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.") -parser.add_argument('--NBSineWeight',default=100.0,help="How much the SInE prior is weighted. Default=100.0.",type=float) +# TODO: Rename to sineFeatures +parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.") +parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float) parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.") -parser.add_argument('--predef',default=False,action='store_true',\ - help="Use predefined predictions. Used only for comparison with the actual learning. Expects mash_mepo_suggestions in inputDir.") +parser.add_argument('--predef',help="Use predefined predictions. Used only for comparison with the actual learning. Argument is the filename of the predictions.") parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\ WARNING: This will make the program a lot slower! Default=False.") parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.") parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int) parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log') parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.") +parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle') +parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle') +parser.add_argument('--theoryFile', default='../tmp/theory.pickle', help='Model file name. Default=../tmp/theory.pickle') -def main(argv = sys.argv[1:]): +def mash(argv = sys.argv[1:]): # Initializing command-line arguments args = parser.parse_args(argv) - + # Set up logging logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', datefmt='%d-%m %H:%M:%S', filename=args.log, - filemode='w') - console = logging.StreamHandler(sys.stdout) - console.setLevel(logging.INFO) - formatter = logging.Formatter('# %(message)s') - console.setFormatter(formatter) - logging.getLogger('').addHandler(console) + filemode='w') logger = logging.getLogger('main.py') + + #""" + # remove old handler for tester + #logger.root.handlers[0].stream.close() + logger.root.removeHandler(logger.root.handlers[0]) + file_handler = logging.FileHandler(args.log) + file_handler.setLevel(logging.DEBUG) + formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s') + file_handler.setFormatter(formatter) + logger.root.addHandler(file_handler) + #""" if args.quiet: logger.setLevel(logging.WARNING) - console.setLevel(logging.WARNING) + #console.setLevel(logging.WARNING) + else: + console = logging.StreamHandler(sys.stdout) + console.setLevel(logging.INFO) + formatter = logging.Formatter('# %(message)s') + console.setFormatter(formatter) + logging.getLogger('').addHandler(console) + if not os.path.exists(args.outputDir): os.makedirs(args.outputDir) @@ -98,24 +114,16 @@ # Pick algorithm if args.nb: logger.info('Using sparse Naive Bayes for learning.') - model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal,args.NBSinePrior,args.NBSineWeight) - modelFile = os.path.join(args.outputDir,'NB.pickle') + model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal) elif args.snow: logger.info('Using naive bayes (SNoW) for learning.') model = SNoW() - modelFile = os.path.join(args.outputDir,'SNoW.pickle') elif args.predef: logger.info('Using predefined predictions.') - #predictionFile = os.path.join(args.inputDir,'mash_meng_paulson_suggestions') - predictionFile = os.path.join(args.inputDir,'mash_mepo_suggestions') - model = Predefined(predictionFile) - modelFile = os.path.join(args.outputDir,'mepo.pickle') + model = Predefined(args.predef) else: logger.info('No algorithm specified. Using sparse Naive Bayes.') - model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal,args.NBSinePrior,args.NBSineWeight) - modelFile = os.path.join(args.outputDir,'NB.pickle') - dictsFile = os.path.join(args.outputDir,'dicts.pickle') - theoryFile = os.path.join(args.outputDir,'theory.pickle') + model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal) # Initializing model if args.init: @@ -124,7 +132,7 @@ # Load all data dicts = Dictionaries() - dicts.init_all(args.inputDir,depFileName=args.depFile) + dicts.init_all(args) # Create Model trainData = dicts.featureDict.keys() @@ -134,10 +142,10 @@ depFile = os.path.join(args.inputDir,args.depFile) theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight) theoryModels.init(depFile,dicts) - theoryModels.save(theoryFile) + theoryModels.save(args.theoryFile) - model.save(modelFile) - dicts.save(dictsFile) + model.save(args.modelFile) + dicts.save(args.dictsFile) logger.info('All Done. %s seconds needed.',round(time()-startTime,2)) return 0 @@ -149,12 +157,22 @@ dicts = Dictionaries() theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight) # Load Files - if os.path.isfile(dictsFile): - dicts.load(dictsFile) - if os.path.isfile(modelFile): - model.load(modelFile) - if os.path.isfile(theoryFile) and args.learnTheories: - theoryModels.load(theoryFile) + if os.path.isfile(args.dictsFile): + #logger.info('Loading Dictionaries') + #startTime = time() + dicts.load(args.dictsFile) + #logger.info('Done %s',time()-startTime) + if os.path.isfile(args.modelFile): + #logger.info('Loading Model') + #startTime = time() + model.load(args.modelFile) + #logger.info('Done %s',time()-startTime) + if os.path.isfile(args.theoryFile) and args.learnTheories: + #logger.info('Loading Theory Models') + #startTime = time() + theoryModels.load(args.theoryFile) + #logger.info('Done %s',time()-startTime) + logger.info('All loading completed') # IO Streams OS = open(args.predictions,'w') @@ -173,7 +191,7 @@ # try: if True: if line.startswith('!'): - problemId = dicts.parse_fact(line) + problemId = dicts.parse_fact(line) # Statistics if args.statistics and computeStats: computeStats = False @@ -183,7 +201,7 @@ if args.learnTheories: tmp = [dicts.idNameDict[x] for x in dicts.dependenciesDict[problemId]] usedTheories = set([x.split('.')[0] for x in tmp]) - theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories) + theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories,len(theoryModels.accessibleTheories)) stats.update(predictions,dicts.dependenciesDict[problemId],statementCounter) if not stats.badPreds == []: bp = string.join([str(dicts.idNameDict[x]) for x in stats.badPreds], ',') @@ -211,7 +229,8 @@ computeStats = True if args.predef: continue - name,features,accessibles,hints = dicts.parse_problem(line) + name,features,accessibles,hints = dicts.parse_problem(line) + # Create predictions logger.info('Starting computation for problem on line %s',lineCounter) # Update Models with hints @@ -223,11 +242,29 @@ pass else: model.update('hints',features,hints) - + # Predict premises if args.learnTheories: predictedTheories,accessibles = theoryModels.predict(features,accessibles,dicts) - predictions,predictionValues = model.predict(features,accessibles,dicts) + + # Add additional features on premise lvl if sine is enabled + if args.sineFeatures: + origFeatures = [f for f,_w in features] + secondaryFeatures = [] + for f in origFeatures: + if dicts.featureCountDict[f] == 1: + continue + triggeredFormulas = dicts.featureTriggeredFormulasDict[f] + for formula in triggeredFormulas: + tFeatures = dicts.triggerFeaturesDict[formula] + #tFeatures = [ff for ff,_fw in dicts.featureDict[formula]] + newFeatures = set(tFeatures).difference(secondaryFeatures+origFeatures) + for fNew in newFeatures: + secondaryFeatures.append((fNew,args.sineWeight)) + predictionsFeatures = features+secondaryFeatures + else: + predictionsFeatures = features + predictions,predictionValues = model.predict(predictionsFeatures,accessibles,dicts) assert len(predictions) == len(predictionValues) # Delete hints @@ -268,10 +305,10 @@ # Save if args.saveModel: - model.save(modelFile) + model.save(args.modelFile) if args.learnTheories: - theoryModels.save(theoryFile) - dicts.save(dictsFile) + theoryModels.save(args.theoryFile) + dicts.save(args.dictsFile) if not args.saveStats == None: if args.learnTheories: theoryStatsFile = os.path.join(args.outputDir,'theoryStats') @@ -282,25 +319,37 @@ if __name__ == '__main__': # Example: + #List + # ISAR Theories + #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--learnTheories'] + #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories'] + # ISAR predef mesh + #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--predef','../data/20130110/List/mesh_suggestions'] + #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130110/List/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats'] + + # Auth # ISAR Theories - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--learnTheories'] + #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--learnTheories','--sineFeatures'] #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories'] - # ISAR MePo - #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--predef'] - #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats'] + # ISAR predef mesh + #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--predef','../data/20121227b/Auth/mesh_suggestions'] + #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20121227b/Auth/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats'] # Jinja # ISAR Theories - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Jinja/','--learnTheories'] - #args = ['-i', '../data/20121227b/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories'] + #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories'] + #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--cutOff','500','--learnTheories'] + # ISAR Theories SinePrior + #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories','--sineFeatures'] + #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories','--sineFeatures'] # ISAR NB - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121221/Jinja/'] - #args = ['-i', '../data/20121221/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500'] - # ISAR MePo - #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--predef'] - #args = ['-i', '../data/20121212/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats'] + #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/'] + #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500'] + # ISAR predef mesh + #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--predef','../data/20130111/Jinja/mesh_suggestions'] + #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130111/Jinja/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats'] # ISAR NB ATP #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--depFile','mash_atp_dependencies'] #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies'] @@ -313,28 +362,5 @@ #args = ['-i', '../data/20121212/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500'] - - # Probability - # ISAR Theories - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121213/Probability/','--learnTheories'] - #args = ['-i', '../data/20121213/Probability/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories'] - # ISAR NB - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121213/Probability/'] - #args = ['-i', '../data/20121213/Probability/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/ProbIsarNB.stats','--cutOff','500'] - # ISAR MePo - #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121213/Probability/','--predef'] - #args = ['-i', '../data/20121213/Probability/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats'] - # ISAR NB ATP - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--depFile','mash_atp_dependencies'] - #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies'] - #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--predef','--depFile','mash_atp_dependencies'] - #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats','--depFile','mash_atp_dependencies'] - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--depFile','mash_atp_dependencies','--snow'] - #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies'] - - - - #startTime = time() - #sys.exit(main(args)) - #print 'New ' + str(round(time()-startTime,2)) - sys.exit(main()) + #sys.exit(mash(args)) + sys.exit(mash()) diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/Tools/Sledgehammer/MaSh/src/readData.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/readData.py Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/readData.py Thu Jan 17 22:43:09 2013 +0100 @@ -14,7 +14,8 @@ import sys,logging -def create_feature_dict(nameIdDict,idNameDict,maxNameId,featureIdDict,maxFeatureId,featureCountDict,triggerFeatures,inputFile): +def create_feature_dict(nameIdDict,idNameDict,maxNameId,featureIdDict,maxFeatureId,featureCountDict,\ + triggerFeaturesDict,featureTriggeredFormulasDict,sineFeatures,inputFile): logger = logging.getLogger('create_feature_dict') featureDict = {} IS = open(inputFile,'r') @@ -33,7 +34,7 @@ # Feature Ids featureNames = [f.strip() for f in line[1].split()] features = [] - minFeatureCount = 0 + minFeatureCount = 9999999 for fn in featureNames: weight = 1.0 tmp = fn.split('=') @@ -46,13 +47,21 @@ maxFeatureId += 1 fId = featureIdDict[fn] features.append((fId,weight)) - featureCountDict[fId] += 1 - minFeatureCount = min(triggerFeatures,featureCountDict[fId]) + if sineFeatures: + featureCountDict[fId] += 1 + minFeatureCount = min(minFeatureCount,featureCountDict[fId]) # Store results featureDict[nameId] = features - triggerFeatures[nameId] = [f for f,_w in features if featureCountDict[f] == minFeatureCount] + if sineFeatures: + triggerFeatures = [f for f,_w in features if featureCountDict[f] == minFeatureCount] + triggerFeaturesDict[nameId] = triggerFeatures + for f in triggerFeatures: + if featureTriggeredFormulasDict.has_key(f): + featureTriggeredFormulasDict[f].append(nameId) + else: + featureTriggeredFormulasDict[f] = [nameId] IS.close() - return featureDict,maxNameId,maxFeatureId,featureCountDict,triggerFeatures + return featureDict,maxNameId,maxFeatureId,featureCountDict,triggerFeaturesDict,featureTriggeredFormulasDict def create_dependencies_dict(nameIdDict,inputFile): logger = logging.getLogger('create_dependencies_dict') diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/Tools/Sledgehammer/MaSh/src/singleNaiveBayes.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/singleNaiveBayes.py Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/singleNaiveBayes.py Thu Jan 17 22:43:09 2013 +0100 @@ -44,9 +44,12 @@ for f,_w in features: if not self.counts.has_key(f): - fPosCount = 0.0 - fNegCount = 0.0 - self.counts[f] = [fPosCount,fNegCount] + if label: + fPosCount = 0.0 + fNegCount = 0.0 + self.counts[f] = [fPosCount,fNegCount] + else: + continue posCount,negCount = self.counts[f] if label: posCount += 1 @@ -89,8 +92,9 @@ elif self.pos ==0: return 0 logneg = log(self.neg) + lognegprior=log(float(self.neg)/5) logpos = log(self.pos) - prob = logpos - logneg + prob = logpos - lognegprior for f,_w in features: if self.counts.has_key(f): diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py Thu Jan 17 22:43:09 2013 +0100 @@ -19,13 +19,11 @@ An updateable naive Bayes classifier. ''' - def __init__(self,defaultPriorWeight = 20.0,posWeight = 20.0,defVal = -15.0,useSinePrior = False,sineWeight = 100.0): + def __init__(self,defaultPriorWeight = 20.0,posWeight = 20.0,defVal = -15.0): ''' Constructor ''' self.counts = {} - self.sinePrior = useSinePrior - self.sineWeight = sineWeight self.defaultPriorWeight = defaultPriorWeight self.posWeight = posWeight self.defVal = defVal @@ -100,19 +98,11 @@ Returns a ranking of the accessibles. """ predictions = [] - fSet = set([f for f,_w in features]) for a in accessibles: posA = self.counts[a][0] fA = set(self.counts[a][1].keys()) fWeightsA = self.counts[a][1] - prior = posA - if self.sinePrior: - triggerFeatures = dicts.triggerFeatures[a] - triggeredFeatures = fSet.intersection(triggerFeatures) - for f in triggeredFeatures: - posW = dicts.featureCountDict[f] - prior += self.sineWeight / posW - resultA = log(prior) + resultA = log(posA) for f,w in features: # DEBUG #w = 1 @@ -131,12 +121,12 @@ def save(self,fileName): OStream = open(fileName, 'wb') - dump((self.counts,self.defaultPriorWeight,self.posWeight,self.defVal,self.sinePrior,self.sineWeight),OStream) + dump((self.counts,self.defaultPriorWeight,self.posWeight,self.defVal),OStream) OStream.close() def load(self,fileName): OStream = open(fileName, 'rb') - self.counts,self.defaultPriorWeight,self.posWeight,self.defVal,self.sinePrior,self.sineWeight = load(OStream) + self.counts,self.defaultPriorWeight,self.posWeight,self.defVal = load(OStream) OStream.close() diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/Tools/Sledgehammer/MaSh/src/stats.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py Thu Jan 17 22:43:09 2013 +0100 @@ -120,7 +120,8 @@ round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff) #try: - if True: + #if True: + if False: from matplotlib.pyplot import plot,figure,show,xlabel,ylabel,axis,hist avgRecall = [float(x)/self.problems for x in self.recallData] figure('Recall') diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/Tools/Sledgehammer/MaSh/src/tester.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/tester.py Thu Jan 17 22:43:09 2013 +0100 @@ -0,0 +1,182 @@ +''' +Created on Jan 11, 2013 + +Searches for the best parameters. + +@author: Daniel Kuehlwein +''' + +import logging,sys,os +from multiprocessing import Process,Queue,current_process,cpu_count +from mash import mash + +def worker(inQueue, outQueue): + for func, args in iter(inQueue.get, 'STOP'): + result = func(*args) + #print '%s says that %s%s = %s' % (current_process().name, func.__name__, args, result) + outQueue.put(result) + +def run_mash(runId,inputDir,logFile,predictionFile,\ + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ + sineFeatures,sineWeight): + # Init + runId = str(runId) + predictionFile = predictionFile + runId + args = ['--statistics','--init','--inputDir',inputDir,'-q','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId, + '--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\ + '--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)] + if learnTheories: + args = args + ['--learnTheories'] + if sineFeatures: + args += ['--sineFeatures','--sineWeight',str(sineWeight)] + mash(args) + # Run + args = ['-q','-i',inputFile,'-p',predictionFile,'--statistics','--cutOff','500','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,\ + '--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\ + '--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)] + if learnTheories: + args = args + ['--learnTheories'] + if sineFeatures: + args += ['--sineFeatures','--sineWeight',str(sineWeight)] + mash(args) + + # Get Results + IS = open(logFile,'r') + lines = IS.readlines() + tmpRes = lines[-1].split() + avgAuc = tmpRes[5] + avgRecall100 = tmpRes[9] + tmpTheoryRes = lines[-3].split() + avgTheoryPrecision = tmpTheoryRes[5] + avgTheoryRecall100 = tmpTheoryRes[7] + avgTheoryRecall = tmpTheoryRes[9] + avgTheoryPredictedPercent = tmpTheoryRes[11] + IS.close() + + # Delete old models + os.remove(logFile) + os.remove(predictionFile) + os.remove('../tmp/t'+runId) + os.remove('../tmp/m'+runId) + os.remove('../tmp/d'+runId) + + outFile = open('tester','a') + #print 'avgAuc %s avgRecall100 %s avgTheoryPrecision %s avgTheoryRecall100 %s avgTheoryRecall %s avgTheoryPredictedPercent %s' + outFile.write('\t'.join([str(learnTheories),str(theoryDefValPos),str(theoryDefValNeg),str(theoryPosWeight),str(NBDefaultPriorWeight),str(NBDefVal),str(NBPosWeight),str(sineFeatures),str(sineWeight),str(avgAuc),str(avgRecall100),str(avgTheoryPrecision),str(avgTheoryRecall100),str(avgTheoryRecall),str(avgTheoryPredictedPercent)])+'\n') + outFile.close() + print learnTheories,'\t',theoryDefValPos,'\t',theoryDefValNeg,'\t',theoryPosWeight,'\t',\ + NBDefaultPriorWeight,'\t',NBDefVal,'\t',NBPosWeight,'\t',\ + sineFeatures,'\t',sineWeight,'\t',\ + avgAuc,'\t',avgRecall100,'\t',avgTheoryPrecision,'\t',avgTheoryRecall100,'\t',avgTheoryRecall,'\t',avgTheoryPredictedPercent + return learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ + sineFeatures,sineWeight,\ + avgAuc,avgRecall100,avgTheoryPrecision,avgTheoryRecall100,avgTheoryRecall,avgTheoryPredictedPercent + +def update_best_params(avgRecall100,bestAvgRecall100,\ + bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight,\ + bestlearnTheories,besttheoryDefValPos,besttheoryDefValNeg,besttheoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,sineFeatures,sineWeight,\ + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight): + if avgRecall100 > bestAvgRecall100: + bestAvgRecall100 = avgRecall100 + bestNBDefaultPriorWeight = NBDefaultPriorWeight + bestNBDefVal = NBDefVal + bestNBPosWeight = NBPosWeight + bestSineFeatures = sineFeatures + bestSineWeight = sineWeight + return bestlearnTheories,besttheoryDefValPos,besttheoryDefValNeg,besttheoryPosWeight,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight + +if __name__ == '__main__': + cores = cpu_count() + #cores = 1 + # Options + depFile = 'mash_dependencies' + outputDir = '../tmp/' + numberOfPredictions = 500 + + learnTheoriesRange = [True,False] + theoryDefValPosRange = [-x for x in range(1,20)] + theoryDefValNegRange = [-x for x in range(1,20)] + theoryPosWeightRange = [x for x in range(1,10)] + + NBDefaultPriorWeightRange = [10*x for x in range(10)] + NBDefValRange = [-x for x in range(1,20)] + NBPosWeightRange = [10*x for x in range(1,10)] + sineFeaturesRange = [True,False] + sineWeightRange = [0.1,0.25,0.5,0.75,1.0] + + # Test 1 + inputFile = '../data/20121227b/Auth/mash_commands' + inputDir = '../data/20121227b/Auth/' + predictionFile = '../tmp/auth.pred' + logFile = '../tmp/auth.log' + learnTheories = True + theoryDefValPos = -7.5 + theoryDefValNeg = -15.0 + theoryPosWeight = 10.0 + NBDefaultPriorWeight = 20.0 + NBDefVal =- 15.0 + NBPosWeight = 10.0 + sineFeatures = True + sineWeight = 0.5 + + task_queue = Queue() + done_queue = Queue() + + runs = 0 + for inputDir in ['../data/20121227b/Auth/']: + problemId = inputDir.split('/')[-2] + inputFile = os.path.join(inputDir,'mash_commands') + predictionFile = os.path.join('../tmp/',problemId+'.pred') + logFile = os.path.join('../tmp/',problemId+'.log') + learnTheories = True + theoryDefValPos = -7.5 + theoryDefValNeg = -15.0 + theoryPosWeight = 10.0 + + bestAvgRecall100 = 0.0 + bestNBDefaultPriorWeight = 1.0 + bestNBDefVal = 1.0 + bestNBPosWeight = 1.0 + bestSineFeatures = False + bestSineWeight = 0.0 + bestlearnTheories = True + besttheoryDefValPos = 1.0 + besttheoryDefValNeg = -15.0 + besttheoryPosWeight = 5.0 + for theoryPosWeight in theoryPosWeightRange: + for theoryDefValNeg in theoryDefValNegRange: + for NBDefaultPriorWeight in NBDefaultPriorWeightRange: + for NBDefVal in NBDefValRange: + for NBPosWeight in NBPosWeightRange: + for sineFeatures in sineFeaturesRange: + if sineFeatures: + for sineWeight in sineWeightRange: + localLogFile = logFile+str(runs) + task_queue.put((run_mash,(runs,inputDir, localLogFile, predictionFile, learnTheories, theoryDefValPos, theoryDefValNeg, theoryPosWeight, NBDefaultPriorWeight, NBDefVal, NBPosWeight, sineFeatures, sineWeight))) + runs += 1 + else: + localLogFile = logFile+str(runs) + task_queue.put((run_mash,(runs,inputDir, localLogFile, predictionFile, learnTheories, theoryDefValPos, theoryDefValNeg, theoryPosWeight, NBDefaultPriorWeight, NBDefVal, NBPosWeight, sineFeatures, sineWeight))) + runs += 1 + # Start worker processes + processes = [] + for _i in range(cores): + process = Process(target=worker, args=(task_queue, done_queue)) + process.start() + processes.append(process) + + for _i in range(runs): + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ + sineFeatures,sineWeight,\ + avgAuc,avgRecall100,avgTheoryPrecision,avgTheoryRecall100,avgTheoryRecall,avgTheoryPredictedPercent = done_queue.get() + bestlearnTheories,besttheoryDefValPos,besttheoryDefValNeg,besttheoryPosWeight,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight = update_best_params(avgRecall100,bestAvgRecall100,\ + bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight,\ + bestlearnTheories,besttheoryDefValPos,besttheoryDefValNeg,besttheoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,sineFeatures,sineWeight,\ + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight) + print 'bestAvgRecall100 %s bestNBDefaultPriorWeight %s bestNBDefVal %s bestNBPosWeight %s bestSineFeatures %s bestSineWeight %s',bestAvgRecall100,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight + \ No newline at end of file diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/Tools/Sledgehammer/MaSh/src/theoryStats.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/theoryStats.py Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/theoryStats.py Thu Jan 17 22:43:09 2013 +0100 @@ -29,28 +29,35 @@ self.recall100 = 0 self.recall = 0.0 self.predicted = 0.0 + self.predictedPercent = 0.0 - def update(self,currentTheory,predictedTheories,usedTheories): + def update(self,currentTheory,predictedTheories,usedTheories,nrAvailableTheories): self.count += 1 allPredTheories = predictedTheories.union([currentTheory]) if set(usedTheories).issubset(allPredTheories): self.recall100 += 1 localPredicted = len(allPredTheories) self.predicted += localPredicted + localPredictedPercent = float(localPredicted)/nrAvailableTheories + self.predictedPercent += localPredictedPercent localPrec = float(len(set(usedTheories).intersection(allPredTheories))) / localPredicted self.precision += localPrec - localRecall = float(len(set(usedTheories).intersection(allPredTheories))) / len(set(usedTheories)) + if len(set(usedTheories)) == 0: + localRecall = 1.0 + else: + localRecall = float(len(set(usedTheories).intersection(allPredTheories))) / len(set(usedTheories)) self.recall += localRecall self.logger.info('Theory prediction results:') - self.logger.info('Problem: %s \t Recall100: %s \t Precision: %s \t Recall: %s \t PredictedTeories: %s',\ - self.count,self.recall100,round(localPrec,2),round(localRecall,2),localPredicted) + self.logger.info('Problem: %s \t Recall100: %s \t Precision: %s \t Recall: %s \t PredictedTeoriesPercent: %s PredictedTeories: %s',\ + self.count,self.recall100,round(localPrec,2),round(localRecall,2),round(localPredictedPercent,2),localPredicted) def printAvg(self): self.logger.info('Average theory results:') - self.logger.info('avgPrecision: %s \t avgRecall100: %s \t avgRecall: %s \t avgPredicted:%s', \ + self.logger.info('avgPrecision: %s \t avgRecall100: %s \t avgRecall: %s \t avgPredictedPercent: %s \t avgPredicted: %s', \ round(self.precision/self.count,2),\ round(float(self.recall100)/self.count,2),\ round(self.recall/self.count,2),\ + round(self.predictedPercent /self.count,2),\ round(self.predicted /self.count,2)) def save(self,fileName): diff -r d64bc889f7d6 -r 157d90cdcef0 src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML Thu Jan 17 18:23:10 2013 +0100 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML Thu Jan 17 22:43:09 2013 +0100 @@ -150,8 +150,7 @@ val core = "--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^ " --numberOfPredictions " ^ string_of_int max_suggs ^ - " --NBSinePrior" (* --learnTheories *) ^ - (if save then " --saveModel" else "") + (* " --learnTheories" ^ *) (if save then " --saveModel" else "") val command = "\"$ISABELLE_SLEDGEHAMMER_MASH/src/mash.py\" --quiet --outputDir " ^ File.shell_path (mash_model_dir ()) ^ " --log " ^ log_file ^ " " ^ core ^ @@ -774,27 +773,28 @@ val max_proximity_facts = 100 -fun find_mash_suggestions max_facts suggs facts chained raw_unknown = - let - val raw_mash = - facts |> find_suggested_facts suggs - (* The weights currently returned by "mash.py" are too spaced out to - make any sense. *) - |> map fst - val unknown_chained = - inter (Thm.eq_thm_prop o pairself snd) chained raw_unknown - val proximity = - facts |> sort (thm_ord o pairself snd o swap) - |> take max_proximity_facts - val mess = - [(0.90 (* FUDGE *), (map (rpair 1.0) unknown_chained, [])), - (0.08 (* FUDGE *), (weight_raw_mash_facts raw_mash, raw_unknown)), - (0.02 (* FUDGE *), (weight_proximity_facts proximity, []))] - val unknown = - raw_unknown - |> fold (subtract (Thm.eq_thm_prop o pairself snd)) - [unknown_chained, proximity] - in (mesh_facts (Thm.eq_thm_prop o pairself snd) max_facts mess, unknown) end +fun find_mash_suggestions _ [] _ _ raw_unknown = ([], raw_unknown) + | find_mash_suggestions max_facts suggs facts chained raw_unknown = + let + val raw_mash = + facts |> find_suggested_facts suggs + (* The weights currently returned by "mash.py" are too spaced out + to make any sense. *) + |> map fst + val unknown_chained = + inter (Thm.eq_thm_prop o pairself snd) chained raw_unknown + val proximity = + facts |> sort (thm_ord o pairself snd o swap) + |> take max_proximity_facts + val mess = + [(0.90 (* FUDGE *), (map (rpair 1.0) unknown_chained, [])), + (0.08 (* FUDGE *), (weight_raw_mash_facts raw_mash, raw_unknown)), + (0.02 (* FUDGE *), (weight_proximity_facts proximity, []))] + val unknown = + raw_unknown + |> fold (subtract (Thm.eq_thm_prop o pairself snd)) + [unknown_chained, proximity] + in (mesh_facts (Thm.eq_thm_prop o pairself snd) max_facts mess, unknown) end fun mash_suggested_facts ctxt ({overlord, learn, ...} : params) prover max_facts hyp_ts concl_t facts =