# HG changeset patch # User krauss # Date 1377030242 -7200 # Node ID ae863ed9f64fc6b0b830da3e4f74e8775a82dbd0 # Parent 186535065f5c776d54076c31e93f257f8202ec41# Parent d81211f61a1b4e6bb07c8b8c19f23acd0b328383 merged diff -r 186535065f5c -r ae863ed9f64f src/Doc/IsarImplementation/Tactic.thy --- a/src/Doc/IsarImplementation/Tactic.thy Tue Aug 20 11:39:53 2013 +0200 +++ b/src/Doc/IsarImplementation/Tactic.thy Tue Aug 20 22:24:02 2013 +0200 @@ -336,13 +336,15 @@ \item @{ML match_tac}, @{ML ematch_tac}, @{ML dmatch_tac}, and @{ML bimatch_tac} are similar to @{ML resolve_tac}, @{ML eresolve_tac}, @{ML dresolve_tac}, and @{ML biresolve_tac}, respectively, but do - not instantiate schematic variables in the goal state. + not instantiate schematic variables in the goal state.% +\footnote{Strictly speaking, matching means to treat the unknowns in the goal + state as constants, but these tactics merely discard unifiers that would + update the goal state. In rare situations (where the conclusion and + goal state have flexible terms at the same position), the tactic + will fail even though an acceptable unifier exists.} + These tactics were written for a specific application within the classical reasoner. Flexible subgoals are not updated at will, but are left alone. - Strictly speaking, matching means to treat the unknowns in the goal - state as constants; these tactics merely discard unifiers that would - update the goal state. - \end{description} *} diff -r 186535065f5c -r ae863ed9f64f src/Doc/Sledgehammer/document/root.tex --- a/src/Doc/Sledgehammer/document/root.tex Tue Aug 20 11:39:53 2013 +0200 +++ b/src/Doc/Sledgehammer/document/root.tex Tue Aug 20 22:24:02 2013 +0200 @@ -504,16 +504,6 @@ using \textit{type\_enc} (\S\ref{problem-encoding}), this is a bug, and you are strongly encouraged to report this to the author at \authoremail. -%\point{Why are the generated Isar proofs so ugly/broken?} -% -%The current implementation of the Isar proof feature, -%enabled by the \textit{isar\_proofs} option (\S\ref{output-format}), -%is experimental. Work on a new implementation has begun. There is a large body of -%research into transforming resolution proofs into natural deduction proofs (such -%as Isar proofs), which we hope to leverage. In the meantime, a workaround is to -%set the \textit{isar\_compress} option (\S\ref{output-format}) to a larger -%value or to try several provers and keep the nicest-looking proof. - \point{How can I tell whether a suggested proof is sound?} Earlier versions of Sledgehammer often suggested unsound proofs---either proofs @@ -633,8 +623,7 @@ Sledgehammer's philosophy should work out of the box, without user guidance. Many of the options are meant to be used mostly by the Sledgehammer developers -for experimentation purposes. Of course, feel free to experiment with them if -you are so inclined. +for experiments. Of course, feel free to try them out if you are so inclined. \section{Command Syntax} \label{command-syntax} @@ -852,10 +841,10 @@ \item[\labelitemi] \textbf{\textit{alt\_ergo}:} Alt-Ergo is a polymorphic ATP developed by Bobot et al.\ \cite{alt-ergo}. It supports the TPTP polymorphic typed first-order format (TFF1) via Why3 -\cite{why3}. It is included for experimental purposes. To use Alt-Ergo, set the -environment variable \texttt{WHY3\_HOME} to the directory that contains the -\texttt{why3} executable. Sledgehammer has been tested with Alt-Ergo 0.95.1 and -an unidentified development version of Why3. +\cite{why3}. To use Alt-Ergo, set the environment variable \texttt{WHY3\_HOME} +to the directory that contains the \texttt{why3} executable. Sledgehammer has +been tested with Alt-Ergo 0.95.1 and an unidentified development version of +Why3. \item[\labelitemi] \textbf{\textit{cvc3}:} CVC3 is an SMT solver developed by Clark Barrett, Cesare Tinelli, and their colleagues \cite{cvc3}. To use CVC3, @@ -1071,10 +1060,10 @@ The traditional memoryless MePo relevance filter. \item[\labelitemi] \textbf{\textit{mash}:} -The memoryful MaSh machine learner. MaSh relies on the external Python program -\texttt{mash.py}, which is part of Isabelle. To enable MaSh, set the environment -variable \texttt{MASH} to \texttt{yes}. Persistent data is stored in the -directory \texttt{\$ISABELLE\_HOME\_USER/mash}. +The experimental, memoryful MaSh machine learner. MaSh relies on the external +Python program \texttt{mash.py}, which is part of Isabelle. To enable MaSh, set +the environment variable \texttt{MASH} to \texttt{yes}. Persistent data is +stored in the directory \texttt{\$ISABELLE\_HOME\_USER/mash}. \item[\labelitemi] \textbf{\textit{mesh}:} The MeSh filter, which combines the rankings from MePo and MaSh. diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/BNF_FP_Basic.thy --- a/src/HOL/BNF/BNF_FP_Basic.thy Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/BNF/BNF_FP_Basic.thy Tue Aug 20 22:24:02 2013 +0200 @@ -147,6 +147,12 @@ "(fun_rel R (fun_rel S T)) f g = (\x y. R x y \ (fun_rel S T) (f x) (g y))" unfolding fun_rel_def .. +lemma subst_eq_imp: "(\a b. a = b \ P a b) \ (\a. P a a)" + by auto + +lemma eq_subset: "op = \ (\a b. P a b \ a = b)" + by auto + ML_file "Tools/bnf_fp_util.ML" ML_file "Tools/bnf_fp_def_sugar_tactics.ML" ML_file "Tools/bnf_fp_def_sugar.ML" diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/BNF_GFP.thy --- a/src/HOL/BNF/BNF_GFP.thy Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/BNF/BNF_GFP.thy Tue Aug 20 22:24:02 2013 +0200 @@ -70,9 +70,6 @@ lemma image2_eqI: "\b = f x; c = g x; x \ A\ \ (b, c) \ image2 A f g" unfolding image2_def by auto -lemma eq_subset: "op = \ (\a b. P a b \ a = b)" -by auto - lemma IdD: "(a, b) \ Id \ a = b" by auto diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/Examples/Process.thy --- a/src/HOL/BNF/Examples/Process.thy Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/BNF/Examples/Process.thy Tue Aug 20 22:24:02 2013 +0200 @@ -8,7 +8,7 @@ header {* Processes *} theory Process -imports "../BNF" +imports Stream begin codatatype 'a process = @@ -38,81 +38,31 @@ subsection{* Coinduction *} theorem process_coind[elim, consumes 1, case_names iss Action Choice, induct pred: "HOL.eq"]: -assumes phi: "\ p p'" and -iss: "\p p'. \ p p' \ (isAction p \ isAction p') \ (isChoice p \ isChoice p')" and -Act: "\ a a' p p'. \ (Action a p) (Action a' p') \ a = a' \ \ p p'" and -Ch: "\ p q p' q'. \ (Choice p q) (Choice p' q') \ \ p p' \ \ q q'" -shows "p = p'" -proof(intro mp[OF process.dtor_coinduct, of \, OF _ phi], clarify) - fix p p' assume \: "\ p p'" - show "pre_process_rel (op =) \ (process_dtor p) (process_dtor p')" - proof(cases rule: process.exhaust[of p]) - case (Action a q) note p = Action - hence "isAction p'" using iss[OF \] by (cases rule: process.exhaust[of p'], auto) - then obtain a' q' where p': "p' = Action a' q'" by (cases rule: process.exhaust[of p'], auto) - have 0: "a = a' \ \ q q'" using Act[OF \[unfolded p p']] . - have dtor: "process_dtor p = Inl (a,q)" "process_dtor p' = Inl (a',q')" - unfolding p p' Action_def process.dtor_ctor by simp_all - show ?thesis using 0 unfolding dtor by simp - next - case (Choice p1 p2) note p = Choice - hence "isChoice p'" using iss[OF \] by (cases rule: process.exhaust[of p'], auto) - then obtain p1' p2' where p': "p' = Choice p1' p2'" - by (cases rule: process.exhaust[of p'], auto) - have 0: "\ p1 p1' \ \ p2 p2'" using Ch[OF \[unfolded p p']] . - have dtor: "process_dtor p = Inr (p1,p2)" "process_dtor p' = Inr (p1',p2')" - unfolding p p' Choice_def process.dtor_ctor by simp_all - show ?thesis using 0 unfolding dtor by simp - qed -qed + assumes phi: "\ p p'" and + iss: "\p p'. \ p p' \ (isAction p \ isAction p') \ (isChoice p \ isChoice p')" and + Act: "\ a a' p p'. \ (Action a p) (Action a' p') \ a = a' \ \ p p'" and + Ch: "\ p q p' q'. \ (Choice p q) (Choice p' q') \ \ p p' \ \ q q'" + shows "p = p'" + using assms + by (coinduct rule: process.coinduct) (metis process.collapse(1,2) process.discs(3)) (* Stronger coinduction, up to equality: *) theorem process_strong_coind[elim, consumes 1, case_names iss Action Choice]: -assumes phi: "\ p p'" and -iss: "\p p'. \ p p' \ (isAction p \ isAction p') \ (isChoice p \ isChoice p')" and -Act: "\ a a' p p'. \ (Action a p) (Action a' p') \ a = a' \ (\ p p' \ p = p')" and -Ch: "\ p q p' q'. \ (Choice p q) (Choice p' q') \ (\ p p' \ p = p') \ (\ q q' \ q = q')" -shows "p = p'" -proof(intro mp[OF process.dtor_strong_coinduct, of \, OF _ phi], clarify) - fix p p' assume \: "\ p p'" - show "pre_process_rel (op =) (\a b. \ a b \ a = b) (process_dtor p) (process_dtor p')" - proof(cases rule: process.exhaust[of p]) - case (Action a q) note p = Action - hence "isAction p'" using iss[OF \] by (cases rule: process.exhaust[of p'], auto) - then obtain a' q' where p': "p' = Action a' q'" by (cases rule: process.exhaust[of p'], auto) - have 0: "a = a' \ (\ q q' \ q = q')" using Act[OF \[unfolded p p']] . - have dtor: "process_dtor p = Inl (a,q)" "process_dtor p' = Inl (a',q')" - unfolding p p' Action_def process.dtor_ctor by simp_all - show ?thesis using 0 unfolding dtor by simp - next - case (Choice p1 p2) note p = Choice - hence "isChoice p'" using iss[OF \] by (cases rule: process.exhaust[of p'], auto) - then obtain p1' p2' where p': "p' = Choice p1' p2'" - by (cases rule: process.exhaust[of p'], auto) - have 0: "(\ p1 p1' \ p1 = p1') \ (\ p2 p2' \ p2 = p2')" using Ch[OF \[unfolded p p']] . - have dtor: "process_dtor p = Inr (p1,p2)" "process_dtor p' = Inr (p1',p2')" - unfolding p p' Choice_def process.dtor_ctor by simp_all - show ?thesis using 0 unfolding dtor by simp - qed -qed + assumes phi: "\ p p'" and + iss: "\p p'. \ p p' \ (isAction p \ isAction p') \ (isChoice p \ isChoice p')" and + Act: "\ a a' p p'. \ (Action a p) (Action a' p') \ a = a' \ (\ p p' \ p = p')" and + Ch: "\ p q p' q'. \ (Choice p q) (Choice p' q') \ (\ p p' \ p = p') \ (\ q q' \ q = q')" + shows "p = p'" + using assms + by (coinduct rule: process.strong_coinduct) (metis process.collapse(1,2) process.discs(3)) subsection {* Coiteration (unfold) *} section{* Coinductive definition of the notion of trace *} - -(* Say we have a type of streams: *) - -typedecl 'a stream - -consts Ccons :: "'a \ 'a stream \ 'a stream" - -(* Use the existing coinductive package (distinct from our -new codatatype package, but highly compatible with it): *) - coinductive trace where -"trace p as \ trace (Action a p) (Ccons a as)" +"trace p as \ trace (Action a p) (a ## as)" | "trace p as \ trace q as \ trace (Choice p q) as" diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/Tools/bnf_fp_def_sugar.ML --- a/src/HOL/BNF/Tools/bnf_fp_def_sugar.ML Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/BNF/Tools/bnf_fp_def_sugar.ML Tue Aug 20 22:24:02 2013 +0200 @@ -24,6 +24,9 @@ val morph_fp_sugar: morphism -> fp_sugar -> fp_sugar val fp_sugar_of: Proof.context -> string -> fp_sugar option + val co_induct_of: 'a list -> 'a + val strong_co_induct_of: 'a list -> 'a + val tvar_subst: theory -> typ list -> typ list -> ((string * int) * typ) list val exists_subtype_in: typ list -> typ -> bool val flat_rec_arg_args: 'a list list -> 'a list @@ -59,7 +62,7 @@ (term list * thm list) * Proof.context val derive_induct_iters_thms_for_types: BNF_Def.bnf list -> (typ list list * typ list list list list * term list list * term list list list list) list -> - thm list -> thm list list -> BNF_Def.bnf list -> BNF_Def.bnf list -> typ list -> typ list -> + thm -> thm list list -> BNF_Def.bnf list -> BNF_Def.bnf list -> typ list -> typ list -> typ list -> typ list list list -> term list list -> thm list list -> term list list -> thm list list -> local_theory -> (thm list * thm * Args.src list) * (thm list list * Args.src list) @@ -67,7 +70,7 @@ val derive_coinduct_coiters_thms_for_types: BNF_Def.bnf list -> string * term list * term list list * ((term list list * term list list list) * (typ list * typ list list)) list -> - thm list -> thm list -> thm list list -> BNF_Def.bnf list -> typ list -> typ list -> typ list -> + thm -> thm list -> thm list list -> BNF_Def.bnf list -> typ list -> typ list -> typ list -> int list list -> int list list -> int list -> thm list list -> BNF_Ctr_Sugar.ctr_sugar list -> term list list -> thm list list -> (thm list -> thm list) -> local_theory -> ((thm list * thm) list * Args.src list) @@ -139,6 +142,9 @@ val fp_sugar_of = Symtab.lookup o Data.get o Context.Proof; +fun co_induct_of (i :: _) = i; +fun strong_co_induct_of [_, s] = s; + fun register_fp_sugar key fp_sugar = Local_Theory.declaration {syntax = false, pervasive = true} (fn phi => Data.map (Symtab.default (key, morph_fp_sugar phi fp_sugar))); @@ -557,7 +563,7 @@ (map3 generate_coiter coiterNs coiter_args_typess' dtor_coiters) lthy end; -fun derive_induct_iters_thms_for_types pre_bnfs [fold_args_types, rec_args_types] [ctor_induct] +fun derive_induct_iters_thms_for_types pre_bnfs [fold_args_types, rec_args_types] ctor_induct ctor_iter_thmss nesting_bnfs nested_bnfs fpTs Cs Xs ctrXs_Tsss ctrss ctr_defss iterss iter_defss lthy = let @@ -718,7 +724,7 @@ fun derive_coinduct_coiters_thms_for_types pre_bnfs (z, cs, cpss, coiters_args_types as [((pgss, crgsss), _), ((phss, cshsss), _)]) - dtor_coinducts dtor_ctors dtor_coiter_thmss nesting_bnfs fpTs Cs As kss mss ns ctr_defss + dtor_coinduct dtor_ctors dtor_coiter_thmss nesting_bnfs fpTs Cs As kss mss ns ctr_defss ctr_sugars coiterss coiter_defss export_args lthy = let val coiterss' = transpose coiterss; @@ -823,6 +829,11 @@ (if nn = 1 then thm RS mp else funpow nn (fn thm => reassoc_conjs (thm RS mp_conj)) thm) |> Drule.zero_var_indexes |> `(conj_dests nn); + + val rel_eqs = map rel_eq_of_bnf pre_bnfs; + val rel_monos = map rel_mono_of_bnf pre_bnfs; + val dtor_coinducts = + [dtor_coinduct, mk_strong_coinduct_thm dtor_coinduct rel_eqs rel_monos lthy]; in map2 (postproc nn oo prove) dtor_coinducts goals end; @@ -1096,7 +1107,7 @@ map dest_TFree Xs ~~ map (Term.typ_subst_atomic (As ~~ unsorted_As)) ctrXs_sum_prod_Ts; val (pre_bnfs, (fp_res as {bnfs = fp_bnfs as any_fp_bnf :: _, ctors = ctors0, dtors = dtors0, - xtor_co_iterss = xtor_co_iterss0, xtor_co_inducts, dtor_ctors, ctor_dtors, ctor_injects, + xtor_co_iterss = xtor_co_iterss0, xtor_co_induct, dtor_ctors, ctor_dtors, ctor_injects, xtor_map_thms, xtor_set_thmss, xtor_rel_thms, xtor_co_iter_thmss, ...}, lthy)) = fp_bnf (construct_fp mixfixes map_bs rel_bs set_bss) fp_bs (map dest_TFree unsorted_As) fp_eqs no_defs_lthy0; @@ -1351,7 +1362,7 @@ let val ((induct_thms, induct_thm, induct_attrs), (fold_thmss, fold_attrs), (rec_thmss, rec_attrs)) = - derive_induct_iters_thms_for_types pre_bnfs (the iters_args_types) xtor_co_inducts + derive_induct_iters_thms_for_types pre_bnfs (the iters_args_types) xtor_co_induct xtor_co_iter_thmss nesting_bnfs nested_bnfs fpTs Cs Xs ctrXs_Tsss ctrss ctr_defss iterss iter_defss lthy; @@ -1388,7 +1399,7 @@ (disc_unfold_thmss, disc_corec_thmss, disc_coiter_attrs), (disc_unfold_iff_thmss, disc_corec_iff_thmss, disc_coiter_iff_attrs), (sel_unfold_thmss, sel_corec_thmss, sel_coiter_attrs)) = - derive_coinduct_coiters_thms_for_types pre_bnfs (the coiters_args_types) xtor_co_inducts + derive_coinduct_coiters_thms_for_types pre_bnfs (the coiters_args_types) xtor_co_induct dtor_ctors xtor_co_iter_thmss nesting_bnfs fpTs Cs As kss mss ns ctr_defss ctr_sugars coiterss coiter_defss (Proof_Context.export lthy' no_defs_lthy) lthy; diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/Tools/bnf_fp_util.ML --- a/src/HOL/BNF/Tools/bnf_fp_util.ML Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/BNF/Tools/bnf_fp_util.ML Tue Aug 20 22:24:02 2013 +0200 @@ -17,7 +17,7 @@ ctors: term list, dtors: term list, xtor_co_iterss: term list list, - xtor_co_inducts: thm list, + xtor_co_induct: thm, dtor_ctors: thm list, ctor_dtors: thm list, ctor_injects: thm list, @@ -29,8 +29,6 @@ val morph_fp_result: morphism -> fp_result -> fp_result val eq_fp_result: fp_result * fp_result -> bool - val co_induct_of: 'a list -> 'a - val strong_co_induct_of: 'a list -> 'a val un_fold_of: 'a list -> 'a val co_rec_of: 'a list -> 'a @@ -179,6 +177,8 @@ val mk_xtor_un_fold_o_map_thms: fp_kind -> bool -> int -> thm -> thm list -> thm list -> thm list -> thm list -> thm list + val mk_strong_coinduct_thm: thm -> thm list -> thm list -> Proof.context -> thm + val fp_bnf: (binding list -> (string * sort) list -> typ list * typ list list -> BNF_Def.bnf list -> local_theory -> 'a) -> binding list -> (string * sort) list -> ((string * sort) * typ) list -> local_theory -> @@ -203,7 +203,7 @@ ctors: term list, dtors: term list, xtor_co_iterss: term list list, - xtor_co_inducts: thm list, + xtor_co_induct: thm, dtor_ctors: thm list, ctor_dtors: thm list, ctor_injects: thm list, @@ -213,7 +213,7 @@ xtor_co_iter_thmss: thm list list, rel_co_induct_thm: thm}; -fun morph_fp_result phi {Ts, bnfs, ctors, dtors, xtor_co_iterss, xtor_co_inducts, dtor_ctors, +fun morph_fp_result phi {Ts, bnfs, ctors, dtors, xtor_co_iterss, xtor_co_induct, dtor_ctors, ctor_dtors, ctor_injects, xtor_map_thms, xtor_set_thmss, xtor_rel_thms, xtor_co_iter_thmss, rel_co_induct_thm} = {Ts = map (Morphism.typ phi) Ts, @@ -221,7 +221,7 @@ ctors = map (Morphism.term phi) ctors, dtors = map (Morphism.term phi) dtors, xtor_co_iterss = map (map (Morphism.term phi)) xtor_co_iterss, - xtor_co_inducts = map (Morphism.thm phi) xtor_co_inducts, + xtor_co_induct = Morphism.thm phi xtor_co_induct, dtor_ctors = map (Morphism.thm phi) dtor_ctors, ctor_dtors = map (Morphism.thm phi) ctor_dtors, ctor_injects = map (Morphism.thm phi) ctor_injects, @@ -234,9 +234,6 @@ fun eq_fp_result ({bnfs = bnfs1, ...} : fp_result, {bnfs = bnfs2, ...} : fp_result) = eq_list eq_bnf (bnfs1, bnfs2); -fun co_induct_of (i :: _) = i; -fun strong_co_induct_of [_, s] = s; - fun un_fold_of [f, _] = f; fun co_rec_of [_, r] = r; @@ -552,6 +549,25 @@ split_conj_thm (un_fold_unique OF map (fp_case fp I mk_sym) unique_prems) end; +fun mk_strong_coinduct_thm coind rel_eqs rel_monos ctxt = + let + val n = Thm.nprems_of coind; + val m = Thm.nprems_of (hd rel_monos) - n; + fun mk_inst phi = (phi, mk_union (phi, HOLogic.eq_const (fst (dest_pred2T (fastype_of phi))))) + |> pairself (certify ctxt); + val insts = Term.add_vars (Thm.prop_of coind) [] |> rev |> take n |> map (mk_inst o Var); + fun mk_unfold rel_eq rel_mono = + let + val eq = iffD2 OF [rel_eq RS @{thm predicate2_eqD}, refl]; + val mono = rel_mono OF (replicate m @{thm order_refl} @ replicate n @{thm eq_subset}); + in eq RS (mono RS @{thm predicate2D}) RS @{thm eqTrueI} end; + val unfolds = map2 mk_unfold rel_eqs rel_monos @ @{thms sup_fun_def sup_bool_def + imp_disjL all_conj_distrib subst_eq_imp simp_thms(18,21,35)}; + in + Thm.instantiate ([], insts) coind + |> unfold_thms ctxt unfolds + end; + fun fp_bnf construct_fp bs resBs eqs lthy = let val timer = time (Timer.startRealTimer ()); diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/Tools/bnf_gfp.ML --- a/src/HOL/BNF/Tools/bnf_gfp.ML Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/BNF/Tools/bnf_gfp.ML Tue Aug 20 22:24:02 2013 +0200 @@ -2019,49 +2019,36 @@ val timer = time (timer "corec definitions & thms"); - (* TODO: Get rid of strong versions (since these can easily be derived from the weak ones). *) - val (dtor_map_coinduct_thm, coinduct_params, dtor_coinduct_thm, - dtor_map_strong_coinduct_thm, dtor_strong_coinduct_thm) = + val (dtor_map_coinduct_thm, coinduct_params, dtor_coinduct_thm) = let val zs = Jzs1 @ Jzs2; val frees = phis @ zs; - fun mk_phi strong_eq phi z1 z2 = if strong_eq - then Term.absfree (dest_Free z1) (Term.absfree (dest_Free z2) - (HOLogic.mk_disj (phi $ z1 $ z2, HOLogic.mk_eq (z1, z2)))) - else phi; - - fun phi_rels strong_eq = - map3 (fn phi => fn z1 => fn z2 => mk_phi strong_eq phi z1 z2) phis Jzs1 Jzs2; - val rels = map (Term.subst_atomic_types ((activeAs ~~ Ts) @ (activeBs ~~ Ts))) relsAsBs; fun mk_concl phi z1 z2 = HOLogic.mk_imp (phi $ z1 $ z2, HOLogic.mk_eq (z1, z2)); val concl = HOLogic.mk_Trueprop (Library.foldr1 HOLogic.mk_conj (map3 mk_concl phis Jzs1 Jzs2)); - fun mk_rel_prem strong_eq phi dtor rel Jz Jz_copy = + fun mk_rel_prem phi dtor rel Jz Jz_copy = let - val concl = Term.list_comb (rel, map HOLogic.eq_const passiveAs @ phi_rels strong_eq) $ + val concl = Term.list_comb (rel, map HOLogic.eq_const passiveAs @ phis) $ (dtor $ Jz) $ (dtor $ Jz_copy); in HOLogic.mk_Trueprop (list_all_free [Jz, Jz_copy] (HOLogic.mk_imp (phi $ Jz $ Jz_copy, concl))) end; - val rel_prems = map5 (mk_rel_prem false) phis dtors rels Jzs Jzs_copy; - val rel_strong_prems = map5 (mk_rel_prem true) phis dtors rels Jzs Jzs_copy; - + val rel_prems = map5 mk_rel_prem phis dtors rels Jzs Jzs_copy; val dtor_coinduct_goal = fold_rev Logic.all frees (Logic.list_implies (rel_prems, concl)); - val coinduct_params = rev (Term.add_tfrees dtor_coinduct_goal []); val dtor_coinduct = Goal.prove_sorry lthy [] [] dtor_coinduct_goal (K (mk_dtor_coinduct_tac m raw_coind_thm bis_rel_thm rel_congs)) |> Thm.close_derivation; - fun mk_prem strong_eq phi dtor map_nth sets Jz Jz_copy FJz = + fun mk_prem phi dtor map_nth sets Jz Jz_copy FJz = let val xs = [Jz, Jz_copy]; @@ -2071,7 +2058,7 @@ fun mk_set_conjunct set phi z1 z2 = list_all_free [z1, z2] (HOLogic.mk_imp (HOLogic.mk_mem (HOLogic.mk_prod (z1, z2), set $ FJz), - mk_phi strong_eq phi z1 z2 $ z1 $ z2)); + phi $ z1 $ z2)); val concl = list_exists_free [FJz] (HOLogic.mk_conj (Library.foldr1 HOLogic.mk_conj (map2 mk_map_conjunct [fstsTs, sndsTs] xs), @@ -2082,36 +2069,15 @@ (HOLogic.mk_Trueprop (Term.list_comb (phi, xs)), HOLogic.mk_Trueprop concl)) end; - fun mk_prems strong_eq = - map7 (mk_prem strong_eq) phis dtors map_FT_nths prodFT_setss Jzs Jzs_copy FJzs; - - val prems = mk_prems false; - val strong_prems = mk_prems true; + val prems = map7 mk_prem phis dtors map_FT_nths prodFT_setss Jzs Jzs_copy FJzs; val dtor_map_coinduct_goal = fold_rev Logic.all frees (Logic.list_implies (prems, concl)); val dtor_map_coinduct = Goal.prove_sorry lthy [] [] dtor_map_coinduct_goal (K (mk_dtor_map_coinduct_tac m ks raw_coind_thm bis_def)) |> Thm.close_derivation; - - val cTs = map (SOME o certifyT lthy o TFree) coinduct_params; - val cts = map3 (SOME o certify lthy ooo mk_phi true) phis Jzs1 Jzs2; - - val dtor_strong_coinduct = singleton (Proof_Context.export names_lthy lthy) - (Goal.prove_sorry lthy [] [] - (fold_rev Logic.all zs (Logic.list_implies (rel_strong_prems, concl))) - (K (mk_dtor_strong_coinduct_tac lthy m cTs cts dtor_coinduct rel_monos rel_eqs))) - |> Thm.close_derivation; - - val dtor_map_strong_coinduct = singleton (Proof_Context.export names_lthy lthy) - (Goal.prove_sorry lthy [] [] - (fold_rev Logic.all zs (Logic.list_implies (strong_prems, concl))) - (K (mk_dtor_map_strong_coinduct_tac ks cTs cts dtor_map_coinduct bis_def - (tcoalg_thm RS bis_Id_on_thm)))) - |> Thm.close_derivation; in - (dtor_map_coinduct, rev (Term.add_tfrees dtor_map_coinduct_goal []), - dtor_coinduct, dtor_map_strong_coinduct, dtor_strong_coinduct) + (dtor_map_coinduct, rev (Term.add_tfrees dtor_map_coinduct_goal []), dtor_coinduct) end; val timer = time (timer "coinduction"); @@ -2902,8 +2868,6 @@ val common_notes = [(dtor_coinductN, [dtor_coinduct_thm]), (dtor_map_coinductN, [dtor_map_coinduct_thm]), - (dtor_map_strong_coinductN, [dtor_map_strong_coinduct_thm]), - (dtor_strong_coinductN, [dtor_strong_coinduct_thm]), (rel_coinductN, [Jrel_coinduct_thm]), (dtor_unfold_transferN, dtor_unfold_transfer_thms)] |> map (fn (thmN, thms) => @@ -2933,7 +2897,7 @@ timer; ({Ts = Ts, bnfs = Jbnfs, ctors = ctors, dtors = dtors, xtor_co_iterss = transpose [unfolds, corecs], - xtor_co_inducts = [dtor_coinduct_thm, dtor_strong_coinduct_thm], dtor_ctors = dtor_ctor_thms, + xtor_co_induct = dtor_coinduct_thm, dtor_ctors = dtor_ctor_thms, ctor_dtors = ctor_dtor_thms, ctor_injects = ctor_inject_thms, xtor_map_thms = folded_dtor_map_thms, xtor_set_thmss = folded_dtor_set_thmss', xtor_rel_thms = dtor_Jrel_thms, diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/Tools/bnf_gfp_tactics.ML --- a/src/HOL/BNF/Tools/bnf_gfp_tactics.ML Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/BNF/Tools/bnf_gfp_tactics.ML Tue Aug 20 22:24:02 2013 +0200 @@ -39,12 +39,8 @@ val mk_corec_unique_mor_tac: thm list -> thm list -> thm -> {prems: 'a, context: Proof.context} -> tactic val mk_dtor_map_coinduct_tac: int -> int list -> thm -> thm -> tactic - val mk_dtor_map_strong_coinduct_tac: int list -> ctyp option list -> cterm option list -> thm -> - thm -> thm -> tactic val mk_dtor_set_tac: int -> thm -> thm -> thm list -> tactic val mk_dtor_coinduct_tac: int -> thm -> thm -> thm list -> tactic - val mk_dtor_strong_coinduct_tac: Proof.context -> int -> ctyp option list -> - cterm option list -> thm -> thm list -> thm list -> tactic val mk_dtor_rel_tac: Proof.context -> thm list -> int -> thm -> thm -> thm -> thm -> thm list -> thm -> thm -> thm list -> thm list -> thm list list -> tactic val mk_dtor_o_ctor_tac: thm -> thm -> thm -> thm -> thm list -> @@ -991,18 +987,6 @@ CONJ_WRAP' (K (EVERY' [rtac impI, rtac @{thm IdD}, etac set_mp, rtac CollectI, etac @{thm prod_caseI}])) rel_congs] 1; -fun mk_dtor_strong_coinduct_tac ctxt m cTs cts dtor_coinduct rel_monos rel_eqs = - EVERY' [rtac rev_mp, rtac (Drule.instantiate' cTs cts dtor_coinduct), - EVERY' (map2 (fn rel_mono => fn rel_eq => - EVERY' [REPEAT_DETERM o resolve_tac [allI, impI], REPEAT_DETERM o etac allE, - etac disjE, etac mp, atac, hyp_subst_tac ctxt, rtac (rel_mono RS @{thm predicate2D}), - REPEAT_DETERM_N m o rtac @{thm order_refl}, - REPEAT_DETERM_N (length rel_monos) o rtac @{thm eq_subset}, - rtac (rel_eq RS sym RS @{thm eq_refl} RS @{thm predicate2D}), rtac refl]) - rel_monos rel_eqs), - rtac impI, REPEAT_DETERM o etac conjE, - CONJ_WRAP' (K (rtac impI THEN' etac mp THEN' etac disjI1)) rel_eqs] 1; - fun mk_dtor_map_coinduct_tac m ks raw_coind bis_def = let val n = length ks; @@ -1024,20 +1008,6 @@ rtac CollectI, etac (refl RSN (2, @{thm subst_Pair}))]) ks] 1 end; -fun mk_dtor_map_strong_coinduct_tac ks cTs cts dtor_map_coinduct bis_def bis_Id_on = - EVERY' [rtac rev_mp, rtac (Drule.instantiate' cTs cts dtor_map_coinduct), - EVERY' (map (fn i => - EVERY' [etac disjE, REPEAT_DETERM o dtac @{thm meta_spec}, etac meta_mp, - atac, rtac rev_mp, rtac subst, rtac bis_def, rtac bis_Id_on, - rtac impI, dtac conjunct2, dtac (mk_conjunctN (length ks) i), REPEAT_DETERM o etac allE, - etac impE, etac @{thm Id_on_UNIV_I}, REPEAT_DETERM o eresolve_tac [bexE, conjE, CollectE], - rtac exI, rtac conjI, etac conjI, atac, - CONJ_WRAP' (K (EVERY' [REPEAT_DETERM o resolve_tac [allI, impI], - rtac disjI2, rtac @{thm Id_onD}, etac set_mp, atac])) ks]) - ks), - rtac impI, REPEAT_DETERM o etac conjE, - CONJ_WRAP' (K (rtac impI THEN' etac mp THEN' etac disjI1)) ks] 1; - fun mk_map_tac m n cT unfold map_comp' map_cong0 = EVERY' [rtac ext, rtac (o_apply RS trans RS sym), rtac (o_apply RS trans RS sym), rtac (unfold RS trans), rtac (Thm.permute_prems 0 1 (map_comp' RS box_equals)), rtac map_cong0, diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/Tools/bnf_lfp.ML --- a/src/HOL/BNF/Tools/bnf_lfp.ML Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/BNF/Tools/bnf_lfp.ML Tue Aug 20 22:24:02 2013 +0200 @@ -1858,7 +1858,7 @@ in timer; ({Ts = Ts, bnfs = Ibnfs, ctors = ctors, dtors = dtors, xtor_co_iterss = transpose [folds, recs], - xtor_co_inducts = [ctor_induct_thm], dtor_ctors = dtor_ctor_thms, ctor_dtors = ctor_dtor_thms, + xtor_co_induct = ctor_induct_thm, dtor_ctors = dtor_ctor_thms, ctor_dtors = ctor_dtor_thms, ctor_injects = ctor_inject_thms, xtor_map_thms = folded_ctor_map_thms, xtor_set_thmss = folded_ctor_set_thmss', xtor_rel_thms = ctor_Irel_thms, xtor_co_iter_thmss = transpose [ctor_fold_thms, ctor_rec_thms], diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Nitpick/kodkod.ML --- a/src/HOL/Tools/Nitpick/kodkod.ML Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/Tools/Nitpick/kodkod.ML Tue Aug 20 22:24:02 2013 +0200 @@ -881,7 +881,7 @@ val scan_nat = Scan.repeat1 (Scan.one Symbol.is_ascii_digit) - >> (the o Int.fromString o space_implode "") + >> (the o Int.fromString o implode) val scan_rel_name = ($$ "s" |-- scan_nat >> pair 1 || $$ "r" |-- scan_nat >> pair 2 diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py Tue Aug 20 22:24:02 2013 +0200 @@ -179,6 +179,13 @@ assert line.startswith('? ') line = line[2:] name = None + numberOfPredictions = None + + # Check whether there is a problem name: + tmp = line.split('#') + if len(tmp) == 2: + numberOfPredictions = int(tmp[0].strip()) + line = tmp[1] # Check whether there is a problem name: tmp = line.split(':') @@ -206,7 +213,7 @@ else: hints = [] - return name,features,accessibles,hints + return name,features,accessibles,hints,numberOfPredictions def save(self,fileName): if self.changed: diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/fullNaiveBayes.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/fullNaiveBayes.py Tue Aug 20 22:24:02 2013 +0200 @@ -0,0 +1,203 @@ +''' +Created on Jul 11, 2012 + +@author: Daniel Kuehlwein +''' + +from cPickle import dump,load +from numpy import array,exp +from math import log + +class NBClassifier(object): + ''' + An updateable naive Bayes classifier. + ''' + + def __init__(self): + ''' + Constructor + ''' + self.counts = {} + self.negCounts = {} + + def initializeModel(self,trainData,dicts): + """ + Build basic model from training data. + """ + for d in trainData: + self.counts[d] = [0,{}] + self.negCounts[d] = [0,{}] + dAccUnExp = dicts.accessibleDict[d] + if dicts.expandedAccessibles.has_key(d): + dAcc = dicts.expandedAccessibles(d) + else: + if len(dicts.expandedAccessibles.keys()) >= 100: + dicts.expandedAccessibles = {} + dAcc = dicts.expand_accessibles(dAccUnExp) + dicts.expandedAccessibles[d] = dAcc + dDeps = set(dicts.dependenciesDict[d]) + dFeatures = dicts.featureDict[d] + # d proves d + self.counts[d][0] += 1 + for f in dFeatures: + if self.counts[d][1].has_key(f): + self.counts[d][1][f] += 1 + else: + self.counts[d][1][f] = 1 + for acc in dAcc: + if not self.counts.has_key(acc): + self.counts[acc] = [0,{}] + if not self.negCounts.has_key(acc): + self.negCounts[acc] = [0,{}] + if acc in dDeps: + self.counts[acc][0] += 1 + for f in dFeatures: + if self.counts[acc][1].has_key(f): + self.counts[acc][1][f] += 1 + else: + self.counts[acc][1][f] = 1 + else: + self.negCounts[acc][0] += 1 + for f in dFeatures: + if self.negCounts[acc][1].has_key(f): + self.negCounts[acc][1][f] += 1 + else: + self.negCounts[acc][1][f] = 1 + + def update(self,dataPoint,features,dependencies,dicts): + """ + Updates the Model. + """ + if not self.counts.has_key(dataPoint): + self.counts[dataPoint] = [0,{}] + if not self.negCounts.has_key(dataPoint): + self.negCounts[dataPoint] = [0,{}] + if dicts.expandedAccessibles.has_key(dataPoint): + dAcc = dicts.expandedAccessibles(dataPoint) + else: + if len(dicts.expandedAccessibles.keys()) >= 100: + dicts.expandedAccessibles = {} + dAccUnExp = dicts.accessibleDict[dataPoint] + dAcc = dicts.expand_accessibles(dAccUnExp) + dicts.expandedAccessibles[dataPoint] = dAcc + dDeps = set(dicts.dependenciesDict[dataPoint]) + dFeatures = dicts.featureDict[dataPoint] + # d proves d + self.counts[dataPoint][0] += 1 + for f in dFeatures: + if self.counts[dataPoint][1].has_key(f): + self.counts[dataPoint][1][f] += 1 + else: + self.counts[dataPoint][1][f] = 1 + + for acc in dAcc: + if acc in dDeps: + self.counts[acc][0] += 1 + for f in dFeatures: + if self.counts[acc][1].has_key(f): + self.counts[acc][1][f] += 1 + else: + self.counts[acc][1][f] = 1 + else: + self.negCounts[acc][0] += 1 + for f in dFeatures: + if self.negCounts[acc][1].has_key(f): + self.negCounts[acc][1][f] += 1 + else: + self.negCounts[acc][1][f] = 1 + + def delete(self,dataPoint,features,dependencies): + """ + Deletes a single datapoint from the model. + """ + for dep in dependencies: + self.counts[dep][0] -= 1 + for f in features: + self.counts[dep][1][f] -= 1 + + + def overwrite(self,problemId,newDependencies,dicts): + """ + Deletes the old dependencies of problemId and replaces them with the new ones. Updates the model accordingly. + """ + assert self.counts.has_key(problemId) + oldDeps = dicts.dependenciesDict[problemId] + features = dicts.featureDict[problemId] + self.delete(problemId,features,oldDeps) + self.update(problemId,features,newDependencies) + + def predict(self,features,accessibles): + """ + For each accessible, predicts the probability of it being useful given the features. + Returns a ranking of the accessibles. + """ + predictions = [] + for a in accessibles: + posA = self.counts[a][0] + negA = self.negCounts[a][0] + fPosA = set(self.counts[a][1].keys()) + fNegA = set(self.negCounts[a][1].keys()) + fPosWeightsA = self.counts[a][1] + fNegWeightsA = self.negCounts[a][1] + if negA == 0: + resultA = 0 + elif posA == 0: + print a + print 'xx' + import sys + sys.exit(-1) + else: + resultA = log(posA) - log(negA) + for f in features: + if f in fPosA: + # P(f | a) + if fPosWeightsA[f] == 0: + resultA -= 15 + else: + assert fPosWeightsA[f] <= posA + resultA += log(float(fPosWeightsA[f])/posA) + else: + resultA -= 15 + # P(f | not a) + if f in fNegA: + if fNegWeightsA[f] == 0: + resultA += 15 + else: + assert fNegWeightsA[f] <= negA + resultA -= log(float(fNegWeightsA[f])/negA) + else: + resultA += 15 + + predictions.append(resultA) + #expPredictions = array([exp(x) for x in predictions]) + predictions = array(predictions) + perm = (-predictions).argsort() + #return array(accessibles)[perm],expPredictions[perm] + return array(accessibles)[perm],predictions[perm] + + def save(self,fileName): + OStream = open(fileName, 'wb') + dump((self.counts,self.negCounts),OStream) + OStream.close() + + def load(self,fileName): + OStream = open(fileName, 'rb') + self.counts,self.negCounts = load(OStream) + OStream.close() + + +if __name__ == '__main__': + featureDict = {0:[0,1,2],1:[3,2,1]} + dependenciesDict = {0:[0],1:[0,1]} + libDicts = (featureDict,dependenciesDict,{}) + c = NBClassifier() + c.initializeModel([0,1],libDicts) + c.update(2,[14,1,3],[0,2]) + print c.counts + print c.predict([0,14],[0,1,2]) + c.storeModel('x') + d = NBClassifier() + d.loadModel('x') + print c.counts + print d.counts + print 'Done' \ No newline at end of file diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/mash.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py Tue Aug 20 22:24:02 2013 +0200 @@ -1,7 +1,7 @@ -#!/usr/bin/python -# Title: HOL/Tools/Sledgehammer/MaSh/src/mash.py +#!/usr/bin/env python +# Title: HOL/Tools/Sledgehammer/MaSh/src/mash # Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen -# Copyright 2012 +# Copyright 2012 - 2013 # # Entry point for MaSh (Machine Learning for Sledgehammer). @@ -15,69 +15,38 @@ @author: Daniel Kuehlwein ''' -import logging,datetime,string,os,sys -from argparse import ArgumentParser,RawDescriptionHelpFormatter -from time import time -from stats import Statistics +import socket,sys,time,logging + +from spawnDaemon import spawnDaemon + + +import logging,string,os,sys + + from theoryStats import TheoryStatistics from theoryModels import TheoryModels from dictionaries import Dictionaries -#from fullNaiveBayes import NBClassifier -from sparseNaiveBayes import sparseNBClassifier -from snow import SNoW from predefined import Predefined -# Set up command-line parser -parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer. \n\n\ -MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\ ---------------- Example Usage ---------------\n\ -First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\ -Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\ -\n\n\ -Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter) -parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.') -parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.') -parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(), - help='File where the predictions stored. Default=../tmp/dateTime.predictions.') -parser.add_argument('--numberOfPredictions',default=200,help="Number of premises to write in the output. Default=200.",type=int) - -parser.add_argument('--init',default=False,action='store_true',help="Initialize Mash. Requires --inputDir to be defined. Default=False.") -parser.add_argument('--inputDir',default='../data/20121212/Jinja/',\ - help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility') -parser.add_argument('--depFile', default='mash_dependencies', - help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies') -parser.add_argument('--saveModel',default=False,action='store_true',help="Stores the learned Model at the end of a prediction run. Default=False.") +from parameters import init_parser -parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.") -# Theory Parameters -parser.add_argument('--theoryDefValPos',default=-7.5,help="Default value for positive unknown features. Default=-7.5.",type=float) -parser.add_argument('--theoryDefValNeg',default=-10.0,help="Default value for negative unknown features. Default=-15.0.",type=float) -parser.add_argument('--theoryPosWeight',default=2.0,help="Weight value for positive features. Default=2.0.",type=float) - -parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.") -# NB Parameters -parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float) -parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float) -parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float) -# TODO: Rename to sineFeatures -parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.") -parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float) - -parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.") -parser.add_argument('--predef',help="Use predefined predictions. Used only for comparison with the actual learning. Argument is the filename of the predictions.") -parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\ - WARNING: This will make the program a lot slower! Default=False.") -parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.") -parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int) -parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log') -parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.") -parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle') -parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle') -parser.add_argument('--theoryFile', default='../tmp/theory.pickle', help='Model file name. Default=../tmp/theory.pickle') +def communicate(data,host,port): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + sock.connect((host,port)) + sock.sendall(data) + received = sock.recv(262144) + except: + logger = logging.getLogger('communicate') + logger.warning('Communication with server failed.') + received = -1 + finally: + sock.close() + return received def mash(argv = sys.argv[1:]): # Initializing command-line arguments - args = parser.parse_args(argv) + args = init_parser(argv) # Set up logging logging.basicConfig(level=logging.DEBUG, @@ -85,18 +54,8 @@ datefmt='%d-%m %H:%M:%S', filename=args.log, filemode='w') - logger = logging.getLogger('main.py') - - #""" - # remove old handler for tester - #logger.root.handlers[0].stream.close() - logger.root.removeHandler(logger.root.handlers[0]) - file_handler = logging.FileHandler(args.log) - file_handler.setLevel(logging.DEBUG) - formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s') - file_handler.setFormatter(formatter) - logger.root.addHandler(file_handler) - #""" + logger = logging.getLogger('mash') + if args.quiet: logger.setLevel(logging.WARNING) #console.setLevel(logging.WARNING) @@ -110,257 +69,44 @@ if not os.path.exists(args.outputDir): os.makedirs(args.outputDir) - logger.info('Using the following settings: %s',args) - # Pick algorithm - if args.nb: - logger.info('Using sparse Naive Bayes for learning.') - model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal) - elif args.snow: - logger.info('Using naive bayes (SNoW) for learning.') - model = SNoW() - elif args.predef: - logger.info('Using predefined predictions.') - model = Predefined(args.predef) - else: - logger.info('No algorithm specified. Using sparse Naive Bayes.') - model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal) - - # Initializing model - if args.init: - logger.info('Initializing Model.') - startTime = time() - - # Load all data - dicts = Dictionaries() - dicts.init_all(args) - - # Create Model - trainData = dicts.featureDict.keys() - model.initializeModel(trainData,dicts) - - if args.learnTheories: - depFile = os.path.join(args.inputDir,args.depFile) - theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight) - theoryModels.init(depFile,dicts) - theoryModels.save(args.theoryFile) - - model.save(args.modelFile) - dicts.save(args.dictsFile) - - logger.info('All Done. %s seconds needed.',round(time()-startTime,2)) - return 0 - # Create predictions and/or update model - else: - lineCounter = 1 - statementCounter = 1 - computeStats = False - dicts = Dictionaries() - theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight) - # Load Files - if os.path.isfile(args.dictsFile): - #logger.info('Loading Dictionaries') - #startTime = time() - dicts.load(args.dictsFile) - #logger.info('Done %s',time()-startTime) - if os.path.isfile(args.modelFile): - #logger.info('Loading Model') - #startTime = time() - model.load(args.modelFile) - #logger.info('Done %s',time()-startTime) - if os.path.isfile(args.theoryFile) and args.learnTheories: - #logger.info('Loading Theory Models') - #startTime = time() - theoryModels.load(args.theoryFile) - #logger.info('Done %s',time()-startTime) - logger.info('All loading completed') - - # IO Streams - OS = open(args.predictions,'w') - IS = open(args.inputFile,'r') - - # Statistics - if args.statistics: - stats = Statistics(args.cutOff) - if args.learnTheories: - theoryStats = TheoryStatistics() - - predictions = None - predictedTheories = None - #Reading Input File - for line in IS: -# try: - if True: - if line.startswith('!'): - problemId = dicts.parse_fact(line) - # Statistics - if args.statistics and computeStats: - computeStats = False - # Assume '!' comes after '?' - if args.predef: - predictions = model.predict(problemId) - if args.learnTheories: - tmp = [dicts.idNameDict[x] for x in dicts.dependenciesDict[problemId]] - usedTheories = set([x.split('.')[0] for x in tmp]) - theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories,len(theoryModels.accessibleTheories)) - stats.update(predictions,dicts.dependenciesDict[problemId],statementCounter) - if not stats.badPreds == []: - bp = string.join([str(dicts.idNameDict[x]) for x in stats.badPreds], ',') - logger.debug('Bad predictions: %s',bp) + # If server is not running, start it. + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((args.host,args.port)) + sock.close() + except: + logger.info('Starting Server.') + spawnDaemon('server.py') + # TODO: Make this fault tolerant + time.sleep(0.5) + # Init server + data = "i "+";".join(argv) + received = communicate(data,args.host,args.port) + logger.info(received) + + if args.inputFile == None: + return + logger.debug('Using the following settings: %s',args) + # IO Streams + OS = open(args.predictions,'w') + IS = open(args.inputFile,'r') + count = 0 + for line in IS: + count += 1 + #if count == 127: + # break as + received = communicate(line,args.host,args.port) + if not received == '': + OS.write('%s\n' % received) + #logger.info(received) + OS.close() + IS.close() - statementCounter += 1 - # Update Dependencies, p proves p - dicts.dependenciesDict[problemId] = [problemId]+dicts.dependenciesDict[problemId] - if args.learnTheories: - theoryModels.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts) - if args.snow: - model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts) - else: - model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId]) - elif line.startswith('p'): - # Overwrite old proof. - problemId,newDependencies = dicts.parse_overwrite(line) - newDependencies = [problemId]+newDependencies - model.overwrite(problemId,newDependencies,dicts) - if args.learnTheories: - theoryModels.overwrite(problemId,newDependencies,dicts) - dicts.dependenciesDict[problemId] = newDependencies - elif line.startswith('?'): - startTime = time() - computeStats = True - if args.predef: - continue - name,features,accessibles,hints = dicts.parse_problem(line) - - # Create predictions - logger.info('Starting computation for problem on line %s',lineCounter) - # Update Models with hints - if not hints == []: - if args.learnTheories: - accessibleTheories = set([(dicts.idNameDict[x]).split('.')[0] for x in accessibles]) - theoryModels.update_with_acc('hints',features,hints,dicts,accessibleTheories) - if args.snow: - pass - else: - model.update('hints',features,hints) - - # Predict premises - if args.learnTheories: - predictedTheories,accessibles = theoryModels.predict(features,accessibles,dicts) - - # Add additional features on premise lvl if sine is enabled - if args.sineFeatures: - origFeatures = [f for f,_w in features] - secondaryFeatures = [] - for f in origFeatures: - if dicts.featureCountDict[f] == 1: - continue - triggeredFormulas = dicts.featureTriggeredFormulasDict[f] - for formula in triggeredFormulas: - tFeatures = dicts.triggerFeaturesDict[formula] - #tFeatures = [ff for ff,_fw in dicts.featureDict[formula]] - newFeatures = set(tFeatures).difference(secondaryFeatures+origFeatures) - for fNew in newFeatures: - secondaryFeatures.append((fNew,args.sineWeight)) - predictionsFeatures = features+secondaryFeatures - else: - predictionsFeatures = features - predictions,predictionValues = model.predict(predictionsFeatures,accessibles,dicts) - assert len(predictions) == len(predictionValues) - - # Delete hints - if not hints == []: - if args.learnTheories: - theoryModels.delete('hints',features,hints,dicts) - if args.snow: - pass - else: - model.delete('hints',features,hints) + # Statistics + if args.statistics: + received = communicate('avgStats',args.host,args.port) + logger.info(received) - logger.info('Done. %s seconds needed.',round(time()-startTime,2)) - # Output - predictionNames = [str(dicts.idNameDict[p]) for p in predictions[:args.numberOfPredictions]] - predictionValues = [str(x) for x in predictionValues[:args.numberOfPredictions]] - predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))] - predictionsString = string.join(predictionsStringList,' ') - outString = '%s: %s' % (name,predictionsString) - OS.write('%s\n' % outString) - else: - logger.warning('Unspecified input format: \n%s',line) - sys.exit(-1) - lineCounter += 1 - """ - except: - logger.warning('An error occurred on line %s .',line) - lineCounter += 1 - continue - """ - OS.close() - IS.close() - # Statistics - if args.statistics: - if args.learnTheories: - theoryStats.printAvg() - stats.printAvg() - - # Save - if args.saveModel: - model.save(args.modelFile) - if args.learnTheories: - theoryModels.save(args.theoryFile) - dicts.save(args.dictsFile) - if not args.saveStats == None: - if args.learnTheories: - theoryStatsFile = os.path.join(args.outputDir,'theoryStats') - theoryStats.save(theoryStatsFile) - statsFile = os.path.join(args.outputDir,args.saveStats) - stats.save(statsFile) - return 0 - -if __name__ == '__main__': - # Example: - #List - # ISAR Theories - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--learnTheories'] - #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories'] - # ISAR predef mesh - #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--predef','../data/20130110/List/mesh_suggestions'] - #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130110/List/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats'] - - - # Auth - # ISAR Theories - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--learnTheories','--sineFeatures'] - #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories'] - # ISAR predef mesh - #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--predef','../data/20121227b/Auth/mesh_suggestions'] - #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20121227b/Auth/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats'] - - - # Jinja - # ISAR Theories - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories'] - #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--cutOff','500','--learnTheories'] - # ISAR Theories SinePrior - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories','--sineFeatures'] - #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories','--sineFeatures'] - # ISAR NB - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/'] - #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500'] - # ISAR predef mesh - #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--predef','../data/20130111/Jinja/mesh_suggestions'] - #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130111/Jinja/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats'] - # ISAR NB ATP - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--depFile','mash_atp_dependencies'] - #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies'] - #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--predef','--depFile','mash_atp_dependencies'] - #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats','--depFile','mash_atp_dependencies'] - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--depFile','mash_atp_dependencies','--snow'] - #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies'] - # ISAR Snow - #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--snow'] - #args = ['-i', '../data/20121212/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500'] - - - #sys.exit(mash(args)) +if __name__ == "__main__": sys.exit(mash()) diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/mashOld.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mashOld.py Tue Aug 20 22:24:02 2013 +0200 @@ -0,0 +1,329 @@ +#!/usr/bin/python +# Title: HOL/Tools/Sledgehammer/MaSh/src/mash.py +# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen +# Copyright 2012 +# +# Entry point for MaSh (Machine Learning for Sledgehammer). + +''' +MaSh - Machine Learning for Sledgehammer + +MaSh allows to use different machine learning algorithms to predict relevant fact for Sledgehammer. + +Created on July 12, 2012 + +@author: Daniel Kuehlwein +''' + +import logging,datetime,string,os,sys +from argparse import ArgumentParser,RawDescriptionHelpFormatter +from time import time +from stats import Statistics +from theoryStats import TheoryStatistics +from theoryModels import TheoryModels +from dictionaries import Dictionaries +#from fullNaiveBayes import NBClassifier +from sparseNaiveBayes import sparseNBClassifier +from snow import SNoW +from predefined import Predefined + +# Set up command-line parser +parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer. \n\n\ +MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\ +--------------- Example Usage ---------------\n\ +First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\ +Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\ +\n\n\ +Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter) +parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.') +parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.') +parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(), + help='File where the predictions stored. Default=../tmp/dateTime.predictions.') +parser.add_argument('--numberOfPredictions',default=200,help="Number of premises to write in the output. Default=200.",type=int) + +parser.add_argument('--init',default=False,action='store_true',help="Initialize Mash. Requires --inputDir to be defined. Default=False.") +parser.add_argument('--inputDir',default='../data/20121212/Jinja/',\ + help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility') +parser.add_argument('--depFile', default='mash_dependencies', + help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies') +parser.add_argument('--saveModel',default=False,action='store_true',help="Stores the learned Model at the end of a prediction run. Default=False.") + +parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.") +# Theory Parameters +parser.add_argument('--theoryDefValPos',default=-7.5,help="Default value for positive unknown features. Default=-7.5.",type=float) +parser.add_argument('--theoryDefValNeg',default=-10.0,help="Default value for negative unknown features. Default=-15.0.",type=float) +parser.add_argument('--theoryPosWeight',default=2.0,help="Weight value for positive features. Default=2.0.",type=float) + +parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.") +# NB Parameters +parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float) +parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float) +parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float) +# TODO: Rename to sineFeatures +parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.") +parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float) + +parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.") +parser.add_argument('--predef',help="Use predefined predictions. Used only for comparison with the actual learning. Argument is the filename of the predictions.") +parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\ + WARNING: This will make the program a lot slower! Default=False.") +parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.") +parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int) +parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log') +parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.") +parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle') +parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle') +parser.add_argument('--theoryFile', default='../tmp/theory.pickle', help='Model file name. Default=../tmp/theory.pickle') + +def mash(argv = sys.argv[1:]): + # Initializing command-line arguments + args = parser.parse_args(argv) + + # Set up logging + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', + datefmt='%d-%m %H:%M:%S', + filename=args.log, + filemode='w') + logger = logging.getLogger('main.py') + + """ + # remove old handler for tester + # TODO: Comment out for Jasmins version. This crashes python 2.6.1 + logger.root.handlers[0].stream.close() + logger.root.removeHandler(logger.root.handlers[0]) + file_handler = logging.FileHandler(args.log) + file_handler.setLevel(logging.DEBUG) + formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s') + file_handler.setFormatter(formatter) + logger.root.addHandler(file_handler) + #""" + if args.quiet: + logger.setLevel(logging.WARNING) + #console.setLevel(logging.WARNING) + else: + console = logging.StreamHandler(sys.stdout) + console.setLevel(logging.INFO) + formatter = logging.Formatter('# %(message)s') + console.setFormatter(formatter) + logging.getLogger('').addHandler(console) + + if not os.path.exists(args.outputDir): + os.makedirs(args.outputDir) + + logger.info('Using the following settings: %s',args) + # Pick algorithm + if args.nb: + logger.info('Using sparse Naive Bayes for learning.') + model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal) + elif args.snow: + logger.info('Using naive bayes (SNoW) for learning.') + model = SNoW() + elif args.predef: + logger.info('Using predefined predictions.') + model = Predefined(args.predef) + else: + logger.info('No algorithm specified. Using sparse Naive Bayes.') + model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal) + + # Initializing model + if args.init: + logger.info('Initializing Model.') + startTime = time() + + # Load all data + dicts = Dictionaries() + dicts.init_all(args) + + # Create Model + trainData = dicts.featureDict.keys() + model.initializeModel(trainData,dicts) + + if args.learnTheories: + depFile = os.path.join(args.inputDir,args.depFile) + theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight) + theoryModels.init(depFile,dicts) + theoryModels.save(args.theoryFile) + + model.save(args.modelFile) + dicts.save(args.dictsFile) + + logger.info('All Done. %s seconds needed.',round(time()-startTime,2)) + return 0 + # Create predictions and/or update model + else: + lineCounter = 1 + statementCounter = 1 + computeStats = False + dicts = Dictionaries() + theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight) + # Load Files + if os.path.isfile(args.dictsFile): + #logger.info('Loading Dictionaries') + #startTime = time() + dicts.load(args.dictsFile) + #logger.info('Done %s',time()-startTime) + if os.path.isfile(args.modelFile): + #logger.info('Loading Model') + #startTime = time() + model.load(args.modelFile) + #logger.info('Done %s',time()-startTime) + if os.path.isfile(args.theoryFile) and args.learnTheories: + #logger.info('Loading Theory Models') + #startTime = time() + theoryModels.load(args.theoryFile) + #logger.info('Done %s',time()-startTime) + logger.info('All loading completed') + + # IO Streams + OS = open(args.predictions,'w') + IS = open(args.inputFile,'r') + + # Statistics + if args.statistics: + stats = Statistics(args.cutOff) + if args.learnTheories: + theoryStats = TheoryStatistics() + + predictions = None + predictedTheories = None + #Reading Input File + for line in IS: +# try: + if True: + if line.startswith('!'): + problemId = dicts.parse_fact(line) + # Statistics + if args.statistics and computeStats: + computeStats = False + # Assume '!' comes after '?' + if args.predef: + predictions = model.predict(problemId) + if args.learnTheories: + tmp = [dicts.idNameDict[x] for x in dicts.dependenciesDict[problemId]] + usedTheories = set([x.split('.')[0] for x in tmp]) + theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories,len(theoryModels.accessibleTheories)) + stats.update(predictions,dicts.dependenciesDict[problemId],statementCounter) + if not stats.badPreds == []: + bp = string.join([str(dicts.idNameDict[x]) for x in stats.badPreds], ',') + logger.debug('Bad predictions: %s',bp) + + statementCounter += 1 + # Update Dependencies, p proves p + dicts.dependenciesDict[problemId] = [problemId]+dicts.dependenciesDict[problemId] + if args.learnTheories: + theoryModels.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts) + if args.snow: + model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts) + else: + model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId]) + elif line.startswith('p'): + # Overwrite old proof. + problemId,newDependencies = dicts.parse_overwrite(line) + newDependencies = [problemId]+newDependencies + model.overwrite(problemId,newDependencies,dicts) + if args.learnTheories: + theoryModels.overwrite(problemId,newDependencies,dicts) + dicts.dependenciesDict[problemId] = newDependencies + elif line.startswith('?'): + startTime = time() + computeStats = True + if args.predef: + continue + name,features,accessibles,hints = dicts.parse_problem(line) + + # Create predictions + logger.info('Starting computation for problem on line %s',lineCounter) + # Update Models with hints + if not hints == []: + if args.learnTheories: + accessibleTheories = set([(dicts.idNameDict[x]).split('.')[0] for x in accessibles]) + theoryModels.update_with_acc('hints',features,hints,dicts,accessibleTheories) + if args.snow: + pass + else: + model.update('hints',features,hints) + + # Predict premises + if args.learnTheories: + predictedTheories,accessibles = theoryModels.predict(features,accessibles,dicts) + + # Add additional features on premise lvl if sine is enabled + if args.sineFeatures: + origFeatures = [f for f,_w in features] + secondaryFeatures = [] + for f in origFeatures: + if dicts.featureCountDict[f] == 1: + continue + triggeredFormulas = dicts.featureTriggeredFormulasDict[f] + for formula in triggeredFormulas: + tFeatures = dicts.triggerFeaturesDict[formula] + #tFeatures = [ff for ff,_fw in dicts.featureDict[formula]] + newFeatures = set(tFeatures).difference(secondaryFeatures+origFeatures) + for fNew in newFeatures: + secondaryFeatures.append((fNew,args.sineWeight)) + predictionsFeatures = features+secondaryFeatures + else: + predictionsFeatures = features + predictions,predictionValues = model.predict(predictionsFeatures,accessibles,dicts) + assert len(predictions) == len(predictionValues) + + # Delete hints + if not hints == []: + if args.learnTheories: + theoryModels.delete('hints',features,hints,dicts) + if args.snow: + pass + else: + model.delete('hints',features,hints) + + logger.info('Done. %s seconds needed.',round(time()-startTime,2)) + # Output + predictionNames = [str(dicts.idNameDict[p]) for p in predictions[:args.numberOfPredictions]] + predictionValues = [str(x) for x in predictionValues[:args.numberOfPredictions]] + predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))] + predictionsString = string.join(predictionsStringList,' ') + outString = '%s: %s' % (name,predictionsString) + OS.write('%s\n' % outString) + else: + logger.warning('Unspecified input format: \n%s',line) + sys.exit(-1) + lineCounter += 1 + """ + except: + logger.warning('An error occurred on line %s .',line) + lineCounter += 1 + continue + """ + OS.close() + IS.close() + + # Statistics + if args.statistics: + if args.learnTheories: + theoryStats.printAvg() + stats.printAvg() + + # Save + if args.saveModel: + model.save(args.modelFile) + if args.learnTheories: + theoryModels.save(args.theoryFile) + dicts.save(args.dictsFile) + if not args.saveStats == None: + if args.learnTheories: + theoryStatsFile = os.path.join(args.outputDir,'theoryStats') + theoryStats.save(theoryStatsFile) + statsFile = os.path.join(args.outputDir,args.saveStats) + stats.save(statsFile) + return 0 + +if __name__ == '__main__': + # Cezary Auth + args = ['--statistics', '--init', '--inputDir', '../data/20130118/Jinja', '--log', '../tmp/auth.log', '--theoryFile', '../tmp/t0', '--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0','--NBDefaultPriorWeight', '20.0', '--NBDefVal', '-15.0', '--NBPosWeight', '10.0'] + mash(args) + args = ['-i', '../data/20130118/Jinja/mash_commands', '-p', '../tmp/auth.pred0', '--statistics', '--cutOff', '500', '--log', '../tmp/auth.log','--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0'] + mash(args) + + #sys.exit(mash(args)) + sys.exit(mash()) diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/mashTest.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mashTest.py Tue Aug 20 22:24:02 2013 +0200 @@ -0,0 +1,12 @@ +''' +Created on Aug 20, 2013 + +@author: daniel +''' +from mash import mash + +if __name__ == "__main__": + args = ['--statistics', '--init', '--inputDir', '../data/20130118/Jinja', '--log', '../tmp/auth.log', '--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0','--NBDefaultPriorWeight', '20.0', '--NBDefVal', '-15.0', '--NBPosWeight', '10.0'] + mash(args) + args = ['-i', '../data/20130118/Jinja/mash_commands', '-p', '../tmp/auth.pred0', '--statistics', '--cutOff', '500', '--log', '../tmp/auth.log','--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0'] + mash(args) \ No newline at end of file diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/parameters.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/parameters.py Tue Aug 20 22:24:02 2013 +0200 @@ -0,0 +1,46 @@ +import datetime +from argparse import ArgumentParser,RawDescriptionHelpFormatter + +def init_parser(argv): + # Set up command-line parser + parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer. \n\n\ + MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\ + --------------- Example Usage ---------------\n\ + First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\ + Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\ + \n\n\ + Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter) + parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.') + parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.') + parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(), + help='File where the predictions stored. Default=../tmp/dateTime.predictions.') + parser.add_argument('--numberOfPredictions',default=500,help="Default number of premises to write in the output. Default=500.",type=int) + + parser.add_argument('--init',default=False,action='store_true',help="Initialize Mash. Requires --inputDir to be defined. Default=False.") + parser.add_argument('--inputDir',\ + help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility') + parser.add_argument('--depFile', default='mash_dependencies', + help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies') + + parser.add_argument('--algorithm',default='nb',action='store_true',help="Which learning algorithm is used. nb = Naive Bayes,predef=predefined. Default=nb.") + # NB Parameters + parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float) + parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float) + parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float) + # TODO: Rename to sineFeatures + parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.") + parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float) + + parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\ + WARNING: This will make the program a lot slower! Default=False.") + parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.") + parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int) + parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log') + parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.") + parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle') + parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle') + + parser.add_argument('--port', default='9255', help='Port of the Mash server. Default=9255',type=int) + parser.add_argument('--host', default='localhost', help='Host of the Mash server. Default=localhost') + args = parser.parse_args(argv) + return args diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py Tue Aug 20 22:24:02 2013 +0200 @@ -35,7 +35,11 @@ name = line[0].strip() predId = dicts.get_name_id(name) line = line[1].split() - preds = [dicts.get_name_id(x.strip())for x in line] + predsTmp = [] + for x in line: + x = x.split('=') + predsTmp.append(x[0]) + preds = [dicts.get_name_id(x.strip())for x in predsTmp] self.predictions[predId] = preds IS.close() diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/server.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/server.py Tue Aug 20 22:24:02 2013 +0200 @@ -0,0 +1,170 @@ +#!/usr/bin/env python +# Title: HOL/Tools/Sledgehammer/MaSh/src/server.py +# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen +# Copyright 2013 +# +# The MaSh Server. + +import SocketServer,os,string,logging +from multiprocessing import Manager +from time import time +from dictionaries import Dictionaries +from parameters import init_parser +from sparseNaiveBayes import sparseNBClassifier +from stats import Statistics + + +class ThreadingTCPServer(SocketServer.ThreadingTCPServer): + def __init__(self, *args, **kwargs): + SocketServer.ThreadingTCPServer.__init__(self,*args, **kwargs) + self.manager = Manager() + self.lock = Manager().Lock() + +class MaShHandler(SocketServer.BaseRequestHandler): + + def init(self,argv): + if argv == '': + self.server.args = init_parser([]) + else: + argv = argv.split(';') + self.server.args = init_parser(argv) + # Pick model + if self.server.args.algorithm == 'nb': + self.server.model = sparseNBClassifier(self.server.args.NBDefaultPriorWeight,self.server.args.NBPosWeight,self.server.args.NBDefVal) + else: # Default case + self.server.model = sparseNBClassifier(self.server.args.NBDefaultPriorWeight,self.server.args.NBPosWeight,self.server.args.NBDefVal) + # Load all data + # TODO: rewrite dicts for concurrency and without sine + self.server.dicts = Dictionaries() + if os.path.isfile(self.server.args.dictsFile): + self.server.dicts.load(self.server.args.dictsFile) + elif self.server.args.init: + self.server.dicts.init_all(self.server.args) + # Create Model + if os.path.isfile(self.server.args.modelFile): + self.server.model.load(self.server.args.modelFile) + elif self.server.args.init: + trainData = self.server.dicts.featureDict.keys() + self.server.model.initializeModel(trainData,self.server.dicts) + + if self.server.args.statistics: + self.server.stats = Statistics(self.server.args.cutOff) + self.server.statementCounter = 1 + self.server.computeStats = False + + # Set up logging + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', + datefmt='%d-%m %H:%M:%S', + filename=self.server.args.log+'server', + filemode='w') + self.server.logger = logging.getLogger('server') + self.server.logger.debug('Initialized in '+str(round(time()-self.startTime,2))+' seconds.') + self.request.sendall('Server initialized in '+str(round(time()-self.startTime,2))+' seconds.') + self.server.callCounter = 1 + + def update(self): + problemId = self.server.dicts.parse_fact(self.data) + # Statistics + if self.server.args.statistics and self.server.computeStats: + self.server.computeStats = False + # Assume '!' comes after '?' + if self.server.args.algorithm == 'predef': + self.server.predictions = self.server.model.predict(problemId) + self.server.stats.update(self.server.predictions,self.server.dicts.dependenciesDict[problemId],self.server.statementCounter) + if not self.server.stats.badPreds == []: + bp = string.join([str(self.server.dicts.idNameDict[x]) for x in self.server.stats.badPreds], ',') + self.server.logger.debug('Poor predictions: %s',bp) + self.server.statementCounter += 1 + + # Update Dependencies, p proves p + self.server.dicts.dependenciesDict[problemId] = [problemId]+self.server.dicts.dependenciesDict[problemId] + self.server.model.update(problemId,self.server.dicts.featureDict[problemId],self.server.dicts.dependenciesDict[problemId]) + + def overwrite(self): + # Overwrite old proof. + problemId,newDependencies = self.server.dicts.parse_overwrite(self.data) + newDependencies = [problemId]+newDependencies + self.server.model.overwrite(problemId,newDependencies,self.server.dicts) + self.server.dicts.dependenciesDict[problemId] = newDependencies + + def predict(self): + self.server.computeStats = True + if self.server.args.algorithm == 'predef': + return + name,features,accessibles,hints,numberOfPredictions = self.server.dicts.parse_problem(self.data) + if numberOfPredictions == None: + numberOfPredictions = self.server.args.numberOfPredictions + if not hints == []: + self.server.model.update('hints',features,hints) + + # Create predictions + self.server.logger.debug('Starting computation for line %s',self.server.callCounter) + predictionsFeatures = features + self.server.predictions,predictionValues = self.server.model.predict(predictionsFeatures,accessibles,self.server.dicts) + assert len(self.server.predictions) == len(predictionValues) + self.server.logger.debug('Time needed: '+str(round(time()-self.startTime,2))) + + # Output + predictionNames = [str(self.server.dicts.idNameDict[p]) for p in self.server.predictions[:numberOfPredictions]] + predictionValues = [str(x) for x in predictionValues[:self.server.args.numberOfPredictions]] + predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))] + predictionsString = string.join(predictionsStringList,' ') + outString = '%s: %s' % (name,predictionsString) + self.request.sendall(outString) + + def shutdown(self): + self.request.sendall('Shutting down server.') + # Save Models + self.server.model.save(self.server.args.modelFile) + self.server.dicts.save(self.server.args.dictsFile) + if not self.server.args.saveStats == None: + statsFile = os.path.join(self.server.args.outputDir,self.server.args.saveStats) + self.server.stats.save(statsFile) + self.server.shutdown() + + def handle(self): + # self.request is the TCP socket connected to the client + self.data = self.request.recv(262144).strip() + #print "{} wrote:".format(self.client_address[0]) + #print self.data + self.startTime = time() + if self.data == 's': + self.shutdown() + elif self.data.startswith('i'): + self.init(self.data[2:]) + elif self.data.startswith('!'): + self.update() + elif self.data.startswith('p'): + self.overwrite() + elif self.data.startswith('?'): + self.predict() + elif self.data == '': + # Empty Socket + return + elif self.data == 'avgStats': + self.request.sendall(self.server.stats.printAvg()) + else: + self.request.sendall('Unspecified input format: \n%s',self.data) + self.server.callCounter += 1 + + #returnString = 'Time needed: '+str(round(time()-self.startTime,2)) + #print returnString + +if __name__ == "__main__": + HOST, PORT = "localhost", 9255 + #print 'Started Server' + # Create the server, binding to localhost on port 9999 + SocketServer.TCPServer.allow_reuse_address = True + server = ThreadingTCPServer((HOST, PORT), MaShHandler) + #server = SocketServer.TCPServer((HOST, PORT), MaShHandler) + + # Activate the server; this will keep running until you + # interrupt the program with Ctrl-C + server.serve_forever() + + + + + + \ No newline at end of file diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/snow.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/snow.py Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/snow.py Tue Aug 20 22:24:02 2013 +0200 @@ -27,7 +27,7 @@ self.SNoWTrainFile = '../tmp/snow.train' self.SNoWTestFile = '../snow.test' self.SNoWNetFile = '../tmp/snow.net' - self.defMaxNameId = 20000 + self.defMaxNameId = 100000 def initializeModel(self,trainData,dicts): """ @@ -51,6 +51,7 @@ # Build Model self.logger.debug('Building Model START.') snowTrainCommand = '../bin/snow -train -M+ -I %s -F %s -g- -B :0-%s' % (self.SNoWTrainFile,self.SNoWNetFile,dicts.maxNameId-1) + #print snowTrainCommand #snowTrainCommand = '../bin/snow -train -M+ -I %s -F %s -g- -B :0-%s' % (self.SNoWTrainFile,self.SNoWNetFile,self.defMaxNameId-1) args = shlex.split(snowTrainCommand) p = subprocess.Popen(args,stdout=subprocess.PIPE,stderr=subprocess.STDOUT) diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py Tue Aug 20 22:24:02 2013 +0200 @@ -95,6 +95,7 @@ For each accessible, predicts the probability of it being useful given the features. Returns a ranking of the accessibles. """ + tau = 0.01 # Jasmin, change value here predictions = [] for a in accessibles: posA = self.counts[a][0] @@ -112,6 +113,11 @@ resultA += w*log(float(self.posWeight*fWeightsA[f])/posA) else: resultA += w*self.defVal + if not tau == 0.0: + observedFeatures = [f for f,_w in features] + missingFeatures = list(fA.difference(observedFeatures)) + sumOfWeights = sum([log(float(fWeightsA[x])/posA) for x in missingFeatures]) + resultA -= tau * sumOfWeights predictions.append(resultA) predictions = array(predictions) perm = (-predictions).argsort() diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/spawnDaemon.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/spawnDaemon.py Tue Aug 20 22:24:02 2013 +0200 @@ -0,0 +1,35 @@ +# http://stackoverflow.com/questions/972362/spawning-process-from-python/972383#972383 +import os + +def spawnDaemon(path_to_executable, *args): + """Spawn a completely detached subprocess (i.e., a daemon). + + E.g. for mark: + spawnDaemon("../bin/producenotify.py", "producenotify.py", "xx") + """ + # fork the first time (to make a non-session-leader child process) + try: + pid = os.fork() + except OSError, e: + raise RuntimeError("1st fork failed: %s [%d]" % (e.strerror, e.errno)) + if pid != 0: + # parent (calling) process is all done + return + + # detach from controlling terminal (to make child a session-leader) + os.setsid() + try: + pid = os.fork() + except OSError, e: + raise RuntimeError("2nd fork failed: %s [%d]" % (e.strerror, e.errno)) + raise Exception, "%s [%d]" % (e.strerror, e.errno) + if pid != 0: + # child process is all done + os._exit(0) + + # and finally let's execute the executable for the daemon! + try: + os.execv(path_to_executable, [path_to_executable]) + except Exception, e: + # oops, we're cut off from the world, let's just give up + os._exit(255) diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/stats.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py Tue Aug 20 22:24:02 2013 +0200 @@ -30,6 +30,7 @@ self.problems = 0.0 self.cutOff = cutOff self.recallData = [0]*cutOff + self.recall100Median = [] self.recall100Data = [0]*cutOff self.aucData = [] self.premiseOccurenceCounter = {} @@ -107,6 +108,7 @@ self.aucData.append(auc) self.avgAUC += auc self.avgRecall100 += recall100 + self.recall100Median.append(recall100) self.problems += 1 self.badPreds = badPreds self.avgAvailable += available @@ -116,8 +118,29 @@ def printAvg(self): self.logger.info('Average results:') - self.logger.info('avgAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t cutOff:%s', \ - round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff) + #self.logger.info('avgAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t cutOff:%s', \ + # round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff) + # HACK FOR PAPER + assert len(self.aucData) == len(self.recall100Median) + nrDataPoints = len(self.aucData) + if nrDataPoints % 2 == 1: + medianAUC = sorted(self.aucData)[nrDataPoints/2 + 1] + else: + medianAUC = float(sorted(self.aucData)[nrDataPoints/2] + sorted(self.aucData)[nrDataPoints/2 + 1])/2 + #nrDataPoints = len(self.recall100Median) + if nrDataPoints % 2 == 1: + medianrecall100 = sorted(self.recall100Median)[nrDataPoints/2 + 1] + else: + medianrecall100 = float(sorted(self.recall100Median)[nrDataPoints/2] + sorted(self.recall100Median)[nrDataPoints/2 + 1])/2 + + returnString = 'avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff:%s' %\ + (round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianrecall100,2),self.cutOff) + self.logger.info(returnString) + return returnString + + """ + self.logger.info('avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff:%s', \ + round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianrecall100,2),self.cutOff) #try: #if True: @@ -156,6 +179,7 @@ show() #except: # self.logger.warning('Matplotlib module missing. Skipping graphs.') + """ def save(self,fileName): oStream = open(fileName, 'wb') diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/tester.py --- a/src/HOL/Tools/Sledgehammer/MaSh/src/tester.py Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/MaSh/src/tester.py Tue Aug 20 22:24:02 2013 +0200 @@ -16,29 +16,39 @@ #print '%s says that %s%s = %s' % (current_process().name, func.__name__, args, result) outQueue.put(result) -def run_mash(runId,inputDir,logFile,predictionFile,\ +def run_mash(runId,inputDir,logFile,predictionFile,predef,\ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ - sineFeatures,sineWeight): + sineFeatures,sineWeight,quiet=True): # Init runId = str(runId) predictionFile = predictionFile + runId - args = ['--statistics','--init','--inputDir',inputDir,'-q','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId, + args = ['--statistics','--init','--inputDir',inputDir,'--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId, '--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\ '--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)] if learnTheories: - args = args + ['--learnTheories'] + args += ['--learnTheories'] if sineFeatures: args += ['--sineFeatures','--sineWeight',str(sineWeight)] + if not predef == '': + args += ['--predef',predef] + if quit: + args += ['-q'] + #print args mash(args) # Run - args = ['-q','-i',inputFile,'-p',predictionFile,'--statistics','--cutOff','500','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,\ + args = ['-i',inputFile,'-p',predictionFile,'--statistics','--cutOff','1024','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,\ '--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\ '--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)] if learnTheories: - args = args + ['--learnTheories'] + args += ['--learnTheories'] if sineFeatures: args += ['--sineFeatures','--sineWeight',str(sineWeight)] + if not predef == '': + args += ['--predef',predef] + if quit: + args += ['-q'] + #print args mash(args) # Get Results @@ -46,29 +56,42 @@ lines = IS.readlines() tmpRes = lines[-1].split() avgAuc = tmpRes[5] - avgRecall100 = tmpRes[9] + medianAuc = tmpRes[7] + avgRecall100 = tmpRes[11] + medianRecall100 = tmpRes[13] tmpTheoryRes = lines[-3].split() - avgTheoryPrecision = tmpTheoryRes[5] - avgTheoryRecall100 = tmpTheoryRes[7] - avgTheoryRecall = tmpTheoryRes[9] - avgTheoryPredictedPercent = tmpTheoryRes[11] + if learnTheories: + avgTheoryPrecision = tmpTheoryRes[5] + avgTheoryRecall100 = tmpTheoryRes[7] + avgTheoryRecall = tmpTheoryRes[9] + avgTheoryPredictedPercent = tmpTheoryRes[11] + else: + avgTheoryPrecision = 'NA' + avgTheoryRecall100 = 'NA' + avgTheoryRecall = 'NA' + avgTheoryPredictedPercent = 'NA' IS.close() # Delete old models os.remove(logFile) os.remove(predictionFile) - os.remove('../tmp/t'+runId) + if learnTheories: + os.remove('../tmp/t'+runId) os.remove('../tmp/m'+runId) os.remove('../tmp/d'+runId) outFile = open('tester','a') #print 'avgAuc %s avgRecall100 %s avgTheoryPrecision %s avgTheoryRecall100 %s avgTheoryRecall %s avgTheoryPredictedPercent %s' - outFile.write('\t'.join([str(learnTheories),str(theoryDefValPos),str(theoryDefValNeg),str(theoryPosWeight),str(NBDefaultPriorWeight),str(NBDefVal),str(NBPosWeight),str(sineFeatures),str(sineWeight),str(avgAuc),str(avgRecall100),str(avgTheoryPrecision),str(avgTheoryRecall100),str(avgTheoryRecall),str(avgTheoryPredictedPercent)])+'\n') + outFile.write('\t'.join([str(learnTheories),str(theoryDefValPos),str(theoryDefValNeg),str(theoryPosWeight),\ + str(NBDefaultPriorWeight),str(NBDefVal),str(NBPosWeight),str(sineFeatures),str(sineWeight),\ + str(avgAuc),str(medianAuc),str(avgRecall100),str(medianRecall100),str(avgTheoryPrecision),\ + str(avgTheoryRecall100),str(avgTheoryRecall),str(avgTheoryPredictedPercent)])+'\n') outFile.close() print learnTheories,'\t',theoryDefValPos,'\t',theoryDefValNeg,'\t',theoryPosWeight,'\t',\ NBDefaultPriorWeight,'\t',NBDefVal,'\t',NBPosWeight,'\t',\ sineFeatures,'\t',sineWeight,'\t',\ - avgAuc,'\t',avgRecall100,'\t',avgTheoryPrecision,'\t',avgTheoryRecall100,'\t',avgTheoryRecall,'\t',avgTheoryPredictedPercent + avgAuc,'\t',medianAuc,'\t',avgRecall100,'\t',medianRecall100,'\t',\ + avgTheoryPrecision,'\t',avgTheoryRecall100,'\t',avgTheoryRecall,'\t',avgTheoryPredictedPercent return learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ sineFeatures,sineWeight,\ @@ -93,8 +116,9 @@ #cores = 1 # Options depFile = 'mash_dependencies' + predef = '' outputDir = '../tmp/' - numberOfPredictions = 500 + numberOfPredictions = 1024 learnTheoriesRange = [True,False] theoryDefValPosRange = [-x for x in range(1,20)] @@ -107,6 +131,7 @@ sineFeaturesRange = [True,False] sineWeightRange = [0.1,0.25,0.5,0.75,1.0] + """ # Test 1 inputFile = '../data/20121227b/Auth/mash_commands' inputDir = '../data/20121227b/Auth/' @@ -179,4 +204,78 @@ NBDefaultPriorWeight,NBDefVal,NBPosWeight,sineFeatures,sineWeight,\ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight) print 'bestAvgRecall100 %s bestNBDefaultPriorWeight %s bestNBDefVal %s bestNBPosWeight %s bestSineFeatures %s bestSineWeight %s',bestAvgRecall100,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight - \ No newline at end of file + + """ + # Test 2 + #inputDir = '../data/20130118/Jinja' + inputDir = '../data/notheory/Prob' + inputFile = inputDir+'/mash_commands' + #inputFile = inputDir+'/mash_prover_commands' + + #depFile = 'mash_prover_dependencies' + depFile = 'mash_dependencies' + outputDir = '../tmp/' + numberOfPredictions = 1024 + predictionFile = '../tmp/auth.pred' + logFile = '../tmp/auth.log' + learnTheories = False + theoryDefValPos = -7.5 + theoryDefValNeg = -10.0 + theoryPosWeight = 2.0 + NBDefaultPriorWeight = 20.0 + NBDefVal =- 15.0 + NBPosWeight = 10.0 + sineFeatures = False + sineWeight = 0.5 + quiet = False + print inputDir + + #predef = inputDir+'/mash_prover_suggestions' + predef = inputDir+'/mash_suggestions' + print 'Mash Isar' + run_mash(0,inputDir,logFile,predictionFile,predef,\ + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ + sineFeatures,sineWeight,quiet=quiet) + + #""" + predef = inputDir+'/mesh_suggestions' + #predef = inputDir+'/mesh_prover_suggestions' + print 'Mesh Isar' + run_mash(0,inputDir,logFile,predictionFile,predef,\ + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ + sineFeatures,sineWeight,quiet=quiet) + #""" + predef = inputDir+'/mepo_suggestions' + print 'Mepo Isar' + run_mash(0,inputDir,logFile,predictionFile,predef,\ + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ + sineFeatures,sineWeight,quiet=quiet) + + """ + inputFile = inputDir+'/mash_prover_commands' + depFile = 'mash_prover_dependencies' + + predef = inputDir+'/mash_prover_suggestions' + print 'Mash Prover Isar' + run_mash(0,inputDir,logFile,predictionFile,predef,\ + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ + sineFeatures,sineWeight,quiet=quiet) + + predef = inputDir+'/mesh_prover_suggestions' + print 'Mesh Prover Isar' + run_mash(0,inputDir,logFile,predictionFile,predef,\ + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ + sineFeatures,sineWeight,quiet=quiet) + + predef = inputDir+'/mepo_suggestions' + print 'Mepo Prover Isar' + run_mash(0,inputDir,logFile,predictionFile,predef,\ + learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\ + NBDefaultPriorWeight,NBDefVal,NBPosWeight,\ + sineFeatures,sineWeight,quiet=quiet) + #""" \ No newline at end of file diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML Tue Aug 20 11:39:53 2013 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML Tue Aug 20 22:24:02 2013 +0200 @@ -43,8 +43,10 @@ val relearn : Proof.context -> bool -> (string * string list) list -> unit val query : - Proof.context -> bool -> bool -> int - -> string list * (string * real) list * string list -> string list + Proof.context -> bool -> int + -> (string * string list * (string * real) list * string list) list + * string list * string list * (string * real) list + -> string list end val mash_unlearn : Proof.context -> unit @@ -69,6 +71,8 @@ Proof.context -> params -> string -> int -> raw_fact list -> string Symtab.table * string Symtab.table -> thm -> bool * string list + val attach_parents_to_facts : + ('a * thm) list -> ('a * thm) list -> (string list * ('a * thm)) list val weight_mepo_facts : 'a list -> ('a * real) list val weight_mash_facts : 'a list -> ('a * real) list val find_mash_suggestions : @@ -80,8 +84,6 @@ val mash_learn_proof : Proof.context -> params -> string -> term -> ('a * thm) list -> thm list -> unit - val attach_parents_to_facts : - ('a * thm) list -> ('a * thm) list -> (string list * ('a * thm)) list val mash_learn : Proof.context -> params -> fact_override -> thm list -> bool -> unit val is_mash_enabled : unit -> bool @@ -140,11 +142,10 @@ fun write_file banner (xs, f) path = (case banner of SOME s => File.write path s | NONE => (); - xs |> chunk_list 500 - |> List.app (File.append path o space_implode "" o map f)) + xs |> chunk_list 500 |> List.app (File.append path o implode o map f)) handle IO.Io _ => () -fun run_mash_tool ctxt overlord save max_suggs write_cmds read_suggs = +fun run_mash_tool ctxt overlord write_cmds read_suggs = let val (temp_dir, serial) = if overlord then (getenv "ISABELLE_HOME_USER", "") @@ -156,17 +157,13 @@ val cmd_file = temp_dir ^ "/mash_commands" ^ serial val cmd_path = Path.explode cmd_file val model_dir = File.shell_path (mash_model_dir ()) - val core = - "--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^ - " --numberOfPredictions " ^ string_of_int max_suggs ^ - (* " --learnTheories" ^ *) (if save then " --saveModel" else "") + val core = "--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file val command = "cd \"$ISABELLE_SLEDGEHAMMER_MASH\"/src; " ^ "./mash.py --quiet" ^ " --outputDir " ^ model_dir ^ " --modelFile=" ^ model_dir ^ "/model.pickle" ^ " --dictsFile=" ^ model_dir ^ "/dict.pickle" ^ - " --theoryFile=" ^ model_dir ^ "/theory.pickle" ^ " --log " ^ log_file ^ " " ^ core ^ " >& " ^ err_file fun run_on () = @@ -226,15 +223,13 @@ fun str_of_relearn (name, deps) = "p " ^ encode_str name ^ ": " ^ encode_strs deps ^ "\n" -fun str_of_query learn_hints (parents, feats, hints) = - (if not learn_hints orelse null hints then "" - else str_of_learn (freshish_name (), parents, feats, hints)) ^ - "? " ^ encode_strs parents ^ "; " ^ encode_features feats ^ - (if learn_hints orelse null hints then "" else "; " ^ encode_strs hints) ^ - "\n" +fun str_of_query max_suggs (learns, hints, parents, feats) = + implode (map str_of_learn learns) ^ + "? " ^ string_of_int max_suggs ^ " # " ^ encode_strs parents ^ "; " ^ + encode_features feats ^ + (if null hints then "" else "; " ^ encode_strs hints) ^ "\n" -(* The weights currently returned by "mash.py" are too spaced out to make any - sense. *) +(* The suggested weights don't make much sense. *) fun extract_suggestion sugg = case space_explode "=" sugg of [name, _ (* weight *)] => @@ -264,18 +259,17 @@ | learn ctxt overlord learns = (trace_msg ctxt (fn () => "MaSh learn " ^ elide_string 1000 (space_implode " " (map #1 learns))); - run_mash_tool ctxt overlord true 0 (learns, str_of_learn) (K ())) + run_mash_tool ctxt overlord (learns, str_of_learn) (K ())) fun relearn _ _ [] = () | relearn ctxt overlord relearns = (trace_msg ctxt (fn () => "MaSh relearn " ^ elide_string 1000 (space_implode " " (map #1 relearns))); - run_mash_tool ctxt overlord true 0 (relearns, str_of_relearn) (K ())) + run_mash_tool ctxt overlord (relearns, str_of_relearn) (K ())) -fun query ctxt overlord learn_hints max_suggs (query as (_, feats, hints)) = +fun query ctxt overlord max_suggs (query as (learns, hints, _, feats)) = (trace_msg ctxt (fn () => "MaSh query " ^ encode_features feats); - run_mash_tool ctxt overlord (learn_hints andalso not (null hints)) - max_suggs ([query], str_of_query learn_hints) + run_mash_tool ctxt overlord ([query], str_of_query max_suggs) (fn suggs => case suggs () of [] => [] @@ -329,15 +323,18 @@ string_of_int (length (Graph.minimals G)) ^ " minimal, " ^ string_of_int (length (Graph.maximals G)) ^ " maximal" -type mash_state = {access_G : unit Graph.T, dirty : string list option} +type mash_state = + {access_G : unit Graph.T, + num_known_facts : int, + dirty : string list option} -val empty_state = {access_G = Graph.empty, dirty = SOME []} +val empty_state = {access_G = Graph.empty, num_known_facts = 0, dirty = SOME []} local -val version = "*** MaSh version 20130819a ***" +val version = "*** MaSh version 20130820 ***" -exception Too_New of unit +exception FILE_VERSION_TOO_NEW of unit fun extract_node line = case space_explode ":" line of @@ -365,24 +362,27 @@ | SOME (name, parents, kind) => update_access_graph_node (name, kind) #> fold (add_edge_to name) parents - val access_G = + val (access_G, num_known_facts) = case string_ord (version', version) of EQUAL => - try_graph ctxt "loading state" Graph.empty (fn () => - fold add_node node_lines Graph.empty) + (try_graph ctxt "loading state" Graph.empty (fn () => + fold add_node node_lines Graph.empty), + length node_lines) | LESS => - (MaSh.unlearn ctxt; Graph.empty) (* can't parse old file *) - | GREATER => raise Too_New () + (* can't parse old file *) + (MaSh.unlearn ctxt; (Graph.empty, 0)) + | GREATER => raise FILE_VERSION_TOO_NEW () in trace_msg ctxt (fn () => "Loaded fact graph (" ^ graph_info access_G ^ ")"); - {access_G = access_G, dirty = SOME []} + {access_G = access_G, num_known_facts = num_known_facts, + dirty = SOME []} end | _ => empty_state) end fun save _ (state as {dirty = SOME [], ...}) = state - | save ctxt {access_G, dirty} = + | save ctxt {access_G, num_known_facts, dirty} = let fun str_of_entry (name, parents, kind) = str_of_proof_kind kind ^ " " ^ encode_str name ^ ": " ^ @@ -402,7 +402,7 @@ SOME dirty => "; " ^ string_of_int (length dirty) ^ " dirty fact(s)" | _ => "") ^ ")"); - {access_G = access_G, dirty = SOME []} + {access_G = access_G, num_known_facts = num_known_facts, dirty = SOME []} end val global_state = @@ -412,7 +412,7 @@ fun map_state ctxt f = Synchronized.change global_state (load ctxt ##> (f #> save ctxt)) - handle Too_New () => () + handle FILE_VERSION_TOO_NEW () => () fun peek_state ctxt f = Synchronized.change_result global_state @@ -717,6 +717,9 @@ | NONE => false) | is_size_def _ _ = false +fun no_dependencies_for_status status = + status = Non_Rec_Def orelse status = Rec_Def + fun trim_dependencies deps = if length deps > max_dependencies then NONE else SOME deps @@ -784,159 +787,9 @@ (*** High-level communication with MaSh ***) -fun maximal_wrt_graph G keys = - let - val tab = Symtab.empty |> fold (fn name => Symtab.default (name, ())) keys - fun insert_new seen name = - not (Symtab.defined seen name) ? insert (op =) name - fun num_keys keys = Graph.Keys.fold (K (Integer.add 1)) keys 0 - fun find_maxes _ (maxs, []) = map snd maxs - | find_maxes seen (maxs, new :: news) = - find_maxes - (seen |> num_keys (Graph.imm_succs G new) > 1 - ? Symtab.default (new, ())) - (if Symtab.defined tab new then - let - val newp = Graph.all_preds G [new] - fun is_ancestor x yp = member (op =) yp x - val maxs = - maxs |> filter (fn (_, max) => not (is_ancestor max newp)) - in - if exists (is_ancestor new o fst) maxs then - (maxs, news) - else - ((newp, new) - :: filter_out (fn (_, max) => is_ancestor max newp) maxs, - news) - end - else - (maxs, Graph.Keys.fold (insert_new seen) - (Graph.imm_preds G new) news)) - in find_maxes Symtab.empty ([], Graph.maximals G) end - -fun maximal_wrt_access_graph access_G = - map (nickname_of_thm o snd) - #> maximal_wrt_graph access_G - -fun is_fact_in_graph access_G get_th fact = - can (Graph.get_node access_G) (nickname_of_thm (get_th fact)) - -(* FUDGE *) -fun weight_of_mepo_fact rank = - Math.pow (0.62, log2 (Real.fromInt (rank + 1))) - -fun weight_mepo_facts facts = - facts ~~ map weight_of_mepo_fact (0 upto length facts - 1) - -val weight_raw_mash_facts = weight_mepo_facts -val weight_mash_facts = weight_raw_mash_facts - -(* FUDGE *) -fun weight_of_proximity_fact rank = - Math.pow (1.3, 15.5 - 0.2 * Real.fromInt rank) + 15.0 - -fun weight_proximity_facts facts = - facts ~~ map weight_of_proximity_fact (0 upto length facts - 1) - -val max_proximity_facts = 100 - -fun find_mash_suggestions _ _ [] _ _ raw_unknown = ([], raw_unknown) - | find_mash_suggestions ctxt max_facts suggs facts chained raw_unknown = - let - val raw_mash = find_suggested_facts ctxt facts suggs - val unknown_chained = - inter (Thm.eq_thm_prop o pairself snd) chained raw_unknown - val proximity = - facts |> sort (crude_thm_ord o pairself snd o swap) - |> take max_proximity_facts - val mess = - [(0.90 (* FUDGE *), (map (rpair 1.0) unknown_chained, [])), - (0.08 (* FUDGE *), (weight_raw_mash_facts raw_mash, raw_unknown)), - (0.02 (* FUDGE *), (weight_proximity_facts proximity, []))] - val unknown = - raw_unknown - |> fold (subtract (Thm.eq_thm_prop o pairself snd)) - [unknown_chained, proximity] - in (mesh_facts (Thm.eq_thm_prop o pairself snd) max_facts mess, unknown) end - -fun mash_suggested_facts ctxt ({overlord, learn, ...} : params) prover max_facts - hyp_ts concl_t facts = - let - val thy = Proof_Context.theory_of ctxt - val chained = facts |> filter (fn ((_, (scope, _)), _) => scope = Chained) - val (access_G, suggs) = - peek_state ctxt (fn {access_G, ...} => - if Graph.is_empty access_G then - (access_G, []) - else - let - val parents = maximal_wrt_access_graph access_G facts - val feats = - features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts) - val hints = - chained |> filter (is_fact_in_graph access_G snd) - |> map (nickname_of_thm o snd) - in - (access_G, MaSh.query ctxt overlord learn max_facts - (parents, feats, hints)) - end) - val unknown = facts |> filter_out (is_fact_in_graph access_G snd) - in - find_mash_suggestions ctxt max_facts suggs facts chained unknown - |> pairself (map fact_of_raw_fact) - end - -fun learn_wrt_access_graph ctxt (name, parents, feats, deps) (learns, graph) = - let - fun maybe_learn_from from (accum as (parents, graph)) = - try_graph ctxt "updating graph" accum (fn () => - (from :: parents, Graph.add_edge_acyclic (from, name) graph)) - val graph = graph |> Graph.default_node (name, Isar_Proof) - val (parents, graph) = ([], graph) |> fold maybe_learn_from parents - val (deps, _) = ([], graph) |> fold maybe_learn_from deps - in ((name, parents, feats, deps) :: learns, graph) end - -fun relearn_wrt_access_graph ctxt (name, deps) (relearns, graph) = - let - fun maybe_relearn_from from (accum as (parents, graph)) = - try_graph ctxt "updating graph" accum (fn () => - (from :: parents, Graph.add_edge_acyclic (from, name) graph)) - val graph = graph |> update_access_graph_node (name, Automatic_Proof) - val (deps, _) = ([], graph) |> fold maybe_relearn_from deps - in ((name, deps) :: relearns, graph) end - -fun flop_wrt_access_graph name = - update_access_graph_node (name, Isar_Proof_wegen_Prover_Flop) - -val learn_timeout_slack = 2.0 - -fun launch_thread timeout task = - let - val hard_timeout = time_mult learn_timeout_slack timeout - val birth_time = Time.now () - val death_time = Time.+ (birth_time, hard_timeout) - val desc = ("Machine learner for Sledgehammer", "") - in Async_Manager.thread MaShN birth_time death_time desc task end - -fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) prover t facts - used_ths = - launch_thread (timeout |> the_default one_day) (fn () => - let - val thy = Proof_Context.theory_of ctxt - val name = freshish_name () - val feats = features_of ctxt prover thy (Local, General) [t] - in - peek_state ctxt (fn {access_G, ...} => - let - val parents = maximal_wrt_access_graph access_G facts - val deps = - used_ths |> filter (is_fact_in_graph access_G I) - |> map nickname_of_thm - in - MaSh.learn ctxt overlord [(name, parents, feats, deps)] - end); - (true, "") - end) +fun attach_crude_parents_to_facts _ [] = [] + | attach_crude_parents_to_facts parents ((fact as (_, th)) :: facts) = + (parents, fact) :: attach_crude_parents_to_facts [nickname_of_thm th] facts (* In the following functions, chunks are risers w.r.t. "thm_less_eq". *) @@ -988,6 +841,191 @@ |> drop (length old_facts) end +fun maximal_wrt_graph G keys = + let + val tab = Symtab.empty |> fold (fn name => Symtab.default (name, ())) keys + fun insert_new seen name = + not (Symtab.defined seen name) ? insert (op =) name + fun num_keys keys = Graph.Keys.fold (K (Integer.add 1)) keys 0 + fun find_maxes _ (maxs, []) = map snd maxs + | find_maxes seen (maxs, new :: news) = + find_maxes + (seen |> num_keys (Graph.imm_succs G new) > 1 + ? Symtab.default (new, ())) + (if Symtab.defined tab new then + let + val newp = Graph.all_preds G [new] + fun is_ancestor x yp = member (op =) yp x + val maxs = + maxs |> filter (fn (_, max) => not (is_ancestor max newp)) + in + if exists (is_ancestor new o fst) maxs then + (maxs, news) + else + ((newp, new) + :: filter_out (fn (_, max) => is_ancestor max newp) maxs, + news) + end + else + (maxs, Graph.Keys.fold (insert_new seen) + (Graph.imm_preds G new) news)) + in find_maxes Symtab.empty ([], Graph.maximals G) end + +fun maximal_wrt_access_graph access_G = + map (nickname_of_thm o snd) + #> maximal_wrt_graph access_G + +fun is_fact_in_graph access_G = can (Graph.get_node access_G) o nickname_of_thm + +(* FUDGE *) +fun weight_of_mepo_fact rank = + Math.pow (0.62, log2 (Real.fromInt (rank + 1))) + +fun weight_mepo_facts facts = + facts ~~ map weight_of_mepo_fact (0 upto length facts - 1) + +val weight_raw_mash_facts = weight_mepo_facts +val weight_mash_facts = weight_raw_mash_facts + +(* FUDGE *) +fun weight_of_proximity_fact rank = + Math.pow (1.3, 15.5 - 0.2 * Real.fromInt rank) + 15.0 + +fun weight_proximity_facts facts = + facts ~~ map weight_of_proximity_fact (0 upto length facts - 1) + +val max_proximity_facts = 100 + +fun find_mash_suggestions _ _ [] _ _ raw_unknown = ([], raw_unknown) + | find_mash_suggestions ctxt max_facts suggs facts chained raw_unknown = + let + val raw_mash = find_suggested_facts ctxt facts suggs + val unknown_chained = + inter (Thm.eq_thm_prop o pairself snd) chained raw_unknown + val proximity = + facts |> sort (crude_thm_ord o pairself snd o swap) + |> take max_proximity_facts + val mess = + [(0.90 (* FUDGE *), (map (rpair 1.0) unknown_chained, [])), + (0.08 (* FUDGE *), (weight_raw_mash_facts raw_mash, raw_unknown)), + (0.02 (* FUDGE *), (weight_proximity_facts proximity, []))] + val unknown = + raw_unknown + |> fold (subtract (Thm.eq_thm_prop o pairself snd)) + [unknown_chained, proximity] + in (mesh_facts (Thm.eq_thm_prop o pairself snd) max_facts mess, unknown) end + +val max_learn_on_query = 500 + +fun mash_suggested_facts ctxt ({overlord, learn, ...} : params) prover max_facts + hyp_ts concl_t facts = + let + val thy = Proof_Context.theory_of ctxt + val chained = facts |> filter (fn ((_, (scope, _)), _) => scope = Chained) + val (access_G, suggs) = + peek_state ctxt (fn {access_G, num_known_facts, ...} => + if Graph.is_empty access_G then + (access_G, []) + else + let + val parents = maximal_wrt_access_graph access_G facts + val feats = + features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts) + val hints = + chained |> filter (is_fact_in_graph access_G o snd) + |> map (nickname_of_thm o snd) + val (learns, parents) = + if length facts - num_known_facts <= max_learn_on_query then + let + val name_tabs = build_name_tables nickname_of_thm facts + fun deps_of status th = + if no_dependencies_for_status status then + SOME [] + else + isar_dependencies_of name_tabs th + |> trim_dependencies + fun learn_new_fact (parents, + ((_, stature as (_, status)), th)) = + let + val name = nickname_of_thm th + val feats = + features_of ctxt prover (theory_of_thm th) stature + [prop_of th] + val deps = deps_of status th |> these + in (name, parents, feats, deps) end + val new_facts = + facts |> filter_out (is_fact_in_graph access_G o snd) + |> sort (crude_thm_ord o pairself snd) + |> attach_crude_parents_to_facts parents + val learns = new_facts |> map learn_new_fact + val parents = + if null new_facts then parents + else [#1 (List.last learns)] + in (learns, parents) end + else + ([], parents) + in + (access_G, MaSh.query ctxt overlord max_facts + (learns, hints, parents, feats)) + end) + val unknown = facts |> filter_out (is_fact_in_graph access_G o snd) + in + find_mash_suggestions ctxt max_facts suggs facts chained unknown + |> pairself (map fact_of_raw_fact) + end + +fun learn_wrt_access_graph ctxt (name, parents, feats, deps) (learns, graph) = + let + fun maybe_learn_from from (accum as (parents, graph)) = + try_graph ctxt "updating graph" accum (fn () => + (from :: parents, Graph.add_edge_acyclic (from, name) graph)) + val graph = graph |> Graph.default_node (name, Isar_Proof) + val (parents, graph) = ([], graph) |> fold maybe_learn_from parents + val (deps, _) = ([], graph) |> fold maybe_learn_from deps + in ((name, parents, feats, deps) :: learns, graph) end + +fun relearn_wrt_access_graph ctxt (name, deps) (relearns, graph) = + let + fun maybe_relearn_from from (accum as (parents, graph)) = + try_graph ctxt "updating graph" accum (fn () => + (from :: parents, Graph.add_edge_acyclic (from, name) graph)) + val graph = graph |> update_access_graph_node (name, Automatic_Proof) + val (deps, _) = ([], graph) |> fold maybe_relearn_from deps + in ((name, deps) :: relearns, graph) end + +fun flop_wrt_access_graph name = + update_access_graph_node (name, Isar_Proof_wegen_Prover_Flop) + +val learn_timeout_slack = 2.0 + +fun launch_thread timeout task = + let + val hard_timeout = time_mult learn_timeout_slack timeout + val birth_time = Time.now () + val death_time = Time.+ (birth_time, hard_timeout) + val desc = ("Machine learner for Sledgehammer", "") + in Async_Manager.thread MaShN birth_time death_time desc task end + +fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) prover t facts + used_ths = + launch_thread (timeout |> the_default one_day) (fn () => + let + val thy = Proof_Context.theory_of ctxt + val name = freshish_name () + val feats = features_of ctxt prover thy (Local, General) [t] + in + peek_state ctxt (fn {access_G, ...} => + let + val parents = maximal_wrt_access_graph access_G facts + val deps = + used_ths |> filter (is_fact_in_graph access_G) + |> map nickname_of_thm + in + MaSh.learn ctxt overlord [(name, parents, feats, deps)] + end); + (true, "") + end) + fun sendback sub = Active.sendback_markup [Markup.padding_command] (sledgehammerN ^ " " ^ sub) @@ -1001,7 +1039,7 @@ fun next_commit_time () = Time.+ (Timer.checkRealTimer timer, commit_timeout) val {access_G, ...} = peek_state ctxt I - val is_in_access_G = is_fact_in_graph access_G snd + val is_in_access_G = is_fact_in_graph access_G o snd val no_new_facts = forall is_in_access_G facts in if no_new_facts andalso not run_prover then @@ -1019,7 +1057,7 @@ let val name_tabs = build_name_tables nickname_of_thm facts fun deps_of status th = - if status = Non_Rec_Def orelse status = Rec_Def then + if no_dependencies_for_status status then SOME [] else if run_prover then prover_dependencies_of ctxt params prover auto_level facts name_tabs @@ -1030,7 +1068,7 @@ isar_dependencies_of name_tabs th |> trim_dependencies fun do_commit [] [] [] state = state - | do_commit learns relearns flops {access_G, dirty} = + | do_commit learns relearns flops {access_G, num_known_facts, dirty} = let val was_empty = Graph.is_empty access_G val (learns, access_G) = @@ -1038,6 +1076,7 @@ val (relearns, access_G) = ([], access_G) |> fold (relearn_wrt_access_graph ctxt) relearns val access_G = access_G |> fold flop_wrt_access_graph flops + val num_known_facts = num_known_facts + length learns val dirty = case (was_empty, dirty, relearns) of (false, SOME names, []) => SOME (map #1 learns @ names) @@ -1045,7 +1084,8 @@ in MaSh.learn ctxt overlord (rev learns); MaSh.relearn ctxt overlord relearns; - {access_G = access_G, dirty = dirty} + {access_G = access_G, num_known_facts = num_known_facts, + dirty = dirty} end fun commit last learns relearns flops = (if debug andalso auto_level = 0 then