--- a/src/Doc/IsarImplementation/Tactic.thy Tue Aug 20 11:39:53 2013 +0200
+++ b/src/Doc/IsarImplementation/Tactic.thy Tue Aug 20 22:24:02 2013 +0200
@@ -336,13 +336,15 @@
\item @{ML match_tac}, @{ML ematch_tac}, @{ML dmatch_tac}, and @{ML
bimatch_tac} are similar to @{ML resolve_tac}, @{ML eresolve_tac},
@{ML dresolve_tac}, and @{ML biresolve_tac}, respectively, but do
- not instantiate schematic variables in the goal state.
+ not instantiate schematic variables in the goal state.%
+\footnote{Strictly speaking, matching means to treat the unknowns in the goal
+ state as constants, but these tactics merely discard unifiers that would
+ update the goal state. In rare situations (where the conclusion and
+ goal state have flexible terms at the same position), the tactic
+ will fail even though an acceptable unifier exists.}
+ These tactics were written for a specific application within the classical reasoner.
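+ For example, @{ML match_tac} with the reflexivity rule succeeds on a
+ subgoal @{text "a = a"}, but fails on @{text "?y = c"}: any unifier
+ would have to instantiate the goal unknown @{text "?y"}.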
Flexible subgoals are not updated at will, but are left alone.
- Strictly speaking, matching means to treat the unknowns in the goal
- state as constants; these tactics merely discard unifiers that would
- update the goal state.
-
\end{description}
*}
--- a/src/Doc/Sledgehammer/document/root.tex Tue Aug 20 11:39:53 2013 +0200
+++ b/src/Doc/Sledgehammer/document/root.tex Tue Aug 20 22:24:02 2013 +0200
@@ -504,16 +504,6 @@
using \textit{type\_enc} (\S\ref{problem-encoding}), this is a bug, and you are
strongly encouraged to report this to the author at \authoremail.
-%\point{Why are the generated Isar proofs so ugly/broken?}
-%
-%The current implementation of the Isar proof feature,
-%enabled by the \textit{isar\_proofs} option (\S\ref{output-format}),
-%is experimental. Work on a new implementation has begun. There is a large body of
-%research into transforming resolution proofs into natural deduction proofs (such
-%as Isar proofs), which we hope to leverage. In the meantime, a workaround is to
-%set the \textit{isar\_compress} option (\S\ref{output-format}) to a larger
-%value or to try several provers and keep the nicest-looking proof.
-
\point{How can I tell whether a suggested proof is sound?}
Earlier versions of Sledgehammer often suggested unsound proofs---either proofs
@@ -633,8 +623,7 @@
Sledgehammer's philosophy should work out of the box, without user guidance.
Many of the options are meant to be used mostly by the Sledgehammer developers
-for experimentation purposes. Of course, feel free to experiment with them if
-you are so inclined.
+for experiments. Of course, feel free to try them out if you are so inclined.
\section{Command Syntax}
\label{command-syntax}
@@ -852,10 +841,10 @@
\item[\labelitemi] \textbf{\textit{alt\_ergo}:} Alt-Ergo is a polymorphic
ATP developed by Bobot et al.\ \cite{alt-ergo}.
It supports the TPTP polymorphic typed first-order format (TFF1) via Why3
-\cite{why3}. It is included for experimental purposes. To use Alt-Ergo, set the
-environment variable \texttt{WHY3\_HOME} to the directory that contains the
-\texttt{why3} executable. Sledgehammer has been tested with Alt-Ergo 0.95.1 and
-an unidentified development version of Why3.
+\cite{why3}. To use Alt-Ergo, set the environment variable \texttt{WHY3\_HOME}
+to the directory that contains the \texttt{why3} executable. Sledgehammer has
+been tested with Alt-Ergo 0.95.1 and an unidentified development version of
+Why3.
\item[\labelitemi] \textbf{\textit{cvc3}:} CVC3 is an SMT solver developed by
Clark Barrett, Cesare Tinelli, and their colleagues \cite{cvc3}. To use CVC3,
@@ -1071,10 +1060,10 @@
The traditional memoryless MePo relevance filter.
\item[\labelitemi] \textbf{\textit{mash}:}
-The memoryful MaSh machine learner. MaSh relies on the external Python program
-\texttt{mash.py}, which is part of Isabelle. To enable MaSh, set the environment
-variable \texttt{MASH} to \texttt{yes}. Persistent data is stored in the
-directory \texttt{\$ISABELLE\_HOME\_USER/mash}.
+The experimental, memoryful MaSh machine learner. MaSh relies on the external
+Python program \texttt{mash.py}, which is part of Isabelle. To enable MaSh, set
+the environment variable \texttt{MASH} to \texttt{yes}. Persistent data is
+stored in the directory \texttt{\$ISABELLE\_HOME\_USER/mash}.
\item[\labelitemi] \textbf{\textit{mesh}:} The MeSh filter, which combines the
rankings from MePo and MaSh.
--- a/src/HOL/BNF/BNF_FP_Basic.thy Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/BNF_FP_Basic.thy Tue Aug 20 22:24:02 2013 +0200
@@ -147,6 +147,12 @@
"(fun_rel R (fun_rel S T)) f g = (\<forall>x y. R x y \<longrightarrow> (fun_rel S T) (f x) (g y))"
unfolding fun_rel_def ..
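+(* Auxiliary rules for deriving strong coinduction principles from plain ones
+   (used by "mk_strong_coinduct_thm" in "Tools/bnf_fp_util.ML"): *)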
+lemma subst_eq_imp: "(\<forall>a b. a = b \<longrightarrow> P a b) \<equiv> (\<forall>a. P a a)"
+ by auto
+
+lemma eq_subset: "op = \<le> (\<lambda>a b. P a b \<or> a = b)"
+ by auto
+
ML_file "Tools/bnf_fp_util.ML"
ML_file "Tools/bnf_fp_def_sugar_tactics.ML"
ML_file "Tools/bnf_fp_def_sugar.ML"
--- a/src/HOL/BNF/BNF_GFP.thy Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/BNF_GFP.thy Tue Aug 20 22:24:02 2013 +0200
@@ -70,9 +70,6 @@
lemma image2_eqI: "\<lbrakk>b = f x; c = g x; x \<in> A\<rbrakk> \<Longrightarrow> (b, c) \<in> image2 A f g"
unfolding image2_def by auto
-lemma eq_subset: "op = \<le> (\<lambda>a b. P a b \<or> a = b)"
-by auto
-
lemma IdD: "(a, b) \<in> Id \<Longrightarrow> a = b"
by auto
--- a/src/HOL/BNF/Examples/Process.thy Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/Examples/Process.thy Tue Aug 20 22:24:02 2013 +0200
@@ -8,7 +8,7 @@
header {* Processes *}
theory Process
-imports "../BNF"
+imports Stream
begin
codatatype 'a process =
@@ -38,81 +38,31 @@
subsection{* Coinduction *}
theorem process_coind[elim, consumes 1, case_names iss Action Choice, induct pred: "HOL.eq"]:
-assumes phi: "\<phi> p p'" and
-iss: "\<And>p p'. \<phi> p p' \<Longrightarrow> (isAction p \<longleftrightarrow> isAction p') \<and> (isChoice p \<longleftrightarrow> isChoice p')" and
-Act: "\<And> a a' p p'. \<phi> (Action a p) (Action a' p') \<Longrightarrow> a = a' \<and> \<phi> p p'" and
-Ch: "\<And> p q p' q'. \<phi> (Choice p q) (Choice p' q') \<Longrightarrow> \<phi> p p' \<and> \<phi> q q'"
-shows "p = p'"
-proof(intro mp[OF process.dtor_coinduct, of \<phi>, OF _ phi], clarify)
- fix p p' assume \<phi>: "\<phi> p p'"
- show "pre_process_rel (op =) \<phi> (process_dtor p) (process_dtor p')"
- proof(cases rule: process.exhaust[of p])
- case (Action a q) note p = Action
- hence "isAction p'" using iss[OF \<phi>] by (cases rule: process.exhaust[of p'], auto)
- then obtain a' q' where p': "p' = Action a' q'" by (cases rule: process.exhaust[of p'], auto)
- have 0: "a = a' \<and> \<phi> q q'" using Act[OF \<phi>[unfolded p p']] .
- have dtor: "process_dtor p = Inl (a,q)" "process_dtor p' = Inl (a',q')"
- unfolding p p' Action_def process.dtor_ctor by simp_all
- show ?thesis using 0 unfolding dtor by simp
- next
- case (Choice p1 p2) note p = Choice
- hence "isChoice p'" using iss[OF \<phi>] by (cases rule: process.exhaust[of p'], auto)
- then obtain p1' p2' where p': "p' = Choice p1' p2'"
- by (cases rule: process.exhaust[of p'], auto)
- have 0: "\<phi> p1 p1' \<and> \<phi> p2 p2'" using Ch[OF \<phi>[unfolded p p']] .
- have dtor: "process_dtor p = Inr (p1,p2)" "process_dtor p' = Inr (p1',p2')"
- unfolding p p' Choice_def process.dtor_ctor by simp_all
- show ?thesis using 0 unfolding dtor by simp
- qed
-qed
+ assumes phi: "\<phi> p p'" and
+ iss: "\<And>p p'. \<phi> p p' \<Longrightarrow> (isAction p \<longleftrightarrow> isAction p') \<and> (isChoice p \<longleftrightarrow> isChoice p')" and
+ Act: "\<And> a a' p p'. \<phi> (Action a p) (Action a' p') \<Longrightarrow> a = a' \<and> \<phi> p p'" and
+ Ch: "\<And> p q p' q'. \<phi> (Choice p q) (Choice p' q') \<Longrightarrow> \<phi> p p' \<and> \<phi> q q'"
+ shows "p = p'"
+ using assms
+ by (coinduct rule: process.coinduct) (metis process.collapse(1,2) process.discs(3))
(* Stronger coinduction, up to equality: *)
theorem process_strong_coind[elim, consumes 1, case_names iss Action Choice]:
-assumes phi: "\<phi> p p'" and
-iss: "\<And>p p'. \<phi> p p' \<Longrightarrow> (isAction p \<longleftrightarrow> isAction p') \<and> (isChoice p \<longleftrightarrow> isChoice p')" and
-Act: "\<And> a a' p p'. \<phi> (Action a p) (Action a' p') \<Longrightarrow> a = a' \<and> (\<phi> p p' \<or> p = p')" and
-Ch: "\<And> p q p' q'. \<phi> (Choice p q) (Choice p' q') \<Longrightarrow> (\<phi> p p' \<or> p = p') \<and> (\<phi> q q' \<or> q = q')"
-shows "p = p'"
-proof(intro mp[OF process.dtor_strong_coinduct, of \<phi>, OF _ phi], clarify)
- fix p p' assume \<phi>: "\<phi> p p'"
- show "pre_process_rel (op =) (\<lambda>a b. \<phi> a b \<or> a = b) (process_dtor p) (process_dtor p')"
- proof(cases rule: process.exhaust[of p])
- case (Action a q) note p = Action
- hence "isAction p'" using iss[OF \<phi>] by (cases rule: process.exhaust[of p'], auto)
- then obtain a' q' where p': "p' = Action a' q'" by (cases rule: process.exhaust[of p'], auto)
- have 0: "a = a' \<and> (\<phi> q q' \<or> q = q')" using Act[OF \<phi>[unfolded p p']] .
- have dtor: "process_dtor p = Inl (a,q)" "process_dtor p' = Inl (a',q')"
- unfolding p p' Action_def process.dtor_ctor by simp_all
- show ?thesis using 0 unfolding dtor by simp
- next
- case (Choice p1 p2) note p = Choice
- hence "isChoice p'" using iss[OF \<phi>] by (cases rule: process.exhaust[of p'], auto)
- then obtain p1' p2' where p': "p' = Choice p1' p2'"
- by (cases rule: process.exhaust[of p'], auto)
- have 0: "(\<phi> p1 p1' \<or> p1 = p1') \<and> (\<phi> p2 p2' \<or> p2 = p2')" using Ch[OF \<phi>[unfolded p p']] .
- have dtor: "process_dtor p = Inr (p1,p2)" "process_dtor p' = Inr (p1',p2')"
- unfolding p p' Choice_def process.dtor_ctor by simp_all
- show ?thesis using 0 unfolding dtor by simp
- qed
-qed
+ assumes phi: "\<phi> p p'" and
+ iss: "\<And>p p'. \<phi> p p' \<Longrightarrow> (isAction p \<longleftrightarrow> isAction p') \<and> (isChoice p \<longleftrightarrow> isChoice p')" and
+ Act: "\<And> a a' p p'. \<phi> (Action a p) (Action a' p') \<Longrightarrow> a = a' \<and> (\<phi> p p' \<or> p = p')" and
+ Ch: "\<And> p q p' q'. \<phi> (Choice p q) (Choice p' q') \<Longrightarrow> (\<phi> p p' \<or> p = p') \<and> (\<phi> q q' \<or> q = q')"
+ shows "p = p'"
+ using assms
+ by (coinduct rule: process.strong_coinduct) (metis process.collapse(1,2) process.discs(3))
subsection {* Coiteration (unfold) *}
section{* Coinductive definition of the notion of trace *}
-
-(* Say we have a type of streams: *)
-
-typedecl 'a stream
-
-consts Ccons :: "'a \<Rightarrow> 'a stream \<Rightarrow> 'a stream"
-
-(* Use the existing coinductive package (distinct from our
-new codatatype package, but highly compatible with it): *)
-
coinductive trace where
-"trace p as \<Longrightarrow> trace (Action a p) (Ccons a as)"
+"trace p as \<Longrightarrow> trace (Action a p) (a ## as)"
|
"trace p as \<or> trace q as \<Longrightarrow> trace (Choice p q) as"
--- a/src/HOL/BNF/Tools/bnf_fp_def_sugar.ML Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/Tools/bnf_fp_def_sugar.ML Tue Aug 20 22:24:02 2013 +0200
@@ -24,6 +24,9 @@
val morph_fp_sugar: morphism -> fp_sugar -> fp_sugar
val fp_sugar_of: Proof.context -> string -> fp_sugar option
+ val co_induct_of: 'a list -> 'a
+ val strong_co_induct_of: 'a list -> 'a
+
val tvar_subst: theory -> typ list -> typ list -> ((string * int) * typ) list
val exists_subtype_in: typ list -> typ -> bool
val flat_rec_arg_args: 'a list list -> 'a list
@@ -59,7 +62,7 @@
(term list * thm list) * Proof.context
val derive_induct_iters_thms_for_types: BNF_Def.bnf list ->
(typ list list * typ list list list list * term list list * term list list list list) list ->
- thm list -> thm list list -> BNF_Def.bnf list -> BNF_Def.bnf list -> typ list -> typ list ->
+ thm -> thm list list -> BNF_Def.bnf list -> BNF_Def.bnf list -> typ list -> typ list ->
typ list -> typ list list list -> term list list -> thm list list -> term list list ->
thm list list -> local_theory ->
(thm list * thm * Args.src list) * (thm list list * Args.src list)
@@ -67,7 +70,7 @@
val derive_coinduct_coiters_thms_for_types: BNF_Def.bnf list ->
string * term list * term list list * ((term list list * term list list list)
* (typ list * typ list list)) list ->
- thm list -> thm list -> thm list list -> BNF_Def.bnf list -> typ list -> typ list -> typ list ->
+ thm -> thm list -> thm list list -> BNF_Def.bnf list -> typ list -> typ list -> typ list ->
int list list -> int list list -> int list -> thm list list -> BNF_Ctr_Sugar.ctr_sugar list ->
term list list -> thm list list -> (thm list -> thm list) -> local_theory ->
((thm list * thm) list * Args.src list)
@@ -139,6 +142,9 @@
val fp_sugar_of = Symtab.lookup o Data.get o Context.Proof;
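+(* Selectors for a two-element list of (co)induction theorems, [plain, strong]
+   (cf. "dtor_coinducts" below): *)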
+fun co_induct_of (i :: _) = i;
+fun strong_co_induct_of [_, s] = s;
+
fun register_fp_sugar key fp_sugar =
Local_Theory.declaration {syntax = false, pervasive = true}
(fn phi => Data.map (Symtab.default (key, morph_fp_sugar phi fp_sugar)));
@@ -557,7 +563,7 @@
(map3 generate_coiter coiterNs coiter_args_typess' dtor_coiters) lthy
end;
-fun derive_induct_iters_thms_for_types pre_bnfs [fold_args_types, rec_args_types] [ctor_induct]
+fun derive_induct_iters_thms_for_types pre_bnfs [fold_args_types, rec_args_types] ctor_induct
ctor_iter_thmss nesting_bnfs nested_bnfs fpTs Cs Xs ctrXs_Tsss ctrss ctr_defss iterss iter_defss
lthy =
let
@@ -718,7 +724,7 @@
fun derive_coinduct_coiters_thms_for_types pre_bnfs (z, cs, cpss,
coiters_args_types as [((pgss, crgsss), _), ((phss, cshsss), _)])
- dtor_coinducts dtor_ctors dtor_coiter_thmss nesting_bnfs fpTs Cs As kss mss ns ctr_defss
+ dtor_coinduct dtor_ctors dtor_coiter_thmss nesting_bnfs fpTs Cs As kss mss ns ctr_defss
ctr_sugars coiterss coiter_defss export_args lthy =
let
val coiterss' = transpose coiterss;
@@ -823,6 +829,11 @@
(if nn = 1 then thm RS mp else funpow nn (fn thm => reassoc_conjs (thm RS mp_conj)) thm)
|> Drule.zero_var_indexes
|> `(conj_dests nn);
+
+ val rel_eqs = map rel_eq_of_bnf pre_bnfs;
+ val rel_monos = map rel_mono_of_bnf pre_bnfs;
+ val dtor_coinducts =
+ [dtor_coinduct, mk_strong_coinduct_thm dtor_coinduct rel_eqs rel_monos lthy];
in
map2 (postproc nn oo prove) dtor_coinducts goals
end;
@@ -1096,7 +1107,7 @@
map dest_TFree Xs ~~ map (Term.typ_subst_atomic (As ~~ unsorted_As)) ctrXs_sum_prod_Ts;
val (pre_bnfs, (fp_res as {bnfs = fp_bnfs as any_fp_bnf :: _, ctors = ctors0, dtors = dtors0,
- xtor_co_iterss = xtor_co_iterss0, xtor_co_inducts, dtor_ctors, ctor_dtors, ctor_injects,
+ xtor_co_iterss = xtor_co_iterss0, xtor_co_induct, dtor_ctors, ctor_dtors, ctor_injects,
xtor_map_thms, xtor_set_thmss, xtor_rel_thms, xtor_co_iter_thmss, ...}, lthy)) =
fp_bnf (construct_fp mixfixes map_bs rel_bs set_bss) fp_bs (map dest_TFree unsorted_As) fp_eqs
no_defs_lthy0;
@@ -1351,7 +1362,7 @@
let
val ((induct_thms, induct_thm, induct_attrs), (fold_thmss, fold_attrs),
(rec_thmss, rec_attrs)) =
- derive_induct_iters_thms_for_types pre_bnfs (the iters_args_types) xtor_co_inducts
+ derive_induct_iters_thms_for_types pre_bnfs (the iters_args_types) xtor_co_induct
xtor_co_iter_thmss nesting_bnfs nested_bnfs fpTs Cs Xs ctrXs_Tsss ctrss ctr_defss iterss
iter_defss lthy;
@@ -1388,7 +1399,7 @@
(disc_unfold_thmss, disc_corec_thmss, disc_coiter_attrs),
(disc_unfold_iff_thmss, disc_corec_iff_thmss, disc_coiter_iff_attrs),
(sel_unfold_thmss, sel_corec_thmss, sel_coiter_attrs)) =
- derive_coinduct_coiters_thms_for_types pre_bnfs (the coiters_args_types) xtor_co_inducts
+ derive_coinduct_coiters_thms_for_types pre_bnfs (the coiters_args_types) xtor_co_induct
dtor_ctors xtor_co_iter_thmss nesting_bnfs fpTs Cs As kss mss ns ctr_defss ctr_sugars
coiterss coiter_defss (Proof_Context.export lthy' no_defs_lthy) lthy;
--- a/src/HOL/BNF/Tools/bnf_fp_util.ML Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/Tools/bnf_fp_util.ML Tue Aug 20 22:24:02 2013 +0200
@@ -17,7 +17,7 @@
ctors: term list,
dtors: term list,
xtor_co_iterss: term list list,
- xtor_co_inducts: thm list,
+ xtor_co_induct: thm,
dtor_ctors: thm list,
ctor_dtors: thm list,
ctor_injects: thm list,
@@ -29,8 +29,6 @@
val morph_fp_result: morphism -> fp_result -> fp_result
val eq_fp_result: fp_result * fp_result -> bool
- val co_induct_of: 'a list -> 'a
- val strong_co_induct_of: 'a list -> 'a
val un_fold_of: 'a list -> 'a
val co_rec_of: 'a list -> 'a
@@ -179,6 +177,8 @@
val mk_xtor_un_fold_o_map_thms: fp_kind -> bool -> int -> thm -> thm list -> thm list ->
thm list -> thm list -> thm list
+ val mk_strong_coinduct_thm: thm -> thm list -> thm list -> Proof.context -> thm
+
val fp_bnf: (binding list -> (string * sort) list -> typ list * typ list list ->
BNF_Def.bnf list -> local_theory -> 'a) ->
binding list -> (string * sort) list -> ((string * sort) * typ) list -> local_theory ->
@@ -203,7 +203,7 @@
ctors: term list,
dtors: term list,
xtor_co_iterss: term list list,
- xtor_co_inducts: thm list,
+ xtor_co_induct: thm,
dtor_ctors: thm list,
ctor_dtors: thm list,
ctor_injects: thm list,
@@ -213,7 +213,7 @@
xtor_co_iter_thmss: thm list list,
rel_co_induct_thm: thm};
-fun morph_fp_result phi {Ts, bnfs, ctors, dtors, xtor_co_iterss, xtor_co_inducts, dtor_ctors,
+fun morph_fp_result phi {Ts, bnfs, ctors, dtors, xtor_co_iterss, xtor_co_induct, dtor_ctors,
ctor_dtors, ctor_injects, xtor_map_thms, xtor_set_thmss, xtor_rel_thms, xtor_co_iter_thmss,
rel_co_induct_thm} =
{Ts = map (Morphism.typ phi) Ts,
@@ -221,7 +221,7 @@
ctors = map (Morphism.term phi) ctors,
dtors = map (Morphism.term phi) dtors,
xtor_co_iterss = map (map (Morphism.term phi)) xtor_co_iterss,
- xtor_co_inducts = map (Morphism.thm phi) xtor_co_inducts,
+ xtor_co_induct = Morphism.thm phi xtor_co_induct,
dtor_ctors = map (Morphism.thm phi) dtor_ctors,
ctor_dtors = map (Morphism.thm phi) ctor_dtors,
ctor_injects = map (Morphism.thm phi) ctor_injects,
@@ -234,9 +234,6 @@
fun eq_fp_result ({bnfs = bnfs1, ...} : fp_result, {bnfs = bnfs2, ...} : fp_result) =
eq_list eq_bnf (bnfs1, bnfs2);
-fun co_induct_of (i :: _) = i;
-fun strong_co_induct_of [_, s] = s;
-
fun un_fold_of [f, _] = f;
fun co_rec_of [_, r] = r;
@@ -552,6 +549,25 @@
split_conj_thm (un_fold_unique OF map (fp_case fp I mk_sym) unique_prems)
end;
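+(* Derive the strong coinduction principle (up to equality) from the plain one:
+   instantiate each coinduction witness "phi" with "sup phi (op =)", then
+   simplify using the relators' equality and monotonicity properties. *)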
+fun mk_strong_coinduct_thm coind rel_eqs rel_monos ctxt =
+ let
+ val n = Thm.nprems_of coind;
+ val m = Thm.nprems_of (hd rel_monos) - n;
+ fun mk_inst phi = (phi, mk_union (phi, HOLogic.eq_const (fst (dest_pred2T (fastype_of phi)))))
+ |> pairself (certify ctxt);
+ val insts = Term.add_vars (Thm.prop_of coind) [] |> rev |> take n |> map (mk_inst o Var);
+ fun mk_unfold rel_eq rel_mono =
+ let
+ val eq = iffD2 OF [rel_eq RS @{thm predicate2_eqD}, refl];
+ val mono = rel_mono OF (replicate m @{thm order_refl} @ replicate n @{thm eq_subset});
+ in eq RS (mono RS @{thm predicate2D}) RS @{thm eqTrueI} end;
+ val unfolds = map2 mk_unfold rel_eqs rel_monos @ @{thms sup_fun_def sup_bool_def
+ imp_disjL all_conj_distrib subst_eq_imp simp_thms(18,21,35)};
+ in
+ Thm.instantiate ([], insts) coind
+ |> unfold_thms ctxt unfolds
+ end;
+
fun fp_bnf construct_fp bs resBs eqs lthy =
let
val timer = time (Timer.startRealTimer ());
--- a/src/HOL/BNF/Tools/bnf_gfp.ML Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/Tools/bnf_gfp.ML Tue Aug 20 22:24:02 2013 +0200
@@ -2019,49 +2019,36 @@
val timer = time (timer "corec definitions & thms");
- (* TODO: Get rid of strong versions (since these can easily be derived from the weak ones). *)
- val (dtor_map_coinduct_thm, coinduct_params, dtor_coinduct_thm,
- dtor_map_strong_coinduct_thm, dtor_strong_coinduct_thm) =
+ val (dtor_map_coinduct_thm, coinduct_params, dtor_coinduct_thm) =
let
val zs = Jzs1 @ Jzs2;
val frees = phis @ zs;
- fun mk_phi strong_eq phi z1 z2 = if strong_eq
- then Term.absfree (dest_Free z1) (Term.absfree (dest_Free z2)
- (HOLogic.mk_disj (phi $ z1 $ z2, HOLogic.mk_eq (z1, z2))))
- else phi;
-
- fun phi_rels strong_eq =
- map3 (fn phi => fn z1 => fn z2 => mk_phi strong_eq phi z1 z2) phis Jzs1 Jzs2;
-
val rels = map (Term.subst_atomic_types ((activeAs ~~ Ts) @ (activeBs ~~ Ts))) relsAsBs;
fun mk_concl phi z1 z2 = HOLogic.mk_imp (phi $ z1 $ z2, HOLogic.mk_eq (z1, z2));
val concl = HOLogic.mk_Trueprop (Library.foldr1 HOLogic.mk_conj
(map3 mk_concl phis Jzs1 Jzs2));
- fun mk_rel_prem strong_eq phi dtor rel Jz Jz_copy =
+ fun mk_rel_prem phi dtor rel Jz Jz_copy =
let
- val concl = Term.list_comb (rel, map HOLogic.eq_const passiveAs @ phi_rels strong_eq) $
+ val concl = Term.list_comb (rel, map HOLogic.eq_const passiveAs @ phis) $
(dtor $ Jz) $ (dtor $ Jz_copy);
in
HOLogic.mk_Trueprop
(list_all_free [Jz, Jz_copy] (HOLogic.mk_imp (phi $ Jz $ Jz_copy, concl)))
end;
- val rel_prems = map5 (mk_rel_prem false) phis dtors rels Jzs Jzs_copy;
- val rel_strong_prems = map5 (mk_rel_prem true) phis dtors rels Jzs Jzs_copy;
-
+ val rel_prems = map5 mk_rel_prem phis dtors rels Jzs Jzs_copy;
val dtor_coinduct_goal =
fold_rev Logic.all frees (Logic.list_implies (rel_prems, concl));
- val coinduct_params = rev (Term.add_tfrees dtor_coinduct_goal []);
val dtor_coinduct =
Goal.prove_sorry lthy [] [] dtor_coinduct_goal
(K (mk_dtor_coinduct_tac m raw_coind_thm bis_rel_thm rel_congs))
|> Thm.close_derivation;
- fun mk_prem strong_eq phi dtor map_nth sets Jz Jz_copy FJz =
+ fun mk_prem phi dtor map_nth sets Jz Jz_copy FJz =
let
val xs = [Jz, Jz_copy];
@@ -2071,7 +2058,7 @@
fun mk_set_conjunct set phi z1 z2 =
list_all_free [z1, z2]
(HOLogic.mk_imp (HOLogic.mk_mem (HOLogic.mk_prod (z1, z2), set $ FJz),
- mk_phi strong_eq phi z1 z2 $ z1 $ z2));
+ phi $ z1 $ z2));
val concl = list_exists_free [FJz] (HOLogic.mk_conj
(Library.foldr1 HOLogic.mk_conj (map2 mk_map_conjunct [fstsTs, sndsTs] xs),
@@ -2082,36 +2069,15 @@
(HOLogic.mk_Trueprop (Term.list_comb (phi, xs)), HOLogic.mk_Trueprop concl))
end;
- fun mk_prems strong_eq =
- map7 (mk_prem strong_eq) phis dtors map_FT_nths prodFT_setss Jzs Jzs_copy FJzs;
-
- val prems = mk_prems false;
- val strong_prems = mk_prems true;
+ val prems = map7 mk_prem phis dtors map_FT_nths prodFT_setss Jzs Jzs_copy FJzs;
val dtor_map_coinduct_goal = fold_rev Logic.all frees (Logic.list_implies (prems, concl));
val dtor_map_coinduct =
Goal.prove_sorry lthy [] [] dtor_map_coinduct_goal
(K (mk_dtor_map_coinduct_tac m ks raw_coind_thm bis_def))
|> Thm.close_derivation;
-
- val cTs = map (SOME o certifyT lthy o TFree) coinduct_params;
- val cts = map3 (SOME o certify lthy ooo mk_phi true) phis Jzs1 Jzs2;
-
- val dtor_strong_coinduct = singleton (Proof_Context.export names_lthy lthy)
- (Goal.prove_sorry lthy [] []
- (fold_rev Logic.all zs (Logic.list_implies (rel_strong_prems, concl)))
- (K (mk_dtor_strong_coinduct_tac lthy m cTs cts dtor_coinduct rel_monos rel_eqs)))
- |> Thm.close_derivation;
-
- val dtor_map_strong_coinduct = singleton (Proof_Context.export names_lthy lthy)
- (Goal.prove_sorry lthy [] []
- (fold_rev Logic.all zs (Logic.list_implies (strong_prems, concl)))
- (K (mk_dtor_map_strong_coinduct_tac ks cTs cts dtor_map_coinduct bis_def
- (tcoalg_thm RS bis_Id_on_thm))))
- |> Thm.close_derivation;
in
- (dtor_map_coinduct, rev (Term.add_tfrees dtor_map_coinduct_goal []),
- dtor_coinduct, dtor_map_strong_coinduct, dtor_strong_coinduct)
+ (dtor_map_coinduct, rev (Term.add_tfrees dtor_map_coinduct_goal []), dtor_coinduct)
end;
val timer = time (timer "coinduction");
@@ -2902,8 +2868,6 @@
val common_notes =
[(dtor_coinductN, [dtor_coinduct_thm]),
(dtor_map_coinductN, [dtor_map_coinduct_thm]),
- (dtor_map_strong_coinductN, [dtor_map_strong_coinduct_thm]),
- (dtor_strong_coinductN, [dtor_strong_coinduct_thm]),
(rel_coinductN, [Jrel_coinduct_thm]),
(dtor_unfold_transferN, dtor_unfold_transfer_thms)]
|> map (fn (thmN, thms) =>
@@ -2933,7 +2897,7 @@
timer;
({Ts = Ts, bnfs = Jbnfs, ctors = ctors, dtors = dtors,
xtor_co_iterss = transpose [unfolds, corecs],
- xtor_co_inducts = [dtor_coinduct_thm, dtor_strong_coinduct_thm], dtor_ctors = dtor_ctor_thms,
+ xtor_co_induct = dtor_coinduct_thm, dtor_ctors = dtor_ctor_thms,
ctor_dtors = ctor_dtor_thms, ctor_injects = ctor_inject_thms,
xtor_map_thms = folded_dtor_map_thms, xtor_set_thmss = folded_dtor_set_thmss',
xtor_rel_thms = dtor_Jrel_thms,
--- a/src/HOL/BNF/Tools/bnf_gfp_tactics.ML Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/Tools/bnf_gfp_tactics.ML Tue Aug 20 22:24:02 2013 +0200
@@ -39,12 +39,8 @@
val mk_corec_unique_mor_tac: thm list -> thm list -> thm -> {prems: 'a, context: Proof.context} ->
tactic
val mk_dtor_map_coinduct_tac: int -> int list -> thm -> thm -> tactic
- val mk_dtor_map_strong_coinduct_tac: int list -> ctyp option list -> cterm option list -> thm ->
- thm -> thm -> tactic
val mk_dtor_set_tac: int -> thm -> thm -> thm list -> tactic
val mk_dtor_coinduct_tac: int -> thm -> thm -> thm list -> tactic
- val mk_dtor_strong_coinduct_tac: Proof.context -> int -> ctyp option list ->
- cterm option list -> thm -> thm list -> thm list -> tactic
val mk_dtor_rel_tac: Proof.context -> thm list -> int -> thm -> thm -> thm -> thm -> thm list ->
thm -> thm -> thm list -> thm list -> thm list list -> tactic
val mk_dtor_o_ctor_tac: thm -> thm -> thm -> thm -> thm list ->
@@ -991,18 +987,6 @@
CONJ_WRAP' (K (EVERY' [rtac impI, rtac @{thm IdD}, etac set_mp,
rtac CollectI, etac @{thm prod_caseI}])) rel_congs] 1;
-fun mk_dtor_strong_coinduct_tac ctxt m cTs cts dtor_coinduct rel_monos rel_eqs =
- EVERY' [rtac rev_mp, rtac (Drule.instantiate' cTs cts dtor_coinduct),
- EVERY' (map2 (fn rel_mono => fn rel_eq =>
- EVERY' [REPEAT_DETERM o resolve_tac [allI, impI], REPEAT_DETERM o etac allE,
- etac disjE, etac mp, atac, hyp_subst_tac ctxt, rtac (rel_mono RS @{thm predicate2D}),
- REPEAT_DETERM_N m o rtac @{thm order_refl},
- REPEAT_DETERM_N (length rel_monos) o rtac @{thm eq_subset},
- rtac (rel_eq RS sym RS @{thm eq_refl} RS @{thm predicate2D}), rtac refl])
- rel_monos rel_eqs),
- rtac impI, REPEAT_DETERM o etac conjE,
- CONJ_WRAP' (K (rtac impI THEN' etac mp THEN' etac disjI1)) rel_eqs] 1;
-
fun mk_dtor_map_coinduct_tac m ks raw_coind bis_def =
let
val n = length ks;
@@ -1024,20 +1008,6 @@
rtac CollectI, etac (refl RSN (2, @{thm subst_Pair}))]) ks] 1
end;
-fun mk_dtor_map_strong_coinduct_tac ks cTs cts dtor_map_coinduct bis_def bis_Id_on =
- EVERY' [rtac rev_mp, rtac (Drule.instantiate' cTs cts dtor_map_coinduct),
- EVERY' (map (fn i =>
- EVERY' [etac disjE, REPEAT_DETERM o dtac @{thm meta_spec}, etac meta_mp,
- atac, rtac rev_mp, rtac subst, rtac bis_def, rtac bis_Id_on,
- rtac impI, dtac conjunct2, dtac (mk_conjunctN (length ks) i), REPEAT_DETERM o etac allE,
- etac impE, etac @{thm Id_on_UNIV_I}, REPEAT_DETERM o eresolve_tac [bexE, conjE, CollectE],
- rtac exI, rtac conjI, etac conjI, atac,
- CONJ_WRAP' (K (EVERY' [REPEAT_DETERM o resolve_tac [allI, impI],
- rtac disjI2, rtac @{thm Id_onD}, etac set_mp, atac])) ks])
- ks),
- rtac impI, REPEAT_DETERM o etac conjE,
- CONJ_WRAP' (K (rtac impI THEN' etac mp THEN' etac disjI1)) ks] 1;
-
fun mk_map_tac m n cT unfold map_comp' map_cong0 =
EVERY' [rtac ext, rtac (o_apply RS trans RS sym), rtac (o_apply RS trans RS sym),
rtac (unfold RS trans), rtac (Thm.permute_prems 0 1 (map_comp' RS box_equals)), rtac map_cong0,
--- a/src/HOL/BNF/Tools/bnf_lfp.ML Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/Tools/bnf_lfp.ML Tue Aug 20 22:24:02 2013 +0200
@@ -1858,7 +1858,7 @@
in
timer;
({Ts = Ts, bnfs = Ibnfs, ctors = ctors, dtors = dtors, xtor_co_iterss = transpose [folds, recs],
- xtor_co_inducts = [ctor_induct_thm], dtor_ctors = dtor_ctor_thms, ctor_dtors = ctor_dtor_thms,
+ xtor_co_induct = ctor_induct_thm, dtor_ctors = dtor_ctor_thms, ctor_dtors = ctor_dtor_thms,
ctor_injects = ctor_inject_thms, xtor_map_thms = folded_ctor_map_thms,
xtor_set_thmss = folded_ctor_set_thmss', xtor_rel_thms = ctor_Irel_thms,
xtor_co_iter_thmss = transpose [ctor_fold_thms, ctor_rec_thms],
--- a/src/HOL/Tools/Nitpick/kodkod.ML Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Nitpick/kodkod.ML Tue Aug 20 22:24:02 2013 +0200
@@ -881,7 +881,7 @@
val scan_nat =
Scan.repeat1 (Scan.one Symbol.is_ascii_digit)
- >> (the o Int.fromString o space_implode "")
+ >> (the o Int.fromString o implode)
val scan_rel_name =
($$ "s" |-- scan_nat >> pair 1
|| $$ "r" |-- scan_nat >> pair 2
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py Tue Aug 20 22:24:02 2013 +0200
@@ -179,6 +179,13 @@
assert line.startswith('? ')
line = line[2:]
name = None
+ numberOfPredictions = None
+
+    # Check whether the number of predictions is specified:
+ tmp = line.split('#')
+ if len(tmp) == 2:
+ numberOfPredictions = int(tmp[0].strip())
+ line = tmp[1]
# Check whether there is a problem name:
tmp = line.split(':')
@@ -206,7 +213,7 @@
else:
hints = []
- return name,features,accessibles,hints
+ return name,features,accessibles,hints,numberOfPredictions
def save(self,fileName):
if self.changed:
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/fullNaiveBayes.py Tue Aug 20 22:24:02 2013 +0200
@@ -0,0 +1,203 @@
+'''
+Created on Jul 11, 2012
+
+@author: Daniel Kuehlwein
+'''
+
+from cPickle import dump,load
+from numpy import array,exp
+from math import log
+
+class NBClassifier(object):
+ '''
+ An updateable naive Bayes classifier.
+ '''
+
+ def __init__(self):
+ '''
+ Constructor
+ '''
+ self.counts = {}
+ self.negCounts = {}
+
+ def initializeModel(self,trainData,dicts):
+ """
+ Build basic model from training data.
+ """
+ for d in trainData:
+ self.counts[d] = [0,{}]
+ self.negCounts[d] = [0,{}]
+ dAccUnExp = dicts.accessibleDict[d]
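+        # Expanded accessibles are cached in dicts.expandedAccessibles;
+        # the cache is reset once it holds 100 entries to limit memory use.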
+ if dicts.expandedAccessibles.has_key(d):
+            dAcc = dicts.expandedAccessibles[d]
+ else:
+ if len(dicts.expandedAccessibles.keys()) >= 100:
+ dicts.expandedAccessibles = {}
+ dAcc = dicts.expand_accessibles(dAccUnExp)
+ dicts.expandedAccessibles[d] = dAcc
+ dDeps = set(dicts.dependenciesDict[d])
+ dFeatures = dicts.featureDict[d]
+ # d proves d
+ self.counts[d][0] += 1
+ for f in dFeatures:
+ if self.counts[d][1].has_key(f):
+ self.counts[d][1][f] += 1
+ else:
+ self.counts[d][1][f] = 1
+ for acc in dAcc:
+ if not self.counts.has_key(acc):
+ self.counts[acc] = [0,{}]
+ if not self.negCounts.has_key(acc):
+ self.negCounts[acc] = [0,{}]
+ if acc in dDeps:
+ self.counts[acc][0] += 1
+ for f in dFeatures:
+ if self.counts[acc][1].has_key(f):
+ self.counts[acc][1][f] += 1
+ else:
+ self.counts[acc][1][f] = 1
+ else:
+ self.negCounts[acc][0] += 1
+ for f in dFeatures:
+ if self.negCounts[acc][1].has_key(f):
+ self.negCounts[acc][1][f] += 1
+ else:
+ self.negCounts[acc][1][f] = 1
+
+ def update(self,dataPoint,features,dependencies,dicts):
+ """
+ Updates the Model.
+ """
+ if not self.counts.has_key(dataPoint):
+ self.counts[dataPoint] = [0,{}]
+ if not self.negCounts.has_key(dataPoint):
+ self.negCounts[dataPoint] = [0,{}]
+ if dicts.expandedAccessibles.has_key(dataPoint):
+            dAcc = dicts.expandedAccessibles[dataPoint]
+ else:
+ if len(dicts.expandedAccessibles.keys()) >= 100:
+ dicts.expandedAccessibles = {}
+ dAccUnExp = dicts.accessibleDict[dataPoint]
+ dAcc = dicts.expand_accessibles(dAccUnExp)
+ dicts.expandedAccessibles[dataPoint] = dAcc
+ dDeps = set(dicts.dependenciesDict[dataPoint])
+ dFeatures = dicts.featureDict[dataPoint]
+ # d proves d
+ self.counts[dataPoint][0] += 1
+ for f in dFeatures:
+ if self.counts[dataPoint][1].has_key(f):
+ self.counts[dataPoint][1][f] += 1
+ else:
+ self.counts[dataPoint][1][f] = 1
+
+ for acc in dAcc:
+ if acc in dDeps:
+ self.counts[acc][0] += 1
+ for f in dFeatures:
+ if self.counts[acc][1].has_key(f):
+ self.counts[acc][1][f] += 1
+ else:
+ self.counts[acc][1][f] = 1
+ else:
+ self.negCounts[acc][0] += 1
+ for f in dFeatures:
+ if self.negCounts[acc][1].has_key(f):
+ self.negCounts[acc][1][f] += 1
+ else:
+ self.negCounts[acc][1][f] = 1
+
+ def delete(self,dataPoint,features,dependencies):
+ """
+ Deletes a single datapoint from the model.
+ """
+ for dep in dependencies:
+ self.counts[dep][0] -= 1
+ for f in features:
+ self.counts[dep][1][f] -= 1
+
+
+ def overwrite(self,problemId,newDependencies,dicts):
+ """
+ Deletes the old dependencies of problemId and replaces them with the new ones. Updates the model accordingly.
+ """
+ assert self.counts.has_key(problemId)
+ oldDeps = dicts.dependenciesDict[problemId]
+ features = dicts.featureDict[problemId]
+ self.delete(problemId,features,oldDeps)
+        self.update(problemId,features,newDependencies,dicts)
+
+ def predict(self,features,accessibles):
+ """
+ For each accessible, predicts the probability of it being useful given the features.
+ Returns a ranking of the accessibles.
+ """
+ predictions = []
+ for a in accessibles:
+ posA = self.counts[a][0]
+ negA = self.negCounts[a][0]
+ fPosA = set(self.counts[a][1].keys())
+ fNegA = set(self.negCounts[a][1].keys())
+ fPosWeightsA = self.counts[a][1]
+ fNegWeightsA = self.negCounts[a][1]
+ if negA == 0:
+ resultA = 0
+ elif posA == 0:
+                # Should not happen: every fact is counted as proving itself.
+                raise ValueError('Accessible %s has no positive count.' % a)
+ else:
+ resultA = log(posA) - log(negA)
+ for f in features:
+ if f in fPosA:
+ # P(f | a)
+ if fPosWeightsA[f] == 0:
+ resultA -= 15
+ else:
+ assert fPosWeightsA[f] <= posA
+ resultA += log(float(fPosWeightsA[f])/posA)
+ else:
+ resultA -= 15
+ # P(f | not a)
+ if f in fNegA:
+ if fNegWeightsA[f] == 0:
+ resultA += 15
+ else:
+ assert fNegWeightsA[f] <= negA
+ resultA -= log(float(fNegWeightsA[f])/negA)
+ else:
+ resultA += 15
+
+ predictions.append(resultA)
+ #expPredictions = array([exp(x) for x in predictions])
+ predictions = array(predictions)
+ perm = (-predictions).argsort()
+ #return array(accessibles)[perm],expPredictions[perm]
+ return array(accessibles)[perm],predictions[perm]
+
+ def save(self,fileName):
+ OStream = open(fileName, 'wb')
+ dump((self.counts,self.negCounts),OStream)
+ OStream.close()
+
+ def load(self,fileName):
+ OStream = open(fileName, 'rb')
+ self.counts,self.negCounts = load(OStream)
+ OStream.close()
+
+
+if __name__ == '__main__':
+    # Simple smoke test. Note that initializeModel/update expect a proper
+    # Dictionaries instance; the bare tuple below only sketches its shape.
+    featureDict = {0:[0,1,2],1:[3,2,1]}
+    dependenciesDict = {0:[0],1:[0,1]}
+    libDicts = (featureDict,dependenciesDict,{})
+    c = NBClassifier()
+    c.initializeModel([0,1],libDicts)
+    c.update(2,[14,1,3],[0,2],libDicts)
+    print c.counts
+    print c.predict([0,14],[0,1,2])
+    c.save('x')
+    d = NBClassifier()
+    d.load('x')
+ print c.counts
+ print d.counts
+ print 'Done'
\ No newline at end of file
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py Tue Aug 20 22:24:02 2013 +0200
@@ -1,7 +1,7 @@
-#!/usr/bin/python
-# Title: HOL/Tools/Sledgehammer/MaSh/src/mash.py
+#!/usr/bin/env python
+# Title: HOL/Tools/Sledgehammer/MaSh/src/mash
# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen
-# Copyright 2012
+# Copyright 2012 - 2013
#
# Entry point for MaSh (Machine Learning for Sledgehammer).
@@ -15,69 +15,38 @@
@author: Daniel Kuehlwein
'''
-import logging,datetime,string,os,sys
-from argparse import ArgumentParser,RawDescriptionHelpFormatter
-from time import time
-from stats import Statistics
+import logging,os,socket,string,sys,time
+
+from spawnDaemon import spawnDaemon
+
from theoryStats import TheoryStatistics
from theoryModels import TheoryModels
from dictionaries import Dictionaries
-#from fullNaiveBayes import NBClassifier
-from sparseNaiveBayes import sparseNBClassifier
-from snow import SNoW
from predefined import Predefined
-# Set up command-line parser
-parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer. \n\n\
-MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\
---------------- Example Usage ---------------\n\
-First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\
-Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\
-\n\n\
-Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter)
-parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.')
-parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.')
-parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(),
- help='File where the predictions stored. Default=../tmp/dateTime.predictions.')
-parser.add_argument('--numberOfPredictions',default=200,help="Number of premises to write in the output. Default=200.",type=int)
-
-parser.add_argument('--init',default=False,action='store_true',help="Initialize Mash. Requires --inputDir to be defined. Default=False.")
-parser.add_argument('--inputDir',default='../data/20121212/Jinja/',\
- help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility')
-parser.add_argument('--depFile', default='mash_dependencies',
- help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies')
-parser.add_argument('--saveModel',default=False,action='store_true',help="Stores the learned Model at the end of a prediction run. Default=False.")
+from parameters import init_parser
-parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.")
-# Theory Parameters
-parser.add_argument('--theoryDefValPos',default=-7.5,help="Default value for positive unknown features. Default=-7.5.",type=float)
-parser.add_argument('--theoryDefValNeg',default=-10.0,help="Default value for negative unknown features. Default=-15.0.",type=float)
-parser.add_argument('--theoryPosWeight',default=2.0,help="Weight value for positive features. Default=2.0.",type=float)
-
-parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.")
-# NB Parameters
-parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float)
-parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float)
-parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float)
-# TODO: Rename to sineFeatures
-parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.")
-parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float)
-
-parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.")
-parser.add_argument('--predef',help="Use predefined predictions. Used only for comparison with the actual learning. Argument is the filename of the predictions.")
-parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\
- WARNING: This will make the program a lot slower! Default=False.")
-parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.")
-parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int)
-parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log')
-parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.")
-parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle')
-parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle')
-parser.add_argument('--theoryFile', default='../tmp/theory.pickle', help='Model file name. Default=../tmp/theory.pickle')
+def communicate(data,host,port):
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ try:
+ sock.connect((host,port))
+ sock.sendall(data)
+ received = sock.recv(262144)
+ except:
+ logger = logging.getLogger('communicate')
+ logger.warning('Communication with server failed.')
+ received = -1
+ finally:
+ sock.close()
+ return received
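+
+# Example usage (hypothetical host and port; the real values come from the
+# parsed command-line arguments, cf. parameters.init_parser):
+#   received = communicate('? f1 f2', 'localhost', 9255)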
def mash(argv = sys.argv[1:]):
# Initializing command-line arguments
- args = parser.parse_args(argv)
+ args = init_parser(argv)
# Set up logging
logging.basicConfig(level=logging.DEBUG,
@@ -85,18 +54,8 @@
datefmt='%d-%m %H:%M:%S',
filename=args.log,
filemode='w')
- logger = logging.getLogger('main.py')
-
- #"""
- # remove old handler for tester
- #logger.root.handlers[0].stream.close()
- logger.root.removeHandler(logger.root.handlers[0])
- file_handler = logging.FileHandler(args.log)
- file_handler.setLevel(logging.DEBUG)
- formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
- file_handler.setFormatter(formatter)
- logger.root.addHandler(file_handler)
- #"""
+ logger = logging.getLogger('mash')
+
if args.quiet:
logger.setLevel(logging.WARNING)
#console.setLevel(logging.WARNING)
@@ -110,257 +69,44 @@
if not os.path.exists(args.outputDir):
os.makedirs(args.outputDir)
- logger.info('Using the following settings: %s',args)
- # Pick algorithm
- if args.nb:
- logger.info('Using sparse Naive Bayes for learning.')
- model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
- elif args.snow:
- logger.info('Using naive bayes (SNoW) for learning.')
- model = SNoW()
- elif args.predef:
- logger.info('Using predefined predictions.')
- model = Predefined(args.predef)
- else:
- logger.info('No algorithm specified. Using sparse Naive Bayes.')
- model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
-
- # Initializing model
- if args.init:
- logger.info('Initializing Model.')
- startTime = time()
-
- # Load all data
- dicts = Dictionaries()
- dicts.init_all(args)
-
- # Create Model
- trainData = dicts.featureDict.keys()
- model.initializeModel(trainData,dicts)
-
- if args.learnTheories:
- depFile = os.path.join(args.inputDir,args.depFile)
- theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
- theoryModels.init(depFile,dicts)
- theoryModels.save(args.theoryFile)
-
- model.save(args.modelFile)
- dicts.save(args.dictsFile)
-
- logger.info('All Done. %s seconds needed.',round(time()-startTime,2))
- return 0
- # Create predictions and/or update model
- else:
- lineCounter = 1
- statementCounter = 1
- computeStats = False
- dicts = Dictionaries()
- theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
- # Load Files
- if os.path.isfile(args.dictsFile):
- #logger.info('Loading Dictionaries')
- #startTime = time()
- dicts.load(args.dictsFile)
- #logger.info('Done %s',time()-startTime)
- if os.path.isfile(args.modelFile):
- #logger.info('Loading Model')
- #startTime = time()
- model.load(args.modelFile)
- #logger.info('Done %s',time()-startTime)
- if os.path.isfile(args.theoryFile) and args.learnTheories:
- #logger.info('Loading Theory Models')
- #startTime = time()
- theoryModels.load(args.theoryFile)
- #logger.info('Done %s',time()-startTime)
- logger.info('All loading completed')
-
- # IO Streams
- OS = open(args.predictions,'w')
- IS = open(args.inputFile,'r')
-
- # Statistics
- if args.statistics:
- stats = Statistics(args.cutOff)
- if args.learnTheories:
- theoryStats = TheoryStatistics()
-
- predictions = None
- predictedTheories = None
- #Reading Input File
- for line in IS:
-# try:
- if True:
- if line.startswith('!'):
- problemId = dicts.parse_fact(line)
- # Statistics
- if args.statistics and computeStats:
- computeStats = False
- # Assume '!' comes after '?'
- if args.predef:
- predictions = model.predict(problemId)
- if args.learnTheories:
- tmp = [dicts.idNameDict[x] for x in dicts.dependenciesDict[problemId]]
- usedTheories = set([x.split('.')[0] for x in tmp])
- theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories,len(theoryModels.accessibleTheories))
- stats.update(predictions,dicts.dependenciesDict[problemId],statementCounter)
- if not stats.badPreds == []:
- bp = string.join([str(dicts.idNameDict[x]) for x in stats.badPreds], ',')
- logger.debug('Bad predictions: %s',bp)
+ # If server is not running, start it.
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.connect((args.host,args.port))
+ sock.close()
+ except:
+ logger.info('Starting Server.')
+ spawnDaemon('server.py')
+ # TODO: Make this fault tolerant
+ time.sleep(0.5)
+ # Init server
+ data = "i "+";".join(argv)
+ received = communicate(data,args.host,args.port)
+ logger.info(received)
+
+    if args.inputFile is None:
+ return
+ logger.debug('Using the following settings: %s',args)
+ # IO Streams
+ OS = open(args.predictions,'w')
+ IS = open(args.inputFile,'r')
+    for line in IS:
+        received = communicate(line,args.host,args.port)
+        if received != '':
+            OS.write('%s\n' % received)
+ OS.close()
+ IS.close()
- statementCounter += 1
- # Update Dependencies, p proves p
- dicts.dependenciesDict[problemId] = [problemId]+dicts.dependenciesDict[problemId]
- if args.learnTheories:
- theoryModels.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts)
- if args.snow:
- model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts)
- else:
- model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId])
- elif line.startswith('p'):
- # Overwrite old proof.
- problemId,newDependencies = dicts.parse_overwrite(line)
- newDependencies = [problemId]+newDependencies
- model.overwrite(problemId,newDependencies,dicts)
- if args.learnTheories:
- theoryModels.overwrite(problemId,newDependencies,dicts)
- dicts.dependenciesDict[problemId] = newDependencies
- elif line.startswith('?'):
- startTime = time()
- computeStats = True
- if args.predef:
- continue
- name,features,accessibles,hints = dicts.parse_problem(line)
-
- # Create predictions
- logger.info('Starting computation for problem on line %s',lineCounter)
- # Update Models with hints
- if not hints == []:
- if args.learnTheories:
- accessibleTheories = set([(dicts.idNameDict[x]).split('.')[0] for x in accessibles])
- theoryModels.update_with_acc('hints',features,hints,dicts,accessibleTheories)
- if args.snow:
- pass
- else:
- model.update('hints',features,hints)
-
- # Predict premises
- if args.learnTheories:
- predictedTheories,accessibles = theoryModels.predict(features,accessibles,dicts)
-
- # Add additional features on premise lvl if sine is enabled
- if args.sineFeatures:
- origFeatures = [f for f,_w in features]
- secondaryFeatures = []
- for f in origFeatures:
- if dicts.featureCountDict[f] == 1:
- continue
- triggeredFormulas = dicts.featureTriggeredFormulasDict[f]
- for formula in triggeredFormulas:
- tFeatures = dicts.triggerFeaturesDict[formula]
- #tFeatures = [ff for ff,_fw in dicts.featureDict[formula]]
- newFeatures = set(tFeatures).difference(secondaryFeatures+origFeatures)
- for fNew in newFeatures:
- secondaryFeatures.append((fNew,args.sineWeight))
- predictionsFeatures = features+secondaryFeatures
- else:
- predictionsFeatures = features
- predictions,predictionValues = model.predict(predictionsFeatures,accessibles,dicts)
- assert len(predictions) == len(predictionValues)
-
- # Delete hints
- if not hints == []:
- if args.learnTheories:
- theoryModels.delete('hints',features,hints,dicts)
- if args.snow:
- pass
- else:
- model.delete('hints',features,hints)
+ # Statistics
+ if args.statistics:
+ received = communicate('avgStats',args.host,args.port)
+ logger.info(received)
- logger.info('Done. %s seconds needed.',round(time()-startTime,2))
- # Output
- predictionNames = [str(dicts.idNameDict[p]) for p in predictions[:args.numberOfPredictions]]
- predictionValues = [str(x) for x in predictionValues[:args.numberOfPredictions]]
- predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))]
- predictionsString = string.join(predictionsStringList,' ')
- outString = '%s: %s' % (name,predictionsString)
- OS.write('%s\n' % outString)
- else:
- logger.warning('Unspecified input format: \n%s',line)
- sys.exit(-1)
- lineCounter += 1
- """
- except:
- logger.warning('An error occurred on line %s .',line)
- lineCounter += 1
- continue
- """
- OS.close()
- IS.close()
- # Statistics
- if args.statistics:
- if args.learnTheories:
- theoryStats.printAvg()
- stats.printAvg()
-
- # Save
- if args.saveModel:
- model.save(args.modelFile)
- if args.learnTheories:
- theoryModels.save(args.theoryFile)
- dicts.save(args.dictsFile)
- if not args.saveStats == None:
- if args.learnTheories:
- theoryStatsFile = os.path.join(args.outputDir,'theoryStats')
- theoryStats.save(theoryStatsFile)
- statsFile = os.path.join(args.outputDir,args.saveStats)
- stats.save(statsFile)
- return 0
-
-if __name__ == '__main__':
- # Example:
- #List
- # ISAR Theories
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--learnTheories']
- #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories']
- # ISAR predef mesh
- #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--predef','../data/20130110/List/mesh_suggestions']
- #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130110/List/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
-
-
- # Auth
- # ISAR Theories
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--learnTheories','--sineFeatures']
- #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories']
- # ISAR predef mesh
- #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--predef','../data/20121227b/Auth/mesh_suggestions']
- #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20121227b/Auth/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
-
-
- # Jinja
- # ISAR Theories
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories']
- #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--cutOff','500','--learnTheories']
- # ISAR Theories SinePrior
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories','--sineFeatures']
- #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories','--sineFeatures']
- # ISAR NB
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/']
- #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500']
- # ISAR predef mesh
- #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--predef','../data/20130111/Jinja/mesh_suggestions']
- #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130111/Jinja/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
- # ISAR NB ATP
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--depFile','mash_atp_dependencies']
- #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies']
- #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--predef','--depFile','mash_atp_dependencies']
- #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats','--depFile','mash_atp_dependencies']
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--depFile','mash_atp_dependencies','--snow']
- #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies']
- # ISAR Snow
- #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--snow']
- #args = ['-i', '../data/20121212/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500']
-
-
- #sys.exit(mash(args))
+if __name__ == "__main__":
sys.exit(mash())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mashOld.py Tue Aug 20 22:24:02 2013 +0200
@@ -0,0 +1,329 @@
+#!/usr/bin/python
+# Title: HOL/Tools/Sledgehammer/MaSh/src/mash.py
+# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen
+# Copyright 2012
+#
+# Entry point for MaSh (Machine Learning for Sledgehammer).
+
+'''
+MaSh - Machine Learning for Sledgehammer
+
+MaSh allows using different machine learning algorithms to predict relevant facts for Sledgehammer.
+
+Created on July 12, 2012
+
+@author: Daniel Kuehlwein
+'''
+
+import logging,datetime,string,os,sys
+from argparse import ArgumentParser,RawDescriptionHelpFormatter
+from time import time
+from stats import Statistics
+from theoryStats import TheoryStatistics
+from theoryModels import TheoryModels
+from dictionaries import Dictionaries
+#from fullNaiveBayes import NBClassifier
+from sparseNaiveBayes import sparseNBClassifier
+from snow import SNoW
+from predefined import Predefined
+
+# Set up command-line parser
+parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer. \n\n\
+MaSh allows using different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\
+--------------- Example Usage ---------------\n\
+First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\
+Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\
+\n\n\
+Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter)
+parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.')
+parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.')
+parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(),
+ help='File where the predictions are stored. Default=../tmp/dateTime.predictions.')
+parser.add_argument('--numberOfPredictions',default=200,help="Number of premises to write in the output. Default=200.",type=int)
+
+parser.add_argument('--init',default=False,action='store_true',help="Initialize MaSh. Requires --inputDir to be defined. Default=False.")
+parser.add_argument('--inputDir',default='../data/20121212/Jinja/',\
+ help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility')
+parser.add_argument('--depFile', default='mash_dependencies',
+ help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies')
+parser.add_argument('--saveModel',default=False,action='store_true',help="Stores the learned Model at the end of a prediction run. Default=False.")
+
+parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.")
+# Theory Parameters
+parser.add_argument('--theoryDefValPos',default=-7.5,help="Default value for positive unknown features. Default=-7.5.",type=float)
+parser.add_argument('--theoryDefValNeg',default=-10.0,help="Default value for negative unknown features. Default=-10.0.",type=float)
+parser.add_argument('--theoryPosWeight',default=2.0,help="Weight value for positive features. Default=2.0.",type=float)
+
+parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.")
+# NB Parameters
+parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float)
+parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float)
+parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float)
+# TODO: Rename to sineFeatures
+parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE-like prior for premise-level predictions. Default=False.")
+parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float)
+
+parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.")
+parser.add_argument('--predef',help="Use predefined predictions. Used only for comparison with the actual learning. Argument is the filename of the predictions.")
+parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\
+ WARNING: This will make the program a lot slower! Default=False.")
+parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.")
+parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int)
+parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log')
+parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.")
+parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle')
+parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle')
+parser.add_argument('--theoryFile', default='../tmp/theory.pickle', help='Theory file name. Default=../tmp/theory.pickle')
+
+def mash(argv = sys.argv[1:]):
+ # Initializing command-line arguments
+ args = parser.parse_args(argv)
+
+ # Set up logging
+ logging.basicConfig(level=logging.DEBUG,
+ format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
+ datefmt='%d-%m %H:%M:%S',
+ filename=args.log,
+ filemode='w')
+ logger = logging.getLogger('main.py')
+
+ """
+ # remove old handler for tester
+ # TODO: Comment out for Jasmin's version. This crashes Python 2.6.1.
+ logger.root.handlers[0].stream.close()
+ logger.root.removeHandler(logger.root.handlers[0])
+ file_handler = logging.FileHandler(args.log)
+ file_handler.setLevel(logging.DEBUG)
+ formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
+ file_handler.setFormatter(formatter)
+ logger.root.addHandler(file_handler)
+ #"""
+ if args.quiet:
+ logger.setLevel(logging.WARNING)
+ #console.setLevel(logging.WARNING)
+ else:
+ console = logging.StreamHandler(sys.stdout)
+ console.setLevel(logging.INFO)
+ formatter = logging.Formatter('# %(message)s')
+ console.setFormatter(formatter)
+ logging.getLogger('').addHandler(console)
+
+ if not os.path.exists(args.outputDir):
+ os.makedirs(args.outputDir)
+
+ logger.info('Using the following settings: %s',args)
+ # Pick algorithm
+ if args.nb:
+ logger.info('Using sparse Naive Bayes for learning.')
+ model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
+ elif args.snow:
+ logger.info('Using naive bayes (SNoW) for learning.')
+ model = SNoW()
+ elif args.predef:
+ logger.info('Using predefined predictions.')
+ model = Predefined(args.predef)
+ else:
+ logger.info('No algorithm specified. Using sparse Naive Bayes.')
+ model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
+
+ # Initializing model
+ if args.init:
+ logger.info('Initializing Model.')
+ startTime = time()
+
+ # Load all data
+ dicts = Dictionaries()
+ dicts.init_all(args)
+
+ # Create Model
+ trainData = dicts.featureDict.keys()
+ model.initializeModel(trainData,dicts)
+
+ if args.learnTheories:
+ depFile = os.path.join(args.inputDir,args.depFile)
+ theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
+ theoryModels.init(depFile,dicts)
+ theoryModels.save(args.theoryFile)
+
+ model.save(args.modelFile)
+ dicts.save(args.dictsFile)
+
+ logger.info('All Done. %s seconds needed.',round(time()-startTime,2))
+ return 0
+ # Create predictions and/or update model
+ else:
+ lineCounter = 1
+ statementCounter = 1
+ computeStats = False
+ dicts = Dictionaries()
+ theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
+ # Load Files
+ if os.path.isfile(args.dictsFile):
+ #logger.info('Loading Dictionaries')
+ #startTime = time()
+ dicts.load(args.dictsFile)
+ #logger.info('Done %s',time()-startTime)
+ if os.path.isfile(args.modelFile):
+ #logger.info('Loading Model')
+ #startTime = time()
+ model.load(args.modelFile)
+ #logger.info('Done %s',time()-startTime)
+ if os.path.isfile(args.theoryFile) and args.learnTheories:
+ #logger.info('Loading Theory Models')
+ #startTime = time()
+ theoryModels.load(args.theoryFile)
+ #logger.info('Done %s',time()-startTime)
+ logger.info('All loading completed')
+
+ # IO Streams
+ OS = open(args.predictions,'w')
+ IS = open(args.inputFile,'r')
+
+ # Statistics
+ if args.statistics:
+ stats = Statistics(args.cutOff)
+ if args.learnTheories:
+ theoryStats = TheoryStatistics()
+
+ predictions = None
+ predictedTheories = None
+ #Reading Input File
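+ # Each input line starts with '!' (learn a new fact), 'p' (overwrite the
+ # dependencies of an existing fact), or '?' (ask for predictions).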
+ for line in IS:
+# try:
+ if True:
+ if line.startswith('!'):
+ problemId = dicts.parse_fact(line)
+ # Statistics
+ if args.statistics and computeStats:
+ computeStats = False
+ # Assume '!' comes after '?'
+ if args.predef:
+ predictions = model.predict(problemId)
+ if args.learnTheories:
+ tmp = [dicts.idNameDict[x] for x in dicts.dependenciesDict[problemId]]
+ usedTheories = set([x.split('.')[0] for x in tmp])
+ theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories,len(theoryModels.accessibleTheories))
+ stats.update(predictions,dicts.dependenciesDict[problemId],statementCounter)
+ if not stats.badPreds == []:
+ bp = string.join([str(dicts.idNameDict[x]) for x in stats.badPreds], ',')
+ logger.debug('Bad predictions: %s',bp)
+
+ statementCounter += 1
+ # Update Dependencies, p proves p
+ dicts.dependenciesDict[problemId] = [problemId]+dicts.dependenciesDict[problemId]
+ if args.learnTheories:
+ theoryModels.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts)
+ if args.snow:
+ model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts)
+ else:
+ model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId])
+ elif line.startswith('p'):
+ # Overwrite old proof.
+ problemId,newDependencies = dicts.parse_overwrite(line)
+ newDependencies = [problemId]+newDependencies
+ model.overwrite(problemId,newDependencies,dicts)
+ if args.learnTheories:
+ theoryModels.overwrite(problemId,newDependencies,dicts)
+ dicts.dependenciesDict[problemId] = newDependencies
+ elif line.startswith('?'):
+ startTime = time()
+ computeStats = True
+ if args.predef:
+ continue
+ name,features,accessibles,hints = dicts.parse_problem(line)
+
+ # Create predictions
+ logger.info('Starting computation for problem on line %s',lineCounter)
+ # Update Models with hints
+ if not hints == []:
+ if args.learnTheories:
+ accessibleTheories = set([(dicts.idNameDict[x]).split('.')[0] for x in accessibles])
+ theoryModels.update_with_acc('hints',features,hints,dicts,accessibleTheories)
+ if args.snow:
+ pass
+ else:
+ model.update('hints',features,hints)
+
+ # Predict premises
+ if args.learnTheories:
+ predictedTheories,accessibles = theoryModels.predict(features,accessibles,dicts)
+
+ # Add additional premise-level features if SInE is enabled
+ if args.sineFeatures:
+ origFeatures = [f for f,_w in features]
+ secondaryFeatures = []
+ for f in origFeatures:
+ if dicts.featureCountDict[f] == 1:
+ continue
+ triggeredFormulas = dicts.featureTriggeredFormulasDict[f]
+ for formula in triggeredFormulas:
+ tFeatures = dicts.triggerFeaturesDict[formula]
+ #tFeatures = [ff for ff,_fw in dicts.featureDict[formula]]
+ newFeatures = set(tFeatures).difference(secondaryFeatures+origFeatures)
+ for fNew in newFeatures:
+ secondaryFeatures.append((fNew,args.sineWeight))
+ predictionsFeatures = features+secondaryFeatures
+ else:
+ predictionsFeatures = features
+ predictions,predictionValues = model.predict(predictionsFeatures,accessibles,dicts)
+ assert len(predictions) == len(predictionValues)
+
+ # Delete hints
+ if not hints == []:
+ if args.learnTheories:
+ theoryModels.delete('hints',features,hints,dicts)
+ if args.snow:
+ pass
+ else:
+ model.delete('hints',features,hints)
+
+ logger.info('Done. %s seconds needed.',round(time()-startTime,2))
+ # Output
+ predictionNames = [str(dicts.idNameDict[p]) for p in predictions[:args.numberOfPredictions]]
+ predictionValues = [str(x) for x in predictionValues[:args.numberOfPredictions]]
+ predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))]
+ predictionsString = string.join(predictionsStringList,' ')
+ outString = '%s: %s' % (name,predictionsString)
+ OS.write('%s\n' % outString)
+ else:
+ logger.warning('Unspecified input format: \n%s',line)
+ sys.exit(-1)
+ lineCounter += 1
+ """
+ except:
+ logger.warning('An error occurred on line %s .',line)
+ lineCounter += 1
+ continue
+ """
+ OS.close()
+ IS.close()
+
+ # Statistics
+ if args.statistics:
+ if args.learnTheories:
+ theoryStats.printAvg()
+ stats.printAvg()
+
+ # Save
+ if args.saveModel:
+ model.save(args.modelFile)
+ if args.learnTheories:
+ theoryModels.save(args.theoryFile)
+ dicts.save(args.dictsFile)
+ if not args.saveStats == None:
+ if args.learnTheories:
+ theoryStatsFile = os.path.join(args.outputDir,'theoryStats')
+ theoryStats.save(theoryStatsFile)
+ statsFile = os.path.join(args.outputDir,args.saveStats)
+ stats.save(statsFile)
+ return 0
+
+if __name__ == '__main__':
+ # Cezary Auth
+ args = ['--statistics', '--init', '--inputDir', '../data/20130118/Jinja', '--log', '../tmp/auth.log', '--theoryFile', '../tmp/t0', '--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0','--NBDefaultPriorWeight', '20.0', '--NBDefVal', '-15.0', '--NBPosWeight', '10.0']
+ mash(args)
+ args = ['-i', '../data/20130118/Jinja/mash_commands', '-p', '../tmp/auth.pred0', '--statistics', '--cutOff', '500', '--log', '../tmp/auth.log','--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0']
+ mash(args)
+
+ #sys.exit(mash(args))
+ sys.exit(mash())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mashTest.py Tue Aug 20 22:24:02 2013 +0200
@@ -0,0 +1,12 @@
+'''
+Created on Aug 20, 2013
+
+@author: daniel
+'''
+from mash import mash
+
+if __name__ == "__main__":
+ args = ['--statistics', '--init', '--inputDir', '../data/20130118/Jinja', '--log', '../tmp/auth.log', '--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0','--NBDefaultPriorWeight', '20.0', '--NBDefVal', '-15.0', '--NBPosWeight', '10.0']
+ mash(args)
+ args = ['-i', '../data/20130118/Jinja/mash_commands', '-p', '../tmp/auth.pred0', '--statistics', '--cutOff', '500', '--log', '../tmp/auth.log','--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0']
+ mash(args)
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/parameters.py Tue Aug 20 22:24:02 2013 +0200
@@ -0,0 +1,46 @@
+import datetime
+from argparse import ArgumentParser,RawDescriptionHelpFormatter
+
+def init_parser(argv):
+ # Set up command-line parser
+ parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer. \n\n\
+ MaSh allows the use of different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\
+ --------------- Example Usage ---------------\n\
+ First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\
+ Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\
+ \n\n\
+ Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter)
+ parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.')
+ parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.')
+ parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(),
+ help='File where the predictions are stored. Default=../tmp/dateTime.predictions.')
+ parser.add_argument('--numberOfPredictions',default=500,help="Number of premises to write in the output. Default=500.",type=int)
+
+ parser.add_argument('--init',default=False,action='store_true',help="Initialize MaSh. Requires --inputDir to be defined. Default=False.")
+ parser.add_argument('--inputDir',\
+ help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility')
+ parser.add_argument('--depFile', default='mash_dependencies',
+ help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies')
+
+ parser.add_argument('--algorithm',default='nb',help="Which learning algorithm is used. nb = Naive Bayes, predef = predefined. Default=nb.")
+ # NB Parameters
+ parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float)
+ parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float)
+ parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float)
+ # TODO: Rename to sineFeatures
+ parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE-like prior for premise-level predictions. Default=False.")
+ parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float)
+
+ parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\
+ WARNING: This will make the program a lot slower! Default=False.")
+ parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.")
+ parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int)
+ parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log')
+ parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.")
+ parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle')
+ parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle')
+
+ parser.add_argument('--port', default='9255', help='Port of the Mash server. Default=9255',type=int)
+ parser.add_argument('--host', default='localhost', help='Host of the Mash server. Default=localhost')
+ args = parser.parse_args(argv)
+ return args
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py Tue Aug 20 22:24:02 2013 +0200
@@ -35,7 +35,11 @@
name = line[0].strip()
predId = dicts.get_name_id(name)
line = line[1].split()
- preds = [dicts.get_name_id(x.strip())for x in line]
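+ # Suggestions may be annotated as "name=value"; keep only the name part.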
+ preds = [dicts.get_name_id(x.split('=')[0].strip()) for x in line]
self.predictions[predId] = preds
IS.close()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/server.py Tue Aug 20 22:24:02 2013 +0200
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+# Title: HOL/Tools/Sledgehammer/MaSh/src/server.py
+# Author: Daniel Kuehlwein, ICIS, Radboud University Nijmegen
+# Copyright 2013
+#
+# The MaSh Server.
+
+import SocketServer,os,string,logging
+from multiprocessing import Manager
+from time import time
+from dictionaries import Dictionaries
+from parameters import init_parser
+from sparseNaiveBayes import sparseNBClassifier
+from stats import Statistics
+
+
+class ThreadingTCPServer(SocketServer.ThreadingTCPServer):
+ def __init__(self, *args, **kwargs):
+ SocketServer.ThreadingTCPServer.__init__(self,*args, **kwargs)
+ self.manager = Manager()
+ self.lock = self.manager.Lock() # reuse the manager instead of spawning a second one
+
+class MaShHandler(SocketServer.BaseRequestHandler):
+
+ def init(self,argv):
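+ # argv is a single ';'-separated option string; it is parsed by parameters.init_parser.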
+ if argv == '':
+ self.server.args = init_parser([])
+ else:
+ argv = argv.split(';')
+ self.server.args = init_parser(argv)
+ # Pick model
+ if self.server.args.algorithm == 'nb':
+ self.server.model = sparseNBClassifier(self.server.args.NBDefaultPriorWeight,self.server.args.NBPosWeight,self.server.args.NBDefVal)
+ else: # Default case
+ self.server.model = sparseNBClassifier(self.server.args.NBDefaultPriorWeight,self.server.args.NBPosWeight,self.server.args.NBDefVal)
+ # Load all data
+ # TODO: rewrite dicts for concurrency and without sine
+ self.server.dicts = Dictionaries()
+ if os.path.isfile(self.server.args.dictsFile):
+ self.server.dicts.load(self.server.args.dictsFile)
+ elif self.server.args.init:
+ self.server.dicts.init_all(self.server.args)
+ # Create Model
+ if os.path.isfile(self.server.args.modelFile):
+ self.server.model.load(self.server.args.modelFile)
+ elif self.server.args.init:
+ trainData = self.server.dicts.featureDict.keys()
+ self.server.model.initializeModel(trainData,self.server.dicts)
+
+ if self.server.args.statistics:
+ self.server.stats = Statistics(self.server.args.cutOff)
+ self.server.statementCounter = 1
+ self.server.computeStats = False
+
+ # Set up logging
+ logging.basicConfig(level=logging.DEBUG,
+ format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
+ datefmt='%d-%m %H:%M:%S',
+ filename=self.server.args.log+'server',
+ filemode='w')
+ self.server.logger = logging.getLogger('server')
+ self.server.logger.debug('Initialized in '+str(round(time()-self.startTime,2))+' seconds.')
+ self.request.sendall('Server initialized in '+str(round(time()-self.startTime,2))+' seconds.')
+ self.server.callCounter = 1
+
+ def update(self):
+ problemId = self.server.dicts.parse_fact(self.data)
+ # Statistics
+ if self.server.args.statistics and self.server.computeStats:
+ self.server.computeStats = False
+ # Assume '!' comes after '?'
+ if self.server.args.algorithm == 'predef':
+ self.server.predictions = self.server.model.predict(problemId)
+ self.server.stats.update(self.server.predictions,self.server.dicts.dependenciesDict[problemId],self.server.statementCounter)
+ if not self.server.stats.badPreds == []:
+ bp = string.join([str(self.server.dicts.idNameDict[x]) for x in self.server.stats.badPreds], ',')
+ self.server.logger.debug('Poor predictions: %s',bp)
+ self.server.statementCounter += 1
+
+ # Update Dependencies, p proves p
+ self.server.dicts.dependenciesDict[problemId] = [problemId]+self.server.dicts.dependenciesDict[problemId]
+ self.server.model.update(problemId,self.server.dicts.featureDict[problemId],self.server.dicts.dependenciesDict[problemId])
+
+ def overwrite(self):
+ # Overwrite old proof.
+ problemId,newDependencies = self.server.dicts.parse_overwrite(self.data)
+ newDependencies = [problemId]+newDependencies
+ self.server.model.overwrite(problemId,newDependencies,self.server.dicts)
+ self.server.dicts.dependenciesDict[problemId] = newDependencies
+
+ def predict(self):
+ self.server.computeStats = True
+ if self.server.args.algorithm == 'predef':
+ return
+ name,features,accessibles,hints,numberOfPredictions = self.server.dicts.parse_problem(self.data)
+ if numberOfPredictions == None:
+ numberOfPredictions = self.server.args.numberOfPredictions
+ if not hints == []:
+ self.server.model.update('hints',features,hints)
+
+ # Create predictions
+ self.server.logger.debug('Starting computation for line %s',self.server.callCounter)
+ predictionsFeatures = features
+ self.server.predictions,predictionValues = self.server.model.predict(predictionsFeatures,accessibles,self.server.dicts)
+ assert len(self.server.predictions) == len(predictionValues)
+ self.server.logger.debug('Time needed: '+str(round(time()-self.startTime,2)))
+
+ # Output
+ predictionNames = [str(self.server.dicts.idNameDict[p]) for p in self.server.predictions[:numberOfPredictions]]
+ predictionValues = [str(x) for x in predictionValues[:numberOfPredictions]] # same request-specific limit as predictionNames
+ predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))]
+ predictionsString = string.join(predictionsStringList,' ')
+ outString = '%s: %s' % (name,predictionsString)
+ self.request.sendall(outString)
+
+ def shutdown(self):
+ self.request.sendall('Shutting down server.')
+ # Save Models
+ self.server.model.save(self.server.args.modelFile)
+ self.server.dicts.save(self.server.args.dictsFile)
+ if not self.server.args.saveStats == None:
+ statsFile = os.path.join(self.server.args.outputDir,self.server.args.saveStats)
+ self.server.stats.save(statsFile)
+ self.server.shutdown()
+
+ def handle(self):
+ # self.request is the TCP socket connected to the client
+ self.data = self.request.recv(262144).strip()
+ #print "{} wrote:".format(self.client_address[0])
+ #print self.data
+ self.startTime = time()
+ if self.data == 's':
+ self.shutdown()
+ elif self.data.startswith('i'):
+ self.init(self.data[2:])
+ elif self.data.startswith('!'):
+ self.update()
+ elif self.data.startswith('p'):
+ self.overwrite()
+ elif self.data.startswith('?'):
+ self.predict()
+ elif self.data == '':
+ # Empty Socket
+ return
+ elif self.data == 'avgStats':
+ self.request.sendall(self.server.stats.printAvg())
+ else:
+ self.request.sendall('Unspecified input format: \n%s' % self.data)
+ self.server.callCounter += 1
+
+ #returnString = 'Time needed: '+str(round(time()-self.startTime,2))
+ #print returnString
+
+if __name__ == "__main__":
+ HOST, PORT = "localhost", 9255
+ #print 'Started Server'
+ # Create the server, binding to localhost on port 9999
+ SocketServer.TCPServer.allow_reuse_address = True
+ server = ThreadingTCPServer((HOST, PORT), MaShHandler)
+ #server = SocketServer.TCPServer((HOST, PORT), MaShHandler)
+
+ # Activate the server; this will keep running until you
+ # interrupt the program with Ctrl-C
+ server.serve_forever()
+
+
+
+
+
+
\ No newline at end of file
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/snow.py Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/snow.py Tue Aug 20 22:24:02 2013 +0200
@@ -27,7 +27,7 @@
self.SNoWTrainFile = '../tmp/snow.train'
self.SNoWTestFile = '../snow.test'
self.SNoWNetFile = '../tmp/snow.net'
- self.defMaxNameId = 20000
+ self.defMaxNameId = 100000
def initializeModel(self,trainData,dicts):
"""
@@ -51,6 +51,7 @@
# Build Model
self.logger.debug('Building Model START.')
snowTrainCommand = '../bin/snow -train -M+ -I %s -F %s -g- -B :0-%s' % (self.SNoWTrainFile,self.SNoWNetFile,dicts.maxNameId-1)
+ #print snowTrainCommand
#snowTrainCommand = '../bin/snow -train -M+ -I %s -F %s -g- -B :0-%s' % (self.SNoWTrainFile,self.SNoWNetFile,self.defMaxNameId-1)
args = shlex.split(snowTrainCommand)
p = subprocess.Popen(args,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py Tue Aug 20 22:24:02 2013 +0200
@@ -95,6 +95,7 @@
For each accessible, predicts the probability of it being useful given the features.
Returns a ranking of the accessibles.
"""
+ tau = 0.01 # Jasmin, change value here
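+ # tau weights a correction term over the features of an accessible fact that
+ # do not occur in the query (computed below); tau = 0.0 disables the correction.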
predictions = []
for a in accessibles:
posA = self.counts[a][0]
@@ -112,6 +113,11 @@
resultA += w*log(float(self.posWeight*fWeightsA[f])/posA)
else:
resultA += w*self.defVal
+ if not tau == 0.0:
+ observedFeatures = [f for f,_w in features]
+ missingFeatures = list(fA.difference(observedFeatures))
+ sumOfWeights = sum([log(float(fWeightsA[x])/posA) for x in missingFeatures])
+ resultA -= tau * sumOfWeights
predictions.append(resultA)
predictions = array(predictions)
perm = (-predictions).argsort()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/spawnDaemon.py Tue Aug 20 22:24:02 2013 +0200
@@ -0,0 +1,35 @@
+# http://stackoverflow.com/questions/972362/spawning-process-from-python/972383#972383
+import os
+
+def spawnDaemon(path_to_executable, *args):
+ """Spawn a completely detached subprocess (i.e., a daemon).
+
+ E.g. for mark:
+ spawnDaemon("../bin/producenotify.py", "producenotify.py", "xx")
+ """
+ # fork the first time (to make a non-session-leader child process)
+ try:
+ pid = os.fork()
+ except OSError, e:
+ raise RuntimeError("1st fork failed: %s [%d]" % (e.strerror, e.errno))
+ if pid != 0:
+ # parent (calling) process is all done
+ return
+
+ # detach from controlling terminal (to make child a session-leader)
+ os.setsid()
+ try:
+ pid = os.fork()
+ except OSError, e:
+ raise RuntimeError("2nd fork failed: %s [%d]" % (e.strerror, e.errno))
+ if pid != 0:
+ # child process is all done
+ os._exit(0)
+
+ # and finally let's execute the executable for the daemon!
+ try:
+ os.execv(path_to_executable, [path_to_executable] + list(args)) # pass the extra args along, as documented above
+ except Exception, e:
+ # oops, we're cut off from the world, let's just give up
+ os._exit(255)
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py Tue Aug 20 22:24:02 2013 +0200
@@ -30,6 +30,7 @@
self.problems = 0.0
self.cutOff = cutOff
self.recallData = [0]*cutOff
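+ # Per-problem recall@100 values, recorded so that printAvg can report a median.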
+ self.recall100Median = []
self.recall100Data = [0]*cutOff
self.aucData = []
self.premiseOccurenceCounter = {}
@@ -107,6 +108,7 @@
self.aucData.append(auc)
self.avgAUC += auc
self.avgRecall100 += recall100
+ self.recall100Median.append(recall100)
self.problems += 1
self.badPreds = badPreds
self.avgAvailable += available
@@ -116,8 +118,29 @@
def printAvg(self):
self.logger.info('Average results:')
- self.logger.info('avgAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t cutOff:%s', \
- round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff)
+ #self.logger.info('avgAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t cutOff:%s', \
+ # round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff)
+ # HACK FOR PAPER
+ assert len(self.aucData) == len(self.recall100Median)
+ nrDataPoints = len(self.aucData)
+ sortedAUCData = sorted(self.aucData)
+ sortedRecall100 = sorted(self.recall100Median)
+ # The median of an even number of values is the mean of the two middle ones.
+ if nrDataPoints % 2 == 1:
+ medianAUC = sortedAUCData[nrDataPoints/2]
+ medianrecall100 = sortedRecall100[nrDataPoints/2]
+ else:
+ medianAUC = float(sortedAUCData[nrDataPoints/2 - 1] + sortedAUCData[nrDataPoints/2])/2
+ medianrecall100 = float(sortedRecall100[nrDataPoints/2 - 1] + sortedRecall100[nrDataPoints/2])/2
+
+ returnString = 'avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff:%s' %\
+ (round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianrecall100,2),self.cutOff)
+ self.logger.info(returnString)
+ return returnString
+
+ """
+ self.logger.info('avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff:%s', \
+ round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianrecall100,2),self.cutOff)
#try:
#if True:
@@ -156,6 +179,7 @@
show()
#except:
# self.logger.warning('Matplotlib module missing. Skipping graphs.')
+ """
def save(self,fileName):
oStream = open(fileName, 'wb')
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/tester.py Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/tester.py Tue Aug 20 22:24:02 2013 +0200
@@ -16,29 +16,39 @@
#print '%s says that %s%s = %s' % (current_process().name, func.__name__, args, result)
outQueue.put(result)
-def run_mash(runId,inputDir,logFile,predictionFile,\
+def run_mash(runId,inputDir,logFile,predictionFile,predef,\
learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
- sineFeatures,sineWeight):
+ sineFeatures,sineWeight,quiet=True):
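+ # predef is the filename of predefined suggestions (passed via --predef); '' disables it.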
# Init
runId = str(runId)
predictionFile = predictionFile + runId
- args = ['--statistics','--init','--inputDir',inputDir,'-q','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,
+ args = ['--statistics','--init','--inputDir',inputDir,'--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,
'--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\
'--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)]
if learnTheories:
- args = args + ['--learnTheories']
+ args += ['--learnTheories']
if sineFeatures:
args += ['--sineFeatures','--sineWeight',str(sineWeight)]
+ if not predef == '':
+ args += ['--predef',predef]
+ if quiet:
+ args += ['-q']
+ #print args
mash(args)
# Run
- args = ['-q','-i',inputFile,'-p',predictionFile,'--statistics','--cutOff','500','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,\
+ args = ['-i',inputFile,'-p',predictionFile,'--statistics','--cutOff','1024','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,\
'--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\
'--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)]
if learnTheories:
- args = args + ['--learnTheories']
+ args += ['--learnTheories']
if sineFeatures:
args += ['--sineFeatures','--sineWeight',str(sineWeight)]
+ if not predef == '':
+ args += ['--predef',predef]
+ if quiet:
+ args += ['-q']
+ #print args
mash(args)
# Get Results
@@ -46,29 +56,42 @@
lines = IS.readlines()
tmpRes = lines[-1].split()
avgAuc = tmpRes[5]
- avgRecall100 = tmpRes[9]
+ medianAuc = tmpRes[7]
+ avgRecall100 = tmpRes[11]
+ medianRecall100 = tmpRes[13]
tmpTheoryRes = lines[-3].split()
- avgTheoryPrecision = tmpTheoryRes[5]
- avgTheoryRecall100 = tmpTheoryRes[7]
- avgTheoryRecall = tmpTheoryRes[9]
- avgTheoryPredictedPercent = tmpTheoryRes[11]
+ if learnTheories:
+ avgTheoryPrecision = tmpTheoryRes[5]
+ avgTheoryRecall100 = tmpTheoryRes[7]
+ avgTheoryRecall = tmpTheoryRes[9]
+ avgTheoryPredictedPercent = tmpTheoryRes[11]
+ else:
+ avgTheoryPrecision = 'NA'
+ avgTheoryRecall100 = 'NA'
+ avgTheoryRecall = 'NA'
+ avgTheoryPredictedPercent = 'NA'
IS.close()
# Delete old models
os.remove(logFile)
os.remove(predictionFile)
- os.remove('../tmp/t'+runId)
+ if learnTheories:
+ os.remove('../tmp/t'+runId)
os.remove('../tmp/m'+runId)
os.remove('../tmp/d'+runId)
outFile = open('tester','a')
#print 'avgAuc %s avgRecall100 %s avgTheoryPrecision %s avgTheoryRecall100 %s avgTheoryRecall %s avgTheoryPredictedPercent %s'
- outFile.write('\t'.join([str(learnTheories),str(theoryDefValPos),str(theoryDefValNeg),str(theoryPosWeight),str(NBDefaultPriorWeight),str(NBDefVal),str(NBPosWeight),str(sineFeatures),str(sineWeight),str(avgAuc),str(avgRecall100),str(avgTheoryPrecision),str(avgTheoryRecall100),str(avgTheoryRecall),str(avgTheoryPredictedPercent)])+'\n')
+ outFile.write('\t'.join([str(learnTheories),str(theoryDefValPos),str(theoryDefValNeg),str(theoryPosWeight),\
+ str(NBDefaultPriorWeight),str(NBDefVal),str(NBPosWeight),str(sineFeatures),str(sineWeight),\
+ str(avgAuc),str(medianAuc),str(avgRecall100),str(medianRecall100),str(avgTheoryPrecision),\
+ str(avgTheoryRecall100),str(avgTheoryRecall),str(avgTheoryPredictedPercent)])+'\n')
outFile.close()
print learnTheories,'\t',theoryDefValPos,'\t',theoryDefValNeg,'\t',theoryPosWeight,'\t',\
NBDefaultPriorWeight,'\t',NBDefVal,'\t',NBPosWeight,'\t',\
sineFeatures,'\t',sineWeight,'\t',\
- avgAuc,'\t',avgRecall100,'\t',avgTheoryPrecision,'\t',avgTheoryRecall100,'\t',avgTheoryRecall,'\t',avgTheoryPredictedPercent
+ avgAuc,'\t',medianAuc,'\t',avgRecall100,'\t',medianRecall100,'\t',\
+ avgTheoryPrecision,'\t',avgTheoryRecall100,'\t',avgTheoryRecall,'\t',avgTheoryPredictedPercent
return learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
sineFeatures,sineWeight,\
@@ -93,8 +116,9 @@
#cores = 1
# Options
depFile = 'mash_dependencies'
+ predef = ''
outputDir = '../tmp/'
- numberOfPredictions = 500
+ numberOfPredictions = 1024
learnTheoriesRange = [True,False]
theoryDefValPosRange = [-x for x in range(1,20)]
@@ -107,6 +131,7 @@
sineFeaturesRange = [True,False]
sineWeightRange = [0.1,0.25,0.5,0.75,1.0]
+ """
# Test 1
inputFile = '../data/20121227b/Auth/mash_commands'
inputDir = '../data/20121227b/Auth/'
@@ -179,4 +204,78 @@
NBDefaultPriorWeight,NBDefVal,NBPosWeight,sineFeatures,sineWeight,\
learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight)
print 'bestAvgRecall100 %s bestNBDefaultPriorWeight %s bestNBDefVal %s bestNBPosWeight %s bestSineFeatures %s bestSineWeight %s',bestAvgRecall100,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight
-
\ No newline at end of file
+
+ """
+ # Test 2
+ #inputDir = '../data/20130118/Jinja'
+ inputDir = '../data/notheory/Prob'
+ inputFile = inputDir+'/mash_commands'
+ #inputFile = inputDir+'/mash_prover_commands'
+
+ #depFile = 'mash_prover_dependencies'
+ depFile = 'mash_dependencies'
+ outputDir = '../tmp/'
+ numberOfPredictions = 1024
+ predictionFile = '../tmp/auth.pred'
+ logFile = '../tmp/auth.log'
+ learnTheories = False
+ theoryDefValPos = -7.5
+ theoryDefValNeg = -10.0
+ theoryPosWeight = 2.0
+ NBDefaultPriorWeight = 20.0
+ NBDefVal = -15.0
+ NBPosWeight = 10.0
+ sineFeatures = False
+ sineWeight = 0.5
+ quiet = False
+ print inputDir
+
+ #predef = inputDir+'/mash_prover_suggestions'
+ predef = inputDir+'/mash_suggestions'
+ print 'Mash Isar'
+ run_mash(0,inputDir,logFile,predictionFile,predef,\
+ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+ sineFeatures,sineWeight,quiet=quiet)
+
+ #"""
+ predef = inputDir+'/mesh_suggestions'
+ #predef = inputDir+'/mesh_prover_suggestions'
+ print 'Mesh Isar'
+ run_mash(0,inputDir,logFile,predictionFile,predef,\
+ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+ sineFeatures,sineWeight,quiet=quiet)
+ #"""
+ predef = inputDir+'/mepo_suggestions'
+ print 'Mepo Isar'
+ run_mash(0,inputDir,logFile,predictionFile,predef,\
+ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+ sineFeatures,sineWeight,quiet=quiet)
+
+ """
+ inputFile = inputDir+'/mash_prover_commands'
+ depFile = 'mash_prover_dependencies'
+
+ predef = inputDir+'/mash_prover_suggestions'
+ print 'Mash Prover Isar'
+ run_mash(0,inputDir,logFile,predictionFile,predef,\
+ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+ sineFeatures,sineWeight,quiet=quiet)
+
+ predef = inputDir+'/mesh_prover_suggestions'
+ print 'Mesh Prover Isar'
+ run_mash(0,inputDir,logFile,predictionFile,predef,\
+ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+ sineFeatures,sineWeight,quiet=quiet)
+
+ predef = inputDir+'/mepo_suggestions'
+ print 'Mepo Prover Isar'
+ run_mash(0,inputDir,logFile,predictionFile,predef,\
+ learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+ NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+ sineFeatures,sineWeight,quiet=quiet)
+ #"""
\ No newline at end of file
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML Tue Aug 20 22:24:02 2013 +0200
@@ -43,8 +43,10 @@
val relearn :
Proof.context -> bool -> (string * string list) list -> unit
val query :
- Proof.context -> bool -> bool -> int
- -> string list * (string * real) list * string list -> string list
+ Proof.context -> bool -> int
+ -> (string * string list * (string * real) list * string list) list
+ * string list * string list * (string * real) list
+ -> string list
end
val mash_unlearn : Proof.context -> unit
@@ -69,6 +71,8 @@
Proof.context -> params -> string -> int -> raw_fact list
-> string Symtab.table * string Symtab.table -> thm
-> bool * string list
+ val attach_parents_to_facts :
+ ('a * thm) list -> ('a * thm) list -> (string list * ('a * thm)) list
val weight_mepo_facts : 'a list -> ('a * real) list
val weight_mash_facts : 'a list -> ('a * real) list
val find_mash_suggestions :
@@ -80,8 +84,6 @@
val mash_learn_proof :
Proof.context -> params -> string -> term -> ('a * thm) list -> thm list
-> unit
- val attach_parents_to_facts :
- ('a * thm) list -> ('a * thm) list -> (string list * ('a * thm)) list
val mash_learn :
Proof.context -> params -> fact_override -> thm list -> bool -> unit
val is_mash_enabled : unit -> bool
@@ -140,11 +142,10 @@
fun write_file banner (xs, f) path =
(case banner of SOME s => File.write path s | NONE => ();
- xs |> chunk_list 500
- |> List.app (File.append path o space_implode "" o map f))
+ xs |> chunk_list 500 |> List.app (File.append path o implode o map f))
handle IO.Io _ => ()
-fun run_mash_tool ctxt overlord save max_suggs write_cmds read_suggs =
+fun run_mash_tool ctxt overlord write_cmds read_suggs =
let
val (temp_dir, serial) =
if overlord then (getenv "ISABELLE_HOME_USER", "")
@@ -156,17 +157,13 @@
val cmd_file = temp_dir ^ "/mash_commands" ^ serial
val cmd_path = Path.explode cmd_file
val model_dir = File.shell_path (mash_model_dir ())
- val core =
- "--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^
- " --numberOfPredictions " ^ string_of_int max_suggs ^
- (* " --learnTheories" ^ *) (if save then " --saveModel" else "")
+ val core = "--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file
val command =
"cd \"$ISABELLE_SLEDGEHAMMER_MASH\"/src; " ^
"./mash.py --quiet" ^
" --outputDir " ^ model_dir ^
" --modelFile=" ^ model_dir ^ "/model.pickle" ^
" --dictsFile=" ^ model_dir ^ "/dict.pickle" ^
- " --theoryFile=" ^ model_dir ^ "/theory.pickle" ^
" --log " ^ log_file ^ " " ^ core ^
" >& " ^ err_file
fun run_on () =
@@ -226,15 +223,13 @@
fun str_of_relearn (name, deps) =
"p " ^ encode_str name ^ ": " ^ encode_strs deps ^ "\n"
-fun str_of_query learn_hints (parents, feats, hints) =
- (if not learn_hints orelse null hints then ""
- else str_of_learn (freshish_name (), parents, feats, hints)) ^
- "? " ^ encode_strs parents ^ "; " ^ encode_features feats ^
- (if learn_hints orelse null hints then "" else "; " ^ encode_strs hints) ^
- "\n"
+fun str_of_query max_suggs (learns, hints, parents, feats) =
+ implode (map str_of_learn learns) ^
+ "? " ^ string_of_int max_suggs ^ " # " ^ encode_strs parents ^ "; " ^
+ encode_features feats ^
+ (if null hints then "" else "; " ^ encode_strs hints) ^ "\n"
-(* The weights currently returned by "mash.py" are too spaced out to make any
- sense. *)
+(* The suggested weights don't make much sense. *)
fun extract_suggestion sugg =
case space_explode "=" sugg of
[name, _ (* weight *)] =>
@@ -264,18 +259,17 @@
| learn ctxt overlord learns =
(trace_msg ctxt (fn () => "MaSh learn " ^
elide_string 1000 (space_implode " " (map #1 learns)));
- run_mash_tool ctxt overlord true 0 (learns, str_of_learn) (K ()))
+ run_mash_tool ctxt overlord (learns, str_of_learn) (K ()))
fun relearn _ _ [] = ()
| relearn ctxt overlord relearns =
(trace_msg ctxt (fn () => "MaSh relearn " ^
elide_string 1000 (space_implode " " (map #1 relearns)));
- run_mash_tool ctxt overlord true 0 (relearns, str_of_relearn) (K ()))
+ run_mash_tool ctxt overlord (relearns, str_of_relearn) (K ()))
-fun query ctxt overlord learn_hints max_suggs (query as (_, feats, hints)) =
+fun query ctxt overlord max_suggs (query as (learns, hints, _, feats)) =
(trace_msg ctxt (fn () => "MaSh query " ^ encode_features feats);
- run_mash_tool ctxt overlord (learn_hints andalso not (null hints))
- max_suggs ([query], str_of_query learn_hints)
+ run_mash_tool ctxt overlord ([query], str_of_query max_suggs)
(fn suggs =>
case suggs () of
[] => []
@@ -329,15 +323,18 @@
string_of_int (length (Graph.minimals G)) ^ " minimal, " ^
string_of_int (length (Graph.maximals G)) ^ " maximal"
-type mash_state = {access_G : unit Graph.T, dirty : string list option}
+type mash_state =
+ {access_G : unit Graph.T,
+ num_known_facts : int,
+ dirty : string list option}
-val empty_state = {access_G = Graph.empty, dirty = SOME []}
+val empty_state = {access_G = Graph.empty, num_known_facts = 0, dirty = SOME []}
local
-val version = "*** MaSh version 20130819a ***"
+val version = "*** MaSh version 20130820 ***"
-exception Too_New of unit
+exception FILE_VERSION_TOO_NEW of unit
fun extract_node line =
case space_explode ":" line of
@@ -365,24 +362,27 @@
| SOME (name, parents, kind) =>
update_access_graph_node (name, kind)
#> fold (add_edge_to name) parents
- val access_G =
+ val (access_G, num_known_facts) =
case string_ord (version', version) of
EQUAL =>
- try_graph ctxt "loading state" Graph.empty (fn () =>
- fold add_node node_lines Graph.empty)
+ (try_graph ctxt "loading state" Graph.empty (fn () =>
+ fold add_node node_lines Graph.empty),
+ length node_lines)
| LESS =>
- (MaSh.unlearn ctxt; Graph.empty) (* can't parse old file *)
- | GREATER => raise Too_New ()
+ (* can't parse old file *)
+ (MaSh.unlearn ctxt; (Graph.empty, 0))
+ | GREATER => raise FILE_VERSION_TOO_NEW ()
in
trace_msg ctxt (fn () =>
"Loaded fact graph (" ^ graph_info access_G ^ ")");
- {access_G = access_G, dirty = SOME []}
+ {access_G = access_G, num_known_facts = num_known_facts,
+ dirty = SOME []}
end
| _ => empty_state)
end
fun save _ (state as {dirty = SOME [], ...}) = state
- | save ctxt {access_G, dirty} =
+ | save ctxt {access_G, num_known_facts, dirty} =
let
fun str_of_entry (name, parents, kind) =
str_of_proof_kind kind ^ " " ^ encode_str name ^ ": " ^
@@ -402,7 +402,7 @@
SOME dirty =>
"; " ^ string_of_int (length dirty) ^ " dirty fact(s)"
| _ => "") ^ ")");
- {access_G = access_G, dirty = SOME []}
+ {access_G = access_G, num_known_facts = num_known_facts, dirty = SOME []}
end
val global_state =
@@ -412,7 +412,7 @@
fun map_state ctxt f =
Synchronized.change global_state (load ctxt ##> (f #> save ctxt))
- handle Too_New () => ()
+ handle FILE_VERSION_TOO_NEW () => ()
fun peek_state ctxt f =
Synchronized.change_result global_state
@@ -717,6 +717,9 @@
| NONE => false)
| is_size_def _ _ = false
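+(* Facts whose status marks them as definitions are learned with an empty
+   dependency list. *)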
+fun no_dependencies_for_status status =
+ status = Non_Rec_Def orelse status = Rec_Def
+
fun trim_dependencies deps =
if length deps > max_dependencies then NONE else SOME deps
@@ -784,159 +787,9 @@
(*** High-level communication with MaSh ***)
-fun maximal_wrt_graph G keys =
- let
- val tab = Symtab.empty |> fold (fn name => Symtab.default (name, ())) keys
- fun insert_new seen name =
- not (Symtab.defined seen name) ? insert (op =) name
- fun num_keys keys = Graph.Keys.fold (K (Integer.add 1)) keys 0
- fun find_maxes _ (maxs, []) = map snd maxs
- | find_maxes seen (maxs, new :: news) =
- find_maxes
- (seen |> num_keys (Graph.imm_succs G new) > 1
- ? Symtab.default (new, ()))
- (if Symtab.defined tab new then
- let
- val newp = Graph.all_preds G [new]
- fun is_ancestor x yp = member (op =) yp x
- val maxs =
- maxs |> filter (fn (_, max) => not (is_ancestor max newp))
- in
- if exists (is_ancestor new o fst) maxs then
- (maxs, news)
- else
- ((newp, new)
- :: filter_out (fn (_, max) => is_ancestor max newp) maxs,
- news)
- end
- else
- (maxs, Graph.Keys.fold (insert_new seen)
- (Graph.imm_preds G new) news))
- in find_maxes Symtab.empty ([], Graph.maximals G) end
-
-fun maximal_wrt_access_graph access_G =
- map (nickname_of_thm o snd)
- #> maximal_wrt_graph access_G
-
-fun is_fact_in_graph access_G get_th fact =
- can (Graph.get_node access_G) (nickname_of_thm (get_th fact))
-
-(* FUDGE *)
-fun weight_of_mepo_fact rank =
- Math.pow (0.62, log2 (Real.fromInt (rank + 1)))
-
-fun weight_mepo_facts facts =
- facts ~~ map weight_of_mepo_fact (0 upto length facts - 1)
-
-val weight_raw_mash_facts = weight_mepo_facts
-val weight_mash_facts = weight_raw_mash_facts
-
-(* FUDGE *)
-fun weight_of_proximity_fact rank =
- Math.pow (1.3, 15.5 - 0.2 * Real.fromInt rank) + 15.0
-
-fun weight_proximity_facts facts =
- facts ~~ map weight_of_proximity_fact (0 upto length facts - 1)
-
-val max_proximity_facts = 100
-
-fun find_mash_suggestions _ _ [] _ _ raw_unknown = ([], raw_unknown)
- | find_mash_suggestions ctxt max_facts suggs facts chained raw_unknown =
- let
- val raw_mash = find_suggested_facts ctxt facts suggs
- val unknown_chained =
- inter (Thm.eq_thm_prop o pairself snd) chained raw_unknown
- val proximity =
- facts |> sort (crude_thm_ord o pairself snd o swap)
- |> take max_proximity_facts
- val mess =
- [(0.90 (* FUDGE *), (map (rpair 1.0) unknown_chained, [])),
- (0.08 (* FUDGE *), (weight_raw_mash_facts raw_mash, raw_unknown)),
- (0.02 (* FUDGE *), (weight_proximity_facts proximity, []))]
- val unknown =
- raw_unknown
- |> fold (subtract (Thm.eq_thm_prop o pairself snd))
- [unknown_chained, proximity]
- in (mesh_facts (Thm.eq_thm_prop o pairself snd) max_facts mess, unknown) end
-
-fun mash_suggested_facts ctxt ({overlord, learn, ...} : params) prover max_facts
- hyp_ts concl_t facts =
- let
- val thy = Proof_Context.theory_of ctxt
- val chained = facts |> filter (fn ((_, (scope, _)), _) => scope = Chained)
- val (access_G, suggs) =
- peek_state ctxt (fn {access_G, ...} =>
- if Graph.is_empty access_G then
- (access_G, [])
- else
- let
- val parents = maximal_wrt_access_graph access_G facts
- val feats =
- features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts)
- val hints =
- chained |> filter (is_fact_in_graph access_G snd)
- |> map (nickname_of_thm o snd)
- in
- (access_G, MaSh.query ctxt overlord learn max_facts
- (parents, feats, hints))
- end)
- val unknown = facts |> filter_out (is_fact_in_graph access_G snd)
- in
- find_mash_suggestions ctxt max_facts suggs facts chained unknown
- |> pairself (map fact_of_raw_fact)
- end
-
-fun learn_wrt_access_graph ctxt (name, parents, feats, deps) (learns, graph) =
- let
- fun maybe_learn_from from (accum as (parents, graph)) =
- try_graph ctxt "updating graph" accum (fn () =>
- (from :: parents, Graph.add_edge_acyclic (from, name) graph))
- val graph = graph |> Graph.default_node (name, Isar_Proof)
- val (parents, graph) = ([], graph) |> fold maybe_learn_from parents
- val (deps, _) = ([], graph) |> fold maybe_learn_from deps
- in ((name, parents, feats, deps) :: learns, graph) end
-
-fun relearn_wrt_access_graph ctxt (name, deps) (relearns, graph) =
- let
- fun maybe_relearn_from from (accum as (parents, graph)) =
- try_graph ctxt "updating graph" accum (fn () =>
- (from :: parents, Graph.add_edge_acyclic (from, name) graph))
- val graph = graph |> update_access_graph_node (name, Automatic_Proof)
- val (deps, _) = ([], graph) |> fold maybe_relearn_from deps
- in ((name, deps) :: relearns, graph) end
-
-fun flop_wrt_access_graph name =
- update_access_graph_node (name, Isar_Proof_wegen_Prover_Flop)
-
-val learn_timeout_slack = 2.0
-
-fun launch_thread timeout task =
- let
- val hard_timeout = time_mult learn_timeout_slack timeout
- val birth_time = Time.now ()
- val death_time = Time.+ (birth_time, hard_timeout)
- val desc = ("Machine learner for Sledgehammer", "")
- in Async_Manager.thread MaShN birth_time death_time desc task end
-
-fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) prover t facts
- used_ths =
- launch_thread (timeout |> the_default one_day) (fn () =>
- let
- val thy = Proof_Context.theory_of ctxt
- val name = freshish_name ()
- val feats = features_of ctxt prover thy (Local, General) [t]
- in
- peek_state ctxt (fn {access_G, ...} =>
- let
- val parents = maximal_wrt_access_graph access_G facts
- val deps =
- used_ths |> filter (is_fact_in_graph access_G I)
- |> map nickname_of_thm
- in
- MaSh.learn ctxt overlord [(name, parents, feats, deps)]
- end);
- (true, "")
- end)
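+(* Crude approximation of parenthood: the first fact gets the given parents;
+   each later fact gets its predecessor in the list as its sole parent. *)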
+fun attach_crude_parents_to_facts _ [] = []
+ | attach_crude_parents_to_facts parents ((fact as (_, th)) :: facts) =
+ (parents, fact) :: attach_crude_parents_to_facts [nickname_of_thm th] facts
(* In the following functions, chunks are risers w.r.t. "thm_less_eq". *)
@@ -988,6 +841,191 @@
|> drop (length old_facts)
end
+fun maximal_wrt_graph G keys =
+ let
+ val tab = Symtab.empty |> fold (fn name => Symtab.default (name, ())) keys
+ fun insert_new seen name =
+ not (Symtab.defined seen name) ? insert (op =) name
+ fun num_keys keys = Graph.Keys.fold (K (Integer.add 1)) keys 0
+ fun find_maxes _ (maxs, []) = map snd maxs
+ | find_maxes seen (maxs, new :: news) =
+ find_maxes
+ (seen |> num_keys (Graph.imm_succs G new) > 1
+ ? Symtab.default (new, ()))
+ (if Symtab.defined tab new then
+ let
+ val newp = Graph.all_preds G [new]
+ fun is_ancestor x yp = member (op =) yp x
+ val maxs =
+ maxs |> filter (fn (_, max) => not (is_ancestor max newp))
+ in
+ if exists (is_ancestor new o fst) maxs then
+ (maxs, news)
+ else
+ ((newp, new)
+ :: filter_out (fn (_, max) => is_ancestor max newp) maxs,
+ news)
+ end
+ else
+ (maxs, Graph.Keys.fold (insert_new seen)
+ (Graph.imm_preds G new) news))
+ in find_maxes Symtab.empty ([], Graph.maximals G) end
+
+fun maximal_wrt_access_graph access_G =
+ map (nickname_of_thm o snd)
+ #> maximal_wrt_graph access_G
+
+fun is_fact_in_graph access_G = can (Graph.get_node access_G) o nickname_of_thm
+
+(* FUDGE *)
+fun weight_of_mepo_fact rank =
+ Math.pow (0.62, log2 (Real.fromInt (rank + 1)))
+
+fun weight_mepo_facts facts =
+ facts ~~ map weight_of_mepo_fact (0 upto length facts - 1)
+
+val weight_raw_mash_facts = weight_mepo_facts
+val weight_mash_facts = weight_raw_mash_facts
+
+(* FUDGE *)
+fun weight_of_proximity_fact rank =
+ Math.pow (1.3, 15.5 - 0.2 * Real.fromInt rank) + 15.0
+
+fun weight_proximity_facts facts =
+ facts ~~ map weight_of_proximity_fact (0 upto length facts - 1)
+
+val max_proximity_facts = 100
+
+fun find_mash_suggestions _ _ [] _ _ raw_unknown = ([], raw_unknown)
+ | find_mash_suggestions ctxt max_facts suggs facts chained raw_unknown =
+ let
+ val raw_mash = find_suggested_facts ctxt facts suggs
+ val unknown_chained =
+ inter (Thm.eq_thm_prop o pairself snd) chained raw_unknown
+ val proximity =
+ facts |> sort (crude_thm_ord o pairself snd o swap)
+ |> take max_proximity_facts
+ val mess =
+ [(0.90 (* FUDGE *), (map (rpair 1.0) unknown_chained, [])),
+ (0.08 (* FUDGE *), (weight_raw_mash_facts raw_mash, raw_unknown)),
+ (0.02 (* FUDGE *), (weight_proximity_facts proximity, []))]
+ val unknown =
+ raw_unknown
+ |> fold (subtract (Thm.eq_thm_prop o pairself snd))
+ [unknown_chained, proximity]
+ in (mesh_facts (Thm.eq_thm_prop o pairself snd) max_facts mess, unknown) end
+
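+(* Upper bound on the number of new facts that may be learned on the fly as
+   part of a query. *)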
+val max_learn_on_query = 500
+
+fun mash_suggested_facts ctxt ({overlord, learn, ...} : params) prover max_facts
+ hyp_ts concl_t facts =
+ let
+ val thy = Proof_Context.theory_of ctxt
+ val chained = facts |> filter (fn ((_, (scope, _)), _) => scope = Chained)
+ val (access_G, suggs) =
+ peek_state ctxt (fn {access_G, num_known_facts, ...} =>
+ if Graph.is_empty access_G then
+ (access_G, [])
+ else
+ let
+ val parents = maximal_wrt_access_graph access_G facts
+ val feats =
+ features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts)
+ val hints =
+ chained |> filter (is_fact_in_graph access_G o snd)
+ |> map (nickname_of_thm o snd)
+ val (learns, parents) =
+ if length facts - num_known_facts <= max_learn_on_query then
+ let
+ val name_tabs = build_name_tables nickname_of_thm facts
+ fun deps_of status th =
+ if no_dependencies_for_status status then
+ SOME []
+ else
+ isar_dependencies_of name_tabs th
+ |> trim_dependencies
+ fun learn_new_fact (parents,
+ ((_, stature as (_, status)), th)) =
+ let
+ val name = nickname_of_thm th
+ val feats =
+ features_of ctxt prover (theory_of_thm th) stature
+ [prop_of th]
+ val deps = deps_of status th |> these
+ in (name, parents, feats, deps) end
+ val new_facts =
+ facts |> filter_out (is_fact_in_graph access_G o snd)
+ |> sort (crude_thm_ord o pairself snd)
+ |> attach_crude_parents_to_facts parents
+ val learns = new_facts |> map learn_new_fact
+ val parents =
+ if null new_facts then parents
+ else [#1 (List.last learns)]
+ in (learns, parents) end
+ else
+ ([], parents)
+ in
+ (access_G, MaSh.query ctxt overlord max_facts
+ (learns, hints, parents, feats))
+ end)
+ val unknown = facts |> filter_out (is_fact_in_graph access_G o snd)
+ in
+ find_mash_suggestions ctxt max_facts suggs facts chained unknown
+ |> pairself (map fact_of_raw_fact)
+ end
+
+fun learn_wrt_access_graph ctxt (name, parents, feats, deps) (learns, graph) =
+ let
+ fun maybe_learn_from from (accum as (parents, graph)) =
+ try_graph ctxt "updating graph" accum (fn () =>
+ (from :: parents, Graph.add_edge_acyclic (from, name) graph))
+ val graph = graph |> Graph.default_node (name, Isar_Proof)
+ val (parents, graph) = ([], graph) |> fold maybe_learn_from parents
+ val (deps, _) = ([], graph) |> fold maybe_learn_from deps
+ in ((name, parents, feats, deps) :: learns, graph) end
+
+fun relearn_wrt_access_graph ctxt (name, deps) (relearns, graph) =
+ let
+ fun maybe_relearn_from from (accum as (parents, graph)) =
+ try_graph ctxt "updating graph" accum (fn () =>
+ (from :: parents, Graph.add_edge_acyclic (from, name) graph))
+ val graph = graph |> update_access_graph_node (name, Automatic_Proof)
+ val (deps, _) = ([], graph) |> fold maybe_relearn_from deps
+ in ((name, deps) :: relearns, graph) end
+
+fun flop_wrt_access_graph name =
+ update_access_graph_node (name, Isar_Proof_wegen_Prover_Flop)
+
+val learn_timeout_slack = 2.0
+
+fun launch_thread timeout task =
+ let
+ val hard_timeout = time_mult learn_timeout_slack timeout
+ val birth_time = Time.now ()
+ val death_time = Time.+ (birth_time, hard_timeout)
+ val desc = ("Machine learner for Sledgehammer", "")
+ in Async_Manager.thread MaShN birth_time death_time desc task end
+
+fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) prover t facts
+ used_ths =
+ launch_thread (timeout |> the_default one_day) (fn () =>
+ let
+ val thy = Proof_Context.theory_of ctxt
+ val name = freshish_name ()
+ val feats = features_of ctxt prover thy (Local, General) [t]
+ in
+ peek_state ctxt (fn {access_G, ...} =>
+ let
+ val parents = maximal_wrt_access_graph access_G facts
+ val deps =
+ used_ths |> filter (is_fact_in_graph access_G)
+ |> map nickname_of_thm
+ in
+ MaSh.learn ctxt overlord [(name, parents, feats, deps)]
+ end);
+ (true, "")
+ end)
+
fun sendback sub =
Active.sendback_markup [Markup.padding_command] (sledgehammerN ^ " " ^ sub)
@@ -1001,7 +1039,7 @@
fun next_commit_time () =
Time.+ (Timer.checkRealTimer timer, commit_timeout)
val {access_G, ...} = peek_state ctxt I
- val is_in_access_G = is_fact_in_graph access_G snd
+ val is_in_access_G = is_fact_in_graph access_G o snd
val no_new_facts = forall is_in_access_G facts
in
if no_new_facts andalso not run_prover then
@@ -1019,7 +1057,7 @@
let
val name_tabs = build_name_tables nickname_of_thm facts
fun deps_of status th =
- if status = Non_Rec_Def orelse status = Rec_Def then
+ if no_dependencies_for_status status then
SOME []
else if run_prover then
prover_dependencies_of ctxt params prover auto_level facts name_tabs
@@ -1030,7 +1068,7 @@
isar_dependencies_of name_tabs th
|> trim_dependencies
fun do_commit [] [] [] state = state
- | do_commit learns relearns flops {access_G, dirty} =
+ | do_commit learns relearns flops {access_G, num_known_facts, dirty} =
let
val was_empty = Graph.is_empty access_G
val (learns, access_G) =
@@ -1038,6 +1076,7 @@
val (relearns, access_G) =
([], access_G) |> fold (relearn_wrt_access_graph ctxt) relearns
val access_G = access_G |> fold flop_wrt_access_graph flops
+ val num_known_facts = num_known_facts + length learns
val dirty =
case (was_empty, dirty, relearns) of
(false, SOME names, []) => SOME (map #1 learns @ names)
@@ -1045,7 +1084,8 @@
in
MaSh.learn ctxt overlord (rev learns);
MaSh.relearn ctxt overlord relearns;
- {access_G = access_G, dirty = dirty}
+ {access_G = access_G, num_known_facts = num_known_facts,
+ dirty = dirty}
end
fun commit last learns relearns flops =
(if debug andalso auto_level = 0 then