# HG changeset patch
# User krauss
# Date 1377030242 -7200
# Node ID ae863ed9f64fc6b0b830da3e4f74e8775a82dbd0
# Parent  186535065f5c776d54076c31e93f257f8202ec41# Parent  d81211f61a1b4e6bb07c8b8c19f23acd0b328383
merged

diff -r 186535065f5c -r ae863ed9f64f src/Doc/IsarImplementation/Tactic.thy
--- a/src/Doc/IsarImplementation/Tactic.thy	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/Doc/IsarImplementation/Tactic.thy	Tue Aug 20 22:24:02 2013 +0200
@@ -336,13 +336,15 @@
   \item @{ML match_tac}, @{ML ematch_tac}, @{ML dmatch_tac}, and @{ML
   bimatch_tac} are similar to @{ML resolve_tac}, @{ML eresolve_tac},
   @{ML dresolve_tac}, and @{ML biresolve_tac}, respectively, but do
-  not instantiate schematic variables in the goal state.
+  not instantiate schematic variables in the goal state.%
+\footnote{Strictly speaking, matching means to treat the unknowns in the goal
+  state as constants, but these tactics merely discard unifiers that would
+  update the goal state. In rare situations (where the conclusion and 
+  goal state have flexible terms at the same position), the tactic
+  will fail even though an acceptable unifier exists.}
+  These tactics were written for a specific application within the classical reasoner.
 
   Flexible subgoals are not updated at will, but are left alone.
-  Strictly speaking, matching means to treat the unknowns in the goal
-  state as constants; these tactics merely discard unifiers that would
-  update the goal state.
-
   \end{description}
 *}
 
diff -r 186535065f5c -r ae863ed9f64f src/Doc/Sledgehammer/document/root.tex
--- a/src/Doc/Sledgehammer/document/root.tex	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/Doc/Sledgehammer/document/root.tex	Tue Aug 20 22:24:02 2013 +0200
@@ -504,16 +504,6 @@
 using \textit{type\_enc} (\S\ref{problem-encoding}), this is a bug, and you are
 strongly encouraged to report this to the author at \authoremail.
 
-%\point{Why are the generated Isar proofs so ugly/broken?}
-%
-%The current implementation of the Isar proof feature,
-%enabled by the \textit{isar\_proofs} option (\S\ref{output-format}),
-%is experimental. Work on a new implementation has begun. There is a large body of
-%research into transforming resolution proofs into natural deduction proofs (such
-%as Isar proofs), which we hope to leverage. In the meantime, a workaround is to
-%set the \textit{isar\_compress} option (\S\ref{output-format}) to a larger
-%value or to try several provers and keep the nicest-looking proof.
-
 \point{How can I tell whether a suggested proof is sound?}
 
 Earlier versions of Sledgehammer often suggested unsound proofs---either proofs
@@ -633,8 +623,7 @@
 
 Sledgehammer's philosophy should work out of the box, without user guidance.
 Many of the options are meant to be used mostly by the Sledgehammer developers
-for experimentation purposes. Of course, feel free to experiment with them if
-you are so inclined.
+for experiments. Of course, feel free to try them out if you are so inclined.
 
 \section{Command Syntax}
 \label{command-syntax}
@@ -852,10 +841,10 @@
 \item[\labelitemi] \textbf{\textit{alt\_ergo}:} Alt-Ergo is a polymorphic
 ATP developed by Bobot et al.\ \cite{alt-ergo}.
 It supports the TPTP polymorphic typed first-order format (TFF1) via Why3
-\cite{why3}. It is included for experimental purposes. To use Alt-Ergo, set the
-environment variable \texttt{WHY3\_HOME} to the directory that contains the
-\texttt{why3} executable. Sledgehammer has been tested with Alt-Ergo 0.95.1 and
-an unidentified development version of Why3.
+\cite{why3}. To use Alt-Ergo, set the environment variable \texttt{WHY3\_HOME}
+to the directory that contains the \texttt{why3} executable. Sledgehammer has
+been tested with Alt-Ergo 0.95.1 and an unidentified development version of
+Why3.
 
 \item[\labelitemi] \textbf{\textit{cvc3}:} CVC3 is an SMT solver developed by
 Clark Barrett, Cesare Tinelli, and their colleagues \cite{cvc3}. To use CVC3,
@@ -1071,10 +1060,10 @@
 The traditional memoryless MePo relevance filter.
 
 \item[\labelitemi] \textbf{\textit{mash}:}
-The memoryful MaSh machine learner. MaSh relies on the external Python program
-\texttt{mash.py}, which is part of Isabelle. To enable MaSh, set the environment
-variable \texttt{MASH} to \texttt{yes}. Persistent data is stored in the
-directory \texttt{\$ISABELLE\_HOME\_USER/mash}.
+The experimental, memoryful MaSh machine learner. MaSh relies on the external
+Python program \texttt{mash.py}, which is part of Isabelle. To enable MaSh, set
+the environment variable \texttt{MASH} to \texttt{yes}. Persistent data is
+stored in the directory \texttt{\$ISABELLE\_HOME\_USER/mash}.
 
 \item[\labelitemi] \textbf{\textit{mesh}:} The MeSh filter, which combines the
 rankings from MePo and MaSh.
diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/BNF_FP_Basic.thy
--- a/src/HOL/BNF/BNF_FP_Basic.thy	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/BNF_FP_Basic.thy	Tue Aug 20 22:24:02 2013 +0200
@@ -147,6 +147,12 @@
   "(fun_rel R (fun_rel S T)) f g = (\<forall>x y. R x y \<longrightarrow> (fun_rel S T) (f x) (g y))"
   unfolding fun_rel_def ..
 
+lemma subst_eq_imp: "(\<forall>a b. a = b \<longrightarrow> P a b) \<equiv> (\<forall>a. P a a)"
+  by auto
+
+lemma eq_subset: "op = \<le> (\<lambda>a b. P a b \<or> a = b)"
+  by auto
+
 ML_file "Tools/bnf_fp_util.ML"
 ML_file "Tools/bnf_fp_def_sugar_tactics.ML"
 ML_file "Tools/bnf_fp_def_sugar.ML"
diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/BNF_GFP.thy
--- a/src/HOL/BNF/BNF_GFP.thy	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/BNF_GFP.thy	Tue Aug 20 22:24:02 2013 +0200
@@ -70,9 +70,6 @@
 lemma image2_eqI: "\<lbrakk>b = f x; c = g x; x \<in> A\<rbrakk> \<Longrightarrow> (b, c) \<in> image2 A f g"
 unfolding image2_def by auto
 
-lemma eq_subset: "op = \<le> (\<lambda>a b. P a b \<or> a = b)"
-by auto
-
 lemma IdD: "(a, b) \<in> Id \<Longrightarrow> a = b"
 by auto
 
diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/Examples/Process.thy
--- a/src/HOL/BNF/Examples/Process.thy	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/Examples/Process.thy	Tue Aug 20 22:24:02 2013 +0200
@@ -8,7 +8,7 @@
 header {* Processes *}
 
 theory Process
-imports "../BNF"
+imports Stream 
 begin
 
 codatatype 'a process =
@@ -38,81 +38,31 @@
 subsection{* Coinduction *}
 
 theorem process_coind[elim, consumes 1, case_names iss Action Choice, induct pred: "HOL.eq"]:
-assumes phi: "\<phi> p p'" and
-iss: "\<And>p p'. \<phi> p p' \<Longrightarrow> (isAction p \<longleftrightarrow> isAction p') \<and> (isChoice p \<longleftrightarrow> isChoice p')" and
-Act: "\<And> a a' p p'. \<phi> (Action a p) (Action a' p') \<Longrightarrow> a = a' \<and> \<phi> p p'" and
-Ch: "\<And> p q p' q'. \<phi> (Choice p q) (Choice p' q') \<Longrightarrow> \<phi> p p' \<and> \<phi> q q'"
-shows "p = p'"
-proof(intro mp[OF process.dtor_coinduct, of \<phi>, OF _ phi], clarify)
-  fix p p'  assume \<phi>: "\<phi> p p'"
-  show "pre_process_rel (op =) \<phi> (process_dtor p) (process_dtor p')"
-  proof(cases rule: process.exhaust[of p])
-    case (Action a q) note p = Action
-    hence "isAction p'" using iss[OF \<phi>] by (cases rule: process.exhaust[of p'], auto)
-    then obtain a' q' where p': "p' = Action a' q'" by (cases rule: process.exhaust[of p'], auto)
-    have 0: "a = a' \<and> \<phi> q q'" using Act[OF \<phi>[unfolded p p']] .
-    have dtor: "process_dtor p = Inl (a,q)" "process_dtor p' = Inl (a',q')"
-    unfolding p p' Action_def process.dtor_ctor by simp_all
-    show ?thesis using 0 unfolding dtor by simp
-  next
-    case (Choice p1 p2) note p = Choice
-    hence "isChoice p'" using iss[OF \<phi>] by (cases rule: process.exhaust[of p'], auto)
-    then obtain p1' p2' where p': "p' = Choice p1' p2'"
-    by (cases rule: process.exhaust[of p'], auto)
-    have 0: "\<phi> p1 p1' \<and> \<phi> p2 p2'" using Ch[OF \<phi>[unfolded p p']] .
-    have dtor: "process_dtor p = Inr (p1,p2)" "process_dtor p' = Inr (p1',p2')"
-    unfolding p p' Choice_def process.dtor_ctor by simp_all
-    show ?thesis using 0 unfolding dtor by simp
-  qed
-qed
+  assumes phi: "\<phi> p p'" and
+  iss: "\<And>p p'. \<phi> p p' \<Longrightarrow> (isAction p \<longleftrightarrow> isAction p') \<and> (isChoice p \<longleftrightarrow> isChoice p')" and
+  Act: "\<And> a a' p p'. \<phi> (Action a p) (Action a' p') \<Longrightarrow> a = a' \<and> \<phi> p p'" and
+  Ch: "\<And> p q p' q'. \<phi> (Choice p q) (Choice p' q') \<Longrightarrow> \<phi> p p' \<and> \<phi> q q'"
+  shows "p = p'"
+  using assms
+  by (coinduct rule: process.coinduct) (metis process.collapse(1,2) process.discs(3))
 
 (* Stronger coinduction, up to equality: *)
 theorem process_strong_coind[elim, consumes 1, case_names iss Action Choice]:
-assumes phi: "\<phi> p p'" and
-iss: "\<And>p p'. \<phi> p p' \<Longrightarrow> (isAction p \<longleftrightarrow> isAction p') \<and> (isChoice p \<longleftrightarrow> isChoice p')" and
-Act: "\<And> a a' p p'. \<phi> (Action a p) (Action a' p') \<Longrightarrow> a = a' \<and> (\<phi> p p' \<or> p = p')" and
-Ch: "\<And> p q p' q'. \<phi> (Choice p q) (Choice p' q') \<Longrightarrow> (\<phi> p p' \<or> p = p') \<and> (\<phi> q q' \<or> q = q')"
-shows "p = p'"
-proof(intro mp[OF process.dtor_strong_coinduct, of \<phi>, OF _ phi], clarify)
-  fix p p'  assume \<phi>: "\<phi> p p'"
-  show "pre_process_rel (op =) (\<lambda>a b. \<phi> a b \<or> a = b) (process_dtor p) (process_dtor p')"
-  proof(cases rule: process.exhaust[of p])
-    case (Action a q) note p = Action
-    hence "isAction p'" using iss[OF \<phi>] by (cases rule: process.exhaust[of p'], auto)
-    then obtain a' q' where p': "p' = Action a' q'" by (cases rule: process.exhaust[of p'], auto)
-    have 0: "a = a' \<and> (\<phi> q q' \<or> q = q')" using Act[OF \<phi>[unfolded p p']] .
-    have dtor: "process_dtor p = Inl (a,q)" "process_dtor p' = Inl (a',q')"
-    unfolding p p' Action_def process.dtor_ctor by simp_all
-    show ?thesis using 0 unfolding dtor by simp
-  next
-    case (Choice p1 p2) note p = Choice
-    hence "isChoice p'" using iss[OF \<phi>] by (cases rule: process.exhaust[of p'], auto)
-    then obtain p1' p2' where p': "p' = Choice p1' p2'"
-    by (cases rule: process.exhaust[of p'], auto)
-    have 0: "(\<phi> p1 p1' \<or> p1 = p1') \<and> (\<phi> p2 p2' \<or> p2 = p2')" using Ch[OF \<phi>[unfolded p p']] .
-    have dtor: "process_dtor p = Inr (p1,p2)" "process_dtor p' = Inr (p1',p2')"
-    unfolding p p' Choice_def process.dtor_ctor by simp_all
-    show ?thesis using 0 unfolding dtor by simp
-  qed
-qed
+  assumes phi: "\<phi> p p'" and
+  iss: "\<And>p p'. \<phi> p p' \<Longrightarrow> (isAction p \<longleftrightarrow> isAction p') \<and> (isChoice p \<longleftrightarrow> isChoice p')" and
+  Act: "\<And> a a' p p'. \<phi> (Action a p) (Action a' p') \<Longrightarrow> a = a' \<and> (\<phi> p p' \<or> p = p')" and
+  Ch: "\<And> p q p' q'. \<phi> (Choice p q) (Choice p' q') \<Longrightarrow> (\<phi> p p' \<or> p = p') \<and> (\<phi> q q' \<or> q = q')"
+  shows "p = p'"
+  using assms
+  by (coinduct rule: process.strong_coinduct) (metis process.collapse(1,2) process.discs(3))
 
 
 subsection {* Coiteration (unfold) *}
 
 
 section{* Coinductive definition of the notion of trace *}
-
-(* Say we have a type of streams: *)
-
-typedecl 'a stream
-
-consts Ccons :: "'a \<Rightarrow> 'a stream \<Rightarrow> 'a stream"
-
-(* Use the existing coinductive package (distinct from our
-new codatatype package, but highly compatible with it): *)
-
 coinductive trace where
-"trace p as \<Longrightarrow> trace (Action a p) (Ccons a as)"
+"trace p as \<Longrightarrow> trace (Action a p) (a ## as)"
 |
 "trace p as \<or> trace q as \<Longrightarrow> trace (Choice p q) as"
 
diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/Tools/bnf_fp_def_sugar.ML
--- a/src/HOL/BNF/Tools/bnf_fp_def_sugar.ML	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/Tools/bnf_fp_def_sugar.ML	Tue Aug 20 22:24:02 2013 +0200
@@ -24,6 +24,9 @@
   val morph_fp_sugar: morphism -> fp_sugar -> fp_sugar
   val fp_sugar_of: Proof.context -> string -> fp_sugar option
 
+  val co_induct_of: 'a list -> 'a
+  val strong_co_induct_of: 'a list -> 'a
+
   val tvar_subst: theory -> typ list -> typ list -> ((string * int) * typ) list
   val exists_subtype_in: typ list -> typ -> bool
   val flat_rec_arg_args: 'a list list -> 'a list
@@ -59,7 +62,7 @@
     (term list * thm list) * Proof.context
   val derive_induct_iters_thms_for_types: BNF_Def.bnf list ->
     (typ list list * typ list list list list * term list list * term list list list list) list ->
-    thm list -> thm list list -> BNF_Def.bnf list -> BNF_Def.bnf list -> typ list -> typ list ->
+    thm -> thm list list -> BNF_Def.bnf list -> BNF_Def.bnf list -> typ list -> typ list ->
     typ list -> typ list list list -> term list list -> thm list list -> term list list ->
     thm list list -> local_theory ->
     (thm list * thm * Args.src list) * (thm list list * Args.src list)
@@ -67,7 +70,7 @@
   val derive_coinduct_coiters_thms_for_types: BNF_Def.bnf list ->
     string * term list * term list list * ((term list list * term list list list)
       * (typ list * typ list list)) list ->
-    thm list -> thm list -> thm list list -> BNF_Def.bnf list -> typ list -> typ list -> typ list ->
+    thm -> thm list -> thm list list -> BNF_Def.bnf list -> typ list -> typ list -> typ list ->
     int list list -> int list list -> int list -> thm list list -> BNF_Ctr_Sugar.ctr_sugar list ->
     term list list -> thm list list -> (thm list -> thm list) -> local_theory ->
     ((thm list * thm) list * Args.src list)
@@ -139,6 +142,9 @@
 
 val fp_sugar_of = Symtab.lookup o Data.get o Context.Proof;
 
+fun co_induct_of (i :: _) = i;
+fun strong_co_induct_of [_, s] = s;
+
 fun register_fp_sugar key fp_sugar =
   Local_Theory.declaration {syntax = false, pervasive = true}
     (fn phi => Data.map (Symtab.default (key, morph_fp_sugar phi fp_sugar)));
@@ -557,7 +563,7 @@
       (map3 generate_coiter coiterNs coiter_args_typess' dtor_coiters) lthy
   end;
 
-fun derive_induct_iters_thms_for_types pre_bnfs [fold_args_types, rec_args_types] [ctor_induct]
+fun derive_induct_iters_thms_for_types pre_bnfs [fold_args_types, rec_args_types] ctor_induct
     ctor_iter_thmss nesting_bnfs nested_bnfs fpTs Cs Xs ctrXs_Tsss ctrss ctr_defss iterss iter_defss
     lthy =
   let
@@ -718,7 +724,7 @@
 
 fun derive_coinduct_coiters_thms_for_types pre_bnfs (z, cs, cpss,
       coiters_args_types as [((pgss, crgsss), _), ((phss, cshsss), _)])
-    dtor_coinducts dtor_ctors dtor_coiter_thmss nesting_bnfs fpTs Cs As kss mss ns ctr_defss
+    dtor_coinduct dtor_ctors dtor_coiter_thmss nesting_bnfs fpTs Cs As kss mss ns ctr_defss
     ctr_sugars coiterss coiter_defss export_args lthy =
   let
     val coiterss' = transpose coiterss;
@@ -823,6 +829,11 @@
             (if nn = 1 then thm RS mp else funpow nn (fn thm => reassoc_conjs (thm RS mp_conj)) thm)
           |> Drule.zero_var_indexes
           |> `(conj_dests nn);
+
+        val rel_eqs = map rel_eq_of_bnf pre_bnfs;
+        val rel_monos = map rel_mono_of_bnf pre_bnfs;
+        val dtor_coinducts =
+          [dtor_coinduct, mk_strong_coinduct_thm dtor_coinduct rel_eqs rel_monos lthy];
       in
         map2 (postproc nn oo prove) dtor_coinducts goals
       end;
@@ -1096,7 +1107,7 @@
       map dest_TFree Xs ~~ map (Term.typ_subst_atomic (As ~~ unsorted_As)) ctrXs_sum_prod_Ts;
 
     val (pre_bnfs, (fp_res as {bnfs = fp_bnfs as any_fp_bnf :: _, ctors = ctors0, dtors = dtors0,
-           xtor_co_iterss = xtor_co_iterss0, xtor_co_inducts, dtor_ctors, ctor_dtors, ctor_injects,
+           xtor_co_iterss = xtor_co_iterss0, xtor_co_induct, dtor_ctors, ctor_dtors, ctor_injects,
            xtor_map_thms, xtor_set_thmss, xtor_rel_thms, xtor_co_iter_thmss, ...}, lthy)) =
       fp_bnf (construct_fp mixfixes map_bs rel_bs set_bss) fp_bs (map dest_TFree unsorted_As) fp_eqs
         no_defs_lthy0;
@@ -1351,7 +1362,7 @@
       let
         val ((induct_thms, induct_thm, induct_attrs), (fold_thmss, fold_attrs),
              (rec_thmss, rec_attrs)) =
-          derive_induct_iters_thms_for_types pre_bnfs (the iters_args_types) xtor_co_inducts
+          derive_induct_iters_thms_for_types pre_bnfs (the iters_args_types) xtor_co_induct
             xtor_co_iter_thmss nesting_bnfs nested_bnfs fpTs Cs Xs ctrXs_Tsss ctrss ctr_defss iterss
             iter_defss lthy;
 
@@ -1388,7 +1399,7 @@
              (disc_unfold_thmss, disc_corec_thmss, disc_coiter_attrs),
              (disc_unfold_iff_thmss, disc_corec_iff_thmss, disc_coiter_iff_attrs),
              (sel_unfold_thmss, sel_corec_thmss, sel_coiter_attrs)) =
-          derive_coinduct_coiters_thms_for_types pre_bnfs (the coiters_args_types) xtor_co_inducts
+          derive_coinduct_coiters_thms_for_types pre_bnfs (the coiters_args_types) xtor_co_induct
             dtor_ctors xtor_co_iter_thmss nesting_bnfs fpTs Cs As kss mss ns ctr_defss ctr_sugars
             coiterss coiter_defss (Proof_Context.export lthy' no_defs_lthy) lthy;
 
diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/Tools/bnf_fp_util.ML
--- a/src/HOL/BNF/Tools/bnf_fp_util.ML	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/Tools/bnf_fp_util.ML	Tue Aug 20 22:24:02 2013 +0200
@@ -17,7 +17,7 @@
      ctors: term list,
      dtors: term list,
      xtor_co_iterss: term list list,
-     xtor_co_inducts: thm list,
+     xtor_co_induct: thm,
      dtor_ctors: thm list,
      ctor_dtors: thm list,
      ctor_injects: thm list,
@@ -29,8 +29,6 @@
 
   val morph_fp_result: morphism -> fp_result -> fp_result
   val eq_fp_result: fp_result * fp_result -> bool
-  val co_induct_of: 'a list -> 'a
-  val strong_co_induct_of: 'a list -> 'a
   val un_fold_of: 'a list -> 'a
   val co_rec_of: 'a list -> 'a
 
@@ -179,6 +177,8 @@
   val mk_xtor_un_fold_o_map_thms: fp_kind -> bool -> int -> thm -> thm list -> thm list ->
     thm list -> thm list -> thm list
 
+  val mk_strong_coinduct_thm: thm -> thm list -> thm list -> Proof.context -> thm
+
   val fp_bnf: (binding list -> (string * sort) list -> typ list * typ list list ->
       BNF_Def.bnf list -> local_theory -> 'a) ->
     binding list -> (string * sort) list -> ((string * sort) * typ) list -> local_theory ->
@@ -203,7 +203,7 @@
    ctors: term list,
    dtors: term list,
    xtor_co_iterss: term list list,
-   xtor_co_inducts: thm list,
+   xtor_co_induct: thm,
    dtor_ctors: thm list,
    ctor_dtors: thm list,
    ctor_injects: thm list,
@@ -213,7 +213,7 @@
    xtor_co_iter_thmss: thm list list,
    rel_co_induct_thm: thm};
 
-fun morph_fp_result phi {Ts, bnfs, ctors, dtors, xtor_co_iterss, xtor_co_inducts, dtor_ctors,
+fun morph_fp_result phi {Ts, bnfs, ctors, dtors, xtor_co_iterss, xtor_co_induct, dtor_ctors,
     ctor_dtors, ctor_injects, xtor_map_thms, xtor_set_thmss, xtor_rel_thms, xtor_co_iter_thmss,
     rel_co_induct_thm} =
   {Ts = map (Morphism.typ phi) Ts,
@@ -221,7 +221,7 @@
    ctors = map (Morphism.term phi) ctors,
    dtors = map (Morphism.term phi) dtors,
    xtor_co_iterss = map (map (Morphism.term phi)) xtor_co_iterss,
-   xtor_co_inducts = map (Morphism.thm phi) xtor_co_inducts,
+   xtor_co_induct = Morphism.thm phi xtor_co_induct,
    dtor_ctors = map (Morphism.thm phi) dtor_ctors,
    ctor_dtors = map (Morphism.thm phi) ctor_dtors,
    ctor_injects = map (Morphism.thm phi) ctor_injects,
@@ -234,9 +234,6 @@
 fun eq_fp_result ({bnfs = bnfs1, ...} : fp_result, {bnfs = bnfs2, ...} : fp_result) =
   eq_list eq_bnf (bnfs1, bnfs2);
 
-fun co_induct_of (i :: _) = i;
-fun strong_co_induct_of [_, s] = s;
-
 fun un_fold_of [f, _] = f;
 fun co_rec_of [_, r] = r;
 
@@ -552,6 +549,25 @@
     split_conj_thm (un_fold_unique OF map (fp_case fp I mk_sym) unique_prems)
   end;
 
+fun mk_strong_coinduct_thm coind rel_eqs rel_monos ctxt =
+  let
+    val n = Thm.nprems_of coind;
+    val m = Thm.nprems_of (hd rel_monos) - n;
+    fun mk_inst phi = (phi, mk_union (phi, HOLogic.eq_const (fst (dest_pred2T (fastype_of phi)))))
+      |> pairself (certify ctxt);
+    val insts = Term.add_vars (Thm.prop_of coind) [] |> rev |> take n |> map (mk_inst o Var);
+    fun mk_unfold rel_eq rel_mono =
+      let
+        val eq = iffD2 OF [rel_eq RS @{thm predicate2_eqD}, refl];
+        val mono = rel_mono OF (replicate m @{thm order_refl} @ replicate n @{thm eq_subset});
+      in eq RS (mono RS @{thm predicate2D}) RS @{thm eqTrueI} end;
+    val unfolds = map2 mk_unfold rel_eqs rel_monos @ @{thms sup_fun_def sup_bool_def
+      imp_disjL all_conj_distrib subst_eq_imp simp_thms(18,21,35)};
+  in
+    Thm.instantiate ([], insts) coind
+    |> unfold_thms ctxt unfolds
+  end;
+
 fun fp_bnf construct_fp bs resBs eqs lthy =
   let
     val timer = time (Timer.startRealTimer ());
diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/Tools/bnf_gfp.ML
--- a/src/HOL/BNF/Tools/bnf_gfp.ML	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/Tools/bnf_gfp.ML	Tue Aug 20 22:24:02 2013 +0200
@@ -2019,49 +2019,36 @@
 
     val timer = time (timer "corec definitions & thms");
 
-    (* TODO: Get rid of strong versions (since these can easily be derived from the weak ones). *)
-    val (dtor_map_coinduct_thm, coinduct_params, dtor_coinduct_thm,
-         dtor_map_strong_coinduct_thm, dtor_strong_coinduct_thm) =
+    val (dtor_map_coinduct_thm, coinduct_params, dtor_coinduct_thm) =
       let
         val zs = Jzs1 @ Jzs2;
         val frees = phis @ zs;
 
-        fun mk_phi strong_eq phi z1 z2 = if strong_eq
-          then Term.absfree (dest_Free z1) (Term.absfree (dest_Free z2)
-            (HOLogic.mk_disj (phi $ z1 $ z2, HOLogic.mk_eq (z1, z2))))
-          else phi;
-
-        fun phi_rels strong_eq =
-          map3 (fn phi => fn z1 => fn z2 => mk_phi strong_eq phi z1 z2) phis Jzs1 Jzs2;
-
         val rels = map (Term.subst_atomic_types ((activeAs ~~ Ts) @ (activeBs ~~ Ts))) relsAsBs;
 
         fun mk_concl phi z1 z2 = HOLogic.mk_imp (phi $ z1 $ z2, HOLogic.mk_eq (z1, z2));
         val concl = HOLogic.mk_Trueprop (Library.foldr1 HOLogic.mk_conj
           (map3 mk_concl phis Jzs1 Jzs2));
 
-        fun mk_rel_prem strong_eq phi dtor rel Jz Jz_copy =
+        fun mk_rel_prem phi dtor rel Jz Jz_copy =
           let
-            val concl = Term.list_comb (rel, map HOLogic.eq_const passiveAs @ phi_rels strong_eq) $
+            val concl = Term.list_comb (rel, map HOLogic.eq_const passiveAs @ phis) $
               (dtor $ Jz) $ (dtor $ Jz_copy);
           in
             HOLogic.mk_Trueprop
               (list_all_free [Jz, Jz_copy] (HOLogic.mk_imp (phi $ Jz $ Jz_copy, concl)))
           end;
 
-        val rel_prems = map5 (mk_rel_prem false) phis dtors rels Jzs Jzs_copy;
-        val rel_strong_prems = map5 (mk_rel_prem true) phis dtors rels Jzs Jzs_copy;
-
+        val rel_prems = map5 mk_rel_prem phis dtors rels Jzs Jzs_copy;
         val dtor_coinduct_goal =
           fold_rev Logic.all frees (Logic.list_implies (rel_prems, concl));
-        val coinduct_params = rev (Term.add_tfrees dtor_coinduct_goal []);
 
         val dtor_coinduct =
           Goal.prove_sorry lthy [] [] dtor_coinduct_goal
             (K (mk_dtor_coinduct_tac m raw_coind_thm bis_rel_thm rel_congs))
           |> Thm.close_derivation;
 
-        fun mk_prem strong_eq phi dtor map_nth sets Jz Jz_copy FJz =
+        fun mk_prem phi dtor map_nth sets Jz Jz_copy FJz =
           let
             val xs = [Jz, Jz_copy];
 
@@ -2071,7 +2058,7 @@
             fun mk_set_conjunct set phi z1 z2 =
               list_all_free [z1, z2]
                 (HOLogic.mk_imp (HOLogic.mk_mem (HOLogic.mk_prod (z1, z2), set $ FJz),
-                  mk_phi strong_eq phi z1 z2 $ z1 $ z2));
+                  phi $ z1 $ z2));
 
             val concl = list_exists_free [FJz] (HOLogic.mk_conj
               (Library.foldr1 HOLogic.mk_conj (map2 mk_map_conjunct [fstsTs, sndsTs] xs),
@@ -2082,36 +2069,15 @@
               (HOLogic.mk_Trueprop (Term.list_comb (phi, xs)), HOLogic.mk_Trueprop concl))
           end;
 
-        fun mk_prems strong_eq =
-          map7 (mk_prem strong_eq) phis dtors map_FT_nths prodFT_setss Jzs Jzs_copy FJzs;
-
-        val prems = mk_prems false;
-        val strong_prems = mk_prems true;
+        val prems = map7 mk_prem phis dtors map_FT_nths prodFT_setss Jzs Jzs_copy FJzs;
 
         val dtor_map_coinduct_goal = fold_rev Logic.all frees (Logic.list_implies (prems, concl));
         val dtor_map_coinduct =
           Goal.prove_sorry lthy [] [] dtor_map_coinduct_goal
             (K (mk_dtor_map_coinduct_tac m ks raw_coind_thm bis_def))
           |> Thm.close_derivation;
-
-        val cTs = map (SOME o certifyT lthy o TFree) coinduct_params;
-        val cts = map3 (SOME o certify lthy ooo mk_phi true) phis Jzs1 Jzs2;
-
-        val dtor_strong_coinduct = singleton (Proof_Context.export names_lthy lthy)
-          (Goal.prove_sorry lthy [] []
-            (fold_rev Logic.all zs (Logic.list_implies (rel_strong_prems, concl)))
-            (K (mk_dtor_strong_coinduct_tac lthy m cTs cts dtor_coinduct rel_monos rel_eqs)))
-          |> Thm.close_derivation;
-
-        val dtor_map_strong_coinduct = singleton (Proof_Context.export names_lthy lthy)
-          (Goal.prove_sorry lthy [] []
-            (fold_rev Logic.all zs (Logic.list_implies (strong_prems, concl)))
-            (K (mk_dtor_map_strong_coinduct_tac ks cTs cts dtor_map_coinduct bis_def
-              (tcoalg_thm RS bis_Id_on_thm))))
-          |> Thm.close_derivation;
       in
-        (dtor_map_coinduct, rev (Term.add_tfrees dtor_map_coinduct_goal []),
-         dtor_coinduct, dtor_map_strong_coinduct, dtor_strong_coinduct)
+        (dtor_map_coinduct, rev (Term.add_tfrees dtor_map_coinduct_goal []), dtor_coinduct)
       end;
 
     val timer = time (timer "coinduction");
@@ -2902,8 +2868,6 @@
       val common_notes =
         [(dtor_coinductN, [dtor_coinduct_thm]),
         (dtor_map_coinductN, [dtor_map_coinduct_thm]),
-        (dtor_map_strong_coinductN, [dtor_map_strong_coinduct_thm]),
-        (dtor_strong_coinductN, [dtor_strong_coinduct_thm]),
         (rel_coinductN, [Jrel_coinduct_thm]),
         (dtor_unfold_transferN, dtor_unfold_transfer_thms)]
         |> map (fn (thmN, thms) =>
@@ -2933,7 +2897,7 @@
     timer;
     ({Ts = Ts, bnfs = Jbnfs, ctors = ctors, dtors = dtors,
       xtor_co_iterss = transpose [unfolds, corecs],
-      xtor_co_inducts = [dtor_coinduct_thm, dtor_strong_coinduct_thm], dtor_ctors = dtor_ctor_thms,
+      xtor_co_induct = dtor_coinduct_thm, dtor_ctors = dtor_ctor_thms,
       ctor_dtors = ctor_dtor_thms, ctor_injects = ctor_inject_thms,
       xtor_map_thms = folded_dtor_map_thms, xtor_set_thmss = folded_dtor_set_thmss',
       xtor_rel_thms = dtor_Jrel_thms,
diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/Tools/bnf_gfp_tactics.ML
--- a/src/HOL/BNF/Tools/bnf_gfp_tactics.ML	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/Tools/bnf_gfp_tactics.ML	Tue Aug 20 22:24:02 2013 +0200
@@ -39,12 +39,8 @@
   val mk_corec_unique_mor_tac: thm list -> thm list -> thm -> {prems: 'a, context: Proof.context} ->
     tactic
   val mk_dtor_map_coinduct_tac: int -> int list -> thm -> thm -> tactic
-  val mk_dtor_map_strong_coinduct_tac: int list -> ctyp option list -> cterm option list -> thm ->
-    thm -> thm -> tactic
   val mk_dtor_set_tac: int -> thm -> thm -> thm list -> tactic
   val mk_dtor_coinduct_tac: int -> thm -> thm -> thm list -> tactic
-  val mk_dtor_strong_coinduct_tac: Proof.context -> int -> ctyp option list ->
-    cterm option list -> thm -> thm list -> thm list -> tactic
   val mk_dtor_rel_tac: Proof.context -> thm list -> int -> thm -> thm -> thm -> thm -> thm list ->
     thm -> thm -> thm list -> thm list -> thm list list -> tactic
   val mk_dtor_o_ctor_tac: thm -> thm -> thm -> thm -> thm list ->
@@ -991,18 +987,6 @@
     CONJ_WRAP' (K (EVERY' [rtac impI, rtac @{thm IdD}, etac set_mp,
       rtac CollectI, etac @{thm prod_caseI}])) rel_congs] 1;
 
-fun mk_dtor_strong_coinduct_tac ctxt m cTs cts dtor_coinduct rel_monos rel_eqs =
-  EVERY' [rtac rev_mp, rtac (Drule.instantiate' cTs cts dtor_coinduct),
-    EVERY' (map2 (fn rel_mono => fn rel_eq =>
-      EVERY' [REPEAT_DETERM o resolve_tac [allI, impI], REPEAT_DETERM o etac allE,
-        etac disjE, etac mp, atac, hyp_subst_tac ctxt, rtac (rel_mono RS @{thm predicate2D}),
-        REPEAT_DETERM_N m o rtac @{thm order_refl},
-        REPEAT_DETERM_N (length rel_monos) o rtac @{thm eq_subset},
-        rtac (rel_eq RS sym RS @{thm eq_refl} RS @{thm predicate2D}), rtac refl])
-    rel_monos rel_eqs),
-    rtac impI, REPEAT_DETERM o etac conjE,
-    CONJ_WRAP' (K (rtac impI THEN' etac mp THEN' etac disjI1)) rel_eqs] 1;
-
 fun mk_dtor_map_coinduct_tac m ks raw_coind bis_def =
   let
     val n = length ks;
@@ -1024,20 +1008,6 @@
         rtac CollectI, etac (refl RSN (2, @{thm subst_Pair}))]) ks] 1
   end;
 
-fun mk_dtor_map_strong_coinduct_tac ks cTs cts dtor_map_coinduct bis_def bis_Id_on =
-  EVERY' [rtac rev_mp, rtac (Drule.instantiate' cTs cts dtor_map_coinduct),
-    EVERY' (map (fn i =>
-      EVERY' [etac disjE, REPEAT_DETERM o dtac @{thm meta_spec}, etac meta_mp,
-        atac, rtac rev_mp, rtac subst, rtac bis_def, rtac bis_Id_on,
-        rtac impI, dtac conjunct2, dtac (mk_conjunctN (length ks) i), REPEAT_DETERM o etac allE,
-        etac impE, etac @{thm Id_on_UNIV_I}, REPEAT_DETERM o eresolve_tac [bexE, conjE, CollectE],
-        rtac exI, rtac conjI, etac conjI, atac,
-        CONJ_WRAP' (K (EVERY' [REPEAT_DETERM o resolve_tac [allI, impI],
-          rtac disjI2, rtac @{thm Id_onD}, etac set_mp, atac])) ks])
-    ks),
-    rtac impI, REPEAT_DETERM o etac conjE,
-    CONJ_WRAP' (K (rtac impI THEN' etac mp THEN' etac disjI1)) ks] 1;
-
 fun mk_map_tac m n cT unfold map_comp' map_cong0 =
   EVERY' [rtac ext, rtac (o_apply RS trans RS sym), rtac (o_apply RS trans RS sym),
     rtac (unfold RS trans), rtac (Thm.permute_prems 0 1 (map_comp' RS box_equals)), rtac map_cong0,
diff -r 186535065f5c -r ae863ed9f64f src/HOL/BNF/Tools/bnf_lfp.ML
--- a/src/HOL/BNF/Tools/bnf_lfp.ML	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/BNF/Tools/bnf_lfp.ML	Tue Aug 20 22:24:02 2013 +0200
@@ -1858,7 +1858,7 @@
   in
     timer;
     ({Ts = Ts, bnfs = Ibnfs, ctors = ctors, dtors = dtors, xtor_co_iterss = transpose [folds, recs],
-      xtor_co_inducts = [ctor_induct_thm], dtor_ctors = dtor_ctor_thms, ctor_dtors = ctor_dtor_thms,
+      xtor_co_induct = ctor_induct_thm, dtor_ctors = dtor_ctor_thms, ctor_dtors = ctor_dtor_thms,
       ctor_injects = ctor_inject_thms, xtor_map_thms = folded_ctor_map_thms,
       xtor_set_thmss = folded_ctor_set_thmss', xtor_rel_thms = ctor_Irel_thms,
       xtor_co_iter_thmss = transpose [ctor_fold_thms, ctor_rec_thms],
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Nitpick/kodkod.ML
--- a/src/HOL/Tools/Nitpick/kodkod.ML	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Nitpick/kodkod.ML	Tue Aug 20 22:24:02 2013 +0200
@@ -881,7 +881,7 @@
 
 val scan_nat =
   Scan.repeat1 (Scan.one Symbol.is_ascii_digit)
-  >> (the o Int.fromString o space_implode "")
+  >> (the o Int.fromString o implode)
 val scan_rel_name =
   ($$ "s" |-- scan_nat >> pair 1
    || $$ "r" |-- scan_nat >> pair 2
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/dictionaries.py	Tue Aug 20 22:24:02 2013 +0200
@@ -179,6 +179,13 @@
         assert line.startswith('? ')
         line = line[2:]
         name = None
+        numberOfPredictions = None
+
+        # Check whether there is a problem name:
+        tmp = line.split('#')
+        if len(tmp) == 2:
+            numberOfPredictions = int(tmp[0].strip())
+            line = tmp[1]
 
         # Check whether there is a problem name:
         tmp = line.split(':')
@@ -206,7 +213,7 @@
         else:
             hints = []
 
-        return name,features,accessibles,hints
+        return name,features,accessibles,hints,numberOfPredictions
 
     def save(self,fileName):
         if self.changed:
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/fullNaiveBayes.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/fullNaiveBayes.py	Tue Aug 20 22:24:02 2013 +0200
@@ -0,0 +1,203 @@
+'''
+Created on Jul 11, 2012
+
+@author: Daniel Kuehlwein
+'''
+
+from cPickle import dump,load
+from numpy import array,exp
+from math import log
+
+class NBClassifier(object):
+    '''
+    An updateable naive Bayes classifier.
+    '''
+
+    def __init__(self):
+        '''
+        Constructor
+        '''
+        self.counts = {}
+        self.negCounts = {}
+    
+    def initializeModel(self,trainData,dicts):
+        """
+        Build basic model from training data.
+        """        
+        for d in trainData:
+            self.counts[d] = [0,{}]
+            self.negCounts[d] = [0,{}]
+            dAccUnExp = dicts.accessibleDict[d]
+            if dicts.expandedAccessibles.has_key(d):
+                dAcc = dicts.expandedAccessibles(d)
+            else:
+                if len(dicts.expandedAccessibles.keys()) >= 100:
+                    dicts.expandedAccessibles = {}
+                dAcc = dicts.expand_accessibles(dAccUnExp)
+                dicts.expandedAccessibles[d] = dAcc 
+            dDeps = set(dicts.dependenciesDict[d])
+            dFeatures = dicts.featureDict[d]
+            # d proves d
+            self.counts[d][0] += 1
+            for f in dFeatures:
+                if self.counts[d][1].has_key(f):
+                    self.counts[d][1][f] += 1
+                else:
+                    self.counts[d][1][f] = 1
+            for acc in dAcc:
+                if not self.counts.has_key(acc):
+                    self.counts[acc] = [0,{}]
+                if not self.negCounts.has_key(acc):
+                    self.negCounts[acc] = [0,{}]        
+                if acc in dDeps:
+                    self.counts[acc][0] += 1
+                    for f in dFeatures:
+                        if self.counts[acc][1].has_key(f):
+                            self.counts[acc][1][f] += 1
+                        else:
+                            self.counts[acc][1][f] = 1
+                else:
+                    self.negCounts[acc][0] += 1
+                    for f in dFeatures:
+                        if self.negCounts[acc][1].has_key(f):
+                            self.negCounts[acc][1][f] += 1
+                        else:
+                            self.negCounts[acc][1][f] = 1
+    
+    def update(self,dataPoint,features,dependencies,dicts):
+        """
+        Updates the Model.
+        """
+        if not self.counts.has_key(dataPoint):
+            self.counts[dataPoint] = [0,{}]
+        if not self.negCounts.has_key(dataPoint):
+            self.negCounts[dataPoint] = [0,{}]
+        if dicts.expandedAccessibles.has_key(dataPoint):
+            dAcc = dicts.expandedAccessibles(dataPoint)
+        else:
+            if len(dicts.expandedAccessibles.keys()) >= 100:
+                dicts.expandedAccessibles = {}
+            dAccUnExp = dicts.accessibleDict[dataPoint]
+            dAcc = dicts.expand_accessibles(dAccUnExp)
+            dicts.expandedAccessibles[dataPoint] = dAcc 
+        dDeps = set(dicts.dependenciesDict[dataPoint])
+        dFeatures = dicts.featureDict[dataPoint]
+        # d proves d
+        self.counts[dataPoint][0] += 1
+        for f in dFeatures:
+            if self.counts[dataPoint][1].has_key(f):
+                self.counts[dataPoint][1][f] += 1
+            else:
+                self.counts[dataPoint][1][f] = 1
+
+        for acc in dAcc:
+            if acc in dDeps:
+                self.counts[acc][0] += 1
+                for f in dFeatures:
+                    if self.counts[acc][1].has_key(f):
+                        self.counts[acc][1][f] += 1
+                    else:
+                        self.counts[acc][1][f] = 1
+            else:
+                self.negCounts[acc][0] += 1
+                for f in dFeatures:
+                    if self.negCounts[acc][1].has_key(f):
+                        self.negCounts[acc][1][f] += 1
+                    else:
+                        self.negCounts[acc][1][f] = 1
+
+    def delete(self,dataPoint,features,dependencies):
+        """
+        Deletes a single datapoint from the model.
+        """
+        for dep in dependencies:
+            self.counts[dep][0] -= 1
+            for f in features:
+                self.counts[dep][1][f] -= 1
+
+            
+    def overwrite(self,problemId,newDependencies,dicts):
+        """
+        Deletes the old dependencies of problemId and replaces them with the new ones. Updates the model accordingly.
+        """
+        assert self.counts.has_key(problemId)
+        oldDeps = dicts.dependenciesDict[problemId]
+        features = dicts.featureDict[problemId]
+        self.delete(problemId,features,oldDeps)
+        self.update(problemId,features,newDependencies)
+    
+    def predict(self,features,accessibles):
+        """
+        For each accessible, predicts the probability of it being useful given the features.
+        Returns a ranking of the accessibles.
+        """
+        predictions = []
+        for a in accessibles:            
+            posA = self.counts[a][0]
+            negA = self.negCounts[a][0]
+            fPosA = set(self.counts[a][1].keys())
+            fNegA = set(self.negCounts[a][1].keys())
+            fPosWeightsA = self.counts[a][1]
+            fNegWeightsA = self.negCounts[a][1]
+            if negA == 0:
+                resultA = 0 
+            elif posA == 0:
+                print a
+                print 'xx'
+                import sys
+                sys.exit(-1)
+            else:
+                resultA = log(posA) - log(negA) 
+                for f in features:
+                    if f in fPosA:
+                        # P(f | a)
+                        if fPosWeightsA[f] == 0:
+                            resultA -= 15
+                        else:
+                            assert fPosWeightsA[f] <= posA
+                            resultA += log(float(fPosWeightsA[f])/posA)
+                    else:
+                        resultA -= 15
+                    # P(f | not a)
+                    if f in fNegA:
+                        if fNegWeightsA[f] == 0:
+                            resultA += 15
+                        else:
+                            assert fNegWeightsA[f] <= negA
+                            resultA -= log(float(fNegWeightsA[f])/negA)
+                    else: 
+                        resultA += 15
+
+            predictions.append(resultA)
+        #expPredictions = array([exp(x) for x in predictions])
+        predictions = array(predictions)
+        perm = (-predictions).argsort()        
+        #return array(accessibles)[perm],expPredictions[perm] 
+        return array(accessibles)[perm],predictions[perm]
+    
+    def save(self,fileName):
+        OStream = open(fileName, 'wb')
+        dump((self.counts,self.negCounts),OStream)        
+        OStream.close()
+        
+    def load(self,fileName):
+        OStream = open(fileName, 'rb')
+        self.counts,self.negCounts = load(OStream)      
+        OStream.close()
+
+    
+if __name__ == '__main__':
+    featureDict = {0:[0,1,2],1:[3,2,1]}
+    dependenciesDict = {0:[0],1:[0,1]}
+    libDicts = (featureDict,dependenciesDict,{})
+    c = NBClassifier()
+    c.initializeModel([0,1],libDicts)
+    c.update(2,[14,1,3],[0,2])
+    print c.counts
+    print c.predict([0,14],[0,1,2])
+    c.storeModel('x')
+    d = NBClassifier()
+    d.loadModel('x')
+    print c.counts
+    print d.counts
+    print 'Done'
\ No newline at end of file
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/mash.py
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mash.py	Tue Aug 20 22:24:02 2013 +0200
@@ -1,7 +1,7 @@
-#!/usr/bin/python
-#     Title:      HOL/Tools/Sledgehammer/MaSh/src/mash.py
+#!/usr/bin/env python
+#     Title:      HOL/Tools/Sledgehammer/MaSh/src/mash
 #     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
-#     Copyright   2012
+#     Copyright   2012 - 2013
 #
 # Entry point for MaSh (Machine Learning for Sledgehammer).
 
@@ -15,69 +15,38 @@
 @author: Daniel Kuehlwein
 '''
 
-import logging,datetime,string,os,sys
-from argparse import ArgumentParser,RawDescriptionHelpFormatter
-from time import time
-from stats import Statistics
+import socket,sys,time,logging
+
+from spawnDaemon import spawnDaemon
+
+
+import logging,string,os,sys
+
+
 from theoryStats import TheoryStatistics
 from theoryModels import TheoryModels
 from dictionaries import Dictionaries
-#from fullNaiveBayes import NBClassifier
-from sparseNaiveBayes import sparseNBClassifier
-from snow import SNoW
 from predefined import Predefined
 
-# Set up command-line parser
-parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer.  \n\n\
-MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\
---------------- Example Usage ---------------\n\
-First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\
-Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\
-\n\n\
-Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter)
-parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.')
-parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.')
-parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(),
-                    help='File where the predictions stored. Default=../tmp/dateTime.predictions.')
-parser.add_argument('--numberOfPredictions',default=200,help="Number of premises to write in the output. Default=200.",type=int)
-
-parser.add_argument('--init',default=False,action='store_true',help="Initialize Mash. Requires --inputDir to be defined. Default=False.")
-parser.add_argument('--inputDir',default='../data/20121212/Jinja/',\
-                    help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility')
-parser.add_argument('--depFile', default='mash_dependencies',
-                    help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies')
-parser.add_argument('--saveModel',default=False,action='store_true',help="Stores the learned Model at the end of a prediction run. Default=False.")
+from parameters import init_parser
 
-parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.")
-# Theory Parameters
-parser.add_argument('--theoryDefValPos',default=-7.5,help="Default value for positive unknown features. Default=-7.5.",type=float)
-parser.add_argument('--theoryDefValNeg',default=-10.0,help="Default value for negative unknown features. Default=-15.0.",type=float)
-parser.add_argument('--theoryPosWeight',default=2.0,help="Weight value for positive features. Default=2.0.",type=float)
-
-parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.")
-# NB Parameters
-parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float)
-parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float)
-parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float)
-# TODO: Rename to sineFeatures
-parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.")
-parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float)
-
-parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.")
-parser.add_argument('--predef',help="Use predefined predictions. Used only for comparison with the actual learning. Argument is the filename of the predictions.")
-parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\
-                    WARNING: This will make the program a lot slower! Default=False.")
-parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.")
-parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int)
-parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log')
-parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.")
-parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle')
-parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle')
-parser.add_argument('--theoryFile', default='../tmp/theory.pickle', help='Model file name. Default=../tmp/theory.pickle')
+def communicate(data,host,port):
+    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    try:
+        sock.connect((host,port))
+        sock.sendall(data)
+        received = sock.recv(262144)
+    except:
+        logger = logging.getLogger('communicate')
+        logger.warning('Communication with server failed.')
+        received = -1
+    finally:
+        sock.close()    
+    return received
 
 def mash(argv = sys.argv[1:]):
     # Initializing command-line arguments
-    args = parser.parse_args(argv)
+    args = init_parser(argv)
     
     # Set up logging
     logging.basicConfig(level=logging.DEBUG,
@@ -85,18 +54,8 @@
                         datefmt='%d-%m %H:%M:%S',
                         filename=args.log,
                         filemode='w')    
-    logger = logging.getLogger('main.py')
-    
-    #"""
-    # remove old handler for tester
-    #logger.root.handlers[0].stream.close()
-    logger.root.removeHandler(logger.root.handlers[0])
-    file_handler = logging.FileHandler(args.log)
-    file_handler.setLevel(logging.DEBUG)
-    formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
-    file_handler.setFormatter(formatter)
-    logger.root.addHandler(file_handler)
-    #"""
+    logger = logging.getLogger('mash')
+
     if args.quiet:
         logger.setLevel(logging.WARNING)
         #console.setLevel(logging.WARNING)
@@ -110,257 +69,44 @@
     if not os.path.exists(args.outputDir):
         os.makedirs(args.outputDir)
 
-    logger.info('Using the following settings: %s',args)
-    # Pick algorithm
-    if args.nb:
-        logger.info('Using sparse Naive Bayes for learning.')
-        model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
-    elif args.snow:
-        logger.info('Using naive bayes (SNoW) for learning.')
-        model = SNoW()
-    elif args.predef:
-        logger.info('Using predefined predictions.')
-        model = Predefined(args.predef)
-    else:
-        logger.info('No algorithm specified. Using sparse Naive Bayes.')
-        model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
-
-    # Initializing model
-    if args.init:
-        logger.info('Initializing Model.')
-        startTime = time()
-
-        # Load all data
-        dicts = Dictionaries()
-        dicts.init_all(args)
-        
-        # Create Model
-        trainData = dicts.featureDict.keys()
-        model.initializeModel(trainData,dicts)
-
-        if args.learnTheories:
-            depFile = os.path.join(args.inputDir,args.depFile)
-            theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
-            theoryModels.init(depFile,dicts)
-            theoryModels.save(args.theoryFile)
-            
-        model.save(args.modelFile)
-        dicts.save(args.dictsFile)
-
-        logger.info('All Done. %s seconds needed.',round(time()-startTime,2))
-        return 0
-    # Create predictions and/or update model
-    else:
-        lineCounter = 1
-        statementCounter = 1
-        computeStats = False
-        dicts = Dictionaries()
-        theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
-        # Load Files
-        if os.path.isfile(args.dictsFile):
-            #logger.info('Loading Dictionaries')
-            #startTime = time()
-            dicts.load(args.dictsFile)            
-            #logger.info('Done %s',time()-startTime)
-        if os.path.isfile(args.modelFile):
-            #logger.info('Loading Model')
-            #startTime = time()
-            model.load(args.modelFile)            
-            #logger.info('Done %s',time()-startTime)
-        if os.path.isfile(args.theoryFile) and args.learnTheories:
-            #logger.info('Loading Theory Models')
-            #startTime = time()
-            theoryModels.load(args.theoryFile)
-            #logger.info('Done %s',time()-startTime)
-        logger.info('All loading completed')
-
-        # IO Streams
-        OS = open(args.predictions,'w')
-        IS = open(args.inputFile,'r')
-
-        # Statistics
-        if args.statistics:
-            stats = Statistics(args.cutOff)
-            if args.learnTheories:
-                theoryStats = TheoryStatistics()
-
-        predictions = None
-        predictedTheories = None
-        #Reading Input File
-        for line in IS:
-#           try:
-            if True:
-                if line.startswith('!'):
-                    problemId = dicts.parse_fact(line)    
-                    # Statistics
-                    if args.statistics and computeStats:
-                        computeStats = False
-                        # Assume '!' comes after '?'
-                        if args.predef:
-                            predictions = model.predict(problemId)
-                        if args.learnTheories:
-                            tmp = [dicts.idNameDict[x] for x in dicts.dependenciesDict[problemId]]
-                            usedTheories = set([x.split('.')[0] for x in tmp]) 
-                            theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories,len(theoryModels.accessibleTheories))                        
-                        stats.update(predictions,dicts.dependenciesDict[problemId],statementCounter)
-                        if not stats.badPreds == []:
-                            bp = string.join([str(dicts.idNameDict[x]) for x in stats.badPreds], ',')
-                            logger.debug('Bad predictions: %s',bp)
+    # If server is not running, start it.
+    try:
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.connect((args.host,args.port))       
+        sock.close()
+    except:
+        logger.info('Starting Server.')
+        spawnDaemon('server.py')
+        # TODO: Make this fault tolerant
+        time.sleep(0.5)
+        # Init server
+        data = "i "+";".join(argv)
+        received = communicate(data,args.host,args.port)
+        logger.info(received)     
+    
+    if args.inputFile == None:
+        return
+    logger.debug('Using the following settings: %s',args)
+    # IO Streams
+    OS = open(args.predictions,'w')
+    IS = open(args.inputFile,'r')
+    count = 0
+    for line in IS:
+        count += 1
+        #if count == 127:
+        #    break as       
+        received = communicate(line,args.host,args.port)
+        if not received == '':
+            OS.write('%s\n' % received)
+        #logger.info(received)
+    OS.close()
+    IS.close()
 
-                    statementCounter += 1
-                    # Update Dependencies, p proves p
-                    dicts.dependenciesDict[problemId] = [problemId]+dicts.dependenciesDict[problemId]
-                    if args.learnTheories:
-                        theoryModels.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts)
-                    if args.snow:
-                        model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts)
-                    else:
-                        model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId])
-                elif line.startswith('p'):
-                    # Overwrite old proof.
-                    problemId,newDependencies = dicts.parse_overwrite(line)
-                    newDependencies = [problemId]+newDependencies
-                    model.overwrite(problemId,newDependencies,dicts)
-                    if args.learnTheories:
-                        theoryModels.overwrite(problemId,newDependencies,dicts)
-                    dicts.dependenciesDict[problemId] = newDependencies
-                elif line.startswith('?'):               
-                    startTime = time()
-                    computeStats = True
-                    if args.predef:
-                        continue
-                    name,features,accessibles,hints = dicts.parse_problem(line)  
-                        
-                    # Create predictions
-                    logger.info('Starting computation for problem on line %s',lineCounter)
-                    # Update Models with hints
-                    if not hints == []:
-                        if args.learnTheories:
-                            accessibleTheories = set([(dicts.idNameDict[x]).split('.')[0] for x in accessibles])
-                            theoryModels.update_with_acc('hints',features,hints,dicts,accessibleTheories)
-                        if args.snow:
-                            pass
-                        else:
-                            model.update('hints',features,hints)
-
-                    # Predict premises
-                    if args.learnTheories:
-                        predictedTheories,accessibles = theoryModels.predict(features,accessibles,dicts)
-
-                    # Add additional features on premise lvl if sine is enabled
-                    if args.sineFeatures:
-                        origFeatures = [f for f,_w in features]
-                        secondaryFeatures = []
-                        for f in origFeatures:
-                            if dicts.featureCountDict[f] == 1:
-                                continue
-                            triggeredFormulas = dicts.featureTriggeredFormulasDict[f]                                
-                            for formula in triggeredFormulas: 
-                                tFeatures = dicts.triggerFeaturesDict[formula]                                
-                                #tFeatures = [ff for ff,_fw in dicts.featureDict[formula]]
-                                newFeatures = set(tFeatures).difference(secondaryFeatures+origFeatures)
-                            for fNew in newFeatures:
-                                secondaryFeatures.append((fNew,args.sineWeight))
-                        predictionsFeatures = features+secondaryFeatures
-                    else:
-                        predictionsFeatures = features                    
-                    predictions,predictionValues = model.predict(predictionsFeatures,accessibles,dicts)
-                    assert len(predictions) == len(predictionValues)
-                    
-                    # Delete hints
-                    if not hints == []:
-                        if args.learnTheories:
-                            theoryModels.delete('hints',features,hints,dicts)
-                        if args.snow:
-                            pass
-                        else:
-                            model.delete('hints',features,hints)
+    # Statistics
+    if args.statistics:
+        received = communicate('avgStats',args.host,args.port)
+        logger.info(received)
 
-                    logger.info('Done. %s seconds needed.',round(time()-startTime,2))
-                    # Output        
-                    predictionNames = [str(dicts.idNameDict[p]) for p in predictions[:args.numberOfPredictions]]
-                    predictionValues = [str(x) for x in predictionValues[:args.numberOfPredictions]]
-                    predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))]
-                    predictionsString = string.join(predictionsStringList,' ')
-                    outString = '%s: %s' % (name,predictionsString)
-                    OS.write('%s\n' % outString)
-                else:
-                    logger.warning('Unspecified input format: \n%s',line)
-                    sys.exit(-1)
-                lineCounter += 1
-            """
-            except:
-                logger.warning('An error occurred on line %s .',line)
-                lineCounter += 1
-                continue
-            """
-        OS.close()
-        IS.close()
 
-        # Statistics
-        if args.statistics:
-            if args.learnTheories:
-                theoryStats.printAvg()
-            stats.printAvg()
-
-        # Save
-        if args.saveModel:
-            model.save(args.modelFile)
-            if args.learnTheories:
-                theoryModels.save(args.theoryFile)
-        dicts.save(args.dictsFile)
-        if not args.saveStats == None:
-            if args.learnTheories:
-                theoryStatsFile = os.path.join(args.outputDir,'theoryStats')
-                theoryStats.save(theoryStatsFile)
-            statsFile = os.path.join(args.outputDir,args.saveStats)
-            stats.save(statsFile)
-    return 0
-
-if __name__ == '__main__':
-    # Example:
-    #List
-    # ISAR Theories
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--learnTheories']    
-    #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories']
-    # ISAR predef mesh 
-    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130110/List/','--predef','../data/20130110/List/mesh_suggestions']
-    #args = ['-i', '../data/20130110/List/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130110/List/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
- 
-    
-    # Auth
-    # ISAR Theories
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--learnTheories','--sineFeatures']    
-    #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories']
-    # ISAR predef mesh 
-    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121227b/Auth/','--predef','../data/20121227b/Auth/mesh_suggestions']
-    #args = ['-i', '../data/20121227b/Auth/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20121227b/Auth/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
-
-    
-    # Jinja
-    # ISAR Theories
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories']    
-    #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--cutOff','500','--learnTheories']
-    # ISAR Theories SinePrior
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--learnTheories','--sineFeatures']    
-    #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--learnTheories','--sineFeatures']    
-    # ISAR NB
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/']    
-    #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500']
-    # ISAR predef mesh
-    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20130111/Jinja/','--predef','../data/20130111/Jinja/mesh_suggestions']
-    #args = ['-i', '../data/20130111/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','../data/20130111/Jinja/mesh_suggestions','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats']
-    # ISAR NB ATP
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--depFile','mash_atp_dependencies']    
-    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--nb','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies']
-    #args = ['-l','testIsabelle.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--predef','--depFile','mash_atp_dependencies']
-    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/JinjaMePo.pred','-l','testIsabelle.log','--predef','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaMePo.stats','--depFile','mash_atp_dependencies']
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/Jinja/','--depFile','mash_atp_dependencies','--snow']    
-    #args = ['-i', '../data/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500','--depFile','mash_atp_dependencies']
-    # ISAR Snow
-    #args = ['-l','testNB.log','-o','../tmp/','--statistics','--init','--inputDir','../data/20121212/Jinja/','--snow']    
-    #args = ['-i', '../data/20121212/Jinja/mash_commands','-p','../tmp/testNB.pred','-l','../tmp/testNB.log','--snow','-o','../tmp/','--statistics','--saveStats','../tmp/JinjaIsarNB.stats','--cutOff','500']
- 
-
-    #sys.exit(mash(args))
+if __name__ == "__main__":
     sys.exit(mash())
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/mashOld.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mashOld.py	Tue Aug 20 22:24:02 2013 +0200
@@ -0,0 +1,329 @@
+#!/usr/bin/python
+#     Title:      HOL/Tools/Sledgehammer/MaSh/src/mash.py
+#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
+#     Copyright   2012
+#
+# Entry point for MaSh (Machine Learning for Sledgehammer).
+
+'''
+MaSh - Machine Learning for Sledgehammer
+
+MaSh allows to use different machine learning algorithms to predict relevant fact for Sledgehammer.
+
+Created on July 12, 2012
+
+@author: Daniel Kuehlwein
+'''
+
+import logging,datetime,string,os,sys
+from argparse import ArgumentParser,RawDescriptionHelpFormatter
+from time import time
+from stats import Statistics
+from theoryStats import TheoryStatistics
+from theoryModels import TheoryModels
+from dictionaries import Dictionaries
+#from fullNaiveBayes import NBClassifier
+from sparseNaiveBayes import sparseNBClassifier
+from snow import SNoW
+from predefined import Predefined
+
+# Set up command-line parser
+parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer.  \n\n\
+MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\
+--------------- Example Usage ---------------\n\
+First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\
+Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\
+\n\n\
+Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter)
+parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.')
+parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.')
+parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(),
+                    help='File where the predictions stored. Default=../tmp/dateTime.predictions.')
+parser.add_argument('--numberOfPredictions',default=200,help="Number of premises to write in the output. Default=200.",type=int)
+
+parser.add_argument('--init',default=False,action='store_true',help="Initialize Mash. Requires --inputDir to be defined. Default=False.")
+parser.add_argument('--inputDir',default='../data/20121212/Jinja/',\
+                    help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility')
+parser.add_argument('--depFile', default='mash_dependencies',
+                    help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies')
+parser.add_argument('--saveModel',default=False,action='store_true',help="Stores the learned Model at the end of a prediction run. Default=False.")
+
+parser.add_argument('--learnTheories',default=False,action='store_true',help="Uses a two-lvl prediction mode. First the theories, then the premises. Default=False.")
+# Theory Parameters
+parser.add_argument('--theoryDefValPos',default=-7.5,help="Default value for positive unknown features. Default=-7.5.",type=float)
+parser.add_argument('--theoryDefValNeg',default=-10.0,help="Default value for negative unknown features. Default=-15.0.",type=float)
+parser.add_argument('--theoryPosWeight',default=2.0,help="Weight value for positive features. Default=2.0.",type=float)
+
+parser.add_argument('--nb',default=False,action='store_true',help="Use Naive Bayes for learning. This is the default learning method.")
+# NB Parameters
+parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float)
+parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float)
+parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float)
+# TODO: Rename to sineFeatures
+parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.")
+parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float)
+
+parser.add_argument('--snow',default=False,action='store_true',help="Use SNoW's naive bayes instead of Naive Bayes for learning.")
+parser.add_argument('--predef',help="Use predefined predictions. Used only for comparison with the actual learning. Argument is the filename of the predictions.")
+parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\
+                    WARNING: This will make the program a lot slower! Default=False.")
+parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.")
+parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int)
+parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log')
+parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.")
+parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle')
+parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle')
+parser.add_argument('--theoryFile', default='../tmp/theory.pickle', help='Model file name. Default=../tmp/theory.pickle')
+
+def mash(argv = sys.argv[1:]):
+    # Initializing command-line arguments
+    args = parser.parse_args(argv)
+    
+    # Set up logging
+    logging.basicConfig(level=logging.DEBUG,
+                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
+                        datefmt='%d-%m %H:%M:%S',
+                        filename=args.log,
+                        filemode='w')    
+    logger = logging.getLogger('main.py')
+    
+    """
+    # remove old handler for tester
+    # TODO: Comment out for Jasmins version. This crashes python 2.6.1
+    logger.root.handlers[0].stream.close()
+    logger.root.removeHandler(logger.root.handlers[0])
+    file_handler = logging.FileHandler(args.log)
+    file_handler.setLevel(logging.DEBUG)
+    formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
+    file_handler.setFormatter(formatter)
+    logger.root.addHandler(file_handler)
+    #"""
+    if args.quiet:
+        logger.setLevel(logging.WARNING)
+        #console.setLevel(logging.WARNING)
+    else:
+        console = logging.StreamHandler(sys.stdout)
+        console.setLevel(logging.INFO)
+        formatter = logging.Formatter('# %(message)s')
+        console.setFormatter(formatter)
+        logging.getLogger('').addHandler(console)
+        
+    if not os.path.exists(args.outputDir):
+        os.makedirs(args.outputDir)
+
+    logger.info('Using the following settings: %s',args)
+    # Pick algorithm
+    if args.nb:
+        logger.info('Using sparse Naive Bayes for learning.')
+        model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
+    elif args.snow:
+        logger.info('Using naive bayes (SNoW) for learning.')
+        model = SNoW()
+    elif args.predef:
+        logger.info('Using predefined predictions.')
+        model = Predefined(args.predef)
+    else:
+        logger.info('No algorithm specified. Using sparse Naive Bayes.')
+        model = sparseNBClassifier(args.NBDefaultPriorWeight,args.NBPosWeight,args.NBDefVal)
+
+    # Initializing model
+    if args.init:
+        logger.info('Initializing Model.')
+        startTime = time()
+
+        # Load all data
+        dicts = Dictionaries()
+        dicts.init_all(args)
+        
+        # Create Model
+        trainData = dicts.featureDict.keys()
+        model.initializeModel(trainData,dicts)
+
+        if args.learnTheories:
+            depFile = os.path.join(args.inputDir,args.depFile)
+            theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
+            theoryModels.init(depFile,dicts)
+            theoryModels.save(args.theoryFile)
+            
+        model.save(args.modelFile)
+        dicts.save(args.dictsFile)
+
+        logger.info('All Done. %s seconds needed.',round(time()-startTime,2))
+        return 0
+    # Create predictions and/or update model
+    else:
+        lineCounter = 1
+        statementCounter = 1
+        computeStats = False
+        dicts = Dictionaries()
+        theoryModels = TheoryModels(args.theoryDefValPos,args.theoryDefValNeg,args.theoryPosWeight)
+        # Load Files
+        if os.path.isfile(args.dictsFile):
+            #logger.info('Loading Dictionaries')
+            #startTime = time()
+            dicts.load(args.dictsFile)            
+            #logger.info('Done %s',time()-startTime)
+        if os.path.isfile(args.modelFile):
+            #logger.info('Loading Model')
+            #startTime = time()
+            model.load(args.modelFile)            
+            #logger.info('Done %s',time()-startTime)
+        if os.path.isfile(args.theoryFile) and args.learnTheories:
+            #logger.info('Loading Theory Models')
+            #startTime = time()
+            theoryModels.load(args.theoryFile)
+            #logger.info('Done %s',time()-startTime)
+        logger.info('All loading completed')
+
+        # IO Streams
+        OS = open(args.predictions,'w')
+        IS = open(args.inputFile,'r')
+
+        # Statistics
+        if args.statistics:
+            stats = Statistics(args.cutOff)
+            if args.learnTheories:
+                theoryStats = TheoryStatistics()
+
+        predictions = None
+        predictedTheories = None
+        #Reading Input File
+        for line in IS:
+#           try:
+            if True:
+                if line.startswith('!'):
+                    problemId = dicts.parse_fact(line)    
+                    # Statistics
+                    if args.statistics and computeStats:
+                        computeStats = False
+                        # Assume '!' comes after '?'
+                        if args.predef:
+                            predictions = model.predict(problemId)
+                        if args.learnTheories:
+                            tmp = [dicts.idNameDict[x] for x in dicts.dependenciesDict[problemId]]
+                            usedTheories = set([x.split('.')[0] for x in tmp]) 
+                            theoryStats.update((dicts.idNameDict[problemId]).split('.')[0],predictedTheories,usedTheories,len(theoryModels.accessibleTheories))                        
+                        stats.update(predictions,dicts.dependenciesDict[problemId],statementCounter)
+                        if not stats.badPreds == []:
+                            bp = string.join([str(dicts.idNameDict[x]) for x in stats.badPreds], ',')
+                            logger.debug('Bad predictions: %s',bp)
+
+                    statementCounter += 1
+                    # Update Dependencies, p proves p
+                    dicts.dependenciesDict[problemId] = [problemId]+dicts.dependenciesDict[problemId]
+                    if args.learnTheories:
+                        theoryModels.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts)
+                    if args.snow:
+                        model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId],dicts)
+                    else:
+                        model.update(problemId,dicts.featureDict[problemId],dicts.dependenciesDict[problemId])
+                elif line.startswith('p'):
+                    # Overwrite old proof.
+                    problemId,newDependencies = dicts.parse_overwrite(line)
+                    newDependencies = [problemId]+newDependencies
+                    model.overwrite(problemId,newDependencies,dicts)
+                    if args.learnTheories:
+                        theoryModels.overwrite(problemId,newDependencies,dicts)
+                    dicts.dependenciesDict[problemId] = newDependencies
+                elif line.startswith('?'):               
+                    startTime = time()
+                    computeStats = True
+                    if args.predef:
+                        continue
+                    name,features,accessibles,hints = dicts.parse_problem(line)  
+                        
+                    # Create predictions
+                    logger.info('Starting computation for problem on line %s',lineCounter)
+                    # Update Models with hints
+                    if not hints == []:
+                        if args.learnTheories:
+                            accessibleTheories = set([(dicts.idNameDict[x]).split('.')[0] for x in accessibles])
+                            theoryModels.update_with_acc('hints',features,hints,dicts,accessibleTheories)
+                        if args.snow:
+                            pass
+                        else:
+                            model.update('hints',features,hints)
+
+                    # Predict premises
+                    if args.learnTheories:
+                        predictedTheories,accessibles = theoryModels.predict(features,accessibles,dicts)
+
+                    # Add additional features on premise lvl if sine is enabled
+                    if args.sineFeatures:
+                        origFeatures = [f for f,_w in features]
+                        secondaryFeatures = []
+                        for f in origFeatures:
+                            if dicts.featureCountDict[f] == 1:
+                                continue
+                            triggeredFormulas = dicts.featureTriggeredFormulasDict[f]                                
+                            for formula in triggeredFormulas: 
+                                tFeatures = dicts.triggerFeaturesDict[formula]                                
+                                #tFeatures = [ff for ff,_fw in dicts.featureDict[formula]]
+                                newFeatures = set(tFeatures).difference(secondaryFeatures+origFeatures)
+                            for fNew in newFeatures:
+                                secondaryFeatures.append((fNew,args.sineWeight))
+                        predictionsFeatures = features+secondaryFeatures
+                    else:
+                        predictionsFeatures = features                    
+                    predictions,predictionValues = model.predict(predictionsFeatures,accessibles,dicts)
+                    assert len(predictions) == len(predictionValues)
+                    
+                    # Delete hints
+                    if not hints == []:
+                        if args.learnTheories:
+                            theoryModels.delete('hints',features,hints,dicts)
+                        if args.snow:
+                            pass
+                        else:
+                            model.delete('hints',features,hints)
+
+                    logger.info('Done. %s seconds needed.',round(time()-startTime,2))
+                    # Output        
+                    predictionNames = [str(dicts.idNameDict[p]) for p in predictions[:args.numberOfPredictions]]
+                    predictionValues = [str(x) for x in predictionValues[:args.numberOfPredictions]]
+                    predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))]
+                    predictionsString = string.join(predictionsStringList,' ')
+                    outString = '%s: %s' % (name,predictionsString)
+                    OS.write('%s\n' % outString)
+                else:
+                    logger.warning('Unspecified input format: \n%s',line)
+                    sys.exit(-1)
+                lineCounter += 1
+            """
+            except:
+                logger.warning('An error occurred on line %s .',line)
+                lineCounter += 1
+                continue
+            """
+        OS.close()
+        IS.close()
+
+        # Statistics
+        if args.statistics:
+            if args.learnTheories:
+                theoryStats.printAvg()
+            stats.printAvg()
+
+        # Save
+        if args.saveModel:
+            model.save(args.modelFile)
+            if args.learnTheories:
+                theoryModels.save(args.theoryFile)
+        dicts.save(args.dictsFile)
+        if not args.saveStats == None:
+            if args.learnTheories:
+                theoryStatsFile = os.path.join(args.outputDir,'theoryStats')
+                theoryStats.save(theoryStatsFile)
+            statsFile = os.path.join(args.outputDir,args.saveStats)
+            stats.save(statsFile)
+    return 0
+
+if __name__ == '__main__':
+    # Cezary Auth  
+    args = ['--statistics', '--init', '--inputDir', '../data/20130118/Jinja', '--log', '../tmp/auth.log', '--theoryFile', '../tmp/t0', '--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0','--NBDefaultPriorWeight', '20.0', '--NBDefVal', '-15.0', '--NBPosWeight', '10.0']
+    mash(args)
+    args = ['-i', '../data/20130118/Jinja/mash_commands', '-p', '../tmp/auth.pred0', '--statistics', '--cutOff', '500', '--log', '../tmp/auth.log','--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0']
+    mash(args)   
+
+    #sys.exit(mash(args))
+    sys.exit(mash())
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/mashTest.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/mashTest.py	Tue Aug 20 22:24:02 2013 +0200
@@ -0,0 +1,12 @@
+'''
+Created on Aug 20, 2013
+
+@author: daniel
+'''
+from mash import mash
+
+if __name__ == "__main__":
+    args = ['--statistics', '--init', '--inputDir', '../data/20130118/Jinja', '--log', '../tmp/auth.log', '--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0','--NBDefaultPriorWeight', '20.0', '--NBDefVal', '-15.0', '--NBPosWeight', '10.0']
+    mash(args)
+    args = ['-i', '../data/20130118/Jinja/mash_commands', '-p', '../tmp/auth.pred0', '--statistics', '--cutOff', '500', '--log', '../tmp/auth.log','--modelFile', '../tmp/m0', '--dictsFile', '../tmp/d0']
+    mash(args) 
\ No newline at end of file
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/parameters.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/parameters.py	Tue Aug 20 22:24:02 2013 +0200
@@ -0,0 +1,46 @@
+import datetime
+from argparse import ArgumentParser,RawDescriptionHelpFormatter
+
+def init_parser(argv):
+    # Set up command-line parser
+    parser = ArgumentParser(description='MaSh - Machine Learning for Sledgehammer.  \n\n\
+    MaSh allows to use different machine learning algorithms to predict relevant facts for Sledgehammer.\n\n\
+    --------------- Example Usage ---------------\n\
+    First initialize:\n./mash.py -l test.log -o ../tmp/ --init --inputDir ../data/Jinja/ \n\
+    Then create predictions:\n./mash.py -i ../data/Jinja/mash_commands -p ../data/Jinja/mash_suggestions -l test.log -o ../tmp/ --statistics\n\
+    \n\n\
+    Author: Daniel Kuehlwein, July 2012',formatter_class=RawDescriptionHelpFormatter)
+    parser.add_argument('-i','--inputFile',help='File containing all problems to be solved.')
+    parser.add_argument('-o','--outputDir', default='../tmp/',help='Directory where all created files are stored. Default=../tmp/.')
+    parser.add_argument('-p','--predictions',default='../tmp/%s.predictions' % datetime.datetime.now(),
+                        help='File where the predictions stored. Default=../tmp/dateTime.predictions.')
+    parser.add_argument('--numberOfPredictions',default=500,help="Default number of premises to write in the output. Default=500.",type=int)
+    
+    parser.add_argument('--init',default=False,action='store_true',help="Initialize Mash. Requires --inputDir to be defined. Default=False.")
+    parser.add_argument('--inputDir',\
+                        help='Directory containing all the input data. MaSh expects the following files: mash_features,mash_dependencies,mash_accessibility')
+    parser.add_argument('--depFile', default='mash_dependencies',
+                        help='Name of the file with the premise dependencies. The file must be in inputDir. Default = mash_dependencies')
+    
+    parser.add_argument('--algorithm',default='nb',action='store_true',help="Which learning algorithm is used. nb = Naive Bayes,predef=predefined. Default=nb.")
+    # NB Parameters
+    parser.add_argument('--NBDefaultPriorWeight',default=20.0,help="Initializes classifiers with value * p |- p. Default=20.0.",type=float)
+    parser.add_argument('--NBDefVal',default=-15.0,help="Default value for unknown features. Default=-15.0.",type=float)
+    parser.add_argument('--NBPosWeight',default=10.0,help="Weight value for positive features. Default=10.0.",type=float)
+    # TODO: Rename to sineFeatures
+    parser.add_argument('--sineFeatures',default=False,action='store_true',help="Uses a SInE like prior for premise lvl predictions. Default=False.")
+    parser.add_argument('--sineWeight',default=0.5,help="How much the SInE prior is weighted. Default=0.5.",type=float)
+    
+    parser.add_argument('--statistics',default=False,action='store_true',help="Create and show statistics for the top CUTOFF predictions.\
+                        WARNING: This will make the program a lot slower! Default=False.")
+    parser.add_argument('--saveStats',default=None,help="If defined, stores the statistics in the filename provided.")
+    parser.add_argument('--cutOff',default=500,help="Option for statistics. Only consider the first cutOff predictions. Default=500.",type=int)
+    parser.add_argument('-l','--log', default='../tmp/%s.log' % datetime.datetime.now(), help='Log file name. Default=../tmp/dateTime.log')
+    parser.add_argument('-q','--quiet',default=False,action='store_true',help="If enabled, only print warnings. Default=False.")
+    parser.add_argument('--modelFile', default='../tmp/model.pickle', help='Model file name. Default=../tmp/model.pickle')
+    parser.add_argument('--dictsFile', default='../tmp/dict.pickle', help='Dict file name. Default=../tmp/dict.pickle')
+    
+    parser.add_argument('--port', default='9255', help='Port of the Mash server. Default=9255',type=int)
+    parser.add_argument('--host', default='localhost', help='Host of the Mash server. Default=localhost')
+    args = parser.parse_args(argv)    
+    return args
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/predefined.py	Tue Aug 20 22:24:02 2013 +0200
@@ -35,7 +35,11 @@
             name = line[0].strip()
             predId = dicts.get_name_id(name)
             line = line[1].split()
-            preds = [dicts.get_name_id(x.strip())for x in line]
+            predsTmp = []
+            for x in line:
+                x = x.split('=')
+                predsTmp.append(x[0])
+            preds = [dicts.get_name_id(x.strip())for x in predsTmp]
             self.predictions[predId] = preds
         IS.close()        
 
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/server.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/server.py	Tue Aug 20 22:24:02 2013 +0200
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+#     Title:      HOL/Tools/Sledgehammer/MaSh/src/server.py
+#     Author:     Daniel Kuehlwein, ICIS, Radboud University Nijmegen
+#     Copyright   2013
+#
+# The MaSh Server.
+
+import SocketServer,os,string,logging
+from multiprocessing import Manager
+from time import time
+from dictionaries import Dictionaries
+from parameters import init_parser
+from sparseNaiveBayes import sparseNBClassifier
+from stats import Statistics
+
+
+class ThreadingTCPServer(SocketServer.ThreadingTCPServer): 
+    def __init__(self, *args, **kwargs):
+        SocketServer.ThreadingTCPServer.__init__(self,*args, **kwargs)
+        self.manager = Manager()
+        self.lock = Manager().Lock()
+
+class MaShHandler(SocketServer.BaseRequestHandler):
+
+    def init(self,argv):
+        if argv == '':
+            self.server.args = init_parser([])
+        else:
+            argv = argv.split(';')
+            self.server.args = init_parser(argv)
+        # Pick model
+        if self.server.args.algorithm == 'nb':
+            self.server.model = sparseNBClassifier(self.server.args.NBDefaultPriorWeight,self.server.args.NBPosWeight,self.server.args.NBDefVal)
+        else: # Default case
+            self.server.model = sparseNBClassifier(self.server.args.NBDefaultPriorWeight,self.server.args.NBPosWeight,self.server.args.NBDefVal)
+        # Load all data
+        # TODO: rewrite dicts for concurrency and without sine
+        self.server.dicts = Dictionaries()
+        if os.path.isfile(self.server.args.dictsFile):
+            self.server.dicts.load(self.server.args.dictsFile)            
+        elif self.server.args.init:
+            self.server.dicts.init_all(self.server.args)
+        # Create Model
+        if os.path.isfile(self.server.args.modelFile):
+            self.server.model.load(self.server.args.modelFile)          
+        elif self.server.args.init:
+            trainData = self.server.dicts.featureDict.keys()
+            self.server.model.initializeModel(trainData,self.server.dicts)
+            
+        if self.server.args.statistics:
+            self.server.stats = Statistics(self.server.args.cutOff)
+            self.server.statementCounter = 1
+            self.server.computeStats = False
+
+        # Set up logging
+        logging.basicConfig(level=logging.DEBUG,
+                            format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
+                            datefmt='%d-%m %H:%M:%S',
+                            filename=self.server.args.log+'server',
+                            filemode='w')    
+        self.server.logger = logging.getLogger('server')
+        self.server.logger.debug('Initialized in '+str(round(time()-self.startTime,2))+' seconds.')
+        self.request.sendall('Server initialized in '+str(round(time()-self.startTime,2))+' seconds.')
+        self.server.callCounter = 1
+
+    def update(self):
+        problemId = self.server.dicts.parse_fact(self.data)    
+        # Statistics
+        if self.server.args.statistics and self.server.computeStats:
+            self.server.computeStats = False
+            # Assume '!' comes after '?'
+            if self.server.args.algorithm == 'predef':
+                self.server.predictions = self.server.model.predict(problemId)
+            self.server.stats.update(self.server.predictions,self.server.dicts.dependenciesDict[problemId],self.server.statementCounter)
+            if not self.server.stats.badPreds == []:
+                bp = string.join([str(self.server.dicts.idNameDict[x]) for x in self.server.stats.badPreds], ',')
+                self.server.logger.debug('Poor predictions: %s',bp)
+            self.server.statementCounter += 1
+
+        # Update Dependencies, p proves p
+        self.server.dicts.dependenciesDict[problemId] = [problemId]+self.server.dicts.dependenciesDict[problemId]
+        self.server.model.update(problemId,self.server.dicts.featureDict[problemId],self.server.dicts.dependenciesDict[problemId])
+
+    def overwrite(self):
+        # Overwrite old proof.
+        problemId,newDependencies = self.server.dicts.parse_overwrite(self.data)
+        newDependencies = [problemId]+newDependencies
+        self.server.model.overwrite(problemId,newDependencies,self.server.dicts)
+        self.server.dicts.dependenciesDict[problemId] = newDependencies
+        
+    def predict(self):
+        self.server.computeStats = True
+        if self.server.args.algorithm == 'predef':
+            return
+        name,features,accessibles,hints,numberOfPredictions = self.server.dicts.parse_problem(self.data)  
+        if numberOfPredictions == None:
+            numberOfPredictions = self.server.args.numberOfPredictions
+        if not hints == []:
+            self.server.model.update('hints',features,hints)
+        
+        # Create predictions
+        self.server.logger.debug('Starting computation for line %s',self.server.callCounter)
+        predictionsFeatures = features                    
+        self.server.predictions,predictionValues = self.server.model.predict(predictionsFeatures,accessibles,self.server.dicts)
+        assert len(self.server.predictions) == len(predictionValues)
+        self.server.logger.debug('Time needed: '+str(round(time()-self.startTime,2)))
+
+        # Output        
+        predictionNames = [str(self.server.dicts.idNameDict[p]) for p in self.server.predictions[:numberOfPredictions]]
+        predictionValues = [str(x) for x in predictionValues[:self.server.args.numberOfPredictions]]
+        predictionsStringList = ['%s=%s' % (predictionNames[i],predictionValues[i]) for i in range(len(predictionNames))]
+        predictionsString = string.join(predictionsStringList,' ')
+        outString = '%s: %s' % (name,predictionsString)
+        self.request.sendall(outString)
+    
+    def shutdown(self):
+        self.request.sendall('Shutting down server.')
+        # Save Models
+        self.server.model.save(self.server.args.modelFile)
+        self.server.dicts.save(self.server.args.dictsFile)
+        if not self.server.args.saveStats == None:
+            statsFile = os.path.join(self.server.args.outputDir,self.server.args.saveStats)
+            self.server.stats.save(statsFile)
+        self.server.shutdown()
+    
+    def handle(self):
+        # self.request is the TCP socket connected to the client
+        self.data = self.request.recv(262144).strip()
+        #print "{} wrote:".format(self.client_address[0])
+        #print self.data
+        self.startTime = time()
+        if self.data == 's':
+            self.shutdown()            
+        elif self.data.startswith('i'):            
+            self.init(self.data[2:])
+        elif self.data.startswith('!'):
+            self.update()
+        elif self.data.startswith('p'):
+            self.overwrite()
+        elif self.data.startswith('?'):               
+            self.predict()
+        elif self.data == '':
+            # Empty Socket
+            return
+        elif self.data == 'avgStats':
+            self.request.sendall(self.server.stats.printAvg())            
+        else:
+            self.request.sendall('Unspecified input format: \n%s',self.data)
+        self.server.callCounter += 1
+
+        #returnString = 'Time needed: '+str(round(time()-self.startTime,2))
+        #print returnString
+
+if __name__ == "__main__":
+    HOST, PORT = "localhost", 9255
+    #print 'Started Server'
+    # Create the server, binding to localhost on port 9999
+    SocketServer.TCPServer.allow_reuse_address = True
+    server = ThreadingTCPServer((HOST, PORT), MaShHandler)
+    #server = SocketServer.TCPServer((HOST, PORT), MaShHandler)
+
+    # Activate the server; this will keep running until you
+    # interrupt the program with Ctrl-C
+    server.serve_forever()        
+
+
+
+    
+    
+    
\ No newline at end of file
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/snow.py
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/snow.py	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/snow.py	Tue Aug 20 22:24:02 2013 +0200
@@ -27,7 +27,7 @@
         self.SNoWTrainFile = '../tmp/snow.train'
         self.SNoWTestFile = '../snow.test'
         self.SNoWNetFile = '../tmp/snow.net'
-        self.defMaxNameId = 20000
+        self.defMaxNameId = 100000
 
     def initializeModel(self,trainData,dicts):
         """
@@ -51,6 +51,7 @@
         # Build Model
         self.logger.debug('Building Model START.')
         snowTrainCommand = '../bin/snow -train -M+ -I %s -F %s -g- -B :0-%s' % (self.SNoWTrainFile,self.SNoWNetFile,dicts.maxNameId-1)
+        #print snowTrainCommand
         #snowTrainCommand = '../bin/snow -train -M+ -I %s -F %s -g- -B :0-%s' % (self.SNoWTrainFile,self.SNoWNetFile,self.defMaxNameId-1)
         args = shlex.split(snowTrainCommand)
         p = subprocess.Popen(args,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/sparseNaiveBayes.py	Tue Aug 20 22:24:02 2013 +0200
@@ -95,6 +95,7 @@
         For each accessible, predicts the probability of it being useful given the features.
         Returns a ranking of the accessibles.
         """
+        tau = 0.01 # Jasmin, change value here
         predictions = []
         for a in accessibles:
             posA = self.counts[a][0]
@@ -112,6 +113,11 @@
                         resultA += w*log(float(self.posWeight*fWeightsA[f])/posA)
                 else:
                     resultA += w*self.defVal
+            if not tau == 0.0:
+                observedFeatures = [f for f,_w in features]
+                missingFeatures = list(fA.difference(observedFeatures))
+                sumOfWeights = sum([log(float(fWeightsA[x])/posA) for x in missingFeatures])
+                resultA -= tau * sumOfWeights
             predictions.append(resultA)
         predictions = array(predictions)
         perm = (-predictions).argsort()
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/spawnDaemon.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/spawnDaemon.py	Tue Aug 20 22:24:02 2013 +0200
@@ -0,0 +1,35 @@
+# http://stackoverflow.com/questions/972362/spawning-process-from-python/972383#972383
+import os
+
+def spawnDaemon(path_to_executable, *args):
+    """Spawn a completely detached subprocess (i.e., a daemon).
+
+    E.g. for mark:
+    spawnDaemon("../bin/producenotify.py", "producenotify.py", "xx")
+    """
+    # fork the first time (to make a non-session-leader child process)
+    try:
+        pid = os.fork()
+    except OSError, e:
+        raise RuntimeError("1st fork failed: %s [%d]" % (e.strerror, e.errno))
+    if pid != 0:
+        # parent (calling) process is all done
+        return
+
+    # detach from controlling terminal (to make child a session-leader)
+    os.setsid()
+    try:
+        pid = os.fork()
+    except OSError, e:
+        raise RuntimeError("2nd fork failed: %s [%d]" % (e.strerror, e.errno))
+        raise Exception, "%s [%d]" % (e.strerror, e.errno)
+    if pid != 0:
+        # child process is all done
+        os._exit(0)
+
+    # and finally let's execute the executable for the daemon!
+    try:
+        os.execv(path_to_executable, [path_to_executable])
+    except Exception, e:
+        # oops, we're cut off from the world, let's just give up
+        os._exit(255)
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/stats.py
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/stats.py	Tue Aug 20 22:24:02 2013 +0200
@@ -30,6 +30,7 @@
         self.problems = 0.0
         self.cutOff = cutOff
         self.recallData = [0]*cutOff
+        self.recall100Median = []
         self.recall100Data = [0]*cutOff
         self.aucData = []
         self.premiseOccurenceCounter = {}
@@ -107,6 +108,7 @@
         self.aucData.append(auc)
         self.avgAUC += auc
         self.avgRecall100 += recall100
+        self.recall100Median.append(recall100)
         self.problems += 1
         self.badPreds = badPreds
         self.avgAvailable += available
@@ -116,8 +118,29 @@
 
     def printAvg(self):
         self.logger.info('Average results:')
-        self.logger.info('avgAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t cutOff:%s', \
-                         round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff)
+        #self.logger.info('avgAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t cutOff:%s', \
+        #                 round(100*self.avgAUC/self.problems,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),self.cutOff)
+        # HACK FOR PAPER
+        assert len(self.aucData) == len(self.recall100Median)
+        nrDataPoints = len(self.aucData)
+        if nrDataPoints % 2 == 1:
+            medianAUC = sorted(self.aucData)[nrDataPoints/2 + 1]
+        else:
+            medianAUC = float(sorted(self.aucData)[nrDataPoints/2] + sorted(self.aucData)[nrDataPoints/2 + 1])/2
+        #nrDataPoints = len(self.recall100Median)
+        if nrDataPoints % 2 == 1:
+            medianrecall100 = sorted(self.recall100Median)[nrDataPoints/2 + 1]
+        else:
+            medianrecall100 = float(sorted(self.recall100Median)[nrDataPoints/2] + sorted(self.recall100Median)[nrDataPoints/2 + 1])/2
+
+        returnString = 'avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff:%s' %\
+                         (round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianrecall100,2),self.cutOff)
+        self.logger.info(returnString)
+        return returnString
+    
+        """
+        self.logger.info('avgAUC: %s \t medianAUC: %s \t avgDepNr: %s \t avgRecall100: %s \t medianRecall100: %s \t cutOff:%s', \
+                         round(100*self.avgAUC/self.problems,2),round(100*medianAUC,2),round(self.avgDepNr/self.problems,2),round(self.avgRecall100/self.problems,2),round(medianrecall100,2),self.cutOff)
 
         #try:
         #if True:
@@ -156,6 +179,7 @@
             show()
         #except:
         #    self.logger.warning('Matplotlib module missing. Skipping graphs.')
+        """
 
     def save(self,fileName):
         oStream = open(fileName, 'wb')
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/MaSh/src/tester.py
--- a/src/HOL/Tools/Sledgehammer/MaSh/src/tester.py	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/MaSh/src/tester.py	Tue Aug 20 22:24:02 2013 +0200
@@ -16,29 +16,39 @@
         #print '%s says that %s%s = %s' % (current_process().name, func.__name__, args, result)
         outQueue.put(result)
 
-def run_mash(runId,inputDir,logFile,predictionFile,\
+def run_mash(runId,inputDir,logFile,predictionFile,predef,\
              learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
              NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
-             sineFeatures,sineWeight):
+             sineFeatures,sineWeight,quiet=True):
     # Init
     runId = str(runId)
     predictionFile = predictionFile + runId
-    args = ['--statistics','--init','--inputDir',inputDir,'-q','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,
+    args = ['--statistics','--init','--inputDir',inputDir,'--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,
             '--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\
             '--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)]    
     if learnTheories:
-        args = args + ['--learnTheories']    
+        args += ['--learnTheories']    
     if sineFeatures:
         args += ['--sineFeatures','--sineWeight',str(sineWeight)]
+    if not predef == '':
+        args += ['--predef',predef]
+    if quit:
+        args += ['-q']
+    #print args
     mash(args)
     # Run
-    args = ['-q','-i',inputFile,'-p',predictionFile,'--statistics','--cutOff','500','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,\
+    args = ['-i',inputFile,'-p',predictionFile,'--statistics','--cutOff','1024','--log',logFile,'--theoryFile','../tmp/t'+runId,'--modelFile','../tmp/m'+runId,'--dictsFile','../tmp/d'+runId,\
             '--theoryDefValPos',str(theoryDefValPos),'--theoryDefValNeg',str(theoryDefValNeg),'--theoryPosWeight',str(theoryPosWeight),\
             '--NBDefaultPriorWeight',str(NBDefaultPriorWeight),'--NBDefVal',str(NBDefVal),'--NBPosWeight',str(NBPosWeight)]
     if learnTheories:
-        args = args + ['--learnTheories']    
+        args += ['--learnTheories']    
     if sineFeatures:
         args += ['--sineFeatures','--sineWeight',str(sineWeight)]
+    if not predef == '':
+        args += ['--predef',predef]
+    if quit:
+        args += ['-q']
+    #print args
     mash(args)
 
     # Get Results
@@ -46,29 +56,42 @@
     lines =  IS.readlines()
     tmpRes = lines[-1].split()
     avgAuc = tmpRes[5]
-    avgRecall100 = tmpRes[9]
+    medianAuc = tmpRes[7]
+    avgRecall100 = tmpRes[11]
+    medianRecall100 = tmpRes[13]
     tmpTheoryRes = lines[-3].split()
-    avgTheoryPrecision = tmpTheoryRes[5] 
-    avgTheoryRecall100 = tmpTheoryRes[7]
-    avgTheoryRecall = tmpTheoryRes[9]
-    avgTheoryPredictedPercent = tmpTheoryRes[11]
+    if learnTheories:
+        avgTheoryPrecision = tmpTheoryRes[5] 
+        avgTheoryRecall100 = tmpTheoryRes[7]
+        avgTheoryRecall = tmpTheoryRes[9]
+        avgTheoryPredictedPercent = tmpTheoryRes[11]
+    else:
+        avgTheoryPrecision = 'NA' 
+        avgTheoryRecall100 = 'NA'
+        avgTheoryRecall = 'NA'
+        avgTheoryPredictedPercent = 'NA'    
     IS.close()
     
     # Delete old models
     os.remove(logFile)
     os.remove(predictionFile)
-    os.remove('../tmp/t'+runId)
+    if learnTheories:
+        os.remove('../tmp/t'+runId)
     os.remove('../tmp/m'+runId)
     os.remove('../tmp/d'+runId)
     
     outFile = open('tester','a')
     #print 'avgAuc %s avgRecall100 %s avgTheoryPrecision %s avgTheoryRecall100 %s avgTheoryRecall %s avgTheoryPredictedPercent %s'
-    outFile.write('\t'.join([str(learnTheories),str(theoryDefValPos),str(theoryDefValNeg),str(theoryPosWeight),str(NBDefaultPriorWeight),str(NBDefVal),str(NBPosWeight),str(sineFeatures),str(sineWeight),str(avgAuc),str(avgRecall100),str(avgTheoryPrecision),str(avgTheoryRecall100),str(avgTheoryRecall),str(avgTheoryPredictedPercent)])+'\n')
+    outFile.write('\t'.join([str(learnTheories),str(theoryDefValPos),str(theoryDefValNeg),str(theoryPosWeight),\
+                             str(NBDefaultPriorWeight),str(NBDefVal),str(NBPosWeight),str(sineFeatures),str(sineWeight),\
+                             str(avgAuc),str(medianAuc),str(avgRecall100),str(medianRecall100),str(avgTheoryPrecision),\
+                             str(avgTheoryRecall100),str(avgTheoryRecall),str(avgTheoryPredictedPercent)])+'\n')
     outFile.close()
     print learnTheories,'\t',theoryDefValPos,'\t',theoryDefValNeg,'\t',theoryPosWeight,'\t',\
         NBDefaultPriorWeight,'\t',NBDefVal,'\t',NBPosWeight,'\t',\
         sineFeatures,'\t',sineWeight,'\t',\
-        avgAuc,'\t',avgRecall100,'\t',avgTheoryPrecision,'\t',avgTheoryRecall100,'\t',avgTheoryRecall,'\t',avgTheoryPredictedPercent    
+        avgAuc,'\t',medianAuc,'\t',avgRecall100,'\t',medianRecall100,'\t',\
+        avgTheoryPrecision,'\t',avgTheoryRecall100,'\t',avgTheoryRecall,'\t',avgTheoryPredictedPercent    
     return learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
              NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
              sineFeatures,sineWeight,\
@@ -93,8 +116,9 @@
     #cores = 1
     # Options
     depFile = 'mash_dependencies'
+    predef = ''
     outputDir = '../tmp/'
-    numberOfPredictions = 500
+    numberOfPredictions = 1024
     
     learnTheoriesRange = [True,False]
     theoryDefValPosRange = [-x for x in range(1,20)]
@@ -107,6 +131,7 @@
     sineFeaturesRange = [True,False]    
     sineWeightRange = [0.1,0.25,0.5,0.75,1.0]
     
+    """
     # Test 1
     inputFile = '../data/20121227b/Auth/mash_commands'
     inputDir = '../data/20121227b/Auth/'
@@ -179,4 +204,78 @@
                        NBDefaultPriorWeight,NBDefVal,NBPosWeight,sineFeatures,sineWeight,\
                        learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight)              
         print 'bestAvgRecall100 %s bestNBDefaultPriorWeight %s bestNBDefVal %s bestNBPosWeight %s bestSineFeatures %s bestSineWeight %s',bestAvgRecall100,bestNBDefaultPriorWeight,bestNBDefVal,bestNBPosWeight,bestSineFeatures,bestSineWeight
-    
\ No newline at end of file
+    
+    """
+    # Test 2
+    #inputDir = '../data/20130118/Jinja'
+    inputDir = '../data/notheory/Prob'
+    inputFile = inputDir+'/mash_commands'
+    #inputFile = inputDir+'/mash_prover_commands'
+    
+    #depFile = 'mash_prover_dependencies'
+    depFile = 'mash_dependencies'    
+    outputDir = '../tmp/'
+    numberOfPredictions = 1024
+    predictionFile = '../tmp/auth.pred'
+    logFile = '../tmp/auth.log'
+    learnTheories = False
+    theoryDefValPos = -7.5
+    theoryDefValNeg = -10.0
+    theoryPosWeight = 2.0
+    NBDefaultPriorWeight = 20.0
+    NBDefVal =- 15.0
+    NBPosWeight = 10.0
+    sineFeatures = False
+    sineWeight =  0.5    
+    quiet = False
+    print inputDir
+    
+    #predef = inputDir+'/mash_prover_suggestions'
+    predef = inputDir+'/mash_suggestions'    
+    print 'Mash Isar'
+    run_mash(0,inputDir,logFile,predictionFile,predef,\
+             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+             sineFeatures,sineWeight,quiet=quiet)
+
+    #"""
+    predef = inputDir+'/mesh_suggestions'
+    #predef = inputDir+'/mesh_prover_suggestions'
+    print 'Mesh Isar'
+    run_mash(0,inputDir,logFile,predictionFile,predef,\
+             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+             sineFeatures,sineWeight,quiet=quiet)
+    #"""
+    predef = inputDir+'/mepo_suggestions'
+    print 'Mepo Isar'
+    run_mash(0,inputDir,logFile,predictionFile,predef,\
+             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+             sineFeatures,sineWeight,quiet=quiet)    
+    
+    """
+    inputFile = inputDir+'/mash_prover_commands'
+    depFile = 'mash_prover_dependencies'
+    
+    predef = inputDir+'/mash_prover_suggestions'    
+    print 'Mash Prover Isar'
+    run_mash(0,inputDir,logFile,predictionFile,predef,\
+             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+             sineFeatures,sineWeight,quiet=quiet)
+
+    predef = inputDir+'/mesh_prover_suggestions'
+    print 'Mesh Prover Isar'
+    run_mash(0,inputDir,logFile,predictionFile,predef,\
+             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+             sineFeatures,sineWeight,quiet=quiet)
+
+    predef = inputDir+'/mepo_suggestions'
+    print 'Mepo Prover Isar'
+    run_mash(0,inputDir,logFile,predictionFile,predef,\
+             learnTheories,theoryDefValPos,theoryDefValNeg,theoryPosWeight,\
+             NBDefaultPriorWeight,NBDefVal,NBPosWeight,\
+             sineFeatures,sineWeight,quiet=quiet)
+    #"""
\ No newline at end of file
diff -r 186535065f5c -r ae863ed9f64f src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Tue Aug 20 11:39:53 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Tue Aug 20 22:24:02 2013 +0200
@@ -43,8 +43,10 @@
     val relearn :
       Proof.context -> bool -> (string * string list) list -> unit
     val query :
-      Proof.context -> bool -> bool -> int
-      -> string list * (string * real) list * string list -> string list
+      Proof.context -> bool -> int
+      -> (string * string list * (string * real) list * string list) list
+         * string list * string list * (string * real) list
+      -> string list
   end
 
   val mash_unlearn : Proof.context -> unit
@@ -69,6 +71,8 @@
     Proof.context -> params -> string -> int -> raw_fact list
     -> string Symtab.table * string Symtab.table -> thm
     -> bool * string list
+  val attach_parents_to_facts :
+    ('a * thm) list -> ('a * thm) list -> (string list * ('a * thm)) list
   val weight_mepo_facts : 'a list -> ('a * real) list
   val weight_mash_facts : 'a list -> ('a * real) list
   val find_mash_suggestions :
@@ -80,8 +84,6 @@
   val mash_learn_proof :
     Proof.context -> params -> string -> term -> ('a * thm) list -> thm list
     -> unit
-  val attach_parents_to_facts :
-    ('a * thm) list -> ('a * thm) list -> (string list * ('a * thm)) list
   val mash_learn :
     Proof.context -> params -> fact_override -> thm list -> bool -> unit
   val is_mash_enabled : unit -> bool
@@ -140,11 +142,10 @@
 
 fun write_file banner (xs, f) path =
   (case banner of SOME s => File.write path s | NONE => ();
-   xs |> chunk_list 500
-      |> List.app (File.append path o space_implode "" o map f))
+   xs |> chunk_list 500 |> List.app (File.append path o implode o map f))
   handle IO.Io _ => ()
 
-fun run_mash_tool ctxt overlord save max_suggs write_cmds read_suggs =
+fun run_mash_tool ctxt overlord write_cmds read_suggs =
   let
     val (temp_dir, serial) =
       if overlord then (getenv "ISABELLE_HOME_USER", "")
@@ -156,17 +157,13 @@
     val cmd_file = temp_dir ^ "/mash_commands" ^ serial
     val cmd_path = Path.explode cmd_file
     val model_dir = File.shell_path (mash_model_dir ())
-    val core =
-      "--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^
-      " --numberOfPredictions " ^ string_of_int max_suggs ^
-      (* " --learnTheories" ^ *) (if save then " --saveModel" else "")
+    val core = "--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file
     val command =
       "cd \"$ISABELLE_SLEDGEHAMMER_MASH\"/src; " ^
       "./mash.py --quiet" ^
       " --outputDir " ^ model_dir ^
       " --modelFile=" ^ model_dir ^ "/model.pickle" ^
       " --dictsFile=" ^ model_dir ^ "/dict.pickle" ^
-      " --theoryFile=" ^ model_dir ^ "/theory.pickle" ^
       " --log " ^ log_file ^ " " ^ core ^
       " >& " ^ err_file
     fun run_on () =
@@ -226,15 +223,13 @@
 fun str_of_relearn (name, deps) =
   "p " ^ encode_str name ^ ": " ^ encode_strs deps ^ "\n"
 
-fun str_of_query learn_hints (parents, feats, hints) =
-  (if not learn_hints orelse null hints then ""
-   else str_of_learn (freshish_name (), parents, feats, hints)) ^
-  "? " ^ encode_strs parents ^ "; " ^ encode_features feats ^
-  (if learn_hints orelse null hints then "" else "; " ^ encode_strs hints) ^
-  "\n"
+fun str_of_query max_suggs (learns, hints, parents, feats) =
+  implode (map str_of_learn learns) ^
+  "? " ^ string_of_int max_suggs ^ " # " ^ encode_strs parents ^ "; " ^
+  encode_features feats ^
+  (if null hints then "" else "; " ^ encode_strs hints) ^ "\n"
 
-(* The weights currently returned by "mash.py" are too spaced out to make any
-   sense. *)
+(* The suggested weights don't make much sense. *)
 fun extract_suggestion sugg =
   case space_explode "=" sugg of
     [name, _ (* weight *)] =>
@@ -264,18 +259,17 @@
   | learn ctxt overlord learns =
     (trace_msg ctxt (fn () => "MaSh learn " ^
          elide_string 1000 (space_implode " " (map #1 learns)));
-     run_mash_tool ctxt overlord true 0 (learns, str_of_learn) (K ()))
+     run_mash_tool ctxt overlord (learns, str_of_learn) (K ()))
 
 fun relearn _ _ [] = ()
   | relearn ctxt overlord relearns =
     (trace_msg ctxt (fn () => "MaSh relearn " ^
          elide_string 1000 (space_implode " " (map #1 relearns)));
-     run_mash_tool ctxt overlord true 0 (relearns, str_of_relearn) (K ()))
+     run_mash_tool ctxt overlord (relearns, str_of_relearn) (K ()))
 
-fun query ctxt overlord learn_hints max_suggs (query as (_, feats, hints)) =
+fun query ctxt overlord max_suggs (query as (learns, hints, _, feats)) =
   (trace_msg ctxt (fn () => "MaSh query " ^ encode_features feats);
-   run_mash_tool ctxt overlord (learn_hints andalso not (null hints))
-       max_suggs ([query], str_of_query learn_hints)
+   run_mash_tool ctxt overlord ([query], str_of_query max_suggs)
        (fn suggs =>
            case suggs () of
              [] => []
@@ -329,15 +323,18 @@
   string_of_int (length (Graph.minimals G)) ^ " minimal, " ^
   string_of_int (length (Graph.maximals G)) ^ " maximal"
 
-type mash_state = {access_G : unit Graph.T, dirty : string list option}
+type mash_state =
+  {access_G : unit Graph.T,
+   num_known_facts : int,
+   dirty : string list option}
 
-val empty_state = {access_G = Graph.empty, dirty = SOME []}
+val empty_state = {access_G = Graph.empty, num_known_facts = 0, dirty = SOME []}
 
 local
 
-val version = "*** MaSh version 20130819a ***"
+val version = "*** MaSh version 20130820 ***"
 
-exception Too_New of unit
+exception FILE_VERSION_TOO_NEW of unit
 
 fun extract_node line =
   case space_explode ":" line of
@@ -365,24 +362,27 @@
              | SOME (name, parents, kind) =>
                update_access_graph_node (name, kind)
                #> fold (add_edge_to name) parents
-           val access_G =
+           val (access_G, num_known_facts) =
              case string_ord (version', version) of
                EQUAL =>
-               try_graph ctxt "loading state" Graph.empty (fn () =>
-                   fold add_node node_lines Graph.empty)
+               (try_graph ctxt "loading state" Graph.empty (fn () =>
+                    fold add_node node_lines Graph.empty),
+                length node_lines)
              | LESS =>
-               (MaSh.unlearn ctxt; Graph.empty) (* can't parse old file *)
-             | GREATER => raise Too_New ()
+               (* can't parse old file *)
+               (MaSh.unlearn ctxt; (Graph.empty, 0))
+             | GREATER => raise FILE_VERSION_TOO_NEW ()
          in
            trace_msg ctxt (fn () =>
                "Loaded fact graph (" ^ graph_info access_G ^ ")");
-           {access_G = access_G, dirty = SOME []}
+           {access_G = access_G, num_known_facts = num_known_facts,
+            dirty = SOME []}
          end
        | _ => empty_state)
     end
 
 fun save _ (state as {dirty = SOME [], ...}) = state
-  | save ctxt {access_G, dirty} =
+  | save ctxt {access_G, num_known_facts, dirty} =
     let
       fun str_of_entry (name, parents, kind) =
         str_of_proof_kind kind ^ " " ^ encode_str name ^ ": " ^
@@ -402,7 +402,7 @@
              SOME dirty =>
              "; " ^ string_of_int (length dirty) ^ " dirty fact(s)"
            | _ => "") ^  ")");
-      {access_G = access_G, dirty = SOME []}
+      {access_G = access_G, num_known_facts = num_known_facts, dirty = SOME []}
     end
 
 val global_state =
@@ -412,7 +412,7 @@
 
 fun map_state ctxt f =
   Synchronized.change global_state (load ctxt ##> (f #> save ctxt))
-  handle Too_New () => ()
+  handle FILE_VERSION_TOO_NEW () => ()
 
 fun peek_state ctxt f =
   Synchronized.change_result global_state
@@ -717,6 +717,9 @@
      | NONE => false)
   | is_size_def _ _ = false
 
+fun no_dependencies_for_status status =
+  status = Non_Rec_Def orelse status = Rec_Def
+
 fun trim_dependencies deps =
   if length deps > max_dependencies then NONE else SOME deps
 
@@ -784,159 +787,9 @@
 
 (*** High-level communication with MaSh ***)
 
-fun maximal_wrt_graph G keys =
-  let
-    val tab = Symtab.empty |> fold (fn name => Symtab.default (name, ())) keys
-    fun insert_new seen name =
-      not (Symtab.defined seen name) ? insert (op =) name
-    fun num_keys keys = Graph.Keys.fold (K (Integer.add 1)) keys 0
-    fun find_maxes _ (maxs, []) = map snd maxs
-      | find_maxes seen (maxs, new :: news) =
-        find_maxes
-            (seen |> num_keys (Graph.imm_succs G new) > 1
-                     ? Symtab.default (new, ()))
-            (if Symtab.defined tab new then
-               let
-                 val newp = Graph.all_preds G [new]
-                 fun is_ancestor x yp = member (op =) yp x
-                 val maxs =
-                   maxs |> filter (fn (_, max) => not (is_ancestor max newp))
-               in
-                 if exists (is_ancestor new o fst) maxs then
-                   (maxs, news)
-                 else
-                   ((newp, new)
-                    :: filter_out (fn (_, max) => is_ancestor max newp) maxs,
-                    news)
-               end
-             else
-               (maxs, Graph.Keys.fold (insert_new seen)
-                                      (Graph.imm_preds G new) news))
-  in find_maxes Symtab.empty ([], Graph.maximals G) end
-
-fun maximal_wrt_access_graph access_G =
-  map (nickname_of_thm o snd)
-  #> maximal_wrt_graph access_G
-
-fun is_fact_in_graph access_G get_th fact =
-  can (Graph.get_node access_G) (nickname_of_thm (get_th fact))
-
-(* FUDGE *)
-fun weight_of_mepo_fact rank =
-  Math.pow (0.62, log2 (Real.fromInt (rank + 1)))
-
-fun weight_mepo_facts facts =
-  facts ~~ map weight_of_mepo_fact (0 upto length facts - 1)
-
-val weight_raw_mash_facts = weight_mepo_facts
-val weight_mash_facts = weight_raw_mash_facts
-
-(* FUDGE *)
-fun weight_of_proximity_fact rank =
-  Math.pow (1.3, 15.5 - 0.2 * Real.fromInt rank) + 15.0
-
-fun weight_proximity_facts facts =
-  facts ~~ map weight_of_proximity_fact (0 upto length facts - 1)
-
-val max_proximity_facts = 100
-
-fun find_mash_suggestions _ _ [] _ _ raw_unknown = ([], raw_unknown)
-  | find_mash_suggestions ctxt max_facts suggs facts chained raw_unknown =
-    let
-      val raw_mash = find_suggested_facts ctxt facts suggs
-      val unknown_chained =
-        inter (Thm.eq_thm_prop o pairself snd) chained raw_unknown
-      val proximity =
-        facts |> sort (crude_thm_ord o pairself snd o swap)
-              |> take max_proximity_facts
-      val mess =
-        [(0.90 (* FUDGE *), (map (rpair 1.0) unknown_chained, [])),
-         (0.08 (* FUDGE *), (weight_raw_mash_facts raw_mash, raw_unknown)),
-         (0.02 (* FUDGE *), (weight_proximity_facts proximity, []))]
-      val unknown =
-        raw_unknown
-        |> fold (subtract (Thm.eq_thm_prop o pairself snd))
-                [unknown_chained, proximity]
-    in (mesh_facts (Thm.eq_thm_prop o pairself snd) max_facts mess, unknown) end
-
-fun mash_suggested_facts ctxt ({overlord, learn, ...} : params) prover max_facts
-                         hyp_ts concl_t facts =
-  let
-    val thy = Proof_Context.theory_of ctxt
-    val chained = facts |> filter (fn ((_, (scope, _)), _) => scope = Chained)
-    val (access_G, suggs) =
-      peek_state ctxt (fn {access_G, ...} =>
-          if Graph.is_empty access_G then
-            (access_G, [])
-          else
-            let
-              val parents = maximal_wrt_access_graph access_G facts
-              val feats =
-                features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts)
-              val hints =
-                chained |> filter (is_fact_in_graph access_G snd)
-                        |> map (nickname_of_thm o snd)
-            in
-              (access_G, MaSh.query ctxt overlord learn max_facts
-                                    (parents, feats, hints))
-            end)
-    val unknown = facts |> filter_out (is_fact_in_graph access_G snd)
-  in
-    find_mash_suggestions ctxt max_facts suggs facts chained unknown
-    |> pairself (map fact_of_raw_fact)
-  end
-
-fun learn_wrt_access_graph ctxt (name, parents, feats, deps) (learns, graph) =
-  let
-    fun maybe_learn_from from (accum as (parents, graph)) =
-      try_graph ctxt "updating graph" accum (fn () =>
-          (from :: parents, Graph.add_edge_acyclic (from, name) graph))
-    val graph = graph |> Graph.default_node (name, Isar_Proof)
-    val (parents, graph) = ([], graph) |> fold maybe_learn_from parents
-    val (deps, _) = ([], graph) |> fold maybe_learn_from deps
-  in ((name, parents, feats, deps) :: learns, graph) end
-
-fun relearn_wrt_access_graph ctxt (name, deps) (relearns, graph) =
-  let
-    fun maybe_relearn_from from (accum as (parents, graph)) =
-      try_graph ctxt "updating graph" accum (fn () =>
-          (from :: parents, Graph.add_edge_acyclic (from, name) graph))
-    val graph = graph |> update_access_graph_node (name, Automatic_Proof)
-    val (deps, _) = ([], graph) |> fold maybe_relearn_from deps
-  in ((name, deps) :: relearns, graph) end
-
-fun flop_wrt_access_graph name =
-  update_access_graph_node (name, Isar_Proof_wegen_Prover_Flop)
-
-val learn_timeout_slack = 2.0
-
-fun launch_thread timeout task =
-  let
-    val hard_timeout = time_mult learn_timeout_slack timeout
-    val birth_time = Time.now ()
-    val death_time = Time.+ (birth_time, hard_timeout)
-    val desc = ("Machine learner for Sledgehammer", "")
-  in Async_Manager.thread MaShN birth_time death_time desc task end
-
-fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) prover t facts
-                     used_ths =
-  launch_thread (timeout |> the_default one_day) (fn () =>
-      let
-        val thy = Proof_Context.theory_of ctxt
-        val name = freshish_name ()
-        val feats = features_of ctxt prover thy (Local, General) [t]
-      in
-        peek_state ctxt (fn {access_G, ...} =>
-            let
-              val parents = maximal_wrt_access_graph access_G facts
-              val deps =
-                used_ths |> filter (is_fact_in_graph access_G I)
-                         |> map nickname_of_thm
-            in
-              MaSh.learn ctxt overlord [(name, parents, feats, deps)]
-            end);
-        (true, "")
-      end)
+fun attach_crude_parents_to_facts _ [] = []
+  | attach_crude_parents_to_facts parents ((fact as (_, th)) :: facts) =
+    (parents, fact) :: attach_crude_parents_to_facts [nickname_of_thm th] facts
 
 (* In the following functions, chunks are risers w.r.t. "thm_less_eq". *)
 
@@ -988,6 +841,191 @@
       |> drop (length old_facts)
     end
 
+fun maximal_wrt_graph G keys =
+  let
+    val tab = Symtab.empty |> fold (fn name => Symtab.default (name, ())) keys
+    fun insert_new seen name =
+      not (Symtab.defined seen name) ? insert (op =) name
+    fun num_keys keys = Graph.Keys.fold (K (Integer.add 1)) keys 0
+    fun find_maxes _ (maxs, []) = map snd maxs
+      | find_maxes seen (maxs, new :: news) =
+        find_maxes
+            (seen |> num_keys (Graph.imm_succs G new) > 1
+                     ? Symtab.default (new, ()))
+            (if Symtab.defined tab new then
+               let
+                 val newp = Graph.all_preds G [new]
+                 fun is_ancestor x yp = member (op =) yp x
+                 val maxs =
+                   maxs |> filter (fn (_, max) => not (is_ancestor max newp))
+               in
+                 if exists (is_ancestor new o fst) maxs then
+                   (maxs, news)
+                 else
+                   ((newp, new)
+                    :: filter_out (fn (_, max) => is_ancestor max newp) maxs,
+                    news)
+               end
+             else
+               (maxs, Graph.Keys.fold (insert_new seen)
+                                      (Graph.imm_preds G new) news))
+  in find_maxes Symtab.empty ([], Graph.maximals G) end
+
+fun maximal_wrt_access_graph access_G =
+  map (nickname_of_thm o snd)
+  #> maximal_wrt_graph access_G
+
+fun is_fact_in_graph access_G = can (Graph.get_node access_G) o nickname_of_thm
+
+(* FUDGE *)
+fun weight_of_mepo_fact rank =
+  Math.pow (0.62, log2 (Real.fromInt (rank + 1)))
+
+fun weight_mepo_facts facts =
+  facts ~~ map weight_of_mepo_fact (0 upto length facts - 1)
+
+val weight_raw_mash_facts = weight_mepo_facts
+val weight_mash_facts = weight_raw_mash_facts
+
+(* FUDGE *)
+fun weight_of_proximity_fact rank =
+  Math.pow (1.3, 15.5 - 0.2 * Real.fromInt rank) + 15.0
+
+fun weight_proximity_facts facts =
+  facts ~~ map weight_of_proximity_fact (0 upto length facts - 1)
+
+val max_proximity_facts = 100
+
+fun find_mash_suggestions _ _ [] _ _ raw_unknown = ([], raw_unknown)
+  | find_mash_suggestions ctxt max_facts suggs facts chained raw_unknown =
+    let
+      val raw_mash = find_suggested_facts ctxt facts suggs
+      val unknown_chained =
+        inter (Thm.eq_thm_prop o pairself snd) chained raw_unknown
+      val proximity =
+        facts |> sort (crude_thm_ord o pairself snd o swap)
+              |> take max_proximity_facts
+      val mess =
+        [(0.90 (* FUDGE *), (map (rpair 1.0) unknown_chained, [])),
+         (0.08 (* FUDGE *), (weight_raw_mash_facts raw_mash, raw_unknown)),
+         (0.02 (* FUDGE *), (weight_proximity_facts proximity, []))]
+      val unknown =
+        raw_unknown
+        |> fold (subtract (Thm.eq_thm_prop o pairself snd))
+                [unknown_chained, proximity]
+    in (mesh_facts (Thm.eq_thm_prop o pairself snd) max_facts mess, unknown) end
+
+val max_learn_on_query = 500
+
+fun mash_suggested_facts ctxt ({overlord, learn, ...} : params) prover max_facts
+                         hyp_ts concl_t facts =
+  let
+    val thy = Proof_Context.theory_of ctxt
+    val chained = facts |> filter (fn ((_, (scope, _)), _) => scope = Chained)
+    val (access_G, suggs) =
+      peek_state ctxt (fn {access_G, num_known_facts, ...} =>
+          if Graph.is_empty access_G then
+            (access_G, [])
+          else
+            let
+              val parents = maximal_wrt_access_graph access_G facts
+              val feats =
+                features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts)
+              val hints =
+                chained |> filter (is_fact_in_graph access_G o snd)
+                        |> map (nickname_of_thm o snd)
+              val (learns, parents) =
+                if length facts - num_known_facts <= max_learn_on_query then
+                  let
+                    val name_tabs = build_name_tables nickname_of_thm facts
+                    fun deps_of status th =
+                      if no_dependencies_for_status status then
+                        SOME []
+                      else
+                        isar_dependencies_of name_tabs th
+                        |> trim_dependencies
+                    fun learn_new_fact (parents,
+                                        ((_, stature as (_, status)), th)) =
+                      let
+                        val name = nickname_of_thm th
+                        val feats =
+                          features_of ctxt prover (theory_of_thm th) stature
+                                      [prop_of th]
+                        val deps = deps_of status th |> these
+                      in (name, parents, feats, deps) end
+                    val new_facts =
+                      facts |> filter_out (is_fact_in_graph access_G o snd)
+                            |> sort (crude_thm_ord o pairself snd)
+                            |> attach_crude_parents_to_facts parents
+                    val learns = new_facts |> map learn_new_fact
+                    val parents =
+                      if null new_facts then parents
+                      else [#1 (List.last learns)]
+                  in (learns, parents) end
+                else
+                  ([], parents)
+            in
+              (access_G, MaSh.query ctxt overlord max_facts
+                                    (learns, hints, parents, feats))
+            end)
+    val unknown = facts |> filter_out (is_fact_in_graph access_G o snd)
+  in
+    find_mash_suggestions ctxt max_facts suggs facts chained unknown
+    |> pairself (map fact_of_raw_fact)
+  end
+
+fun learn_wrt_access_graph ctxt (name, parents, feats, deps) (learns, graph) =
+  let
+    fun maybe_learn_from from (accum as (parents, graph)) =
+      try_graph ctxt "updating graph" accum (fn () =>
+          (from :: parents, Graph.add_edge_acyclic (from, name) graph))
+    val graph = graph |> Graph.default_node (name, Isar_Proof)
+    val (parents, graph) = ([], graph) |> fold maybe_learn_from parents
+    val (deps, _) = ([], graph) |> fold maybe_learn_from deps
+  in ((name, parents, feats, deps) :: learns, graph) end
+
+fun relearn_wrt_access_graph ctxt (name, deps) (relearns, graph) =
+  let
+    fun maybe_relearn_from from (accum as (parents, graph)) =
+      try_graph ctxt "updating graph" accum (fn () =>
+          (from :: parents, Graph.add_edge_acyclic (from, name) graph))
+    val graph = graph |> update_access_graph_node (name, Automatic_Proof)
+    val (deps, _) = ([], graph) |> fold maybe_relearn_from deps
+  in ((name, deps) :: relearns, graph) end
+
+fun flop_wrt_access_graph name =
+  update_access_graph_node (name, Isar_Proof_wegen_Prover_Flop)
+
+val learn_timeout_slack = 2.0
+
+fun launch_thread timeout task =
+  let
+    val hard_timeout = time_mult learn_timeout_slack timeout
+    val birth_time = Time.now ()
+    val death_time = Time.+ (birth_time, hard_timeout)
+    val desc = ("Machine learner for Sledgehammer", "")
+  in Async_Manager.thread MaShN birth_time death_time desc task end
+
+fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) prover t facts
+                     used_ths =
+  launch_thread (timeout |> the_default one_day) (fn () =>
+      let
+        val thy = Proof_Context.theory_of ctxt
+        val name = freshish_name ()
+        val feats = features_of ctxt prover thy (Local, General) [t]
+      in
+        peek_state ctxt (fn {access_G, ...} =>
+            let
+              val parents = maximal_wrt_access_graph access_G facts
+              val deps =
+                used_ths |> filter (is_fact_in_graph access_G)
+                         |> map nickname_of_thm
+            in
+              MaSh.learn ctxt overlord [(name, parents, feats, deps)]
+            end);
+        (true, "")
+      end)
+
 fun sendback sub =
   Active.sendback_markup [Markup.padding_command] (sledgehammerN ^ " " ^ sub)
 
@@ -1001,7 +1039,7 @@
     fun next_commit_time () =
       Time.+ (Timer.checkRealTimer timer, commit_timeout)
     val {access_G, ...} = peek_state ctxt I
-    val is_in_access_G = is_fact_in_graph access_G snd
+    val is_in_access_G = is_fact_in_graph access_G o snd
     val no_new_facts = forall is_in_access_G facts
   in
     if no_new_facts andalso not run_prover then
@@ -1019,7 +1057,7 @@
       let
         val name_tabs = build_name_tables nickname_of_thm facts
         fun deps_of status th =
-          if status = Non_Rec_Def orelse status = Rec_Def then
+          if no_dependencies_for_status status then
             SOME []
           else if run_prover then
             prover_dependencies_of ctxt params prover auto_level facts name_tabs
@@ -1030,7 +1068,7 @@
             isar_dependencies_of name_tabs th
             |> trim_dependencies
         fun do_commit [] [] [] state = state
-          | do_commit learns relearns flops {access_G, dirty} =
+          | do_commit learns relearns flops {access_G, num_known_facts, dirty} =
             let
               val was_empty = Graph.is_empty access_G
               val (learns, access_G) =
@@ -1038,6 +1076,7 @@
               val (relearns, access_G) =
                 ([], access_G) |> fold (relearn_wrt_access_graph ctxt) relearns
               val access_G = access_G |> fold flop_wrt_access_graph flops
+              val num_known_facts = num_known_facts + length learns
               val dirty =
                 case (was_empty, dirty, relearns) of
                   (false, SOME names, []) => SOME (map #1 learns @ names)
@@ -1045,7 +1084,8 @@
             in
               MaSh.learn ctxt overlord (rev learns);
               MaSh.relearn ctxt overlord relearns;
-              {access_G = access_G, dirty = dirty}
+              {access_G = access_G, num_known_facts = num_known_facts,
+               dirty = dirty}
             end
         fun commit last learns relearns flops =
           (if debug andalso auto_level = 0 then