# HG changeset patch # User desharna # Date 1404227214 -7200 # Node ID 048606cf1b8ee78d00703c4a18e9c0e964f730ee # Parent c2ee3f6754c8f880fca28148b97179f5344c644b# Parent 9512b867259c3e9e27ad73ea8edb7fbaa22c588f merge diff -r c2ee3f6754c8 -r 048606cf1b8e src/Doc/Sledgehammer/document/root.tex --- a/src/Doc/Sledgehammer/document/root.tex Tue Jul 01 17:01:48 2014 +0200 +++ b/src/Doc/Sledgehammer/document/root.tex Tue Jul 01 17:06:54 2014 +0200 @@ -1059,22 +1059,26 @@ The MaSh machine learner. Three learning engines are provided: \begin{enum} -\item[\labelitemi] \textbf{\textit{nb}} (also called \textbf{\textit{sml}} -and \textbf{\textit{yes}}) is a Standard ML implementation of naive Bayes. +\item[\labelitemi] \textbf{\textit{nb}} is an implementation of naive Bayes. -\item[\labelitemi] \textbf{\textit{knn}} is a Standard ML implementation of -$k$-nearest neighbors. +\item[\labelitemi] \textbf{\textit{knn}} is an implementation of $k$-nearest +neighbors. + +\item[\labelitemi] \textbf{\textit{nb\_knn}} (also called \textbf{\textit{yes}} +and \textbf{\textit{sml}}) is a combination of naive Bayes and $k$-nearest +neighbors. \end{enum} In addition, the special value \textit{none} is used to disable machine learning by default (cf.\ \textit{smart} below). -The engine can be selected by setting \texttt{MASH} to the name of the desired -engine---either in the environment in which Isabelle is launched, in your -\texttt{\$ISABELLE\_HOME\_USER/etc/settings} file, or via the ``MaSh'' option -under ``Plugins > Plugin Options > Isabelle > General'' in Isabelle/jEdit. -Persistent data for both engines is stored in the directory -\texttt{\$ISABELLE\_HOME\_USER/mash}. +The default engine is \textit{nb\_knn}. The engine can be selected by setting +\texttt{MASH} to the name of the desired engine---either in the environment in +which Isabelle is launched, in your +\texttt{\$ISABELLE\_\allowbreak HOME\_\allowbreak USER/\allowbreak etc/\allowbreak settings} +file, or via the ``MaSh'' option under ``Plugins > Plugin Options > Isabelle > +General'' in Isabelle/jEdit. Persistent data for both engines is stored in the +directory \texttt{\$ISABELLE\_\allowbreak HOME\_\allowbreak USER/\allowbreak mash}. \item[\labelitemi] \textbf{\textit{mesh}:} The MeSh filter, which combines the rankings from MePo and MaSh. diff -r c2ee3f6754c8 -r 048606cf1b8e src/HOL/TPTP/MaSh_Eval.thy --- a/src/HOL/TPTP/MaSh_Eval.thy Tue Jul 01 17:01:48 2014 +0200 +++ b/src/HOL/TPTP/MaSh_Eval.thy Tue Jul 01 17:06:54 2014 +0200 @@ -11,11 +11,8 @@ ML_file "mash_eval.ML" sledgehammer_params - [provers = spass, max_facts = 32, strict, dont_slice, type_enc = mono_native, - lam_trans = lifting, timeout = 15, dont_preplay, minimize] - -declare [[sledgehammer_fact_duplicates = true]] -declare [[sledgehammer_instantiate_inducts = false]] + [provers = e, max_facts = 64, strict, dont_slice, type_enc = poly_guards??, + lam_trans = combs, timeout = 30, dont_preplay, minimize] ML {* Multithreading.max_threads_value () @@ -43,15 +40,13 @@ ML {* if do_it then - evaluate_mash_suggestions @{context} params range - [MePoN, MaSh_IsarN, MaSh_ProverN, MeSh_IsarN, MeSh_ProverN, IsarN] - (SOME prob_dir) - (prefix ^ "mepo_suggestions") - (prefix ^ "mash_suggestions") - (prefix ^ "mash_prover_suggestions") - (prefix ^ "mesh_suggestions") - (prefix ^ "mesh_prover_suggestions") - (prefix ^ "mash_eval") + evaluate_mash_suggestions @{context} params range (SOME prob_dir) + [prefix ^ "mepo_suggestions", + prefix ^ "mash_suggestions", + prefix ^ "mash_prover_suggestions", + prefix ^ "mesh_suggestions", + prefix ^ "mesh_prover_suggestions"] + (prefix ^ "mash_eval") else () *} diff -r c2ee3f6754c8 -r 048606cf1b8e src/HOL/TPTP/MaSh_Export.thy --- a/src/HOL/TPTP/MaSh_Export.thy Tue Jul 01 17:01:48 2014 +0200 +++ b/src/HOL/TPTP/MaSh_Export.thy Tue Jul 01 17:06:54 2014 +0200 @@ -46,32 +46,74 @@ () *} -ML {* Options.put_default @{system_option MaSh} "nb" *} - ML {* if do_it then - generate_mash_suggestions @{context} params (range, step) thys max_suggestions + generate_mash_suggestions "nb" @{context} params (range, step) thys max_suggestions (prefix ^ "mash_nb_suggestions") else () *} -ML {* Options.put_default @{system_option MaSh} "knn" *} - ML {* if do_it then - generate_mash_suggestions @{context} params (range, step) thys max_suggestions + generate_mash_suggestions "knn" @{context} params (range, step) thys max_suggestions (prefix ^ "mash_knn_suggestions") else () *} -ML {* Options.put_default @{system_option MaSh} "py" *} +ML {* +if do_it then + generate_mepo_suggestions @{context} params (range, step) thys max_suggestions + (prefix ^ "mepo_suggestions") +else + () +*} + +ML {* +if do_it then + generate_mesh_suggestions max_suggestions (prefix ^ "mash_nb_suggestions") + (prefix ^ "mepo_suggestions") (prefix ^ "mesh_nb_suggestions") +else + () +*} + +ML {* +if do_it then + generate_mesh_suggestions max_suggestions (prefix ^ "mash_knn_suggestions") + (prefix ^ "mepo_suggestions") (prefix ^ "mesh_knn_suggestions") +else + () +*} ML {* if do_it then - generate_mash_suggestions @{context} params (range, step) thys max_suggestions - (prefix ^ "mash_py_suggestions") + generate_prover_dependencies @{context} params range thys + (prefix ^ "mash_nb_prover_dependencies") +else + () +*} + +ML {* +if do_it then + generate_prover_dependencies @{context} params range thys + (prefix ^ "mash_knn_prover_dependencies") +else + () +*} + +ML {* +if do_it then + generate_mesh_suggestions max_suggestions (prefix ^ "mash_nb_prover_suggestions") + (prefix ^ "mepo_suggestions") (prefix ^ "mesh_nb_prover_suggestions") +else + () +*} + +ML {* +if do_it then + generate_mesh_suggestions max_suggestions (prefix ^ "mash_knn_prover_suggestions") + (prefix ^ "mepo_suggestions") (prefix ^ "mesh_knn_prover_suggestions") else () *} @@ -107,41 +149,10 @@ ML {* if do_it then - generate_mepo_suggestions @{context} params (range, step) thys max_suggestions - (prefix ^ "mepo_suggestions") -else - () -*} - -ML {* -if do_it then - generate_mesh_suggestions max_suggestions (prefix ^ "mash_suggestions") - (prefix ^ "mepo_suggestions") (prefix ^ "mesh_suggestions") -else - () -*} - -ML {* -if do_it then - generate_prover_dependencies @{context} params range thys (prefix ^ "mash_prover_dependencies") -else - () -*} - -ML {* -if do_it then generate_prover_commands @{context} params (range, step) thys max_suggestions (prefix ^ "mash_prover_commands") else () *} -ML {* -if do_it then - generate_mesh_suggestions max_suggestions (prefix ^ "mash_prover_suggestions") - (prefix ^ "mepo_suggestions") (prefix ^ "mesh_prover_suggestions") -else - () -*} - end diff -r c2ee3f6754c8 -r 048606cf1b8e src/HOL/TPTP/mash_eval.ML --- a/src/HOL/TPTP/mash_eval.ML Tue Jul 01 17:01:48 2014 +0200 +++ b/src/HOL/TPTP/mash_eval.ML Tue Jul 01 17:06:54 2014 +0200 @@ -9,14 +9,8 @@ sig type params = Sledgehammer_Prover.params - val MePoN : string - val MaSh_IsarN : string - val MaSh_ProverN : string - val MeSh_IsarN : string - val MeSh_ProverN : string - val IsarN : string - val evaluate_mash_suggestions : Proof.context -> params -> int * int option -> string list -> - string option -> string -> string -> string -> string -> string -> string -> unit + val evaluate_mash_suggestions : Proof.context -> params -> int * int option -> string option -> + string list -> string -> unit end; structure MaSh_Eval : MASH_EVAL = @@ -33,15 +27,7 @@ val prefix = Library.prefix -val MaSh_IsarN = MaShN ^ "-Isar" -val MaSh_ProverN = MaShN ^ "-Prover" -val MeSh_IsarN = MeShN ^ "-Isar" -val MeSh_ProverN = MeShN ^ "-Prover" -val IsarN = "Isar" - -fun evaluate_mash_suggestions ctxt params range methods prob_dir_name mepo_file_name - mash_isar_file_name mash_prover_file_name mesh_isar_file_name mesh_prover_file_name - report_file_name = +fun evaluate_mash_suggestions ctxt params range prob_dir_name file_names report_file_name = let val thy = Proof_Context.theory_of ctxt val zeros = [0, 0, 0, 0, 0, 0] @@ -53,20 +39,19 @@ val {provers, max_facts, slice, type_enc, lam_trans, timeout, ...} = default_params thy [] val prover = hd provers val max_suggs = generous_max_suggestions (the max_facts) + + val method_of_file_name = + perhaps (try (unsuffix "_suggestions")) o List.last o space_explode "/" + + val methods = "isar" :: map method_of_file_name file_names val lines_of = Path.explode #> try File.read_lines #> these - val file_names = - [mepo_file_name, mash_isar_file_name, mash_prover_file_name, - mesh_isar_file_name, mesh_prover_file_name] - val lines as [mepo_lines, mash_isar_lines, mash_prover_lines, - mesh_isar_lines, mesh_prover_lines] = - map lines_of file_names - val num_lines = fold (Integer.max o length) lines 0 + val liness0 = map lines_of file_names + val num_lines = fold (Integer.max o length) liness0 0 fun pad lines = lines @ replicate (num_lines - length lines) "" - val lines = - pad mepo_lines ~~ pad mash_isar_lines ~~ pad mash_prover_lines ~~ - pad mesh_isar_lines ~~ pad mesh_prover_lines + val liness = map pad liness0 + val css = clasimpset_rule_table_of ctxt val facts = all_facts ctxt true false Symtab.empty [] [] css val name_tabs = build_name_tables nickname_of_thm facts @@ -95,19 +80,12 @@ |> space_implode " ")) end - fun solve_goal (j, ((((mepo_line, mash_isar_line), mash_prover_line), mesh_isar_line), - mesh_prover_line)) = + fun solve_goal (j, lines) = if in_range range j then let val get_suggs = extract_suggestions ##> (take max_suggs #> map fst) - val (name1, mepo_suggs) = get_suggs mepo_line - val (name2, mash_isar_suggs) = get_suggs mash_isar_line - val (name3, mash_prover_suggs) = get_suggs mash_prover_line - val (name4, mesh_isar_suggs) = get_suggs mesh_isar_line - val (name5, mesh_prover_suggs) = get_suggs mesh_prover_line - val [name] = - [name1, name2, name3, name4, name5] - |> filter (curry (op <>) "") |> distinct (op =) + val (names, suggss0) = split_list (map get_suggs lines) + val [name] = names |> filter (curry (op <>) "") |> distinct (op =) handle General.Match => error "Input files out of sync." val th = case find_first (fn (_, th) => nickname_of_thm th = name) facts of @@ -116,6 +94,7 @@ val goal = goal_of_thm (Proof_Context.theory_of ctxt) th val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal goal 1 ctxt val isar_deps = these (isar_dependencies_of name_tabs th) + val suggss = isar_deps :: suggss0 val facts = facts |> filter (fn (_, th') => thm_less (th', th)) (* adapted from "mirabelle_sledgehammer.ML" *) @@ -130,7 +109,7 @@ | set_file_name _ NONE = I fun prove method suggs = - if not (member (op =) methods method) orelse (null facts andalso method <> IsarN) then + if null facts then (str_of_method method ^ "Skipped", 0) else let @@ -151,14 +130,7 @@ (str_of_result method facts res, ok) end - val ress = - [fn () => prove MePoN mepo_suggs, - fn () => prove MaSh_IsarN mash_isar_suggs, - fn () => prove MaSh_ProverN mash_prover_suggs, - fn () => prove MeSh_IsarN mesh_isar_suggs, - fn () => prove MeSh_ProverN mesh_prover_suggs, - fn () => prove IsarN isar_deps] - |> (* Par_List. *) map (fn f => f ()) + val ress = map2 prove methods suggss in "Goal " ^ string_of_int j ^ ": " ^ name :: map fst ress |> cat_lines |> print; @@ -167,10 +139,6 @@ else zeros - fun total_of method ok n = - str_of_method method ^ string_of_int ok ^ " (" ^ Real.fmt (StringCvt.FIX (SOME 1)) - (100.0 * Real.fromInt ok / Real.fromInt (Int.max (1, n))) ^ "%)" - val inst_inducts = Config.get ctxt instantiate_inducts val options = ["prover = " ^ prover, @@ -182,18 +150,17 @@ "instantiate_inducts" |> not inst_inducts ? prefix "dont_"] val _ = print " * * *"; val _ = print ("Options: " ^ commas options); - val oks = Par_List.map solve_goal (tag_list 1 lines) + val oks = Par_List.map solve_goal (tag_list 1 liness) val n = length oks - val [mepo_ok, mash_isar_ok, mash_prover_ok, mesh_isar_ok, mesh_prover_ok, isar_ok] = - if n = 0 then zeros else map Integer.sum (map_transpose I oks) + + fun total_of method ok = + str_of_method method ^ string_of_int ok ^ " (" ^ Real.fmt (StringCvt.FIX (SOME 1)) + (100.0 * Real.fromInt ok / Real.fromInt (Int.max (1, n))) ^ "%)" + + val oks' = if n = 0 then zeros else map Integer.sum (map_transpose I oks) in - ["Successes (of " ^ string_of_int n ^ " goals)", - total_of MePoN mepo_ok n, - total_of MaSh_IsarN mash_isar_ok n, - total_of MaSh_ProverN mash_prover_ok n, - total_of MeSh_IsarN mesh_isar_ok n, - total_of MeSh_ProverN mesh_prover_ok n, - total_of IsarN isar_ok n] + "Successes (of " ^ string_of_int n ^ " goals)" :: + map2 total_of methods oks' |> cat_lines |> print end diff -r c2ee3f6754c8 -r 048606cf1b8e src/HOL/TPTP/mash_export.ML --- a/src/HOL/TPTP/mash_export.ML Tue Jul 01 17:01:48 2014 +0200 +++ b/src/HOL/TPTP/mash_export.ML Tue Jul 01 17:06:54 2014 +0200 @@ -24,7 +24,7 @@ theory list -> int -> string -> unit val generate_mepo_suggestions : Proof.context -> params -> (int * int option) * int -> theory list -> int -> string -> unit - val generate_mash_suggestions : Proof.context -> params -> (int * int option) * int -> + val generate_mash_suggestions : string -> Proof.context -> params -> (int * int option) * int -> theory list -> int -> string -> unit val generate_mesh_suggestions : int -> string -> string -> string -> unit end; @@ -284,15 +284,16 @@ not (Config.get ctxt Sledgehammer_MaSh.duplicates) ? Sledgehammer_Fact.drop_duplicate_facts #> Sledgehammer_MePo.mepo_suggested_facts ctxt params max_suggs NONE hyp_ts concl_t) -fun generate_mash_suggestions ctxt params = - (Sledgehammer_MaSh.mash_unlearn (); +fun generate_mash_suggestions engine = + (Options.put_default @{system_option MaSh} engine; + Sledgehammer_MaSh.mash_unlearn (); generate_mepo_or_mash_suggestions (fn ctxt => fn thy => fn params as {provers = prover :: _, ...} => fn max_suggs => fn hyp_ts => fn concl_t => tap (Sledgehammer_MaSh.mash_learn_facts ctxt params prover 2 false Sledgehammer_Util.one_year) #> Sledgehammer_MaSh.mash_suggested_facts ctxt thy params max_suggs hyp_ts concl_t - #> fst) ctxt params) + #> fst)) fun generate_mesh_suggestions max_suggs mash_file_name mepo_file_name mesh_file_name = let diff -r c2ee3f6754c8 -r 048606cf1b8e src/HOL/Tools/ATP/atp_problem_generate.ML --- a/src/HOL/Tools/ATP/atp_problem_generate.ML Tue Jul 01 17:01:48 2014 +0200 +++ b/src/HOL/Tools/ATP/atp_problem_generate.ML Tue Jul 01 17:06:54 2014 +0200 @@ -749,11 +749,9 @@ fun concealed_bound_name j = atp_weak_prefix ^ string_of_int j fun conceal_bounds Ts t = - subst_bounds (map (Free o apfst concealed_bound_name) - (0 upto length Ts - 1 ~~ Ts), t) + subst_bounds (map_index (Free o apfst concealed_bound_name) Ts, t) fun reveal_bounds Ts = - subst_atomic (map (fn (j, T) => (Free (concealed_bound_name j, T), Bound j)) - (0 upto length Ts - 1 ~~ Ts)) + subst_atomic (map_index (fn (j, T) => (Free (concealed_bound_name j, T), Bound j)) Ts) fun do_introduce_combinators ctxt Ts t = let val thy = Proof_Context.theory_of ctxt in @@ -1327,7 +1325,7 @@ if forall null footprint then [] else - 0 upto length footprint - 1 ~~ footprint + map_index I footprint |> sort (rev_order o list_ord Term_Ord.indexname_ord o pairself snd) |> cover [] end @@ -1878,7 +1876,7 @@ val conjs = map (pair prem_role) hyp_ts @ [(Conjecture, s_not_prop concl_t)] |> map (apsnd freeze_term) - |> map2 (pair o rpair (Local, General) o string_of_int) (0 upto length hyp_ts) + |> map_index (uncurry (pair o rpair (Local, General) o string_of_int)) val ((conjs, facts), lam_facts) = (conjs, facts) |> presimp ? pairself (map (apsnd (apsnd (presimp_prop ctxt type_enc)))) @@ -1930,7 +1928,7 @@ val cover = type_arg_cover thy pos s ary in exists (fn (j, tm) => tm = var andalso member (op =) cover j) - (0 upto ary - 1 ~~ tms) orelse + (0 upto ary - 1 ~~ tms) orelse exists is_undercover tms end | is_undercover _ = true @@ -1984,7 +1982,7 @@ val ary = length args fun arg_site j = if is_tptp_equal s then Eq_Arg pos else Arg (s, j, ary) in - map2 (fn j => term (arg_site j)) (0 upto ary - 1) args + map_index (uncurry (term o arg_site)) args |> mk_aterm type_enc name T_args end | IVar (name, _) => @@ -2102,7 +2100,7 @@ if is_type_enc_polymorphic type_enc then let val type_classes = (polymorphism_of_type_enc type_enc = Type_Class_Polymorphic) - fun line j (cl, T) = + fun line (j, (cl, T)) = if type_classes then Class_Memb (class_memb_prefix ^ string_of_int j, [], native_atp_type_of_typ type_enc false 0 T, `make_class cl) @@ -2113,7 +2111,7 @@ fold (union (op =)) (map #atomic_types facts) [] |> class_membs_of_types type_enc add_sorts_on_tfree in - map2 line (0 upto length membs - 1) membs + map_index line membs end else [] @@ -2314,8 +2312,7 @@ fun honor_conj_sym_role in_conj = (if in_conj then Hypothesis else Axiom, I) -fun line_of_guards_sym_decl ctxt mono type_enc n s j - (s', T_args, T, _, ary, in_conj) = +fun line_of_guards_sym_decl ctxt mono type_enc n s j (s', T_args, T, _, ary, in_conj) = let val thy = Proof_Context.theory_of ctxt val (role, maybe_negate) = honor_conj_sym_role in_conj @@ -2328,7 +2325,7 @@ All_Types => if null T_args then replicate ary NONE else map SOME arg_Ts | Undercover_Types => let val cover = type_arg_cover thy NONE s ary in - map2 (fn j => if member (op =) cover j then SOME else K NONE) (0 upto ary - 1) arg_Ts + map_index (uncurry (fn j => if member (op =) cover j then SOME else K NONE)) arg_Ts end | _ => replicate ary NONE) in @@ -2372,7 +2369,7 @@ let val cover = type_arg_cover thy NONE s ary fun maybe_tag (j, arg_T) = member (op =) cover j ? tag_with arg_T - val bounds = bounds |> map2 maybe_tag (0 upto ary - 1 ~~ arg_Ts) + val bounds = map2 maybe_tag (0 upto ary - 1 ~~ arg_Ts) bounds in formula (cst bounds) end else formula (cst bounds) @@ -2399,14 +2396,14 @@ val n = length decls val decls = decls |> filter (should_encode_type ctxt mono level o result_type_of_decl) in - (0 upto length decls - 1, decls) |-> map2 (line_of_guards_sym_decl ctxt mono type_enc n s) + map_index (uncurry (line_of_guards_sym_decl ctxt mono type_enc n s)) decls end | Tags (_, level) => if is_type_level_uniform level then [] else let val n = length decls in - (0 upto n - 1 ~~ decls) |> maps (lines_of_tags_sym_decl ctxt mono type_enc n s) + maps (lines_of_tags_sym_decl ctxt mono type_enc n s) (0 upto n - 1 ~~ decls) end) fun lines_of_sym_decl_table ctxt mono type_enc mono_Ts sym_decl_tab = @@ -2603,6 +2600,7 @@ val thy = Proof_Context.theory_of ctxt val type_enc = type_enc |> adjust_type_enc format val completish = (mode = Sledgehammer_Completish) + (* Forcing explicit applications is expensive for polymorphic encodings, because it takes only one existential variable ranging over "'a => 'b" to ruin everything. Hence we do it only if there are few facts (which is @@ -2614,32 +2612,45 @@ if is_type_enc_polymorphic type_enc then Min_App_Op else Sufficient_App_Op else Sufficient_App_Op_And_Predicator + val lam_trans = if lam_trans = keep_lamsN andalso not (is_type_enc_higher_order type_enc) then liftingN else lam_trans + val (classes, conjs, facts, subclass_pairs, tcon_clauses, lifted) = translate_formulas ctxt prem_role format type_enc lam_trans presimp hyp_ts concl_t facts + val (_, sym_tab0) = sym_table_of_facts ctxt type_enc app_op_level conjs facts - val mono = conjs @ facts |> mononotonicity_info_of_facts ctxt type_enc completish val ctrss = all_ctrss_of_datatypes ctxt + fun firstorderize in_helper = firstorderize_fact thy ctrss type_enc (uncurried_aliases andalso not in_helper) completish sym_tab0 + val (conjs, facts) = (conjs, facts) |> pairself (map (firstorderize false)) val (ho_stuff, sym_tab) = sym_table_of_facts ctxt type_enc Min_App_Op conjs facts + val helpers = + sym_tab |> helper_facts_of_sym_table ctxt format type_enc completish + |> map (firstorderize true) + + val all_facts = helpers @ conjs @ facts + val mono = mononotonicity_info_of_facts ctxt type_enc completish all_facts + val mono_Ts = monotonic_types_of_facts ctxt mono type_enc all_facts + val (uncurried_alias_eq_tms, uncurried_alias_eq_lines) = uncurried_alias_lines_of_sym_table ctxt ctrss mono type_enc uncurried_aliases sym_tab0 sym_tab val (_, sym_tab) = (ho_stuff, sym_tab) |> fold (add_iterm_syms_to_sym_table ctxt Min_App_Op false false) uncurried_alias_eq_tms - val helpers = - sym_tab |> helper_facts_of_sym_table ctxt format type_enc completish - |> map (firstorderize true) - val all_facts = helpers @ conjs @ facts - val mono_Ts = monotonic_types_of_facts ctxt mono type_enc all_facts val datatypes = datatypes_of_sym_table ctxt ctrss format type_enc uncurried_aliases sym_tab + + val num_facts = length facts + val freshen = mode <> Exporter andalso mode <> Translator + val pos = mode <> Exporter + val rank_of = rank_of_fact_num num_facts + val class_decl_lines = decl_lines_of_classes type_enc classes val sym_decl_lines = (conjs, helpers @ facts, uncurried_alias_eq_tms) @@ -2647,22 +2658,18 @@ |> lines_of_sym_decl_table ctxt mono type_enc mono_Ts val datatype_decl_lines = map decl_line_of_datatype datatypes val decl_lines = class_decl_lines @ sym_decl_lines @ datatype_decl_lines - val num_facts = length facts - val freshen = mode <> Exporter andalso mode <> Translator - val pos = mode <> Exporter - val rank_of = rank_of_fact_num num_facts val fact_lines = - map (line_of_fact ctxt fact_prefix ascii_of I freshen pos mono type_enc rank_of) - (0 upto num_facts - 1 ~~ facts) + map_index (line_of_fact ctxt fact_prefix ascii_of I freshen pos mono type_enc rank_of) facts val subclass_lines = maps (lines_of_subclass_pair type_enc) subclass_pairs val tcon_lines = map (line_of_tcon_clause type_enc) tcon_clauses val helper_lines = - 0 upto length helpers - 1 ~~ helpers - |> map (line_of_fact ctxt helper_prefix I (K "") false true mono type_enc (K default_rank)) + map_index (line_of_fact ctxt helper_prefix I (K "") false true mono type_enc (K default_rank)) + helpers val free_type_lines = lines_of_free_types type_enc (facts @ conjs) val conj_lines = map (line_of_conjecture ctxt mono type_enc) conjs + (* Reordering these might confuse the proof reconstruction code. *) - val problem = + val (problem, pool) = [(explicit_declsN, decl_lines), (uncurried_alias_eqsN, uncurried_alias_eq_lines), (factsN, fact_lines), @@ -2671,14 +2678,13 @@ (helpersN, helper_lines), (free_typesN, free_type_lines), (conjsN, conj_lines)] - val problem = - problem |> (case format of CNF => ensure_cnf_problem | CNF_UEQ => filter_cnf_ueq_problem | FOF => I | _ => declare_undeclared_in_problem implicit_declsN) - val (problem, pool) = problem |> nice_atp_problem readable_names format + |> nice_atp_problem readable_names format + fun add_sym_ary (s, {min_ary, ...} : sym_info) = min_ary > 0 ? Symtab.insert (op =) (s, min_ary) in (problem, Option.map snd pool |> the_default Symtab.empty, lifted, @@ -2715,7 +2721,7 @@ fact_min_weight + (fact_max_weight - fact_min_weight) * Real.fromInt j / Real.fromInt num_facts in - map weight_of (0 upto num_facts - 1) ~~ facts + map_index (apfst weight_of) facts |> fold (uncurry add_line_weights) end val get = these o AList.lookup (op =) problem diff -r c2ee3f6754c8 -r 048606cf1b8e src/HOL/Tools/Meson/meson_clausify.ML --- a/src/HOL/Tools/Meson/meson_clausify.ML Tue Jul 01 17:01:48 2014 +0200 +++ b/src/HOL/Tools/Meson/meson_clausify.ML Tue Jul 01 17:06:54 2014 +0200 @@ -190,10 +190,9 @@ (* Given the definition of a Skolem function, return a theorem to replace an existential formula by a use of that function. Example: "EX x. x : A & x ~: B ==> sko A B : A & sko A B ~: B" [.] *) -fun old_skolem_theorem_of_def thy rhs0 = +fun old_skolem_theorem_of_def ctxt rhs0 = let - val ctxt = Proof_Context.init_global thy (* FIXME proper context!? *) - + val thy = Proof_Context.theory_of ctxt val rhs = rhs0 |> Type.legacy_freeze_thaw |> #1 |> cterm_of thy val rhs' = rhs |> Thm.dest_comb |> snd val (ch, frees) = c_variant_abs_multi (rhs', []) @@ -201,8 +200,7 @@ val T = case hilbert of Const (_, Type (@{type_name fun}, [_, T])) => T - | _ => raise TERM ("old_skolem_theorem_of_def: expected \"Eps\"", - [hilbert]) + | _ => raise TERM ("old_skolem_theorem_of_def: expected \"Eps\"", [hilbert]) val cex = cterm_of thy (HOLogic.exists_const T) val ex_tm = Thm.apply cTrueprop (Thm.apply cex cabs) val conc = @@ -373,7 +371,7 @@ nnf_axiom choice_ths new_skolem ax_no th ctxt0 fun clausify th = make_cnf (if new_skolem orelse null choice_ths then [] - else map (old_skolem_theorem_of_def thy) (old_skolem_defs th)) + else map (old_skolem_theorem_of_def ctxt) (old_skolem_defs th)) th ctxt ctxt val (cnf_ths, ctxt) = clausify nnf_th fun intr_imp ct th = diff -r c2ee3f6754c8 -r 048606cf1b8e src/HOL/Tools/Sledgehammer/sledgehammer.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer.ML Tue Jul 01 17:01:48 2014 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer.ML Tue Jul 01 17:06:54 2014 +0200 @@ -196,8 +196,11 @@ (facts |> map (fst o fst) |> space_implode " ") ^ "." fun string_of_factss factss = - if forall (null o snd) factss then "Found no relevant facts." - else cat_lines (map (fn (filter, facts) => quote filter ^ ": " ^ string_of_facts facts) factss) + if forall (null o snd) factss then + "Found no relevant facts." + else + cat_lines (map (fn (filter, facts) => + (if filter = "" then "" else quote filter ^ ": ") ^ string_of_facts facts) factss) fun run_sledgehammer (params as {verbose, spy, blocking, provers, max_facts, ...}) mode output_result i (fact_override as {only, ...}) minimize_command state = diff -r c2ee3f6754c8 -r 048606cf1b8e src/HOL/Tools/Sledgehammer/sledgehammer_fact.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_fact.ML Tue Jul 01 17:01:48 2014 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_fact.ML Tue Jul 01 17:06:54 2014 +0200 @@ -84,6 +84,7 @@ val risky_defs = @{thms Bit0_def Bit1_def} fun is_rec_eq lhs = Term.exists_subterm (curry (op =) (head_of lhs)) + fun is_rec_def (@{const Trueprop} $ t) = is_rec_def t | is_rec_def (@{const Pure.imp} $ _ $ t2) = is_rec_def t2 | is_rec_def (Const (@{const_name Pure.eq}, _) $ t1 $ t2) = is_rec_eq t1 t2 @@ -114,6 +115,7 @@ ~1 => (TVar ((Name.uu, nT), S), ((z :: knownT, nT + 1), accum)) | j => (TVar ((Name.uu, nT - j - 1), S), ((knownT, nT), accum)))) | normT (T as TFree _) = pair T + fun norm (t $ u) = norm t ##>> norm u #>> op $ | norm (Const (s, T)) = normT T #>> curry Const s | norm (Var (z as (_, T))) = @@ -164,6 +166,7 @@ val bracket = map (enclose "[" "]" o Pretty.str_of o Args.pretty_src ctxt) args |> implode + fun nth_name j = (case xref of Facts.Fact s => backquote s ^ bracket @@ -173,12 +176,15 @@ | Facts.Named ((name, _), SOME intervals) => make_name reserved true (nth (maps (explode_interval (length ths)) intervals) j) name ^ bracket) + fun add_nth th (j, rest) = let val name = nth_name j in (j + 1, ((name, stature_of_thm false [] chained css name th), th) :: rest) end - in (0, []) |> fold add_nth ths |> snd end + in + (0, []) |> fold add_nth ths |> snd + end (* Reject theorems with names like "List.filter.filter_list_def" or "Accessible_Part.acc.defs", as these are definitions arising from packages. *) @@ -222,6 +228,7 @@ (* FIXME: make more reliable *) val sep_class_sep = Long_Name.separator ^ "class" ^ Long_Name.separator + fun is_low_level_class_const (s, _) = s = @{const_name equal_class.equal} orelse String.isSubstring sep_class_sep s @@ -250,6 +257,7 @@ not (member (op =) atp_widely_irrelevant_consts s) | is_interesting_subterm (Free _) = true | is_interesting_subterm _ = false + fun interest_of_bool t = if exists_Const (is_technical_const orf is_low_level_class_const orf type_has_top_sort o snd) t then @@ -259,6 +267,7 @@ Boring else Interesting + fun interest_of_prop _ (@{const Trueprop} $ t) = interest_of_bool t | interest_of_prop Ts (@{const Pure.imp} $ t $ u) = combine_interests (interest_of_prop Ts t) (interest_of_prop Ts u) @@ -269,6 +278,7 @@ | interest_of_prop _ (Const (@{const_name Pure.eq}, _) $ t $ u) = combine_interests (interest_of_bool t) (interest_of_bool u) | interest_of_prop _ _ = Deal_Breaker + val t = prop_of th in (interest_of_prop [] t <> Interesting andalso @@ -277,11 +287,9 @@ end fun is_blacklisted_or_something ctxt ho_atp = - let - val blist = multi_base_blacklist ctxt ho_atp - fun is_blisted name = - is_package_def name orelse exists (fn s => String.isSuffix s name) blist - in is_blisted end + let val blist = multi_base_blacklist ctxt ho_atp in + fn name => is_package_def name orelse exists (fn s => String.isSuffix s name) blist + end (* This is a terrible hack. Free variables are sometimes coded as "M__" when they are displayed as "M" and we want to avoid clashes with these. But @@ -316,6 +324,7 @@ let fun add stature th = Termtab.update (normalize_vars (prop_of th), stature) + val {safeIs, (* safeEs, *) hazIs, (* hazEs, *) ...} = ctxt |> claset_of |> Classical.rep_cs val intros = Item_Net.content safeIs @ Item_Net.content hazIs @@ -331,18 +340,18 @@ |> filter_out (member Thm.eq_thm_prop risky_defs) |> List.partition (is_rec_def o prop_of) val spec_intros = - specs |> filter (member (op =) [Spec_Rules.Inductive, - Spec_Rules.Co_Inductive] o fst) + specs |> filter (member (op =) [Spec_Rules.Inductive, Spec_Rules.Co_Inductive] o fst) |> maps (snd o snd) in - Termtab.empty |> fold (add Simp o snd) simps - |> fold (add Rec_Def) rec_defs - |> fold (add Non_Rec_Def) nonrec_defs + Termtab.empty + |> fold (add Simp o snd) simps + |> fold (add Rec_Def) rec_defs + |> fold (add Non_Rec_Def) nonrec_defs (* Add once it is used: - |> fold (add Elim) elims + |> fold (add Elim) elims *) - |> fold (add Intro) intros - |> fold (add Inductive) spec_intros + |> fold (add Intro) intros + |> fold (add Inductive) spec_intros end end @@ -359,9 +368,8 @@ fun if_thm_before th th' = if Theory.subthy (pairself Thm.theory_of_thm (th, th')) then th else th' -(* Hack: Conflate the facts about a class as seen from the outside with the - corresponding low-level facts, so that MaSh can learn from the low-level - proofs. *) +(* Hack: Conflate the facts about a class as seen from the outside with the corresponding low-level + facts, so that MaSh can learn from the low-level proofs. *) fun un_class_ify s = (case first_field "_class" s of SOME (pref, suf) => [s, pref ^ suf] @@ -407,6 +415,7 @@ fun varify_noninducts (t as Free (s, T)) = if (s, T) = ind_x orelse can dest_funT T then t else Var ((s, 0), T) | varify_noninducts t = t + val p_inst = concl_prop |> map_aterms varify_noninducts |> close_form |> lambda (Free ind_x) @@ -455,40 +464,44 @@ val thy = Proof_Context.theory_of ctxt val global_facts = Global_Theory.facts_of thy val is_too_complex = - if generous orelse - fact_count global_facts >= max_facts_for_complex_check then + if generous orelse fact_count global_facts >= max_facts_for_complex_check then K false else is_too_complex val local_facts = Proof_Context.facts_of ctxt val named_locals = local_facts |> Facts.dest_static true [global_facts] val assms = Assumption.all_assms_of ctxt + fun is_good_unnamed_local th = not (Thm.has_name_hint th) andalso forall (fn (_, ths) => not (member Thm.eq_thm_prop ths th)) named_locals + val unnamed_locals = union Thm.eq_thm_prop (Facts.props local_facts) chained |> filter is_good_unnamed_local |> map (pair "" o single) val full_space = Name_Space.merge (Facts.space_of global_facts, Facts.space_of local_facts) val is_blacklisted_or_something = is_blacklisted_or_something ctxt ho_atp + fun add_facts global foldx facts = foldx (fn (name0, ths) => fn accum => if name0 <> "" andalso forall (not o member Thm.eq_thm_prop add_ths) ths andalso (Facts.is_concealed facts name0 orelse + Long_Name.is_hidden (Facts.intern facts name0) orelse (not generous andalso is_blacklisted_or_something name0)) then accum else let val n = length ths val multi = n > 1 + fun check_thms a = (case try (Proof_Context.get_thms ctxt) a of NONE => false | SOME ths' => eq_list Thm.eq_thm_prop (ths, ths')) in - snd (fold_rev (fn th => fn (j, accum as (uni_accum, multi_accum)) => + snd (fold_rev (fn th => fn (j, accum) => (j - 1, if not (member Thm.eq_thm_prop add_ths th) andalso (is_likely_tautology_too_meta_or_too_technical th orelse @@ -496,21 +509,26 @@ accum else let - val new = - (((fn () => - if name0 = "" then - backquote_thm ctxt th - else - [Facts.extern ctxt facts name0, - Name_Space.extern ctxt full_space name0] - |> find_first check_thms - |> the_default name0 - |> make_name reserved multi j), - stature_of_thm global assms chained css name0 th), - th) + fun get_name () = + if name0 = "" then + backquote_thm ctxt th + else + let val short_name = Facts.extern ctxt facts name0 in + if check_thms short_name then + short_name + else + let val long_name = Name_Space.extern ctxt full_space name0 in + if check_thms long_name then + long_name + else + name0 + end + end + |> make_name reserved multi j + val stature = stature_of_thm global assms chained css name0 th + val new = ((get_name, stature), th) in - if multi then (uni_accum, new :: multi_accum) - else (new :: uni_accum, multi_accum) + (if multi then apsnd else apfst) (cons new) accum end)) ths (n, accum)) end) in @@ -522,8 +540,7 @@ |> op @ end -fun nearly_all_facts ctxt ho_atp {add, del, only} reserved css chained hyp_ts - concl_t = +fun nearly_all_facts ctxt ho_atp {add, del, only} reserved css chained hyp_ts concl_t = if only andalso null add then [] else diff -r c2ee3f6754c8 -r 048606cf1b8e src/HOL/Tools/Sledgehammer/sledgehammer_isar_annotate.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_isar_annotate.ML Tue Jul 01 17:01:48 2014 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_isar_annotate.ML Tue Jul 01 17:06:54 2014 +0200 @@ -70,11 +70,10 @@ val thy = Proof_Context.theory_of ctxt val get_types = post_fold_term_type (K cons) [] in - fold (Sign.typ_match thy) (get_types t1 ~~ get_types t2) Vartab.empty - handle Type.TYPE_MATCH => raise Fail "Sledgehammer_Isar_Annotate: match_types" + fold (perhaps o try o Sign.typ_match thy) (get_types t1 ~~ get_types t2) Vartab.empty end -fun handle_trivial_tfrees ctxt (t', subst) = +fun handle_trivial_tfrees ctxt t' subst = let val add_tfree_names = snd #> snd #> fold_atyps (fn TFree (x, _) => cons x | _ => I) @@ -181,10 +180,10 @@ let val t' = generalize_types ctxt t val subst = match_types ctxt t' t - val (t', subst) = (t', subst) |> handle_trivial_tfrees ctxt - val typing_spots = t' |> typing_spot_table |> reverse_greedy |> sort int_ord + val (t'', subst') = handle_trivial_tfrees ctxt t' subst + val typing_spots = t'' |> typing_spot_table |> reverse_greedy |> sort int_ord in - introduce_annotations subst typing_spots t t' + introduce_annotations subst' typing_spots t t'' end end; diff -r c2ee3f6754c8 -r 048606cf1b8e src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML Tue Jul 01 17:01:48 2014 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML Tue Jul 01 17:06:54 2014 +0200 @@ -34,17 +34,18 @@ val decode_strs : string -> string list datatype mash_engine = - MaSh_kNN + MaSh_NB + | MaSh_kNN + | MaSh_NB_kNN + | MaSh_NB_Ext | MaSh_kNN_Ext - | MaSh_NB - | MaSh_NB_Ext val is_mash_enabled : unit -> bool val the_mash_engine : unit -> mash_engine + val mesh_facts : ('a * 'a -> bool) -> int -> (real * (('a * real) list * 'a list)) list -> 'a list val nickname_of_thm : thm -> string val find_suggested_facts : Proof.context -> ('b * thm) list -> string list -> ('b * thm) list - val mesh_facts : ('a * 'a -> bool) -> int -> (real * (('a * real) list * 'a list)) list -> 'a list val crude_thm_ord : thm * thm -> order val thm_less : thm * thm -> bool val goal_of_thm : theory -> thm -> thm @@ -138,36 +139,70 @@ end datatype mash_engine = - MaSh_kNN + MaSh_NB +| MaSh_kNN +| MaSh_NB_kNN +| MaSh_NB_Ext | MaSh_kNN_Ext -| MaSh_NB -| MaSh_NB_Ext fun mash_engine () = let val flag1 = Options.default_string @{system_option MaSh} in (case if flag1 <> "none" (* default *) then flag1 else getenv "MASH" of - "yes" => SOME MaSh_NB - | "sml" => SOME MaSh_NB + "yes" => SOME MaSh_NB_kNN + | "sml" => SOME MaSh_NB_kNN + | "nb" => SOME MaSh_NB | "knn" => SOME MaSh_kNN + | "nb_knn" => SOME MaSh_NB_kNN + | "nb_ext" => SOME MaSh_NB_Ext | "knn_ext" => SOME MaSh_kNN_Ext - | "nb" => SOME MaSh_NB - | "nb_ext" => SOME MaSh_NB_Ext - | _ => NONE) + | engine => (warning ("Unknown MaSh engine: " ^ quote engine ^ "."); NONE)) end val is_mash_enabled = is_some o mash_engine -val the_mash_engine = the_default MaSh_NB o mash_engine +val the_mash_engine = the_default MaSh_NB_kNN o mash_engine + +fun scaled_avg [] = 0 + | scaled_avg xs = Real.ceil (100000000.0 * fold (curry (op +)) xs 0.0) div length xs + +fun avg [] = 0.0 + | avg xs = fold (curry (op +)) xs 0.0 / Real.fromInt (length xs) +fun normalize_scores _ [] = [] + | normalize_scores max_facts xs = + map (apsnd (curry Real.* (1.0 / avg (map snd (take max_facts xs))))) xs -(*** Isabelle-agnostic machine learning ***) +fun mesh_facts fact_eq max_facts [(_, (sels, unks))] = + distinct fact_eq (map fst (take max_facts sels) @ take (max_facts - length sels) unks) + | mesh_facts fact_eq max_facts mess = + let + val mess = mess |> map (apsnd (apfst (normalize_scores max_facts))) -structure MaSh = -struct + fun score_in fact (global_weight, (sels, unks)) = + let val score_at = try (nth sels) #> Option.map (fn (_, score) => global_weight * score) in + (case find_index (curry fact_eq fact o fst) sels of + ~1 => if member fact_eq unks fact then NONE else SOME 0.0 + | rank => score_at rank) + end -fun heap cmp bnd al a = + fun weight_of fact = mess |> map_filter (score_in fact) |> scaled_avg + in + fold (union fact_eq o map fst o take max_facts o fst o snd) mess [] + |> map (`weight_of) |> sort (int_ord o pairself fst o swap) + |> map snd |> take max_facts + end + +fun smooth_weight_of_fact rank = Math.pow (1.3, 15.5 - 0.2 * Real.fromInt rank) + 15.0 (* FUDGE *) +fun steep_weight_of_fact rank = Math.pow (0.62, log2 (Real.fromInt (rank + 1))) (* FUDGE *) + +fun weight_facts_smoothly facts = facts ~~ map smooth_weight_of_fact (0 upto length facts - 1) +fun weight_facts_steeply facts = facts ~~ map steep_weight_of_fact (0 upto length facts - 1) + +fun rev_sort_array_prefix cmp bnd a = let exception BOTTOM of int + val al = Array.length a + fun maxson l i = let val i31 = i + i + i + 1 in if i31 + 2 < l then @@ -231,7 +266,17 @@ () end -val number_of_nearest_neighbors = 10 (* FUDGE *) +fun rev_sort_list_prefix cmp bnd xs = + let val ary = Array.fromList xs in + rev_sort_array_prefix cmp bnd ary; + Array.foldr (op ::) [] ary + end + + +(*** Isabelle-agnostic machine learning ***) + +structure MaSh = +struct fun select_visible_facts big_number recommends = List.app (fn at => @@ -239,78 +284,6 @@ Array.update (recommends, at, (j, big_number + ov)) end) -fun k_nearest_neighbors dffreq num_facts depss feat_facts max_suggs visible_facts goal_feats = - let - exception EXIT of unit - - val ln_afreq = Math.ln (Real.fromInt num_facts) - fun tfidf feat = ln_afreq - Math.ln (Real.fromInt (Vector.sub (dffreq, feat))) - - val overlaps_sqr = Array.tabulate (num_facts, rpair 0.0) - - fun do_feat (s, sw0) = - let - val sw = sw0 * tfidf s - val w2 = sw * sw - - fun inc_overlap j = - let val (_, ov) = Array.sub (overlaps_sqr, j) in - Array.update (overlaps_sqr, j, (j, w2 + ov)) - end - in - List.app inc_overlap (Array.sub (feat_facts, s)) - end - - val _ = List.app do_feat goal_feats - val _ = heap (Real.compare o pairself snd) num_facts num_facts overlaps_sqr - val no_recommends = Unsynchronized.ref 0 - val recommends = Array.tabulate (num_facts, rpair 0.0) - val age = Unsynchronized.ref 500000000.0 - - fun inc_recommend j v = - let val (_, ov) = Array.sub (recommends, j) in - if ov <= 0.0 then - (no_recommends := !no_recommends + 1; Array.update (recommends, j, (j, !age + ov))) - else if ov < !age + 1000.0 then - Array.update (recommends, j, (j, v + ov)) - else - () - end - - val k = Unsynchronized.ref 0 - fun do_k k = - if k >= num_facts then - raise EXIT () - else - let - val (j, o2) = Array.sub (overlaps_sqr, num_facts - k - 1) - val o1 = Math.sqrt o2 - val _ = inc_recommend j o1 - val ds = Vector.sub (depss, j) - val l = Real.fromInt (length ds) - in - List.app (fn d => inc_recommend d (o1 / l)) ds - end - - fun while1 () = - if !k = number_of_nearest_neighbors then () else (do_k (!k); k := !k + 1; while1 ()) - handle EXIT () => () - - fun while2 () = - if !no_recommends >= max_suggs then () - else (do_k (!k); k := !k + 1; age := !age - 10000.0; while2 ()) - handle EXIT () => () - - fun ret acc at = - if at = num_facts then acc else ret (Array.sub (recommends, at) :: acc) (at + 1) - in - while1 (); - while2 (); - select_visible_facts 1000000000.0 recommends visible_facts; - heap (Real.compare o pairself snd) max_suggs num_facts recommends; - ret [] (Integer.max 0 (num_facts - max_suggs)) - end - fun wider_array_of_vector init vec = let val ary = Array.array init in Array.copyVec {src = vec, dst = ary, di = 0}; @@ -389,10 +362,88 @@ if at = num_facts then acc else ret (at + 1) (Array.sub (posterior, at) :: acc) in select_visible_facts 100000.0 posterior visible_facts; - heap (Real.compare o pairself snd) max_suggs num_facts posterior; + rev_sort_array_prefix (Real.compare o pairself snd) max_suggs posterior; ret (Integer.max 0 (num_facts - max_suggs)) [] end +val number_of_nearest_neighbors = 10 (* FUDGE *) + +fun k_nearest_neighbors dffreq num_facts num_feats depss featss max_suggs visible_facts goal_feats = + let + exception EXIT of unit + + val ln_afreq = Math.ln (Real.fromInt num_facts) + fun tfidf feat = ln_afreq - Math.ln (Real.fromInt (Vector.sub (dffreq, feat))) + + val overlaps_sqr = Array.tabulate (num_facts, rpair 0.0) + + val feat_facts = Array.array (num_feats, []) + val _ = Vector.foldl (fn (feats, fact) => + (List.app (map_array_at feat_facts (cons fact)) feats; fact + 1)) 0 featss + + fun do_feat (s, sw0) = + let + val sw = sw0 * tfidf s + val w2 = sw * sw + + fun inc_overlap j = + let val (_, ov) = Array.sub (overlaps_sqr, j) in + Array.update (overlaps_sqr, j, (j, w2 + ov)) + end + in + List.app inc_overlap (Array.sub (feat_facts, s)) + end + + val _ = List.app do_feat goal_feats + val _ = rev_sort_array_prefix (Real.compare o pairself snd) num_facts overlaps_sqr + val no_recommends = Unsynchronized.ref 0 + val recommends = Array.tabulate (num_facts, rpair 0.0) + val age = Unsynchronized.ref 500000000.0 + + fun inc_recommend v j = + let val (_, ov) = Array.sub (recommends, j) in + if ov <= 0.0 then + (no_recommends := !no_recommends + 1; Array.update (recommends, j, (j, !age + ov))) + else if ov < !age + 1000.0 then + Array.update (recommends, j, (j, v + ov)) + else + () + end + + val k = Unsynchronized.ref 0 + fun do_k k = + if k >= num_facts then + raise EXIT () + else + let + val (j, o2) = Array.sub (overlaps_sqr, num_facts - k - 1) + val o1 = Math.sqrt o2 + val _ = inc_recommend o1 j + val ds = Vector.sub (depss, j) + val l = Real.fromInt (length ds) + in + List.app (inc_recommend (o1 / l)) ds + end + + fun while1 () = + if !k = number_of_nearest_neighbors then () else (do_k (!k); k := !k + 1; while1 ()) + handle EXIT () => () + + fun while2 () = + if !no_recommends >= max_suggs then () + else (do_k (!k); k := !k + 1; age := !age - 10000.0; while2 ()) + handle EXIT () => () + + fun ret acc at = + if at = num_facts then acc else ret (Array.sub (recommends, at) :: acc) (at + 1) + in + while1 (); + while2 (); + select_visible_facts 1000000000.0 recommends visible_facts; + rev_sort_array_prefix (Real.compare o pairself snd) max_suggs recommends; + ret [] (Integer.max 0 (num_facts - max_suggs)) + end + (* experimental *) fun external_tool tool max_suggs learns goal_feats = let @@ -435,26 +486,35 @@ fun query_external ctxt engine max_suggs learns goal_feats = (trace_msg ctxt (fn () => "MaSh query external " ^ commas (map fst goal_feats)); (case engine of - MaSh_kNN_Ext => k_nearest_neighbors_ext max_suggs learns goal_feats - | MaSh_NB_Ext => naive_bayes_ext max_suggs learns goal_feats)) + MaSh_NB_Ext => naive_bayes_ext max_suggs learns goal_feats + | MaSh_kNN_Ext => k_nearest_neighbors_ext max_suggs learns goal_feats)) fun query_internal ctxt engine num_facts num_feats (fact_names, featss, depss) (freqs as (_, _, dffreq)) visible_facts max_suggs goal_feats int_goal_feats = - (trace_msg ctxt (fn () => "MaSh query internal " ^ commas (map fst goal_feats) ^ " from {" ^ - elide_string 1000 (space_implode " " (Vector.foldr (op ::) [] fact_names)) ^ "}"); - (case engine of - MaSh_kNN => - let - val feat_facts = Array.array (num_feats, []) - val _ = - Vector.foldl (fn (feats, fact) => - (List.app (map_array_at feat_facts (cons fact)) feats; fact + 1)) - 0 featss - in - k_nearest_neighbors dffreq num_facts depss feat_facts max_suggs visible_facts int_goal_feats - end - | MaSh_NB => naive_bayes freqs num_facts max_suggs visible_facts int_goal_feats) - |> map (curry Vector.sub fact_names o fst)) + let + fun nb () = + naive_bayes freqs num_facts max_suggs visible_facts int_goal_feats + |> map fst + fun knn () = + k_nearest_neighbors dffreq num_facts num_feats depss featss max_suggs visible_facts + int_goal_feats + |> map fst + in + (trace_msg ctxt (fn () => "MaSh query internal " ^ commas (map fst goal_feats) ^ " from {" ^ + elide_string 1000 (space_implode " " (Vector.foldr (op ::) [] fact_names)) ^ "}"); + (case engine of + MaSh_NB => nb () + | MaSh_kNN => knn () + | MaSh_NB_kNN => + let + val mess = + [(0.5 (* FUDGE *), (weight_facts_steeply (nb ()), [])), + (0.5 (* FUDGE *), (weight_facts_steeply (knn ()), []))] + in + mesh_facts (op =) max_suggs mess + end) + |> map (curry Vector.sub fact_names)) + end end; @@ -481,7 +541,7 @@ val decode_str = String.explode #> unmeta_chars [] val encode_strs = map encode_str #> space_implode " " -val decode_strs = space_explode " " #> filter_out (curry (op =) "") #> map decode_str +val decode_strs = space_explode " " #> map decode_str datatype proof_kind = Isar_Proof | Automatic_Proof | Isar_Proof_wegen_Prover_Flop @@ -706,36 +766,6 @@ |> tap (fn NONE => trace_msg ctxt (fn () => "Cannot find " ^ quote nick) | _ => ()) in map_filter lookup end -fun scaled_avg [] = 0 - | scaled_avg xs = Real.ceil (100000000.0 * fold (curry (op +)) xs 0.0) div length xs - -fun avg [] = 0.0 - | avg xs = fold (curry (op +)) xs 0.0 / Real.fromInt (length xs) - -fun normalize_scores _ [] = [] - | normalize_scores max_facts xs = - map (apsnd (curry Real.* (1.0 / avg (map snd (take max_facts xs))))) xs - -fun mesh_facts fact_eq max_facts [(_, (sels, unks))] = - distinct fact_eq (map fst (take max_facts sels) @ take (max_facts - length sels) unks) - | mesh_facts fact_eq max_facts mess = - let - val mess = mess |> map (apsnd (apfst (normalize_scores max_facts))) - - fun score_in fact (global_weight, (sels, unks)) = - let val score_at = try (nth sels) #> Option.map (fn (_, score) => global_weight * score) in - (case find_index (curry fact_eq fact o fst) sels of - ~1 => if member fact_eq unks fact then NONE else SOME 0.0 - | rank => score_at rank) - end - - fun weight_of fact = mess |> map_filter (score_in fact) |> scaled_avg - in - fold (union fact_eq o map fst o take max_facts o fst o snd) mess [] - |> map (`weight_of) |> sort (int_ord o swap o pairself fst) - |> map snd |> take max_facts - end - fun free_feature_of s = "f" ^ s fun thy_feature_of s = "y" ^ s fun type_feature_of s = "t" ^ s @@ -1088,9 +1118,13 @@ find_maxes Symtab.empty ([], Graph.maximals G) end -fun maximal_wrt_access_graph access_G facts = - map (nickname_of_thm o snd) facts - |> maximal_wrt_graph access_G +fun maximal_wrt_access_graph _ [] = [] + | maximal_wrt_access_graph access_G ((fact as (_, th)) :: facts) = + let val thy = theory_of_thm th in + fact :: filter_out (fn (_, th') => Theory.subthy (theory_of_thm th', thy)) facts + |> map (nickname_of_thm o snd) + |> maximal_wrt_graph access_G + end fun is_fact_in_graph access_G = can (Graph.get_node access_G) o nickname_of_thm @@ -1098,20 +1132,6 @@ val extra_feature_factor = 0.1 (* FUDGE *) val num_extra_feature_facts = 10 (* FUDGE *) -(* FUDGE *) -fun weight_of_proximity_fact rank = - Math.pow (1.3, 15.5 - 0.2 * Real.fromInt rank) + 15.0 - -fun weight_facts_smoothly facts = - facts ~~ map weight_of_proximity_fact (0 upto length facts - 1) - -(* FUDGE *) -fun steep_weight_of_fact rank = - Math.pow (0.62, log2 (Real.fromInt (rank + 1))) - -fun weight_facts_steeply facts = - facts ~~ map steep_weight_of_fact (0 upto length facts - 1) - val max_proximity_facts = 100 fun find_mash_suggestions ctxt max_facts suggs facts chained raw_unknown = @@ -1136,8 +1156,11 @@ val thy_name = Context.theory_name thy val engine = the_mash_engine () - val facts = facts |> sort (crude_thm_ord o pairself snd o swap) - val chained = facts |> filter (fn ((_, (scope, _)), _) => scope = Chained) + val facts = facts + |> rev_sort_list_prefix (crude_thm_ord o pairself snd) + (Int.max (num_extra_feature_facts, max_proximity_facts)) + + val chained = filter (fn ((_, (scope, _)), _) => scope = Chained) facts fun fact_has_right_theory (_, th) = thy_name = Context.theory_name (theory_of_thm th) @@ -1147,53 +1170,44 @@ |> features_of ctxt (theory_of_thm th) stature |> map (rpair (weight * factor)) - fun query_args access_G = - let - val parents = maximal_wrt_access_graph access_G facts - - val goal_feats = features_of ctxt thy (Local, General) (concl_t :: hyp_ts) - val chained_feats = chained - |> map (rpair 1.0) - |> map (chained_or_extra_features_of chained_feature_factor) - |> rpair [] |-> fold (union (eq_fst (op =))) - val extra_feats = - facts - |> take (Int.max (0, num_extra_feature_facts - length chained)) - |> filter fact_has_right_theory - |> weight_facts_steeply - |> map (chained_or_extra_features_of extra_feature_factor) - |> rpair [] |-> fold (union (eq_fst (op =))) - val feats = - fold (union (eq_fst (op =))) [chained_feats, extra_feats] (map (rpair 1.0) goal_feats) - |> debug ? sort (Real.compare o swap o pairself snd) - in - (parents, feats) - end - val {access_G, xtabs = ((num_facts, fact_tab), (num_feats, feat_tab)), ffds, freqs, ...} = peek_state ctxt + val goal_feats0 = features_of ctxt thy (Local, General) (concl_t :: hyp_ts) + val chained_feats = chained + |> map (rpair 1.0) + |> map (chained_or_extra_features_of chained_feature_factor) + |> rpair [] |-> fold (union (eq_fst (op =))) + val extra_feats = facts + |> take (Int.max (0, num_extra_feature_facts - length chained)) + |> filter fact_has_right_theory + |> weight_facts_steeply + |> map (chained_or_extra_features_of extra_feature_factor) + |> rpair [] |-> fold (union (eq_fst (op =))) + + val goal_feats = + fold (union (eq_fst (op =))) [chained_feats, extra_feats] (map (rpair 1.0) goal_feats0) + |> debug ? sort (Real.compare o swap o pairself snd) + + val parents = maximal_wrt_access_graph access_G facts + val visible_facts = map_filter (Symtab.lookup fact_tab) (Graph.all_preds access_G parents) + val suggs = - let - val (parents, goal_feats) = query_args access_G - val visible_facts = map_filter (Symtab.lookup fact_tab) (Graph.all_preds access_G parents) - in - if engine = MaSh_kNN_Ext orelse engine = MaSh_NB_Ext then - let - val learns = - Graph.schedule (fn _ => fn (fact, (_, feats, deps)) => (fact, feats, deps)) access_G - in - MaSh.query_external ctxt engine max_suggs learns goal_feats - end - else - let - val int_goal_feats = - map_filter (fn (s, w) => Option.map (rpair w) (Symtab.lookup feat_tab s)) goal_feats - in - MaSh.query_internal ctxt engine num_facts num_feats ffds freqs visible_facts max_suggs - goal_feats int_goal_feats - end - end + if engine = MaSh_NB_Ext orelse engine = MaSh_kNN_Ext then + let + val learns = + Graph.schedule (fn _ => fn (fact, (_, feats, deps)) => (fact, feats, deps)) access_G + in + MaSh.query_external ctxt engine max_suggs learns goal_feats + end + else + let + val int_goal_feats = + map_filter (fn (s, w) => Option.map (rpair w) (Symtab.lookup feat_tab s)) goal_feats + in + MaSh.query_internal ctxt engine num_facts num_feats ffds freqs visible_facts max_suggs + goal_feats int_goal_feats + end val unknown = filter_out (is_fact_in_graph access_G o snd) facts in @@ -1256,6 +1270,7 @@ let val thy = Proof_Context.theory_of ctxt val feats = features_of ctxt thy (Local, General) [t] + val facts = rev_sort_list_prefix (crude_thm_ord o pairself snd) 1 facts in map_state ctxt (fn {access_G, xtabs as ((num_facts0, _), _), ffds, freqs, dirty_facts} => @@ -1587,7 +1602,6 @@ end fun kill_learners () = Async_Manager.kill_threads MaShN "learner" - fun running_learners () = Async_Manager.running_threads MaShN "learner" end; diff -r c2ee3f6754c8 -r 048606cf1b8e src/HOL/Tools/Sledgehammer/sledgehammer_proof_methods.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_proof_methods.ML Tue Jul 01 17:01:48 2014 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_proof_methods.ML Tue Jul 01 17:06:54 2014 +0200 @@ -76,6 +76,7 @@ | is_proof_method_direct Meson_Method = true | is_proof_method_direct SMT2_Method = true | is_proof_method_direct Simp_Method = true + | is_proof_method_direct Simp_Size_Method = true | is_proof_method_direct _ = false fun maybe_paren s = s |> not (Symbol_Pos.is_identifier s) ? enclose "(" ")" @@ -103,7 +104,7 @@ maybe_paren (space_implode " " (meth_s :: ss)) end -val silence_unifier = Try0.silence_methods false +val silence_methods = Try0.silence_methods false fun tac_of_proof_method ctxt (local_facts, global_facts) meth = Method.insert_tac local_facts THEN' @@ -113,22 +114,22 @@ Metis_Tactic.metis_tac [type_enc_opt |> the_default (hd partial_type_encs)] (lam_trans_opt |> the_default default_metis_lam_trans) ctxt global_facts end - | Meson_Method => Meson_Tactic.meson_general_tac ctxt global_facts + | Meson_Method => Meson_Tactic.meson_general_tac (silence_methods ctxt) global_facts | SMT2_Method => let val ctxt = Config.put SMT2_Config.verbose false ctxt in SMT2_Solver.smt2_tac ctxt global_facts end - | Simp_Method => Simplifier.asm_full_simp_tac (ctxt addsimps global_facts) + | Simp_Method => Simplifier.asm_full_simp_tac (silence_methods ctxt addsimps global_facts) + | Simp_Size_Method => + Simplifier.asm_full_simp_tac (silence_methods ctxt addsimps @{thms size_ne_size_imp_ne}) | _ => Method.insert_tac global_facts THEN' (case meth of SATx_Method => SAT.satx_tac ctxt | Blast_Method => blast_tac ctxt - | Simp_Size_Method => - Simplifier.asm_full_simp_tac (Simplifier.add_simp @{thm size_ne_size_imp_ne} ctxt) - | Auto_Method => K (Clasimp.auto_tac (silence_unifier ctxt)) - | Fastforce_Method => Clasimp.fast_force_tac (silence_unifier ctxt) - | Force_Method => Clasimp.force_tac (silence_unifier ctxt) + | Auto_Method => K (Clasimp.auto_tac (silence_methods ctxt)) + | Fastforce_Method => Clasimp.fast_force_tac (silence_methods ctxt) + | Force_Method => Clasimp.force_tac (silence_methods ctxt) | Linarith_Method => let val ctxt = Config.put Lin_Arith.verbose false ctxt in Lin_Arith.tac ctxt end | Presburger_Method => Cooper.tac true [] [] ctxt diff -r c2ee3f6754c8 -r 048606cf1b8e src/HOL/Tools/Sledgehammer/sledgehammer_prover_atp.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_prover_atp.ML Tue Jul 01 17:01:48 2014 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_prover_atp.ML Tue Jul 01 17:06:54 2014 +0200 @@ -383,7 +383,7 @@ val atp_proof = atp_proof |> termify_atp_proof ctxt name format type_enc pool lifted sym_tab - |> introduce_spass_skolem + |> spass ? introduce_spass_skolem |> factify_atp_proof (map fst used_from) hyp_ts concl_t in (verbose, (metis_type_enc, metis_lam_trans), preplay_timeout, compress, try0, diff -r c2ee3f6754c8 -r 048606cf1b8e src/HOL/Tools/etc/options --- a/src/HOL/Tools/etc/options Tue Jul 01 17:01:48 2014 +0200 +++ b/src/HOL/Tools/etc/options Tue Jul 01 17:06:54 2014 +0200 @@ -36,4 +36,4 @@ -- "status of Z3 activation for non-commercial use (yes, no, unknown)" public option MaSh : string = "sml" - -- "machine learning engine to use by Sledgehammer (sml, nb, knn, none)" + -- "machine learning engine to use by Sledgehammer (nb_knn, nb, knn, none)"