# HG changeset patch # User blanchet # Date 1342035799 -7200 # Node ID b6eb45a52c286928ddc855e8cbd7a5bac54ff75b # Parent 8f37d2ddabc8a7db82f5289aa13ccca757378973 split relevance filter code into three files diff -r 8f37d2ddabc8 -r b6eb45a52c28 src/HOL/Tools/Sledgehammer/sledgehammer_filter.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_filter.ML Wed Jul 11 21:43:19 2012 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_filter.ML Wed Jul 11 21:43:19 2012 +0200 @@ -2,50 +2,17 @@ Author: Jia Meng, Cambridge University Computer Laboratory and NICTA Author: Jasmin Blanchette, TU Muenchen -Sledgehammer's relevance filter. +Sledgehammer's hybrid relevance filter. *) signature SLEDGEHAMMER_FILTER = sig type status = ATP_Problem_Generate.status type stature = ATP_Problem_Generate.stature + type relevance_fudge = Sledgehammer_Filter_Iter.relevance_fudge + type relevance_override = Sledgehammer_Filter_Iter.relevance_override - type relevance_fudge = - {local_const_multiplier : real, - worse_irrel_freq : real, - higher_order_irrel_weight : real, - abs_rel_weight : real, - abs_irrel_weight : real, - skolem_irrel_weight : real, - theory_const_rel_weight : real, - theory_const_irrel_weight : real, - chained_const_irrel_weight : real, - intro_bonus : real, - elim_bonus : real, - simp_bonus : real, - local_bonus : real, - assum_bonus : real, - chained_bonus : real, - max_imperfect : real, - max_imperfect_exp : real, - threshold_divisor : real, - ridiculous_threshold : real} - - type relevance_override = - {add : (Facts.ref * Attrib.src list) list, - del : (Facts.ref * Attrib.src list) list, - only : bool} - - val trace : bool Config.T - val ignore_no_atp : bool Config.T - val instantiate_inducts : bool Config.T - val pseudo_abs_name : string - val pseudo_skolem_prefix : string val no_relevance_override : relevance_override - val const_names_in_fact : - theory -> (string * typ -> term list -> bool * term list) -> term - -> string list - val clasimpset_rule_table_of : Proof.context -> status Termtab.table val fact_from_ref : Proof.context -> unit Symtab.table -> thm list -> status Termtab.table -> Facts.ref * Attrib.src list -> ((string * stature) * thm) list @@ -53,6 +20,7 @@ Proof.context -> bool -> 'a Symtab.table -> bool -> thm list -> thm list -> status Termtab.table -> (((unit -> string) * stature) * thm) list + val clasimpset_rule_table_of : Proof.context -> status Termtab.table val maybe_instantiate_inducts : Proof.context -> term list -> term -> (((unit -> string) * 'a) * thm) list -> (((unit -> string) * 'a) * thm) list @@ -74,103 +42,11 @@ open ATP_Problem_Generate open Metis_Tactic open Sledgehammer_Util - -val trace = - Attrib.setup_config_bool @{binding sledgehammer_filter_trace} (K false) -fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else () - -(* experimental features *) -val ignore_no_atp = - Attrib.setup_config_bool @{binding sledgehammer_ignore_no_atp} (K false) -val instantiate_inducts = - Attrib.setup_config_bool @{binding sledgehammer_instantiate_inducts} (K false) - -type relevance_fudge = - {local_const_multiplier : real, - worse_irrel_freq : real, - higher_order_irrel_weight : real, - abs_rel_weight : real, - abs_irrel_weight : real, - skolem_irrel_weight : real, - theory_const_rel_weight : real, - theory_const_irrel_weight : real, - chained_const_irrel_weight : real, - intro_bonus : real, - elim_bonus : real, - simp_bonus : real, - local_bonus : real, - assum_bonus : real, - chained_bonus : real, - max_imperfect : real, - max_imperfect_exp : real, - threshold_divisor : real, - ridiculous_threshold : real} - -type relevance_override = - {add : (Facts.ref * Attrib.src list) list, - del : (Facts.ref * Attrib.src list) list, - only : bool} - -val no_relevance_override = {add = [], del = [], only = false} +open Sledgehammer_Filter_Iter val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator -val pseudo_abs_name = sledgehammer_prefix ^ "abs" -val pseudo_skolem_prefix = sledgehammer_prefix ^ "sko" -val theory_const_suffix = Long_Name.separator ^ " 1" -(* unfolding these can yield really huge terms *) -val risky_defs = @{thms Bit0_def Bit1_def} - -fun is_rec_eq lhs = Term.exists_subterm (curry (op =) (head_of lhs)) -fun is_rec_def (@{const Trueprop} $ t) = is_rec_def t - | is_rec_def (@{const ==>} $ _ $ t2) = is_rec_def t2 - | is_rec_def (Const (@{const_name "=="}, _) $ t1 $ t2) = is_rec_eq t1 t2 - | is_rec_def (Const (@{const_name HOL.eq}, _) $ t1 $ t2) = is_rec_eq t1 t2 - | is_rec_def _ = false - -fun clasimpset_rule_table_of ctxt = - let - val thy = Proof_Context.theory_of ctxt - val atomize = HOLogic.mk_Trueprop o Object_Logic.atomize_term thy - fun add stature normalizers get_th = - fold (fn rule => - let - val th = rule |> get_th - val t = - th |> Thm.maxidx_of th > 0 ? zero_var_indexes |> prop_of - in - fold (fn normalize => Termtab.update (normalize t, stature)) - (I :: normalizers) - end) - val {safeIs, (* safeEs, *) hazIs, (* hazEs, *) ...} = - ctxt |> claset_of |> Classical.rep_cs - val intros = Item_Net.content safeIs @ Item_Net.content hazIs -(* Add once it is used: - val elims = - Item_Net.content safeEs @ Item_Net.content hazEs - |> map Classical.classical_rule -*) - val simps = ctxt |> simpset_of |> dest_ss |> #simps - val specs = ctxt |> Spec_Rules.get - val (rec_defs, nonrec_defs) = - specs |> filter (curry (op =) Spec_Rules.Equational o fst) - |> maps (snd o snd) - |> filter_out (member Thm.eq_thm_prop risky_defs) - |> List.partition (is_rec_def o prop_of) - val spec_intros = - specs |> filter (member (op =) [Spec_Rules.Inductive, - Spec_Rules.Co_Inductive] o fst) - |> maps (snd o snd) - in - Termtab.empty |> add Simp [atomize] snd simps - |> add Simp [] I rec_defs - |> add Def [] I nonrec_defs -(* Add once it is used: - |> add Elim [] I elims -*) - |> add Intro [] I intros - |> add Inductive [] I spec_intros - end +val no_relevance_override = {add = [], del = [], only = false} fun needs_quoting reserved s = Symtab.defined reserved s orelse @@ -187,6 +63,16 @@ val backquote = raw_explode #> map (fn "`" => "\\`" | s => s) #> implode #> enclose "`" "`" +(* unfolding these can yield really huge terms *) +val risky_defs = @{thms Bit0_def Bit1_def} + +fun is_rec_eq lhs = Term.exists_subterm (curry (op =) (head_of lhs)) +fun is_rec_def (@{const Trueprop} $ t) = is_rec_def t + | is_rec_def (@{const ==>} $ _ $ t2) = is_rec_def t2 + | is_rec_def (Const (@{const_name "=="}, _) $ t1 $ t2) = is_rec_eq t1 t2 + | is_rec_def (Const (@{const_name HOL.eq}, _) $ t1 $ t2) = is_rec_eq t1 t2 + | is_rec_def _ = false + fun is_assum assms th = exists (fn ct => prop_of th aconv term_of ct) assms fun is_chained chained_ths = member Thm.eq_thm_prop chained_ths @@ -240,571 +126,6 @@ |> snd end -(* This is a terrible hack. Free variables are sometimes coded as "M__" when - they are displayed as "M" and we want to avoid clashes with these. But - sometimes it's even worse: "Ma__" encodes "M". So we simply reserve all - prefixes of all free variables. In the worse case scenario, where the fact - won't be resolved correctly, the user can fix it manually, e.g., by naming - the fact in question. Ideally we would need nothing of it, but backticks - simply don't work with schematic variables. *) -fun all_prefixes_of s = - map (fn i => String.extract (s, 0, SOME i)) (1 upto size s - 1) -fun close_form t = - (t, [] |> Term.add_free_names t |> maps all_prefixes_of) - |> fold (fn ((s, i), T) => fn (t', taken) => - let val s' = singleton (Name.variant_list taken) s in - ((if fastype_of t' = HOLogic.boolT then HOLogic.all_const - else Logic.all_const) T - $ Abs (s', T, abstract_over (Var ((s, i), T), t')), - s' :: taken) - end) - (Term.add_vars t [] |> sort_wrt (fst o fst)) - |> fst - -fun string_for_term ctxt t = - Print_Mode.setmp (filter (curry (op =) Symbol.xsymbolsN) - (print_mode_value ())) (Syntax.string_of_term ctxt) t - |> String.translate (fn c => if Char.isPrint c then str c else "") - |> simplify_spaces - -(** Structural induction rules **) - -fun struct_induct_rule_on th = - case Logic.strip_horn (prop_of th) of - (prems, @{const Trueprop} - $ ((p as Var ((p_name, 0), _)) $ (a as Var (_, ind_T)))) => - if not (is_TVar ind_T) andalso length prems > 1 andalso - exists (exists_subterm (curry (op aconv) p)) prems andalso - not (exists (exists_subterm (curry (op aconv) a)) prems) then - SOME (p_name, ind_T) - else - NONE - | _ => NONE - -fun instantiate_induct_rule ctxt concl_prop p_name ((name, stature), th) ind_x = - let - fun varify_noninducts (t as Free (s, T)) = - if (s, T) = ind_x orelse can dest_funT T then t else Var ((s, 0), T) - | varify_noninducts t = t - val p_inst = - concl_prop |> map_aterms varify_noninducts |> close_form - |> lambda (Free ind_x) - |> string_for_term ctxt - in - ((fn () => name () ^ "[where " ^ p_name ^ " = " ^ quote p_inst ^ "]", - stature), th |> read_instantiate ctxt [((p_name, 0), p_inst)]) - end - -fun type_match thy (T1, T2) = - (Sign.typ_match thy (T2, T1) Vartab.empty; true) - handle Type.TYPE_MATCH => false - -fun instantiate_if_induct_rule ctxt stmt stmt_xs (ax as (_, th)) = - case struct_induct_rule_on th of - SOME (p_name, ind_T) => - let val thy = Proof_Context.theory_of ctxt in - stmt_xs |> filter (fn (_, T) => type_match thy (T, ind_T)) - |> map_filter (try (instantiate_induct_rule ctxt stmt p_name ax)) - end - | NONE => [ax] - -(***************************************************************) -(* Relevance Filtering *) -(***************************************************************) - -(*** constants with types ***) - -fun order_of_type (Type (@{type_name fun}, [T1, T2])) = - Int.max (order_of_type T1 + 1, order_of_type T2) - | order_of_type (Type (_, Ts)) = fold (Integer.max o order_of_type) Ts 0 - | order_of_type _ = 0 - -(* An abstraction of Isabelle types and first-order terms *) -datatype pattern = PVar | PApp of string * pattern list -datatype ptype = PType of int * pattern list - -fun string_for_pattern PVar = "_" - | string_for_pattern (PApp (s, ps)) = - if null ps then s else s ^ string_for_patterns ps -and string_for_patterns ps = "(" ^ commas (map string_for_pattern ps) ^ ")" -fun string_for_ptype (PType (_, ps)) = string_for_patterns ps - -(*Is the second type an instance of the first one?*) -fun match_pattern (PVar, _) = true - | match_pattern (PApp _, PVar) = false - | match_pattern (PApp (s, ps), PApp (t, qs)) = - s = t andalso match_patterns (ps, qs) -and match_patterns (_, []) = true - | match_patterns ([], _) = false - | match_patterns (p :: ps, q :: qs) = - match_pattern (p, q) andalso match_patterns (ps, qs) -fun match_ptype (PType (_, ps), PType (_, qs)) = match_patterns (ps, qs) - -(* Is there a unifiable constant? *) -fun pconst_mem f consts (s, ps) = - exists (curry (match_ptype o f) ps) - (map snd (filter (curry (op =) s o fst) consts)) -fun pconst_hyper_mem f const_tab (s, ps) = - exists (curry (match_ptype o f) ps) (these (Symtab.lookup const_tab s)) - -fun pattern_for_type (Type (s, Ts)) = PApp (s, map pattern_for_type Ts) - | pattern_for_type (TFree (s, _)) = PApp (s, []) - | pattern_for_type (TVar _) = PVar - -(* Pairs a constant with the list of its type instantiations. *) -fun ptype thy const x = - (if const then map pattern_for_type (these (try (Sign.const_typargs thy) x)) - else []) -fun rich_ptype thy const (s, T) = - PType (order_of_type T, ptype thy const (s, T)) -fun rich_pconst thy const (s, T) = (s, rich_ptype thy const (s, T)) - -fun string_for_hyper_pconst (s, ps) = - s ^ "{" ^ commas (map string_for_ptype ps) ^ "}" - -(* Add a pconstant to the table, but a [] entry means a standard - connective, which we ignore.*) -fun add_pconst_to_table also_skolem (s, p) = - if (not also_skolem andalso String.isPrefix pseudo_skolem_prefix s) then I - else Symtab.map_default (s, [p]) (insert (op =) p) - -(* Set constants tend to pull in too many irrelevant facts. We limit the damage - by treating them more or less as if they were built-in but add their - axiomatization at the end. *) -val set_consts = [@{const_name Collect}, @{const_name Set.member}] -val set_thms = @{thms Collect_mem_eq mem_Collect_eq Collect_cong} - -fun add_pconsts_in_term thy is_built_in_const also_skolems pos = - let - val flip = Option.map not - (* We include free variables, as well as constants, to handle locales. For - each quantifiers that must necessarily be skolemized by the automatic - prover, we introduce a fresh constant to simulate the effect of - Skolemization. *) - fun do_const const ext_arg (x as (s, _)) ts = - let val (built_in, ts) = is_built_in_const x ts in - if member (op =) set_consts s then - fold (do_term ext_arg) ts - else - (not built_in - ? add_pconst_to_table also_skolems (rich_pconst thy const x)) - #> fold (do_term false) ts - end - and do_term ext_arg t = - case strip_comb t of - (Const x, ts) => do_const true ext_arg x ts - | (Free x, ts) => do_const false ext_arg x ts - | (Abs (_, T, t'), ts) => - ((null ts andalso not ext_arg) - (* Since lambdas on the right-hand side of equalities are usually - extensionalized later by "abs_extensionalize_term", we don't - penalize them here. *) - ? add_pconst_to_table true (pseudo_abs_name, - PType (order_of_type T + 1, []))) - #> fold (do_term false) (t' :: ts) - | (_, ts) => fold (do_term false) ts - fun do_quantifier will_surely_be_skolemized abs_T body_t = - do_formula pos body_t - #> (if also_skolems andalso will_surely_be_skolemized then - add_pconst_to_table true (pseudo_skolem_prefix ^ serial_string (), - PType (order_of_type abs_T, [])) - else - I) - and do_term_or_formula ext_arg T = - if T = HOLogic.boolT then do_formula NONE else do_term ext_arg - and do_formula pos t = - case t of - Const (@{const_name all}, _) $ Abs (_, T, t') => - do_quantifier (pos = SOME false) T t' - | @{const "==>"} $ t1 $ t2 => - do_formula (flip pos) t1 #> do_formula pos t2 - | Const (@{const_name "=="}, Type (_, [T, _])) $ t1 $ t2 => - do_term_or_formula false T t1 #> do_term_or_formula true T t2 - | @{const Trueprop} $ t1 => do_formula pos t1 - | @{const False} => I - | @{const True} => I - | @{const Not} $ t1 => do_formula (flip pos) t1 - | Const (@{const_name All}, _) $ Abs (_, T, t') => - do_quantifier (pos = SOME false) T t' - | Const (@{const_name Ex}, _) $ Abs (_, T, t') => - do_quantifier (pos = SOME true) T t' - | @{const HOL.conj} $ t1 $ t2 => fold (do_formula pos) [t1, t2] - | @{const HOL.disj} $ t1 $ t2 => fold (do_formula pos) [t1, t2] - | @{const HOL.implies} $ t1 $ t2 => - do_formula (flip pos) t1 #> do_formula pos t2 - | Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2 => - do_term_or_formula false T t1 #> do_term_or_formula true T t2 - | Const (@{const_name If}, Type (_, [_, Type (_, [T, _])])) - $ t1 $ t2 $ t3 => - do_formula NONE t1 #> fold (do_term_or_formula false T) [t2, t3] - | Const (@{const_name Ex1}, _) $ Abs (_, T, t') => - do_quantifier (is_some pos) T t' - | Const (@{const_name Ball}, _) $ t1 $ Abs (_, T, t') => - do_quantifier (pos = SOME false) T - (HOLogic.mk_imp (incr_boundvars 1 t1 $ Bound 0, t')) - | Const (@{const_name Bex}, _) $ t1 $ Abs (_, T, t') => - do_quantifier (pos = SOME true) T - (HOLogic.mk_conj (incr_boundvars 1 t1 $ Bound 0, t')) - | (t0 as Const (_, @{typ bool})) $ t1 => - do_term false t0 #> do_formula pos t1 (* theory constant *) - | _ => do_term false t - in do_formula pos end - -fun pconsts_in_fact thy is_built_in_const t = - Symtab.fold (fn (s, pss) => fold (cons o pair s) pss) - (Symtab.empty |> add_pconsts_in_term thy is_built_in_const true - (SOME true) t) [] - -val const_names_in_fact = map fst ooo pconsts_in_fact - -(* Inserts a dummy "constant" referring to the theory name, so that relevance - takes the given theory into account. *) -fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...} - : relevance_fudge) thy_name t = - if exists (curry (op <) 0.0) [theory_const_rel_weight, - theory_const_irrel_weight] then - Const (thy_name ^ theory_const_suffix, @{typ bool}) $ t - else - t - -fun theory_const_prop_of fudge th = - theory_constify fudge (Context.theory_name (theory_of_thm th)) (prop_of th) - -fun pair_consts_fact thy is_built_in_const fudge fact = - case fact |> snd |> theory_const_prop_of fudge - |> pconsts_in_fact thy is_built_in_const of - [] => NONE - | consts => SOME ((fact, consts), NONE) - - -(**** Constant / Type Frequencies ****) - -(* A two-dimensional symbol table counts frequencies of constants. It's keyed - first by constant name and second by its list of type instantiations. For the - latter, we need a linear ordering on "pattern list". *) - -fun pattern_ord p = - case p of - (PVar, PVar) => EQUAL - | (PVar, PApp _) => LESS - | (PApp _, PVar) => GREATER - | (PApp q1, PApp q2) => - prod_ord fast_string_ord (dict_ord pattern_ord) (q1, q2) -fun ptype_ord (PType p, PType q) = - prod_ord (dict_ord pattern_ord) int_ord (swap p, swap q) - -structure PType_Tab = Table(type key = ptype val ord = ptype_ord) - -fun count_fact_consts thy fudge = - let - fun do_const const (s, T) ts = - (* Two-dimensional table update. Constant maps to types maps to count. *) - PType_Tab.map_default (rich_ptype thy const (s, T), 0) (Integer.add 1) - |> Symtab.map_default (s, PType_Tab.empty) - #> fold do_term ts - and do_term t = - case strip_comb t of - (Const x, ts) => do_const true x ts - | (Free x, ts) => do_const false x ts - | (Abs (_, _, t'), ts) => fold do_term (t' :: ts) - | (_, ts) => fold do_term ts - in do_term o theory_const_prop_of fudge o snd end - - -(**** Actual Filtering Code ****) - -fun pow_int _ 0 = 1.0 - | pow_int x 1 = x - | pow_int x n = if n > 0 then x * pow_int x (n - 1) else pow_int x (n + 1) / x - -(*The frequency of a constant is the sum of those of all instances of its type.*) -fun pconst_freq match const_tab (c, ps) = - PType_Tab.fold (fn (qs, m) => match (ps, qs) ? Integer.add m) - (the (Symtab.lookup const_tab c)) 0 - - -(* A surprising number of theorems contain only a few significant constants. - These include all induction rules, and other general theorems. *) - -(* "log" seems best in practice. A constant function of one ignores the constant - frequencies. Rare constants give more points if they are relevant than less - rare ones. *) -fun rel_weight_for _ freq = 1.0 + 2.0 / Math.ln (Real.fromInt freq + 1.0) - -(* Irrelevant constants are treated differently. We associate lower penalties to - very rare constants and very common ones -- the former because they can't - lead to the inclusion of too many new facts, and the latter because they are - so common as to be of little interest. *) -fun irrel_weight_for ({worse_irrel_freq, higher_order_irrel_weight, ...} - : relevance_fudge) order freq = - let val (k, x) = worse_irrel_freq |> `Real.ceil in - (if freq < k then Math.ln (Real.fromInt (freq + 1)) / Math.ln x - else rel_weight_for order freq / rel_weight_for order k) - * pow_int higher_order_irrel_weight (order - 1) - end - -fun multiplier_for_const_name local_const_multiplier s = - if String.isSubstring "." s then 1.0 else local_const_multiplier - -(* Computes a constant's weight, as determined by its frequency. *) -fun generic_pconst_weight local_const_multiplier abs_weight skolem_weight - theory_const_weight chained_const_weight weight_for f - const_tab chained_const_tab (c as (s, PType (m, _))) = - if s = pseudo_abs_name then - abs_weight - else if String.isPrefix pseudo_skolem_prefix s then - skolem_weight - else if String.isSuffix theory_const_suffix s then - theory_const_weight - else - multiplier_for_const_name local_const_multiplier s - * weight_for m (pconst_freq (match_ptype o f) const_tab c) - |> (if chained_const_weight < 1.0 andalso - pconst_hyper_mem I chained_const_tab c then - curry (op *) chained_const_weight - else - I) - -fun rel_pconst_weight ({local_const_multiplier, abs_rel_weight, - theory_const_rel_weight, ...} : relevance_fudge) - const_tab = - generic_pconst_weight local_const_multiplier abs_rel_weight 0.0 - theory_const_rel_weight 0.0 rel_weight_for I const_tab - Symtab.empty - -fun irrel_pconst_weight (fudge as {local_const_multiplier, abs_irrel_weight, - skolem_irrel_weight, - theory_const_irrel_weight, - chained_const_irrel_weight, ...}) - const_tab chained_const_tab = - generic_pconst_weight local_const_multiplier abs_irrel_weight - skolem_irrel_weight theory_const_irrel_weight - chained_const_irrel_weight (irrel_weight_for fudge) swap - const_tab chained_const_tab - -fun stature_bonus ({intro_bonus, ...} : relevance_fudge) (_, Intro) = - intro_bonus - | stature_bonus {elim_bonus, ...} (_, Elim) = elim_bonus - | stature_bonus {simp_bonus, ...} (_, Simp) = simp_bonus - | stature_bonus {local_bonus, ...} (Local, _) = local_bonus - | stature_bonus {assum_bonus, ...} (Assum, _) = assum_bonus - | stature_bonus {chained_bonus, ...} (Chained, _) = chained_bonus - | stature_bonus _ _ = 0.0 - -fun is_odd_const_name s = - s = pseudo_abs_name orelse String.isPrefix pseudo_skolem_prefix s orelse - String.isSuffix theory_const_suffix s - -fun fact_weight fudge stature const_tab relevant_consts chained_consts - fact_consts = - case fact_consts |> List.partition (pconst_hyper_mem I relevant_consts) - ||> filter_out (pconst_hyper_mem swap relevant_consts) of - ([], _) => 0.0 - | (rel, irrel) => - if forall (forall (is_odd_const_name o fst)) [rel, irrel] then - 0.0 - else - let - val irrel = irrel |> filter_out (pconst_mem swap rel) - val rel_weight = - 0.0 |> fold (curry (op +) o rel_pconst_weight fudge const_tab) rel - val irrel_weight = - ~ (stature_bonus fudge stature) - |> fold (curry (op +) - o irrel_pconst_weight fudge const_tab chained_consts) irrel - val res = rel_weight / (rel_weight + irrel_weight) - in if Real.isFinite res then res else 0.0 end - -type annotated_thm = - (((unit -> string) * stature) * thm) * (string * ptype) list - -fun take_most_relevant ctxt max_relevant remaining_max - ({max_imperfect, max_imperfect_exp, ...} : relevance_fudge) - (candidates : (annotated_thm * real) list) = - let - val max_imperfect = - Real.ceil (Math.pow (max_imperfect, - Math.pow (Real.fromInt remaining_max - / Real.fromInt max_relevant, max_imperfect_exp))) - val (perfect, imperfect) = - candidates |> sort (Real.compare o swap o pairself snd) - |> take_prefix (fn (_, w) => w > 0.99999) - val ((accepts, more_rejects), rejects) = - chop max_imperfect imperfect |>> append perfect |>> chop remaining_max - in - trace_msg ctxt (fn () => - "Actually passed (" ^ string_of_int (length accepts) ^ " of " ^ - string_of_int (length candidates) ^ "): " ^ - (accepts |> map (fn ((((name, _), _), _), weight) => - name () ^ " [" ^ Real.toString weight ^ "]") - |> commas)); - (accepts, more_rejects @ rejects) - end - -fun if_empty_replace_with_scope thy is_built_in_const facts sc tab = - if Symtab.is_empty tab then - Symtab.empty - |> fold (add_pconsts_in_term thy is_built_in_const false (SOME false)) - (map_filter (fn ((_, (sc', _)), th) => - if sc' = sc then SOME (prop_of th) else NONE) facts) - else - tab - -fun consider_arities is_built_in_const th = - let - fun aux _ _ NONE = NONE - | aux t args (SOME tab) = - case t of - t1 $ t2 => SOME tab |> aux t1 (t2 :: args) |> aux t2 [] - | Const (x as (s, _)) => - (if is_built_in_const x args |> fst then - SOME tab - else case Symtab.lookup tab s of - NONE => SOME (Symtab.update (s, length args) tab) - | SOME n => if n = length args then SOME tab else NONE) - | _ => SOME tab - in aux (prop_of th) [] end - -(* FIXME: This is currently only useful for polymorphic type encodings. *) -fun could_benefit_from_ext is_built_in_const facts = - fold (consider_arities is_built_in_const o snd) facts (SOME Symtab.empty) - |> is_none - -(* High enough so that it isn't wrongly considered as very relevant (e.g., for E - weights), but low enough so that it is unlikely to be truncated away if few - facts are included. *) -val special_fact_index = 75 - -fun relevance_filter ctxt threshold0 decay max_relevant is_built_in_const - (fudge as {threshold_divisor, ridiculous_threshold, ...}) - ({add, del, ...} : relevance_override) facts chained_ts hyp_ts concl_t = - let - val thy = Proof_Context.theory_of ctxt - val const_tab = fold (count_fact_consts thy fudge) facts Symtab.empty - val add_pconsts = add_pconsts_in_term thy is_built_in_const false o SOME - val chained_const_tab = Symtab.empty |> fold (add_pconsts true) chained_ts - val goal_const_tab = - Symtab.empty |> fold (add_pconsts true) hyp_ts - |> add_pconsts false concl_t - |> (fn tab => if Symtab.is_empty tab then chained_const_tab else tab) - |> fold (if_empty_replace_with_scope thy is_built_in_const facts) - [Chained, Assum, Local] - val add_ths = Attrib.eval_thms ctxt add - val del_ths = Attrib.eval_thms ctxt del - val facts = facts |> filter_out (member Thm.eq_thm_prop del_ths o snd) - fun iter j remaining_max threshold rel_const_tab hopeless hopeful = - let - fun relevant [] _ [] = - (* Nothing has been added this iteration. *) - if j = 0 andalso threshold >= ridiculous_threshold then - (* First iteration? Try again. *) - iter 0 max_relevant (threshold / threshold_divisor) rel_const_tab - hopeless hopeful - else - [] - | relevant candidates rejects [] = - let - val (accepts, more_rejects) = - take_most_relevant ctxt max_relevant remaining_max fudge - candidates - val rel_const_tab' = - rel_const_tab - |> fold (add_pconst_to_table false) (maps (snd o fst) accepts) - fun is_dirty (c, _) = - Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c - val (hopeful_rejects, hopeless_rejects) = - (rejects @ hopeless, ([], [])) - |-> fold (fn (ax as (_, consts), old_weight) => - if exists is_dirty consts then - apfst (cons (ax, NONE)) - else - apsnd (cons (ax, old_weight))) - |>> append (more_rejects - |> map (fn (ax as (_, consts), old_weight) => - (ax, if exists is_dirty consts then NONE - else SOME old_weight))) - val threshold = - 1.0 - (1.0 - threshold) - * Math.pow (decay, Real.fromInt (length accepts)) - val remaining_max = remaining_max - length accepts - in - trace_msg ctxt (fn () => "New or updated constants: " ^ - commas (rel_const_tab' |> Symtab.dest - |> subtract (op =) (rel_const_tab |> Symtab.dest) - |> map string_for_hyper_pconst)); - map (fst o fst) accepts @ - (if remaining_max = 0 then - [] - else - iter (j + 1) remaining_max threshold rel_const_tab' - hopeless_rejects hopeful_rejects) - end - | relevant candidates rejects - (((ax as (((_, stature), _), fact_consts)), cached_weight) - :: hopeful) = - let - val weight = - case cached_weight of - SOME w => w - | NONE => fact_weight fudge stature const_tab rel_const_tab - chained_const_tab fact_consts - in - if weight >= threshold then - relevant ((ax, weight) :: candidates) rejects hopeful - else - relevant candidates ((ax, weight) :: rejects) hopeful - end - in - trace_msg ctxt (fn () => - "ITERATION " ^ string_of_int j ^ ": current threshold: " ^ - Real.toString threshold ^ ", constants: " ^ - commas (rel_const_tab |> Symtab.dest - |> filter (curry (op <>) [] o snd) - |> map string_for_hyper_pconst)); - relevant [] [] hopeful - end - fun prepend_facts ths accepts = - ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @ - (accepts |> filter_out (member Thm.eq_thm_prop ths o snd))) - |> take max_relevant - fun uses_const s t = - fold_aterms (curry (fn (Const (s', _), false) => s' = s | (_, b) => b)) t - false - fun uses_const_anywhere accepts s = - exists (uses_const s o prop_of o snd) accepts orelse - exists (uses_const s) (concl_t :: hyp_ts) - fun add_set_const_thms accepts = - exists (uses_const_anywhere accepts) set_consts ? append set_thms - fun insert_into_facts accepts [] = accepts - | insert_into_facts accepts ths = - let - val add = facts |> filter (member Thm.eq_thm_prop ths o snd) - val (bef, after) = - accepts |> filter_out (member Thm.eq_thm_prop ths o snd) - |> take (max_relevant - length add) - |> chop special_fact_index - in bef @ add @ after end - fun insert_special_facts accepts = - (* FIXME: get rid of "ext" here once it is treated as a helper *) - [] |> could_benefit_from_ext is_built_in_const accepts ? cons @{thm ext} - |> add_set_const_thms accepts - |> insert_into_facts accepts - in - facts |> map_filter (pair_consts_fact thy is_built_in_const fudge) - |> iter 0 max_relevant threshold0 goal_const_tab [] - |> not (null add_ths) ? prepend_facts add_ths - |> insert_special_facts - |> tap (fn accepts => trace_msg ctxt (fn () => - "Total relevant: " ^ string_of_int (length accepts))) - end - - -(***************************************************************) -(* Retrieving and filtering lemmas *) -(***************************************************************) - -(*** retrieve lemmas and filter them ***) - (*Reject theorems with names like "List.filter.filter_list_def" or "Accessible_Part.acc.defs", as these are definitions arising from packages.*) fun is_package_def a = @@ -813,10 +134,6 @@ String.isSuffix "_def" a) orelse String.isSuffix "_defs" a end -fun uniquify xs = - Termtab.fold (cons o snd) - (fold (Termtab.update o `(prop_of o snd)) xs Termtab.empty) [] - (* FIXME: put other record thms here, or declare as "no_atp" *) fun multi_base_blacklist ctxt ho_atp = ["defs", "select_defs", "update_defs", "split", "splits", "split_asm", @@ -877,9 +194,6 @@ andalso exists_subterm (fn Free (s, _) => s = Name.skolem Auto_Bind.thesisN | _ => false) (prop_of th) -(**** Predicates to detect unwanted facts (prolific or likely to cause - unsoundness) ****) - fun is_theorem_bad_for_atps ho_atp exporter thm = is_metastrange_theorem thm orelse (not exporter andalso @@ -889,6 +203,34 @@ is_that_fact thm end) +fun string_for_term ctxt t = + Print_Mode.setmp (filter (curry (op =) Symbol.xsymbolsN) + (print_mode_value ())) (Syntax.string_of_term ctxt) t + |> String.translate (fn c => if Char.isPrint c then str c else "") + |> simplify_spaces + +(* This is a terrible hack. Free variables are sometimes coded as "M__" when + they are displayed as "M" and we want to avoid clashes with these. But + sometimes it's even worse: "Ma__" encodes "M". So we simply reserve all + prefixes of all free variables. In the worse case scenario, where the fact + won't be resolved correctly, the user can fix it manually, e.g., by naming + the fact in question. Ideally we would need nothing of it, but backticks + simply don't work with schematic variables. *) +fun all_prefixes_of s = + map (fn i => String.extract (s, 0, SOME i)) (1 upto size s - 1) + +fun close_form t = + (t, [] |> Term.add_free_names t |> maps all_prefixes_of) + |> fold (fn ((s, i), T) => fn (t', taken) => + let val s' = singleton (Name.variant_list taken) s in + ((if fastype_of t' = HOLogic.boolT then HOLogic.all_const + else Logic.all_const) T + $ Abs (s', T, abstract_over (Var ((s, i), T), t')), + s' :: taken) + end) + (Term.add_vars t [] |> sort_wrt (fst o fst)) + |> fst + fun all_facts ctxt ho_atp reserved exporter add_ths chained_ths css_table = let val thy = Proof_Context.theory_of ctxt @@ -960,6 +302,93 @@ |> op @ end +fun clasimpset_rule_table_of ctxt = + let + val thy = Proof_Context.theory_of ctxt + val atomize = HOLogic.mk_Trueprop o Object_Logic.atomize_term thy + fun add stature normalizers get_th = + fold (fn rule => + let + val th = rule |> get_th + val t = + th |> Thm.maxidx_of th > 0 ? zero_var_indexes |> prop_of + in + fold (fn normalize => Termtab.update (normalize t, stature)) + (I :: normalizers) + end) + val {safeIs, (* safeEs, *) hazIs, (* hazEs, *) ...} = + ctxt |> claset_of |> Classical.rep_cs + val intros = Item_Net.content safeIs @ Item_Net.content hazIs +(* Add once it is used: + val elims = + Item_Net.content safeEs @ Item_Net.content hazEs + |> map Classical.classical_rule +*) + val simps = ctxt |> simpset_of |> dest_ss |> #simps + val specs = ctxt |> Spec_Rules.get + val (rec_defs, nonrec_defs) = + specs |> filter (curry (op =) Spec_Rules.Equational o fst) + |> maps (snd o snd) + |> filter_out (member Thm.eq_thm_prop risky_defs) + |> List.partition (is_rec_def o prop_of) + val spec_intros = + specs |> filter (member (op =) [Spec_Rules.Inductive, + Spec_Rules.Co_Inductive] o fst) + |> maps (snd o snd) + in + Termtab.empty |> add Simp [atomize] snd simps + |> add Simp [] I rec_defs + |> add Def [] I nonrec_defs +(* Add once it is used: + |> add Elim [] I elims +*) + |> add Intro [] I intros + |> add Inductive [] I spec_intros + end + +fun uniquify xs = + Termtab.fold (cons o snd) + (fold (Termtab.update o `(prop_of o snd)) xs Termtab.empty) [] + +fun struct_induct_rule_on th = + case Logic.strip_horn (prop_of th) of + (prems, @{const Trueprop} + $ ((p as Var ((p_name, 0), _)) $ (a as Var (_, ind_T)))) => + if not (is_TVar ind_T) andalso length prems > 1 andalso + exists (exists_subterm (curry (op aconv) p)) prems andalso + not (exists (exists_subterm (curry (op aconv) a)) prems) then + SOME (p_name, ind_T) + else + NONE + | _ => NONE + +fun instantiate_induct_rule ctxt concl_prop p_name ((name, stature), th) ind_x = + let + fun varify_noninducts (t as Free (s, T)) = + if (s, T) = ind_x orelse can dest_funT T then t else Var ((s, 0), T) + | varify_noninducts t = t + val p_inst = + concl_prop |> map_aterms varify_noninducts |> close_form + |> lambda (Free ind_x) + |> string_for_term ctxt + in + ((fn () => name () ^ "[where " ^ p_name ^ " = " ^ quote p_inst ^ "]", + stature), th |> read_instantiate ctxt [((p_name, 0), p_inst)]) + end + +fun type_match thy (T1, T2) = + (Sign.typ_match thy (T2, T1) Vartab.empty; true) + handle Type.TYPE_MATCH => false + +fun instantiate_if_induct_rule ctxt stmt stmt_xs (ax as (_, th)) = + case struct_induct_rule_on th of + SOME (p_name, ind_T) => + let val thy = Proof_Context.theory_of ctxt in + stmt_xs |> filter (fn (_, T) => type_match thy (T, ind_T)) + |> map_filter (try (instantiate_induct_rule ctxt stmt p_name ax)) + end + | NONE => [ax] + fun external_frees t = [] |> Term.add_frees t |> filter_out (can Name.dest_internal o fst) @@ -998,26 +427,6 @@ |> uniquify end -fun relevant_facts ctxt (threshold0, threshold1) max_relevant is_built_in_const - fudge (override as {only, ...}) chained_ths hyp_ts concl_t - facts = - let - val thy = Proof_Context.theory_of ctxt - val decay = Math.pow ((1.0 - threshold1) / (1.0 - threshold0), - 1.0 / Real.fromInt (max_relevant + 1)) - in - trace_msg ctxt (fn () => "Considering " ^ string_of_int (length facts) ^ - " facts"); - (if only orelse threshold1 < 0.0 then - facts - else if threshold0 > 1.0 orelse threshold0 > threshold1 orelse - max_relevant = 0 then - [] - else - relevance_filter ctxt threshold0 decay max_relevant is_built_in_const - fudge override facts (chained_ths |> map prop_of) hyp_ts - (concl_t |> theory_constify fudge (Context.theory_name thy))) - |> map (apfst (apfst (fn f => f ()))) - end +val relevant_facts = iterative_relevant_facts end; diff -r 8f37d2ddabc8 -r b6eb45a52c28 src/HOL/Tools/Sledgehammer/sledgehammer_filter_iter.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_filter_iter.ML Wed Jul 11 21:43:19 2012 +0200 @@ -0,0 +1,600 @@ +(* Title: HOL/Tools/Sledgehammer/sledgehammer_filter_iter.ML + Author: Jia Meng, Cambridge University Computer Laboratory and NICTA + Author: Jasmin Blanchette, TU Muenchen + +Sledgehammer's iterative relevance filter. +*) + +signature SLEDGEHAMMER_FILTER_ITER = +sig + type stature = ATP_Problem_Generate.stature + + type relevance_fudge = + {local_const_multiplier : real, + worse_irrel_freq : real, + higher_order_irrel_weight : real, + abs_rel_weight : real, + abs_irrel_weight : real, + skolem_irrel_weight : real, + theory_const_rel_weight : real, + theory_const_irrel_weight : real, + chained_const_irrel_weight : real, + intro_bonus : real, + elim_bonus : real, + simp_bonus : real, + local_bonus : real, + assum_bonus : real, + chained_bonus : real, + max_imperfect : real, + max_imperfect_exp : real, + threshold_divisor : real, + ridiculous_threshold : real} + + type relevance_override = + {add : (Facts.ref * Attrib.src list) list, + del : (Facts.ref * Attrib.src list) list, + only : bool} + + val trace : bool Config.T + val ignore_no_atp : bool Config.T + val instantiate_inducts : bool Config.T + val pseudo_abs_name : string + val pseudo_skolem_prefix : string + val const_names_in_fact : + theory -> (string * typ -> term list -> bool * term list) -> term + -> string list + val iterative_relevant_facts : + Proof.context -> real * real -> int + -> (string * typ -> term list -> bool * term list) -> relevance_fudge + -> relevance_override -> thm list -> term list -> term + -> (((unit -> string) * stature) * thm) list + -> ((string * stature) * thm) list +end; + +structure Sledgehammer_Filter_Iter : SLEDGEHAMMER_FILTER_ITER = +struct + +open ATP_Problem_Generate + +val trace = + Attrib.setup_config_bool @{binding sledgehammer_filter_trace} (K false) +fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else () + +(* experimental features *) +val ignore_no_atp = + Attrib.setup_config_bool @{binding sledgehammer_ignore_no_atp} (K false) +val instantiate_inducts = + Attrib.setup_config_bool @{binding sledgehammer_instantiate_inducts} (K false) + +type relevance_fudge = + {local_const_multiplier : real, + worse_irrel_freq : real, + higher_order_irrel_weight : real, + abs_rel_weight : real, + abs_irrel_weight : real, + skolem_irrel_weight : real, + theory_const_rel_weight : real, + theory_const_irrel_weight : real, + chained_const_irrel_weight : real, + intro_bonus : real, + elim_bonus : real, + simp_bonus : real, + local_bonus : real, + assum_bonus : real, + chained_bonus : real, + max_imperfect : real, + max_imperfect_exp : real, + threshold_divisor : real, + ridiculous_threshold : real} + +type relevance_override = + {add : (Facts.ref * Attrib.src list) list, + del : (Facts.ref * Attrib.src list) list, + only : bool} + +val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator +val pseudo_abs_name = sledgehammer_prefix ^ "abs" +val pseudo_skolem_prefix = sledgehammer_prefix ^ "sko" +val theory_const_suffix = Long_Name.separator ^ " 1" + +fun order_of_type (Type (@{type_name fun}, [T1, T2])) = + Int.max (order_of_type T1 + 1, order_of_type T2) + | order_of_type (Type (_, Ts)) = fold (Integer.max o order_of_type) Ts 0 + | order_of_type _ = 0 + +(* An abstraction of Isabelle types and first-order terms *) +datatype pattern = PVar | PApp of string * pattern list +datatype ptype = PType of int * pattern list + +fun string_for_pattern PVar = "_" + | string_for_pattern (PApp (s, ps)) = + if null ps then s else s ^ string_for_patterns ps +and string_for_patterns ps = "(" ^ commas (map string_for_pattern ps) ^ ")" +fun string_for_ptype (PType (_, ps)) = string_for_patterns ps + +(*Is the second type an instance of the first one?*) +fun match_pattern (PVar, _) = true + | match_pattern (PApp _, PVar) = false + | match_pattern (PApp (s, ps), PApp (t, qs)) = + s = t andalso match_patterns (ps, qs) +and match_patterns (_, []) = true + | match_patterns ([], _) = false + | match_patterns (p :: ps, q :: qs) = + match_pattern (p, q) andalso match_patterns (ps, qs) +fun match_ptype (PType (_, ps), PType (_, qs)) = match_patterns (ps, qs) + +(* Is there a unifiable constant? *) +fun pconst_mem f consts (s, ps) = + exists (curry (match_ptype o f) ps) + (map snd (filter (curry (op =) s o fst) consts)) +fun pconst_hyper_mem f const_tab (s, ps) = + exists (curry (match_ptype o f) ps) (these (Symtab.lookup const_tab s)) + +fun pattern_for_type (Type (s, Ts)) = PApp (s, map pattern_for_type Ts) + | pattern_for_type (TFree (s, _)) = PApp (s, []) + | pattern_for_type (TVar _) = PVar + +(* Pairs a constant with the list of its type instantiations. *) +fun ptype thy const x = + (if const then map pattern_for_type (these (try (Sign.const_typargs thy) x)) + else []) +fun rich_ptype thy const (s, T) = + PType (order_of_type T, ptype thy const (s, T)) +fun rich_pconst thy const (s, T) = (s, rich_ptype thy const (s, T)) + +fun string_for_hyper_pconst (s, ps) = + s ^ "{" ^ commas (map string_for_ptype ps) ^ "}" + +(* Add a pconstant to the table, but a [] entry means a standard + connective, which we ignore.*) +fun add_pconst_to_table also_skolem (s, p) = + if (not also_skolem andalso String.isPrefix pseudo_skolem_prefix s) then I + else Symtab.map_default (s, [p]) (insert (op =) p) + +(* Set constants tend to pull in too many irrelevant facts. We limit the damage + by treating them more or less as if they were built-in but add their + axiomatization at the end. *) +val set_consts = [@{const_name Collect}, @{const_name Set.member}] +val set_thms = @{thms Collect_mem_eq mem_Collect_eq Collect_cong} + +fun add_pconsts_in_term thy is_built_in_const also_skolems pos = + let + val flip = Option.map not + (* We include free variables, as well as constants, to handle locales. For + each quantifiers that must necessarily be skolemized by the automatic + prover, we introduce a fresh constant to simulate the effect of + Skolemization. *) + fun do_const const ext_arg (x as (s, _)) ts = + let val (built_in, ts) = is_built_in_const x ts in + if member (op =) set_consts s then + fold (do_term ext_arg) ts + else + (not built_in + ? add_pconst_to_table also_skolems (rich_pconst thy const x)) + #> fold (do_term false) ts + end + and do_term ext_arg t = + case strip_comb t of + (Const x, ts) => do_const true ext_arg x ts + | (Free x, ts) => do_const false ext_arg x ts + | (Abs (_, T, t'), ts) => + ((null ts andalso not ext_arg) + (* Since lambdas on the right-hand side of equalities are usually + extensionalized later by "abs_extensionalize_term", we don't + penalize them here. *) + ? add_pconst_to_table true (pseudo_abs_name, + PType (order_of_type T + 1, []))) + #> fold (do_term false) (t' :: ts) + | (_, ts) => fold (do_term false) ts + fun do_quantifier will_surely_be_skolemized abs_T body_t = + do_formula pos body_t + #> (if also_skolems andalso will_surely_be_skolemized then + add_pconst_to_table true (pseudo_skolem_prefix ^ serial_string (), + PType (order_of_type abs_T, [])) + else + I) + and do_term_or_formula ext_arg T = + if T = HOLogic.boolT then do_formula NONE else do_term ext_arg + and do_formula pos t = + case t of + Const (@{const_name all}, _) $ Abs (_, T, t') => + do_quantifier (pos = SOME false) T t' + | @{const "==>"} $ t1 $ t2 => + do_formula (flip pos) t1 #> do_formula pos t2 + | Const (@{const_name "=="}, Type (_, [T, _])) $ t1 $ t2 => + do_term_or_formula false T t1 #> do_term_or_formula true T t2 + | @{const Trueprop} $ t1 => do_formula pos t1 + | @{const False} => I + | @{const True} => I + | @{const Not} $ t1 => do_formula (flip pos) t1 + | Const (@{const_name All}, _) $ Abs (_, T, t') => + do_quantifier (pos = SOME false) T t' + | Const (@{const_name Ex}, _) $ Abs (_, T, t') => + do_quantifier (pos = SOME true) T t' + | @{const HOL.conj} $ t1 $ t2 => fold (do_formula pos) [t1, t2] + | @{const HOL.disj} $ t1 $ t2 => fold (do_formula pos) [t1, t2] + | @{const HOL.implies} $ t1 $ t2 => + do_formula (flip pos) t1 #> do_formula pos t2 + | Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2 => + do_term_or_formula false T t1 #> do_term_or_formula true T t2 + | Const (@{const_name If}, Type (_, [_, Type (_, [T, _])])) + $ t1 $ t2 $ t3 => + do_formula NONE t1 #> fold (do_term_or_formula false T) [t2, t3] + | Const (@{const_name Ex1}, _) $ Abs (_, T, t') => + do_quantifier (is_some pos) T t' + | Const (@{const_name Ball}, _) $ t1 $ Abs (_, T, t') => + do_quantifier (pos = SOME false) T + (HOLogic.mk_imp (incr_boundvars 1 t1 $ Bound 0, t')) + | Const (@{const_name Bex}, _) $ t1 $ Abs (_, T, t') => + do_quantifier (pos = SOME true) T + (HOLogic.mk_conj (incr_boundvars 1 t1 $ Bound 0, t')) + | (t0 as Const (_, @{typ bool})) $ t1 => + do_term false t0 #> do_formula pos t1 (* theory constant *) + | _ => do_term false t + in do_formula pos end + +fun pconsts_in_fact thy is_built_in_const t = + Symtab.fold (fn (s, pss) => fold (cons o pair s) pss) + (Symtab.empty |> add_pconsts_in_term thy is_built_in_const true + (SOME true) t) [] + +val const_names_in_fact = map fst ooo pconsts_in_fact + +(* Inserts a dummy "constant" referring to the theory name, so that relevance + takes the given theory into account. *) +fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...} + : relevance_fudge) thy_name t = + if exists (curry (op <) 0.0) [theory_const_rel_weight, + theory_const_irrel_weight] then + Const (thy_name ^ theory_const_suffix, @{typ bool}) $ t + else + t + +fun theory_const_prop_of fudge th = + theory_constify fudge (Context.theory_name (theory_of_thm th)) (prop_of th) + +fun pair_consts_fact thy is_built_in_const fudge fact = + case fact |> snd |> theory_const_prop_of fudge + |> pconsts_in_fact thy is_built_in_const of + [] => NONE + | consts => SOME ((fact, consts), NONE) + +(* A two-dimensional symbol table counts frequencies of constants. It's keyed + first by constant name and second by its list of type instantiations. For the + latter, we need a linear ordering on "pattern list". *) + +fun pattern_ord p = + case p of + (PVar, PVar) => EQUAL + | (PVar, PApp _) => LESS + | (PApp _, PVar) => GREATER + | (PApp q1, PApp q2) => + prod_ord fast_string_ord (dict_ord pattern_ord) (q1, q2) +fun ptype_ord (PType p, PType q) = + prod_ord (dict_ord pattern_ord) int_ord (swap p, swap q) + +structure PType_Tab = Table(type key = ptype val ord = ptype_ord) + +fun count_fact_consts thy fudge = + let + fun do_const const (s, T) ts = + (* Two-dimensional table update. Constant maps to types maps to count. *) + PType_Tab.map_default (rich_ptype thy const (s, T), 0) (Integer.add 1) + |> Symtab.map_default (s, PType_Tab.empty) + #> fold do_term ts + and do_term t = + case strip_comb t of + (Const x, ts) => do_const true x ts + | (Free x, ts) => do_const false x ts + | (Abs (_, _, t'), ts) => fold do_term (t' :: ts) + | (_, ts) => fold do_term ts + in do_term o theory_const_prop_of fudge o snd end + +fun pow_int _ 0 = 1.0 + | pow_int x 1 = x + | pow_int x n = if n > 0 then x * pow_int x (n - 1) else pow_int x (n + 1) / x + +(*The frequency of a constant is the sum of those of all instances of its type.*) +fun pconst_freq match const_tab (c, ps) = + PType_Tab.fold (fn (qs, m) => match (ps, qs) ? Integer.add m) + (the (Symtab.lookup const_tab c)) 0 + + +(* A surprising number of theorems contain only a few significant constants. + These include all induction rules, and other general theorems. *) + +(* "log" seems best in practice. A constant function of one ignores the constant + frequencies. Rare constants give more points if they are relevant than less + rare ones. *) +fun rel_weight_for _ freq = 1.0 + 2.0 / Math.ln (Real.fromInt freq + 1.0) + +(* Irrelevant constants are treated differently. We associate lower penalties to + very rare constants and very common ones -- the former because they can't + lead to the inclusion of too many new facts, and the latter because they are + so common as to be of little interest. *) +fun irrel_weight_for ({worse_irrel_freq, higher_order_irrel_weight, ...} + : relevance_fudge) order freq = + let val (k, x) = worse_irrel_freq |> `Real.ceil in + (if freq < k then Math.ln (Real.fromInt (freq + 1)) / Math.ln x + else rel_weight_for order freq / rel_weight_for order k) + * pow_int higher_order_irrel_weight (order - 1) + end + +fun multiplier_for_const_name local_const_multiplier s = + if String.isSubstring "." s then 1.0 else local_const_multiplier + +(* Computes a constant's weight, as determined by its frequency. *) +fun generic_pconst_weight local_const_multiplier abs_weight skolem_weight + theory_const_weight chained_const_weight weight_for f + const_tab chained_const_tab (c as (s, PType (m, _))) = + if s = pseudo_abs_name then + abs_weight + else if String.isPrefix pseudo_skolem_prefix s then + skolem_weight + else if String.isSuffix theory_const_suffix s then + theory_const_weight + else + multiplier_for_const_name local_const_multiplier s + * weight_for m (pconst_freq (match_ptype o f) const_tab c) + |> (if chained_const_weight < 1.0 andalso + pconst_hyper_mem I chained_const_tab c then + curry (op *) chained_const_weight + else + I) + +fun rel_pconst_weight ({local_const_multiplier, abs_rel_weight, + theory_const_rel_weight, ...} : relevance_fudge) + const_tab = + generic_pconst_weight local_const_multiplier abs_rel_weight 0.0 + theory_const_rel_weight 0.0 rel_weight_for I const_tab + Symtab.empty + +fun irrel_pconst_weight (fudge as {local_const_multiplier, abs_irrel_weight, + skolem_irrel_weight, + theory_const_irrel_weight, + chained_const_irrel_weight, ...}) + const_tab chained_const_tab = + generic_pconst_weight local_const_multiplier abs_irrel_weight + skolem_irrel_weight theory_const_irrel_weight + chained_const_irrel_weight (irrel_weight_for fudge) swap + const_tab chained_const_tab + +fun stature_bonus ({intro_bonus, ...} : relevance_fudge) (_, Intro) = + intro_bonus + | stature_bonus {elim_bonus, ...} (_, Elim) = elim_bonus + | stature_bonus {simp_bonus, ...} (_, Simp) = simp_bonus + | stature_bonus {local_bonus, ...} (Local, _) = local_bonus + | stature_bonus {assum_bonus, ...} (Assum, _) = assum_bonus + | stature_bonus {chained_bonus, ...} (Chained, _) = chained_bonus + | stature_bonus _ _ = 0.0 + +fun is_odd_const_name s = + s = pseudo_abs_name orelse String.isPrefix pseudo_skolem_prefix s orelse + String.isSuffix theory_const_suffix s + +fun fact_weight fudge stature const_tab relevant_consts chained_consts + fact_consts = + case fact_consts |> List.partition (pconst_hyper_mem I relevant_consts) + ||> filter_out (pconst_hyper_mem swap relevant_consts) of + ([], _) => 0.0 + | (rel, irrel) => + if forall (forall (is_odd_const_name o fst)) [rel, irrel] then + 0.0 + else + let + val irrel = irrel |> filter_out (pconst_mem swap rel) + val rel_weight = + 0.0 |> fold (curry (op +) o rel_pconst_weight fudge const_tab) rel + val irrel_weight = + ~ (stature_bonus fudge stature) + |> fold (curry (op +) + o irrel_pconst_weight fudge const_tab chained_consts) irrel + val res = rel_weight / (rel_weight + irrel_weight) + in if Real.isFinite res then res else 0.0 end + +type annotated_thm = + (((unit -> string) * stature) * thm) * (string * ptype) list + +fun take_most_relevant ctxt max_relevant remaining_max + ({max_imperfect, max_imperfect_exp, ...} : relevance_fudge) + (candidates : (annotated_thm * real) list) = + let + val max_imperfect = + Real.ceil (Math.pow (max_imperfect, + Math.pow (Real.fromInt remaining_max + / Real.fromInt max_relevant, max_imperfect_exp))) + val (perfect, imperfect) = + candidates |> sort (Real.compare o swap o pairself snd) + |> take_prefix (fn (_, w) => w > 0.99999) + val ((accepts, more_rejects), rejects) = + chop max_imperfect imperfect |>> append perfect |>> chop remaining_max + in + trace_msg ctxt (fn () => + "Actually passed (" ^ string_of_int (length accepts) ^ " of " ^ + string_of_int (length candidates) ^ "): " ^ + (accepts |> map (fn ((((name, _), _), _), weight) => + name () ^ " [" ^ Real.toString weight ^ "]") + |> commas)); + (accepts, more_rejects @ rejects) + end + +fun if_empty_replace_with_scope thy is_built_in_const facts sc tab = + if Symtab.is_empty tab then + Symtab.empty + |> fold (add_pconsts_in_term thy is_built_in_const false (SOME false)) + (map_filter (fn ((_, (sc', _)), th) => + if sc' = sc then SOME (prop_of th) else NONE) facts) + else + tab + +fun consider_arities is_built_in_const th = + let + fun aux _ _ NONE = NONE + | aux t args (SOME tab) = + case t of + t1 $ t2 => SOME tab |> aux t1 (t2 :: args) |> aux t2 [] + | Const (x as (s, _)) => + (if is_built_in_const x args |> fst then + SOME tab + else case Symtab.lookup tab s of + NONE => SOME (Symtab.update (s, length args) tab) + | SOME n => if n = length args then SOME tab else NONE) + | _ => SOME tab + in aux (prop_of th) [] end + +(* FIXME: This is currently only useful for polymorphic type encodings. *) +fun could_benefit_from_ext is_built_in_const facts = + fold (consider_arities is_built_in_const o snd) facts (SOME Symtab.empty) + |> is_none + +(* High enough so that it isn't wrongly considered as very relevant (e.g., for E + weights), but low enough so that it is unlikely to be truncated away if few + facts are included. *) +val special_fact_index = 75 + +fun relevance_filter ctxt threshold0 decay max_relevant is_built_in_const + (fudge as {threshold_divisor, ridiculous_threshold, ...}) + ({add, del, ...} : relevance_override) facts chained_ts hyp_ts concl_t = + let + val thy = Proof_Context.theory_of ctxt + val const_tab = fold (count_fact_consts thy fudge) facts Symtab.empty + val add_pconsts = add_pconsts_in_term thy is_built_in_const false o SOME + val chained_const_tab = Symtab.empty |> fold (add_pconsts true) chained_ts + val goal_const_tab = + Symtab.empty |> fold (add_pconsts true) hyp_ts + |> add_pconsts false concl_t + |> (fn tab => if Symtab.is_empty tab then chained_const_tab else tab) + |> fold (if_empty_replace_with_scope thy is_built_in_const facts) + [Chained, Assum, Local] + val add_ths = Attrib.eval_thms ctxt add + val del_ths = Attrib.eval_thms ctxt del + val facts = facts |> filter_out (member Thm.eq_thm_prop del_ths o snd) + fun iter j remaining_max threshold rel_const_tab hopeless hopeful = + let + fun relevant [] _ [] = + (* Nothing has been added this iteration. *) + if j = 0 andalso threshold >= ridiculous_threshold then + (* First iteration? Try again. *) + iter 0 max_relevant (threshold / threshold_divisor) rel_const_tab + hopeless hopeful + else + [] + | relevant candidates rejects [] = + let + val (accepts, more_rejects) = + take_most_relevant ctxt max_relevant remaining_max fudge + candidates + val rel_const_tab' = + rel_const_tab + |> fold (add_pconst_to_table false) (maps (snd o fst) accepts) + fun is_dirty (c, _) = + Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c + val (hopeful_rejects, hopeless_rejects) = + (rejects @ hopeless, ([], [])) + |-> fold (fn (ax as (_, consts), old_weight) => + if exists is_dirty consts then + apfst (cons (ax, NONE)) + else + apsnd (cons (ax, old_weight))) + |>> append (more_rejects + |> map (fn (ax as (_, consts), old_weight) => + (ax, if exists is_dirty consts then NONE + else SOME old_weight))) + val threshold = + 1.0 - (1.0 - threshold) + * Math.pow (decay, Real.fromInt (length accepts)) + val remaining_max = remaining_max - length accepts + in + trace_msg ctxt (fn () => "New or updated constants: " ^ + commas (rel_const_tab' |> Symtab.dest + |> subtract (op =) (rel_const_tab |> Symtab.dest) + |> map string_for_hyper_pconst)); + map (fst o fst) accepts @ + (if remaining_max = 0 then + [] + else + iter (j + 1) remaining_max threshold rel_const_tab' + hopeless_rejects hopeful_rejects) + end + | relevant candidates rejects + (((ax as (((_, stature), _), fact_consts)), cached_weight) + :: hopeful) = + let + val weight = + case cached_weight of + SOME w => w + | NONE => fact_weight fudge stature const_tab rel_const_tab + chained_const_tab fact_consts + in + if weight >= threshold then + relevant ((ax, weight) :: candidates) rejects hopeful + else + relevant candidates ((ax, weight) :: rejects) hopeful + end + in + trace_msg ctxt (fn () => + "ITERATION " ^ string_of_int j ^ ": current threshold: " ^ + Real.toString threshold ^ ", constants: " ^ + commas (rel_const_tab |> Symtab.dest + |> filter (curry (op <>) [] o snd) + |> map string_for_hyper_pconst)); + relevant [] [] hopeful + end + fun prepend_facts ths accepts = + ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @ + (accepts |> filter_out (member Thm.eq_thm_prop ths o snd))) + |> take max_relevant + fun uses_const s t = + fold_aterms (curry (fn (Const (s', _), false) => s' = s | (_, b) => b)) t + false + fun uses_const_anywhere accepts s = + exists (uses_const s o prop_of o snd) accepts orelse + exists (uses_const s) (concl_t :: hyp_ts) + fun add_set_const_thms accepts = + exists (uses_const_anywhere accepts) set_consts ? append set_thms + fun insert_into_facts accepts [] = accepts + | insert_into_facts accepts ths = + let + val add = facts |> filter (member Thm.eq_thm_prop ths o snd) + val (bef, after) = + accepts |> filter_out (member Thm.eq_thm_prop ths o snd) + |> take (max_relevant - length add) + |> chop special_fact_index + in bef @ add @ after end + fun insert_special_facts accepts = + (* FIXME: get rid of "ext" here once it is treated as a helper *) + [] |> could_benefit_from_ext is_built_in_const accepts ? cons @{thm ext} + |> add_set_const_thms accepts + |> insert_into_facts accepts + in + facts |> map_filter (pair_consts_fact thy is_built_in_const fudge) + |> iter 0 max_relevant threshold0 goal_const_tab [] + |> not (null add_ths) ? prepend_facts add_ths + |> insert_special_facts + |> tap (fn accepts => trace_msg ctxt (fn () => + "Total relevant: " ^ string_of_int (length accepts))) + end + +fun iterative_relevant_facts ctxt (threshold0, threshold1) max_relevant + is_built_in_const fudge (override as {only, ...}) + chained_ths hyp_ts concl_t facts = + let + val thy = Proof_Context.theory_of ctxt + val decay = Math.pow ((1.0 - threshold1) / (1.0 - threshold0), + 1.0 / Real.fromInt (max_relevant + 1)) + in + trace_msg ctxt (fn () => "Considering " ^ string_of_int (length facts) ^ + " facts"); + (if only orelse threshold1 < 0.0 then + facts + else if threshold0 > 1.0 orelse threshold0 > threshold1 orelse + max_relevant = 0 then + [] + else + relevance_filter ctxt threshold0 decay max_relevant is_built_in_const + fudge override facts (chained_ths |> map prop_of) hyp_ts + (concl_t |> theory_constify fudge (Context.theory_name thy))) + |> map (apfst (apfst (fn f => f ()))) + end + +end; diff -r 8f37d2ddabc8 -r b6eb45a52c28 src/HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML Wed Jul 11 21:43:19 2012 +0200 @@ -0,0 +1,13 @@ +(* Title: HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML + Author: Jasmin Blanchette, TU Muenchen + +Sledgehammer's machine-learning-based relevance filter (MaSh). +*) + +signature SLEDGEHAMMER_FILTER_MASH = +sig +end; + +structure Sledgehammer_Filter_MaSh : SLEDGEHAMMER_FILTER_MASH = +struct +end;