src/HOL/Tools/Sledgehammer/sledgehammer_filter.ML
author wenzelm
Mon Jun 27 15:03:55 2011 +0200 (2011-06-27)
changeset 43559 c1966f322105
parent 43492 43326cadc31a
child 43581 c3e4d280bdeb
permissions -rw-r--r--
old gensym is now legacy -- global state is out of fashion, and its result is not guaranteed to be fresh;
     1 (*  Title:      HOL/Tools/Sledgehammer/sledgehammer_filter.ML
     2     Author:     Jia Meng, Cambridge University Computer Laboratory and NICTA
     3     Author:     Jasmin Blanchette, TU Muenchen
     4 
     5 Sledgehammer's relevance filter.
     6 *)
     7 
     8 signature SLEDGEHAMMER_FILTER =
     9 sig
    10   type locality = ATP_Translate.locality
    11 
    12   type relevance_fudge =
    13     {local_const_multiplier : real,
    14      worse_irrel_freq : real,
    15      higher_order_irrel_weight : real,
    16      abs_rel_weight : real,
    17      abs_irrel_weight : real,
    18      skolem_irrel_weight : real,
    19      theory_const_rel_weight : real,
    20      theory_const_irrel_weight : real,
    21      chained_const_irrel_weight : real,
    22      intro_bonus : real,
    23      elim_bonus : real,
    24      simp_bonus : real,
    25      local_bonus : real,
    26      assum_bonus : real,
    27      chained_bonus : real,
    28      max_imperfect : real,
    29      max_imperfect_exp : real,
    30      threshold_divisor : real,
    31      ridiculous_threshold : real}
    32 
    33   type relevance_override =
    34     {add : (Facts.ref * Attrib.src list) list,
    35      del : (Facts.ref * Attrib.src list) list,
    36      only : bool}
    37 
    38   val trace : bool Config.T
    39   val new_monomorphizer : bool Config.T
    40   val ignore_no_atp : bool Config.T
    41   val instantiate_inducts : bool Config.T
    42   val const_names_in_fact :
    43     theory -> (string * typ -> term list -> bool * term list) -> term
    44     -> string list
    45   val fact_from_ref :
    46     Proof.context -> unit Symtab.table -> thm list
    47     -> Facts.ref * Attrib.src list -> ((string * locality) * thm) list
    48   val all_facts :
    49     Proof.context -> 'a Symtab.table -> bool -> thm list -> thm list
    50     -> (((unit -> string) * locality) * thm) list
    51   val nearly_all_facts :
    52     Proof.context -> relevance_override -> thm list -> term list -> term
    53     -> (((unit -> string) * locality) * thm) list
    54   val relevant_facts :
    55     Proof.context -> real * real -> int
    56     -> (string * typ -> term list -> bool * term list) -> relevance_fudge
    57     -> relevance_override -> thm list -> term list -> term
    58     -> (((unit -> string) * locality) * thm) list
    59     -> ((string * locality) * thm) list
    60 end;
    61 
    62 structure Sledgehammer_Filter : SLEDGEHAMMER_FILTER =
    63 struct
    64 
    65 open ATP_Translate
    66 open Sledgehammer_Util
    67 
    68 val trace =
    69   Attrib.setup_config_bool @{binding sledgehammer_filter_trace} (K false)
    70 fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
    71 
    72 (* experimental features *)
    73 val new_monomorphizer =
    74   Attrib.setup_config_bool @{binding sledgehammer_new_monomorphizer} (K false)
    75 val ignore_no_atp =
    76   Attrib.setup_config_bool @{binding sledgehammer_ignore_no_atp} (K false)
    77 val instantiate_inducts =
    78   Attrib.setup_config_bool @{binding sledgehammer_instantiate_inducts} (K false)
    79 
    80 type relevance_fudge =
    81   {local_const_multiplier : real,
    82    worse_irrel_freq : real,
    83    higher_order_irrel_weight : real,
    84    abs_rel_weight : real,
    85    abs_irrel_weight : real,
    86    skolem_irrel_weight : real,
    87    theory_const_rel_weight : real,
    88    theory_const_irrel_weight : real,
    89    chained_const_irrel_weight : real,
    90    intro_bonus : real,
    91    elim_bonus : real,
    92    simp_bonus : real,
    93    local_bonus : real,
    94    assum_bonus : real,
    95    chained_bonus : real,
    96    max_imperfect : real,
    97    max_imperfect_exp : real,
    98    threshold_divisor : real,
    99    ridiculous_threshold : real}
   100 
   101 type relevance_override =
   102   {add : (Facts.ref * Attrib.src list) list,
   103    del : (Facts.ref * Attrib.src list) list,
   104    only : bool}
   105 
   106 val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator
   107 val abs_name = sledgehammer_prefix ^ "abs"
   108 val skolem_prefix = sledgehammer_prefix ^ "sko"
   109 val theory_const_suffix = Long_Name.separator ^ " 1"
   110 
   111 fun needs_quoting reserved s =
   112   Symtab.defined reserved s orelse
   113   exists (not o Lexicon.is_identifier) (Long_Name.explode s)
   114 
   115 fun make_name reserved multi j name =
   116   (name |> needs_quoting reserved name ? quote) ^
   117   (if multi then "(" ^ string_of_int j ^ ")" else "")
   118 
   119 fun explode_interval _ (Facts.FromTo (i, j)) = i upto j
   120   | explode_interval max (Facts.From i) = i upto i + max - 1
   121   | explode_interval _ (Facts.Single i) = [i]
   122 
   123 val backquote =
   124   raw_explode #> map (fn "`" => "\\`" | s => s) #> implode #> enclose "`" "`"
   125 fun fact_from_ref ctxt reserved chained_ths (xthm as (xref, args)) =
   126   let
   127     val ths = Attrib.eval_thms ctxt [xthm]
   128     val bracket =
   129       map (enclose "[" "]" o Pretty.str_of o Args.pretty_src ctxt) args
   130       |> implode
   131     fun nth_name j =
   132       case xref of
   133         Facts.Fact s => backquote s ^ bracket
   134       | Facts.Named (("", _), _) => "[" ^ bracket ^ "]"
   135       | Facts.Named ((name, _), NONE) =>
   136         make_name reserved (length ths > 1) (j + 1) name ^ bracket
   137       | Facts.Named ((name, _), SOME intervals) =>
   138         make_name reserved true
   139                  (nth (maps (explode_interval (length ths)) intervals) j) name ^
   140         bracket
   141   in
   142     (ths, (0, []))
   143     |-> fold (fn th => fn (j, rest) =>
   144                  (j + 1,
   145                   ((nth_name j,
   146                     if member Thm.eq_thm_prop chained_ths th then Chained
   147                     else if Thm.eq_thm_prop (th, @{thm ext}) then Extensionality
   148                     else General), th) :: rest))
   149     |> snd
   150   end
   151 
   152 (* This is a terrible hack. Free variables are sometimes code as "M__" when they
   153    are displayed as "M" and we want to avoid clashes with these. But sometimes
   154    it's even worse: "Ma__" encodes "M". So we simply reserve all prefixes of all
   155    free variables. In the worse case scenario, where the fact won't be resolved
   156    correctly, the user can fix it manually, e.g., by naming the fact in
   157    question. Ideally we would need nothing of it, but backticks just don't work
   158    with schematic variables. *)
   159 fun all_prefixes_of s =
   160   map (fn i => String.extract (s, 0, SOME i)) (1 upto size s - 1)
   161 fun close_form t =
   162   (t, [] |> Term.add_free_names t |> maps all_prefixes_of)
   163   |> fold (fn ((s, i), T) => fn (t', taken) =>
   164               let val s' = singleton (Name.variant_list taken) s in
   165                 ((if fastype_of t' = HOLogic.boolT then HOLogic.all_const
   166                   else Term.all) T
   167                  $ Abs (s', T, abstract_over (Var ((s, i), T), t')),
   168                  s' :: taken)
   169               end)
   170           (Term.add_vars t [] |> sort_wrt (fst o fst))
   171   |> fst
   172 
   173 fun string_for_term ctxt t =
   174   Print_Mode.setmp (filter (curry (op =) Symbol.xsymbolsN)
   175                    (print_mode_value ())) (Syntax.string_of_term ctxt) t
   176   |> String.translate (fn c => if Char.isPrint c then str c else "")
   177   |> simplify_spaces
   178 
   179 (** Structural induction rules **)
   180 
   181 fun struct_induct_rule_on th =
   182   case Logic.strip_horn (prop_of th) of
   183     (prems, @{const Trueprop}
   184             $ ((p as Var ((p_name, 0), _)) $ (a as Var (_, ind_T)))) =>
   185     if not (is_TVar ind_T) andalso length prems > 1 andalso
   186        exists (exists_subterm (curry (op aconv) p)) prems andalso
   187        not (exists (exists_subterm (curry (op aconv) a)) prems) then
   188       SOME (p_name, ind_T)
   189     else
   190       NONE
   191   | _ => NONE
   192 
   193 fun instantiate_induct_rule ctxt concl_prop p_name ((name, loc), th) ind_x =
   194   let
   195     fun varify_noninducts (t as Free (s, T)) =
   196         if (s, T) = ind_x orelse can dest_funT T then t else Var ((s, 0), T)
   197       | varify_noninducts t = t
   198     val p_inst =
   199       concl_prop |> map_aterms varify_noninducts |> close_form
   200                  |> lambda (Free ind_x)
   201                  |> string_for_term ctxt
   202   in
   203     ((fn () => name () ^ "[where " ^ p_name ^ " = " ^ quote p_inst ^ "]", loc),
   204      th |> read_instantiate ctxt [((p_name, 0), p_inst)])
   205   end
   206 
   207 fun type_match thy (T1, T2) =
   208   (Sign.typ_match thy (T2, T1) Vartab.empty; true)
   209   handle Type.TYPE_MATCH => false
   210 
   211 fun instantiate_if_induct_rule ctxt stmt stmt_xs (ax as (_, th)) =
   212   case struct_induct_rule_on th of
   213     SOME (p_name, ind_T) =>
   214     let val thy = Proof_Context.theory_of ctxt in
   215       stmt_xs |> filter (fn (_, T) => type_match thy (T, ind_T))
   216               |> map_filter (try (instantiate_induct_rule ctxt stmt p_name ax))
   217     end
   218   | NONE => [ax]
   219 
   220 (***************************************************************)
   221 (* Relevance Filtering                                         *)
   222 (***************************************************************)
   223 
   224 (*** constants with types ***)
   225 
   226 fun order_of_type (Type (@{type_name fun}, [T1, @{typ bool}])) =
   227     order_of_type T1 (* cheat: pretend sets are first-order *)
   228   | order_of_type (Type (@{type_name fun}, [T1, T2])) =
   229     Int.max (order_of_type T1 + 1, order_of_type T2)
   230   | order_of_type (Type (_, Ts)) = fold (Integer.max o order_of_type) Ts 0
   231   | order_of_type _ = 0
   232 
   233 (* An abstraction of Isabelle types and first-order terms *)
   234 datatype pattern = PVar | PApp of string * pattern list
   235 datatype ptype = PType of int * pattern list
   236 
   237 fun string_for_pattern PVar = "_"
   238   | string_for_pattern (PApp (s, ps)) =
   239     if null ps then s else s ^ string_for_patterns ps
   240 and string_for_patterns ps = "(" ^ commas (map string_for_pattern ps) ^ ")"
   241 fun string_for_ptype (PType (_, ps)) = string_for_patterns ps
   242 
   243 (*Is the second type an instance of the first one?*)
   244 fun match_pattern (PVar, _) = true
   245   | match_pattern (PApp _, PVar) = false
   246   | match_pattern (PApp (s, ps), PApp (t, qs)) =
   247     s = t andalso match_patterns (ps, qs)
   248 and match_patterns (_, []) = true
   249   | match_patterns ([], _) = false
   250   | match_patterns (p :: ps, q :: qs) =
   251     match_pattern (p, q) andalso match_patterns (ps, qs)
   252 fun match_ptype (PType (_, ps), PType (_, qs)) = match_patterns (ps, qs)
   253 
   254 (* Is there a unifiable constant? *)
   255 fun pconst_mem f consts (s, ps) =
   256   exists (curry (match_ptype o f) ps)
   257          (map snd (filter (curry (op =) s o fst) consts))
   258 fun pconst_hyper_mem f const_tab (s, ps) =
   259   exists (curry (match_ptype o f) ps) (these (Symtab.lookup const_tab s))
   260 
   261 fun pattern_for_type (Type (s, Ts)) = PApp (s, map pattern_for_type Ts)
   262   | pattern_for_type (TFree (s, _)) = PApp (s, [])
   263   | pattern_for_type (TVar _) = PVar
   264 
   265 (* Pairs a constant with the list of its type instantiations. *)
   266 fun ptype thy const x =
   267   (if const then map pattern_for_type (these (try (Sign.const_typargs thy) x))
   268    else [])
   269 fun rich_ptype thy const (s, T) =
   270   PType (order_of_type T, ptype thy const (s, T))
   271 fun rich_pconst thy const (s, T) = (s, rich_ptype thy const (s, T))
   272 
   273 fun string_for_hyper_pconst (s, ps) =
   274   s ^ "{" ^ commas (map string_for_ptype ps) ^ "}"
   275 
   276 (* Add a pconstant to the table, but a [] entry means a standard
   277    connective, which we ignore.*)
   278 fun add_pconst_to_table also_skolem (s, p) =
   279   if (not also_skolem andalso String.isPrefix skolem_prefix s) then I
   280   else Symtab.map_default (s, [p]) (insert (op =) p)
   281 
   282 (* Set constants tend to pull in too many irrelevant facts. We limit the damage
   283    by treating them more or less as if they were built-in but add their
   284    definition at the end. *)
   285 val set_consts =
   286   [(@{const_name Collect}, @{thms Collect_def}),
   287    (@{const_name Set.member}, @{thms mem_def})]
   288 
   289 val is_set_const = AList.defined (op =) set_consts
   290 
   291 fun add_pconsts_in_term thy is_built_in_const also_skolems pos =
   292   let
   293     val flip = Option.map not
   294     (* We include free variables, as well as constants, to handle locales. For
   295        each quantifiers that must necessarily be skolemized by the automatic
   296        prover, we introduce a fresh constant to simulate the effect of
   297        Skolemization. *)
   298     fun do_const const ext_arg (x as (s, _)) ts =
   299       let val (built_in, ts) = is_built_in_const x ts in
   300         if is_set_const s then
   301           fold (do_term ext_arg) ts
   302         else
   303           (not built_in
   304            ? add_pconst_to_table also_skolems (rich_pconst thy const x))
   305           #> fold (do_term false) ts
   306       end
   307     and do_term ext_arg t =
   308       case strip_comb t of
   309         (Const x, ts) => do_const true ext_arg x ts
   310       | (Free x, ts) => do_const false ext_arg x ts
   311       | (Abs (_, T, t'), ts) =>
   312         ((null ts andalso not ext_arg)
   313          (* Since lambdas on the right-hand side of equalities are usually
   314             extensionalized later by "extensionalize_term", we don't penalize
   315             them here. *)
   316          ? add_pconst_to_table true (abs_name, PType (order_of_type T + 1, [])))
   317         #> fold (do_term false) (t' :: ts)
   318       | (_, ts) => fold (do_term false) ts
   319     fun do_quantifier will_surely_be_skolemized abs_T body_t =
   320       do_formula pos body_t
   321       #> (if also_skolems andalso will_surely_be_skolemized then
   322             add_pconst_to_table true
   323                 (legacy_gensym skolem_prefix, PType (order_of_type abs_T, []))
   324           else
   325             I)
   326     and do_term_or_formula ext_arg T =
   327       if T = HOLogic.boolT then do_formula NONE else do_term ext_arg
   328     and do_formula pos t =
   329       case t of
   330         Const (@{const_name all}, _) $ Abs (_, T, t') =>
   331         do_quantifier (pos = SOME false) T t'
   332       | @{const "==>"} $ t1 $ t2 =>
   333         do_formula (flip pos) t1 #> do_formula pos t2
   334       | Const (@{const_name "=="}, Type (_, [T, _])) $ t1 $ t2 =>
   335         do_term_or_formula false T t1 #> do_term_or_formula true T t2
   336       | @{const Trueprop} $ t1 => do_formula pos t1
   337       | @{const False} => I
   338       | @{const True} => I
   339       | @{const Not} $ t1 => do_formula (flip pos) t1
   340       | Const (@{const_name All}, _) $ Abs (_, T, t') =>
   341         do_quantifier (pos = SOME false) T t'
   342       | Const (@{const_name Ex}, _) $ Abs (_, T, t') =>
   343         do_quantifier (pos = SOME true) T t'
   344       | @{const HOL.conj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   345       | @{const HOL.disj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   346       | @{const HOL.implies} $ t1 $ t2 =>
   347         do_formula (flip pos) t1 #> do_formula pos t2
   348       | Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2 =>
   349         do_term_or_formula false T t1 #> do_term_or_formula true T t2
   350       | Const (@{const_name If}, Type (_, [_, Type (_, [T, _])]))
   351         $ t1 $ t2 $ t3 =>
   352         do_formula NONE t1 #> fold (do_term_or_formula false T) [t2, t3]
   353       | Const (@{const_name Ex1}, _) $ Abs (_, T, t') =>
   354         do_quantifier (is_some pos) T t'
   355       | Const (@{const_name Ball}, _) $ t1 $ Abs (_, T, t') =>
   356         do_quantifier (pos = SOME false) T
   357                       (HOLogic.mk_imp (incr_boundvars 1 t1 $ Bound 0, t'))
   358       | Const (@{const_name Bex}, _) $ t1 $ Abs (_, T, t') =>
   359         do_quantifier (pos = SOME true) T
   360                       (HOLogic.mk_conj (incr_boundvars 1 t1 $ Bound 0, t'))
   361       | (t0 as Const (_, @{typ bool})) $ t1 =>
   362         do_term false t0 #> do_formula pos t1  (* theory constant *)
   363       | _ => do_term false t
   364   in do_formula pos end
   365 
   366 (*Inserts a dummy "constant" referring to the theory name, so that relevance
   367   takes the given theory into account.*)
   368 fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...}
   369                      : relevance_fudge) thy_name t =
   370   if exists (curry (op <) 0.0) [theory_const_rel_weight,
   371                                 theory_const_irrel_weight] then
   372     Const (thy_name ^ theory_const_suffix, @{typ bool}) $ t
   373   else
   374     t
   375 
   376 fun theory_const_prop_of fudge th =
   377   theory_constify fudge (Context.theory_name (theory_of_thm th)) (prop_of th)
   378 
   379 (**** Constant / Type Frequencies ****)
   380 
   381 (* A two-dimensional symbol table counts frequencies of constants. It's keyed
   382    first by constant name and second by its list of type instantiations. For the
   383    latter, we need a linear ordering on "pattern list". *)
   384 
   385 fun pattern_ord p =
   386   case p of
   387     (PVar, PVar) => EQUAL
   388   | (PVar, PApp _) => LESS
   389   | (PApp _, PVar) => GREATER
   390   | (PApp q1, PApp q2) =>
   391     prod_ord fast_string_ord (dict_ord pattern_ord) (q1, q2)
   392 fun ptype_ord (PType p, PType q) =
   393   prod_ord (dict_ord pattern_ord) int_ord (swap p, swap q)
   394 
   395 structure PType_Tab = Table(type key = ptype val ord = ptype_ord)
   396 
   397 fun count_fact_consts thy fudge =
   398   let
   399     fun do_const const (s, T) ts =
   400       (* Two-dimensional table update. Constant maps to types maps to count. *)
   401       PType_Tab.map_default (rich_ptype thy const (s, T), 0) (Integer.add 1)
   402       |> Symtab.map_default (s, PType_Tab.empty)
   403       #> fold do_term ts
   404     and do_term t =
   405       case strip_comb t of
   406         (Const x, ts) => do_const true x ts
   407       | (Free x, ts) => do_const false x ts
   408       | (Abs (_, _, t'), ts) => fold do_term (t' :: ts)
   409       | (_, ts) => fold do_term ts
   410   in do_term o theory_const_prop_of fudge o snd end
   411 
   412 
   413 (**** Actual Filtering Code ****)
   414 
   415 fun pow_int _ 0 = 1.0
   416   | pow_int x 1 = x
   417   | pow_int x n = if n > 0 then x * pow_int x (n - 1) else pow_int x (n + 1) / x
   418 
   419 (*The frequency of a constant is the sum of those of all instances of its type.*)
   420 fun pconst_freq match const_tab (c, ps) =
   421   PType_Tab.fold (fn (qs, m) => match (ps, qs) ? Integer.add m)
   422                  (the (Symtab.lookup const_tab c)) 0
   423 
   424 
   425 (* A surprising number of theorems contain only a few significant constants.
   426    These include all induction rules, and other general theorems. *)
   427 
   428 (* "log" seems best in practice. A constant function of one ignores the constant
   429    frequencies. Rare constants give more points if they are relevant than less
   430    rare ones. *)
   431 fun rel_weight_for _ freq = 1.0 + 2.0 / Math.ln (Real.fromInt freq + 1.0)
   432 
   433 (* Irrelevant constants are treated differently. We associate lower penalties to
   434    very rare constants and very common ones -- the former because they can't
   435    lead to the inclusion of too many new facts, and the latter because they are
   436    so common as to be of little interest. *)
   437 fun irrel_weight_for ({worse_irrel_freq, higher_order_irrel_weight, ...}
   438                       : relevance_fudge) order freq =
   439   let val (k, x) = worse_irrel_freq |> `Real.ceil in
   440     (if freq < k then Math.ln (Real.fromInt (freq + 1)) / Math.ln x
   441      else rel_weight_for order freq / rel_weight_for order k)
   442     * pow_int higher_order_irrel_weight (order - 1)
   443   end
   444 
   445 fun multiplier_for_const_name local_const_multiplier s =
   446   if String.isSubstring "." s then 1.0 else local_const_multiplier
   447 
   448 (* Computes a constant's weight, as determined by its frequency. *)
   449 fun generic_pconst_weight local_const_multiplier abs_weight skolem_weight
   450                           theory_const_weight chained_const_weight weight_for f
   451                           const_tab chained_const_tab (c as (s, PType (m, _))) =
   452   if s = abs_name then
   453     abs_weight
   454   else if String.isPrefix skolem_prefix s then
   455     skolem_weight
   456   else if String.isSuffix theory_const_suffix s then
   457     theory_const_weight
   458   else
   459     multiplier_for_const_name local_const_multiplier s
   460     * weight_for m (pconst_freq (match_ptype o f) const_tab c)
   461     |> (if chained_const_weight < 1.0 andalso
   462            pconst_hyper_mem I chained_const_tab c then
   463           curry (op *) chained_const_weight
   464         else
   465           I)
   466 
   467 fun rel_pconst_weight ({local_const_multiplier, abs_rel_weight,
   468                         theory_const_rel_weight, ...} : relevance_fudge)
   469                       const_tab =
   470   generic_pconst_weight local_const_multiplier abs_rel_weight 0.0
   471                         theory_const_rel_weight 0.0 rel_weight_for I const_tab
   472                         Symtab.empty
   473 
   474 fun irrel_pconst_weight (fudge as {local_const_multiplier, abs_irrel_weight,
   475                                    skolem_irrel_weight,
   476                                    theory_const_irrel_weight,
   477                                    chained_const_irrel_weight, ...})
   478                         const_tab chained_const_tab =
   479   generic_pconst_weight local_const_multiplier abs_irrel_weight
   480                         skolem_irrel_weight theory_const_irrel_weight
   481                         chained_const_irrel_weight (irrel_weight_for fudge) swap
   482                         const_tab chained_const_tab
   483 
   484 fun locality_bonus ({intro_bonus, ...} : relevance_fudge) Intro = intro_bonus
   485   | locality_bonus {elim_bonus, ...} Elim = elim_bonus
   486   | locality_bonus {simp_bonus, ...} Simp = simp_bonus
   487   | locality_bonus {local_bonus, ...} Local = local_bonus
   488   | locality_bonus {assum_bonus, ...} Assum = assum_bonus
   489   | locality_bonus {chained_bonus, ...} Chained = chained_bonus
   490   | locality_bonus _ _ = 0.0
   491 
   492 fun is_odd_const_name s =
   493   s = abs_name orelse String.isPrefix skolem_prefix s orelse
   494   String.isSuffix theory_const_suffix s
   495 
   496 fun fact_weight fudge loc const_tab relevant_consts chained_consts fact_consts =
   497   case fact_consts |> List.partition (pconst_hyper_mem I relevant_consts)
   498                    ||> filter_out (pconst_hyper_mem swap relevant_consts) of
   499     ([], _) => 0.0
   500   | (rel, irrel) =>
   501     if forall (forall (is_odd_const_name o fst)) [rel, irrel] then
   502       0.0
   503     else
   504       let
   505         val irrel = irrel |> filter_out (pconst_mem swap rel)
   506         val rel_weight =
   507           0.0 |> fold (curry (op +) o rel_pconst_weight fudge const_tab) rel
   508         val irrel_weight =
   509           ~ (locality_bonus fudge loc)
   510           |> fold (curry (op +)
   511                    o irrel_pconst_weight fudge const_tab chained_consts) irrel
   512         val res = rel_weight / (rel_weight + irrel_weight)
   513       in if Real.isFinite res then res else 0.0 end
   514 
   515 fun pconsts_in_fact thy is_built_in_const t =
   516   Symtab.fold (fn (s, pss) => fold (cons o pair s) pss)
   517               (Symtab.empty |> add_pconsts_in_term thy is_built_in_const true
   518                                                    (SOME true) t) []
   519 
   520 fun pair_consts_fact thy is_built_in_const fudge fact =
   521   case fact |> snd |> theory_const_prop_of fudge
   522             |> pconsts_in_fact thy is_built_in_const of
   523     [] => NONE
   524   | consts => SOME ((fact, consts), NONE)
   525 
   526 val const_names_in_fact = map fst ooo pconsts_in_fact
   527 
   528 type annotated_thm =
   529   (((unit -> string) * locality) * thm) * (string * ptype) list
   530 
   531 fun take_most_relevant ctxt max_relevant remaining_max
   532         ({max_imperfect, max_imperfect_exp, ...} : relevance_fudge)
   533         (candidates : (annotated_thm * real) list) =
   534   let
   535     val max_imperfect =
   536       Real.ceil (Math.pow (max_imperfect,
   537                     Math.pow (Real.fromInt remaining_max
   538                               / Real.fromInt max_relevant, max_imperfect_exp)))
   539     val (perfect, imperfect) =
   540       candidates |> sort (Real.compare o swap o pairself snd)
   541                  |> take_prefix (fn (_, w) => w > 0.99999)
   542     val ((accepts, more_rejects), rejects) =
   543       chop max_imperfect imperfect |>> append perfect |>> chop remaining_max
   544   in
   545     trace_msg ctxt (fn () =>
   546         "Actually passed (" ^ string_of_int (length accepts) ^ " of " ^
   547         string_of_int (length candidates) ^ "): " ^
   548         (accepts |> map (fn ((((name, _), _), _), weight) =>
   549                             name () ^ " [" ^ Real.toString weight ^ "]")
   550                  |> commas));
   551     (accepts, more_rejects @ rejects)
   552   end
   553 
   554 fun if_empty_replace_with_locality thy is_built_in_const facts loc tab =
   555   if Symtab.is_empty tab then
   556     Symtab.empty
   557     |> fold (add_pconsts_in_term thy is_built_in_const false (SOME false))
   558             (map_filter (fn ((_, loc'), th) =>
   559                             if loc' = loc then SOME (prop_of th) else NONE)
   560                         facts)
   561   else
   562     tab
   563 
   564 fun consider_arities is_built_in_const th =
   565   let
   566     fun aux _ _ NONE = NONE
   567       | aux t args (SOME tab) =
   568         case t of
   569           t1 $ t2 => SOME tab |> aux t1 (t2 :: args) |> aux t2 []
   570         | Const (x as (s, _)) =>
   571           (if is_built_in_const x args |> fst then
   572              SOME tab
   573            else case Symtab.lookup tab s of
   574              NONE => SOME (Symtab.update (s, length args) tab)
   575            | SOME n => if n = length args then SOME tab else NONE)
   576         | _ => SOME tab
   577   in aux (prop_of th) [] end
   578 
   579 (* FIXME: This is currently only useful for polymorphic type systems. *)
   580 fun could_benefit_from_ext is_built_in_const facts =
   581   fold (consider_arities is_built_in_const o snd) facts (SOME Symtab.empty)
   582   |> is_none
   583 
   584 (* High enough so that it isn't wrongly considered as very relevant (e.g., for E
   585    weights), but low enough so that it is unlikely to be truncated away if few
   586    facts are included. *)
   587 val special_fact_index = 75
   588 
   589 fun relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   590         (fudge as {threshold_divisor, ridiculous_threshold, ...})
   591         ({add, del, ...} : relevance_override) facts chained_ts hyp_ts concl_t =
   592   let
   593     val thy = Proof_Context.theory_of ctxt
   594     val const_tab = fold (count_fact_consts thy fudge) facts Symtab.empty
   595     val add_pconsts = add_pconsts_in_term thy is_built_in_const false o SOME
   596     val chained_const_tab = Symtab.empty |> fold (add_pconsts true) chained_ts
   597     val goal_const_tab =
   598       Symtab.empty |> fold (add_pconsts true) hyp_ts
   599                    |> add_pconsts false concl_t
   600       |> (fn tab => if Symtab.is_empty tab then chained_const_tab else tab)
   601       |> fold (if_empty_replace_with_locality thy is_built_in_const facts)
   602               [Chained, Assum, Local]
   603     val add_ths = Attrib.eval_thms ctxt add
   604     val del_ths = Attrib.eval_thms ctxt del
   605     val facts = facts |> filter_out (member Thm.eq_thm_prop del_ths o snd)
   606     fun iter j remaining_max threshold rel_const_tab hopeless hopeful =
   607       let
   608         fun relevant [] _ [] =
   609             (* Nothing has been added this iteration. *)
   610             if j = 0 andalso threshold >= ridiculous_threshold then
   611               (* First iteration? Try again. *)
   612               iter 0 max_relevant (threshold / threshold_divisor) rel_const_tab
   613                    hopeless hopeful
   614             else
   615               []
   616           | relevant candidates rejects [] =
   617             let
   618               val (accepts, more_rejects) =
   619                 take_most_relevant ctxt max_relevant remaining_max fudge
   620                                    candidates
   621               val rel_const_tab' =
   622                 rel_const_tab
   623                 |> fold (add_pconst_to_table false) (maps (snd o fst) accepts)
   624               fun is_dirty (c, _) =
   625                 Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c
   626               val (hopeful_rejects, hopeless_rejects) =
   627                  (rejects @ hopeless, ([], []))
   628                  |-> fold (fn (ax as (_, consts), old_weight) =>
   629                               if exists is_dirty consts then
   630                                 apfst (cons (ax, NONE))
   631                               else
   632                                 apsnd (cons (ax, old_weight)))
   633                  |>> append (more_rejects
   634                              |> map (fn (ax as (_, consts), old_weight) =>
   635                                         (ax, if exists is_dirty consts then NONE
   636                                              else SOME old_weight)))
   637               val threshold =
   638                 1.0 - (1.0 - threshold)
   639                       * Math.pow (decay, Real.fromInt (length accepts))
   640               val remaining_max = remaining_max - length accepts
   641             in
   642               trace_msg ctxt (fn () => "New or updated constants: " ^
   643                   commas (rel_const_tab' |> Symtab.dest
   644                           |> subtract (op =) (rel_const_tab |> Symtab.dest)
   645                           |> map string_for_hyper_pconst));
   646               map (fst o fst) accepts @
   647               (if remaining_max = 0 then
   648                  []
   649                else
   650                  iter (j + 1) remaining_max threshold rel_const_tab'
   651                       hopeless_rejects hopeful_rejects)
   652             end
   653           | relevant candidates rejects
   654                      (((ax as (((_, loc), _), fact_consts)), cached_weight)
   655                       :: hopeful) =
   656             let
   657               val weight =
   658                 case cached_weight of
   659                   SOME w => w
   660                 | NONE => fact_weight fudge loc const_tab rel_const_tab
   661                                       chained_const_tab fact_consts
   662             in
   663               if weight >= threshold then
   664                 relevant ((ax, weight) :: candidates) rejects hopeful
   665               else
   666                 relevant candidates ((ax, weight) :: rejects) hopeful
   667             end
   668         in
   669           trace_msg ctxt (fn () =>
   670               "ITERATION " ^ string_of_int j ^ ": current threshold: " ^
   671               Real.toString threshold ^ ", constants: " ^
   672               commas (rel_const_tab |> Symtab.dest
   673                       |> filter (curry (op <>) [] o snd)
   674                       |> map string_for_hyper_pconst));
   675           relevant [] [] hopeful
   676         end
   677     fun prepend_facts ths accepts =
   678       ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @
   679        (accepts |> filter_out (member Thm.eq_thm_prop ths o snd)))
   680       |> take max_relevant
   681     fun uses_const s t =
   682       fold_aterms (curry (fn (Const (s', _), false) => s' = s | (_, b) => b)) t
   683                   false
   684     fun add_set_const_thms accepts (s, ths) =
   685       if exists (uses_const s o prop_of o snd) accepts orelse
   686          exists (uses_const s) (concl_t :: hyp_ts) then
   687         append ths
   688       else
   689         I
   690     fun insert_into_facts accepts [] = accepts
   691       | insert_into_facts accepts ths =
   692         let
   693           val add = facts |> filter (member Thm.eq_thm_prop ths o snd)
   694           val (bef, after) =
   695             accepts |> filter_out (member Thm.eq_thm_prop ths o snd)
   696                     |> take (max_relevant - length add)
   697                     |> chop special_fact_index
   698         in bef @ add @ after end
   699     fun insert_special_facts accepts =
   700        [] |> could_benefit_from_ext is_built_in_const accepts ? cons @{thm ext}
   701           |> fold (add_set_const_thms accepts) set_consts
   702           |> insert_into_facts accepts
   703   in
   704     facts |> map_filter (pair_consts_fact thy is_built_in_const fudge)
   705           |> iter 0 max_relevant threshold0 goal_const_tab []
   706           |> not (null add_ths) ? prepend_facts add_ths
   707           |> insert_special_facts
   708           |> tap (fn accepts => trace_msg ctxt (fn () =>
   709                       "Total relevant: " ^ string_of_int (length accepts)))
   710   end
   711 
   712 
   713 (***************************************************************)
   714 (* Retrieving and filtering lemmas                             *)
   715 (***************************************************************)
   716 
   717 (*** retrieve lemmas and filter them ***)
   718 
   719 (*Reject theorems with names like "List.filter.filter_list_def" or
   720   "Accessible_Part.acc.defs", as these are definitions arising from packages.*)
   721 fun is_package_def a =
   722   let val names = Long_Name.explode a in
   723     (length names > 2 andalso not (hd names = "local") andalso
   724      String.isSuffix "_def" a) orelse String.isSuffix "_defs" a
   725   end
   726 
   727 fun uniquify xs =
   728   Termtab.fold (cons o snd)
   729                (fold (Termtab.update o `(prop_of o snd)) xs Termtab.empty) []
   730 
   731 (* FIXME: put other record thms here, or declare as "no_atp" *)
   732 fun multi_base_blacklist ctxt =
   733   ["defs", "select_defs", "update_defs", "split", "splits", "split_asm",
   734    "cases", "ext_cases", "eq.simps", "eq.refl", "nchotomy", "case_cong",
   735    "weak_case_cong"]
   736   |> not (Config.get ctxt instantiate_inducts) ? append ["induct", "inducts"]
   737   |> map (prefix ".")
   738 
   739 val max_lambda_nesting = 3
   740 
   741 fun term_has_too_many_lambdas max (t1 $ t2) =
   742     exists (term_has_too_many_lambdas max) [t1, t2]
   743   | term_has_too_many_lambdas max (Abs (_, _, t)) =
   744     max = 0 orelse term_has_too_many_lambdas (max - 1) t
   745   | term_has_too_many_lambdas _ _ = false
   746 
   747 (* Don't count nested lambdas at the level of formulas, since they are
   748    quantifiers. *)
   749 fun formula_has_too_many_lambdas Ts (Abs (_, T, t)) =
   750     formula_has_too_many_lambdas (T :: Ts) t
   751   | formula_has_too_many_lambdas Ts t =
   752     if member (op =) [HOLogic.boolT, propT] (fastype_of1 (Ts, t)) then
   753       exists (formula_has_too_many_lambdas Ts) (#2 (strip_comb t))
   754     else
   755       term_has_too_many_lambdas max_lambda_nesting t
   756 
   757 (* The max apply depth of any "metis" call in "Metis_Examples" (on 2007-10-31)
   758    was 11. *)
   759 val max_apply_depth = 15
   760 
   761 fun apply_depth (f $ t) = Int.max (apply_depth f, apply_depth t + 1)
   762   | apply_depth (Abs (_, _, t)) = apply_depth t
   763   | apply_depth _ = 0
   764 
   765 fun is_formula_too_complex t =
   766   apply_depth t > max_apply_depth orelse formula_has_too_many_lambdas [] t
   767 
   768 (* FIXME: Extend to "Meson" and "Metis" *)
   769 val exists_sledgehammer_const =
   770   exists_Const (fn (s, _) => String.isPrefix sledgehammer_prefix s)
   771 
   772 (* FIXME: make more reliable *)
   773 val exists_low_level_class_const =
   774   exists_Const (fn (s, _) =>
   775      String.isSubstring (Long_Name.separator ^ "class" ^ Long_Name.separator) s)
   776 
   777 fun is_metastrange_theorem th =
   778   case head_of (concl_of th) of
   779       Const (a, _) => (a <> @{const_name Trueprop} andalso
   780                        a <> @{const_name "=="})
   781     | _ => false
   782 
   783 fun is_that_fact th =
   784   String.isSuffix (Long_Name.separator ^ Obtain.thatN) (Thm.get_name_hint th)
   785   andalso exists_subterm (fn Free (s, _) => s = Name.skolem Auto_Bind.thesisN
   786                            | _ => false) (prop_of th)
   787 
   788 val type_has_top_sort =
   789   exists_subtype (fn TFree (_, []) => true | TVar (_, []) => true | _ => false)
   790 
   791 (**** Predicates to detect unwanted facts (prolific or likely to cause
   792       unsoundness) ****)
   793 
   794 fun is_theorem_bad_for_atps thm =
   795   let val t = prop_of thm in
   796     is_formula_too_complex t orelse exists_type type_has_top_sort t orelse
   797     exists_sledgehammer_const t orelse exists_low_level_class_const t orelse
   798     is_metastrange_theorem thm orelse is_that_fact thm
   799   end
   800 
   801 fun meta_equify (@{const Trueprop}
   802                  $ (Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2)) =
   803     Const (@{const_name "=="}, T --> T --> @{typ prop}) $ t1 $ t2
   804   | meta_equify t = t
   805 
   806 val normalize_simp_prop =
   807   meta_equify
   808   #> map_aterms (fn Var ((s, _), T) => Var ((s, 0), T) | t => t)
   809   #> map_types (map_type_tvar (fn ((s, _), S) => TVar ((s, 0), S)))
   810 
   811 fun clasimpset_rule_table_of ctxt =
   812   let
   813     fun add loc g f = fold (Termtab.update o rpair loc o g o prop_of o f)
   814     val {safeIs, safeEs, hazIs, hazEs, ...} = ctxt |> claset_of |> Classical.rep_cs
   815     val intros = Item_Net.content safeIs @ Item_Net.content hazIs
   816     val elims = map Classical.classical_rule (Item_Net.content safeEs @ Item_Net.content hazEs)
   817     val simps = ctxt |> simpset_of |> dest_ss |> #simps
   818   in
   819     Termtab.empty |> add Intro I I intros
   820                   |> add Elim I I elims
   821                   |> add Simp I snd simps
   822                   |> add Simp normalize_simp_prop snd simps
   823   end
   824 
   825 fun all_facts ctxt reserved really_all add_ths chained_ths =
   826   let
   827     val thy = Proof_Context.theory_of ctxt
   828     val global_facts = Global_Theory.facts_of thy
   829     val local_facts = Proof_Context.facts_of ctxt
   830     val named_locals = local_facts |> Facts.dest_static []
   831     val assms = Assumption.all_assms_of ctxt
   832     fun is_assum th = exists (fn ct => prop_of th aconv term_of ct) assms
   833     val is_chained = member Thm.eq_thm_prop chained_ths
   834     val clasimpset_table = clasimpset_rule_table_of ctxt
   835     fun locality_of_theorem global th =
   836       if is_chained th then
   837         Chained
   838       else if global then
   839         case Termtab.lookup clasimpset_table (prop_of th) of
   840           SOME loc => loc
   841         | NONE => if Thm.eq_thm_prop (th, @{thm ext}) then Extensionality
   842                   else General
   843       else
   844         if is_assum th then Assum else Local
   845     fun is_good_unnamed_local th =
   846       not (Thm.has_name_hint th) andalso
   847       forall (fn (_, ths) => not (member Thm.eq_thm_prop ths th)) named_locals
   848     val unnamed_locals =
   849       union Thm.eq_thm_prop (Facts.props local_facts) chained_ths
   850       |> filter is_good_unnamed_local |> map (pair "" o single)
   851     val full_space =
   852       Name_Space.merge (Facts.space_of global_facts, Facts.space_of local_facts)
   853     fun add_facts global foldx facts =
   854       foldx (fn (name0, ths) =>
   855         if not really_all andalso name0 <> "" andalso
   856            forall (not o member Thm.eq_thm_prop add_ths) ths andalso
   857            (Facts.is_concealed facts name0 orelse
   858             (not (Config.get ctxt ignore_no_atp) andalso
   859              is_package_def name0) orelse
   860             exists (fn s => String.isSuffix s name0)
   861                    (multi_base_blacklist ctxt) orelse
   862             String.isSuffix "_def_raw" (* FIXME: crude hack *) name0) then
   863           I
   864         else
   865           let
   866             val multi = length ths > 1
   867             val backquote_thm =
   868               backquote o string_for_term ctxt o close_form o prop_of
   869             fun check_thms a =
   870               case try (Proof_Context.get_thms ctxt) a of
   871                 NONE => false
   872               | SOME ths' => eq_list Thm.eq_thm_prop (ths, ths')
   873           in
   874             pair 1
   875             #> fold (fn th => fn (j, (multis, unis)) =>
   876                         (j + 1,
   877                          if not really_all andalso
   878                             not (member Thm.eq_thm_prop add_ths th) andalso
   879                             is_theorem_bad_for_atps th then
   880                            (multis, unis)
   881                          else
   882                            let
   883                              val new =
   884                                (((fn () =>
   885                                  if name0 = "" then
   886                                    th |> backquote_thm
   887                                  else
   888                                    [Facts.extern ctxt facts name0,
   889                                     Name_Space.extern ctxt full_space name0,
   890                                     name0]
   891                                    |> find_first check_thms
   892                                    |> (fn SOME name =>
   893                                           make_name reserved multi j name
   894                                         | NONE => "")),
   895                                 locality_of_theorem global th), th)
   896                            in
   897                              if multi then (new :: multis, unis)
   898                              else (multis, new :: unis)
   899                            end)) ths
   900             #> snd
   901           end)
   902   in
   903     (* The single-name theorems go after the multiple-name ones, so that single
   904        names are preferred when both are available. *)
   905     ([], []) |> add_facts false fold local_facts (unnamed_locals @ named_locals)
   906              |> add_facts true Facts.fold_static global_facts global_facts
   907              |> op @
   908   end
   909 
   910 fun external_frees t =
   911   [] |> Term.add_frees t |> filter_out (can Name.dest_internal o fst)
   912 
   913 fun nearly_all_facts ctxt ({add, only, ...} : relevance_override) chained_ths
   914                      hyp_ts concl_t =
   915   let
   916     val thy = Proof_Context.theory_of ctxt
   917     val reserved = reserved_isar_keyword_table ()
   918     val add_ths = Attrib.eval_thms ctxt add
   919     val ind_stmt =
   920       Logic.list_implies (hyp_ts |> filter_out (null o external_frees), concl_t)
   921       |> Object_Logic.atomize_term thy
   922     val ind_stmt_xs = external_frees ind_stmt
   923   in
   924     (if only then
   925        maps (map (fn ((name, loc), th) => ((K name, loc), th))
   926              o fact_from_ref ctxt reserved chained_ths) add
   927      else
   928        all_facts ctxt reserved false add_ths chained_ths)
   929     |> Config.get ctxt instantiate_inducts
   930        ? maps (instantiate_if_induct_rule ctxt ind_stmt ind_stmt_xs)
   931     |> (not (Config.get ctxt ignore_no_atp) andalso not only)
   932        ? filter_out (No_ATPs.member ctxt o snd)
   933     |> uniquify
   934   end
   935 
   936 fun relevant_facts ctxt (threshold0, threshold1) max_relevant
   937                    is_built_in_const fudge (override as {only, ...}) chained_ths
   938                    hyp_ts concl_t facts =
   939   let
   940     val thy = Proof_Context.theory_of ctxt
   941     val decay = Math.pow ((1.0 - threshold1) / (1.0 - threshold0),
   942                           1.0 / Real.fromInt (max_relevant + 1))
   943   in
   944     trace_msg ctxt (fn () => "Considering " ^ string_of_int (length facts) ^
   945                              " facts");
   946     (if only orelse threshold1 < 0.0 then
   947        facts
   948      else if threshold0 > 1.0 orelse threshold0 > threshold1 orelse
   949              max_relevant = 0 then
   950        []
   951      else
   952        relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   953            fudge override facts (chained_ths |> map prop_of) hyp_ts
   954            (concl_t |> theory_constify fudge (Context.theory_name thy)))
   955     |> map (apfst (apfst (fn f => f ())))
   956   end
   957 
   958 end;