src/HOL/Tools/Sledgehammer/sledgehammer_filter.ML
author blanchet
Wed Sep 07 13:50:17 2011 +0200 (2011-09-07)
changeset 44783 3634c6dba23f
parent 44625 4a1132815a70
child 44785 f4975fa4a2f8
permissions -rw-r--r--
tuning
     1 (*  Title:      HOL/Tools/Sledgehammer/sledgehammer_filter.ML
     2     Author:     Jia Meng, Cambridge University Computer Laboratory and NICTA
     3     Author:     Jasmin Blanchette, TU Muenchen
     4 
     5 Sledgehammer's relevance filter.
     6 *)
     7 
     8 signature SLEDGEHAMMER_FILTER =
     9 sig
    10   type locality = ATP_Translate.locality
    11 
    12   type relevance_fudge =
    13     {local_const_multiplier : real,
    14      worse_irrel_freq : real,
    15      higher_order_irrel_weight : real,
    16      abs_rel_weight : real,
    17      abs_irrel_weight : real,
    18      skolem_irrel_weight : real,
    19      theory_const_rel_weight : real,
    20      theory_const_irrel_weight : real,
    21      chained_const_irrel_weight : real,
    22      intro_bonus : real,
    23      elim_bonus : real,
    24      simp_bonus : real,
    25      local_bonus : real,
    26      assum_bonus : real,
    27      chained_bonus : real,
    28      max_imperfect : real,
    29      max_imperfect_exp : real,
    30      threshold_divisor : real,
    31      ridiculous_threshold : real}
    32 
    33   type relevance_override =
    34     {add : (Facts.ref * Attrib.src list) list,
    35      del : (Facts.ref * Attrib.src list) list,
    36      only : bool}
    37 
    38   val trace : bool Config.T
    39   val ignore_no_atp : bool Config.T
    40   val instantiate_inducts : bool Config.T
    41   val no_relevance_override : relevance_override
    42   val const_names_in_fact :
    43     theory -> (string * typ -> term list -> bool * term list) -> term
    44     -> string list
    45   val fact_from_ref :
    46     Proof.context -> unit Symtab.table -> thm list
    47     -> Facts.ref * Attrib.src list -> ((string * locality) * thm) list
    48   val all_facts :
    49     Proof.context -> bool -> 'a Symtab.table -> bool -> thm list -> thm list
    50     -> (((unit -> string) * locality) * thm) list
    51   val nearly_all_facts :
    52     Proof.context -> bool -> relevance_override -> thm list -> term list -> term
    53     -> (((unit -> string) * locality) * thm) list
    54   val relevant_facts :
    55     Proof.context -> real * real -> int
    56     -> (string * typ -> term list -> bool * term list) -> relevance_fudge
    57     -> relevance_override -> thm list -> term list -> term
    58     -> (((unit -> string) * locality) * thm) list
    59     -> ((string * locality) * thm) list
    60 end;
    61 
    62 structure Sledgehammer_Filter : SLEDGEHAMMER_FILTER =
    63 struct
    64 
    65 open ATP_Translate
    66 open Sledgehammer_Util
    67 
    68 val trace =
    69   Attrib.setup_config_bool @{binding sledgehammer_filter_trace} (K false)
    70 fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
    71 
    72 (* experimental features *)
    73 val ignore_no_atp =
    74   Attrib.setup_config_bool @{binding sledgehammer_ignore_no_atp} (K false)
    75 val instantiate_inducts =
    76   Attrib.setup_config_bool @{binding sledgehammer_instantiate_inducts} (K false)
    77 
    78 type relevance_fudge =
    79   {local_const_multiplier : real,
    80    worse_irrel_freq : real,
    81    higher_order_irrel_weight : real,
    82    abs_rel_weight : real,
    83    abs_irrel_weight : real,
    84    skolem_irrel_weight : real,
    85    theory_const_rel_weight : real,
    86    theory_const_irrel_weight : real,
    87    chained_const_irrel_weight : real,
    88    intro_bonus : real,
    89    elim_bonus : real,
    90    simp_bonus : real,
    91    local_bonus : real,
    92    assum_bonus : real,
    93    chained_bonus : real,
    94    max_imperfect : real,
    95    max_imperfect_exp : real,
    96    threshold_divisor : real,
    97    ridiculous_threshold : real}
    98 
    99 type relevance_override =
   100   {add : (Facts.ref * Attrib.src list) list,
   101    del : (Facts.ref * Attrib.src list) list,
   102    only : bool}
   103 
   104 val no_relevance_override = {add = [], del = [], only = false}
   105 
   106 val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator
   107 val abs_name = sledgehammer_prefix ^ "abs"
   108 val skolem_prefix = sledgehammer_prefix ^ "sko"
   109 val theory_const_suffix = Long_Name.separator ^ " 1"
   110 
   111 fun needs_quoting reserved s =
   112   Symtab.defined reserved s orelse
   113   exists (not o Lexicon.is_identifier) (Long_Name.explode s)
   114 
   115 fun make_name reserved multi j name =
   116   (name |> needs_quoting reserved name ? quote) ^
   117   (if multi then "(" ^ string_of_int j ^ ")" else "")
   118 
   119 fun explode_interval _ (Facts.FromTo (i, j)) = i upto j
   120   | explode_interval max (Facts.From i) = i upto i + max - 1
   121   | explode_interval _ (Facts.Single i) = [i]
   122 
   123 val backquote =
   124   raw_explode #> map (fn "`" => "\\`" | s => s) #> implode #> enclose "`" "`"
   125 fun fact_from_ref ctxt reserved chained_ths (xthm as (xref, args)) =
   126   let
   127     val ths = Attrib.eval_thms ctxt [xthm]
   128     val bracket =
   129       map (enclose "[" "]" o Pretty.str_of o Args.pretty_src ctxt) args
   130       |> implode
   131     fun nth_name j =
   132       case xref of
   133         Facts.Fact s => backquote s ^ bracket
   134       | Facts.Named (("", _), _) => "[" ^ bracket ^ "]"
   135       | Facts.Named ((name, _), NONE) =>
   136         make_name reserved (length ths > 1) (j + 1) name ^ bracket
   137       | Facts.Named ((name, _), SOME intervals) =>
   138         make_name reserved true
   139                  (nth (maps (explode_interval (length ths)) intervals) j) name ^
   140         bracket
   141   in
   142     (ths, (0, []))
   143     |-> fold (fn th => fn (j, rest) =>
   144                  (j + 1,
   145                   ((nth_name j,
   146                     if member Thm.eq_thm_prop chained_ths th then Chained
   147                     else General), th) :: rest))
   148     |> snd
   149   end
   150 
   151 (* This is a terrible hack. Free variables are sometimes code as "M__" when they
   152    are displayed as "M" and we want to avoid clashes with these. But sometimes
   153    it's even worse: "Ma__" encodes "M". So we simply reserve all prefixes of all
   154    free variables. In the worse case scenario, where the fact won't be resolved
   155    correctly, the user can fix it manually, e.g., by naming the fact in
   156    question. Ideally we would need nothing of it, but backticks just don't work
   157    with schematic variables. *)
   158 fun all_prefixes_of s =
   159   map (fn i => String.extract (s, 0, SOME i)) (1 upto size s - 1)
   160 fun close_form t =
   161   (t, [] |> Term.add_free_names t |> maps all_prefixes_of)
   162   |> fold (fn ((s, i), T) => fn (t', taken) =>
   163               let val s' = singleton (Name.variant_list taken) s in
   164                 ((if fastype_of t' = HOLogic.boolT then HOLogic.all_const
   165                   else Term.all) T
   166                  $ Abs (s', T, abstract_over (Var ((s, i), T), t')),
   167                  s' :: taken)
   168               end)
   169           (Term.add_vars t [] |> sort_wrt (fst o fst))
   170   |> fst
   171 
   172 fun string_for_term ctxt t =
   173   Print_Mode.setmp (filter (curry (op =) Symbol.xsymbolsN)
   174                    (print_mode_value ())) (Syntax.string_of_term ctxt) t
   175   |> String.translate (fn c => if Char.isPrint c then str c else "")
   176   |> simplify_spaces
   177 
   178 (** Structural induction rules **)
   179 
   180 fun struct_induct_rule_on th =
   181   case Logic.strip_horn (prop_of th) of
   182     (prems, @{const Trueprop}
   183             $ ((p as Var ((p_name, 0), _)) $ (a as Var (_, ind_T)))) =>
   184     if not (is_TVar ind_T) andalso length prems > 1 andalso
   185        exists (exists_subterm (curry (op aconv) p)) prems andalso
   186        not (exists (exists_subterm (curry (op aconv) a)) prems) then
   187       SOME (p_name, ind_T)
   188     else
   189       NONE
   190   | _ => NONE
   191 
   192 fun instantiate_induct_rule ctxt concl_prop p_name ((name, loc), th) ind_x =
   193   let
   194     fun varify_noninducts (t as Free (s, T)) =
   195         if (s, T) = ind_x orelse can dest_funT T then t else Var ((s, 0), T)
   196       | varify_noninducts t = t
   197     val p_inst =
   198       concl_prop |> map_aterms varify_noninducts |> close_form
   199                  |> lambda (Free ind_x)
   200                  |> string_for_term ctxt
   201   in
   202     ((fn () => name () ^ "[where " ^ p_name ^ " = " ^ quote p_inst ^ "]", loc),
   203      th |> read_instantiate ctxt [((p_name, 0), p_inst)])
   204   end
   205 
   206 fun type_match thy (T1, T2) =
   207   (Sign.typ_match thy (T2, T1) Vartab.empty; true)
   208   handle Type.TYPE_MATCH => false
   209 
   210 fun instantiate_if_induct_rule ctxt stmt stmt_xs (ax as (_, th)) =
   211   case struct_induct_rule_on th of
   212     SOME (p_name, ind_T) =>
   213     let val thy = Proof_Context.theory_of ctxt in
   214       stmt_xs |> filter (fn (_, T) => type_match thy (T, ind_T))
   215               |> map_filter (try (instantiate_induct_rule ctxt stmt p_name ax))
   216     end
   217   | NONE => [ax]
   218 
   219 (***************************************************************)
   220 (* Relevance Filtering                                         *)
   221 (***************************************************************)
   222 
   223 (*** constants with types ***)
   224 
   225 fun order_of_type (Type (@{type_name fun}, [T1, @{typ bool}])) =
   226     order_of_type T1 (* cheat: pretend sets are first-order *)
   227   | order_of_type (Type (@{type_name fun}, [T1, T2])) =
   228     Int.max (order_of_type T1 + 1, order_of_type T2)
   229   | order_of_type (Type (_, Ts)) = fold (Integer.max o order_of_type) Ts 0
   230   | order_of_type _ = 0
   231 
   232 (* An abstraction of Isabelle types and first-order terms *)
   233 datatype pattern = PVar | PApp of string * pattern list
   234 datatype ptype = PType of int * pattern list
   235 
   236 fun string_for_pattern PVar = "_"
   237   | string_for_pattern (PApp (s, ps)) =
   238     if null ps then s else s ^ string_for_patterns ps
   239 and string_for_patterns ps = "(" ^ commas (map string_for_pattern ps) ^ ")"
   240 fun string_for_ptype (PType (_, ps)) = string_for_patterns ps
   241 
   242 (*Is the second type an instance of the first one?*)
   243 fun match_pattern (PVar, _) = true
   244   | match_pattern (PApp _, PVar) = false
   245   | match_pattern (PApp (s, ps), PApp (t, qs)) =
   246     s = t andalso match_patterns (ps, qs)
   247 and match_patterns (_, []) = true
   248   | match_patterns ([], _) = false
   249   | match_patterns (p :: ps, q :: qs) =
   250     match_pattern (p, q) andalso match_patterns (ps, qs)
   251 fun match_ptype (PType (_, ps), PType (_, qs)) = match_patterns (ps, qs)
   252 
   253 (* Is there a unifiable constant? *)
   254 fun pconst_mem f consts (s, ps) =
   255   exists (curry (match_ptype o f) ps)
   256          (map snd (filter (curry (op =) s o fst) consts))
   257 fun pconst_hyper_mem f const_tab (s, ps) =
   258   exists (curry (match_ptype o f) ps) (these (Symtab.lookup const_tab s))
   259 
   260 fun pattern_for_type (Type (s, Ts)) = PApp (s, map pattern_for_type Ts)
   261   | pattern_for_type (TFree (s, _)) = PApp (s, [])
   262   | pattern_for_type (TVar _) = PVar
   263 
   264 (* Pairs a constant with the list of its type instantiations. *)
   265 fun ptype thy const x =
   266   (if const then map pattern_for_type (these (try (Sign.const_typargs thy) x))
   267    else [])
   268 fun rich_ptype thy const (s, T) =
   269   PType (order_of_type T, ptype thy const (s, T))
   270 fun rich_pconst thy const (s, T) = (s, rich_ptype thy const (s, T))
   271 
   272 fun string_for_hyper_pconst (s, ps) =
   273   s ^ "{" ^ commas (map string_for_ptype ps) ^ "}"
   274 
   275 (* Add a pconstant to the table, but a [] entry means a standard
   276    connective, which we ignore.*)
   277 fun add_pconst_to_table also_skolem (s, p) =
   278   if (not also_skolem andalso String.isPrefix skolem_prefix s) then I
   279   else Symtab.map_default (s, [p]) (insert (op =) p)
   280 
   281 (* Set constants tend to pull in too many irrelevant facts. We limit the damage
   282    by treating them more or less as if they were built-in but add their
   283    definition at the end. *)
   284 val set_consts =
   285   [(@{const_name Collect}, @{thms Collect_def}),
   286    (@{const_name Set.member}, @{thms mem_def})]
   287 
   288 val is_set_const = AList.defined (op =) set_consts
   289 
   290 fun add_pconsts_in_term thy is_built_in_const also_skolems pos =
   291   let
   292     val flip = Option.map not
   293     (* We include free variables, as well as constants, to handle locales. For
   294        each quantifiers that must necessarily be skolemized by the automatic
   295        prover, we introduce a fresh constant to simulate the effect of
   296        Skolemization. *)
   297     fun do_const const ext_arg (x as (s, _)) ts =
   298       let val (built_in, ts) = is_built_in_const x ts in
   299         if is_set_const s then
   300           fold (do_term ext_arg) ts
   301         else
   302           (not built_in
   303            ? add_pconst_to_table also_skolems (rich_pconst thy const x))
   304           #> fold (do_term false) ts
   305       end
   306     and do_term ext_arg t =
   307       case strip_comb t of
   308         (Const x, ts) => do_const true ext_arg x ts
   309       | (Free x, ts) => do_const false ext_arg x ts
   310       | (Abs (_, T, t'), ts) =>
   311         ((null ts andalso not ext_arg)
   312          (* Since lambdas on the right-hand side of equalities are usually
   313             extensionalized later by "extensionalize_term", we don't penalize
   314             them here. *)
   315          ? add_pconst_to_table true (abs_name, PType (order_of_type T + 1, [])))
   316         #> fold (do_term false) (t' :: ts)
   317       | (_, ts) => fold (do_term false) ts
   318     fun do_quantifier will_surely_be_skolemized abs_T body_t =
   319       do_formula pos body_t
   320       #> (if also_skolems andalso will_surely_be_skolemized then
   321             add_pconst_to_table true
   322                 (legacy_gensym skolem_prefix, PType (order_of_type abs_T, []))
   323           else
   324             I)
   325     and do_term_or_formula ext_arg T =
   326       if T = HOLogic.boolT then do_formula NONE else do_term ext_arg
   327     and do_formula pos t =
   328       case t of
   329         Const (@{const_name all}, _) $ Abs (_, T, t') =>
   330         do_quantifier (pos = SOME false) T t'
   331       | @{const "==>"} $ t1 $ t2 =>
   332         do_formula (flip pos) t1 #> do_formula pos t2
   333       | Const (@{const_name "=="}, Type (_, [T, _])) $ t1 $ t2 =>
   334         do_term_or_formula false T t1 #> do_term_or_formula true T t2
   335       | @{const Trueprop} $ t1 => do_formula pos t1
   336       | @{const False} => I
   337       | @{const True} => I
   338       | @{const Not} $ t1 => do_formula (flip pos) t1
   339       | Const (@{const_name All}, _) $ Abs (_, T, t') =>
   340         do_quantifier (pos = SOME false) T t'
   341       | Const (@{const_name Ex}, _) $ Abs (_, T, t') =>
   342         do_quantifier (pos = SOME true) T t'
   343       | @{const HOL.conj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   344       | @{const HOL.disj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   345       | @{const HOL.implies} $ t1 $ t2 =>
   346         do_formula (flip pos) t1 #> do_formula pos t2
   347       | Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2 =>
   348         do_term_or_formula false T t1 #> do_term_or_formula true T t2
   349       | Const (@{const_name If}, Type (_, [_, Type (_, [T, _])]))
   350         $ t1 $ t2 $ t3 =>
   351         do_formula NONE t1 #> fold (do_term_or_formula false T) [t2, t3]
   352       | Const (@{const_name Ex1}, _) $ Abs (_, T, t') =>
   353         do_quantifier (is_some pos) T t'
   354       | Const (@{const_name Ball}, _) $ t1 $ Abs (_, T, t') =>
   355         do_quantifier (pos = SOME false) T
   356                       (HOLogic.mk_imp (incr_boundvars 1 t1 $ Bound 0, t'))
   357       | Const (@{const_name Bex}, _) $ t1 $ Abs (_, T, t') =>
   358         do_quantifier (pos = SOME true) T
   359                       (HOLogic.mk_conj (incr_boundvars 1 t1 $ Bound 0, t'))
   360       | (t0 as Const (_, @{typ bool})) $ t1 =>
   361         do_term false t0 #> do_formula pos t1  (* theory constant *)
   362       | _ => do_term false t
   363   in do_formula pos end
   364 
   365 (*Inserts a dummy "constant" referring to the theory name, so that relevance
   366   takes the given theory into account.*)
   367 fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...}
   368                      : relevance_fudge) thy_name t =
   369   if exists (curry (op <) 0.0) [theory_const_rel_weight,
   370                                 theory_const_irrel_weight] then
   371     Const (thy_name ^ theory_const_suffix, @{typ bool}) $ t
   372   else
   373     t
   374 
   375 fun theory_const_prop_of fudge th =
   376   theory_constify fudge (Context.theory_name (theory_of_thm th)) (prop_of th)
   377 
   378 (**** Constant / Type Frequencies ****)
   379 
   380 (* A two-dimensional symbol table counts frequencies of constants. It's keyed
   381    first by constant name and second by its list of type instantiations. For the
   382    latter, we need a linear ordering on "pattern list". *)
   383 
   384 fun pattern_ord p =
   385   case p of
   386     (PVar, PVar) => EQUAL
   387   | (PVar, PApp _) => LESS
   388   | (PApp _, PVar) => GREATER
   389   | (PApp q1, PApp q2) =>
   390     prod_ord fast_string_ord (dict_ord pattern_ord) (q1, q2)
   391 fun ptype_ord (PType p, PType q) =
   392   prod_ord (dict_ord pattern_ord) int_ord (swap p, swap q)
   393 
   394 structure PType_Tab = Table(type key = ptype val ord = ptype_ord)
   395 
   396 fun count_fact_consts thy fudge =
   397   let
   398     fun do_const const (s, T) ts =
   399       (* Two-dimensional table update. Constant maps to types maps to count. *)
   400       PType_Tab.map_default (rich_ptype thy const (s, T), 0) (Integer.add 1)
   401       |> Symtab.map_default (s, PType_Tab.empty)
   402       #> fold do_term ts
   403     and do_term t =
   404       case strip_comb t of
   405         (Const x, ts) => do_const true x ts
   406       | (Free x, ts) => do_const false x ts
   407       | (Abs (_, _, t'), ts) => fold do_term (t' :: ts)
   408       | (_, ts) => fold do_term ts
   409   in do_term o theory_const_prop_of fudge o snd end
   410 
   411 
   412 (**** Actual Filtering Code ****)
   413 
   414 fun pow_int _ 0 = 1.0
   415   | pow_int x 1 = x
   416   | pow_int x n = if n > 0 then x * pow_int x (n - 1) else pow_int x (n + 1) / x
   417 
   418 (*The frequency of a constant is the sum of those of all instances of its type.*)
   419 fun pconst_freq match const_tab (c, ps) =
   420   PType_Tab.fold (fn (qs, m) => match (ps, qs) ? Integer.add m)
   421                  (the (Symtab.lookup const_tab c)) 0
   422 
   423 
   424 (* A surprising number of theorems contain only a few significant constants.
   425    These include all induction rules, and other general theorems. *)
   426 
   427 (* "log" seems best in practice. A constant function of one ignores the constant
   428    frequencies. Rare constants give more points if they are relevant than less
   429    rare ones. *)
   430 fun rel_weight_for _ freq = 1.0 + 2.0 / Math.ln (Real.fromInt freq + 1.0)
   431 
   432 (* Irrelevant constants are treated differently. We associate lower penalties to
   433    very rare constants and very common ones -- the former because they can't
   434    lead to the inclusion of too many new facts, and the latter because they are
   435    so common as to be of little interest. *)
   436 fun irrel_weight_for ({worse_irrel_freq, higher_order_irrel_weight, ...}
   437                       : relevance_fudge) order freq =
   438   let val (k, x) = worse_irrel_freq |> `Real.ceil in
   439     (if freq < k then Math.ln (Real.fromInt (freq + 1)) / Math.ln x
   440      else rel_weight_for order freq / rel_weight_for order k)
   441     * pow_int higher_order_irrel_weight (order - 1)
   442   end
   443 
   444 fun multiplier_for_const_name local_const_multiplier s =
   445   if String.isSubstring "." s then 1.0 else local_const_multiplier
   446 
   447 (* Computes a constant's weight, as determined by its frequency. *)
   448 fun generic_pconst_weight local_const_multiplier abs_weight skolem_weight
   449                           theory_const_weight chained_const_weight weight_for f
   450                           const_tab chained_const_tab (c as (s, PType (m, _))) =
   451   if s = abs_name then
   452     abs_weight
   453   else if String.isPrefix skolem_prefix s then
   454     skolem_weight
   455   else if String.isSuffix theory_const_suffix s then
   456     theory_const_weight
   457   else
   458     multiplier_for_const_name local_const_multiplier s
   459     * weight_for m (pconst_freq (match_ptype o f) const_tab c)
   460     |> (if chained_const_weight < 1.0 andalso
   461            pconst_hyper_mem I chained_const_tab c then
   462           curry (op *) chained_const_weight
   463         else
   464           I)
   465 
   466 fun rel_pconst_weight ({local_const_multiplier, abs_rel_weight,
   467                         theory_const_rel_weight, ...} : relevance_fudge)
   468                       const_tab =
   469   generic_pconst_weight local_const_multiplier abs_rel_weight 0.0
   470                         theory_const_rel_weight 0.0 rel_weight_for I const_tab
   471                         Symtab.empty
   472 
   473 fun irrel_pconst_weight (fudge as {local_const_multiplier, abs_irrel_weight,
   474                                    skolem_irrel_weight,
   475                                    theory_const_irrel_weight,
   476                                    chained_const_irrel_weight, ...})
   477                         const_tab chained_const_tab =
   478   generic_pconst_weight local_const_multiplier abs_irrel_weight
   479                         skolem_irrel_weight theory_const_irrel_weight
   480                         chained_const_irrel_weight (irrel_weight_for fudge) swap
   481                         const_tab chained_const_tab
   482 
   483 fun locality_bonus ({intro_bonus, ...} : relevance_fudge) Intro = intro_bonus
   484   | locality_bonus {elim_bonus, ...} Elim = elim_bonus
   485   | locality_bonus {simp_bonus, ...} Simp = simp_bonus
   486   | locality_bonus {local_bonus, ...} Local = local_bonus
   487   | locality_bonus {assum_bonus, ...} Assum = assum_bonus
   488   | locality_bonus {chained_bonus, ...} Chained = chained_bonus
   489   | locality_bonus _ _ = 0.0
   490 
   491 fun is_odd_const_name s =
   492   s = abs_name orelse String.isPrefix skolem_prefix s orelse
   493   String.isSuffix theory_const_suffix s
   494 
   495 fun fact_weight fudge loc const_tab relevant_consts chained_consts fact_consts =
   496   case fact_consts |> List.partition (pconst_hyper_mem I relevant_consts)
   497                    ||> filter_out (pconst_hyper_mem swap relevant_consts) of
   498     ([], _) => 0.0
   499   | (rel, irrel) =>
   500     if forall (forall (is_odd_const_name o fst)) [rel, irrel] then
   501       0.0
   502     else
   503       let
   504         val irrel = irrel |> filter_out (pconst_mem swap rel)
   505         val rel_weight =
   506           0.0 |> fold (curry (op +) o rel_pconst_weight fudge const_tab) rel
   507         val irrel_weight =
   508           ~ (locality_bonus fudge loc)
   509           |> fold (curry (op +)
   510                    o irrel_pconst_weight fudge const_tab chained_consts) irrel
   511         val res = rel_weight / (rel_weight + irrel_weight)
   512       in if Real.isFinite res then res else 0.0 end
   513 
   514 fun pconsts_in_fact thy is_built_in_const t =
   515   Symtab.fold (fn (s, pss) => fold (cons o pair s) pss)
   516               (Symtab.empty |> add_pconsts_in_term thy is_built_in_const true
   517                                                    (SOME true) t) []
   518 
   519 fun pair_consts_fact thy is_built_in_const fudge fact =
   520   case fact |> snd |> theory_const_prop_of fudge
   521             |> pconsts_in_fact thy is_built_in_const of
   522     [] => NONE
   523   | consts => SOME ((fact, consts), NONE)
   524 
   525 val const_names_in_fact = map fst ooo pconsts_in_fact
   526 
   527 type annotated_thm =
   528   (((unit -> string) * locality) * thm) * (string * ptype) list
   529 
   530 fun take_most_relevant ctxt max_relevant remaining_max
   531         ({max_imperfect, max_imperfect_exp, ...} : relevance_fudge)
   532         (candidates : (annotated_thm * real) list) =
   533   let
   534     val max_imperfect =
   535       Real.ceil (Math.pow (max_imperfect,
   536                     Math.pow (Real.fromInt remaining_max
   537                               / Real.fromInt max_relevant, max_imperfect_exp)))
   538     val (perfect, imperfect) =
   539       candidates |> sort (Real.compare o swap o pairself snd)
   540                  |> take_prefix (fn (_, w) => w > 0.99999)
   541     val ((accepts, more_rejects), rejects) =
   542       chop max_imperfect imperfect |>> append perfect |>> chop remaining_max
   543   in
   544     trace_msg ctxt (fn () =>
   545         "Actually passed (" ^ string_of_int (length accepts) ^ " of " ^
   546         string_of_int (length candidates) ^ "): " ^
   547         (accepts |> map (fn ((((name, _), _), _), weight) =>
   548                             name () ^ " [" ^ Real.toString weight ^ "]")
   549                  |> commas));
   550     (accepts, more_rejects @ rejects)
   551   end
   552 
   553 fun if_empty_replace_with_locality thy is_built_in_const facts loc tab =
   554   if Symtab.is_empty tab then
   555     Symtab.empty
   556     |> fold (add_pconsts_in_term thy is_built_in_const false (SOME false))
   557             (map_filter (fn ((_, loc'), th) =>
   558                             if loc' = loc then SOME (prop_of th) else NONE)
   559                         facts)
   560   else
   561     tab
   562 
   563 fun consider_arities is_built_in_const th =
   564   let
   565     fun aux _ _ NONE = NONE
   566       | aux t args (SOME tab) =
   567         case t of
   568           t1 $ t2 => SOME tab |> aux t1 (t2 :: args) |> aux t2 []
   569         | Const (x as (s, _)) =>
   570           (if is_built_in_const x args |> fst then
   571              SOME tab
   572            else case Symtab.lookup tab s of
   573              NONE => SOME (Symtab.update (s, length args) tab)
   574            | SOME n => if n = length args then SOME tab else NONE)
   575         | _ => SOME tab
   576   in aux (prop_of th) [] end
   577 
   578 (* FIXME: This is currently only useful for polymorphic type systems. *)
   579 fun could_benefit_from_ext is_built_in_const facts =
   580   fold (consider_arities is_built_in_const o snd) facts (SOME Symtab.empty)
   581   |> is_none
   582 
   583 (* High enough so that it isn't wrongly considered as very relevant (e.g., for E
   584    weights), but low enough so that it is unlikely to be truncated away if few
   585    facts are included. *)
   586 val special_fact_index = 75
   587 
   588 fun relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   589         (fudge as {threshold_divisor, ridiculous_threshold, ...})
   590         ({add, del, ...} : relevance_override) facts chained_ts hyp_ts concl_t =
   591   let
   592     val thy = Proof_Context.theory_of ctxt
   593     val const_tab = fold (count_fact_consts thy fudge) facts Symtab.empty
   594     val add_pconsts = add_pconsts_in_term thy is_built_in_const false o SOME
   595     val chained_const_tab = Symtab.empty |> fold (add_pconsts true) chained_ts
   596     val goal_const_tab =
   597       Symtab.empty |> fold (add_pconsts true) hyp_ts
   598                    |> add_pconsts false concl_t
   599       |> (fn tab => if Symtab.is_empty tab then chained_const_tab else tab)
   600       |> fold (if_empty_replace_with_locality thy is_built_in_const facts)
   601               [Chained, Assum, Local]
   602     val add_ths = Attrib.eval_thms ctxt add
   603     val del_ths = Attrib.eval_thms ctxt del
   604     val facts = facts |> filter_out (member Thm.eq_thm_prop del_ths o snd)
   605     fun iter j remaining_max threshold rel_const_tab hopeless hopeful =
   606       let
   607         fun relevant [] _ [] =
   608             (* Nothing has been added this iteration. *)
   609             if j = 0 andalso threshold >= ridiculous_threshold then
   610               (* First iteration? Try again. *)
   611               iter 0 max_relevant (threshold / threshold_divisor) rel_const_tab
   612                    hopeless hopeful
   613             else
   614               []
   615           | relevant candidates rejects [] =
   616             let
   617               val (accepts, more_rejects) =
   618                 take_most_relevant ctxt max_relevant remaining_max fudge
   619                                    candidates
   620               val rel_const_tab' =
   621                 rel_const_tab
   622                 |> fold (add_pconst_to_table false) (maps (snd o fst) accepts)
   623               fun is_dirty (c, _) =
   624                 Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c
   625               val (hopeful_rejects, hopeless_rejects) =
   626                  (rejects @ hopeless, ([], []))
   627                  |-> fold (fn (ax as (_, consts), old_weight) =>
   628                               if exists is_dirty consts then
   629                                 apfst (cons (ax, NONE))
   630                               else
   631                                 apsnd (cons (ax, old_weight)))
   632                  |>> append (more_rejects
   633                              |> map (fn (ax as (_, consts), old_weight) =>
   634                                         (ax, if exists is_dirty consts then NONE
   635                                              else SOME old_weight)))
   636               val threshold =
   637                 1.0 - (1.0 - threshold)
   638                       * Math.pow (decay, Real.fromInt (length accepts))
   639               val remaining_max = remaining_max - length accepts
   640             in
   641               trace_msg ctxt (fn () => "New or updated constants: " ^
   642                   commas (rel_const_tab' |> Symtab.dest
   643                           |> subtract (op =) (rel_const_tab |> Symtab.dest)
   644                           |> map string_for_hyper_pconst));
   645               map (fst o fst) accepts @
   646               (if remaining_max = 0 then
   647                  []
   648                else
   649                  iter (j + 1) remaining_max threshold rel_const_tab'
   650                       hopeless_rejects hopeful_rejects)
   651             end
   652           | relevant candidates rejects
   653                      (((ax as (((_, loc), _), fact_consts)), cached_weight)
   654                       :: hopeful) =
   655             let
   656               val weight =
   657                 case cached_weight of
   658                   SOME w => w
   659                 | NONE => fact_weight fudge loc const_tab rel_const_tab
   660                                       chained_const_tab fact_consts
   661             in
   662               if weight >= threshold then
   663                 relevant ((ax, weight) :: candidates) rejects hopeful
   664               else
   665                 relevant candidates ((ax, weight) :: rejects) hopeful
   666             end
   667         in
   668           trace_msg ctxt (fn () =>
   669               "ITERATION " ^ string_of_int j ^ ": current threshold: " ^
   670               Real.toString threshold ^ ", constants: " ^
   671               commas (rel_const_tab |> Symtab.dest
   672                       |> filter (curry (op <>) [] o snd)
   673                       |> map string_for_hyper_pconst));
   674           relevant [] [] hopeful
   675         end
   676     fun prepend_facts ths accepts =
   677       ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @
   678        (accepts |> filter_out (member Thm.eq_thm_prop ths o snd)))
   679       |> take max_relevant
   680     fun uses_const s t =
   681       fold_aterms (curry (fn (Const (s', _), false) => s' = s | (_, b) => b)) t
   682                   false
   683     fun add_set_const_thms accepts (s, ths) =
   684       if exists (uses_const s o prop_of o snd) accepts orelse
   685          exists (uses_const s) (concl_t :: hyp_ts) then
   686         append ths
   687       else
   688         I
   689     fun insert_into_facts accepts [] = accepts
   690       | insert_into_facts accepts ths =
   691         let
   692           val add = facts |> filter (member Thm.eq_thm_prop ths o snd)
   693           val (bef, after) =
   694             accepts |> filter_out (member Thm.eq_thm_prop ths o snd)
   695                     |> take (max_relevant - length add)
   696                     |> chop special_fact_index
   697         in bef @ add @ after end
   698     fun insert_special_facts accepts =
   699        [] |> could_benefit_from_ext is_built_in_const accepts ? cons @{thm ext}
   700           |> fold (add_set_const_thms accepts) set_consts
   701           |> insert_into_facts accepts
   702   in
   703     facts |> map_filter (pair_consts_fact thy is_built_in_const fudge)
   704           |> iter 0 max_relevant threshold0 goal_const_tab []
   705           |> not (null add_ths) ? prepend_facts add_ths
   706           |> insert_special_facts
   707           |> tap (fn accepts => trace_msg ctxt (fn () =>
   708                       "Total relevant: " ^ string_of_int (length accepts)))
   709   end
   710 
   711 
   712 (***************************************************************)
   713 (* Retrieving and filtering lemmas                             *)
   714 (***************************************************************)
   715 
   716 (*** retrieve lemmas and filter them ***)
   717 
   718 (*Reject theorems with names like "List.filter.filter_list_def" or
   719   "Accessible_Part.acc.defs", as these are definitions arising from packages.*)
   720 fun is_package_def a =
   721   let val names = Long_Name.explode a in
   722     (length names > 2 andalso not (hd names = "local") andalso
   723      String.isSuffix "_def" a) orelse String.isSuffix "_defs" a
   724   end
   725 
   726 fun uniquify xs =
   727   Termtab.fold (cons o snd)
   728                (fold (Termtab.update o `(prop_of o snd)) xs Termtab.empty) []
   729 
   730 (* FIXME: put other record thms here, or declare as "no_atp" *)
   731 fun multi_base_blacklist ctxt ho_atp =
   732   ["defs", "select_defs", "update_defs", "split", "splits", "split_asm",
   733    "cases", "ext_cases", "eq.simps", "eq.refl", "nchotomy", "case_cong",
   734    "weak_case_cong"]
   735   |> not (ho_atp orelse (Config.get ctxt instantiate_inducts)) ?
   736         append ["induct", "inducts"]
   737   |> map (prefix ".")
   738 
   739 val max_lambda_nesting = 3 (*only applies if not ho_atp*)
   740 
   741 fun term_has_too_many_lambdas max (t1 $ t2) =
   742     exists (term_has_too_many_lambdas max) [t1, t2]
   743   | term_has_too_many_lambdas max (Abs (_, _, t)) =
   744     max = 0 orelse term_has_too_many_lambdas (max - 1) t
   745   | term_has_too_many_lambdas _ _ = false
   746 
   747 (* Don't count nested lambdas at the level of formulas, since they are
   748    quantifiers. *)
   749 fun formula_has_too_many_lambdas true _ _ = false (*i.e. ho_atp*)
   750   | formula_has_too_many_lambdas _ Ts (Abs (_, T, t)) =
   751       formula_has_too_many_lambdas false (T :: Ts) t
   752   | formula_has_too_many_lambdas _ Ts t =
   753     if member (op =) [HOLogic.boolT, propT] (fastype_of1 (Ts, t)) then
   754       exists (formula_has_too_many_lambdas false Ts) (#2 (strip_comb t))
   755     else
   756       term_has_too_many_lambdas max_lambda_nesting t
   757 
   758 (* The max apply depth of any "metis" call in "Metis_Examples" (on 2007-10-31)
   759    was 11. *)
   760 val max_apply_depth = 15
   761 
   762 fun apply_depth (f $ t) = Int.max (apply_depth f, apply_depth t + 1)
   763   | apply_depth (Abs (_, _, t)) = apply_depth t
   764   | apply_depth _ = 0
   765 
   766 fun is_formula_too_complex ho_atp t =
   767   apply_depth t > max_apply_depth orelse formula_has_too_many_lambdas ho_atp [] t
   768 
   769 (* FIXME: Extend to "Meson" and "Metis" *)
   770 val exists_sledgehammer_const =
   771   exists_Const (fn (s, _) => String.isPrefix sledgehammer_prefix s)
   772 
   773 (* FIXME: make more reliable *)
   774 val exists_low_level_class_const =
   775   exists_Const (fn (s, _) =>
   776      String.isSubstring (Long_Name.separator ^ "class" ^ Long_Name.separator) s)
   777 
   778 fun is_metastrange_theorem th =
   779   case head_of (concl_of th) of
   780     Const (s, _) => (s <> @{const_name Trueprop} andalso
   781                      s <> @{const_name "=="})
   782   | _ => false
   783 
   784 fun is_that_fact th =
   785   String.isSuffix (Long_Name.separator ^ Obtain.thatN) (Thm.get_name_hint th)
   786   andalso exists_subterm (fn Free (s, _) => s = Name.skolem Auto_Bind.thesisN
   787                            | _ => false) (prop_of th)
   788 
   789 val type_has_top_sort =
   790   exists_subtype (fn TFree (_, []) => true | TVar (_, []) => true | _ => false)
   791 
   792 (**** Predicates to detect unwanted facts (prolific or likely to cause
   793       unsoundness) ****)
   794 
   795 fun is_theorem_bad_for_atps ho_atp exporter thm =
   796   is_metastrange_theorem thm orelse
   797   (not exporter andalso
   798    let val t = prop_of thm in
   799      is_formula_too_complex ho_atp t orelse exists_type type_has_top_sort t orelse
   800      exists_sledgehammer_const t orelse exists_low_level_class_const t orelse
   801      is_that_fact thm
   802    end)
   803 
   804 fun meta_equify (@{const Trueprop}
   805                  $ (Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2)) =
   806     Const (@{const_name "=="}, T --> T --> @{typ prop}) $ t1 $ t2
   807   | meta_equify t = t
   808 
   809 val normalize_simp_prop =
   810   meta_equify
   811   #> map_aterms (fn Var ((s, _), T) => Var ((s, 0), T) | t => t)
   812   #> map_types (map_type_tvar (fn ((s, _), S) => TVar ((s, 0), S)))
   813 
   814 fun clasimpset_rule_table_of ctxt =
   815   let
   816     fun add loc g f = fold (Termtab.update o rpair loc o g o prop_of o f)
   817     val {safeIs, safeEs, hazIs, hazEs, ...} = ctxt |> claset_of |> Classical.rep_cs
   818     val intros = Item_Net.content safeIs @ Item_Net.content hazIs
   819     val elims = map Classical.classical_rule (Item_Net.content safeEs @ Item_Net.content hazEs)
   820     val simps = ctxt |> simpset_of |> dest_ss |> #simps
   821   in
   822     Termtab.empty |> add Intro I I intros
   823                   |> add Elim I I elims
   824                   |> add Simp I snd simps
   825                   |> add Simp normalize_simp_prop snd simps
   826   end
   827 
   828 fun all_facts ctxt ho_atp reserved exporter add_ths chained_ths =
   829   let
   830     val thy = Proof_Context.theory_of ctxt
   831     val global_facts = Global_Theory.facts_of thy
   832     val local_facts = Proof_Context.facts_of ctxt
   833     val named_locals = local_facts |> Facts.dest_static []
   834     val assms = Assumption.all_assms_of ctxt
   835     fun is_assum th = exists (fn ct => prop_of th aconv term_of ct) assms
   836     val is_chained = member Thm.eq_thm_prop chained_ths
   837     val clasimpset_table = clasimpset_rule_table_of ctxt
   838     fun locality_of_theorem global (name: string) th =
   839       if String.isSubstring ".induct" name orelse(*FIXME: use structured name*)
   840          String.isSubstring ".inducts" name then
   841            Induction
   842       else if is_chained th then
   843         Chained
   844       else if global then
   845         case Termtab.lookup clasimpset_table (prop_of th) of
   846           SOME loc => loc
   847         | NONE => General
   848       else if is_assum th then Assum else Local
   849     fun is_good_unnamed_local th =
   850       not (Thm.has_name_hint th) andalso
   851       forall (fn (_, ths) => not (member Thm.eq_thm_prop ths th)) named_locals
   852     val unnamed_locals =
   853       union Thm.eq_thm_prop (Facts.props local_facts) chained_ths
   854       |> filter is_good_unnamed_local |> map (pair "" o single)
   855     val full_space =
   856       Name_Space.merge (Facts.space_of global_facts, Facts.space_of local_facts)
   857     fun add_facts global foldx facts =
   858       foldx (fn (name0, ths) =>
   859         if not exporter andalso name0 <> "" andalso
   860            forall (not o member Thm.eq_thm_prop add_ths) ths andalso
   861            (Facts.is_concealed facts name0 orelse
   862             (not (Config.get ctxt ignore_no_atp) andalso
   863              is_package_def name0) orelse
   864             exists (fn s => String.isSuffix s name0)
   865                    (multi_base_blacklist ctxt ho_atp) orelse
   866             String.isSuffix "_def_raw" (* FIXME: crude hack *) name0) then
   867           I
   868         else
   869           let
   870             val multi = length ths > 1
   871             val backquote_thm =
   872               backquote o string_for_term ctxt o close_form o prop_of
   873             fun check_thms a =
   874               case try (Proof_Context.get_thms ctxt) a of
   875                 NONE => false
   876               | SOME ths' => eq_list Thm.eq_thm_prop (ths, ths')
   877           in
   878             pair 1
   879             #> fold (fn th => fn (j, (multis, unis)) =>
   880                         (j + 1,
   881                          if not (member Thm.eq_thm_prop add_ths th) andalso
   882                             is_theorem_bad_for_atps ho_atp exporter th then
   883                            (multis, unis)
   884                          else
   885                            let
   886                              val new =
   887                                (((fn () =>
   888                                  if name0 = "" then
   889                                    th |> backquote_thm
   890                                  else
   891                                    [Facts.extern ctxt facts name0,
   892                                     Name_Space.extern ctxt full_space name0,
   893                                     name0]
   894                                    |> find_first check_thms
   895                                    |> (fn SOME name =>
   896                                           make_name reserved multi j name
   897                                         | NONE => "")),
   898                                 locality_of_theorem global name0 th), th)
   899                            in
   900                              if multi then (new :: multis, unis)
   901                              else (multis, new :: unis)
   902                            end)) ths
   903             #> snd
   904           end)
   905   in
   906     (* The single-name theorems go after the multiple-name ones, so that single
   907        names are preferred when both are available. *)
   908     ([], []) |> add_facts false fold local_facts (unnamed_locals @ named_locals)
   909              |> add_facts true Facts.fold_static global_facts global_facts
   910              |> op @
   911   end
   912 
   913 fun external_frees t =
   914   [] |> Term.add_frees t |> filter_out (can Name.dest_internal o fst)
   915 
   916 fun nearly_all_facts ctxt ho_atp ({add, only, ...} : relevance_override)
   917                      chained_ths hyp_ts concl_t =
   918   let
   919     val thy = Proof_Context.theory_of ctxt
   920     val reserved = reserved_isar_keyword_table ()
   921     val add_ths = Attrib.eval_thms ctxt add
   922     val ind_stmt =
   923       Logic.list_implies (hyp_ts |> filter_out (null o external_frees), concl_t)
   924       |> Object_Logic.atomize_term thy
   925     val ind_stmt_xs = external_frees ind_stmt
   926   in
   927     (if only then
   928        maps (map (fn ((name, loc), th) => ((K name, loc), th))
   929              o fact_from_ref ctxt reserved chained_ths) add
   930      else
   931        all_facts ctxt ho_atp reserved false add_ths chained_ths)
   932     |> Config.get ctxt instantiate_inducts
   933        ? maps (instantiate_if_induct_rule ctxt ind_stmt ind_stmt_xs)
   934     |> (not (Config.get ctxt ignore_no_atp) andalso not only)
   935        ? filter_out (No_ATPs.member ctxt o snd)
   936     |> uniquify
   937   end
   938 
   939 fun relevant_facts ctxt (threshold0, threshold1) max_relevant is_built_in_const
   940                    fudge (override as {only, ...}) chained_ths hyp_ts concl_t
   941                    facts =
   942   let
   943     val thy = Proof_Context.theory_of ctxt
   944     val decay = Math.pow ((1.0 - threshold1) / (1.0 - threshold0),
   945                           1.0 / Real.fromInt (max_relevant + 1))
   946   in
   947     trace_msg ctxt (fn () => "Considering " ^ string_of_int (length facts) ^
   948                              " facts");
   949     (if only orelse threshold1 < 0.0 then
   950        facts
   951      else if threshold0 > 1.0 orelse threshold0 > threshold1 orelse
   952              max_relevant = 0 then
   953        []
   954      else
   955        relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   956            fudge override facts (chained_ths |> map prop_of) hyp_ts
   957            (concl_t |> theory_constify fudge (Context.theory_name thy)))
   958     |> map (apfst (apfst (fn f => f ())))
   959   end
   960 
   961 end;