src/HOL/Tools/Sledgehammer/sledgehammer_filter.ML
author blanchet
Sun May 01 18:37:23 2011 +0200 (2011-05-01)
changeset 42520 d1f7c4a01dbe
parent 42449 494e4ac5b0f8
child 42579 2552c09b1a72
permissions -rw-r--r--
renamings
     1 (*  Title:      HOL/Tools/Sledgehammer/sledgehammer_filter.ML
     2     Author:     Jia Meng, Cambridge University Computer Laboratory and NICTA
     3     Author:     Jasmin Blanchette, TU Muenchen
     4 
     5 Sledgehammer's relevance filter.
     6 *)
     7 
     8 signature SLEDGEHAMMER_FILTER =
     9 sig
    10   datatype locality = General | Intro | Elim | Simp | Local | Assum | Chained
    11 
    12   type relevance_fudge =
    13     {allow_ext : bool,
    14      local_const_multiplier : real,
    15      worse_irrel_freq : real,
    16      higher_order_irrel_weight : real,
    17      abs_rel_weight : real,
    18      abs_irrel_weight : real,
    19      skolem_irrel_weight : real,
    20      theory_const_rel_weight : real,
    21      theory_const_irrel_weight : real,
    22      intro_bonus : real,
    23      elim_bonus : real,
    24      simp_bonus : real,
    25      local_bonus : real,
    26      assum_bonus : real,
    27      chained_bonus : real,
    28      max_imperfect : real,
    29      max_imperfect_exp : real,
    30      threshold_divisor : real,
    31      ridiculous_threshold : real}
    32 
    33   type relevance_override =
    34     {add : (Facts.ref * Attrib.src list) list,
    35      del : (Facts.ref * Attrib.src list) list,
    36      only : bool}
    37 
    38   val trace : bool Unsynchronized.ref
    39   val is_global_locality : locality -> bool
    40   val fact_from_ref :
    41     Proof.context -> unit Symtab.table -> thm list
    42     -> Facts.ref * Attrib.src list -> ((string * locality) * thm) list
    43   val all_facts :
    44     Proof.context -> 'a Symtab.table -> bool -> bool -> relevance_fudge
    45     -> thm list -> thm list -> (((unit -> string) * locality) * (bool * thm)) list
    46   val const_names_in_fact :
    47     theory -> (string * typ -> term list -> bool * term list) -> term
    48     -> string list
    49   val relevant_facts :
    50     Proof.context -> bool -> real * real -> int
    51     -> (string * typ -> term list -> bool * term list) -> relevance_fudge
    52     -> relevance_override -> thm list -> term list -> term
    53     -> ((string * locality) * thm) list
    54 end;
    55 
    56 structure Sledgehammer_Filter : SLEDGEHAMMER_FILTER =
    57 struct
    58 
    59 open Sledgehammer_Util
    60 
    61 val trace = Unsynchronized.ref false
    62 fun trace_msg msg = if !trace then tracing (msg ()) else ()
    63 
    64 (* experimental features *)
    65 val respect_no_atp = true
    66 val instantiate_inducts = false
    67 
    68 datatype locality = General | Intro | Elim | Simp | Local | Assum | Chained
    69 
    70 (* (quasi-)underapproximation of the truth *)
    71 fun is_global_locality Local = false
    72   | is_global_locality Assum = false
    73   | is_global_locality Chained = false
    74   | is_global_locality _ = true
    75 
    76 type relevance_fudge =
    77   {allow_ext : bool,
    78    local_const_multiplier : real,
    79    worse_irrel_freq : real,
    80    higher_order_irrel_weight : real,
    81    abs_rel_weight : real,
    82    abs_irrel_weight : real,
    83    skolem_irrel_weight : real,
    84    theory_const_rel_weight : real,
    85    theory_const_irrel_weight : real,
    86    intro_bonus : real,
    87    elim_bonus : real,
    88    simp_bonus : real,
    89    local_bonus : real,
    90    assum_bonus : real,
    91    chained_bonus : real,
    92    max_imperfect : real,
    93    max_imperfect_exp : real,
    94    threshold_divisor : real,
    95    ridiculous_threshold : real}
    96 
    97 type relevance_override =
    98   {add : (Facts.ref * Attrib.src list) list,
    99    del : (Facts.ref * Attrib.src list) list,
   100    only : bool}
   101 
   102 val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator
   103 val abs_name = sledgehammer_prefix ^ "abs"
   104 val skolem_prefix = sledgehammer_prefix ^ "sko"
   105 val theory_const_suffix = Long_Name.separator ^ " 1"
   106 
   107 fun needs_quoting reserved s =
   108   Symtab.defined reserved s orelse
   109   exists (not o Lexicon.is_identifier) (Long_Name.explode s)
   110 
   111 fun make_name reserved multi j name =
   112   (name |> needs_quoting reserved name ? quote) ^
   113   (if multi then "(" ^ string_of_int j ^ ")" else "")
   114 
   115 fun explode_interval _ (Facts.FromTo (i, j)) = i upto j
   116   | explode_interval max (Facts.From i) = i upto i + max - 1
   117   | explode_interval _ (Facts.Single i) = [i]
   118 
   119 val backquote =
   120   raw_explode #> map (fn "`" => "\\`" | s => s) #> implode #> enclose "`" "`"
   121 fun fact_from_ref ctxt reserved chained_ths (xthm as (xref, args)) =
   122   let
   123     val ths = Attrib.eval_thms ctxt [xthm]
   124     val bracket =
   125       map (enclose "[" "]" o Pretty.str_of o Args.pretty_src ctxt) args
   126       |> implode
   127     fun nth_name j =
   128       case xref of
   129         Facts.Fact s => backquote s ^ bracket
   130       | Facts.Named (("", _), _) => "[" ^ bracket ^ "]"
   131       | Facts.Named ((name, _), NONE) =>
   132         make_name reserved (length ths > 1) (j + 1) name ^ bracket
   133       | Facts.Named ((name, _), SOME intervals) =>
   134         make_name reserved true
   135                  (nth (maps (explode_interval (length ths)) intervals) j) name ^
   136         bracket
   137   in
   138     (ths, (0, []))
   139     |-> fold (fn th => fn (j, rest) =>
   140                  (j + 1, ((nth_name j,
   141                           if member Thm.eq_thm chained_ths th then Chained
   142                           else General), th) :: rest))
   143     |> snd
   144   end
   145 
   146 (* This is a terrible hack. Free variables are sometimes code as "M__" when they
   147    are displayed as "M" and we want to avoid clashes with these. But sometimes
   148    it's even worse: "Ma__" encodes "M". So we simply reserve all prefixes of all
   149    free variables. In the worse case scenario, where the fact won't be resolved
   150    correctly, the user can fix it manually, e.g., by naming the fact in
   151    question. Ideally we would need nothing of it, but backticks just don't work
   152    with schematic variables. *)
   153 fun all_prefixes_of s =
   154   map (fn i => String.extract (s, 0, SOME i)) (1 upto size s - 1)
   155 fun close_form t =
   156   (t, [] |> Term.add_free_names t |> maps all_prefixes_of)
   157   |> fold (fn ((s, i), T) => fn (t', taken) =>
   158               let val s' = Name.variant taken s in
   159                 ((if fastype_of t' = HOLogic.boolT then HOLogic.all_const
   160                   else Term.all) T
   161                  $ Abs (s', T, abstract_over (Var ((s, i), T), t')),
   162                  s' :: taken)
   163               end)
   164           (Term.add_vars t [] |> sort_wrt (fst o fst))
   165   |> fst
   166 
   167 fun string_for_term ctxt t =
   168   Print_Mode.setmp (filter (curry (op =) Symbol.xsymbolsN)
   169                    (print_mode_value ())) (Syntax.string_of_term ctxt) t
   170   |> String.translate (fn c => if Char.isPrint c then str c else "")
   171   |> simplify_spaces
   172 
   173 (** Structural induction rules **)
   174 
   175 fun struct_induct_rule_on th =
   176   case Logic.strip_horn (prop_of th) of
   177     (prems, @{const Trueprop}
   178             $ ((p as Var ((p_name, 0), _)) $ (a as Var (_, ind_T)))) =>
   179     if not (is_TVar ind_T) andalso length prems > 1 andalso
   180        exists (exists_subterm (curry (op aconv) p)) prems andalso
   181        not (exists (exists_subterm (curry (op aconv) a)) prems) then
   182       SOME (p_name, ind_T)
   183     else
   184       NONE
   185   | _ => NONE
   186 
   187 fun instantiate_induct_rule ctxt concl_prop p_name ((name, loc), (multi, th))
   188                             ind_x =
   189   let
   190     fun varify_noninducts (t as Free (s, T)) =
   191         if (s, T) = ind_x orelse can dest_funT T then t else Var ((s, 0), T)
   192       | varify_noninducts t = t
   193     val p_inst =
   194       concl_prop |> map_aterms varify_noninducts |> close_form
   195                  |> lambda (Free ind_x)
   196                  |> string_for_term ctxt
   197   in
   198     ((fn () => name () ^ "[where " ^ p_name ^ " = " ^ quote p_inst ^ "]", loc),
   199      (multi, th |> read_instantiate ctxt [((p_name, 0), p_inst)]))
   200   end
   201 
   202 fun type_match thy (T1, T2) =
   203   (Sign.typ_match thy (T2, T1) Vartab.empty; true)
   204   handle Type.TYPE_MATCH => false
   205 
   206 fun instantiate_if_induct_rule ctxt stmt stmt_xs (ax as (_, (_, th))) =
   207   case struct_induct_rule_on th of
   208     SOME (p_name, ind_T) =>
   209     let val thy = Proof_Context.theory_of ctxt in
   210       stmt_xs |> filter (fn (_, T) => type_match thy (T, ind_T))
   211               |> map_filter (try (instantiate_induct_rule ctxt stmt p_name ax))
   212     end
   213   | NONE => [ax]
   214 
   215 (***************************************************************)
   216 (* Relevance Filtering                                         *)
   217 (***************************************************************)
   218 
   219 (*** constants with types ***)
   220 
   221 fun order_of_type (Type (@{type_name fun}, [T1, @{typ bool}])) =
   222     order_of_type T1 (* cheat: pretend sets are first-order *)
   223   | order_of_type (Type (@{type_name fun}, [T1, T2])) =
   224     Int.max (order_of_type T1 + 1, order_of_type T2)
   225   | order_of_type (Type (_, Ts)) = fold (Integer.max o order_of_type) Ts 0
   226   | order_of_type _ = 0
   227 
   228 (* An abstraction of Isabelle types and first-order terms *)
   229 datatype pattern = PVar | PApp of string * pattern list
   230 datatype ptype = PType of int * pattern list
   231 
   232 fun string_for_pattern PVar = "_"
   233   | string_for_pattern (PApp (s, ps)) =
   234     if null ps then s else s ^ string_for_patterns ps
   235 and string_for_patterns ps = "(" ^ commas (map string_for_pattern ps) ^ ")"
   236 fun string_for_ptype (PType (_, ps)) = string_for_patterns ps
   237 
   238 (*Is the second type an instance of the first one?*)
   239 fun match_pattern (PVar, _) = true
   240   | match_pattern (PApp _, PVar) = false
   241   | match_pattern (PApp (s, ps), PApp (t, qs)) =
   242     s = t andalso match_patterns (ps, qs)
   243 and match_patterns (_, []) = true
   244   | match_patterns ([], _) = false
   245   | match_patterns (p :: ps, q :: qs) =
   246     match_pattern (p, q) andalso match_patterns (ps, qs)
   247 fun match_ptype (PType (_, ps), PType (_, qs)) = match_patterns (ps, qs)
   248 
   249 (* Is there a unifiable constant? *)
   250 fun pconst_mem f consts (s, ps) =
   251   exists (curry (match_ptype o f) ps)
   252          (map snd (filter (curry (op =) s o fst) consts))
   253 fun pconst_hyper_mem f const_tab (s, ps) =
   254   exists (curry (match_ptype o f) ps) (these (Symtab.lookup const_tab s))
   255 
   256 fun pattern_for_type (Type (s, Ts)) = PApp (s, map pattern_for_type Ts)
   257   | pattern_for_type (TFree (s, _)) = PApp (s, [])
   258   | pattern_for_type (TVar _) = PVar
   259 
   260 (* Pairs a constant with the list of its type instantiations. *)
   261 fun ptype thy const x =
   262   (if const then map pattern_for_type (these (try (Sign.const_typargs thy) x))
   263    else [])
   264 fun rich_ptype thy const (s, T) =
   265   PType (order_of_type T, ptype thy const (s, T))
   266 fun rich_pconst thy const (s, T) = (s, rich_ptype thy const (s, T))
   267 
   268 fun string_for_hyper_pconst (s, ps) =
   269   s ^ "{" ^ commas (map string_for_ptype ps) ^ "}"
   270 
   271 (* Add a pconstant to the table, but a [] entry means a standard
   272    connective, which we ignore.*)
   273 fun add_pconst_to_table also_skolem (s, p) =
   274   if (not also_skolem andalso String.isPrefix skolem_prefix s) then I
   275   else Symtab.map_default (s, [p]) (insert (op =) p)
   276 
   277 fun pconsts_in_terms thy is_built_in_const also_skolems pos ts =
   278   let
   279     val flip = Option.map not
   280     (* We include free variables, as well as constants, to handle locales. For
   281        each quantifiers that must necessarily be skolemized by the automatic
   282        prover, we introduce a fresh constant to simulate the effect of
   283        Skolemization. *)
   284     fun do_const const x ts =
   285       let val (built_in, ts) = is_built_in_const x ts in
   286         (not built_in
   287          ? add_pconst_to_table also_skolems (rich_pconst thy const x))
   288         #> fold do_term ts
   289       end
   290     and do_term t =
   291       case strip_comb t of
   292         (Const x, ts) => do_const true x ts
   293       | (Free x, ts) => do_const false x ts
   294       | (Abs (_, T, t'), ts) =>
   295         (null ts
   296          ? add_pconst_to_table true (abs_name, PType (order_of_type T + 1, [])))
   297         #> fold do_term (t' :: ts)
   298       | (_, ts) => fold do_term ts
   299     fun do_quantifier will_surely_be_skolemized abs_T body_t =
   300       do_formula pos body_t
   301       #> (if also_skolems andalso will_surely_be_skolemized then
   302             add_pconst_to_table true
   303                 (gensym skolem_prefix, PType (order_of_type abs_T, []))
   304           else
   305             I)
   306     and do_term_or_formula T =
   307       if T = HOLogic.boolT then do_formula NONE else do_term
   308     and do_formula pos t =
   309       case t of
   310         Const (@{const_name all}, _) $ Abs (_, T, t') =>
   311         do_quantifier (pos = SOME false) T t'
   312       | @{const "==>"} $ t1 $ t2 =>
   313         do_formula (flip pos) t1 #> do_formula pos t2
   314       | Const (@{const_name "=="}, Type (_, [T, _])) $ t1 $ t2 =>
   315         fold (do_term_or_formula T) [t1, t2]
   316       | @{const Trueprop} $ t1 => do_formula pos t1
   317       | @{const False} => I
   318       | @{const True} => I
   319       | @{const Not} $ t1 => do_formula (flip pos) t1
   320       | Const (@{const_name All}, _) $ Abs (_, T, t') =>
   321         do_quantifier (pos = SOME false) T t'
   322       | Const (@{const_name Ex}, _) $ Abs (_, T, t') =>
   323         do_quantifier (pos = SOME true) T t'
   324       | @{const HOL.conj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   325       | @{const HOL.disj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   326       | @{const HOL.implies} $ t1 $ t2 =>
   327         do_formula (flip pos) t1 #> do_formula pos t2
   328       | Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2 =>
   329         fold (do_term_or_formula T) [t1, t2]
   330       | Const (@{const_name If}, Type (_, [_, Type (_, [T, _])]))
   331         $ t1 $ t2 $ t3 =>
   332         do_formula NONE t1 #> fold (do_term_or_formula T) [t2, t3]
   333       | Const (@{const_name Ex1}, _) $ Abs (_, T, t') =>
   334         do_quantifier (is_some pos) T t'
   335       | Const (@{const_name Ball}, _) $ t1 $ Abs (_, T, t') =>
   336         do_quantifier (pos = SOME false) T
   337                       (HOLogic.mk_imp (incr_boundvars 1 t1 $ Bound 0, t'))
   338       | Const (@{const_name Bex}, _) $ t1 $ Abs (_, T, t') =>
   339         do_quantifier (pos = SOME true) T
   340                       (HOLogic.mk_conj (incr_boundvars 1 t1 $ Bound 0, t'))
   341       | (t0 as Const (_, @{typ bool})) $ t1 =>
   342         do_term t0 #> do_formula pos t1  (* theory constant *)
   343       | _ => do_term t
   344   in Symtab.empty |> fold (do_formula pos) ts end
   345 
   346 (*Inserts a dummy "constant" referring to the theory name, so that relevance
   347   takes the given theory into account.*)
   348 fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...}
   349                      : relevance_fudge) thy_name t =
   350   if exists (curry (op <) 0.0) [theory_const_rel_weight,
   351                                 theory_const_irrel_weight] then
   352     Const (thy_name ^ theory_const_suffix, @{typ bool}) $ t
   353   else
   354     t
   355 
   356 fun theory_const_prop_of fudge th =
   357   theory_constify fudge (Context.theory_name (theory_of_thm th)) (prop_of th)
   358 
   359 (**** Constant / Type Frequencies ****)
   360 
   361 (* A two-dimensional symbol table counts frequencies of constants. It's keyed
   362    first by constant name and second by its list of type instantiations. For the
   363    latter, we need a linear ordering on "pattern list". *)
   364 
   365 fun pattern_ord p =
   366   case p of
   367     (PVar, PVar) => EQUAL
   368   | (PVar, PApp _) => LESS
   369   | (PApp _, PVar) => GREATER
   370   | (PApp q1, PApp q2) =>
   371     prod_ord fast_string_ord (dict_ord pattern_ord) (q1, q2)
   372 fun ptype_ord (PType p, PType q) =
   373   prod_ord (dict_ord pattern_ord) int_ord (swap p, swap q)
   374 
   375 structure PType_Tab = Table(type key = ptype val ord = ptype_ord)
   376 
   377 fun count_fact_consts thy fudge =
   378   let
   379     fun do_const const (s, T) ts =
   380       (* Two-dimensional table update. Constant maps to types maps to count. *)
   381       PType_Tab.map_default (rich_ptype thy const (s, T), 0) (Integer.add 1)
   382       |> Symtab.map_default (s, PType_Tab.empty)
   383       #> fold do_term ts
   384     and do_term t =
   385       case strip_comb t of
   386         (Const x, ts) => do_const true x ts
   387       | (Free x, ts) => do_const false x ts
   388       | (Abs (_, _, t'), ts) => fold do_term (t' :: ts)
   389       | (_, ts) => fold do_term ts
   390   in do_term o theory_const_prop_of fudge o snd end
   391 
   392 
   393 (**** Actual Filtering Code ****)
   394 
   395 fun pow_int _ 0 = 1.0
   396   | pow_int x 1 = x
   397   | pow_int x n = if n > 0 then x * pow_int x (n - 1) else pow_int x (n + 1) / x
   398 
   399 (*The frequency of a constant is the sum of those of all instances of its type.*)
   400 fun pconst_freq match const_tab (c, ps) =
   401   PType_Tab.fold (fn (qs, m) => match (ps, qs) ? Integer.add m)
   402                  (the (Symtab.lookup const_tab c)) 0
   403 
   404 
   405 (* A surprising number of theorems contain only a few significant constants.
   406    These include all induction rules, and other general theorems. *)
   407 
   408 (* "log" seems best in practice. A constant function of one ignores the constant
   409    frequencies. Rare constants give more points if they are relevant than less
   410    rare ones. *)
   411 fun rel_weight_for _ freq = 1.0 + 2.0 / Math.ln (Real.fromInt freq + 1.0)
   412 
   413 (* Irrelevant constants are treated differently. We associate lower penalties to
   414    very rare constants and very common ones -- the former because they can't
   415    lead to the inclusion of too many new facts, and the latter because they are
   416    so common as to be of little interest. *)
   417 fun irrel_weight_for ({worse_irrel_freq, higher_order_irrel_weight, ...}
   418                       : relevance_fudge) order freq =
   419   let val (k, x) = worse_irrel_freq |> `Real.ceil in
   420     (if freq < k then Math.ln (Real.fromInt (freq + 1)) / Math.ln x
   421      else rel_weight_for order freq / rel_weight_for order k)
   422     * pow_int higher_order_irrel_weight (order - 1)
   423   end
   424 
   425 fun multiplier_for_const_name local_const_multiplier s =
   426   if String.isSubstring "." s then 1.0 else local_const_multiplier
   427 
   428 (* Computes a constant's weight, as determined by its frequency. *)
   429 fun generic_pconst_weight local_const_multiplier abs_weight skolem_weight
   430                           theory_const_weight weight_for f const_tab
   431                           (c as (s, PType (m, _))) =
   432   if s = abs_name then
   433     abs_weight
   434   else if String.isPrefix skolem_prefix s then
   435     skolem_weight
   436   else if String.isSuffix theory_const_suffix s then
   437     theory_const_weight
   438   else
   439     multiplier_for_const_name local_const_multiplier s
   440     * weight_for m (pconst_freq (match_ptype o f) const_tab c)
   441 
   442 fun rel_pconst_weight ({local_const_multiplier, abs_rel_weight,
   443                         theory_const_rel_weight, ...} : relevance_fudge)
   444                       const_tab =
   445   generic_pconst_weight local_const_multiplier abs_rel_weight 0.0
   446                         theory_const_rel_weight rel_weight_for I const_tab
   447 fun irrel_pconst_weight (fudge as {local_const_multiplier, abs_irrel_weight,
   448                                    skolem_irrel_weight,
   449                                    theory_const_irrel_weight, ...}) const_tab =
   450   generic_pconst_weight local_const_multiplier abs_irrel_weight
   451                         skolem_irrel_weight theory_const_irrel_weight
   452                         (irrel_weight_for fudge) swap const_tab
   453 
   454 fun locality_bonus (_ : relevance_fudge) General = 0.0
   455   | locality_bonus {intro_bonus, ...} Intro = intro_bonus
   456   | locality_bonus {elim_bonus, ...} Elim = elim_bonus
   457   | locality_bonus {simp_bonus, ...} Simp = simp_bonus
   458   | locality_bonus {local_bonus, ...} Local = local_bonus
   459   | locality_bonus {assum_bonus, ...} Assum = assum_bonus
   460   | locality_bonus {chained_bonus, ...} Chained = chained_bonus
   461 
   462 fun is_odd_const_name s =
   463   s = abs_name orelse String.isPrefix skolem_prefix s orelse
   464   String.isSuffix theory_const_suffix s
   465 
   466 fun fact_weight fudge loc const_tab relevant_consts fact_consts =
   467   case fact_consts |> List.partition (pconst_hyper_mem I relevant_consts)
   468                    ||> filter_out (pconst_hyper_mem swap relevant_consts) of
   469     ([], _) => 0.0
   470   | (rel, irrel) =>
   471     if forall (forall (is_odd_const_name o fst)) [rel, irrel] then
   472       0.0
   473     else
   474       let
   475         val irrel = irrel |> filter_out (pconst_mem swap rel)
   476         val rel_weight =
   477           0.0 |> fold (curry (op +) o rel_pconst_weight fudge const_tab) rel
   478         val irrel_weight =
   479           ~ (locality_bonus fudge loc)
   480           |> fold (curry (op +) o irrel_pconst_weight fudge const_tab) irrel
   481         val res = rel_weight / (rel_weight + irrel_weight)
   482       in if Real.isFinite res then res else 0.0 end
   483 
   484 fun pconsts_in_fact thy is_built_in_const t =
   485   Symtab.fold (fn (s, pss) => fold (cons o pair s) pss)
   486               (pconsts_in_terms thy is_built_in_const true (SOME true) [t]) []
   487 fun pair_consts_fact thy is_built_in_const fudge fact =
   488   case fact |> snd |> theory_const_prop_of fudge
   489             |> pconsts_in_fact thy is_built_in_const of
   490     [] => NONE
   491   | consts => SOME ((fact, consts), NONE)
   492 
   493 val const_names_in_fact = map fst ooo pconsts_in_fact
   494 
   495 type annotated_thm =
   496   (((unit -> string) * locality) * thm) * (string * ptype) list
   497 
   498 fun take_most_relevant max_relevant remaining_max
   499         ({max_imperfect, max_imperfect_exp, ...} : relevance_fudge) 
   500         (candidates : (annotated_thm * real) list) =
   501   let
   502     val max_imperfect =
   503       Real.ceil (Math.pow (max_imperfect,
   504                     Math.pow (Real.fromInt remaining_max
   505                               / Real.fromInt max_relevant, max_imperfect_exp)))
   506     val (perfect, imperfect) =
   507       candidates |> sort (Real.compare o swap o pairself snd)
   508                  |> take_prefix (fn (_, w) => w > 0.99999)
   509     val ((accepts, more_rejects), rejects) =
   510       chop max_imperfect imperfect |>> append perfect |>> chop remaining_max
   511   in
   512     trace_msg (fn () =>
   513         "Actually passed (" ^ string_of_int (length accepts) ^ " of " ^
   514         string_of_int (length candidates) ^ "): " ^
   515         (accepts |> map (fn ((((name, _), _), _), weight) =>
   516                             name () ^ " [" ^ Real.toString weight ^ "]")
   517                  |> commas));
   518     (accepts, more_rejects @ rejects)
   519   end
   520 
   521 fun if_empty_replace_with_locality thy is_built_in_const facts loc tab =
   522   if Symtab.is_empty tab then
   523     pconsts_in_terms thy is_built_in_const false (SOME false)
   524         (map_filter (fn ((_, loc'), th) =>
   525                         if loc' = loc then SOME (prop_of th) else NONE) facts)
   526   else
   527     tab
   528 
   529 fun add_arities is_built_in_const th =
   530   let
   531     fun aux _ _ NONE = NONE
   532       | aux t args (SOME tab) =
   533         case t of
   534           t1 $ t2 => SOME tab |> aux t1 (t2 :: args) |> aux t2 []
   535         | Const (x as (s, _)) =>
   536           (if is_built_in_const x args |> fst then
   537              SOME tab
   538            else case Symtab.lookup tab s of
   539              NONE => SOME (Symtab.update (s, length args) tab)
   540            | SOME n => if n = length args then SOME tab else NONE)
   541         | _ => SOME tab
   542   in aux (prop_of th) [] end
   543 
   544 fun needs_ext is_built_in_const facts =
   545   fold (add_arities is_built_in_const o snd) facts (SOME Symtab.empty)
   546   |> is_none
   547 
   548 fun relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   549         (fudge as {threshold_divisor, ridiculous_threshold, ...})
   550         ({add, del, ...} : relevance_override) facts goal_ts =
   551   let
   552     val thy = Proof_Context.theory_of ctxt
   553     val const_tab = fold (count_fact_consts thy fudge) facts Symtab.empty
   554     val goal_const_tab =
   555       pconsts_in_terms thy is_built_in_const false (SOME false) goal_ts
   556       |> fold (if_empty_replace_with_locality thy is_built_in_const facts)
   557               [Chained, Assum, Local]
   558     val add_ths = Attrib.eval_thms ctxt add
   559     val del_ths = Attrib.eval_thms ctxt del
   560     val facts = facts |> filter_out (member Thm.eq_thm del_ths o snd)
   561     fun iter j remaining_max threshold rel_const_tab hopeless hopeful =
   562       let
   563         fun relevant [] _ [] =
   564             (* Nothing has been added this iteration. *)
   565             if j = 0 andalso threshold >= ridiculous_threshold then
   566               (* First iteration? Try again. *)
   567               iter 0 max_relevant (threshold / threshold_divisor) rel_const_tab
   568                    hopeless hopeful
   569             else
   570               []
   571           | relevant candidates rejects [] =
   572             let
   573               val (accepts, more_rejects) =
   574                 take_most_relevant max_relevant remaining_max fudge candidates
   575               val rel_const_tab' =
   576                 rel_const_tab
   577                 |> fold (add_pconst_to_table false) (maps (snd o fst) accepts)
   578               fun is_dirty (c, _) =
   579                 Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c
   580               val (hopeful_rejects, hopeless_rejects) =
   581                  (rejects @ hopeless, ([], []))
   582                  |-> fold (fn (ax as (_, consts), old_weight) =>
   583                               if exists is_dirty consts then
   584                                 apfst (cons (ax, NONE))
   585                               else
   586                                 apsnd (cons (ax, old_weight)))
   587                  |>> append (more_rejects
   588                              |> map (fn (ax as (_, consts), old_weight) =>
   589                                         (ax, if exists is_dirty consts then NONE
   590                                              else SOME old_weight)))
   591               val threshold =
   592                 1.0 - (1.0 - threshold)
   593                       * Math.pow (decay, Real.fromInt (length accepts))
   594               val remaining_max = remaining_max - length accepts
   595             in
   596               trace_msg (fn () => "New or updated constants: " ^
   597                   commas (rel_const_tab' |> Symtab.dest
   598                           |> subtract (op =) (rel_const_tab |> Symtab.dest)
   599                           |> map string_for_hyper_pconst));
   600               map (fst o fst) accepts @
   601               (if remaining_max = 0 then
   602                  []
   603                else
   604                  iter (j + 1) remaining_max threshold rel_const_tab'
   605                       hopeless_rejects hopeful_rejects)
   606             end
   607           | relevant candidates rejects
   608                      (((ax as (((_, loc), _), fact_consts)), cached_weight)
   609                       :: hopeful) =
   610             let
   611               val weight =
   612                 case cached_weight of
   613                   SOME w => w
   614                 | NONE => fact_weight fudge loc const_tab rel_const_tab
   615                                       fact_consts
   616             in
   617               if weight >= threshold then
   618                 relevant ((ax, weight) :: candidates) rejects hopeful
   619               else
   620                 relevant candidates ((ax, weight) :: rejects) hopeful
   621             end
   622         in
   623           trace_msg (fn () =>
   624               "ITERATION " ^ string_of_int j ^ ": current threshold: " ^
   625               Real.toString threshold ^ ", constants: " ^
   626               commas (rel_const_tab |> Symtab.dest
   627                       |> filter (curry (op <>) [] o snd)
   628                       |> map string_for_hyper_pconst));
   629           relevant [] [] hopeful
   630         end
   631     fun add_facts ths accepts =
   632       (facts |> filter (member Thm.eq_thm ths o snd)) @
   633       (accepts |> filter_out (member Thm.eq_thm ths o snd))
   634       |> take max_relevant
   635   in
   636     facts |> map_filter (pair_consts_fact thy is_built_in_const fudge)
   637           |> iter 0 max_relevant threshold0 goal_const_tab []
   638           |> not (null add_ths) ? add_facts add_ths
   639           |> (fn accepts =>
   640                  accepts |> needs_ext is_built_in_const accepts
   641                             ? add_facts @{thms ext})
   642           |> tap (fn accepts => trace_msg (fn () =>
   643                       "Total relevant: " ^ string_of_int (length accepts)))
   644   end
   645 
   646 
   647 (***************************************************************)
   648 (* Retrieving and filtering lemmas                             *)
   649 (***************************************************************)
   650 
   651 (*** retrieve lemmas and filter them ***)
   652 
   653 (*Reject theorems with names like "List.filter.filter_list_def" or
   654   "Accessible_Part.acc.defs", as these are definitions arising from packages.*)
   655 fun is_package_def a =
   656   let val names = Long_Name.explode a in
   657     (length names > 2 andalso not (hd names = "local") andalso
   658      String.isSuffix "_def" a) orelse String.isSuffix "_defs" a
   659   end
   660 
   661 fun mk_fact_table f xs =
   662   fold (Termtab.update o `(prop_of o f)) xs Termtab.empty
   663 fun uniquify xs = Termtab.fold (cons o snd) (mk_fact_table snd xs) []
   664 
   665 (* FIXME: put other record thms here, or declare as "no_atp" *)
   666 val multi_base_blacklist =
   667   ["defs", "select_defs", "update_defs", "split", "splits", "split_asm",
   668    "cases", "ext_cases", "eq.simps", "eq.refl", "nchotomy", "case_cong",
   669    "weak_case_cong"]
   670   |> not instantiate_inducts ? append ["induct", "inducts"]
   671   |> map (prefix ".")
   672 
   673 val max_lambda_nesting = 3
   674 
   675 fun term_has_too_many_lambdas max (t1 $ t2) =
   676     exists (term_has_too_many_lambdas max) [t1, t2]
   677   | term_has_too_many_lambdas max (Abs (_, _, t)) =
   678     max = 0 orelse term_has_too_many_lambdas (max - 1) t
   679   | term_has_too_many_lambdas _ _ = false
   680 
   681 (* Don't count nested lambdas at the level of formulas, since they are
   682    quantifiers. *)
   683 fun formula_has_too_many_lambdas Ts (Abs (_, T, t)) =
   684     formula_has_too_many_lambdas (T :: Ts) t
   685   | formula_has_too_many_lambdas Ts t =
   686     if member (op =) [HOLogic.boolT, propT] (fastype_of1 (Ts, t)) then
   687       exists (formula_has_too_many_lambdas Ts) (#2 (strip_comb t))
   688     else
   689       term_has_too_many_lambdas max_lambda_nesting t
   690 
   691 (* The max apply depth of any "metis" call in "Metis_Examples" (on 2007-10-31)
   692    was 11. *)
   693 val max_apply_depth = 15
   694 
   695 fun apply_depth (f $ t) = Int.max (apply_depth f, apply_depth t + 1)
   696   | apply_depth (Abs (_, _, t)) = apply_depth t
   697   | apply_depth _ = 0
   698 
   699 fun is_formula_too_complex t =
   700   apply_depth t > max_apply_depth orelse formula_has_too_many_lambdas [] t
   701 
   702 (* FIXME: Extend to "Meson" and "Metis" *)
   703 val exists_sledgehammer_const =
   704   exists_Const (fn (s, _) => String.isPrefix sledgehammer_prefix s)
   705 
   706 (* FIXME: make more reliable *)
   707 val exists_low_level_class_const =
   708   exists_Const (fn (s, _) =>
   709      String.isSubstring (Long_Name.separator ^ "class" ^ Long_Name.separator) s)
   710 
   711 fun is_metastrange_theorem th =
   712   case head_of (concl_of th) of
   713       Const (a, _) => (a <> @{const_name Trueprop} andalso
   714                        a <> @{const_name "=="})
   715     | _ => false
   716 
   717 fun is_that_fact th =
   718   String.isSuffix (Long_Name.separator ^ Obtain.thatN) (Thm.get_name_hint th)
   719   andalso exists_subterm (fn Free (s, _) => s = Name.skolem Auto_Bind.thesisN
   720                            | _ => false) (prop_of th)
   721 
   722 val type_has_top_sort =
   723   exists_subtype (fn TFree (_, []) => true | TVar (_, []) => true | _ => false)
   724 
   725 (**** Predicates to detect unwanted facts (prolific or likely to cause
   726       unsoundness) ****)
   727 
   728 (* Too general means, positive equality literal with a variable X as one
   729    operand, when X does not occur properly in the other operand. This rules out
   730    clearly inconsistent facts such as X = a | X = b, though it by no means
   731    guarantees soundness. *)
   732 
   733 (* Unwanted equalities are those between a (bound or schematic) variable that
   734    does not properly occur in the second operand. *)
   735 val is_exhaustive_finite =
   736   let
   737     fun is_bad_equal (Var z) t =
   738         not (exists_subterm (fn Var z' => z = z' | _ => false) t)
   739       | is_bad_equal (Bound j) t = not (loose_bvar1 (t, j))
   740       | is_bad_equal _ _ = false
   741     fun do_equals t1 t2 = is_bad_equal t1 t2 orelse is_bad_equal t2 t1
   742     fun do_formula pos t =
   743       case (pos, t) of
   744         (_, @{const Trueprop} $ t1) => do_formula pos t1
   745       | (true, Const (@{const_name all}, _) $ Abs (_, _, t')) =>
   746         do_formula pos t'
   747       | (true, Const (@{const_name All}, _) $ Abs (_, _, t')) =>
   748         do_formula pos t'
   749       | (false, Const (@{const_name Ex}, _) $ Abs (_, _, t')) =>
   750         do_formula pos t'
   751       | (_, @{const "==>"} $ t1 $ t2) =>
   752         do_formula (not pos) t1 andalso
   753         (t2 = @{prop False} orelse do_formula pos t2)
   754       | (_, @{const HOL.implies} $ t1 $ t2) =>
   755         do_formula (not pos) t1 andalso
   756         (t2 = @{const False} orelse do_formula pos t2)
   757       | (_, @{const Not} $ t1) => do_formula (not pos) t1
   758       | (true, @{const HOL.disj} $ t1 $ t2) => forall (do_formula pos) [t1, t2]
   759       | (false, @{const HOL.conj} $ t1 $ t2) => forall (do_formula pos) [t1, t2]
   760       | (true, Const (@{const_name HOL.eq}, _) $ t1 $ t2) => do_equals t1 t2
   761       | (true, Const (@{const_name "=="}, _) $ t1 $ t2) => do_equals t1 t2
   762       | _ => false
   763   in do_formula true end
   764 
   765 fun has_bound_or_var_of_type tycons =
   766   exists_subterm (fn Var (_, Type (s, _)) => member (op =) tycons s
   767                    | Abs (_, Type (s, _), _) => member (op =) tycons s
   768                    | _ => false)
   769 
   770 (* Facts are forbidden to contain variables of these types. The typical reason
   771    is that they lead to unsoundness. Note that "unit" satisfies numerous
   772    equations like "?x = ()". The resulting clauses will have no type constraint,
   773    yielding false proofs. Even "bool" leads to many unsound proofs, though only
   774    for higher-order problems. *)
   775 val dangerous_types = [@{type_name unit}, @{type_name bool}, @{type_name prop}];
   776 
   777 (* Facts containing variables of type "unit" or "bool" or of the form
   778    "ALL x. x = A | x = B | x = C" are likely to lead to unsound proofs if types
   779    are omitted. *)
   780 fun is_dangerous_term no_dangerous_types t =
   781   not no_dangerous_types andalso
   782   let val t = transform_elim_term t in
   783     has_bound_or_var_of_type dangerous_types t orelse
   784     is_exhaustive_finite t
   785   end
   786 
   787 fun is_theorem_bad_for_atps no_dangerous_types thm =
   788   let val t = prop_of thm in
   789     is_formula_too_complex t orelse exists_type type_has_top_sort t orelse
   790     is_dangerous_term no_dangerous_types t orelse
   791     exists_sledgehammer_const t orelse exists_low_level_class_const t orelse
   792     is_metastrange_theorem thm orelse is_that_fact thm
   793   end
   794 
   795 fun clasimpset_rules_of ctxt =
   796   let
   797     val {safeIs, safeEs, hazIs, hazEs, ...} = ctxt |> claset_of |> rep_cs
   798     val intros = safeIs @ hazIs
   799     val elims = map Classical.classical_rule (safeEs @ hazEs)
   800     val simps = ctxt |> simpset_of |> dest_ss |> #simps |> map snd
   801   in (mk_fact_table I intros, mk_fact_table I elims, mk_fact_table I simps) end
   802 
   803 fun all_facts ctxt reserved really_all no_dangerous_types
   804               ({intro_bonus, elim_bonus, simp_bonus, ...} : relevance_fudge)
   805               add_ths chained_ths =
   806   let
   807     val thy = Proof_Context.theory_of ctxt
   808     val global_facts = Global_Theory.facts_of thy
   809     val local_facts = Proof_Context.facts_of ctxt
   810     val named_locals = local_facts |> Facts.dest_static []
   811     val assms = Assumption.all_assms_of ctxt
   812     fun is_assum th = exists (fn ct => prop_of th aconv term_of ct) assms
   813     val is_chained = member Thm.eq_thm chained_ths
   814     val (intros, elims, simps) =
   815       if exists (curry (op <) 0.0) [intro_bonus, elim_bonus, simp_bonus] then
   816         clasimpset_rules_of ctxt
   817       else
   818         (Termtab.empty, Termtab.empty, Termtab.empty)
   819     fun is_good_unnamed_local th =
   820       not (Thm.has_name_hint th) andalso
   821       forall (fn (_, ths) => not (member Thm.eq_thm ths th)) named_locals
   822     val unnamed_locals =
   823       union Thm.eq_thm (Facts.props local_facts) chained_ths
   824       |> filter is_good_unnamed_local |> map (pair "" o single)
   825     val full_space =
   826       Name_Space.merge (Facts.space_of global_facts, Facts.space_of local_facts)
   827     fun add_facts global foldx facts =
   828       foldx (fn (name0, ths) =>
   829         if not really_all andalso name0 <> "" andalso
   830            forall (not o member Thm.eq_thm add_ths) ths andalso
   831            (Facts.is_concealed facts name0 orelse
   832             (respect_no_atp andalso is_package_def name0) orelse
   833             exists (fn s => String.isSuffix s name0) multi_base_blacklist orelse
   834             String.isSuffix "_def_raw" (* FIXME: crude hack *) name0) then
   835           I
   836         else
   837           let
   838             val multi = length ths > 1
   839             val backquote_thm =
   840               backquote o string_for_term ctxt o close_form o prop_of
   841             fun check_thms a =
   842               case try (Proof_Context.get_thms ctxt) a of
   843                 NONE => false
   844               | SOME ths' => Thm.eq_thms (ths, ths')
   845           in
   846             pair 1
   847             #> fold (fn th => fn (j, rest) =>
   848                  (j + 1,
   849                   if is_theorem_bad_for_atps no_dangerous_types th andalso
   850                      not (member Thm.eq_thm add_ths th) then
   851                     rest
   852                   else
   853                     (((fn () =>
   854                           if name0 = "" then
   855                             th |> backquote_thm
   856                           else
   857                             let
   858                               val name1 = Facts.extern ctxt facts name0
   859                               val name2 = Name_Space.extern ctxt full_space name0
   860                             in
   861                               case find_first check_thms [name1, name2, name0] of
   862                                 SOME name => make_name reserved multi j name
   863                               | NONE => ""
   864                             end),
   865                       let val t = prop_of th in
   866                         if is_chained th then
   867                           Chained
   868                         else if global then
   869                           if Termtab.defined intros t then Intro
   870                           else if Termtab.defined elims t then Elim
   871                           else if Termtab.defined simps t then Simp
   872                           else General
   873                         else
   874                           if is_assum th then Assum else Local
   875                       end),
   876                       (multi, th)) :: rest)) ths
   877             #> snd
   878           end)
   879   in
   880     [] |> add_facts false fold local_facts (unnamed_locals @ named_locals)
   881        |> add_facts true Facts.fold_static global_facts global_facts
   882   end
   883 
   884 (* The single-name theorems go after the multiple-name ones, so that single
   885    names are preferred when both are available. *)
   886 fun rearrange_facts ctxt respect_no_atp =
   887   List.partition (fst o snd) #> op @ #> map (apsnd snd)
   888   #> respect_no_atp ? filter_out (No_ATPs.member ctxt o snd)
   889 
   890 fun external_frees t =
   891   [] |> Term.add_frees t |> filter_out (can Name.dest_internal o fst)
   892 
   893 fun relevant_facts ctxt no_dangerous_types (threshold0, threshold1)
   894                    max_relevant is_built_in_const fudge
   895                    (override as {add, only, ...}) chained_ths hyp_ts concl_t =
   896   let
   897     val thy = Proof_Context.theory_of ctxt
   898     val decay = Math.pow ((1.0 - threshold1) / (1.0 - threshold0),
   899                           1.0 / Real.fromInt (max_relevant + 1))
   900     val add_ths = Attrib.eval_thms ctxt add
   901     val reserved = reserved_isar_keyword_table ()
   902     val ind_stmt =
   903       Logic.list_implies (hyp_ts |> filter_out (null o external_frees), concl_t)
   904       |> Object_Logic.atomize_term thy
   905     val ind_stmt_xs = external_frees ind_stmt
   906     val facts =
   907       (if only then
   908          maps (map (fn ((name, loc), th) => ((K name, loc), (true, th)))
   909                o fact_from_ref ctxt reserved chained_ths) add
   910        else
   911          all_facts ctxt reserved false no_dangerous_types fudge add_ths
   912                    chained_ths)
   913       |> instantiate_inducts
   914          ? maps (instantiate_if_induct_rule ctxt ind_stmt ind_stmt_xs)
   915       |> rearrange_facts ctxt (respect_no_atp andalso not only)
   916       |> uniquify
   917   in
   918     trace_msg (fn () => "Considering " ^ string_of_int (length facts) ^
   919                         " facts");
   920     (if only orelse threshold1 < 0.0 then
   921        facts
   922      else if threshold0 > 1.0 orelse threshold0 > threshold1 orelse
   923              max_relevant = 0 then
   924        []
   925      else
   926        ((concl_t |> theory_constify fudge (Context.theory_name thy)) :: hyp_ts)
   927        |> relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   928                            fudge override facts)
   929     |> map (apfst (apfst (fn f => f ())))
   930   end
   931 
   932 end;