src/HOL/Tools/Sledgehammer/sledgehammer_filter.ML
author blanchet
Thu Dec 16 15:12:17 2010 +0100 (2010-12-16 ago)
changeset 41205 209546e0af2c
parent 41204 bd57cf5944cb
child 41207 f9c7bdc75dd0
permissions -rw-r--r--
comment tuning
     1 (*  Title:      HOL/Tools/Sledgehammer/sledgehammer_filter.ML
     2     Author:     Jia Meng, Cambridge University Computer Laboratory and NICTA
     3     Author:     Jasmin Blanchette, TU Muenchen
     4 
     5 Sledgehammer's relevance filter.
     6 *)
     7 
     8 signature SLEDGEHAMMER_FILTER =
     9 sig
    10   datatype locality = General | Intro | Elim | Simp | Local | Assum | Chained
    11 
    12   type relevance_fudge =
    13     {allow_ext : bool,
    14      worse_irrel_freq : real,
    15      higher_order_irrel_weight : real,
    16      abs_rel_weight : real,
    17      abs_irrel_weight : real,
    18      skolem_irrel_weight : real,
    19      theory_const_rel_weight : real,
    20      theory_const_irrel_weight : real,
    21      intro_bonus : real,
    22      elim_bonus : real,
    23      simp_bonus : real,
    24      local_bonus : real,
    25      assum_bonus : real,
    26      chained_bonus : real,
    27      max_imperfect : real,
    28      max_imperfect_exp : real,
    29      threshold_divisor : real,
    30      ridiculous_threshold : real}
    31 
    32   type relevance_override =
    33     {add : (Facts.ref * Attrib.src list) list,
    34      del : (Facts.ref * Attrib.src list) list,
    35      only : bool}
    36 
    37   val trace : bool Unsynchronized.ref
    38   val fact_from_ref :
    39     Proof.context -> unit Symtab.table -> thm list
    40     -> Facts.ref * Attrib.src list -> ((string * locality) * thm) list
    41   val relevant_facts :
    42     Proof.context -> bool -> real * real -> int
    43     -> (string * typ -> term list -> bool) -> relevance_fudge
    44     -> relevance_override -> thm list -> term list -> term
    45     -> ((string * locality) * thm) list
    46 end;
    47 
    48 structure Sledgehammer_Filter : SLEDGEHAMMER_FILTER =
    49 struct
    50 
    51 open Sledgehammer_Util
    52 
    53 val trace = Unsynchronized.ref false
    54 fun trace_msg msg = if !trace then tracing (msg ()) else ()
    55 
    56 (* experimental feature *)
    57 val respect_no_atp = true
    58 
    59 datatype locality = General | Intro | Elim | Simp | Local | Assum | Chained
    60 
    61 type relevance_fudge =
    62   {allow_ext : bool,
    63    worse_irrel_freq : real,
    64    higher_order_irrel_weight : real,
    65    abs_rel_weight : real,
    66    abs_irrel_weight : real,
    67    skolem_irrel_weight : real,
    68    theory_const_rel_weight : real,
    69    theory_const_irrel_weight : real,
    70    intro_bonus : real,
    71    elim_bonus : real,
    72    simp_bonus : real,
    73    local_bonus : real,
    74    assum_bonus : real,
    75    chained_bonus : real,
    76    max_imperfect : real,
    77    max_imperfect_exp : real,
    78    threshold_divisor : real,
    79    ridiculous_threshold : real}
    80 
    81 type relevance_override =
    82   {add : (Facts.ref * Attrib.src list) list,
    83    del : (Facts.ref * Attrib.src list) list,
    84    only : bool}
    85 
    86 val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator
    87 val abs_name = sledgehammer_prefix ^ "abs"
    88 val skolem_prefix = sledgehammer_prefix ^ "sko"
    89 val theory_const_suffix = Long_Name.separator ^ " 1"
    90 
    91 fun needs_quoting reserved s =
    92   Symtab.defined reserved s orelse
    93   exists (not o Syntax.is_identifier) (Long_Name.explode s)
    94 
    95 fun make_name reserved multi j name =
    96   (name |> needs_quoting reserved name ? quote) ^
    97   (if multi then "(" ^ Int.toString j ^ ")" else "")
    98 
    99 fun explode_interval _ (Facts.FromTo (i, j)) = i upto j
   100   | explode_interval max (Facts.From i) = i upto i + max - 1
   101   | explode_interval _ (Facts.Single i) = [i]
   102 
   103 fun fact_from_ref ctxt reserved chained_ths (xthm as (xref, args)) =
   104   let
   105     val ths = Attrib.eval_thms ctxt [xthm]
   106     val bracket =
   107       implode (map (fn arg => "[" ^ Pretty.str_of (Args.pretty_src ctxt arg)
   108                                ^ "]") args)
   109     fun nth_name j =
   110       case xref of
   111         Facts.Fact s => "`" ^ s ^ "`" ^ bracket
   112       | Facts.Named (("", _), _) => "[" ^ bracket ^ "]"
   113       | Facts.Named ((name, _), NONE) =>
   114         make_name reserved (length ths > 1) (j + 1) name ^ bracket
   115       | Facts.Named ((name, _), SOME intervals) =>
   116         make_name reserved true
   117                  (nth (maps (explode_interval (length ths)) intervals) j) name ^
   118         bracket
   119   in
   120     (ths, (0, []))
   121     |-> fold (fn th => fn (j, rest) =>
   122                  (j + 1, ((nth_name j,
   123                           if member Thm.eq_thm chained_ths th then Chained
   124                           else General), th) :: rest))
   125     |> snd
   126   end
   127 
   128 (* This is a terrible hack. Free variables are sometimes code as "M__" when they
   129    are displayed as "M" and we want to avoid clashes with these. But sometimes
   130    it's even worse: "Ma__" encodes "M". So we simply reserve all prefixes of all
   131    free variables. In the worse case scenario, where the fact won't be resolved
   132    correctly, the user can fix it manually, e.g., by naming the fact in
   133    question. Ideally we would need nothing of it, but backticks just don't work
   134    with schematic variables. *)
   135 fun all_prefixes_of s =
   136   map (fn i => String.extract (s, 0, SOME i)) (1 upto size s - 1)
   137 fun close_form t =
   138   (t, [] |> Term.add_free_names t |> maps all_prefixes_of)
   139   |> fold (fn ((s, i), T) => fn (t', taken) =>
   140               let val s' = Name.variant taken s in
   141                 ((if fastype_of t' = HOLogic.boolT then HOLogic.all_const
   142                   else Term.all) T
   143                  $ Abs (s', T, abstract_over (Var ((s, i), T), t')),
   144                  s' :: taken)
   145               end)
   146           (Term.add_vars t [] |> sort_wrt (fst o fst))
   147   |> fst
   148 
   149 fun string_for_term ctxt t =
   150   Print_Mode.setmp (filter (curry (op =) Symbol.xsymbolsN)
   151                    (print_mode_value ())) (Syntax.string_of_term ctxt) t
   152   |> String.translate (fn c => if Char.isPrint c then str c else "")
   153   |> simplify_spaces
   154 
   155 (** Structural induction rules **)
   156 
   157 fun struct_induct_rule_on th =
   158   case Logic.strip_horn (prop_of th) of
   159     (prems, @{const Trueprop}
   160             $ ((p as Var ((p_name, 0), _)) $ (a as Var (_, ind_T)))) =>
   161     if not (is_TVar ind_T) andalso length prems > 1 andalso
   162        exists (exists_subterm (curry (op aconv) p)) prems andalso
   163        not (exists (exists_subterm (curry (op aconv) a)) prems) then
   164       SOME (p_name, ind_T)
   165     else
   166       NONE
   167   | _ => NONE
   168 
   169 fun instantiate_induct_rule ctxt concl_prop p_name ((name, _), (multi, th))
   170                             ind_x =
   171   let
   172     fun varify_noninducts (t as Free (s, T)) =
   173         if (s, T) = ind_x orelse can dest_funT T then t else Var ((s, 0), T)
   174       | varify_noninducts t = t
   175     val p_inst =
   176       concl_prop |> map_aterms varify_noninducts |> close_form
   177                  |> lambda (Free ind_x)
   178                  |> string_for_term ctxt
   179   in
   180     ((fn () => name () ^ "[where " ^ p_name ^ " = " ^ quote p_inst ^ "]",
   181       Local),
   182      (multi, th |> read_instantiate ctxt [((p_name, 0), p_inst)]))
   183   end
   184 
   185 fun type_match thy (T1, T2) =
   186   (Sign.typ_match thy (T2, T1) Vartab.empty; true)
   187   handle Type.TYPE_MATCH => false
   188 
   189 fun instantiate_if_induct_rule ctxt stmt stmt_xs (ax as (_, (_, th))) =
   190   case struct_induct_rule_on th of
   191     SOME (p_name, ind_T) =>
   192     let val thy = ProofContext.theory_of ctxt in
   193       stmt_xs |> filter (fn (_, T) => type_match thy (T, ind_T))
   194               |> map_filter (try (instantiate_induct_rule ctxt stmt p_name ax))
   195     end
   196   | NONE => [ax]
   197 
   198 (***************************************************************)
   199 (* Relevance Filtering                                         *)
   200 (***************************************************************)
   201 
   202 (*** constants with types ***)
   203 
   204 fun order_of_type (Type (@{type_name fun}, [T1, @{typ bool}])) =
   205     order_of_type T1 (* cheat: pretend sets are first-order *)
   206   | order_of_type (Type (@{type_name fun}, [T1, T2])) =
   207     Int.max (order_of_type T1 + 1, order_of_type T2)
   208   | order_of_type (Type (_, Ts)) = fold (Integer.max o order_of_type) Ts 0
   209   | order_of_type _ = 0
   210 
   211 (* An abstraction of Isabelle types and first-order terms *)
   212 datatype pattern = PVar | PApp of string * pattern list
   213 datatype ptype = PType of int * pattern list
   214 
   215 fun string_for_pattern PVar = "_"
   216   | string_for_pattern (PApp (s, ps)) =
   217     if null ps then s else s ^ string_for_patterns ps
   218 and string_for_patterns ps = "(" ^ commas (map string_for_pattern ps) ^ ")"
   219 fun string_for_ptype (PType (_, ps)) = string_for_patterns ps
   220 
   221 (*Is the second type an instance of the first one?*)
   222 fun match_pattern (PVar, _) = true
   223   | match_pattern (PApp _, PVar) = false
   224   | match_pattern (PApp (s, ps), PApp (t, qs)) =
   225     s = t andalso match_patterns (ps, qs)
   226 and match_patterns (_, []) = true
   227   | match_patterns ([], _) = false
   228   | match_patterns (p :: ps, q :: qs) =
   229     match_pattern (p, q) andalso match_patterns (ps, qs)
   230 fun match_ptype (PType (_, ps), PType (_, qs)) = match_patterns (ps, qs)
   231 
   232 (* Is there a unifiable constant? *)
   233 fun pconst_mem f consts (s, ps) =
   234   exists (curry (match_ptype o f) ps)
   235          (map snd (filter (curry (op =) s o fst) consts))
   236 fun pconst_hyper_mem f const_tab (s, ps) =
   237   exists (curry (match_ptype o f) ps) (these (Symtab.lookup const_tab s))
   238 
   239 fun pattern_for_type (Type (s, Ts)) = PApp (s, map pattern_for_type Ts)
   240   | pattern_for_type (TFree (s, _)) = PApp (s, [])
   241   | pattern_for_type (TVar _) = PVar
   242 
   243 (* Pairs a constant with the list of its type instantiations. *)
   244 fun ptype thy const x =
   245   (if const then map pattern_for_type (these (try (Sign.const_typargs thy) x))
   246    else [])
   247 fun rich_ptype thy const (s, T) =
   248   PType (order_of_type T, ptype thy const (s, T))
   249 fun rich_pconst thy const (s, T) = (s, rich_ptype thy const (s, T))
   250 
   251 fun string_for_hyper_pconst (s, ps) =
   252   s ^ "{" ^ commas (map string_for_ptype ps) ^ "}"
   253 
   254 (* Add a pconstant to the table, but a [] entry means a standard
   255    connective, which we ignore.*)
   256 fun add_pconst_to_table also_skolem (s, p) =
   257   if (not also_skolem andalso String.isPrefix skolem_prefix s) then I
   258   else Symtab.map_default (s, [p]) (insert (op =) p)
   259 
   260 fun is_formula_type T = (T = HOLogic.boolT orelse T = propT)
   261 
   262 fun pconsts_in_terms thy is_built_in_const also_skolems pos ts =
   263   let
   264     (* Giving a penalty to Skolems is a good idea, but that penalty shouldn't
   265        increase linearly with the number of Skolems in the fact. Hence, we
   266        create equivalence classes of Skolems by reusing fresh Skolem names. *)
   267     val skolem_cache = Unsynchronized.ref ""
   268     fun get_skolem_name () =
   269       (if !skolem_cache = "" then skolem_cache := gensym skolem_prefix else ();
   270        !skolem_cache)
   271     val flip = Option.map not
   272     (* We include free variables, as well as constants, to handle locales. For
   273        each quantifiers that must necessarily be skolemized by the automatic
   274        prover, we introduce a fresh constant to simulate the effect of
   275        Skolemization. *)
   276     fun do_const const x ts =
   277       (not (is_built_in_const x ts)
   278        ? add_pconst_to_table also_skolems (rich_pconst thy const x))
   279       #> fold do_term ts
   280     and do_term t =
   281       case strip_comb t of
   282         (Const x, ts) => do_const true x ts
   283       | (Free x, ts) => do_const false x ts
   284       | (Abs (_, T, t'), ts) =>
   285         (null ts
   286          ? add_pconst_to_table true (abs_name, PType (order_of_type T + 1, [])))
   287         #> fold do_term (t' :: ts)
   288       | (_, ts) => fold do_term ts
   289     fun do_quantifier will_surely_be_skolemized abs_T body_t =
   290       do_formula pos body_t
   291       #> (if also_skolems andalso will_surely_be_skolemized then
   292             add_pconst_to_table true
   293                 (get_skolem_name (), PType (order_of_type abs_T, []))
   294           else
   295             I)
   296     and do_term_or_formula T =
   297       if is_formula_type T then do_formula NONE else do_term
   298     and do_formula pos t =
   299       case t of
   300         Const (@{const_name all}, _) $ Abs (_, T, t') =>
   301         do_quantifier (pos = SOME false) T t'
   302       | @{const "==>"} $ t1 $ t2 =>
   303         do_formula (flip pos) t1 #> do_formula pos t2
   304       | Const (@{const_name "=="}, Type (_, [T, _])) $ t1 $ t2 =>
   305         fold (do_term_or_formula T) [t1, t2]
   306       | @{const Trueprop} $ t1 => do_formula pos t1
   307       | @{const False} => I
   308       | @{const True} => I
   309       | @{const Not} $ t1 => do_formula (flip pos) t1
   310       | Const (@{const_name All}, _) $ Abs (_, T, t') =>
   311         do_quantifier (pos = SOME false) T t'
   312       | Const (@{const_name Ex}, _) $ Abs (_, T, t') =>
   313         do_quantifier (pos = SOME true) T t'
   314       | @{const HOL.conj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   315       | @{const HOL.disj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   316       | @{const HOL.implies} $ t1 $ t2 =>
   317         do_formula (flip pos) t1 #> do_formula pos t2
   318       | Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2 =>
   319         fold (do_term_or_formula T) [t1, t2]
   320       | Const (@{const_name If}, Type (_, [_, Type (_, [T, _])]))
   321         $ t1 $ t2 $ t3 =>
   322         do_formula NONE t1 #> fold (do_term_or_formula T) [t2, t3]
   323       | Const (@{const_name Ex1}, _) $ Abs (_, T, t') =>
   324         do_quantifier (is_some pos) T t'
   325       | Const (@{const_name Ball}, _) $ t1 $ Abs (_, T, t') =>
   326         do_quantifier (pos = SOME false) T
   327                       (HOLogic.mk_imp (incr_boundvars 1 t1 $ Bound 0, t'))
   328       | Const (@{const_name Bex}, _) $ t1 $ Abs (_, T, t') =>
   329         do_quantifier (pos = SOME true) T
   330                       (HOLogic.mk_conj (incr_boundvars 1 t1 $ Bound 0, t'))
   331       | (t0 as Const (_, @{typ bool})) $ t1 =>
   332         do_term t0 #> do_formula pos t1  (* theory constant *)
   333       | _ => do_term t
   334   in Symtab.empty |> fold (do_formula pos) ts end
   335 
   336 (*Inserts a dummy "constant" referring to the theory name, so that relevance
   337   takes the given theory into account.*)
   338 fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...}
   339                      : relevance_fudge) thy_name t =
   340   if exists (curry (op <) 0.0) [theory_const_rel_weight,
   341                                 theory_const_irrel_weight] then
   342     Const (thy_name ^ theory_const_suffix, @{typ bool}) $ t
   343   else
   344     t
   345 
   346 fun theory_const_prop_of fudge th =
   347   theory_constify fudge (Context.theory_name (theory_of_thm th)) (prop_of th)
   348 
   349 (**** Constant / Type Frequencies ****)
   350 
   351 (* A two-dimensional symbol table counts frequencies of constants. It's keyed
   352    first by constant name and second by its list of type instantiations. For the
   353    latter, we need a linear ordering on "pattern list". *)
   354 
   355 fun pattern_ord p =
   356   case p of
   357     (PVar, PVar) => EQUAL
   358   | (PVar, PApp _) => LESS
   359   | (PApp _, PVar) => GREATER
   360   | (PApp q1, PApp q2) =>
   361     prod_ord fast_string_ord (dict_ord pattern_ord) (q1, q2)
   362 fun ptype_ord (PType p, PType q) =
   363   prod_ord (dict_ord pattern_ord) int_ord (swap p, swap q)
   364 
   365 structure PType_Tab = Table(type key = ptype val ord = ptype_ord)
   366 
   367 fun count_fact_consts thy fudge =
   368   let
   369     fun do_const const (s, T) ts =
   370       (* Two-dimensional table update. Constant maps to types maps to count. *)
   371       PType_Tab.map_default (rich_ptype thy const (s, T), 0) (Integer.add 1)
   372       |> Symtab.map_default (s, PType_Tab.empty)
   373       #> fold do_term ts
   374     and do_term t =
   375       case strip_comb t of
   376         (Const x, ts) => do_const true x ts
   377       | (Free x, ts) => do_const false x ts
   378       | (Abs (_, _, t'), ts) => fold do_term (t' :: ts)
   379       | (_, ts) => fold do_term ts
   380   in do_term o theory_const_prop_of fudge o snd end
   381 
   382 
   383 (**** Actual Filtering Code ****)
   384 
   385 fun pow_int _ 0 = 1.0
   386   | pow_int x 1 = x
   387   | pow_int x n = if n > 0 then x * pow_int x (n - 1) else pow_int x (n + 1) / x
   388 
   389 (*The frequency of a constant is the sum of those of all instances of its type.*)
   390 fun pconst_freq match const_tab (c, ps) =
   391   PType_Tab.fold (fn (qs, m) => match (ps, qs) ? Integer.add m)
   392                  (the (Symtab.lookup const_tab c)) 0
   393 
   394 
   395 (* A surprising number of theorems contain only a few significant constants.
   396    These include all induction rules, and other general theorems. *)
   397 
   398 (* "log" seems best in practice. A constant function of one ignores the constant
   399    frequencies. Rare constants give more points if they are relevant than less
   400    rare ones. *)
   401 fun rel_weight_for _ freq = 1.0 + 2.0 / Math.ln (Real.fromInt freq + 1.0)
   402 
   403 (* Irrelevant constants are treated differently. We associate lower penalties to
   404    very rare constants and very common ones -- the former because they can't
   405    lead to the inclusion of too many new facts, and the latter because they are
   406    so common as to be of little interest. *)
   407 fun irrel_weight_for ({worse_irrel_freq, higher_order_irrel_weight, ...}
   408                       : relevance_fudge) order freq =
   409   let val (k, x) = worse_irrel_freq |> `Real.ceil in
   410     (if freq < k then Math.ln (Real.fromInt (freq + 1)) / Math.ln x
   411      else rel_weight_for order freq / rel_weight_for order k)
   412     * pow_int higher_order_irrel_weight (order - 1)
   413   end
   414 
   415 (* Computes a constant's weight, as determined by its frequency. *)
   416 fun generic_pconst_weight abs_weight skolem_weight theory_const_weight
   417                           weight_for f const_tab (c as (s, PType (m, _))) =
   418   if s = abs_name then abs_weight
   419   else if String.isPrefix skolem_prefix s then skolem_weight
   420   else if String.isSuffix theory_const_suffix s then theory_const_weight
   421   else weight_for m (pconst_freq (match_ptype o f) const_tab c)
   422 
   423 fun rel_pconst_weight ({abs_rel_weight, theory_const_rel_weight, ...}
   424                        : relevance_fudge) const_tab =
   425   generic_pconst_weight abs_rel_weight 0.0 theory_const_rel_weight
   426                         rel_weight_for I const_tab
   427 fun irrel_pconst_weight (fudge as {abs_irrel_weight, skolem_irrel_weight,
   428                                    theory_const_irrel_weight, ...}) const_tab =
   429   generic_pconst_weight abs_irrel_weight skolem_irrel_weight
   430                         theory_const_irrel_weight (irrel_weight_for fudge) swap
   431                         const_tab
   432 
   433 fun locality_bonus (_ : relevance_fudge) General = 0.0
   434   | locality_bonus {intro_bonus, ...} Intro = intro_bonus
   435   | locality_bonus {elim_bonus, ...} Elim = elim_bonus
   436   | locality_bonus {simp_bonus, ...} Simp = simp_bonus
   437   | locality_bonus {local_bonus, ...} Local = local_bonus
   438   | locality_bonus {assum_bonus, ...} Assum = assum_bonus
   439   | locality_bonus {chained_bonus, ...} Chained = chained_bonus
   440 
   441 fun is_odd_const_name s =
   442   s = abs_name orelse String.isPrefix skolem_prefix s orelse
   443   String.isSuffix theory_const_suffix s
   444 
   445 fun fact_weight fudge loc const_tab relevant_consts fact_consts =
   446   case fact_consts |> List.partition (pconst_hyper_mem I relevant_consts)
   447                    ||> filter_out (pconst_hyper_mem swap relevant_consts) of
   448     ([], _) => 0.0
   449   | (rel, irrel) =>
   450     if forall (forall (is_odd_const_name o fst)) [rel, irrel] then
   451       0.0
   452     else
   453       let
   454         val irrel = irrel |> filter_out (pconst_mem swap rel)
   455         val rel_weight =
   456           0.0 |> fold (curry (op +) o rel_pconst_weight fudge const_tab) rel
   457         val irrel_weight =
   458           ~ (locality_bonus fudge loc)
   459           |> fold (curry (op +) o irrel_pconst_weight fudge const_tab) irrel
   460         val res = rel_weight / (rel_weight + irrel_weight)
   461       in if Real.isFinite res then res else 0.0 end
   462 
   463 fun pconsts_in_fact thy is_built_in_const t =
   464   Symtab.fold (fn (s, pss) => fold (cons o pair s) pss)
   465               (pconsts_in_terms thy is_built_in_const true (SOME true) [t]) []
   466 fun pair_consts_fact thy is_built_in_const fudge fact =
   467   case fact |> snd |> theory_const_prop_of fudge
   468             |> pconsts_in_fact thy is_built_in_const of
   469     [] => NONE
   470   | consts => SOME ((fact, consts), NONE)
   471 
   472 type annotated_thm =
   473   (((unit -> string) * locality) * thm) * (string * ptype) list
   474 
   475 fun take_most_relevant max_relevant remaining_max
   476         ({max_imperfect, max_imperfect_exp, ...} : relevance_fudge) 
   477         (candidates : (annotated_thm * real) list) =
   478   let
   479     val max_imperfect =
   480       Real.ceil (Math.pow (max_imperfect,
   481                     Math.pow (Real.fromInt remaining_max
   482                               / Real.fromInt max_relevant, max_imperfect_exp)))
   483     val (perfect, imperfect) =
   484       candidates |> sort (Real.compare o swap o pairself snd)
   485                  |> take_prefix (fn (_, w) => w > 0.99999)
   486     val ((accepts, more_rejects), rejects) =
   487       chop max_imperfect imperfect |>> append perfect |>> chop remaining_max
   488   in
   489     trace_msg (fn () =>
   490         "Actually passed (" ^ Int.toString (length accepts) ^ " of " ^
   491         Int.toString (length candidates) ^ "): " ^
   492         (accepts |> map (fn ((((name, _), _), _), weight) =>
   493                             name () ^ " [" ^ Real.toString weight ^ "]")
   494                  |> commas));
   495     (accepts, more_rejects @ rejects)
   496   end
   497 
   498 fun if_empty_replace_with_locality thy is_built_in_const facts loc tab =
   499   if Symtab.is_empty tab then
   500     pconsts_in_terms thy is_built_in_const false (SOME false)
   501         (map_filter (fn ((_, loc'), th) =>
   502                         if loc' = loc then SOME (prop_of th) else NONE) facts)
   503   else
   504     tab
   505 
   506 fun add_arities is_built_in_const th =
   507   let
   508     fun aux _ _ NONE = NONE
   509       | aux t args (SOME tab) =
   510         case t of
   511           t1 $ t2 => SOME tab |> aux t1 (t2 :: args) |> aux t2 []
   512         | Const (x as (s, _)) =>
   513           (if is_built_in_const x args then
   514              SOME tab
   515            else case Symtab.lookup tab s of
   516              NONE => SOME (Symtab.update (s, length args) tab)
   517            | SOME n => if n = length args then SOME tab else NONE)
   518         | _ => SOME tab
   519   in aux (prop_of th) [] end
   520 
   521 fun needs_ext is_built_in_const facts =
   522   fold (add_arities is_built_in_const o snd) facts (SOME Symtab.empty)
   523   |> is_none
   524 
   525 fun relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   526         (fudge as {threshold_divisor, ridiculous_threshold, ...})
   527         ({add, del, ...} : relevance_override) facts goal_ts =
   528   let
   529     val thy = ProofContext.theory_of ctxt
   530     val const_tab = fold (count_fact_consts thy fudge) facts Symtab.empty
   531     val goal_const_tab =
   532       pconsts_in_terms thy is_built_in_const false (SOME false) goal_ts
   533       |> fold (if_empty_replace_with_locality thy is_built_in_const facts)
   534               [Chained, Assum, Local]
   535     val add_ths = Attrib.eval_thms ctxt add
   536     val del_ths = Attrib.eval_thms ctxt del
   537     val facts = facts |> filter_out (member Thm.eq_thm del_ths o snd)
   538     fun iter j remaining_max threshold rel_const_tab hopeless hopeful =
   539       let
   540         fun relevant [] _ [] =
   541             (* Nothing has been added this iteration. *)
   542             if j = 0 andalso threshold >= ridiculous_threshold then
   543               (* First iteration? Try again. *)
   544               iter 0 max_relevant (threshold / threshold_divisor) rel_const_tab
   545                    hopeless hopeful
   546             else
   547               []
   548           | relevant candidates rejects [] =
   549             let
   550               val (accepts, more_rejects) =
   551                 take_most_relevant max_relevant remaining_max fudge candidates
   552               val rel_const_tab' =
   553                 rel_const_tab
   554                 |> fold (add_pconst_to_table false) (maps (snd o fst) accepts)
   555               fun is_dirty (c, _) =
   556                 Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c
   557               val (hopeful_rejects, hopeless_rejects) =
   558                  (rejects @ hopeless, ([], []))
   559                  |-> fold (fn (ax as (_, consts), old_weight) =>
   560                               if exists is_dirty consts then
   561                                 apfst (cons (ax, NONE))
   562                               else
   563                                 apsnd (cons (ax, old_weight)))
   564                  |>> append (more_rejects
   565                              |> map (fn (ax as (_, consts), old_weight) =>
   566                                         (ax, if exists is_dirty consts then NONE
   567                                              else SOME old_weight)))
   568               val threshold =
   569                 1.0 - (1.0 - threshold)
   570                       * Math.pow (decay, Real.fromInt (length accepts))
   571               val remaining_max = remaining_max - length accepts
   572             in
   573               trace_msg (fn () => "New or updated constants: " ^
   574                   commas (rel_const_tab' |> Symtab.dest
   575                           |> subtract (op =) (rel_const_tab |> Symtab.dest)
   576                           |> map string_for_hyper_pconst));
   577               map (fst o fst) accepts @
   578               (if remaining_max = 0 then
   579                  []
   580                else
   581                  iter (j + 1) remaining_max threshold rel_const_tab'
   582                       hopeless_rejects hopeful_rejects)
   583             end
   584           | relevant candidates rejects
   585                      (((ax as (((_, loc), _), fact_consts)), cached_weight)
   586                       :: hopeful) =
   587             let
   588               val weight =
   589                 case cached_weight of
   590                   SOME w => w
   591                 | NONE => fact_weight fudge loc const_tab rel_const_tab
   592                                       fact_consts
   593             in
   594               if weight >= threshold then
   595                 relevant ((ax, weight) :: candidates) rejects hopeful
   596               else
   597                 relevant candidates ((ax, weight) :: rejects) hopeful
   598             end
   599         in
   600           trace_msg (fn () =>
   601               "ITERATION " ^ string_of_int j ^ ": current threshold: " ^
   602               Real.toString threshold ^ ", constants: " ^
   603               commas (rel_const_tab |> Symtab.dest
   604                       |> filter (curry (op <>) [] o snd)
   605                       |> map string_for_hyper_pconst));
   606           relevant [] [] hopeful
   607         end
   608     fun add_facts ths accepts =
   609       (facts |> filter (member Thm.eq_thm ths o snd)) @
   610       (accepts |> filter_out (member Thm.eq_thm ths o snd))
   611       |> take max_relevant
   612   in
   613     facts |> map_filter (pair_consts_fact thy is_built_in_const fudge)
   614           |> iter 0 max_relevant threshold0 goal_const_tab []
   615           |> not (null add_ths) ? add_facts add_ths
   616           |> (fn accepts =>
   617                  accepts |> needs_ext is_built_in_const accepts
   618                             ? add_facts @{thms ext})
   619           |> tap (fn accepts => trace_msg (fn () =>
   620                       "Total relevant: " ^ Int.toString (length accepts)))
   621   end
   622 
   623 
   624 (***************************************************************)
   625 (* Retrieving and filtering lemmas                             *)
   626 (***************************************************************)
   627 
   628 (*** retrieve lemmas and filter them ***)
   629 
   630 (*Reject theorems with names like "List.filter.filter_list_def" or
   631   "Accessible_Part.acc.defs", as these are definitions arising from packages.*)
   632 fun is_package_def a =
   633   let val names = Long_Name.explode a in
   634     (length names > 2 andalso not (hd names = "local") andalso
   635      String.isSuffix "_def" a) orelse String.isSuffix "_defs" a
   636   end
   637 
   638 fun mk_fact_table f xs =
   639   fold (Termtab.update o `(prop_of o f)) xs Termtab.empty
   640 fun uniquify xs = Termtab.fold (cons o snd) (mk_fact_table snd xs) []
   641 
   642 (* FIXME: put other record thms here, or declare as "no_atp" *)
   643 val multi_base_blacklist =
   644   ["defs", "select_defs", "update_defs", "split", "splits", "split_asm",
   645    "cases", "ext_cases", "eq.simps", "eq.refl", "nchotomy", "case_cong",
   646    "weak_case_cong"]
   647   |> map (prefix ".")
   648 
   649 val max_lambda_nesting = 3
   650 
   651 fun term_has_too_many_lambdas max (t1 $ t2) =
   652     exists (term_has_too_many_lambdas max) [t1, t2]
   653   | term_has_too_many_lambdas max (Abs (_, _, t)) =
   654     max = 0 orelse term_has_too_many_lambdas (max - 1) t
   655   | term_has_too_many_lambdas _ _ = false
   656 
   657 (* Don't count nested lambdas at the level of formulas, since they are
   658    quantifiers. *)
   659 fun formula_has_too_many_lambdas Ts (Abs (_, T, t)) =
   660     formula_has_too_many_lambdas (T :: Ts) t
   661   | formula_has_too_many_lambdas Ts t =
   662     if is_formula_type (fastype_of1 (Ts, t)) then
   663       exists (formula_has_too_many_lambdas Ts) (#2 (strip_comb t))
   664     else
   665       term_has_too_many_lambdas max_lambda_nesting t
   666 
   667 (* The max apply depth of any "metis" call in "Metis_Examples" (on 2007-10-31)
   668    was 11. *)
   669 val max_apply_depth = 15
   670 
   671 fun apply_depth (f $ t) = Int.max (apply_depth f, apply_depth t + 1)
   672   | apply_depth (Abs (_, _, t)) = apply_depth t
   673   | apply_depth _ = 0
   674 
   675 fun is_formula_too_complex t =
   676   apply_depth t > max_apply_depth orelse formula_has_too_many_lambdas [] t
   677 
   678 (* FIXME: Extend to "Meson" and "Metis" *)
   679 val exists_sledgehammer_const =
   680   exists_Const (fn (s, _) => String.isPrefix sledgehammer_prefix s)
   681 
   682 (* FIXME: make more reliable *)
   683 val exists_low_level_class_const =
   684   exists_Const (fn (s, _) =>
   685      String.isSubstring (Long_Name.separator ^ "class" ^ Long_Name.separator) s)
   686 
   687 fun is_metastrange_theorem th =
   688   case head_of (concl_of th) of
   689       Const (a, _) => (a <> @{const_name Trueprop} andalso
   690                        a <> @{const_name "=="})
   691     | _ => false
   692 
   693 fun is_that_fact th =
   694   String.isSuffix (Long_Name.separator ^ Obtain.thatN) (Thm.get_name_hint th)
   695   andalso exists_subterm (fn Free (s, _) => s = Name.skolem Auto_Bind.thesisN
   696                            | _ => false) (prop_of th)
   697 
   698 val type_has_top_sort =
   699   exists_subtype (fn TFree (_, []) => true | TVar (_, []) => true | _ => false)
   700 
   701 (**** Predicates to detect unwanted facts (prolific or likely to cause
   702       unsoundness) ****)
   703 
   704 (* Too general means, positive equality literal with a variable X as one
   705    operand, when X does not occur properly in the other operand. This rules out
   706    clearly inconsistent facts such as X = a | X = b, though it by no means
   707    guarantees soundness. *)
   708 
   709 (* Unwanted equalities are those between a (bound or schematic) variable that
   710    does not properly occur in the second operand. *)
   711 val is_exhaustive_finite =
   712   let
   713     fun is_bad_equal (Var z) t =
   714         not (exists_subterm (fn Var z' => z = z' | _ => false) t)
   715       | is_bad_equal (Bound j) t = not (loose_bvar1 (t, j))
   716       | is_bad_equal _ _ = false
   717     fun do_equals t1 t2 = is_bad_equal t1 t2 orelse is_bad_equal t2 t1
   718     fun do_formula pos t =
   719       case (pos, t) of
   720         (_, @{const Trueprop} $ t1) => do_formula pos t1
   721       | (true, Const (@{const_name all}, _) $ Abs (_, _, t')) =>
   722         do_formula pos t'
   723       | (true, Const (@{const_name All}, _) $ Abs (_, _, t')) =>
   724         do_formula pos t'
   725       | (false, Const (@{const_name Ex}, _) $ Abs (_, _, t')) =>
   726         do_formula pos t'
   727       | (_, @{const "==>"} $ t1 $ t2) =>
   728         do_formula (not pos) t1 andalso
   729         (t2 = @{prop False} orelse do_formula pos t2)
   730       | (_, @{const HOL.implies} $ t1 $ t2) =>
   731         do_formula (not pos) t1 andalso
   732         (t2 = @{const False} orelse do_formula pos t2)
   733       | (_, @{const Not} $ t1) => do_formula (not pos) t1
   734       | (true, @{const HOL.disj} $ t1 $ t2) => forall (do_formula pos) [t1, t2]
   735       | (false, @{const HOL.conj} $ t1 $ t2) => forall (do_formula pos) [t1, t2]
   736       | (true, Const (@{const_name HOL.eq}, _) $ t1 $ t2) => do_equals t1 t2
   737       | (true, Const (@{const_name "=="}, _) $ t1 $ t2) => do_equals t1 t2
   738       | _ => false
   739   in do_formula true end
   740 
   741 fun has_bound_or_var_of_type tycons =
   742   exists_subterm (fn Var (_, Type (s, _)) => member (op =) tycons s
   743                    | Abs (_, Type (s, _), _) => member (op =) tycons s
   744                    | _ => false)
   745 
   746 (* Facts are forbidden to contain variables of these types. The typical reason
   747    is that they lead to unsoundness. Note that "unit" satisfies numerous
   748    equations like "?x = ()". The resulting clauses will have no type constraint,
   749    yielding false proofs. Even "bool" leads to many unsound proofs, though only
   750    for higher-order problems. *)
   751 val dangerous_types = [@{type_name unit}, @{type_name bool}, @{type_name prop}];
   752 
   753 (* Facts containing variables of type "unit" or "bool" or of the form
   754    "ALL x. x = A | x = B | x = C" are likely to lead to unsound proofs if types
   755    are omitted. *)
   756 fun is_dangerous_term no_dangerous_types t =
   757   not no_dangerous_types andalso
   758   let val t = transform_elim_term t in
   759     has_bound_or_var_of_type dangerous_types t orelse
   760     is_exhaustive_finite t
   761   end
   762 
   763 fun is_theorem_bad_for_atps no_dangerous_types thm =
   764   let val t = prop_of thm in
   765     is_formula_too_complex t orelse exists_type type_has_top_sort t orelse
   766     is_dangerous_term no_dangerous_types t orelse
   767     exists_sledgehammer_const t orelse exists_low_level_class_const t orelse
   768     is_metastrange_theorem thm orelse is_that_fact thm
   769   end
   770 
   771 fun clasimpset_rules_of ctxt =
   772   let
   773     val {safeIs, safeEs, hazIs, hazEs, ...} = ctxt |> claset_of |> rep_cs
   774     val intros = safeIs @ hazIs
   775     val elims = map Classical.classical_rule (safeEs @ hazEs)
   776     val simps = ctxt |> simpset_of |> dest_ss |> #simps |> map snd
   777   in (mk_fact_table I intros, mk_fact_table I elims, mk_fact_table I simps) end
   778 
   779 fun all_facts ctxt reserved no_dangerous_types
   780               ({intro_bonus, elim_bonus, simp_bonus, ...} : relevance_fudge)
   781               add_ths chained_ths =
   782   let
   783     val thy = ProofContext.theory_of ctxt
   784     val global_facts = Global_Theory.facts_of thy
   785     val local_facts = ProofContext.facts_of ctxt
   786     val named_locals = local_facts |> Facts.dest_static []
   787     val assms = Assumption.all_assms_of ctxt
   788     fun is_assum th = exists (fn ct => prop_of th aconv term_of ct) assms
   789     val is_chained = member Thm.eq_thm chained_ths
   790     val (intros, elims, simps) =
   791       if exists (curry (op <) 0.0) [intro_bonus, elim_bonus, simp_bonus] then
   792         clasimpset_rules_of ctxt
   793       else
   794         (Termtab.empty, Termtab.empty, Termtab.empty)
   795     fun is_good_unnamed_local th =
   796       not (Thm.has_name_hint th) andalso
   797       forall (fn (_, ths) => not (member Thm.eq_thm ths th)) named_locals
   798     val unnamed_locals =
   799       union Thm.eq_thm (Facts.props local_facts) chained_ths
   800       |> filter is_good_unnamed_local |> map (pair "" o single)
   801     val full_space =
   802       Name_Space.merge (Facts.space_of global_facts, Facts.space_of local_facts)
   803     fun add_facts global foldx facts =
   804       foldx (fn (name0, ths) =>
   805         if name0 <> "" andalso
   806            forall (not o member Thm.eq_thm add_ths) ths andalso
   807            (Facts.is_concealed facts name0 orelse
   808             (respect_no_atp andalso is_package_def name0) orelse
   809             exists (fn s => String.isSuffix s name0) multi_base_blacklist orelse
   810             String.isSuffix "_def_raw" (* FIXME: crude hack *) name0) then
   811           I
   812         else
   813           let
   814             val multi = length ths > 1
   815             val backquotify =
   816               enclose "`" "`" o string_for_term ctxt o close_form o prop_of
   817             fun check_thms a =
   818               case try (ProofContext.get_thms ctxt) a of
   819                 NONE => false
   820               | SOME ths' => Thm.eq_thms (ths, ths')
   821           in
   822             pair 1
   823             #> fold (fn th => fn (j, rest) =>
   824                  (j + 1,
   825                   if is_theorem_bad_for_atps no_dangerous_types th andalso
   826                      not (member Thm.eq_thm add_ths th) then
   827                     rest
   828                   else
   829                     (((fn () =>
   830                           if name0 = "" then
   831                             th |> backquotify
   832                           else
   833                             let
   834                               val name1 = Facts.extern facts name0
   835                               val name2 = Name_Space.extern full_space name0
   836                             in
   837                               case find_first check_thms [name1, name2, name0] of
   838                                 SOME name => make_name reserved multi j name
   839                               | NONE => ""
   840                             end),
   841                       let val t = prop_of th in
   842                         if is_chained th then
   843                           Chained
   844                         else if global then
   845                           if Termtab.defined intros t then Intro
   846                           else if Termtab.defined elims t then Elim
   847                           else if Termtab.defined simps t then Simp
   848                           else General
   849                         else
   850                           if is_assum th then Assum else Local
   851                       end),
   852                       (multi, th)) :: rest)) ths
   853             #> snd
   854           end)
   855   in
   856     [] |> add_facts false fold local_facts (unnamed_locals @ named_locals)
   857        |> add_facts true Facts.fold_static global_facts global_facts
   858   end
   859 
   860 (* The single-name theorems go after the multiple-name ones, so that single
   861    names are preferred when both are available. *)
   862 fun rearrange_facts ctxt respect_no_atp =
   863   List.partition (fst o snd) #> op @ #> map (apsnd snd)
   864   #> respect_no_atp ? filter_out (No_ATPs.member ctxt o snd)
   865 
   866 fun external_frees t =
   867   [] |> Term.add_frees t |> filter_out (can Name.dest_internal o fst)
   868 
   869 fun relevant_facts ctxt no_dangerous_types (threshold0, threshold1)
   870                    max_relevant is_built_in_const fudge
   871                    (override as {add, only, ...}) chained_ths hyp_ts concl_t =
   872   let
   873     val thy = ProofContext.theory_of ctxt
   874     val decay = Math.pow ((1.0 - threshold1) / (1.0 - threshold0),
   875                           1.0 / Real.fromInt (max_relevant + 1))
   876     val add_ths = Attrib.eval_thms ctxt add
   877     val reserved = reserved_isar_keyword_table ()
   878     val ind_stmt =
   879       Logic.list_implies (hyp_ts |> filter_out (null o external_frees), concl_t)
   880       |> atomize_term
   881     val ind_stmt_xs = external_frees ind_stmt
   882     val facts =
   883       (if only then
   884          maps (map (fn ((name, loc), th) => ((K name, loc), (true, th)))
   885                o fact_from_ref ctxt reserved chained_ths) add
   886        else
   887          all_facts ctxt reserved no_dangerous_types fudge add_ths chained_ths)
   888       |> maps (instantiate_if_induct_rule ctxt ind_stmt ind_stmt_xs)
   889       |> rearrange_facts ctxt (respect_no_atp andalso not only)
   890       |> uniquify
   891   in
   892     trace_msg (fn () => "Considering " ^ Int.toString (length facts) ^
   893                         " facts");
   894     (if only orelse threshold1 < 0.0 then
   895        facts
   896      else if threshold0 > 1.0 orelse threshold0 > threshold1 orelse
   897              max_relevant = 0 then
   898        []
   899      else
   900        ((concl_t |> theory_constify fudge (Context.theory_name thy)) :: hyp_ts)
   901        |> relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   902                            fudge override facts)
   903     |> map (apfst (apfst (fn f => f ())))
   904   end
   905 
   906 end;