src/Pure/Isar/find_theorems.ML
author kleing
Fri Nov 28 11:37:20 2008 +0100 (2008-11-28)
changeset 28900 53fd5cc685b4
parent 28211 07cfaa1a9e12
child 29269 5c25a2012975
permissions -rw-r--r--
FindTheorems performance improvements (from Timothy Bourke)

* Prefilter the list of theorems based on the constants and
free variables in Pattern search terms, before calling
Pattern.matches_subterm.
* Apply filters successively rather than running each and
then finding the intersection.
* Show the time taken to run a query.
wenzelm@16033
     1
(*  Title:      Pure/Isar/find_theorems.ML
wenzelm@16033
     2
    ID:         $Id$
wenzelm@26283
     3
    Author:     Rafal Kolanski and Gerwin Klein, NICTA
wenzelm@16033
     4
wenzelm@16033
     5
Retrieve theorems from proof context.
wenzelm@16033
     6
*)
wenzelm@16033
     7
wenzelm@16033
     8
signature FIND_THEOREMS =
sig
  (*number of theorems displayed when print_theorems is given no explicit limit*)
  val limit: int ref

  (*search criteria, parameterised over the representation of terms*)
  datatype 'term criterion =
      Name of string
    | Intro
    | Elim
    | Dest
    | Simp of 'term
    | Pattern of 'term

  (*print_theorems ctxt opt_goal opt_limit rem_dups criteria:
    each criterion is paired with a flag; false negates the criterion*)
  val print_theorems:
    Proof.context -> term option -> int option -> bool ->
      (bool * string criterion) list -> unit
end;
wenzelm@16033
    16
wenzelm@16033
    17
structure FindTheorems: FIND_THEOREMS =
wenzelm@16033
    18
struct
wenzelm@16033
    19
wenzelm@16033
    20
(** search criteria **)
wenzelm@16033
    21
wenzelm@16036
    22
(*search criteria; 'term is string for unparsed input and becomes the
  result of reading a term pattern once read_criterion has been applied*)
datatype 'term criterion =
    Name of string
  | Intro
  | Elim
  | Dest
  | Simp of 'term
  | Pattern of 'term;
wenzelm@16036
    24
wenzelm@16036
    25
(*parse the textual payload of Simp and Pattern criteria as term patterns
  in the given context; all other criteria are passed through unchanged*)
fun read_criterion ctxt crit =
  let
    fun read str = ProofContext.read_term_pattern ctxt str;
  in
    (case crit of
      Name name => Name name
    | Intro => Intro
    | Elim => Elim
    | Dest => Dest
    | Simp str => Simp (read str)
    | Pattern str => Pattern (read str))
  end;
wenzelm@16033
    31
wenzelm@16036
    32
(*pretty-print a criterion together with its polarity flag b;
  a criterion with b = false is shown with a leading "-"*)
fun pretty_criterion ctxt (b, c) =
  let
    fun prfx s = if b then s else "-" ^ s;
    fun keyword s = Pretty.str (prfx s);
    fun pretty_pat pat = Syntax.pretty_term ctxt (Term.show_dummy_patterns pat);
  in
    (case c of
      Name name => Pretty.str (prfx "name: " ^ quote name)
    | Intro => keyword "intro"
    | Elim => keyword "elim"
    | Dest => keyword "dest"
    | Simp pat =>
        Pretty.block [keyword "simp:", Pretty.brk 1, Pretty.quote (pretty_pat pat)]
    | Pattern pat =>
        Pretty.enclose (prfx " \"") "\"" [pretty_pat pat])
  end;
wenzelm@16033
    46
wenzelm@16033
    47
(** search criterion filters **)
wenzelm@16033
    48
kleing@16895
    49
(*generated filters are to be of the form
wenzelm@26336
    50
  input: (Facts.ref * thm)
wenzelm@17106
    51
  output: (p:int, s:int) option, where
kleing@16895
    52
    NONE indicates no match
wenzelm@17106
    53
    p is the primary sorting criterion
kleing@16895
    54
      (eg. number of assumptions in the theorem)
kleing@16895
    55
    s is the secondary sorting criterion
kleing@16895
    56
      (eg. size of the substitution for intro, elim and dest)
kleing@16895
    57
  when applying a set of filters to a thm, fold results in:
kleing@16895
    58
    (biggest p, sum of all s)
wenzelm@17106
    59
  currently p and s only matter for intro, elim, dest and simp filters,
wenzelm@17106
    60
  otherwise the default ordering is used.
kleing@16895
    61
*)
kleing@16895
    62
kleing@16088
    63
kleing@16088
    64
(* matching theorems *)
wenzelm@17106
    65
wenzelm@17205
    66
(*a term counts as non-trivial if, after dropping the object-logic
  judgment, its head is a constant*)
fun is_nontrivial thy t =
  Term.is_Const (Term.head_of (ObjectLogic.drop_judgment thy t));
kleing@16088
    67
kleing@16964
    68
(*extract terms from term_src, refine them to the parts that concern us,
kleing@16964
    69
  if po try match them against obj else vice versa.
kleing@16964
    70
  trivial matches are ignored.
kleing@16964
    71
  returns: smallest substitution size*)
kleing@16964
    72
(*extract candidate terms from term_src, refine each one, and keep those
  that are non-trivial and match obj (direction chosen by po);
  result: SOME of the smallest substitution size, or NONE without a match*)
fun is_matching_thm (extract_terms, refine_term) ctxt po obj term_src =
  let
    val thy = ProofContext.theory_of ctxt;

    (*po: pat is the pattern and obj the object; otherwise the roles are swapped*)
    fun oriented pat = if po then (pat, obj) else (obj, pat);

    fun matches pat =
      is_nontrivial thy pat andalso Pattern.matches thy (oriented pat);

    (*sum of the sizes of all terms in the term substitution*)
    fun substsize pat =
      let
        val (_, subst) =
          Pattern.match thy (oriented pat) (Vartab.empty, Vartab.empty);
      in Vartab.fold (fn (_, (_, t)) => fn n => size_of_term t + n) subst 0 end;

    fun bestmatch [] = NONE
      | bestmatch sizes = SOME (foldr1 Int.min sizes);

    val candidates = filter (matches o refine_term) (extract_terms term_src);
  in
    bestmatch (map (substsize o refine_term) candidates)
  end;
kleing@16088
    93
kleing@16088
    94
wenzelm@16033
    95
(* filter_name *)
wenzelm@16033
    96
wenzelm@17755
    97
(*wildcard matching: pat is split at "*" and the resulting pieces must
  occur in str in this order, each one after the end of the previous one*)
fun match_string pat str =
  let
    fun match [] _ = true
      | match (pieces as p :: ps) s =
          if size s < size p then false
          else
            (case try (unprefix p) s of
              SOME rest => match ps rest
              (*p is not a prefix here: retry one character further on*)
            | NONE => match pieces (String.substring (s, 1, size s - 1)));
  in match (space_explode "*" pat) str end;
wenzelm@16033
   106
wenzelm@17106
   107
(*name filter: succeeds with the neutral score (0, 0) if the name of the
  fact reference matches the wildcard pattern str_pat*)
fun filter_name str_pat (thmref, _) =
  (case match_string str_pat (Facts.name_of_ref thmref) of
    true => SOME (0, 0)
  | false => NONE);
wenzelm@16033
   110
wenzelm@16033
   111
wenzelm@16036
   112
(* filter intro/elim/dest rules *)
wenzelm@16033
   113
wenzelm@17205
   114
(*dest filter: match the first premise of thm against the premises of
  subgoal 1 of goal*)
fun filter_dest ctxt goal (_, thm) =
  let
    (*theorems without premises yield no candidate terms at all*)
    fun dest_props th = if Thm.no_prems th then [] else [Thm.full_prop_of th];
    val first_prem = hd o Logic.strip_imp_prems;
    val prems = Logic.prems_of_goal goal 1;

    fun try_subst prem = is_matching_thm (dest_props, first_prem) ctxt true prem thm;
    val sizes = map_filter try_subst prems;
  in
    (*if possible, keep best substitution (one with smallest size)*)
    (*dest rules always have assumptions, so a dest with one
      assumption is as good as an intro rule with none*)
    (case sizes of
      [] => NONE
    | _ => SOME (Thm.nprems_of thm - 1, foldr1 Int.min sizes))
  end;
wenzelm@16033
   130
wenzelm@17205
   131
(*intro filter: match the conclusion of thm against the conclusion of
  subgoal 1 of goal; score is (number of premises, substitution size)*)
fun filter_intro ctxt goal (_, thm) =
  let
    val extract_intro = (single o Thm.full_prop_of, Logic.strip_imp_concl);
    val concl = Logic.concl_of_goal goal 1;
  in
    (case is_matching_thm extract_intro ctxt true concl thm of
      SOME subst_size => SOME (Thm.nprems_of thm, subst_size)
    | NONE => NONE)
  end;
wenzelm@16033
   139
wenzelm@17205
   140
(*elim filter: the first premise and the conclusion of thm are matched
  simultaneously against a premise and the conclusion of subgoal 1 of goal,
  by packing each pair into a single artificial "*combine*" term*)
fun filter_elim ctxt goal (_, thm) =
  if Thm.no_prems thm then NONE
  else
    let
      val rule = Thm.full_prop_of thm;
      val prems = Logic.prems_of_goal goal 1;
      val goal_concl = Logic.concl_of_goal goal 1;
      val rule_mp = hd (Logic.strip_imp_prems rule);
      val rule_concl = Logic.strip_imp_concl rule;

      fun combine t1 t2 = Const ("*combine*", dummyT --> dummyT) $ (t1 $ t2);
      val rule_tree = combine rule_mp rule_concl;

      fun try_subst prem =
        is_matching_thm (single, I) ctxt true (combine prem goal_concl) rule_tree;
      val sizes = map_filter try_subst prems;

      val nontrivial =
        is_nontrivial (ProofContext.theory_of ctxt) (Thm.major_prem_of thm);
    in
      (*elim rules always have assumptions, so an elim with one
        assumption is as good as an intro rule with none*)
      if nontrivial andalso not (null sizes)
      then SOME (Thm.nprems_of thm - 1, foldr1 Int.min sizes)
      else NONE
    end;
wenzelm@16036
   162
wenzelm@16033
   163
kleing@16074
   164
(* filter_simp *)
wenzelm@16033
   165
wenzelm@17205
   166
(*simp filter: turn thm into rewrite rules via the mk function of the
  local simpset and match t against the left-hand side of each rule;
  score is (number of premises, substitution size)*)
fun filter_simp ctxt t (_, thm) =
  let
    val (_, {mk_rews = {mk, ...}, ...}) =
      MetaSimplifier.rep_ss (Simplifier.local_simpset_of ctxt);
    val extract_simp =
      (map Thm.full_prop_of o mk,
        #1 o Logic.dest_equals o Logic.strip_imp_concl);
  in
    (case is_matching_thm extract_simp ctxt false t thm of
      SOME subst_size => SOME (Thm.nprems_of thm, subst_size)
    | NONE => NONE)
  end;
wenzelm@16033
   176
wenzelm@16033
   177
wenzelm@16033
   178
(* filter_pattern *)
wenzelm@16033
   179
kleing@28900
   180
(*names of the constants and free variables in the full proposition of thm*)
fun get_names (_, thm) =
  let
    val prop = Thm.full_prop_of thm;
  in
    term_consts prop union add_term_free_names (prop, [])
  end;
wenzelm@17205
   183
kleing@28900
   184
(*Add to cs the names of constants and frees of pattern t that cannot be
  thrown away.  Arguments applied to an abstraction are skipped, and so is
  anything below a head that is neither a constant, a free nor an
  abstraction.  For example, in "(% x y z. y + 1) 7 8 9" the arguments
  7, 8 and 9 contribute nothing, although including 8 would be more
  accurate.  Only a sound approximation is required and variables must be
  ignored: e.g. "_ 7 8 9".*)
fun add_pat_names (t, cs) =
  let
    (*record the head name c, then descend into the arguments*)
    fun add_head c args = foldl add_pat_names (insert (op =) c cs) args;
  in
    (case strip_comb t of
      (Const (c, _), args) => add_head c args
    | (Free (c, _), args) => add_head c args
    | (Abs (_, _, body), _) => add_pat_names (body, cs)
    | _ => cs)
  end;
kleing@28900
   195
kleing@28900
   196
(*pattern filter: takes a fact paired with an optional cache of its names
  and returns the filter result paired with the (now filled) cache;
  the subterm match is only attempted when all names collected from the
  pattern occur among the names of the theorem*)
fun filter_pattern ctxt pat =
  let
    val pat_consts = add_pat_names (pat, []);

    fun matches_thm thm =
      Pattern.matches_subterm (ProofContext.theory_of ctxt) (pat, Thm.full_prop_of thm);

    fun check (fact as (_, thm), cached) =
      let
        (*compute the names of the theorem only if nobody has done so before*)
        val thm_consts =
          (case cached of
            SOME names => names
          | NONE => get_names fact);
        val result =
          if pat_consts subset_string thm_consts andalso matches_thm thm
          then SOME (0, 0)
          else NONE;
      in (result, SOME thm_consts) end;
  in check end;
wenzelm@16033
   206
wenzelm@16033
   207
(* interpret criteria as filters *)
wenzelm@16033
   208
wenzelm@16036
   209
local
wenzelm@16036
   210
wenzelm@16036
   211
(*report that search criterion c cannot be used without a current goal*)
fun err_no_goal c =
  error (String.concat ["Current goal required for ", c, " search criterion"]);
wenzelm@16036
   213
kleing@28900
   214
(*translate a criterion into a filter on (fact, cached names) pairs;
  intro, elim and dest require a goal and fail with an error otherwise;
  only the pattern filter reads and updates the cached names*)
fun filter_crit ctxt opt_goal crit =
  (case (opt_goal, crit) of
    (_, Name name) => apfst (filter_name name)
  | (NONE, Intro) => err_no_goal "intro"
  | (NONE, Elim) => err_no_goal "elim"
  | (NONE, Dest) => err_no_goal "dest"
  | (SOME goal, Intro) => apfst (filter_intro ctxt goal)
  | (SOME goal, Elim) => apfst (filter_elim ctxt goal)
  | (SOME goal, Dest) => apfst (filter_dest ctxt goal)
  | (_, Simp pat) => apfst (filter_simp ctxt pat)
  | (_, Pattern pat) => filter_pattern ctxt pat);
wenzelm@16036
   223
wenzelm@19502
   224
(*negate a filter result: a match becomes a failure, and a failure
  becomes a match with the neutral score (0, 0)*)
fun opt_not (SOME _) = NONE
  | opt_not NONE = SOME (0, 0);
kleing@16895
   225
wenzelm@17756
   226
(*combine two filter results: maximum of the first components and sum of
  the second ones; the combination fails as soon as either result is NONE*)
fun opt_add r1 r2 =
  (case (r1, r2) of
    (SOME (a, x), SOME (b, y)) => SOME (Int.max (a, b), x + y : int)
  | _ => NONE);
kleing@16895
   228
kleing@28900
   229
(*apply filters to thm one after another, threading the accumulated score
  and the name cache through; stops at the first failing filter;
  result: SOME (score, thm) if every filter succeeded*)
fun app_filters thm =
  let
    fun app (acc, names, pending) =
      (case (acc, pending) of
        (NONE, _) => NONE
      | (SOME score, []) => SOME (score, thm)
      | (_, f :: rest) =>
          let val (res, names') = f (thm, names)
          in app (opt_add acc res, names', rest) end);
  in app end;
kleing@28900
   235
wenzelm@16036
   236
in
wenzelm@16033
   237
wenzelm@16033
   238
(*filter for a criterion with polarity flag b; for b = false the result
  of the underlying filter is negated*)
fun filter_criterion ctxt opt_goal (b, c) =
  let
    val positive = filter_crit ctxt opt_goal c;
  in
    if b then positive else apfst opt_not o positive
  end;
kleing@16895
   240
kleing@16895
   241
(*run all filters on every fact, keep the facts accepted by all of them,
  and return these sorted by their accumulated score*)
fun all_filters filters thms =
  let
    (*every fact starts with the neutral score and an empty name cache*)
    fun eval_filters thm = app_filters thm (SOME (0, 0), NONE, filters);

    (*filters return: (number of assumptions, substitution size) option, so
      sort (desc. in both cases) according to number of assumptions first,
      then by the substitution size*)
    fun thm_ord ((score0, _), (score1, _)) =
      prod_ord int_ord int_ord (score1, score0);

    val scored = map_filter eval_filters thms;
  in
    map #2 (sort thm_ord scored)
  end;
wenzelm@16033
   251
wenzelm@16036
   252
end;
wenzelm@16036
   253
wenzelm@16033
   254
kleing@22414
   255
(* removing duplicates, preferring nicer names, roughly n log n *)
kleing@22340
   256
wenzelm@25226
   257
local
wenzelm@25226
   258
huffman@27486
   259
(*component orderings used to decide which of two names is nicer*)

(*on the optional selection: only presence vs. absence is compared*)
fun index_ord (i, j) = option_ord (K EQUAL) (i, j);

(*compares the NameSpace.is_hidden flags of the two names*)
fun hidden_ord (x, y) =
  bool_ord (NameSpace.is_hidden x, NameSpace.is_hidden y);

(*compares the number of components after NameSpace.explode*)
fun qual_ord (x, y) =
  int_ord (length (NameSpace.explode x), length (NameSpace.explode y));

(*compares the sizes of the two names*)
fun txt_ord (x, y) = int_ord (size x, size y);
wenzelm@25226
   263
huffman@27486
   264
(*true unless (x, i) compares GREATER than (y, j) in the lexicographic
  combination of hidden_ord, index_ord, qual_ord and txt_ord*)
fun nicer_name (x, i) (y, j) =
  let
    (*the first comparison that is not EQUAL decides; later ones are not run*)
    fun lex [] = EQUAL
      | lex (cmp :: cmps) =
          (case cmp () of
            EQUAL => lex cmps
          | ord => ord);
    val result =
      lex
       [fn () => hidden_ord (x, y),
        fn () => index_ord (i, j),
        fn () => qual_ord (x, y),
        fn () => txt_ord (x, y)];
  in result <> GREATER end;
wenzelm@25226
   270
huffman@27486
   271
(*decide whether the first fact reference is to be preferred over the
  second one: two named references are compared via nicer_name, and an
  unnamed Fact is preferred over a Named reference.
  The Fact/Fact clause makes the function total; without it such a pair
  would raise Match.  It returns true, so that the caller keeps the first
  of the two entries.*)
fun nicer (Facts.Named ((x, _), i)) (Facts.Named ((y, _), j)) =
      nicer_name (x, i) (y, j)
  | nicer (Facts.Fact _) (Facts.Named _) = true
  | nicer (Facts.Named _) (Facts.Fact _) = false
  | nicer (Facts.Fact _) (Facts.Fact _) = true;
wenzelm@25226
   275
wenzelm@26336
   276
(*remove consecutive duplicates: of neighbouring entries whose theorems
  are equal according to Thm.eq_thm_prop only the one with the nicer
  reference survives; the order of the remaining entries is preserved*)
fun rem_cdups xs =
  let
    fun same_prop (((_, t), _), ((_, t'), _)) = Thm.eq_thm_prop (t, t');
    fun pick (x as ((n, _), _)) (y as ((n', _), _)) = if nicer n n' then x else y;

    fun rem_c kept (x :: y :: rest) =
          if same_prop (x, y)
          then rem_c kept (pick x y :: rest)
          else rem_c (x :: kept) (y :: rest)
      | rem_c kept [x] = rev (x :: kept)
      | rem_c kept [] = rev kept;
  in rem_c [] xs end;
wenzelm@25226
   285
wenzelm@26336
   286
in
wenzelm@25226
   287
kleing@22340
   288
(*remove duplicate theorems: number the entries, sort them by proposition
  so that duplicates become neighbours, remove consecutive duplicates, and
  finally restore the original order via the numbering*)
fun rem_thm_dups xs =
  let
    fun prop_key ((_, thm), _) = Thm.prop_of thm;
    fun position (_, i) = i;

    val numbered = xs ~~ (1 upto length xs);
    val by_prop =
      sort (fn (a, b) => Term.fast_term_ord (prop_key a, prop_key b)) numbered;
    val unique = rem_cdups by_prop;
    val by_position =
      sort (fn (a, b) => int_ord (position a, position b)) unique;
  in map #1 by_position end;
kleing@22340
   294
wenzelm@26336
   295
end;
kleing@22340
   296
kleing@22340
   297
wenzelm@16033
   298
(* print_theorems *)
wenzelm@16033
   299
wenzelm@26283
   300
(*all selections of the static facts of the background theory, followed
  by those of the proof context itself*)
fun all_facts_of ctxt =
  let
    val thy_facts = PureThy.facts_of (ProofContext.theory_of ctxt);
    val ctxt_facts = ProofContext.facts_of ctxt;
  in
    maps Facts.selections
      (Facts.dest_static [] thy_facts @ Facts.dest_static [] ctxt_facts)
  end;
wenzelm@17972
   304
wenzelm@25992
   305
(*number of theorems displayed by print_theorems when no explicit limit
  is passed*)
val limit: int ref = ref 40;
wenzelm@25992
   306
kleing@22340
   307
(*Search the facts of ctxt for theorems satisfying all criteria and print
  the result.

  ctxt: proof context providing the facts and the term syntax
  opt_goal: goal used by the intro, elim and dest criteria
  opt_limit: maximal number of theorems to display (default: ! limit)
  rem_dups: whether duplicate theorems are removed from the result
  raw_criteria: unparsed criteria, each paired with a flag where false
    negates the criterion

  The elapsed time is appended to the summary line.  It is taken from the
  fourth whitespace-separated token of the timing string and silently
  omitted if that string has fewer tokens, so that an unexpected timing
  format cannot make a successful search fail.  "nothing found" is only
  reported when there are no matches at all, not when the limit hides
  all of them.*)
fun print_theorems ctxt opt_goal opt_limit rem_dups raw_criteria =
  let
    val start = start_timing ();
    val criteria = map (apsnd (read_criterion ctxt)) raw_criteria;
    val filters = map (filter_criterion ctxt opt_goal) criteria;

    val raw_matches = all_filters filters (all_facts_of ctxt);

    val matches =
      if rem_dups
      then rem_thm_dups raw_matches
      else raw_matches;

    val len = length matches;
    val lim = the_default (! limit) opt_limit;
    (*drop from the front, so that the trailing lim entries are displayed*)
    val thms = Library.drop (len - lim, matches);

    val end_msg =
      (case String.tokens Char.isSpace (end_timing start) of
        _ :: _ :: _ :: secs :: _ => " in " ^ secs ^ " secs"
      | _ => "");

    fun prt_fact (thmref, thm) = Pretty.block
      [Pretty.str (Facts.string_of_ref thmref), Pretty.str ":", Pretty.brk 1,
        ProofContext.pretty_thm ctxt thm];
  in
    Pretty.big_list "searched for:" (map (pretty_criterion ctxt) criteria)
      :: Pretty.str "" ::
     (if null matches then [Pretty.str ("nothing found" ^ end_msg)]
      else
        [Pretty.str ("found " ^ string_of_int len ^ " theorems" ^
          (if len <= lim then ""
           else " (" ^ string_of_int (length thms) ^ " displayed)")
           ^ end_msg ^ ":"), Pretty.str ""] @
        map prt_fact thms)
    |> Pretty.chunks |> Pretty.writeln
  end;
wenzelm@16033
   343
wenzelm@16033
   344
end;