src/HOL/TPTP/mash_export.ML
author blanchet
Thu, 26 Jun 2014 20:49:34 +0200
changeset 57388 a8eaa0e7d439
parent 57352 9801e9fa9270
child 57406 e844dcf57deb
permissions -rw-r--r--
tuning
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
48234
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     1
(*  Title:      HOL/TPTP/mash_export.ML
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     2
    Author:     Jasmin Blanchette, TU Muenchen
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     3
    Copyright   2012
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     4
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     5
Export Isabelle theory information for MaSh (Machine-learning for Sledgehammer).
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     6
*)
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     7
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     8
signature MASH_EXPORT =
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     9
sig
55201
1ee776da8da7 renamed ML file
blanchet
parents: 54695
diff changeset
    10
  type params = Sledgehammer_Prover.params
48235
40655464a93b MaSh evaluation driver
blanchet
parents: 48234
diff changeset
    11
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
    12
  val generate_accessibility : Proof.context -> theory list -> string -> unit
55212
blanchet
parents: 55201
diff changeset
    13
  val generate_features : Proof.context -> theory list -> string -> unit
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
    14
  val generate_isar_dependencies : Proof.context -> int * int option -> theory list -> string ->
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
    15
    unit
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
    16
  val generate_prover_dependencies : Proof.context -> params -> int * int option -> theory list ->
55212
blanchet
parents: 55201
diff changeset
    17
    string -> unit
blanchet
parents: 55201
diff changeset
    18
  val generate_isar_commands : Proof.context -> string -> (int * int option) * int -> theory list ->
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
    19
    int -> string -> unit
55212
blanchet
parents: 55201
diff changeset
    20
  val generate_prover_commands : Proof.context -> params -> (int * int option) * int ->
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
    21
    theory list -> int -> string -> unit
55212
blanchet
parents: 55201
diff changeset
    22
  val generate_mepo_suggestions : Proof.context -> params -> (int * int option) * int ->
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
    23
    theory list -> int -> string -> unit
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
    24
  val generate_mash_suggestions : Proof.context -> params -> (int * int option) * int ->
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
    25
    theory list -> int -> string -> unit
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
    26
  val generate_mesh_suggestions : int -> string -> string -> string -> unit
48234
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
    27
end;
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
    28
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
    29
structure MaSh_Export : MASH_EXPORT =
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
    30
struct
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
    31
50485
3c6ac2da2f45 merge aliased theorems in MaSh dependencies, modulo symmetry of equality
blanchet
parents: 50484
diff changeset
    32
open Sledgehammer_Fact
55212
blanchet
parents: 55201
diff changeset
    33
open Sledgehammer_Prover_ATP
48381
1b7d798460bb renamed ML structures
blanchet
parents: 48379
diff changeset
    34
open Sledgehammer_MePo
1b7d798460bb renamed ML structures
blanchet
parents: 48379
diff changeset
    35
open Sledgehammer_MaSh
48245
854a47677335 generate ATP dependencies
blanchet
parents: 48242
diff changeset
    36
50559
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
    37
fun in_range (from, to) j =
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
    38
  j >= from andalso (to = NONE orelse j <= the to)
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
    39
50349
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
    40
fun has_thm_thy th thy =
48316
252f45c04042 drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents: 48315
diff changeset
    41
  Context.theory_name thy = Context.theory_name (theory_of_thm th)
252f45c04042 drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents: 48315
diff changeset
    42
50349
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
    43
fun has_thys thys th = exists (has_thm_thy th) thys
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
    44
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
    45
fun all_facts ctxt =
48531
7da5d3b8aef4 don't export technical theorems for MaSh
blanchet
parents: 48530
diff changeset
    46
  let val css = Sledgehammer_Fact.clasimpset_rule_table_of ctxt in
50442
4f6a4d32522c don't blacklist "case" theorems -- this causes problems in MaSh later
blanchet
parents: 50434
diff changeset
    47
    Sledgehammer_Fact.all_facts ctxt true false Symtab.empty [] [] css
51135
e32114b25551 tuned code
blanchet
parents: 51034
diff changeset
    48
    |> sort (crude_thm_ord o pairself snd)
48531
7da5d3b8aef4 don't export technical theorems for MaSh
blanchet
parents: 48530
diff changeset
    49
  end
7da5d3b8aef4 don't export technical theorems for MaSh
blanchet
parents: 48530
diff changeset
    50
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    51
fun filter_accessible_from th = filter (fn (_, th') => thm_less (th', th))
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    52
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
    53
fun generate_accessibility ctxt thys file_name =
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    54
  let
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
    55
    val path = Path.explode file_name
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    56
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    57
    fun do_fact (parents, fact) prevs =
57122
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
    58
      let val s = encode_str fact ^ ": " ^ encode_strs parents ^ "\n" in
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
    59
        File.append path s; [fact]
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
    60
      end
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    61
50611
99af6b652b3a linearize eval driver, to work around horrible bug in previous implementation
blanchet
parents: 50582
diff changeset
    62
    val facts =
50349
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
    63
      all_facts ctxt
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    64
      |> filter_out (has_thys thys o snd)
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    65
      |> attach_parents_to_facts []
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    66
      |> map (apsnd (nickname_of_thm o snd))
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    67
  in
57122
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
    68
    File.write path "";
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
    69
    ignore (fold do_fact facts [])
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    70
  end
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    71
55212
blanchet
parents: 55201
diff changeset
    72
fun generate_features ctxt thys file_name =
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    73
  let
57388
blanchet
parents: 57352
diff changeset
    74
    val path = Path.explode file_name
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    75
    val _ = File.write path ""
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    76
    val facts = all_facts ctxt |> filter_out (has_thys thys o snd)
57122
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
    77
48385
2779dea0b1e0 added locality as a MaSh feature
blanchet
parents: 48381
diff changeset
    78
    fun do_fact ((_, stature), th) =
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    79
      let
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50611
diff changeset
    80
        val name = nickname_of_thm th
48318
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
    81
        val feats =
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    82
          features_of ctxt (theory_of_thm th) 0 Symtab.empty stature [prop_of th] |> map fst
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    83
        val s = encode_str name ^ ": " ^ encode_strs (sort string_ord feats) ^ "\n"
57122
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
    84
      in
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
    85
        File.append path s
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
    86
      end
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    87
  in
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    88
    List.app do_fact facts
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    89
  end
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    90
51034
0ee6039d2c8e distinguish one more kind of proofs
blanchet
parents: 51033
diff changeset
    91
val prover_marker = "$a"
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    92
val isar_marker = "$i"
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    93
val omitted_marker = "$o"
51034
0ee6039d2c8e distinguish one more kind of proofs
blanchet
parents: 51033
diff changeset
    94
val unprovable_marker = "$u" (* axiom or definition or characteristic theorem *)
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    95
val prover_failed_marker = "$x"
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    96
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    97
fun smart_dependencies_of ctxt params_opt facts name_tabs th isar_deps_opt =
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    98
  let
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    99
    val (marker, deps) =
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   100
      (case params_opt of
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   101
        SOME (params as {provers = prover :: _, ...}) =>
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   102
        prover_dependencies_of ctxt params prover 0 facts name_tabs th
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   103
        |>> (fn true => prover_marker | false => prover_failed_marker)
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   104
      | NONE =>
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   105
        let
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   106
          val deps =
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   107
            (case isar_deps_opt of
57306
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57295
diff changeset
   108
              NONE => isar_dependencies_of name_tabs th
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57295
diff changeset
   109
            | deps => deps)
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57295
diff changeset
   110
        in
57351
29691e2dde9a generate right dependencies in MaSh driver
blanchet
parents: 57306
diff changeset
   111
          (case deps of
29691e2dde9a generate right dependencies in MaSh driver
blanchet
parents: 57306
diff changeset
   112
            NONE => (omitted_marker, [])
29691e2dde9a generate right dependencies in MaSh driver
blanchet
parents: 57306
diff changeset
   113
          | SOME [] => (unprovable_marker, [])
29691e2dde9a generate right dependencies in MaSh driver
blanchet
parents: 57306
diff changeset
   114
          | SOME deps => (isar_marker, deps))
57306
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57295
diff changeset
   115
        end)
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   116
  in
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   117
    (case trim_dependencies deps of
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   118
      SOME deps => (marker, deps)
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   119
    | NONE => (omitted_marker, []))
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   120
  end
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   121
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
   122
fun generate_isar_or_prover_dependencies ctxt params_opt range thys file_name =
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
   123
  let
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
   124
    val path = Path.explode file_name
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
   125
    val facts = filter_out (has_thys thys o snd) (all_facts ctxt)
50735
6b232d76cbc9 refined class handling, to prevent cycles in fact graph
blanchet
parents: 50624
diff changeset
   126
    val name_tabs = build_name_tables nickname_of_thm facts
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   127
50559
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   128
    fun do_fact (j, (_, th)) =
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   129
      if in_range range j then
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   130
        let
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50611
diff changeset
   131
          val name = nickname_of_thm th
50561
9a733bd6c0ba added tracing to ATP exporter
blanchet
parents: 50559
diff changeset
   132
          val _ = tracing ("Fact " ^ string_of_int j ^ ": " ^ name)
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
   133
          val access_facts = filter_accessible_from th facts
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
   134
          val (marker, deps) = smart_dependencies_of ctxt params_opt access_facts name_tabs th NONE
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
   135
        in
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
   136
          encode_str name ^ ": " ^ marker ^ " " ^ encode_strs deps ^ "\n"
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
   137
        end
50559
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   138
      else
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   139
        ""
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   140
57352
9801e9fa9270 avoid parallelism, since it confuses the global state and leads to cheating (with 'sml_xxx' engines)
blanchet
parents: 57351
diff changeset
   141
    val lines = map do_fact (tag_list 1 facts)
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   142
  in
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   143
    File.write_list path lines
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   144
  end
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
   145
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   146
fun generate_isar_dependencies ctxt =
54118
f5fc8525838f tweaked signature
blanchet
parents: 54095
diff changeset
   147
  generate_isar_or_prover_dependencies ctxt NONE
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   148
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50442
diff changeset
   149
fun generate_prover_dependencies ctxt params =
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50442
diff changeset
   150
  generate_isar_or_prover_dependencies ctxt (SOME params)
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   151
50954
7bc58677860e added step to skip some queries
blanchet
parents: 50907
diff changeset
   152
fun is_bad_query ctxt ho_atp step j th isar_deps =
7bc58677860e added step to skip some queries
blanchet
parents: 50907
diff changeset
   153
  j mod step <> 0 orelse
50859
c0f38015a632 don't generate queries with empty dependency list
blanchet
parents: 50829
diff changeset
   154
  Thm.legacy_get_kind th = "" orelse
57306
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57295
diff changeset
   155
  null (the_list isar_deps) orelse
50523
0799339fea0f get rid of some junk facts in the MaSh evaluation driver
blanchet
parents: 50519
diff changeset
   156
  is_blacklisted_or_something ctxt ho_atp (Thm.get_name_hint th)
50515
c4a27ab89c9b shared bad MaSh query detection between MePo and MaSh, so that the generated files mirror each other
blanchet
parents: 50511
diff changeset
   157
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
   158
fun generate_isar_or_prover_commands ctxt prover params_opt (range, step) thys max_suggs file_name =
48234
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
   159
  let
55212
blanchet
parents: 55201
diff changeset
   160
    val ho_atp = is_ho_atp ctxt prover
57388
blanchet
parents: 57352
diff changeset
   161
    val path = Path.explode file_name
50349
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
   162
    val facts = all_facts ctxt
50485
3c6ac2da2f45 merge aliased theorems in MaSh dependencies, modulo symmetry of equality
blanchet
parents: 50484
diff changeset
   163
    val (new_facts, old_facts) = facts |> List.partition (has_thys thys o snd)
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   164
    val num_old_facts = length old_facts
50735
6b232d76cbc9 refined class handling, to prevent cycles in fact graph
blanchet
parents: 50624
diff changeset
   165
    val name_tabs = build_name_tables nickname_of_thm facts
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   166
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   167
    fun do_fact (j, (((name, (parents, ((_, stature), th))), prevs), const_tab)) =
50559
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   168
      if in_range range j then
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   169
        let
50561
9a733bd6c0ba added tracing to ATP exporter
blanchet
parents: 50559
diff changeset
   170
          val _ = tracing ("Fact " ^ string_of_int j ^ ": " ^ name)
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   171
          val isar_deps = isar_dependencies_of name_tabs th
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   172
          val do_query = not (is_bad_query ctxt ho_atp step j th isar_deps)
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   173
          val goal_feats =
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   174
            features_of ctxt (theory_of_thm th) (num_old_facts + j) const_tab stature [prop_of th]
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   175
            |> sort_wrt fst
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
   176
          val access_facts = filter_accessible_from th new_facts @ old_facts
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   177
          val (marker, deps) =
57306
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57295
diff changeset
   178
            smart_dependencies_of ctxt params_opt access_facts name_tabs th isar_deps
57122
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
   179
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   180
          fun extra_features_of (((_, stature), th), weight) =
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   181
            [prop_of th]
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   182
            |> features_of ctxt (theory_of_thm th) (num_old_facts + j) const_tab stature
53141
d27e99a6a679 take chained and proximate facts into consideration when computing MaSh features
blanchet
parents: 53140
diff changeset
   183
            |> map (apsnd (fn r => weight * extra_feature_factor * r))
57122
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
   184
50559
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   185
          val query =
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   186
            if do_query then
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   187
              let
53141
d27e99a6a679 take chained and proximate facts into consideration when computing MaSh features
blanchet
parents: 53140
diff changeset
   188
                val query_feats =
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   189
                  new_facts
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   190
                  |> drop (j - num_extra_feature_facts)
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   191
                  |> take num_extra_feature_facts
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   192
                  |> rev
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   193
                  |> weight_facts_steeply
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   194
                  |> map extra_features_of
53159
a5805fe4e91c repaired num_extra_feature_facts + tuning
blanchet
parents: 53141
diff changeset
   195
                  |> rpair goal_feats |-> fold (union (eq_fst (op =)))
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   196
              in
57122
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
   197
                "? " ^ string_of_int max_suggs ^ " # " ^ encode_str name ^ ": " ^
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
   198
                encode_strs parents ^ "; " ^ encode_features query_feats ^ "\n"
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   199
              end
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   200
            else
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   201
              ""
50754
74a6adcb96ac also generate queries for goals with too many Isar dependencies
blanchet
parents: 50735
diff changeset
   202
          val update =
53121
5f727525b1ac only generate feature weights for queries -- they're not used elsewhere
blanchet
parents: 53120
diff changeset
   203
            "! " ^ encode_str name ^ ": " ^ encode_strs parents ^ "; " ^
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   204
            encode_strs (map fst goal_feats) ^ "; " ^ marker ^ " " ^ encode_strs deps ^ "\n"
50559
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   205
        in query ^ update end
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   206
      else
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   207
        ""
57122
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
   208
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
   209
    val new_facts =
57122
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
   210
      new_facts |> attach_parents_to_facts old_facts |> map (`(nickname_of_thm o snd o snd))
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   211
    val hd_prevs = map (nickname_of_thm o snd) (the_list (try List.last old_facts))
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   212
    val prevss = hd_prevs :: map (single o fst) new_facts |> split_last |> fst
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   213
    val hd_const_tabs = fold (add_const_counts o prop_of o snd) old_facts Symtab.empty
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   214
    val (const_tabs, _) =
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   215
      fold_map (`I oo add_const_counts o prop_of o snd o snd o snd) new_facts hd_const_tabs
57352
9801e9fa9270 avoid parallelism, since it confuses the global state and leads to cheating (with 'sml_xxx' engines)
blanchet
parents: 57351
diff changeset
   216
    val lines = map do_fact (tag_list 1 (new_facts ~~ prevss ~~ const_tabs))
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   217
  in
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   218
    File.write_list path lines
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   219
  end
48239
0016290f904c generate Meng--Paulson facts for evaluation purposes
blanchet
parents: 48235
diff changeset
   220
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   221
fun generate_isar_commands ctxt prover =
50954
7bc58677860e added step to skip some queries
blanchet
parents: 50907
diff changeset
   222
  generate_isar_or_prover_commands ctxt prover NONE
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   223
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50442
diff changeset
   224
fun generate_prover_commands ctxt (params as {provers = prover :: _, ...}) =
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50442
diff changeset
   225
  generate_isar_or_prover_commands ctxt prover (SOME params)
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   226
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   227
fun generate_mepo_or_mash_suggestions mepo_or_mash_suggested_facts ctxt
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
   228
    (params as {provers = prover :: _, ...}) (range, step) thys max_suggs file_name =
48239
0016290f904c generate Meng--Paulson facts for evaluation purposes
blanchet
parents: 48235
diff changeset
   229
  let
55212
blanchet
parents: 55201
diff changeset
   230
    val ho_atp = is_ho_atp ctxt prover
57388
blanchet
parents: 57352
diff changeset
   231
    val path = Path.explode file_name
50349
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
   232
    val facts = all_facts ctxt
50485
3c6ac2da2f45 merge aliased theorems in MaSh dependencies, modulo symmetry of equality
blanchet
parents: 50484
diff changeset
   233
    val (new_facts, old_facts) = facts |> List.partition (has_thys thys o snd)
50735
6b232d76cbc9 refined class handling, to prevent cycles in fact graph
blanchet
parents: 50624
diff changeset
   234
    val name_tabs = build_name_tables nickname_of_thm facts
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   235
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   236
    fun do_fact (j, ((_, th), old_facts)) =
50906
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   237
      if in_range range j then
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   238
        let
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   239
          val name = nickname_of_thm th
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   240
          val _ = tracing ("Fact " ^ string_of_int j ^ ": " ^ name)
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   241
          val goal = goal_of_thm (Proof_Context.theory_of ctxt) th
52196
2281f33e8da6 redid rac7830871177 to avoid duplicate fixed variable (e.g. lemma "P (a::nat)" proof - have "!!a::int. Q a" sledgehammer [e])
blanchet
parents: 52125
diff changeset
   242
          val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal goal 1 ctxt
50906
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   243
          val isar_deps = isar_dependencies_of name_tabs th
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   244
        in
50954
7bc58677860e added step to skip some queries
blanchet
parents: 50907
diff changeset
   245
          if is_bad_query ctxt ho_atp step j th isar_deps then
50906
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   246
            ""
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   247
          else
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   248
            let
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   249
              val suggs =
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   250
                old_facts
57121
426ab5fabcae got rid of 'linearize' option
blanchet
parents: 57120
diff changeset
   251
                |> filter_accessible_from th
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   252
                |> mepo_or_mash_suggested_facts ctxt params max_suggs hyp_ts concl_t
50906
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   253
                |> map (nickname_of_thm o snd)
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   254
            in
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   255
              encode_str name ^ ": " ^ encode_strs suggs ^ "\n"
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   256
            end
50906
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   257
        end
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   258
      else
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   259
        ""
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   260
50519
2951841ec011 parallelized MaSh exporter
blanchet
parents: 50515
diff changeset
   261
    fun accum x (yss as ys :: _) = (x :: ys) :: yss
57295
2ccd6820f74e changed order of facts so that 'name_tabs' has the same order everywhere (which affects unaliasing)
blanchet
parents: 57150
diff changeset
   262
    val old_factss = tl (fold accum new_facts [rev old_facts])
57352
9801e9fa9270 avoid parallelism, since it confuses the global state and leads to cheating (with 'sml_xxx' engines)
blanchet
parents: 57351
diff changeset
   263
    val lines = map do_fact (tag_list 1 (new_facts ~~ rev (map rev old_factss)))
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   264
  in
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   265
    File.write_list path lines
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   266
  end
48234
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
   267
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   268
val generate_mepo_suggestions =
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   269
  generate_mepo_or_mash_suggestions
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   270
    (fn ctxt => fn params => fn max_suggs => fn hyp_ts => fn concl_t =>
57150
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57125
diff changeset
   271
       not (Config.get ctxt Sledgehammer_MaSh.duplicates) ? Sledgehammer_Fact.drop_duplicate_facts
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   272
       #> Sledgehammer_MePo.mepo_suggested_facts ctxt params max_suggs NONE hyp_ts concl_t)
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   273
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   274
fun generate_mash_suggestions ctxt params =
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   275
  (Sledgehammer_MaSh.mash_unlearn ctxt params;
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   276
   generate_mepo_or_mash_suggestions
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   277
     (fn ctxt => fn params as {provers = prover :: _, ...} => fn max_suggs => fn hyp_ts =>
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   278
          fn concl_t =>
57122
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
   279
        tap (Sledgehammer_MaSh.mash_learn_facts ctxt params prover true 2 false
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   280
          Sledgehammer_Util.one_year)
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   281
        #> Sledgehammer_MaSh.mash_suggested_facts ctxt params max_suggs hyp_ts concl_t
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   282
        #> fst) ctxt params)
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   283
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   284
fun generate_mesh_suggestions max_suggs mash_file_name mepo_file_name mesh_file_name =
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   285
  let
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   286
    val mesh_path = Path.explode mesh_file_name
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   287
    val _ = File.write mesh_path ""
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   288
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   289
    fun do_fact (mash_line, mepo_line) =
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   290
      let
50829
01c9a515ccdd fixed escaping for MeSh encoder
blanchet
parents: 50826
diff changeset
   291
        val (name, mash_suggs) =
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   292
          extract_suggestions mash_line
57125
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57122
diff changeset
   293
          ||> (map fst #> weight_facts_steeply)
50829
01c9a515ccdd fixed escaping for MeSh encoder
blanchet
parents: 50826
diff changeset
   294
        val (name', mepo_suggs) =
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   295
          extract_suggestions mepo_line
57125
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57122
diff changeset
   296
          ||> (map fst #> weight_facts_steeply)
50829
01c9a515ccdd fixed escaping for MeSh encoder
blanchet
parents: 50826
diff changeset
   297
        val _ = if name = name' then () else error "Input files out of sync."
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   298
        val mess =
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   299
          [(mepo_weight, (mepo_suggs, [])),
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   300
           (mash_weight, (mash_suggs, []))]
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   301
        val mesh_suggs = mesh_facts (op =) max_suggs mess
50829
01c9a515ccdd fixed escaping for MeSh encoder
blanchet
parents: 50826
diff changeset
   302
        val mesh_line = encode_str name ^ ": " ^ encode_strs mesh_suggs ^ "\n"
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   303
      in File.append mesh_path mesh_line end
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   304
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   305
    val mash_lines = Path.explode mash_file_name |> File.read_lines
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   306
    val mepo_lines = Path.explode mepo_file_name |> File.read_lines
50907
a86708897266 graceful failure
blanchet
parents: 50906
diff changeset
   307
  in
57122
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
   308
    if length mash_lines = length mepo_lines then List.app do_fact (mash_lines ~~ mepo_lines)
5f69b8c3af5a more work on exporter
blanchet
parents: 57121
diff changeset
   309
    else warning "Skipped: MaSh file missing or out of sync with MePo file."
50907
a86708897266 graceful failure
blanchet
parents: 50906
diff changeset
   310
  end
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   311
48234
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
   312
end;