src/HOL/TPTP/mash_export.ML
author blanchet
Fri, 30 May 2014 12:27:51 +0200
changeset 57120 f8112c4b9cb8
parent 57077 5bc908e5bf42
child 57121 426ab5fabcae
permissions -rw-r--r--
extend exporter with new versions of MaSh
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
48234
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     1
(*  Title:      HOL/TPTP/mash_export.ML
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     2
    Author:     Jasmin Blanchette, TU Muenchen
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     3
    Copyright   2012
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     4
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     5
Export Isabelle theory information for MaSh (Machine-learning for Sledgehammer).
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     6
*)
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     7
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     8
signature MASH_EXPORT =
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
     9
sig
55201
1ee776da8da7 renamed ML file
blanchet
parents: 54695
diff changeset
    10
  type params = Sledgehammer_Prover.params
48235
40655464a93b MaSh evaluation driver
blanchet
parents: 48234
diff changeset
    11
55212
blanchet
parents: 55201
diff changeset
    12
  val generate_accessibility : Proof.context -> theory list -> bool -> string -> unit
blanchet
parents: 55201
diff changeset
    13
  val generate_features : Proof.context -> theory list -> string -> unit
blanchet
parents: 55201
diff changeset
    14
  val generate_isar_dependencies : Proof.context -> int * int option -> theory list -> bool ->
blanchet
parents: 55201
diff changeset
    15
    string -> unit
blanchet
parents: 55201
diff changeset
    16
  val generate_prover_dependencies : Proof.context -> params -> int * int option -> theory list ->
blanchet
parents: 55201
diff changeset
    17
    bool -> string -> unit
blanchet
parents: 55201
diff changeset
    18
  val generate_isar_commands : Proof.context -> string -> (int * int option) * int -> theory list ->
blanchet
parents: 55201
diff changeset
    19
    bool -> int -> string -> unit
blanchet
parents: 55201
diff changeset
    20
  val generate_prover_commands : Proof.context -> params -> (int * int option) * int ->
blanchet
parents: 55201
diff changeset
    21
    theory list -> bool -> int -> string -> unit
blanchet
parents: 55201
diff changeset
    22
  val generate_mepo_suggestions : Proof.context -> params -> (int * int option) * int ->
blanchet
parents: 55201
diff changeset
    23
    theory list -> bool -> int -> string -> unit
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
    24
  val generate_mash_suggestions : Proof.context -> params -> (int * int option) * int ->
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
    25
    theory list -> bool -> int -> string -> unit
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
    26
  val generate_mesh_suggestions : int -> string -> string -> string -> unit
48234
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
    27
end;
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
    28
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
    29
structure MaSh_Export : MASH_EXPORT =
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
    30
struct
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
    31
50485
3c6ac2da2f45 merge aliased theorems in MaSh dependencies, modulo symmetry of equality
blanchet
parents: 50484
diff changeset
    32
open Sledgehammer_Fact
55212
blanchet
parents: 55201
diff changeset
    33
open Sledgehammer_Prover_ATP
48381
1b7d798460bb renamed ML structures
blanchet
parents: 48379
diff changeset
    34
open Sledgehammer_MePo
1b7d798460bb renamed ML structures
blanchet
parents: 48379
diff changeset
    35
open Sledgehammer_MaSh
48245
854a47677335 generate ATP dependencies
blanchet
parents: 48242
diff changeset
    36
50559
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
    37
fun in_range (from, to) j =
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
    38
  j >= from andalso (to = NONE orelse j <= the to)
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
    39
50349
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
    40
fun has_thm_thy th thy =
48316
252f45c04042 drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents: 48315
diff changeset
    41
  Context.theory_name thy = Context.theory_name (theory_of_thm th)
252f45c04042 drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents: 48315
diff changeset
    42
50349
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
    43
fun has_thys thys th = exists (has_thm_thy th) thys
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
    44
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
    45
fun all_facts ctxt =
48531
7da5d3b8aef4 don't export technical theorems for MaSh
blanchet
parents: 48530
diff changeset
    46
  let val css = Sledgehammer_Fact.clasimpset_rule_table_of ctxt in
50442
4f6a4d32522c don't blacklist "case" theorems -- this causes problems in MaSh later
blanchet
parents: 50434
diff changeset
    47
    Sledgehammer_Fact.all_facts ctxt true false Symtab.empty [] [] css
51135
e32114b25551 tuned code
blanchet
parents: 51034
diff changeset
    48
    |> sort (crude_thm_ord o pairself snd)
48531
7da5d3b8aef4 don't export technical theorems for MaSh
blanchet
parents: 48530
diff changeset
    49
  end
7da5d3b8aef4 don't export technical theorems for MaSh
blanchet
parents: 48530
diff changeset
    50
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    51
fun filter_accessible_from th = filter (fn (_, th') => thm_less (th', th))
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    52
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    53
fun generate_accessibility ctxt thys linearize file_name =
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    54
  let
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    55
    val path = file_name |> Path.explode
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    56
    val _ = File.write path ""
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    57
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    58
    fun do_fact (parents, fact) prevs =
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    59
      let
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    60
        val parents = if linearize then prevs else parents
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    61
        val s = encode_str fact ^ ": " ^ encode_strs parents ^ "\n"
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    62
        val _ = File.append path s
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    63
      in [fact] end
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    64
50611
99af6b652b3a linearize eval driver, to work around horrible bug in previous implementation
blanchet
parents: 50582
diff changeset
    65
    val facts =
50349
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
    66
      all_facts ctxt
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    67
      |> filter_out (has_thys thys o snd)
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    68
      |> attach_parents_to_facts []
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    69
      |> map (apsnd (nickname_of_thm o snd))
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    70
  in
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    71
    fold do_fact facts []; ()
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    72
  end
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    73
55212
blanchet
parents: 55201
diff changeset
    74
fun generate_features ctxt thys file_name =
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    75
  let
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    76
    val path = file_name |> Path.explode
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    77
    val _ = File.write path ""
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
    78
    val facts = all_facts ctxt |> filter_out (has_thys thys o snd)
48385
2779dea0b1e0 added locality as a MaSh feature
blanchet
parents: 48381
diff changeset
    79
    fun do_fact ((_, stature), th) =
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    80
      let
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50611
diff changeset
    81
        val name = nickname_of_thm th
48318
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
    82
        val feats =
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    83
          features_of ctxt (theory_of_thm th) 0 Symtab.empty stature [prop_of th] |> map fst
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    84
        val s = encode_str name ^ ": " ^ encode_strs (sort string_ord feats) ^ "\n"
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    85
      in File.append path s end
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    86
  in
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    87
    List.app do_fact facts
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    88
  end
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
    89
51034
0ee6039d2c8e distinguish one more kind of proofs
blanchet
parents: 51033
diff changeset
    90
val prover_marker = "$a"
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    91
val isar_marker = "$i"
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    92
val omitted_marker = "$o"
51034
0ee6039d2c8e distinguish one more kind of proofs
blanchet
parents: 51033
diff changeset
    93
val unprovable_marker = "$u" (* axiom or definition or characteristic theorem *)
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    94
val prover_failed_marker = "$x"
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    95
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    96
fun smart_dependencies_of ctxt params_opt facts name_tabs th isar_deps_opt =
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    97
  let
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
    98
    val (marker, deps) =
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
    99
      (case params_opt of
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   100
        SOME (params as {provers = prover :: _, ...}) =>
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   101
        prover_dependencies_of ctxt params prover 0 facts name_tabs th
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   102
        |>> (fn true => prover_marker | false => prover_failed_marker)
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   103
      | NONE =>
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   104
        let
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   105
          val deps =
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   106
            (case isar_deps_opt of
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   107
              SOME deps => deps
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   108
            | NONE => isar_dependencies_of name_tabs th)
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   109
        in (if null deps then unprovable_marker else isar_marker, deps) end)
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   110
  in
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   111
    (case trim_dependencies deps of
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   112
      SOME deps => (marker, deps)
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   113
    | NONE => (omitted_marker, []))
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   114
  end
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   115
57077
5bc908e5bf42 fixed semantics of 'linearize'
blanchet
parents: 57055
diff changeset
   116
fun generate_isar_or_prover_dependencies ctxt params_opt range thys linearize file_name =
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
   117
  let
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
   118
    val path = file_name |> Path.explode
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
   119
    val facts = all_facts ctxt |> filter_out (has_thys thys o snd)
50735
6b232d76cbc9 refined class handling, to prevent cycles in fact graph
blanchet
parents: 50624
diff changeset
   120
    val name_tabs = build_name_tables nickname_of_thm facts
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   121
50559
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   122
    fun do_fact (j, (_, th)) =
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   123
      if in_range range j then
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   124
        let
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50611
diff changeset
   125
          val name = nickname_of_thm th
50561
9a733bd6c0ba added tracing to ATP exporter
blanchet
parents: 50559
diff changeset
   126
          val _ = tracing ("Fact " ^ string_of_int j ^ ": " ^ name)
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
   127
          val access_facts =
57077
5bc908e5bf42 fixed semantics of 'linearize'
blanchet
parents: 57055
diff changeset
   128
            if linearize then take (j - 1) facts else facts |> filter_accessible_from th
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   129
          val (marker, deps) =
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
   130
            smart_dependencies_of ctxt params_opt access_facts name_tabs th NONE
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   131
        in encode_str name ^ ": " ^ marker ^ " " ^ encode_strs deps ^ "\n" end
50559
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   132
      else
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   133
        ""
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   134
50559
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   135
    val lines = Par_List.map do_fact (tag_list 1 facts)
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   136
  in
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   137
    File.write_list path lines
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   138
  end
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
   139
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   140
fun generate_isar_dependencies ctxt =
54118
f5fc8525838f tweaked signature
blanchet
parents: 54095
diff changeset
   141
  generate_isar_or_prover_dependencies ctxt NONE
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   142
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50442
diff changeset
   143
fun generate_prover_dependencies ctxt params =
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50442
diff changeset
   144
  generate_isar_or_prover_dependencies ctxt (SOME params)
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   145
50954
7bc58677860e added step to skip some queries
blanchet
parents: 50907
diff changeset
   146
fun is_bad_query ctxt ho_atp step j th isar_deps =
7bc58677860e added step to skip some queries
blanchet
parents: 50907
diff changeset
   147
  j mod step <> 0 orelse
50859
c0f38015a632 don't generate queries with empty dependency list
blanchet
parents: 50829
diff changeset
   148
  Thm.legacy_get_kind th = "" orelse
51020
242cd1632b0b removed spurious trimming
blanchet
parents: 50965
diff changeset
   149
  null isar_deps orelse
50523
0799339fea0f get rid of some junk facts in the MaSh evaluation driver
blanchet
parents: 50519
diff changeset
   150
  is_blacklisted_or_something ctxt ho_atp (Thm.get_name_hint th)
50515
c4a27ab89c9b shared bad MaSh query detection between MePo and MaSh, so that the generated files mirror each other
blanchet
parents: 50511
diff changeset
   151
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   152
fun generate_isar_or_prover_commands ctxt prover params_opt (range, step) thys linearize max_suggs
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   153
    file_name =
48234
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
   154
  let
55212
blanchet
parents: 55201
diff changeset
   155
    val ho_atp = is_ho_atp ctxt prover
48234
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
   156
    val path = file_name |> Path.explode
50349
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
   157
    val facts = all_facts ctxt
50485
3c6ac2da2f45 merge aliased theorems in MaSh dependencies, modulo symmetry of equality
blanchet
parents: 50484
diff changeset
   158
    val (new_facts, old_facts) = facts |> List.partition (has_thys thys o snd)
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   159
    val num_old_facts = length old_facts
50735
6b232d76cbc9 refined class handling, to prevent cycles in fact graph
blanchet
parents: 50624
diff changeset
   160
    val name_tabs = build_name_tables nickname_of_thm facts
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   161
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   162
    fun do_fact (j, (((name, (parents, ((_, stature), th))), prevs), const_tab)) =
50559
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   163
      if in_range range j then
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   164
        let
50561
9a733bd6c0ba added tracing to ATP exporter
blanchet
parents: 50559
diff changeset
   165
          val _ = tracing ("Fact " ^ string_of_int j ^ ": " ^ name)
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   166
          val isar_deps = isar_dependencies_of name_tabs th
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   167
          val do_query = not (is_bad_query ctxt ho_atp step j th isar_deps)
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   168
          val goal_feats =
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   169
            features_of ctxt (theory_of_thm th) (num_old_facts + j) const_tab stature [prop_of th]
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   170
            |> sort_wrt fst
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
   171
          val access_facts =
57077
5bc908e5bf42 fixed semantics of 'linearize'
blanchet
parents: 57055
diff changeset
   172
            (if linearize then take (j - 1) new_facts else new_facts |> filter_accessible_from th) @
5bc908e5bf42 fixed semantics of 'linearize'
blanchet
parents: 57055
diff changeset
   173
            old_facts
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   174
          val (marker, deps) =
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
   175
            smart_dependencies_of ctxt params_opt access_facts name_tabs th
51033
177db6811f11 added markers in proofs identifying origin of proofs, in eval driver
blanchet
parents: 51020
diff changeset
   176
                                  (SOME isar_deps)
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
   177
          val parents = if linearize then prevs else parents
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   178
          fun extra_features_of (((_, stature), th), weight) =
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   179
            [prop_of th]
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   180
            |> features_of ctxt (theory_of_thm th) (num_old_facts + j) const_tab stature
53141
d27e99a6a679 take chained and proximate facts into consideration when computing MaSh features
blanchet
parents: 53140
diff changeset
   181
            |> map (apsnd (fn r => weight * extra_feature_factor * r))
50559
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   182
          val query =
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   183
            if do_query then
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   184
              let
53141
d27e99a6a679 take chained and proximate facts into consideration when computing MaSh features
blanchet
parents: 53140
diff changeset
   185
                val query_feats =
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   186
                  new_facts
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   187
                  |> drop (j - num_extra_feature_facts)
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   188
                  |> take num_extra_feature_facts
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   189
                  |> rev
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   190
                  |> weight_facts_steeply
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   191
                  |> map extra_features_of
53159
a5805fe4e91c repaired num_extra_feature_facts + tuning
blanchet
parents: 53141
diff changeset
   192
                  |> rpair goal_feats |-> fold (union (eq_fst (op =)))
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   193
              in
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   194
                "? " ^ string_of_int max_suggs ^ " # " ^
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   195
                encode_str name ^ ": " ^ encode_strs parents ^ "; " ^
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   196
                encode_features query_feats ^ "\n"
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   197
              end
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   198
            else
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   199
              ""
50754
74a6adcb96ac also generate queries for goals with too many Isar dependencies
blanchet
parents: 50735
diff changeset
   200
          val update =
53121
5f727525b1ac only generate feature weights for queries -- they're not used elsewhere
blanchet
parents: 53120
diff changeset
   201
            "! " ^ encode_str name ^ ": " ^ encode_strs parents ^ "; " ^
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   202
            encode_strs (map fst goal_feats) ^ "; " ^ marker ^ " " ^ encode_strs deps ^ "\n"
50559
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   203
        in query ^ update end
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   204
      else
89c0d2f13cca MaSh exporter can now export subsets of the facts, as consecutive ranges
blanchet
parents: 50523
diff changeset
   205
        ""
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
   206
    val new_facts =
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
   207
      new_facts |> attach_parents_to_facts old_facts
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51177
diff changeset
   208
                |> map (`(nickname_of_thm o snd o snd))
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   209
    val hd_prevs = map (nickname_of_thm o snd) (the_list (try List.last old_facts))
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   210
    val prevss = hd_prevs :: map (single o fst) new_facts |> split_last |> fst
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   211
    val hd_const_tabs = fold (add_const_counts o prop_of o snd) old_facts Symtab.empty
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
   212
    val (const_tabs, _) =
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   213
      fold_map (`I oo add_const_counts o prop_of o snd o snd o snd) new_facts hd_const_tabs
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   214
    val lines = Par_List.map do_fact (tag_list 1 (new_facts ~~ prevss ~~ const_tabs))
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   215
  in
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   216
    File.write_list path lines
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   217
  end
48239
0016290f904c generate Meng--Paulson facts for evaluation purposes
blanchet
parents: 48235
diff changeset
   218
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   219
fun generate_isar_commands ctxt prover =
50954
7bc58677860e added step to skip some queries
blanchet
parents: 50907
diff changeset
   220
  generate_isar_or_prover_commands ctxt prover NONE
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   221
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50442
diff changeset
   222
fun generate_prover_commands ctxt (params as {provers = prover :: _, ...}) =
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50442
diff changeset
   223
  generate_isar_or_prover_commands ctxt prover (SOME params)
50411
c9023d78d1a6 export ATP and Isar commands separately
blanchet
parents: 50402
diff changeset
   224
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   225
fun generate_mepo_or_mash_suggestions mepo_or_mash_suggested_facts ctxt
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   226
    (params as {provers = prover :: _, ...}) (range, step) thys linearize max_suggs file_name =
48239
0016290f904c generate Meng--Paulson facts for evaluation purposes
blanchet
parents: 48235
diff changeset
   227
  let
55212
blanchet
parents: 55201
diff changeset
   228
    val ho_atp = is_ho_atp ctxt prover
48239
0016290f904c generate Meng--Paulson facts for evaluation purposes
blanchet
parents: 48235
diff changeset
   229
    val path = file_name |> Path.explode
50349
b79803ee14f3 generalized MaSh exporter to sets of theories
blanchet
parents: 48667
diff changeset
   230
    val facts = all_facts ctxt
50485
3c6ac2da2f45 merge aliased theorems in MaSh dependencies, modulo symmetry of equality
blanchet
parents: 50484
diff changeset
   231
    val (new_facts, old_facts) = facts |> List.partition (has_thys thys o snd)
50735
6b232d76cbc9 refined class handling, to prevent cycles in fact graph
blanchet
parents: 50624
diff changeset
   232
    val name_tabs = build_name_tables nickname_of_thm facts
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   233
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   234
    fun do_fact (j, ((_, th), old_facts)) =
50906
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   235
      if in_range range j then
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   236
        let
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   237
          val name = nickname_of_thm th
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   238
          val _ = tracing ("Fact " ^ string_of_int j ^ ": " ^ name)
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   239
          val goal = goal_of_thm (Proof_Context.theory_of ctxt) th
52196
2281f33e8da6 redid rac7830871177 to avoid duplicate fixed variable (e.g. lemma "P (a::nat)" proof - have "!!a::int. Q a" sledgehammer [e])
blanchet
parents: 52125
diff changeset
   240
          val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal goal 1 ctxt
50906
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   241
          val isar_deps = isar_dependencies_of name_tabs th
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   242
        in
50954
7bc58677860e added step to skip some queries
blanchet
parents: 50907
diff changeset
   243
          if is_bad_query ctxt ho_atp step j th isar_deps then
50906
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   244
            ""
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   245
          else
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   246
            let
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   247
              val suggs =
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   248
                old_facts
57077
5bc908e5bf42 fixed semantics of 'linearize'
blanchet
parents: 57055
diff changeset
   249
                |> not linearize ? filter_accessible_from th
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   250
                |> mepo_or_mash_suggested_facts ctxt params max_suggs hyp_ts concl_t
50906
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   251
                |> map (nickname_of_thm o snd)
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   252
            in
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   253
              encode_str name ^ ": " ^ encode_strs suggs ^ "\n"
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   254
            end
50906
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   255
        end
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   256
      else
67b04a8375b0 honor fact range for MePo as well
blanchet
parents: 50859
diff changeset
   257
        ""
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   258
50519
2951841ec011 parallelized MaSh exporter
blanchet
parents: 50515
diff changeset
   259
    fun accum x (yss as ys :: _) = (x :: ys) :: yss
2951841ec011 parallelized MaSh exporter
blanchet
parents: 50515
diff changeset
   260
    val old_factss = tl (fold accum new_facts [old_facts])
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   261
    val lines = Par_List.map do_fact (tag_list 1 (new_facts ~~ rev old_factss))
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   262
  in
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   263
    File.write_list path lines
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   264
  end
48234
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
   265
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   266
val generate_mepo_suggestions =
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   267
  generate_mepo_or_mash_suggestions
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   268
    (fn ctxt => fn params => fn max_suggs => fn hyp_ts => fn concl_t =>
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   269
       Sledgehammer_Fact.drop_duplicate_facts
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   270
       #> Sledgehammer_MePo.mepo_suggested_facts ctxt params max_suggs NONE hyp_ts concl_t)
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   271
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   272
fun generate_mash_suggestions ctxt params =
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   273
  (Sledgehammer_MaSh.mash_unlearn ctxt params;
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   274
   generate_mepo_or_mash_suggestions
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   275
     (fn ctxt => fn params as {provers = prover :: _, ...} => fn max_suggs => fn hyp_ts =>
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   276
          fn concl_t =>
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   277
        tap (Sledgehammer_MaSh.mash_learn_facts ctxt params prover false 2 false
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   278
          Sledgehammer_Util.one_year)
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   279
        #> Sledgehammer_MaSh.mash_suggested_facts ctxt params max_suggs hyp_ts concl_t
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   280
        #> fst) ctxt params)
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57077
diff changeset
   281
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   282
fun generate_mesh_suggestions max_suggs mash_file_name mepo_file_name mesh_file_name =
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   283
  let
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   284
    val mesh_path = Path.explode mesh_file_name
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   285
    val _ = File.write mesh_path ""
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   286
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   287
    fun do_fact (mash_line, mepo_line) =
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   288
      let
50829
01c9a515ccdd fixed escaping for MeSh encoder
blanchet
parents: 50826
diff changeset
   289
        val (name, mash_suggs) =
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   290
          extract_suggestions mash_line
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   291
          ||> weight_facts_steeply
50829
01c9a515ccdd fixed escaping for MeSh encoder
blanchet
parents: 50826
diff changeset
   292
        val (name', mepo_suggs) =
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   293
          extract_suggestions mepo_line
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53127
diff changeset
   294
          ||> weight_facts_steeply
50829
01c9a515ccdd fixed escaping for MeSh encoder
blanchet
parents: 50826
diff changeset
   295
        val _ = if name = name' then () else error "Input files out of sync."
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   296
        val mess =
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   297
          [(mepo_weight, (mepo_suggs, [])),
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   298
           (mash_weight, (mash_suggs, []))]
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   299
        val mesh_suggs = mesh_facts (op =) max_suggs mess
50829
01c9a515ccdd fixed escaping for MeSh encoder
blanchet
parents: 50826
diff changeset
   300
        val mesh_line = encode_str name ^ ": " ^ encode_strs mesh_suggs ^ "\n"
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   301
      in File.append mesh_path mesh_line end
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57005
diff changeset
   302
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   303
    val mash_lines = Path.explode mash_file_name |> File.read_lines
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   304
    val mepo_lines = Path.explode mepo_file_name |> File.read_lines
50907
a86708897266 graceful failure
blanchet
parents: 50906
diff changeset
   305
  in
a86708897266 graceful failure
blanchet
parents: 50906
diff changeset
   306
    if length mash_lines = length mepo_lines then
a86708897266 graceful failure
blanchet
parents: 50906
diff changeset
   307
      List.app do_fact (mash_lines ~~ mepo_lines)
a86708897266 graceful failure
blanchet
parents: 50906
diff changeset
   308
    else
a86708897266 graceful failure
blanchet
parents: 50906
diff changeset
   309
      warning "Skipped: MaSh file missing or out of sync with MePo file."
a86708897266 graceful failure
blanchet
parents: 50906
diff changeset
   310
  end
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50754
diff changeset
   311
48234
06216c789ac9 moved MaSh into own files
blanchet
parents:
diff changeset
   312
end;