src/HOL/TPTP/TPTP_Proof_Reconstruction_Test.thy
author wenzelm
Sun Sep 18 20:33:48 2016 +0200 (2016-09-18)
changeset 63915 bab633745c7f
parent 63167 0909deb8059b
child 65999 ee4cf96a9406
permissions -rw-r--r--
tuned proofs;
wenzelm@59720
     1
(*  Title:      HOL/TPTP/TPTP_Proof_Reconstruction_Test.thy
sultana@55596
     2
    Author:     Nik Sultana, Cambridge University Computer Laboratory
sultana@55596
     3
sultana@55596
     4
Various tests for the proof reconstruction module.
sultana@55596
     5
sultana@55596
     6
NOTE
sultana@55596
     7
  - looks for THF proofs in the path indicated by $THF_PROOFS
sultana@55597
     8
  - these proofs are generated using LEO-II with the following
sultana@55597
     9
    configuration choices: -po 1 -nux -nuc --expand_extuni
sultana@55597
    10
    You can simply filter LEO-II's output using the filter_proof
sultana@55597
    11
    script which is distributed with LEO-II.
sultana@55596
    12
*)
sultana@55596
    13
sultana@55596
    14
theory TPTP_Proof_Reconstruction_Test
sultana@55596
    15
imports TPTP_Test TPTP_Proof_Reconstruction
sultana@55596
    16
begin
sultana@55596
    17
sultana@55597
    18
section "Main function"
sultana@55597
    19
text "This function wraps all the reconstruction-related functions. Simply
sultana@55597
    20
 point it at a THF proof produced by LEO-II (using the configuration described
sultana@55597
    21
 above), and it will try to reconstruct it into an Isabelle/HOL theorem.
sultana@55597
    22
 It also returns the theory (into which the axioms and definitions used in the
sultana@55597
    23
 proof have been imported), but note that you can get the theory value from
sultana@55597
    24
 the theorem value."
sultana@55597
    25
wenzelm@63167
    26
ML \<open>
sultana@55597
    27
  reconstruct_leo2 (Path.explode "$THF_PROOFS/NUM667^1.p.out") @{theory}
wenzelm@63167
    28
\<close>
sultana@55597
    29
sultana@55597
    30
section "Prep for testing the component functions"
sultana@55597
    31
sultana@55596
    32
declare [[
sultana@55596
    33
  tptp_trace_reconstruction = false,
sultana@55596
    34
  tptp_test_all = false,
sultana@55596
    35
  (* tptp_test_all = true, *)
sultana@55596
    36
  tptp_test_timeout = 30,
sultana@55596
    37
  (* tptp_max_term_size = 200 *)
sultana@55596
    38
  tptp_max_term_size = 0
sultana@55596
    39
]]
sultana@55596
    40
wenzelm@60926
    41
declare [[ML_exception_trace, ML_print_depth = 200]]
sultana@55596
    42
sultana@55596
    43
sultana@55596
    44
section "Importing proofs"
sultana@55596
    45
wenzelm@63167
    46
ML \<open>
sultana@55596
    47
val probs =
sultana@55596
    48
  (* "$THF_PROOFS/SYN991^1.p.out" *) (*lacks conjecture*)
sultana@55596
    49
  (* "$THF_PROOFS/SYO040^2.p.out" *)
sultana@55596
    50
  (* "$THF_PROOFS/NUM640^1.p.out" *)
sultana@55596
    51
  (* "$THF_PROOFS/SEU553^2.p.out" *)
sultana@55596
    52
  (* "$THF_PROOFS/NUM665^1.p.out" *)
sultana@55596
    53
  (* "$THF_PROOFS/SEV161^5.p.out" *)
sultana@55596
    54
  (* "$THF_PROOFS/SET014^4.p.out" *)
sultana@55596
    55
  "$THF_PROOFS/NUM667^1.p.out"
sultana@55596
    56
  |> Path.explode
sultana@55596
    57
  |> single
sultana@55596
    58
sultana@55596
    59
val prob_names =
sultana@55596
    60
  probs
sultana@55596
    61
  |> map (Path.base #> Path.implode #> TPTP_Problem_Name.Nonstandard)
wenzelm@63167
    62
\<close>
sultana@55596
    63
wenzelm@63167
    64
setup \<open>
sultana@55596
    65
  if test_all @{context} then I
sultana@55596
    66
  else
sultana@55596
    67
    fold
sultana@55596
    68
     (fn path =>
sultana@55596
    69
       TPTP_Reconstruct.import_thm true [Path.dir path, Path.explode "$THF_PROOFS"] path leo2_on_load)
sultana@55596
    70
     probs
wenzelm@63167
    71
\<close>
sultana@55596
    72
sultana@55596
    73
text "Display nicely."
wenzelm@63167
    74
ML \<open>
sultana@55596
    75
fun display_nicely ctxt (fms : TPTP_Reconstruct.formula_meaning list) =
sultana@55596
    76
  List.app (fn ((n, data) : TPTP_Reconstruct.formula_meaning) =>
sultana@55596
    77
    Pretty.writeln
sultana@55596
    78
      (Pretty.block
sultana@55596
    79
        [Pretty.str (n ^ " "),
sultana@55596
    80
         Syntax.pretty_term ctxt (#fmla data),
sultana@55596
    81
         Pretty.str (
sultana@55596
    82
          if is_none (#source_inf_opt data) then ""
sultana@55596
    83
          else ("\n\tannotation: " ^
wenzelm@60925
    84
           @{make_string} (the (#source_inf_opt data : TPTP_Proof.source_info option))))])
sultana@55596
    85
    ) (rev fms);
sultana@55596
    86
sultana@55596
    87
(*FIXME hack for testing*)
sultana@55596
    88
fun test_fmla thy =
sultana@55596
    89
  TPTP_Reconstruct.get_fmlas_of_prob thy (hd prob_names);
sultana@55596
    90
sultana@55596
    91
fun test_pannot thy =
sultana@55596
    92
  TPTP_Reconstruct.get_pannot_of_prob thy (hd prob_names);
sultana@55596
    93
sultana@55596
    94
if test_all @{context} orelse prob_names = [] then ()
sultana@55596
    95
else
sultana@55596
    96
  display_nicely @{context}
sultana@55596
    97
  (#meta (test_pannot @{theory}))
sultana@55596
    98
(* To look at the original proof (i.e., before the proof transformations applied
sultana@55596
    99
   when the proof is loaded) replace previous line with:
sultana@55596
   100
   (test_fmla @{theory}
sultana@55596
   101
    |> map TPTP_Reconstruct.structure_fmla_meaning)
sultana@55596
   102
*)
wenzelm@63167
   103
\<close>
sultana@55596
   104
wenzelm@63167
   105
ML \<open>
sultana@55596
   106
fun step_range_tester f_x f_exn ctxt prob_name from until =
sultana@55596
   107
  let
sultana@55596
   108
    val max =
sultana@55596
   109
      case until of
sultana@55596
   110
          SOME x => x
sultana@55596
   111
        | NONE =>
sultana@55596
   112
            if is_some Int.maxInt then the Int.maxInt else 999999
sultana@55596
   113
    fun test_step x =
sultana@55596
   114
      if x > max then ()
sultana@55596
   115
      else
sultana@55596
   116
        (f_x x;
sultana@55596
   117
         (interpret_leo2_inference ctxt prob_name (Int.toString x); ())
sultana@55596
   118
         handle e => f_exn e; (*FIXME naive. should let Interrupt through*)
sultana@55596
   119
         (*assumes that inferences are numbered consecutively*)
sultana@55596
   120
         test_step (x + 1))
sultana@55596
   121
  in
sultana@55596
   122
    test_step from
sultana@55596
   123
  end
sultana@55596
   124
sultana@55596
   125
val step_range_tester_tracing =
sultana@55596
   126
  step_range_tester
sultana@55596
   127
   (fn x => tracing ("@step " ^ Int.toString x))
wenzelm@60925
   128
   (fn e => tracing ("!!" ^ @{make_string} e))
wenzelm@63167
   129
\<close>
sultana@55596
   130
wenzelm@63167
   131
ML \<open>
sultana@55596
   132
(*try to reconstruct each inference step*)
sultana@55596
   133
if test_all @{context} orelse prob_names = []
sultana@55596
   134
orelse true (*NOTE currently disabled*)
sultana@55596
   135
then ()
sultana@55596
   136
else
sultana@55596
   137
  let
sultana@55596
   138
    (*FIXME not guaranteed to be the right nodes*)
sultana@55596
   139
    val heur_start = 3
sultana@55596
   140
    val heur_end =
sultana@55596
   141
      hd (#meta (test_pannot @{theory}))
sultana@55596
   142
      |> #1
sultana@55596
   143
      |> Int.fromString
sultana@55596
   144
  in
sultana@55596
   145
    step_range_tester_tracing @{context} (hd prob_names) heur_start heur_end
sultana@55596
   146
  end
wenzelm@63167
   147
\<close>
sultana@55596
   148
sultana@55596
   149
sultana@55596
   150
section "Building metadata and tactics"
sultana@55596
   151
sultana@55596
   152
subsection "Building the skeleton"
wenzelm@63167
   153
ML \<open>
sultana@55596
   154
if test_all @{context} orelse prob_names = [] then []
sultana@55596
   155
else TPTP_Reconstruct.make_skeleton @{context} (test_pannot @{theory});
sultana@55596
   156
sultana@55596
   157
length it
wenzelm@63167
   158
\<close>
sultana@55596
   159
sultana@55596
   160
sultana@55596
   161
subsection "The 'one shot' tactic approach"
wenzelm@63167
   162
ML \<open>
sultana@55596
   163
val the_tactic =
sultana@55596
   164
  if test_all @{context} then []
sultana@55596
   165
  else
sultana@55596
   166
    map (fn prob_name =>
sultana@55596
   167
      (TPTP_Reconstruct.naive_reconstruct_tac @{context} interpret_leo2_inference (* auto_based_reconstruction_tac *) (* oracle_based_reconstruction_tac *) prob_name))
sultana@55596
   168
     prob_names;
wenzelm@63167
   169
\<close>
sultana@55596
   170
sultana@55596
   171
sultana@55596
   172
subsection "The 'piecemeal' approach"
wenzelm@63167
   173
ML \<open>
sultana@55596
   174
val the_tactics =
sultana@55596
   175
  if test_all @{context} then []
sultana@55596
   176
  else
sultana@55596
   177
    map (fn prob_name =>
sultana@55596
   178
      TPTP_Reconstruct.naive_reconstruct_tacs interpret_leo2_inference (* auto_based_reconstruction_tac *) (* oracle_based_reconstruction_tac *) prob_name @{context})
sultana@55596
   179
     prob_names;
wenzelm@63167
   180
\<close>
sultana@55596
   181
wenzelm@56281
   182
declare [[ML_print_depth = 2000]]
wenzelm@56281
   183
wenzelm@63167
   184
ML \<open>
sultana@55596
   185
the_tactics
sultana@55596
   186
|> map (filter (fn (_, _, x) => is_none x)
sultana@55596
   187
        #> map (fn (x, SOME y, _) => (x, cterm_of @{theory} y)))
wenzelm@63167
   188
\<close>
sultana@55596
   189
sultana@55596
   190
sultana@55596
   191
section "Using metadata and tactics"
sultana@55596
   192
text "There are various ways of testing the two ways (whole tactics or lists of tactics) of representing 'reconstructors'."
sultana@55596
   193
sultana@55596
   194
sultana@55596
   195
subsection "The 'one shot' tactic approach"
sultana@55596
   196
text "First we test whole tactics."
wenzelm@63167
   197
ML \<open>
sultana@55596
   198
(*produce thm*)
sultana@55596
   199
if test_all @{context} then []
sultana@55596
   200
else
sultana@55596
   201
  map (
sultana@55596
   202
    (* try *) (TPTP_Reconstruct.reconstruct @{context}
sultana@55596
   203
     (fn prob_name =>
sultana@55596
   204
       TPTP_Reconstruct.naive_reconstruct_tac @{context} interpret_leo2_inference prob_name
sultana@55596
   205
     (* oracle_based_reconstruction_tac *))))
sultana@55596
   206
   prob_names
wenzelm@63167
   207
\<close>
sultana@55596
   208
sultana@55596
   209
sultana@55596
   210
subsection "The 'piecemeal' approach"
wenzelm@63167
   211
ML \<open>
sultana@55596
   212
fun attac n = List.nth (List.nth (the_tactics, 0), n) |> #3 |> the |> snd
sultana@55596
   213
fun attac_to n 0 = attac n
sultana@55596
   214
  | attac_to n m = attac n THEN attac_to (n + 1) (m - 1)
sultana@55596
   215
fun shotac n = List.nth (List.nth (the_tactics, 0), n) |> #3 |> the |> fst
wenzelm@63167
   216
\<close>
sultana@55596
   217
wenzelm@63167
   218
ML \<open>
sultana@55596
   219
(*Given a list of reconstructed inferences (as in "the_tactics" above,
sultana@55596
   220
  count the number of failures and successes, and list the failed inference
sultana@55596
   221
  reconstructions.*)
sultana@55596
   222
fun evaluate_the_tactics [] acc = acc
sultana@55596
   223
  | evaluate_the_tactics ((node_no, (inf_name, inf_fmla, NONE)) :: xs) ((fai, suc), inf_list) =
sultana@55596
   224
      let
sultana@55596
   225
        val score = (fai + 1, suc)
sultana@55596
   226
        val index_info = get_index (fn (x, _) => if x = node_no then SOME true else NONE) inf_list
sultana@55596
   227
        val inf_list' =
sultana@55596
   228
          case index_info of
sultana@55596
   229
              NONE => (node_no, (inf_name, inf_fmla, 1)) :: inf_list
sultana@55596
   230
            | SOME (idx, _) =>
sultana@55596
   231
                nth_map idx (fn (node_no, (inf_name, inf_fmla, count)) => (node_no, (inf_name, inf_fmla, count + 1))) inf_list
sultana@55596
   232
      in
sultana@55596
   233
        evaluate_the_tactics xs (score, inf_list')
sultana@55596
   234
      end
sultana@55596
   235
  | evaluate_the_tactics ((_, (_, _, SOME _)) :: xs) ((fai, suc), inf_list) =
sultana@55596
   236
      evaluate_the_tactics xs ((fai, suc + 1), inf_list)
wenzelm@63167
   237
\<close>
sultana@55596
   238
sultana@55596
   239
sultana@55596
   240
text "Now we build a tactic by combining lists of tactics"
wenzelm@63167
   241
ML \<open>
sultana@55596
   242
(*given a list of tactics to be applied in sequence (i.e., they
sultana@55596
   243
  follow a skeleton), we build a single tactic, interleaving
sultana@55596
   244
  some tracing info to help with debugging.*)
sultana@57772
   245
fun step_by_step_tacs ctxt verbose (thm_tacs : (thm * tactic) list) : tactic =
sultana@55596
   246
    let
sultana@55596
   247
      fun interleave_tacs [] [] = all_tac
sultana@55596
   248
        | interleave_tacs (tac1 :: tacs1) (tac2 :: tacs2) =
sultana@55596
   249
            EVERY [tac1, tac2]
sultana@55596
   250
            THEN interleave_tacs tacs1 tacs2
sultana@55596
   251
      val thms_to_traceprint =
sultana@55596
   252
        map (fn thm => fn st =>
sultana@55596
   253
              (*FIXME uses makestring*)
wenzelm@60925
   254
              print_tac ctxt (@{make_string} thm) st)
sultana@55596
   255
sultana@55596
   256
    in
sultana@55596
   257
      if verbose then
sultana@55596
   258
        ListPair.unzip thm_tacs
sultana@55596
   259
        |> apfst (fn thms => enumerate 1 thms)
sultana@55596
   260
        |> apfst thms_to_traceprint
sultana@55596
   261
        |> uncurry interleave_tacs
sultana@55596
   262
      else EVERY (map #2 thm_tacs)
sultana@55596
   263
    end
wenzelm@63167
   264
\<close>
sultana@55596
   265
wenzelm@63167
   266
ML \<open>
sultana@55596
   267
(*apply step_by_step_tacs to all problems under test*)
sultana@57772
   268
fun narrated_tactics ctxt =
sultana@55596
   269
 map (map (#3 #> the)
sultana@57772
   270
      #> step_by_step_tacs ctxt false)
sultana@55596
   271
   the_tactics;
sultana@55596
   272
sultana@55596
   273
(*produce thm*)
sultana@55596
   274
(*use narrated_tactics to reconstruct all problems under test*)
sultana@55596
   275
if test_all @{context} then []
sultana@55596
   276
else
sultana@55596
   277
  map (fn (prob_name, tac) =>
sultana@55596
   278
         TPTP_Reconstruct.reconstruct @{context}
sultana@55596
   279
           (fn _ => tac) prob_name)
sultana@57772
   280
    (ListPair.zip (prob_names, narrated_tactics @{context}))
wenzelm@63167
   281
\<close>
sultana@55596
   282
sultana@55596
   283
sultana@55596
   284
subsection "Manually using 'piecemeal' approach"
sultana@55596
   285
text "Another testing possibility involves manually creating a lemma
sultana@55596
   286
and running through the list of tactics generating to prove that lemma. The following code shows the goal of each problem under test, and then for each problem returns the list of tactics which can be invoked individually as shown below."
wenzelm@63167
   287
ML \<open>
sultana@55596
   288
fun show_goal ctxt prob_name =
sultana@55596
   289
  let
sultana@55596
   290
    val thy = Proof_Context.theory_of ctxt
sultana@55596
   291
    val pannot = TPTP_Reconstruct.get_pannot_of_prob thy prob_name
sultana@55596
   292
  in
sultana@55596
   293
    #meta pannot
wenzelm@58412
   294
    |> filter (fn (_, info) =>
sultana@55596
   295
        #role info = TPTP_Syntax.Role_Conjecture)
sultana@55596
   296
    |> hd
sultana@55596
   297
    |> snd |> #fmla
sultana@55596
   298
    |> cterm_of thy
sultana@55596
   299
  end;
sultana@55596
   300
sultana@55596
   301
if test_all @{context} then []
sultana@55596
   302
else
sultana@55596
   303
  map (show_goal @{context}) prob_names;
wenzelm@63167
   304
\<close>
sultana@55596
   305
wenzelm@63167
   306
ML \<open>
sultana@55596
   307
(*project out the list of tactics from "the_tactics"*)
sultana@55596
   308
val just_the_tacs  =
sultana@55596
   309
 map (map (#3 #> the #> #2))
sultana@55596
   310
   the_tactics;
sultana@55596
   311
sultana@55596
   312
map length just_the_tacs
wenzelm@63167
   313
\<close>
sultana@55596
   314
wenzelm@63167
   315
ML \<open>
sultana@55596
   316
(*like just_the_tacs, but extract the thms, to inspect their thys*)
sultana@55596
   317
val just_the_thms  =
sultana@55596
   318
 map (map (#3 #> the #> #1))
sultana@55596
   319
   the_tactics;
sultana@55596
   320
sultana@55596
   321
map length just_the_thms;
wenzelm@63167
   322
\<close>
sultana@55596
   323
wenzelm@63167
   324
ML \<open>
wenzelm@60649
   325
val axms_of_thy =
wenzelm@60649
   326
  `Theory.axioms_of
sultana@55596
   327
  #> apsnd cterm_of
sultana@55596
   328
  #> swap
sultana@55596
   329
  #> apsnd (map snd)
sultana@55596
   330
  #> uncurry map
wenzelm@63167
   331
\<close>
sultana@55596
   332
wenzelm@63167
   333
ML \<open>
sultana@55596
   334
(*Show the skeleton-level inference which is done by each element of just_the_tacs. This is useful when debugging using the technique shown next*)
sultana@55596
   335
if test_all @{context} orelse prob_names = [] then ()
sultana@55596
   336
else
sultana@55596
   337
  the_tactics
sultana@55596
   338
  |> hd
sultana@55596
   339
  |> map #1
sultana@55596
   340
  |> TPTP_Reconstruct_Library.enumerate 0
wenzelm@60925
   341
  |> List.app (@{make_string} #> writeln)
wenzelm@63167
   342
\<close>
sultana@55596
   343
wenzelm@63167
   344
ML \<open>
wenzelm@60649
   345
fun leo2_tac_wrap ctxt prob_name step i st =
wenzelm@60649
   346
  rtac (interpret_leo2_inference ctxt prob_name step) i st
wenzelm@63167
   347
\<close>
sultana@55596
   348
sultana@55596
   349
(*FIXME move these examples elsewhere*)
sultana@55596
   350
(*
wenzelm@61076
   351
lemma "\<forall>(Xj::TPTP_Interpret.ind) Xk::TPTP_Interpret.ind.
sultana@55596
   352
        bnd_cCKB6_BLACK Xj Xk \<longrightarrow>
sultana@55596
   353
        bnd_cCKB6_BLACK (bnd_s (bnd_s (bnd_s Xj))) (bnd_s Xk)"
sultana@55596
   354
apply (tactic {*nth (nth just_the_tacs 0) 0*})
sultana@55596
   355
apply (tactic {*nth (nth just_the_tacs 0) 1*})
sultana@55596
   356
apply (tactic {*nth (nth just_the_tacs 0) 2*})
sultana@55596
   357
apply (tactic {*nth (nth just_the_tacs 0) 3*})
sultana@55596
   358
apply (tactic {*nth (nth just_the_tacs 0) 4*})
sultana@55596
   359
apply (tactic {*nth (nth just_the_tacs 0) 5*})
sultana@55596
   360
ML_prf "nth (hd the_tactics) 6"
sultana@55596
   361
apply (tactic {*nth (nth just_the_tacs 0) 6*})
sultana@55596
   362
apply (tactic {*nth (nth just_the_tacs 0) 7*})
sultana@55596
   363
apply (tactic {*nth (nth just_the_tacs 0) 8*})
sultana@55596
   364
apply (tactic {*nth (nth just_the_tacs 0) 9*})
sultana@55596
   365
apply (tactic {*nth (nth just_the_tacs 0) 10*})
sultana@55596
   366
apply (tactic {*nth (nth just_the_tacs 0) 11*})
sultana@55596
   367
apply (tactic {*nth (nth just_the_tacs 0) 12*})
sultana@55596
   368
apply (tactic {*nth (nth just_the_tacs 0) 13*})
sultana@55596
   369
apply (tactic {*nth (nth just_the_tacs 0) 14*})
sultana@55596
   370
apply (tactic {*nth (nth just_the_tacs 0) 15*})
sultana@55596
   371
sultana@55596
   372
apply (tactic {*nth (nth just_the_tacs 0) 16*})
sultana@55596
   373
sultana@55596
   374
(*
sultana@55596
   375
apply (tactic {*
sultana@55596
   376
rtac (interpret_leo2_inference @{context} (hd prob_names) "8") 1
sultana@55596
   377
*})
sultana@55596
   378
apply (tactic {*
sultana@55596
   379
rtac (interpret_leo2_inference @{context} (hd prob_names) "7") 1
sultana@55596
   380
*})
sultana@55596
   381
apply (tactic {*
sultana@55596
   382
rtac (interpret_leo2_inference @{context} (hd prob_names) "6") 1
sultana@55596
   383
*})
sultana@55596
   384
(*
sultana@55596
   385
apply (tactic {*
sultana@55596
   386
rtac (interpret_leo2_inference @{context} (hd prob_names) "4") 1
sultana@55596
   387
*})
sultana@55596
   388
*)
sultana@55596
   389
*)
sultana@55596
   390
sultana@55596
   391
apply (tactic {*nth (nth just_the_tacs 0) 17*})
sultana@55596
   392
apply (tactic {*nth (nth just_the_tacs 0) 18*})
sultana@55596
   393
apply (tactic {*nth (nth just_the_tacs 0) 19*})
sultana@55596
   394
apply (tactic {*nth (nth just_the_tacs 0) 20*})
sultana@55596
   395
apply (tactic {*nth (nth just_the_tacs 0) 21*})
sultana@55596
   396
sultana@55596
   397
ML_prf "nth (hd the_tactics) 21"
sultana@55596
   398
ML_prf "nth (hd the_tactics) 22"
sultana@55596
   399
sultana@55596
   400
apply (tactic {*nth (nth just_the_tacs 0) 22*})
sultana@55596
   401
apply (tactic {*nth (nth just_the_tacs 0) 23*})
sultana@55596
   402
apply (tactic {*nth (nth just_the_tacs 0) 24*})
sultana@55596
   403
apply (tactic {*nth (nth just_the_tacs 0) 25*})
sultana@55596
   404
sultana@55596
   405
sultana@55596
   406
ML_prf "nth (hd the_tactics) 19"
sultana@55596
   407
sultana@55596
   408
apply (tactic {*
sultana@55596
   409
interpret_leo2_inference_wrap (hd prob_names) "8" 1
sultana@55596
   410
*})
sultana@55596
   411
apply (tactic {*
sultana@55596
   412
interpret_leo2_inference_wrap (hd prob_names) "7" 1
sultana@55596
   413
*})
sultana@55596
   414
apply (tactic {*
sultana@55596
   415
interpret_leo2_inference_wrap (hd prob_names) "6" 1
sultana@55596
   416
*})
sultana@55596
   417
apply (tactic {*
sultana@55596
   418
interpret_leo2_inference_wrap (hd prob_names) "4" 1
sultana@55596
   419
*})
sultana@55596
   420
sultana@55596
   421
ML_prf "nth (hd the_tactics) 20"
sultana@55596
   422
ML_prf "nth (hd the_tactics) 21"
sultana@55596
   423
ML_prf "nth (hd the_tactics) 22"
sultana@55596
   424
*)
sultana@55596
   425
sultana@55596
   426
(*
sultana@55596
   427
lemma "bnd_powersetE1 \<longrightarrow>
sultana@55596
   428
     bnd_sepInPowerset \<longrightarrow>
sultana@55596
   429
     (\<forall>A Xphi. bnd_subset (bnd_dsetconstr A Xphi) A)"
sultana@55596
   430
apply (tactic {*nth (nth just_the_tacs 0) 0*})
sultana@55596
   431
apply (tactic {*nth (nth just_the_tacs 0) 1*})
sultana@55596
   432
apply (tactic {*nth (nth just_the_tacs 0) 2*})
sultana@55596
   433
apply (tactic {*nth (nth just_the_tacs 0) 3*})
sultana@55596
   434
apply (tactic {*nth (nth just_the_tacs 0) 4*})
sultana@55596
   435
apply (tactic {*nth (nth just_the_tacs 0) 5*})
sultana@55596
   436
ML_prf "nth (hd the_tactics) 6"
sultana@55596
   437
apply (tactic {*nth (nth just_the_tacs 0) 6*})
sultana@55596
   438
apply (tactic {*nth (nth just_the_tacs 0) 7*})
sultana@55596
   439
apply (tactic {*nth (nth just_the_tacs 0) 8*})
sultana@55596
   440
apply (tactic {*nth (nth just_the_tacs 0) 9*})
sultana@55596
   441
apply (tactic {*nth (nth just_the_tacs 0) 10*})
sultana@55596
   442
apply (tactic {*nth (nth just_the_tacs 0) 11*})
sultana@55596
   443
apply (tactic {*nth (nth just_the_tacs 0) 12*})
sultana@55596
   444
apply (tactic {*nth (nth just_the_tacs 0) 13*})
sultana@55596
   445
apply (tactic {*nth (nth just_the_tacs 0) 14*})
sultana@55596
   446
apply (tactic {*nth (nth just_the_tacs 0) 15*})
sultana@55596
   447
apply (tactic {*nth (nth just_the_tacs 0) 16*})
sultana@55596
   448
apply (tactic {*nth (nth just_the_tacs 0) 17*})
sultana@55596
   449
apply (tactic {*nth (nth just_the_tacs 0) 18*})
sultana@55596
   450
apply (tactic {*nth (nth just_the_tacs 0) 19*})
sultana@55596
   451
apply (tactic {*nth (nth just_the_tacs 0) 20*})
sultana@55596
   452
apply (tactic {*nth (nth just_the_tacs 0) 21*})
sultana@55596
   453
apply (tactic {*nth (nth just_the_tacs 0) 22*})
sultana@55596
   454
apply (tactic {*nth (nth just_the_tacs 0) 23*})
sultana@55596
   455
apply (tactic {*nth (nth just_the_tacs 0) 24*})
sultana@55596
   456
apply (tactic {*nth (nth just_the_tacs 0) 25*})
sultana@55596
   457
(* apply (tactic {*nth (nth just_the_tacs 0) 26*}) *)
sultana@55596
   458
ML_prf "nth (hd the_tactics) 26"
sultana@55596
   459
apply (subgoal_tac "(\<not> (\<forall>A Xphi. bnd_subset (bnd_dsetconstr A Xphi) A)) =
sultana@55596
   460
       True \<Longrightarrow>
sultana@55596
   461
       (\<not> bnd_subset (bnd_dsetconstr bnd_sK1 bnd_sK2) bnd_sK1) =
sultana@55596
   462
       True")
sultana@55596
   463
prefer 2
sultana@55596
   464
apply (thin_tac "(bnd_powersetE1 \<longrightarrow>
sultana@55596
   465
      bnd_sepInPowerset \<longrightarrow>
sultana@55596
   466
      (\<forall>A Xphi. bnd_subset (bnd_dsetconstr A Xphi) A)) =
sultana@55596
   467
     False")
sultana@55596
   468
apply (tactic {*extcnf_combined_simulator_tac (hd prob_names) 1*})
sultana@55596
   469
apply (tactic {*extcnf_combined_simulator_tac (hd prob_names) 1*})
sultana@55596
   470
apply (tactic {*extcnf_combined_simulator_tac (hd prob_names) 1*})
sultana@55596
   471
apply (tactic {*extcnf_combined_simulator_tac (hd prob_names) 1*})
sultana@55596
   472
sultana@55596
   473
apply simp
sultana@55596
   474
sultana@55596
   475
(* apply (tactic {*nth (nth just_the_tacs 0) 26*}) *)
sultana@55596
   476
apply (tactic {*nth (nth just_the_tacs 0) 27*})
sultana@55596
   477
apply (tactic {*nth (nth just_the_tacs 0) 28*})
sultana@55596
   478
apply (tactic {*nth (nth just_the_tacs 0) 29*})
sultana@55596
   479
apply (tactic {*nth (nth just_the_tacs 0) 30*})
sultana@55596
   480
apply (tactic {*nth (nth just_the_tacs 0) 31*})
sultana@55596
   481
apply (tactic {*nth (nth just_the_tacs 0) 32*})
sultana@55596
   482
apply (tactic {*nth (nth just_the_tacs 0) 33*})
sultana@55596
   483
apply (tactic {*nth (nth just_the_tacs 0) 34*})
sultana@55596
   484
apply (tactic {*nth (nth just_the_tacs 0) 35*})
sultana@55596
   485
apply (tactic {*nth (nth just_the_tacs 0) 36*})
sultana@55596
   486
apply (tactic {*nth (nth just_the_tacs 0) 37*})
sultana@55596
   487
apply (tactic {*nth (nth just_the_tacs 0) 38*})
sultana@55596
   488
apply (tactic {*nth (nth just_the_tacs 0) 39*})
sultana@55596
   489
apply (tactic {*nth (nth just_the_tacs 0) 40*})
sultana@55596
   490
apply (tactic {*nth (nth just_the_tacs 0) 41*})
sultana@55596
   491
apply (tactic {*nth (nth just_the_tacs 0) 42*})
sultana@55596
   492
apply (tactic {*nth (nth just_the_tacs 0) 43*})
sultana@55596
   493
apply (tactic {*nth (nth just_the_tacs 0) 44*})
sultana@55596
   494
apply (tactic {*nth (nth just_the_tacs 0) 45*})
sultana@55596
   495
apply (tactic {*nth (nth just_the_tacs 0) 46*})
sultana@55596
   496
apply (tactic {*nth (nth just_the_tacs 0) 47*})
sultana@55596
   497
apply (tactic {*nth (nth just_the_tacs 0) 48*})
sultana@55596
   498
apply (tactic {*nth (nth just_the_tacs 0) 49*})
sultana@55596
   499
apply (tactic {*nth (nth just_the_tacs 0) 50*})
sultana@55596
   500
apply (tactic {*nth (nth just_the_tacs 0) 51*})
sultana@55596
   501
done
sultana@55596
   502
*)
sultana@55596
   503
sultana@55596
   504
(*
sultana@55596
   505
We can use just_the_tacs as follows:
sultana@55596
   506
sultana@55596
   507
(this is from SEV012^5.p.out)
sultana@55596
   508
lemma "((\<forall>(Xx :: bool) (Xy :: bool). True \<longrightarrow> True) \<and>
sultana@55596
   509
      (\<forall>(Xx :: bool) (Xy :: bool) (Xz :: bool). True \<and> True \<longrightarrow> True)) \<and>
sultana@55596
   510
     (\<lambda>(Xx :: bool) (Xy :: bool). True) = (\<lambda>Xx Xy. True)"
sultana@55596
   511
apply (tactic {*nth (nth just_the_tacs 0) 0*})
sultana@55596
   512
apply (tactic {*nth (nth just_the_tacs 0) 1*})
sultana@55596
   513
apply (tactic {*nth (nth just_the_tacs 0) 2*})
sultana@55596
   514
apply (tactic {*nth (nth just_the_tacs 0) 3*})
sultana@55596
   515
apply (tactic {*nth (nth just_the_tacs 0) 4*})
sultana@55596
   516
apply (tactic {*nth (nth just_the_tacs 0) 5*})
sultana@55596
   517
ML_prf "nth (hd the_tactics) 6"
sultana@55596
   518
apply (tactic {*nth (nth just_the_tacs 0) 6*})
sultana@55596
   519
apply (tactic {*nth (nth just_the_tacs 0) 7*})
sultana@55596
   520
apply (tactic {*nth (nth just_the_tacs 0) 8*})
sultana@55596
   521
apply (tactic {*nth (nth just_the_tacs 0) 9*})
sultana@55596
   522
apply (tactic {*nth (nth just_the_tacs 0) 10*})
sultana@55596
   523
apply (tactic {*nth (nth just_the_tacs 0) 11*})
sultana@55596
   524
apply (tactic {*nth (nth just_the_tacs 0) 12*})
sultana@55596
   525
apply (tactic {*nth (nth just_the_tacs 0) 13*})
sultana@55596
   526
apply (tactic {*nth (nth just_the_tacs 0) 14*})
sultana@55596
   527
apply (tactic {*nth (nth just_the_tacs 0) 15*})
sultana@55596
   528
apply (tactic {*nth (nth just_the_tacs 0) 16*})
sultana@55596
   529
apply (tactic {*nth (nth just_the_tacs 0) 17*})
sultana@55596
   530
apply (tactic {*nth (nth just_the_tacs 0) 18*})
sultana@55596
   531
apply (tactic {*nth (nth just_the_tacs 0) 19*})
sultana@55596
   532
apply (tactic {*nth (nth just_the_tacs 0) 20*})
sultana@55596
   533
apply (tactic {*nth (nth just_the_tacs 0) 21*})
sultana@55596
   534
apply (tactic {*nth (nth just_the_tacs 0) 22*})
sultana@55596
   535
done
sultana@55596
   536
sultana@55596
   537
(*
sultana@55596
   538
We could also use previous definitions directly,
sultana@55596
   539
e.g. the following should prove the goal at a go:
sultana@55596
   540
- apply (tactic {*narrated_tactics |> hd |> hd*})
sultana@55596
   541
- apply (tactic {*
sultana@55596
   542
    TPTP_Reconstruct.naive_reconstruct_tac
sultana@55596
   543
     interpret_leo2_inference
sultana@55596
   544
     (hd prob_names)
sultana@55596
   545
     @{context}*})
sultana@55596
   546
(Note that the previous two methods don't work in this
sultana@55596
   547
 "lemma" testing mode, not sure why. The previous methods
sultana@55596
   548
 (producing the thm values directly) should work though.)
sultana@55596
   549
*)
sultana@55596
   550
*)
sultana@55596
   551
sultana@55596
   552
sultana@55596
   553
section "Testing against benchmark"
sultana@55596
   554
wenzelm@63167
   555
ML \<open>
sultana@55596
   556
(*if reconstruction_info value is NONE then a big error must have occurred*)
sultana@55596
   557
type reconstruction_info =
sultana@55596
   558
  ((int(*no of failures*) * int(*no of successes*)) *
sultana@55596
   559
  (TPTP_Reconstruct.rolling_stock * term option(*inference formula*) * int (*number of times the inference occurs in the skeleton*)) list) option
sultana@55596
   560
sultana@55596
   561
datatype proof_contents =
sultana@55596
   562
    No_info
sultana@55596
   563
  | Empty
sultana@55596
   564
  | Nonempty of reconstruction_info
sultana@55596
   565
sultana@55596
   566
(*To make output less cluttered in whole-run tests*)
sultana@55596
   567
fun erase_inference_fmlas (Nonempty (SOME (outline, inf_info))) =
sultana@55596
   568
      Nonempty (SOME (outline, map (fn (inf_name, _, count) => (inf_name, NONE, count)) inf_info))
sultana@55596
   569
  | erase_inference_fmlas x = x
wenzelm@63167
   570
\<close>
sultana@55596
   571
wenzelm@63167
   572
ML \<open>
sultana@55596
   573
(*Report on how many inferences in a proof are reconstructed, and give some
sultana@55596
   574
  info about the inferences for which reconstruction failed.*)
sultana@55596
   575
fun test_partial_reconstruction thy prob_file =
sultana@55596
   576
  let
sultana@55596
   577
    val prob_name =
sultana@55596
   578
      (Path.base #> Path.implode #> TPTP_Problem_Name.Nonstandard) prob_file
sultana@55596
   579
sultana@55596
   580
    val thy' =
sultana@55596
   581
      try
sultana@55596
   582
       (TPTP_Reconstruct.import_thm
sultana@55596
   583
        true
sultana@55596
   584
        [Path.dir prob_file, Path.explode "$TPTP"]
sultana@55596
   585
        prob_file leo2_on_load)
sultana@55596
   586
       thy
sultana@55596
   587
sultana@55596
   588
    val ctxt' =
sultana@55596
   589
      if is_some thy' then SOME (Proof_Context.init_global (the thy')) else NONE
sultana@55596
   590
sultana@55596
   591
    (*to test if proof is empty*)
sultana@55596
   592
    val fms =
sultana@55596
   593
      if is_some thy'
sultana@55596
   594
      then SOME (TPTP_Reconstruct.get_fmlas_of_prob (the thy') prob_name)
sultana@55596
   595
      else NONE
sultana@55596
   596
sultana@55596
   597
    val the_tactics =
sultana@55596
   598
      if is_some thy' then
sultana@55596
   599
        SOME (TPTP_Reconstruct.naive_reconstruct_tacs (* metis_based_reconstruction_tac *)
sultana@55596
   600
interpret_leo2_inference (* auto_based_reconstruction_tac *) (* oracle_based_reconstruction_tac *) prob_name (the ctxt'))
sultana@55596
   601
      else NONE
sultana@55596
   602
wenzelm@60925
   603
(* val _ = tracing ("tt=" ^ @{make_string} the_tactics) *)
sultana@55596
   604
sultana@55596
   605
    val skeleton =
sultana@55596
   606
      if is_some thy' then
sultana@55596
   607
        SOME (TPTP_Reconstruct.make_skeleton (the ctxt')
sultana@55596
   608
              (TPTP_Reconstruct.get_pannot_of_prob (the thy') prob_name))
sultana@55596
   609
      else NONE
sultana@55596
   610
sultana@55596
   611
    val skeleton_and_tactics =
sultana@55596
   612
      if is_some thy' then
sultana@55596
   613
        SOME (ListPair.zip (the skeleton, the the_tactics))
sultana@55596
   614
      else NONE
sultana@55596
   615
sultana@55596
   616
    val result =
sultana@55596
   617
      if is_some thy' then
sultana@55596
   618
        SOME (evaluate_the_tactics (the skeleton_and_tactics)
sultana@55596
   619
              ((0, 0), []))
sultana@55596
   620
      else NONE
sultana@55596
   621
sultana@55596
   622
    (*strip node names*)
sultana@55596
   623
    val result' =
sultana@55596
   624
      if is_some result then SOME (apsnd (map #2) (the result)) else NONE
sultana@55596
   625
  in
sultana@55596
   626
    if is_some fms andalso List.null (the fms) then Empty
sultana@55596
   627
    else Nonempty result'
sultana@55596
   628
  end
wenzelm@63167
   629
\<close>
sultana@55596
   630
wenzelm@63167
   631
ML \<open>
sultana@55596
   632
  (*default timeout is 1 min*)
sultana@55596
   633
  fun reconstruct timeout light_output file thy =
sultana@55596
   634
    let
sultana@55596
   635
      val timer = Timer.startRealTimer ()
sultana@55596
   636
    in
wenzelm@62519
   637
      Timeout.apply (Time.fromSeconds (if timeout = 0 then 60 else timeout))
sultana@55596
   638
       (test_partial_reconstruction thy
sultana@55596
   639
        #> light_output ? erase_inference_fmlas
wenzelm@60925
   640
        #> @{make_string} (* FIXME *)
wenzelm@60925
   641
        #> (fn s => report (Proof_Context.init_global thy) (@{make_string} file ^ " === " ^ s ^
wenzelm@60925
   642
             " t=" ^ (Timer.checkRealTimer timer |> Time.toMilliseconds |> @{make_string}))))
sultana@55596
   643
       file
sultana@55596
   644
    end
wenzelm@63167
   645
\<close>
sultana@55596
   646
wenzelm@63167
   647
ML \<open>
sultana@55596
   648
  (*this version of "reconstruct" builds theorems, instead of lists of reconstructed inferences*)
sultana@55596
   649
  (*default timeout is 1 min*)
sultana@55596
   650
  fun reconstruct timeout file thy =
sultana@55596
   651
    let
sultana@55596
   652
      val timer = Timer.startRealTimer ()
sultana@55596
   653
      val thy' =
sultana@55596
   654
        TPTP_Reconstruct.import_thm true
sultana@55596
   655
         [Path.dir file, Path.explode "$TPTP"]
sultana@55596
   656
         file leo2_on_load thy
sultana@55596
   657
sultana@55596
   658
      val ctxt = Proof_Context.init_global thy' (*FIXME pass ctxt instead of thy*)
sultana@55596
   659
      val prob_name =
sultana@55596
   660
        file
sultana@55596
   661
        |> Path.base
sultana@55596
   662
        |> Path.implode
sultana@55596
   663
        |> TPTP_Problem_Name.Nonstandard
sultana@55596
   664
    in
wenzelm@62519
   665
      Timeout.apply (Time.fromSeconds (if timeout = 0 then 60 else timeout))
sultana@55596
   666
       (fn prob_name =>
sultana@55596
   667
        (can
sultana@55596
   668
          (TPTP_Reconstruct.reconstruct ctxt (fn prob_name =>
sultana@55596
   669
            TPTP_Reconstruct.naive_reconstruct_tac ctxt interpret_leo2_inference prob_name (* oracle_based_reconstruction_tac *))) prob_name )
sultana@55596
   670
       |> (fn s => report ctxt (Path.print file ^ " === " ^ Bool.toString s ^
wenzelm@60925
   671
             " t=" ^ (Timer.checkRealTimer timer |> Time.toMilliseconds |> @{make_string}))))
sultana@55596
   672
       prob_name
sultana@55596
   673
    end
wenzelm@63167
   674
\<close>
sultana@55596
   675
wenzelm@63167
   676
ML \<open>
sultana@55596
   677
  fun reconstruction_test timeout ctxt =
sultana@55596
   678
    test_fn ctxt
sultana@55596
   679
     (fn file => reconstruct timeout file (Proof_Context.theory_of ctxt))
sultana@55596
   680
     "reconstructor"
sultana@55596
   681
     ()
wenzelm@63167
   682
\<close>
sultana@55596
   683
wenzelm@63167
   684
ML \<open>
sultana@55596
   685
datatype examination_results =
sultana@55596
   686
    Whole_proof of string(*filename*) * proof_contents
sultana@55596
   687
  | Specific_rule of string(*filename*) * string(*inference rule*) * term option list
sultana@55596
   688
sultana@55596
   689
(*Look out for failures reconstructing a particular inference rule*)
sultana@55596
   690
fun filter_failures inference_name (Whole_proof (filename, results)) =
sultana@55596
   691
  let
sultana@55596
   692
    val filtered_results =
sultana@55596
   693
      case results of
sultana@55596
   694
          Nonempty (SOME results') =>
sultana@55596
   695
            #2 results'
wenzelm@58411
   696
            |> maps (fn (stock as TPTP_Reconstruct.Annotated_step (_, inf_name), inf_fmla, _) =>
sultana@55596
   697
                 if inf_name = inference_name then [inf_fmla] else [])
sultana@55596
   698
        | _ => []
sultana@55596
   699
  in Specific_rule (filename, inference_name, filtered_results) end
sultana@55596
   700
sultana@55596
   701
(*Returns detailed info about a proof-reconstruction attempt.
sultana@55596
   702
  If rule_name is specified then the related failed inferences
sultana@55596
   703
  are returned, otherwise all failed inferences are returned.*)
sultana@55596
   704
fun examine_failed_inferences ctxt filename rule_name =
sultana@55596
   705
  let
sultana@55596
   706
    val thy = Proof_Context.theory_of ctxt
sultana@55596
   707
    val prob_file = Path.explode filename
sultana@55596
   708
    val results =
sultana@55596
   709
      if test_all ctxt then No_info
sultana@55596
   710
      else test_partial_reconstruction thy prob_file
sultana@55596
   711
  in
sultana@55596
   712
    Whole_proof (filename, results)
sultana@55596
   713
    |> is_some rule_name ? (fn x =>
sultana@55596
   714
                             filter_failures (the rule_name) x)
sultana@55596
   715
  end
wenzelm@63167
   716
\<close>
sultana@55596
   717
wenzelm@63167
   718
ML \<open>
sultana@55596
   719
exception NONSENSE
sultana@55596
   720
sultana@55596
   721
fun annotation_or_id (TPTP_Reconstruct.Step n) = n
sultana@55596
   722
  | annotation_or_id (TPTP_Reconstruct.Annotated_step (n, anno)) = anno
sultana@55596
   723
  | annotation_or_id TPTP_Reconstruct.Assumed = "assumption"
sultana@55596
   724
  | annotation_or_id TPTP_Reconstruct.Unconjoin = "conjI"
sultana@55596
   725
  | annotation_or_id TPTP_Reconstruct.Caboose = "(end)"
sultana@55596
   726
  | annotation_or_id (TPTP_Reconstruct.Synth_step s) = s
sultana@55596
   727
  | annotation_or_id (TPTP_Reconstruct.Split (split_node, soln_node, _)) = "split_at " ^ split_node ^ " " ^ soln_node;
sultana@55596
   728
sultana@55596
   729
fun count_failures (Whole_proof (_, No_info)) = raise NONSENSE
sultana@55596
   730
  | count_failures (Whole_proof (_, Empty)) = raise NONSENSE
sultana@55596
   731
  | count_failures (Whole_proof (_, Nonempty NONE)) = raise NONSENSE
sultana@55596
   732
  | count_failures (Whole_proof (_, Nonempty (SOME (((n, _), _))))) = n
sultana@55596
   733
  | count_failures (Specific_rule (_, _, t)) = length t
sultana@55596
   734
sultana@55596
   735
fun pre_classify_failures [] alist = alist
sultana@55596
   736
  | pre_classify_failures ((stock, _, _) :: xs) alist =
sultana@55596
   737
      let
sultana@55596
   738
        val inf = annotation_or_id stock
sultana@55596
   739
        val count = AList.lookup (op =) alist inf
sultana@55596
   740
      in
sultana@55596
   741
        if is_none count
sultana@55596
   742
        then pre_classify_failures xs ((inf, 1) :: alist)
sultana@55596
   743
        else
sultana@55596
   744
          pre_classify_failures xs
sultana@55596
   745
           (AList.update (op =) (inf, the count + 1) alist)
sultana@55596
   746
      end
sultana@55596
   747
sultana@55596
   748
fun classify_failures (Whole_proof (_, Nonempty (SOME (((_, _), inferences))))) = pre_classify_failures inferences []
sultana@55596
   749
  | classify_failures (Specific_rule (_, rule, t)) = [(rule, length t)]
sultana@55596
   750
  | classify_failures _ = raise NONSENSE
wenzelm@63167
   751
\<close>
sultana@55596
   752
wenzelm@63167
   753
ML \<open>
sultana@55596
   754
val regressions = map (fn s => "$THF_PROOFS/" ^ s)
sultana@55596
   755
  ["SEV405^5.p.out",
sultana@55596
   756
   (*"SYO377^5.p.out", Always seems to raise Interrupt on my laptop -- probably because node 475 has lots of premises*)
sultana@55596
   757
   "PUZ031^5.p.out",
sultana@55596
   758
   "ALG001^5.p.out",
sultana@55596
   759
   "SYO238^5.p.out",
sultana@55596
   760
   (*"SEV158^5.p.out", This is big*)
sultana@55596
   761
   "SYO285^5.p.out",
sultana@55596
   762
   "../SYO285^5.p.out_reduced",
sultana@55596
   763
   (* "SYO225^5.p.out", This is big*)
sultana@55596
   764
   "SYO291^5.p.out",
sultana@55596
   765
   "SET669^3.p.out",
sultana@55596
   766
   "SEV233^5.p.out",
sultana@55596
   767
   (*"SEU511^1.p.out", This is big*)
sultana@55596
   768
   "SEV161^5.p.out",
sultana@55596
   769
   "SEV012^5.p.out",
sultana@55596
   770
   "SYO035^1.p.out",
sultana@55596
   771
   "SYO291^5.p.out",
sultana@55596
   772
   "SET741^4.p.out", (*involves both definitions and contorted splitting. has nice graph.*)
sultana@55596
   773
   "SEU548^2.p.out",
sultana@55596
   774
   "SEU513^2.p.out",
sultana@55596
   775
   "SYO006^1.p.out",
sultana@55596
   776
   "SYO371^5.p.out" (*has contorted splitting, like SYO006^1.p.out, but doesn't involve definitions*)
sultana@55596
   777
  ]
wenzelm@63167
   778
\<close>
sultana@55596
   779
wenzelm@63167
   780
ML \<open>
sultana@55596
   781
val experiment = examine_failed_inferences @{context}
sultana@55596
   782
  (List.last regressions) NONE;
sultana@55596
   783
sultana@55596
   784
(*
sultana@55596
   785
val experiment_focus =
sultana@55596
   786
  filter_failures "extcnf_combined" experiment;
sultana@55596
   787
*)
sultana@55596
   788
sultana@55596
   789
(*
sultana@55596
   790
count_failures experiment_focus
sultana@55596
   791
classify_failures experiment
sultana@55596
   792
*)
wenzelm@63167
   793
\<close>
sultana@55596
   794
sultana@55596
   795
text "Run reconstruction on all problems in a benchmark (provided via a script)
sultana@55596
   796
and report on partial success."
sultana@55596
   797
sultana@55596
   798
declare [[
sultana@55596
   799
  tptp_test_all = true,
sultana@55596
   800
  tptp_test_timeout = 10
sultana@55596
   801
]]
sultana@55596
   802
wenzelm@63167
   803
ML \<open>
sultana@55596
   804
  (*problem source*)
sultana@55596
   805
  val tptp_probs_dir =
sultana@55596
   806
    Path.explode "$THF_PROOFS"
sultana@55596
   807
    |> Path.expand;
wenzelm@63167
   808
\<close>
sultana@55596
   809
wenzelm@63167
   810
ML \<open>
sultana@55596
   811
  if test_all @{context} then
sultana@55596
   812
    (report @{context} "Reconstructing proofs";
sultana@55596
   813
    S timed_test (reconstruction_test (get_timeout @{context})) @{context} (TPTP_Syntax.get_file_list tptp_probs_dir))
sultana@55596
   814
  else ()
wenzelm@63167
   815
\<close>
sultana@55596
   816
sultana@55596
   817
(*
sultana@55596
   818
Debugging strategy:
sultana@55596
   819
  1) get list of all proofs
sultana@55596
   820
  2) order by size
sultana@55596
   821
  3) try to construct each in turn, given some timeout
sultana@55596
   822
sultana@55596
   823
Use this to find the smallest failure, then debug that.
sultana@55596
   824
*)
nipkow@62390
   825
end