src/HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML
author wenzelm
Sat Apr 02 23:29:05 2016 +0200 (2016-04-02 ago)
changeset 62826 eb94e570c1a4
parent 62738 fe827c6fa8c5
child 62969 9f394a16c557
permissions -rw-r--r--
prefer infix operations;
     1 (*  Title:      HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML
     2     Author:     Jasmin Blanchette and Sascha Boehme and Tobias Nipkow, TU Munich
     3 *)
     4 
     5 structure Mirabelle_Sledgehammer : MIRABELLE_ACTION =
     6 struct
     7 
     8 (*To facilitate synching the description of Mirabelle Sledgehammer parameters
     9  (in ../lib/Tools/mirabelle) with the parameters actually used by this
    10  interface, the former extracts PARAMETER and DESCRIPTION from code below which
    11  has this pattern (provided it appears in a single line):
    12    val .*K = "PARAMETER" (*DESCRIPTION*)
    13 *)
    14 (*NOTE: descriptions mention parameters (particularly NAME) without a defined range.*)
    15 val proverK = "prover" (*=NAME: name of the external prover to call*)
       (*without "prover", get_prover_name falls back on the first of Sledgehammer's default provers*)
    16 val prover_timeoutK = "prover_timeout" (*=TIME: timeout for invoked ATP (seconds of process time)*)
       (*run_sledgehammer uses 30 when "prover_timeout" is absent, and a hard limit of four times the timeout*)
    17 val keepK = "keep" (*=PATH: path where to keep temporary files created by sledgehammer*)
    18 
    19 val proof_methodK = "proof_method" (*=NAME: how to reconstruct proofs (ie. using metis/smt)*)
       (*the setting "smart" is special: proof_method_from_msg turns it into "none" or "fail"*)
    20 
    21 val max_factsK = "max_facts" (*=NUM: max. relevant clauses to use*)
    22 val max_relevantK = "max_relevant" (*=NUM: max. relevant clauses to use*)
       (*"max_relevant" is only consulted by run_sledgehammer when "max_facts" is absent*)
    23 val max_callsK = "max_calls" (*=NUM: max. no. of calls to sledgehammer*)
    24 val preplay_timeoutK = "preplay_timeout" (*=TIME: timeout for finding reconstructed proof*)
    25 val isar_proofsK = "isar_proofs" (*: enable Isar proof generation*)
    26 val smt_proofsK = "smt_proofs" (*: enable SMT proof generation*)
    27 val minimizeK = "minimize" (*: instruct sledgehammer to run its minimizer*)
    28 
    29 val check_trivialK = "check_trivial" (*: check if goals are "trivial" (false by default)*)
    30 val fact_filterK = "fact_filter" (*=STRING: fact filter*)
    31 val type_encK = "type_enc" (*=STRING: type encoding scheme*)
    32 val lam_transK = "lam_trans" (*=STRING: lambda translation scheme*)
    33 val strictK = "strict" (*=BOOL: run in strict mode*)
    34 val sliceK = "slice" (*=BOOL: allow sledgehammer-level strategy-scheduling*)
    35 val uncurried_aliasesK = "uncurried_aliases" (*=SMART_BOOL: use fresh function names to alias curried applications*)
    36 val e_selection_heuristicK = "e_selection_heuristic" (*: FIXME*)
    37 val term_orderK = "term_order" (*: FIXME*)
       (*the two settings above are handed unchanged by run_sh to the configuration options
         ATP_Systems.e_selection_heuristic and ATP_Systems.term_order*)
    38 val force_sosK = "force_sos" (*: use SOS*)
       (*run_sledgehammer treats every setting of "force_sos" other than "false" as true*)
    39 val max_new_mono_instancesK = "max_new_mono_instances" (*=NUM: max. new monomorphic instances*)
    40 val max_mono_itersK = "max_mono_iters" (*=NUM: max. iterations of monomorphiser*)
    41 
    42 fun sh_tag id = String.concat ["#", string_of_int id, " sledgehammer: "]  (*log prefix of a Sledgehammer run*)
    43 fun proof_method_tag meth id =  (*log prefix of a proof method run; meth is a string reference*)
         String.concat ["#", string_of_int id, " ", !meth, " (sledgehammer): "]
    44 
    45 val separator = "-----"
       (*logged by run_proof_method before each proof method run*)
    46 
    47 (*FIXME sensible to have Mirabelle-level Sledgehammer defaults?*)
    48 (*defaults used in this Mirabelle action*)
       (*all defaults are strings, like the settings looked up in args; max_calls_default is
         converted with Int.fromString and trivial_default with Markup.parse_bool by
         sledgehammer_action*)
    49 val preplay_timeout_default = "1"
    50 val lam_trans_default = "smart"
    51 val uncurried_aliases_default = "smart"
    52 val fact_filter_default = "smart"
    53 val type_enc_default = "smart"
    54 val strict_default = "false"
    55 val max_facts_default = "smart"
    56 val slice_default = "true"
    57 val max_calls_default = "10000000"
    58 val trivial_default = "false"
    59 
    60 (*If a key is present in args then augment a list with its pair*)
    61 (*This is used to avoid fixing default values at the Mirabelle level, and
    62   instead use the default values of the tool (Sledgehammer in this case).*)
    63 fun available_parameter args key label list =
    64   (*put (label, value) in front of list if args binds key to value; otherwise return list unchanged*)
    65   (case AList.lookup (op =) args key of NONE => list
    66   | SOME value => (label, value) :: list)
    67 
    68 datatype sh_data = ShData of {
       (*counters for the Sledgehammer runs of one action instance; updated through the
         inc_sh_* functions*)
    69   calls: int, (*all runs*)
    70   success: int, (*runs with result SH_OK*)
    71   nontriv_calls: int, (*runs on goals not classified as trivial*)
    72   nontriv_success: int, (*successful runs on goals not classified as trivial*)
    73   lemmas: int, (*total number of facts used by successful runs*)
    74   max_lems: int, (*largest number of facts used by a single successful run*)
    75   time_isa: int, (*summed times measured with Mirabelle.cpu_time in run_sh; presumably milliseconds*)
    76   time_prover: int, (*summed prover run time of successful runs, in milliseconds*)
    77   time_prover_fail: int} (*summed prover run time of failed runs, in milliseconds*)
    78 
    79 datatype re_data = ReData of {
       (*counters for the proof method runs of one action instance; updated through the
         inc_proof_method_* functions*)
    80   calls: int, (*all runs*)
    81   success: int, (*runs where the method succeeded*)
    82   nontriv_calls: int, (*runs on goals not classified as trivial*)
    83   nontriv_success: int, (*successful runs on goals not classified as trivial*)
    84   proofs: int, (*successful runs with name = "proof" in run_proof_method*)
    85   time: int, (*summed time of successful runs*)
    86   timeout: int, (*number of runs that timed out*)
    87   lemmas: int * int * int, (*(sum, sum of squares, maximum) of the number of facts in successful runs; see inc_max*)
    88   posns: (Position.T * bool) list (*position and triviality flag of successful runs, newest first*)
    89   }
    90 
    91 (*conversions between the counter records and plain tuples; the tuple
    92   components follow the order of the fields in the datatype declarations*)
    93 fun make_sh_data (c, s, nc, ns, l, ml, ti, tp, tpf) =
    94   ShData {calls = c, success = s, nontriv_calls = nc, nontriv_success = ns,
    95     lemmas = l, max_lems = ml, time_isa = ti, time_prover = tp, time_prover_fail = tpf}
    96 
    97 fun make_re_data (c, s, nc, ns, p, t, tout, l, ps) =
    98   ReData {calls = c, success = s, nontriv_calls = nc, nontriv_success = ns,
    99     proofs = p, time = t, timeout = tout, lemmas = l, posns = ps}
   100 
   101 val empty_sh_data = make_sh_data (0, 0, 0, 0, 0, 0, 0, 0, 0)
   102 val empty_re_data = make_re_data (0, 0, 0, 0, 0, 0, 0, (0, 0, 0), [])
   103 
   104 fun tuple_of_sh_data (ShData r) =
   105   (#calls r, #success r, #nontriv_calls r, #nontriv_success r, #lemmas r,
   106    #max_lems r, #time_isa r, #time_prover r, #time_prover_fail r)
   107 
   108 fun tuple_of_re_data (ReData r) =
   109   (#calls r, #success r, #nontriv_calls r, #nontriv_success r, #proofs r,
   110    #time r, #timeout r, #lemmas r, #posns r)
   116 
   117 (*statistics of one action instance: the Sledgehammer counters and the
   118   counters of the proof method run with the unminimized set of lemmas*)
   119 datatype data = Data of {sh: sh_data, re_u: re_data}
   120 
   121 fun make_data (sh, re_u) = Data {sh = sh, re_u = re_u}
   122 
   123 val empty_data = make_data (empty_sh_data, empty_re_data)
   124 
   125 (*apply f to the tuple view of one component; the other component is kept*)
   126 fun map_sh_data f (Data {sh, re_u}) =
   127   make_data (make_sh_data (f (tuple_of_sh_data sh)), re_u)
   128 
   129 fun map_re_data f (Data {sh, re_u}) =
   130   make_data (sh, (make_re_data o f o tuple_of_re_data) re_u)
   135 
   136 fun inc_max (n: int) (sum, sum_sq, max) =  (*account for n in a (sum, sum of squares, maximum) triple*)
         (sum + n, sum_sq + n * n, if n > max then n else max)
   137 
   138 (*single-field updates of the Sledgehammer counters; the tuple components are
   139   (calls, success, nontriv_calls, nontriv_success, lemmas, max_lems,
   140    time_isa, time_prover, time_prover_fail)*)
   141 val inc_sh_calls = map_sh_data
   142   (fn (c, s, nc, ns, l, ml, ti, tp, tpf) => (c + 1, s, nc, ns, l, ml, ti, tp, tpf))
   143 
   144 val inc_sh_success = map_sh_data
   145   (fn (c, s, nc, ns, l, ml, ti, tp, tpf) => (c, s + 1, nc, ns, l, ml, ti, tp, tpf))
   146 
   147 val inc_sh_nontriv_calls = map_sh_data
   148   (fn (c, s, nc, ns, l, ml, ti, tp, tpf) => (c, s, nc + 1, ns, l, ml, ti, tp, tpf))
   149 
   150 val inc_sh_nontriv_success = map_sh_data
   151   (fn (c, s, nc, ns, l, ml, ti, tp, tpf) => (c, s, nc, ns + 1, l, ml, ti, tp, tpf))
   152 
   153 fun inc_sh_lemmas n = map_sh_data
   154   (fn (c, s, nc, ns, l, ml, ti, tp, tpf) => (c, s, nc, ns, l + n, ml, ti, tp, tpf))
   155 
   156 fun inc_sh_max_lems n = map_sh_data  (*keeps the larger of the stored maximum and n*)
   157   (fn (c, s, nc, ns, l, ml, ti, tp, tpf) => (c, s, nc, ns, l, Int.max (ml, n), ti, tp, tpf))
   158 
   159 fun inc_sh_time_isa t = map_sh_data
   160   (fn (c, s, nc, ns, l, ml, ti, tp, tpf) => (c, s, nc, ns, l, ml, ti + t, tp, tpf))
   161 
   162 fun inc_sh_time_prover t = map_sh_data
   163   (fn (c, s, nc, ns, l, ml, ti, tp, tpf) => (c, s, nc, ns, l, ml, ti, tp + t, tpf))
   164 
   165 fun inc_sh_time_prover_fail t = map_sh_data
   166   (fn (c, s, nc, ns, l, ml, ti, tp, tpf) => (c, s, nc, ns, l, ml, ti, tp, tpf + t))
   173 
   174 (*single-field updates of the proof method counters; the tuple components are
   175   (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout,
   176    lemmas, posns)*)
   177 val inc_proof_method_calls = map_re_data
   178   (fn (c, s, nc, ns, p, t, tout, l, ps) => (c + 1, s, nc, ns, p, t, tout, l, ps))
   179 
   180 val inc_proof_method_success = map_re_data
   181   (fn (c, s, nc, ns, p, t, tout, l, ps) => (c, s + 1, nc, ns, p, t, tout, l, ps))
   182 
   183 val inc_proof_method_nontriv_calls = map_re_data
   184   (fn (c, s, nc, ns, p, t, tout, l, ps) => (c, s, nc + 1, ns, p, t, tout, l, ps))
   185 
   186 val inc_proof_method_nontriv_success = map_re_data
   187   (fn (c, s, nc, ns, p, t, tout, l, ps) => (c, s, nc, ns + 1, p, t, tout, l, ps))
   188 
   189 val inc_proof_method_proofs = map_re_data
   190   (fn (c, s, nc, ns, p, t, tout, l, ps) => (c, s, nc, ns, p + 1, t, tout, l, ps))
   191 
   192 fun inc_proof_method_time dt = map_re_data
   193   (fn (c, s, nc, ns, p, t, tout, l, ps) => (c, s, nc, ns, p, t + dt, tout, l, ps))
   194 
   195 val inc_proof_method_timeout = map_re_data
   196   (fn (c, s, nc, ns, p, t, tout, l, ps) => (c, s, nc, ns, p, t, tout + 1, l, ps))
   197 
   198 fun inc_proof_method_lemmas n = map_re_data  (*lemmas is a (sum, sum of squares, maximum) triple*)
   199   (fn (c, s, nc, ns, p, t, tout, l, ps) => (c, s, nc, ns, p, t, tout, inc_max n l, ps))
   200 
   201 fun inc_proof_method_posns pos = map_re_data  (*newest position goes first*)
   202   (fn (c, s, nc, ns, p, t, tout, l, ps) => (c, s, nc, ns, p, t, tout, l, pos :: ps))
   209 
   210 fun str0 opt = string_of_int (the_default 0 opt)  (*print an optional integer, with 0 for NONE*)
   211 
   212 local
   213 
   214 (*formatting helpers: integers, reals with 3 fractional digits, integer percentages*)
   215 fun str i = string_of_int i
   216 fun str3 r = Real.fmt (StringCvt.FIX (SOME 3)) r
   217 fun percentage a b = str ((a * 100) div b)
   218 fun time t = Real.fromInt t / 1000.0  (*presumably milliseconds to seconds*)
   219 fun avg_time t n = if n <= 0 then 0.0 else time t / Real.fromInt n
   220 
   221 fun log_sh_data log
   222     (calls, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail) =
       (*Writes the Sledgehammer part of a report, one log call per line, in the fixed order
         below.  The argument is the tuple view of sh_data.  The success rate divides by
         calls, so calls must not be 0; log_data only calls this function when calls > 0.
         Failed calls are counted as calls - success.*)
   223  (log ("Total number of sledgehammer calls: " ^ str calls);
   224   log ("Number of successful sledgehammer calls: " ^ str success);
   225   log ("Number of sledgehammer lemmas: " ^ str lemmas);
   226   log ("Max number of sledgehammer lemmas: " ^ str max_lems);
   227   log ("Success rate: " ^ percentage success calls ^ "%");
   228   log ("Total number of nontrivial sledgehammer calls: " ^ str nontriv_calls);
   229   log ("Number of successful nontrivial sledgehammer calls: " ^ str nontriv_success);
   230   log ("Total time for sledgehammer calls (Isabelle): " ^ str3 (time time_isa));
   231   log ("Total time for successful sledgehammer calls (ATP): " ^ str3 (time time_prover));
   232   log ("Total time for failed sledgehammer calls (ATP): " ^ str3 (time time_prover_fail));
   233   log ("Average time for sledgehammer calls (Isabelle): " ^
   234     str3 (avg_time time_isa calls));
   235   log ("Average time for successful sledgehammer calls (ATP): " ^
   236     str3 (avg_time time_prover success));
   237   log ("Average time for failed sledgehammer calls (ATP): " ^
   238     str3 (avg_time time_prover_fail (calls - success)))
   239   )
   240 
   241 fun str_of_pos (pos, triv) =  (*"line:offset", followed by "[T]" if triv holds*)
   242   String.concat [str0 (Position.line_of pos), ":", str0 (Position.offset_of pos),
   243     if triv then "[T]" else ""]
   244 
   245 fun log_re_data log tag sh_calls (re_calls, re_success, re_nontriv_calls,
   246      re_nontriv_success, re_proofs, re_time, re_timeout,
   247     (lemmas, lems_sos, lems_max), re_posns) =
       (*Writes the proof method part of a report, one log call per line; tag is spliced
         into the messages in front of "proof method".  The tuple is the tuple view of
         re_data.  The success rate is taken relative to sh_calls (the number of
         Sledgehammer calls), not re_calls, so sh_calls must not be 0.  re_proofs is
         printed both with the overall and with the nontrivial success count.  The list
         of proved positions is only printed for the empty tag.*)
   248  (log ("Total number of " ^ tag ^ "proof method calls: " ^ str re_calls);
   249   log ("Number of successful " ^ tag ^ "proof method calls: " ^ str re_success ^
   250     " (proof: " ^ str re_proofs ^ ")");
   251   log ("Number of " ^ tag ^ "proof method timeouts: " ^ str re_timeout);
   252   log ("Success rate: " ^ percentage re_success sh_calls ^ "%");
   253   log ("Total number of nontrivial " ^ tag ^ "proof method calls: " ^ str re_nontriv_calls);
   254   log ("Number of successful nontrivial " ^ tag ^ "proof method calls: " ^ str re_nontriv_success ^
   255     " (proof: " ^ str re_proofs ^ ")");
   256   log ("Number of successful " ^ tag ^ "proof method lemmas: " ^ str lemmas);
   257   log ("SOS of successful " ^ tag ^ "proof method lemmas: " ^ str lems_sos);
   258   log ("Max number of successful " ^ tag ^ "proof method lemmas: " ^ str lems_max);
   259   log ("Total time for successful " ^ tag ^ "proof method calls: " ^ str3 (time re_time));
   260   log ("Average time for successful " ^ tag ^ "proof method calls: " ^
   261     str3 (avg_time re_time re_success));
   262   if tag=""
   263   then log ("Proved: " ^ space_implode " " (map str_of_pos re_posns))
   264   else ()
   265  )
   266 
   267 in
   268 
   269 (*write the report of action instance id; nothing is written without
   270   Sledgehammer calls, and the proof method part only if the method was run*)
   271 fun log_data id log (Data {sh = sh as ShData {calls = sh_calls, ...},
   272       re_u = re_u as ReData {calls = re_calls, ...}}) =
   273   if sh_calls <= 0 then ()
   274   else
   275    (log ("\n\n\nReport #" ^ string_of_int id ^ ":\n");
   276     log_sh_data log (tuple_of_sh_data sh);
   277     log "";
   278     if re_calls > 0 then
   279       (log_re_data log "" sh_calls (tuple_of_re_data re_u); log "")
   280     else ())
   286 
   287 end
   288 
   289 (* Warning: we implicitly assume single-threaded execution here *)
   290 val data = Unsynchronized.ref ([] : (int * data) list)  (*statistics per action instance id*)
   291 
   292 fun init id thy =  (*start with empty statistics for id; thy is passed through*)
   293   let val _ = Unsynchronized.change data (cons (id, empty_data)) in thy end
   294 fun done id ({log, ...}: Mirabelle.done_args) =  (*report the statistics of id, if there are any*)
   295   (case AList.lookup (op =) (!data) id of
   296     SOME d => log_data id log d
   297   | NONE => ())
   298 fun change_data id f = ignore (Unsynchronized.change data (AList.map_entry (op =) id f))
   299 
   300 fun get_prover_name thy args =
   301   (case AList.lookup (op =) args proverK of
   302     SOME name => name
   303   | NONE =>
   304       (*no prover given: take the first of Sledgehammer's default provers*)
   305       (hd (#provers (Sledgehammer_Commands.default_params thy []))
   306         handle List.Empty => error "No ATP available."))
   310 
   311 fun get_prover ctxt name params goal all_facts =
   312   (*the minimizing prover called name, in mode Normal, with a MaSh learning hook for goal*)
   313   let
   314     val learn_proof = Sledgehammer_MaSh.mash_learn_proof ctxt params (Thm.prop_of goal) all_facts
   315     val mode = Sledgehammer_Prover.Normal
   316   in Sledgehammer_Prover_Minimize.get_minimizing_prover ctxt mode learn_proof name end
   317 
   318 type stature = ATP_Problem_Generate.stature (*paired with fact names in SH_OK results and in named_thms*)
   319 
   320 fun is_good_line s =  (*s contains " ms)" or " s)", but none of "(> ", ", > ", "may fail"*)
   321   let fun has sub = String.isSubstring sub s in
   322     (has " ms)" orelse has " s)") andalso
   323     not (has "(> " orelse has ", > " orelse has "may fail")
   324   end
   325 
   326 (* Fragile hack *)
   327 fun proof_method_from_msg args msg =
   328   let
   329     (*does some line of msg pass is_good_line?*)
   330     fun preplayed () = exists is_good_line (split_lines msg)
   331     fun metis_call () =  (*the text from "metis (" up to the next ")", plus ")"*)
   332       Substring.full msg
   333       |> Substring.position "metis ("
   334       |> snd |> Substring.position ")"
   335       |> fst |> Substring.string
   336       |> suffix ")"
   337   in
   338     (case AList.lookup (op =) args proof_methodK of
   339       SOME "smart" => if preplayed () then "none" else "fail"
   340     | SOME name => name
   341     | NONE =>
   342         if preplayed () then "none" (* trust the preplayed proof *)
   343         else if String.isSubstring "metis (" msg then metis_call ()
   344         else if String.isSubstring "metis" msg then "metis"
   345         else "smt")
   346   end
   350 
   351 local
   352 
   353 datatype sh_result =
       (*outcome of one call of run_sh:
           SH_OK (time_isa, time_prover, used_facts): the prover result has no failure outcome
           SH_FAIL (time_isa, time_prover): the prover result has a failure outcome
           SH_ERROR: an ERROR exception was raised*)
   354   SH_OK of int * int * (string * stature) list |
   355   SH_FAIL of int * int |
   356   SH_ERROR
   357 
   358 fun run_sh prover_name fact_filter type_enc strict max_facts slice
   359       lam_trans uncurried_aliases e_selection_heuristic term_order force_sos
   360       hard_timeout timeout preplay_timeout isar_proofsLST smt_proofsLST
   361       minimizeLST max_new_mono_instancesLST max_mono_itersLST dir pos st =
       (*Runs prover_name on subgoal 1 of proof state st and returns (message, sh_result).
         The string-valued arguments are handed to Sledgehammer_Commands.default_params;
         the ...LST arguments are functions that extend that parameter list.  dir, if
         given, is where problem files are kept.  hard_timeout, if given, is a limit in
         seconds on the whole run.*)
   362   let
   363     val thy = Proof.theory_of st
   364     val {context = ctxt, facts = chained_ths, goal} = Proof.goal st
   365     val i = 1
   366     fun set_file_name (SOME dir) =
   367         Config.put Sledgehammer_Prover_ATP.atp_dest_dir dir
   368         #> Config.put Sledgehammer_Prover_ATP.atp_problem_prefix
   369           ("prob_" ^ str0 (Position.line_of pos) ^ "__")
   370         #> Config.put SMT_Config.debug_files
   371           (dir ^ "/" ^ Name.desymbolize (SOME false) (ATP_Util.timestamp ()) ^ "_"
   372           ^ serial_string ())
   373       | set_file_name NONE = I
           (*st' is st with the file name and optional prover settings put into its context*)
   374     val st' =
   375       st
   376       |> Proof.map_context
   377            (set_file_name dir
   378             #> (Option.map (Config.put ATP_Systems.e_selection_heuristic)
   379                   e_selection_heuristic |> the_default I)
   380             #> (Option.map (Config.put ATP_Systems.term_order)
   381                   term_order |> the_default I)
   382             #> (Option.map (Config.put ATP_Systems.force_sos)
   383                   force_sos |> the_default I))
           (*the right-hand side below still refers to the string arguments max_facts and
             preplay_timeout; after this binding both names denote the fields of params*)
   384     val params as {max_facts, minimize, preplay_timeout, ...} =
   385       Sledgehammer_Commands.default_params thy
   386          ([(* ("verbose", "true"), *)
   387            ("fact_filter", fact_filter),
   388            ("type_enc", type_enc),
   389            ("strict", strict),
   390            ("lam_trans", lam_trans |> the_default lam_trans_default),
   391            ("uncurried_aliases", uncurried_aliases |> the_default uncurried_aliases_default),
   392            ("max_facts", max_facts),
   393            ("slice", slice),
   394            ("timeout", string_of_int timeout),
   395            ("preplay_timeout", preplay_timeout)]
   396           |> isar_proofsLST
   397           |> smt_proofsLST
   398           |> minimizeLST (*don't confuse the two minimization flags*)
   399           |> max_new_mono_instancesLST
   400           |> max_mono_itersLST)
   401     val default_max_facts =
   402       Sledgehammer_Prover_Minimize.default_max_facts_of_prover ctxt prover_name
   403     val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal goal i ctxt
   404     val time_limit =
   405       (case hard_timeout of
   406         NONE => I
   407       | SOME secs => Timeout.apply (Time.fromSeconds secs))
           (*substitute result for runs that raise an exception handled below; the time_isa
             component of such a result is ~1*)
   408     fun failed failure =
   409       ({outcome = SOME failure, used_facts = [], used_from = [],
   410         preferred_methss = (Sledgehammer_Proof_Methods.Auto_Method, []), run_time = Time.zeroTime,
   411         message = K ""}, ~1)
           (*fact selection and the prover call are both inside the timed and time-limited
             computation*)
   412     val ({outcome, used_facts, preferred_methss, run_time, message, ...}
   413          : Sledgehammer_Prover.prover_result,
   414          time_isa) = time_limit (Mirabelle.cpu_time (fn () =>
   415       let
   416         val ho_atp = Sledgehammer_Prover_ATP.is_ho_atp ctxt prover_name
   417         val keywords = Thy_Header.get_keywords' ctxt
   418         val css_table = Sledgehammer_Fact.clasimpset_rule_table_of ctxt
   419         val facts =
   420           Sledgehammer_Fact.nearly_all_facts ctxt ho_atp
   421               Sledgehammer_Fact.no_fact_override keywords css_table chained_ths
   422               hyp_ts concl_t
   423         val factss =
   424           facts
   425           |> Sledgehammer_MaSh.relevant_facts ctxt params prover_name
   426                  (the_default default_max_facts max_facts)
   427                  Sledgehammer_Fact.no_fact_override hyp_ts concl_t
   428           |> tap (fn factss =>
   429                      "Line " ^ str0 (Position.line_of pos) ^ ": " ^
   430                      Sledgehammer.string_of_factss factss
   431                      |> writeln)
   432         val prover = get_prover ctxt prover_name params goal facts
   433         val problem =
   434           {comment = "", state = st', goal = goal, subgoal = i,
   435            subgoal_count = Sledgehammer_Util.subgoal_count st, factss = factss, found_proof = I}
   436       in prover params problem end)) ()
   437       handle Timeout.TIMEOUT _ => failed ATP_Proof.TimedOut
   438            | Fail "inappropriate" => failed ATP_Proof.Inappropriate
   439     val time_prover = run_time |> Time.toMilliseconds
   440     val msg = message (fn () => Sledgehammer.play_one_line_proof minimize preplay_timeout used_facts
   441       st' i preferred_methss)
   442   in
   443     (case outcome of
   444       NONE => (msg, SH_OK (time_isa, time_prover, used_facts))
   445     | SOME _ => (msg, SH_FAIL (time_isa, time_prover)))
   446   end
         (*this handler covers the whole body of run_sh*)
   447   handle ERROR msg => ("error: " ^ msg, SH_ERROR)
   448 
   449 in
   450 
   451 fun run_sledgehammer trivial args proof_method named_thms id
   452       ({pre=st, log, pos, ...}: Mirabelle.run_args) =
       (*Performs one Sledgehammer call for action instance id: reads the settings from
         args, calls run_sh on proof state st, updates the statistics of id and logs the
         outcome.  On success the references proof_method and named_thms are assigned
         the method derived from the message and the used facts that could be looked up.
         trivial says whether the goal was classified as trivial; it only affects the
         "nontrivial" counters and the "[T] " marker in the log.*)
   453   let
   454     val thy = Proof.theory_of st
   455     val triv_str = if trivial then "[T] " else ""
   456     val _ = change_data id inc_sh_calls
   457     val _ = if trivial then () else change_data id inc_sh_nontriv_calls
   458     val prover_name = get_prover_name thy args
   459     val fact_filter = AList.lookup (op =) args fact_filterK |> the_default fact_filter_default
   460     val type_enc = AList.lookup (op =) args type_encK |> the_default type_enc_default
   461     val strict = AList.lookup (op =) args strictK |> the_default strict_default
           (*"max_facts" takes precedence over "max_relevant"*)
   462     val max_facts =
   463       (case AList.lookup (op =) args max_factsK of
   464         SOME max => max
   465       | NONE =>
   466         (case AList.lookup (op =) args max_relevantK of
   467           SOME max => max
   468         | NONE => max_facts_default))
   469     val slice = AList.lookup (op =) args sliceK |> the_default slice_default
           (*the following settings stay optional; run_sh supplies defaults or skips them*)
   470     val lam_trans = AList.lookup (op =) args lam_transK
   471     val uncurried_aliases = AList.lookup (op =) args uncurried_aliasesK
   472     val e_selection_heuristic = AList.lookup (op =) args e_selection_heuristicK
   473     val term_order = AList.lookup (op =) args term_orderK
           (*everything except the string "false" counts as true*)
   474     val force_sos = AList.lookup (op =) args force_sosK
   475       |> Option.map (curry (op <>) "false")
   476     val dir = AList.lookup (op =) args keepK
   477     val timeout = Mirabelle.get_int_setting args (prover_timeoutK, 30)
   478     (* always use a hard timeout, but give some slack so that the automatic
   479        minimizer has a chance to do its magic *)
   480     val preplay_timeout = AList.lookup (op =) args preplay_timeoutK
   481       |> the_default preplay_timeout_default
           (*each ...LST function adds its setting to a parameter list only if args has it*)
   482     val isar_proofsLST = available_parameter args isar_proofsK "isar_proofs"
   483     val smt_proofsLST = available_parameter args smt_proofsK "smt_proofs"
   484     val minimizeLST = available_parameter args minimizeK "minimize"
   485     val max_new_mono_instancesLST =
   486       available_parameter args max_new_mono_instancesK max_new_mono_instancesK
   487     val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK
           (*the hard timeout mentioned in the remark further up: four times the prover timeout*)
   488     val hard_timeout = SOME (4 * timeout)
   489     val (msg, result) =
   490       run_sh prover_name fact_filter type_enc strict max_facts slice lam_trans
   491         uncurried_aliases e_selection_heuristic term_order force_sos
   492         hard_timeout timeout preplay_timeout isar_proofsLST smt_proofsLST
   493         minimizeLST max_new_mono_instancesLST max_mono_itersLST dir pos st
   494   in
   495     (case result of
   496       SH_OK (time_isa, time_prover, names) =>
   497         let
                 (*facts whose theorems cannot be looked up are dropped by map_filter below*)
   498           fun get_thms (name, stature) =
   499             try (Sledgehammer_Util.thms_of_name (Proof.context_of st))
   500               name
   501             |> Option.map (pair (name, stature))
   502         in
   503           change_data id inc_sh_success;
   504           if trivial then () else change_data id inc_sh_nontriv_success;
   505           change_data id (inc_sh_lemmas (length names));
   506           change_data id (inc_sh_max_lems (length names));
   507           change_data id (inc_sh_time_isa time_isa);
   508           change_data id (inc_sh_time_prover time_prover);
   509           proof_method := proof_method_from_msg args msg;
   510           named_thms := SOME (map_filter get_thms names);
   511           log (sh_tag id ^ triv_str ^ "succeeded (" ^ string_of_int time_isa ^ "+" ^
   512             string_of_int time_prover ^ ") [" ^ prover_name ^ "]:\n" ^ msg)
   513         end
   514     | SH_FAIL (time_isa, time_prover) =>
             (*NOTE(review): for a timed-out or "inappropriate" run, run_sh reports time_isa
               as ~1, which is added to the total here as well -- confirm this is intended*)
   515         let
   516           val _ = change_data id (inc_sh_time_isa time_isa)
   517           val _ = change_data id (inc_sh_time_prover_fail time_prover)
   518         in log (sh_tag id ^ triv_str ^ "failed: " ^ msg) end
         (*unlike the other two cases, this log line carries no "[T] " marker*)
   519     | SH_ERROR => log (sh_tag id ^ "failed: " ^ msg))
   520   end
   521 
   522 end
   523 
   524 (*parameter list for sledgehammer_tac: the given prover and type encoding,
   525   "max_facts" fixed to "0", "strict" to "true", "slice" to "false", and the
   526   timeout in whole seconds*)
   527 fun override_params prover type_enc timeout =
   528   let val secs = string_of_int (Time.toSeconds timeout) in
   529     [("provers", prover), ("max_facts", "0"), ("type_enc", type_enc),
   530      ("strict", "true"), ("slice", "false"), ("timeout", secs)]
         end
   531 
   532 fun run_proof_method trivial full name meth named_thms id
   533     ({pre=st, timeout, log, pos, ...}: Mirabelle.run_args) =
       (*Applies the proof method named by the reference meth to proof state st, using the
         facts in named_thms, within the time limit timeout; updates the proof method
         statistics of id, logs one line with the outcome and returns a boolean.
         trivial only affects the "nontrivial" counters and the recorded position; full
         selects metis with full types for all methods other than "sledgehammer_tac" and
         "smt"; name = "proof" makes a success count as a proof.*)
   534   let
           (*the tactic for the current contents of meth; unknown methods give a warning and fail*)
   535     fun do_method named_thms ctxt =
   536       let
   537         val ref_of_str = (* FIXME proper wrapper for parser combinators *)
   538           suffix ";" #> Token.explode (Thy_Header.get_keywords' ctxt) Position.none
   539           #> Parse.xthm #> fst
   540         val thms = named_thms |> maps snd
   541         val facts = named_thms |> map (ref_of_str o fst o fst)
   542         val fact_override = {add = facts, del = [], only = true}
               (*time_slice is the fraction of timeout granted to one prover*)
   543         fun my_timeout time_slice =
   544           timeout |> Time.toReal |> curry (op *) time_slice |> Time.fromReal
   545         fun sledge_tac time_slice prover type_enc =
   546           Sledgehammer_Tactics.sledgehammer_as_oracle_tac ctxt
   547             (override_params prover type_enc (my_timeout time_slice)) fact_override []
   548       in
   549         if !meth = "sledgehammer_tac" then
   550           sledge_tac 0.2 ATP_Proof.vampireN "mono_native"
   551           ORELSE' sledge_tac 0.2 ATP_Proof.eN "poly_guards??"
   552           ORELSE' sledge_tac 0.2 ATP_Proof.spassN "mono_native"
   553           ORELSE' sledge_tac 0.2 ATP_Proof.z3_tptpN "poly_tags??"
   554           ORELSE' SMT_Solver.smt_tac ctxt thms
   555         else if !meth = "smt" then
   556           SMT_Solver.smt_tac ctxt thms
   557         else if full then
   558           Metis_Tactic.metis_tac [ATP_Proof_Reconstruct.full_typesN]
   559             ATP_Proof_Reconstruct.default_metis_lam_trans ctxt thms
   560         else if String.isPrefix "metis (" (!meth) then
                 (*parse the options that follow the first proper token of the method text*)
   561           let
   562             val (type_encs, lam_trans) =
   563               !meth
   564               |> Token.explode (Thy_Header.get_keywords' ctxt) Position.start
   565               |> filter Token.is_proper |> tl
   566               |> Metis_Tactic.parse_metis_options |> fst
   567               |>> the_default [ATP_Proof_Reconstruct.partial_typesN]
   568               ||> the_default ATP_Proof_Reconstruct.default_metis_lam_trans
   569           in Metis_Tactic.metis_tac type_encs lam_trans ctxt thms end
   570         else if !meth = "metis" then
   571           Metis_Tactic.metis_tac [] ATP_Proof_Reconstruct.default_metis_lam_trans ctxt thms
   572         else if !meth = "none" then
   573           K all_tac
   574         else if !meth = "fail" then
   575           K no_tac
   576         else
   577           (warning ("Unknown method " ^ quote (!meth)); K no_tac)
   578       end
   579     fun apply_method named_thms =
   580       Mirabelle.can_apply timeout (do_method named_thms) st
   581 
           (*turn (success flag, time) into a log message; a success also updates the
             statistics of id*)
   582     fun with_time (false, t) = "failed (" ^ string_of_int t ^ ")"
   583       | with_time (true, t) = (change_data id inc_proof_method_success;
   584           if trivial then ()
   585           else change_data id inc_proof_method_nontriv_success;
   586           change_data id (inc_proof_method_lemmas (length named_thms));
   587           change_data id (inc_proof_method_time t);
   588           change_data id (inc_proof_method_posns (pos, trivial));
   589           if name = "proof" then change_data id inc_proof_method_proofs else ();
   590           "succeeded (" ^ string_of_int t ^ ")")
           (*NOTE(review): the boolean is true whenever the method ran without timeout or
             ERROR, also when with_time reports "failed" -- confirm that callers expect this*)
   591     fun timed_method named_thms =
   592       (with_time (Mirabelle.cpu_time apply_method named_thms), true)
   593       handle Timeout.TIMEOUT _ => (change_data id inc_proof_method_timeout; ("timeout", false))
   594            | ERROR msg => ("error: " ^ msg, false)
   595 
   596     val _ = log separator
   597     val _ = change_data id inc_proof_method_calls
   598     val _ = if trivial then () else change_data id inc_proof_method_nontriv_calls
   599   in
           (*log the message with its tag, return the boolean*)
   600     named_thms
   601     |> timed_method
   602     |>> log o prefix (proof_method_tag meth id)
   603     |> snd
   604   end
   605 
   606 val try_timeout = seconds 5.0
       (*time limit handed to Try0.try0 by the triviality check in sledgehammer_action*)
   607 
   608 (* crude hack *)
   609 val num_sledgehammer_calls = Unsynchronized.ref 0
       (*a single counter for all action instances: sledgehammer_action increments it for each
         goal it does not skip and compares it with the "max_calls" setting*)
   610 
   611 fun sledgehammer_action args id (st as {pre, name, ...}: Mirabelle.run_args) =
       (*The per-goal action: calls Sledgehammer on proof state pre and, if that set
         named_thms, runs the chosen proof method on the same goal.  Goals whose major
         premise is accepted by Logic.dest_conjunction or Logic.dest_equals are skipped,
         as are all goals once more than "max_calls" goals have been counted.
         Raises ERROR if the "max_calls" setting is not an integer.*)
   612   let val goal = Thm.major_prem_of (#goal (Proof.goal pre)) in
   613     if can Logic.dest_conjunction goal orelse can Logic.dest_equals goal
   614     then () else
   615     let
   616       val max_calls_str =
   617         AList.lookup (op =) args max_callsK |> the_default max_calls_default
             (*report a malformed setting by name instead of failing with a bare Option exception*)
   618       val max_calls =
               (case Int.fromString max_calls_str of
                 SOME n => n
               | NONE => error ("Bad value for " ^ quote max_callsK ^ ": " ^ quote max_calls_str))
   619       val _ = num_sledgehammer_calls := !num_sledgehammer_calls + 1;
   620     in
   621       if !num_sledgehammer_calls > max_calls then ()
   622       else
   623         let
                 (*both references are assigned by run_sledgehammer when it succeeds*)
   624           val meth = Unsynchronized.ref ""
   625           val named_thms =
   626             Unsynchronized.ref (NONE : ((string * stature) * thm list) list option)
                 (*only computed on request ("check_trivial"); a timeout of Try0 counts as nontrivial*)
   627           val trivial =
   628             if AList.lookup (op =) args check_trivialK |> the_default trivial_default
   629                             |> Markup.parse_bool then
   630               Try0.try0 (SOME try_timeout) ([], [], [], []) pre
   631               handle Timeout.TIMEOUT _ => false
   632             else false
   633           fun apply_method () =
   634             (Mirabelle.catch_result (proof_method_tag meth) false
   635               (run_proof_method trivial false name meth (these (!named_thms))) id st; ())
   636         in
   637           Mirabelle.catch sh_tag (run_sledgehammer trivial args meth named_thms) id st;
   638           if is_some (!named_thms) then apply_method () else ()
   639         end
   640     end
   641   end
   642 
   643 fun invoke args =  (*register init, the per-goal action for args, and done with Mirabelle*)
   644   let val run = sledgehammer_action args in Mirabelle.register (init, run, done) end
   645 
   646 end