src/HOL/Tools/Sledgehammer/sledgehammer_prover_atp.ML
author blanchet
Mon Jun 02 17:34:26 2014 +0200 (2014-06-02 ago)
changeset 57158 f028d93798e6
parent 57154 f0eff6393a32
child 57245 f6bf6d5341ee
permissions -rw-r--r--
simplified counterexample handling
     1 (*  Title:      HOL/Tools/Sledgehammer/sledgehammer_prover_atp.ML
     2     Author:     Fabian Immler, TU Muenchen
     3     Author:     Makarius
     4     Author:     Jasmin Blanchette, TU Muenchen
     5 
     6 ATPs as Sledgehammer provers.
     7 *)
     8 
     9 signature SLEDGEHAMMER_PROVER_ATP =
    10 sig
    11   type mode = Sledgehammer_Prover.mode
    12   type prover = Sledgehammer_Prover.prover
    13 
    14   val atp_dest_dir : string Config.T
    15   val atp_problem_prefix : string Config.T
    16   val atp_completish : bool Config.T
    17   val atp_full_names : bool Config.T
    18 
    19   val is_ho_atp : Proof.context -> string -> bool
    20 
    21   val run_atp : mode -> string -> prover
    22 end;
    23 
    24 structure Sledgehammer_Prover_ATP : SLEDGEHAMMER_PROVER_ATP =
    25 struct
    26 
    27 open ATP_Util
    28 open ATP_Problem
    29 open ATP_Proof
    30 open ATP_Problem_Generate
    31 open ATP_Proof_Reconstruct
    32 open ATP_Systems
    33 open Sledgehammer_Util
    34 open Sledgehammer_Proof_Methods
    35 open Sledgehammer_Isar
    36 open Sledgehammer_Prover
    37 
    38 (* Empty string means create files in Isabelle's temporary files directory. *)
    39 val atp_dest_dir = Attrib.setup_config_string @{binding sledgehammer_atp_dest_dir} (K "")
    40 val atp_problem_prefix =
    41   Attrib.setup_config_string @{binding sledgehammer_atp_problem_prefix} (K "prob")
    42 val atp_completish = Attrib.setup_config_bool @{binding sledgehammer_atp_completish} (K false)
    43 (* In addition to being easier to read, readable names are often much shorter, especially if types
    44    are mangled in names. This makes a difference for some provers (e.g., E). For these reason, short
    45    names are enabled by default. *)
    46 val atp_full_names = Attrib.setup_config_bool @{binding sledgehammer_atp_full_names} (K false)
    47 
    48 fun is_atp_of_format is_format ctxt name =
    49   let val thy = Proof_Context.theory_of ctxt in
    50     (case try (get_atp thy) name of
    51       SOME config =>
    52       exists (fn (_, ((_, format, _, _, _), _)) => is_format format) (#best_slices (config ()) ctxt)
    53     | NONE => false)
    54   end
    55 
    56 val is_ho_atp = is_atp_of_format is_format_higher_order
    57 
    58 fun choose_type_enc strictness best_type_enc format =
    59   the_default best_type_enc
    60   #> type_enc_of_string strictness
    61   #> adjust_type_enc format
    62 
    63 fun has_bound_or_var_of_type pred =
    64   exists_subterm (fn Var (_, T as Type _) => pred T
    65                    | Abs (_, T as Type _, _) => pred T
    66                    | _ => false)
    67 
    68 (* Unwanted equalities are those between a (bound or schematic) variable that does not properly
    69    occur in the second operand. *)
    70 val is_exhaustive_finite =
    71   let
    72     fun is_bad_equal (Var z) t =
    73         not (exists_subterm (fn Var z' => z = z' | _ => false) t)
    74       | is_bad_equal (Bound j) t = not (loose_bvar1 (t, j))
    75       | is_bad_equal _ _ = false
    76     fun do_equals t1 t2 = is_bad_equal t1 t2 orelse is_bad_equal t2 t1
    77     fun do_formula pos t =
    78       (case (pos, t) of
    79         (_, @{const Trueprop} $ t1) => do_formula pos t1
    80       | (true, Const (@{const_name Pure.all}, _) $ Abs (_, _, t')) => do_formula pos t'
    81       | (true, Const (@{const_name All}, _) $ Abs (_, _, t')) => do_formula pos t'
    82       | (false, Const (@{const_name Ex}, _) $ Abs (_, _, t')) => do_formula pos t'
    83       | (_, @{const Pure.imp} $ t1 $ t2) =>
    84         do_formula (not pos) t1 andalso (t2 = @{prop False} orelse do_formula pos t2)
    85       | (_, @{const HOL.implies} $ t1 $ t2) =>
    86         do_formula (not pos) t1 andalso (t2 = @{const False} orelse do_formula pos t2)
    87       | (_, @{const Not} $ t1) => do_formula (not pos) t1
    88       | (true, @{const HOL.disj} $ t1 $ t2) => forall (do_formula pos) [t1, t2]
    89       | (false, @{const HOL.conj} $ t1 $ t2) => forall (do_formula pos) [t1, t2]
    90       | (true, Const (@{const_name HOL.eq}, _) $ t1 $ t2) => do_equals t1 t2
    91       | (true, Const (@{const_name Pure.eq}, _) $ t1 $ t2) => do_equals t1 t2
    92       | _ => false)
    93   in do_formula true end
    94 
    95 (* Facts containing variables of finite types such as "unit" or "bool" or of the form
    96    "ALL x. x = A | x = B | x = C" are likely to lead to untypable proofs for unsound type
    97    encodings. *)
    98 fun is_dangerous_prop ctxt =
    99   transform_elim_prop
   100   #> (has_bound_or_var_of_type (is_type_surely_finite ctxt) orf is_exhaustive_finite)
   101 
   102 fun get_slices slice slices =
   103   (0 upto length slices - 1) ~~ slices |> not slice ? (List.last #> single)
   104 
   105 fun get_facts_of_filter _ [(_, facts)] = facts
   106   | get_facts_of_filter fact_filter factss =
   107     (case AList.lookup (op =) factss fact_filter of
   108       SOME facts => facts
   109     | NONE => snd (hd factss))
   110 
   111 (* For low values of "max_facts", this fudge value ensures that most slices are invoked with a
   112    nontrivial amount of facts. *)
   113 val max_fact_factor_fudge = 5
   114 
   115 val mono_max_privileged_facts = 10
   116 
   117 fun suffix_of_mode Auto_Try = "_try"
   118   | suffix_of_mode Try = "_try"
   119   | suffix_of_mode Normal = ""
   120   | suffix_of_mode MaSh = ""
   121   | suffix_of_mode Auto_Minimize = "_min"
   122   | suffix_of_mode Minimize = "_min"
   123 
   124 (* Give the ATPs some slack before interrupting them the hard way. "z3_tptp" on Linux appears to be
   125    the only ATP that does not honor its time limit. *)
   126 val atp_timeout_slack = seconds 1.0
   127 
   128 (* Important messages are important but not so important that users want to see
   129    them each time. *)
   130 val atp_important_message_keep_quotient = 25
   131 
   132 fun run_atp mode name
   133     (params as {debug, verbose, overlord, type_enc, strict, lam_trans, uncurried_aliases,
   134        fact_filter, max_facts, max_mono_iters, max_new_mono_instances, isar_proofs, compress_isar,
   135        try0_isar, smt_proofs, slice, minimize, timeout, preplay_timeout, ...})
   136     minimize_command
   137     ({comment, state, goal, subgoal, subgoal_count, factss, ...} : prover_problem) =
   138   let
   139     val thy = Proof.theory_of state
   140     val ctxt = Proof.context_of state
   141 
   142     val {exec, arguments, proof_delims, known_failures, prem_role, best_slices, best_max_mono_iters,
   143       best_max_new_mono_instances, ...} = get_atp thy name ()
   144 
   145     val atp_mode = if Config.get ctxt atp_completish then Sledgehammer_Completish else Sledgehammer
   146     val (_, hyp_ts, concl_t) = strip_subgoal goal subgoal ctxt
   147     val (dest_dir, problem_prefix) =
   148       if overlord then overlord_file_location_of_prover name
   149       else (Config.get ctxt atp_dest_dir, Config.get ctxt atp_problem_prefix)
   150     val problem_file_name =
   151       Path.basic (problem_prefix ^ (if overlord then "" else serial_string ()) ^
   152                   suffix_of_mode mode ^ "_" ^ string_of_int subgoal)
   153     val prob_path =
   154       if dest_dir = "" then
   155         File.tmp_path problem_file_name
   156       else if File.exists (Path.explode dest_dir) then
   157         Path.append (Path.explode dest_dir) problem_file_name
   158       else
   159         error ("No such directory: " ^ quote dest_dir ^ ".")
   160     val exec = exec ()
   161     val command0 =
   162       (case find_first (fn var => getenv var <> "") (fst exec) of
   163         SOME var =>
   164         let
   165           val pref = getenv var ^ "/"
   166           val paths = map (Path.explode o prefix pref) (snd exec)
   167         in
   168           (case find_first File.exists paths of
   169             SOME path => path
   170           | NONE => error ("Bad executable: " ^ Path.print (hd paths) ^ "."))
   171         end
   172       | NONE => error ("The environment variable " ^ quote (List.last (fst exec)) ^ " is not set."))
   173 
   174     fun split_time s =
   175       let
   176         val split = String.tokens (fn c => str c = "\n")
   177         val (output, t) = s |> split |> (try split_last #> the_default ([], "0")) |>> cat_lines
   178         val num = Scan.many1 Symbol.is_ascii_digit >> (fst o read_int)
   179         val digit = Scan.one Symbol.is_ascii_digit
   180         val num3 = digit ::: digit ::: (digit >> single) >> (fst o read_int)
   181         val time = num --| Scan.$$ "." -- num3 >> (fn (a, b) => a * 1000 + b)
   182         val as_time = raw_explode #> Scan.read Symbol.stopper time #> the_default 0
   183       in (output, as_time t |> Time.fromMilliseconds) end
   184 
   185     fun run () =
   186       let
   187         (* If slicing is disabled, we expand the last slice to fill the entire time available. *)
   188         val all_slices = best_slices ctxt
   189         val actual_slices = get_slices slice all_slices
   190 
   191         fun max_facts_of_slices (slices : (real * (slice_spec * string)) list) =
   192           fold (Integer.max o fst o #1 o fst o snd) slices 0
   193 
   194         val num_actual_slices = length actual_slices
   195         val max_fact_factor =
   196           Real.fromInt (case max_facts of NONE => max_facts_of_slices all_slices | SOME max => max)
   197           / Real.fromInt (max_facts_of_slices (map snd actual_slices))
   198 
   199         fun monomorphize_facts facts =
   200           let
   201             val ctxt =
   202               ctxt
   203               |> repair_monomorph_context max_mono_iters best_max_mono_iters max_new_mono_instances
   204                    best_max_new_mono_instances
   205             (* pseudo-theorem involving the same constants as the subgoal *)
   206             val subgoal_th =
   207               Logic.list_implies (hyp_ts, concl_t) |> Skip_Proof.make_thm thy
   208             val rths =
   209               facts |> chop mono_max_privileged_facts
   210                     |>> map (pair 1 o snd)
   211                     ||> map (pair 2 o snd)
   212                     |> op @
   213                     |> cons (0, subgoal_th)
   214           in
   215             Monomorph.monomorph atp_schematic_consts_of ctxt rths
   216             |> tl |> curry ListPair.zip (map fst facts)
   217             |> maps (fn (name, rths) => map (pair name o zero_var_indexes o snd) rths)
   218           end
   219 
   220         fun run_slice time_left (cache_key, cache_value) (slice, (time_frac,
   221             (key as ((best_max_facts, best_fact_filter), format, best_type_enc, best_lam_trans,
   222                best_uncurried_aliases),
   223              extra))) =
   224           let
   225             val effective_fact_filter = fact_filter |> the_default best_fact_filter
   226             val facts = get_facts_of_filter effective_fact_filter factss
   227             val num_facts =
   228               Real.ceil (max_fact_factor * Real.fromInt best_max_facts) + max_fact_factor_fudge
   229               |> Integer.min (length facts)
   230             val strictness = if strict then Strict else Non_Strict
   231             val type_enc = type_enc |> choose_type_enc strictness best_type_enc format
   232             val sound = is_type_enc_sound type_enc
   233             val real_ms = Real.fromInt o Time.toMilliseconds
   234             val slice_timeout =
   235               (real_ms time_left
   236                |> (if slice < num_actual_slices - 1 then
   237                      curry Real.min (time_frac * real_ms timeout)
   238                    else
   239                      I))
   240               * 0.001
   241               |> seconds
   242             val generous_slice_timeout =
   243               if mode = MaSh then one_day else Time.+ (slice_timeout, atp_timeout_slack)
   244             val _ =
   245               if debug then
   246                 quote name ^ " slice #" ^ string_of_int (slice + 1) ^
   247                 " with " ^ string_of_int num_facts ^ " fact" ^
   248                 plural_s num_facts ^ " for " ^ string_of_time slice_timeout ^ "..."
   249                 |> Output.urgent_message
   250               else
   251                 ()
   252             val readable_names = not (Config.get ctxt atp_full_names)
   253             val lam_trans = lam_trans |> the_default best_lam_trans
   254             val uncurried_aliases = uncurried_aliases |> the_default best_uncurried_aliases
   255             val value as (atp_problem, _, fact_names, _, _) =
   256               if cache_key = SOME key then
   257                 cache_value
   258               else
   259                 facts
   260                 |> not sound ? filter_out (is_dangerous_prop ctxt o prop_of o snd)
   261                 |> take num_facts
   262                 |> not (is_type_enc_polymorphic type_enc) ? monomorphize_facts
   263                 |> map (apsnd prop_of)
   264                 |> prepare_atp_problem ctxt format prem_role type_enc atp_mode lam_trans
   265                      uncurried_aliases readable_names true hyp_ts concl_t
   266 
   267             fun sel_weights () = atp_problem_selection_weights atp_problem
   268             fun ord_info () = atp_problem_term_order_info atp_problem
   269 
   270             val ord = effective_term_order ctxt name
   271             val full_proof = isar_proofs |> the_default (mode = Minimize)
   272             val args =
   273               arguments ctxt full_proof extra slice_timeout (File.shell_path prob_path)
   274                 (ord, ord_info, sel_weights)
   275             val command =
   276               "(exec 2>&1; " ^ File.shell_path command0 ^ " " ^ args ^ " " ^ ")"
   277               |> enclose "TIMEFORMAT='%3R'; { time " " ; }"
   278             val _ =
   279               atp_problem
   280               |> lines_of_atp_problem format ord ord_info
   281               |> cons ("% " ^ command ^ "\n" ^ (if comment = "" then "" else "% " ^ comment ^ "\n"))
   282               |> File.write_list prob_path
   283             val ((output, run_time), (atp_proof, outcome)) =
   284               TimeLimit.timeLimit generous_slice_timeout Isabelle_System.bash_output command
   285               |>> (if overlord then prefix ("% " ^ command ^ "\n% " ^ timestamp () ^ "\n") else I)
   286               |> fst |> split_time
   287               |> (fn accum as (output, _) =>
   288                      (accum,
   289                       extract_tstplike_proof_and_outcome verbose proof_delims known_failures output
   290                       |>> atp_proof_of_tstplike_proof name atp_problem
   291                       handle UNRECOGNIZED_ATP_PROOF () => ([], SOME ProofIncomplete)))
   292               handle TimeLimit.TimeOut => (("", slice_timeout), ([], SOME TimedOut))
   293             val outcome =
   294               (case outcome of
   295                 NONE =>
   296                 (case used_facts_in_unsound_atp_proof ctxt fact_names atp_proof
   297                       |> Option.map (sort string_ord) of
   298                   SOME facts =>
   299                   let val failure = UnsoundProof (is_type_enc_sound type_enc, facts) in
   300                     if debug then (warning (string_of_atp_failure failure); NONE) else SOME failure
   301                   end
   302                 | NONE => NONE)
   303               | _ => outcome)
   304           in
   305             ((SOME key, value), (output, run_time, facts, atp_proof, outcome))
   306           end
   307 
   308         val timer = Timer.startRealTimer ()
   309 
   310         fun maybe_run_slice slice (result as (cache, (_, run_time0, _, _, SOME _))) =
   311             let val time_left = Time.- (timeout, Timer.checkRealTimer timer) in
   312               if Time.<= (time_left, Time.zeroTime) then
   313                 result
   314               else
   315                 run_slice time_left cache slice
   316                 |> (fn (cache, (output, run_time, used_from, atp_proof, outcome)) =>
   317                   (cache, (output, Time.+ (run_time0, run_time), used_from, atp_proof, outcome)))
   318             end
   319           | maybe_run_slice _ result = result
   320       in
   321         ((NONE, ([], Symtab.empty, Vector.fromList [], [], Symtab.empty)),
   322          ("", Time.zeroTime, [], [], SOME InternalError))
   323         |> fold maybe_run_slice actual_slices
   324       end
   325 
   326     (* If the problem file has not been exported, remove it; otherwise, export
   327        the proof file too. *)
   328     fun clean_up () = if dest_dir = "" then (try File.rm prob_path; ()) else ()
   329     fun export (_, (output, _, _, _, _)) =
   330       if dest_dir = "" then ()
   331       else File.write (Path.explode (Path.implode prob_path ^ "_proof")) output
   332 
   333     val ((_, (_, pool, fact_names, lifted, sym_tab)),
   334          (output, run_time, used_from, atp_proof, outcome)) =
   335       with_cleanup clean_up run () |> tap export
   336 
   337     val important_message =
   338       if mode = Normal andalso random_range 0 (atp_important_message_keep_quotient - 1) = 0 then
   339         extract_important_message output
   340       else
   341         ""
   342 
   343     val (used_facts, preplay, message, message_tail) =
   344       (case outcome of
   345         NONE =>
   346         let
   347           val used_facts = used_facts_in_atp_proof ctxt fact_names atp_proof
   348           val needs_full_types = is_typed_helper_used_in_atp_proof atp_proof
   349           val meths =
   350             bunch_of_proof_methods (smt_proofs <> SOME false) needs_full_types
   351               (if atp_proof_prefers_lifting atp_proof then liftingN else hide_lamsN)
   352         in
   353           (used_facts,
   354            Lazy.lazy (fn () =>
   355              let val used_pairs = used_from |> filter_used_facts false used_facts in
   356                play_one_line_proof mode verbose preplay_timeout used_pairs state subgoal (hd meths)
   357                  meths
   358              end),
   359            fn preplay =>
   360               let
   361                 val _ = if verbose then Output.urgent_message "Generating proof text..." else ()
   362                 fun isar_params () =
   363                   let
   364                     val metis_type_enc =
   365                       if is_typed_helper_used_in_atp_proof atp_proof then SOME full_typesN else NONE
   366                     val metis_lam_trans =
   367                       if atp_proof_prefers_lifting atp_proof then SOME liftingN else NONE
   368                     val atp_proof =
   369                       atp_proof
   370                       |> termify_atp_proof ctxt pool lifted sym_tab
   371                       |> introduce_spass_skolem
   372                       |> factify_atp_proof fact_names hyp_ts concl_t
   373                   in
   374                     (verbose, (metis_type_enc, metis_lam_trans), preplay_timeout, compress_isar,
   375                      try0_isar, minimize <> SOME false, atp_proof, goal)
   376                   end
   377                 val one_line_params =
   378                   (preplay, proof_banner mode name, used_facts,
   379                    choose_minimize_command thy params minimize_command name preplay, subgoal,
   380                    subgoal_count)
   381                 val num_chained = length (#facts (Proof.goal state))
   382               in
   383                 proof_text ctxt debug isar_proofs smt_proofs isar_params num_chained one_line_params
   384               end,
   385            (if verbose then "\nATP real CPU time: " ^ string_of_time run_time ^ "." else "") ^
   386            (if important_message <> "" then
   387               "\n\nImportant message from Dr. Geoff Sutcliffe:\n" ^ important_message
   388             else
   389               ""))
   390         end
   391       | SOME failure =>
   392         ([], Lazy.value (Metis_Method (NONE, NONE), Play_Failed),
   393          fn _ => string_of_atp_failure failure, ""))
   394   in
   395     {outcome = outcome, used_facts = used_facts, used_from = used_from, run_time = run_time,
   396      preplay = preplay, message = message, message_tail = message_tail}
   397   end
   398 
   399 end;