src/HOL/Tools/ATP_Manager/atp_wrapper.ML
changeset 36369 d2cd0d04b8e6
parent 36289 f75b6a3e1450
child 36370 a4f601daa175
equal deleted inserted replaced
36294:59a55dfa76d5 36369:d2cd0d04b8e6
    47 
    47 
    48 type prover_config =
    48 type prover_config =
    49  {home: string,
    49  {home: string,
    50   executable: string,
    50   executable: string,
    51   arguments: Time.time -> string,
    51   arguments: Time.time -> string,
       
    52   proof_delims: (string * string) list,
    52   known_failures: (string list * string) list,
    53   known_failures: (string list * string) list,
    53   max_new_clauses: int,
    54   max_new_clauses: int,
    54   prefers_theory_relevant: bool};
    55   prefers_theory_relevant: bool};
    55 
    56 
    56 
    57 
    58 
    59 
    59 fun with_path cleanup after f path =
    60 fun with_path cleanup after f path =
    60   Exn.capture f path
    61   Exn.capture f path
    61   |> tap (fn _ => cleanup path)
    62   |> tap (fn _ => cleanup path)
    62   |> Exn.release
    63   |> Exn.release
    63   |> tap (after path);
    64   |> tap (after path)
    64 
    65 
    65 fun find_known_failure known_failures proof =
    66 (* Splits by the first possible of a list of delimiters. *)
    66   case map_filter (fn (patterns, message) =>
    67 fun extract_proof delims output =
    67                       if exists (fn pattern => String.isSubstring pattern proof)
    68   case pairself (find_first (fn s => String.isSubstring s output))
    68                                 patterns then
    69                 (ListPair.unzip delims) of
    69                         SOME message
    70     (SOME begin_delim, SOME end_delim) =>
    70                       else
    71     output |> first_field begin_delim |> the |> snd
    71                         NONE) known_failures of
    72            |> first_field end_delim |> the |> fst
    72     [] => if is_proof_well_formed proof then ""
    73   | _ => ""
    73           else "Error: The ATP output is ill-formed."
    74 
    74   | (message :: _) => message
    75 fun extract_proof_or_failure proof_delims known_failures output =
       
    76   case map_filter
       
    77            (fn (patterns, message) =>
       
    78                if exists (fn p => String.isSubstring p output) patterns then
       
    79                  SOME message
       
    80                else
       
    81                  NONE) known_failures of
       
    82     [] => (case extract_proof proof_delims output of
       
    83              "" => ("", "Error: The ATP output is malformed.")
       
    84            | proof => (proof, ""))
       
    85   | (message :: _) => ("", message)
    75 
    86 
    76 fun generic_prover overlord get_facts prepare write_file home executable args
    87 fun generic_prover overlord get_facts prepare write_file home executable args
    77         known_failures name
    88         proof_delims known_failures name
    78         ({debug, full_types, explicit_apply, isar_proof, modulus, sorts, ...}
    89         ({debug, full_types, explicit_apply, isar_proof, modulus, sorts, ...}
    79          : params) minimize_command
    90          : params) minimize_command
    80         ({subgoal, goal, relevance_override, axiom_clauses, filtered_clauses}
    91         ({subgoal, goal, relevance_override, axiom_clauses, filtered_clauses}
    81          : problem) =
    92          : problem) =
    82   let
    93   let
   133        else
   144        else
   134          "")
   145          "")
   135     fun split_time s =
   146     fun split_time s =
   136       let
   147       let
   137         val split = String.tokens (fn c => str c = "\n");
   148         val split = String.tokens (fn c => str c = "\n");
   138         val (proof, t) = s |> split |> split_last |> apfst cat_lines;
   149         val (output, t) = s |> split |> split_last |> apfst cat_lines;
   139         fun as_num f = f >> (fst o read_int);
   150         fun as_num f = f >> (fst o read_int);
   140         val num = as_num (Scan.many1 Symbol.is_ascii_digit);
   151         val num = as_num (Scan.many1 Symbol.is_ascii_digit);
   141         val digit = Scan.one Symbol.is_ascii_digit;
   152         val digit = Scan.one Symbol.is_ascii_digit;
   142         val num3 = as_num (digit ::: digit ::: (digit >> single));
   153         val num3 = as_num (digit ::: digit ::: (digit >> single));
   143         val time = num --| Scan.$$ "." -- num3 >> (fn (a, b) => a * 1000 + b);
   154         val time = num --| Scan.$$ "." -- num3 >> (fn (a, b) => a * 1000 + b);
   144         val as_time = the_default 0 o Scan.read Symbol.stopper time o explode;
   155         val as_time = the_default 0 o Scan.read Symbol.stopper time o explode;
   145       in (proof, as_time t) end;
   156       in (output, as_time t) end;
   146     fun split_time' s =
   157     fun split_time' s =
   147       if Config.get ctxt measure_runtime then split_time s else (s, 0)
   158       if Config.get ctxt measure_runtime then split_time s else (s, 0)
   148     fun run_on probfile =
   159     fun run_on probfile =
   149       if File.exists command then
   160       if File.exists command then
   150         write_file full_types explicit_apply probfile clauses
   161         write_file full_types explicit_apply probfile clauses
   152       else error ("Bad executable: " ^ Path.implode command ^ ".");
   163       else error ("Bad executable: " ^ Path.implode command ^ ".");
   153 
   164 
   154     (* If the problem file has not been exported, remove it; otherwise, export
   165     (* If the problem file has not been exported, remove it; otherwise, export
   155        the proof file too. *)
   166        the proof file too. *)
   156     fun cleanup probfile = if destdir' = "" then try File.rm probfile else NONE;
   167     fun cleanup probfile = if destdir' = "" then try File.rm probfile else NONE;
   157     fun export probfile (((proof, _), _), _) =
   168     fun export probfile (((output, _), _), _) =
   158       if destdir' = "" then
   169       if destdir' = "" then
   159         ()
   170         ()
   160       else
   171       else
   161         File.write (Path.explode (Path.implode probfile ^ "_proof"))
   172         File.write (Path.explode (Path.implode probfile ^ "_proof"))
   162                    ((if overlord then
   173                    ((if overlord then
   163                        "% " ^ command_line probfile ^ "\n% " ^ timestamp () ^
   174                        "% " ^ command_line probfile ^ "\n% " ^ timestamp () ^
   164                        "\n"
   175                        "\n"
   165                      else
   176                      else
   166                         "") ^ proof)
   177                         "") ^ output)
   167 
   178 
   168     val (((proof, atp_run_time_in_msecs), rc), _) =
   179     val (((output, atp_run_time_in_msecs), rc), _) =
   169       with_path cleanup export run_on (prob_pathname subgoal);
   180       with_path cleanup export run_on (prob_pathname subgoal);
   170 
   181 
   171     (* Check for success and print out some information on failure. *)
   182     (* Check for success and print out some information on failure. *)
   172     val failure = find_known_failure known_failures proof;
   183     val (proof, failure) =
   173     val success = rc = 0 andalso failure = "";
   184       extract_proof_or_failure proof_delims known_failures output
       
   185     val success = (rc = 0 andalso failure = "")
   174     val (message, relevant_thm_names) =
   186     val (message, relevant_thm_names) =
   175       if success then
   187       if success then
   176         proof_text isar_proof debug modulus sorts ctxt
   188         proof_text isar_proof debug modulus sorts ctxt
   177                    (minimize_command, proof, internal_thm_names, th, subgoal)
   189                    (minimize_command, proof, internal_thm_names, th, subgoal)
   178       else if failure <> "" then
   190       else if failure <> "" then
   179         (failure ^ "\n", [])
   191         (failure ^ "\n", [])
   180       else
   192       else
   181         ("Unknown ATP error: " ^ proof ^ ".\n", [])
   193         ("Unknown ATP error: " ^ output ^ ".\n", [])
   182   in
   194   in
   183     {success = success, message = message,
   195     {success = success, message = message,
   184      relevant_thm_names = relevant_thm_names,
   196      relevant_thm_names = relevant_thm_names,
   185      atp_run_time_in_msecs = atp_run_time_in_msecs, proof = proof,
   197      atp_run_time_in_msecs = atp_run_time_in_msecs, output = output,
   186      internal_thm_names = internal_thm_names,
   198      proof = proof, internal_thm_names = internal_thm_names,
   187      filtered_clauses = the_filtered_clauses}
   199      filtered_clauses = the_filtered_clauses}
   188   end;
   200   end;
   189 
   201 
   190 
   202 
   191 (* generic TPTP-based provers *)
   203 (* generic TPTP-based provers *)
   192 
   204 
   193 fun generic_tptp_prover
   205 fun generic_tptp_prover
   194         (name, {home, executable, arguments, known_failures, max_new_clauses,
   206         (name, {home, executable, arguments, proof_delims, known_failures,
   195                 prefers_theory_relevant})
   207                 max_new_clauses, prefers_theory_relevant})
   196         (params as {debug, overlord, respect_no_atp, relevance_threshold,
   208         (params as {debug, overlord, respect_no_atp, relevance_threshold,
   197                     convergence, theory_relevant, higher_order, follow_defs,
   209                     convergence, theory_relevant, higher_order, follow_defs,
   198                     isar_proof, ...})
   210                     isar_proof, ...})
   199         minimize_command timeout =
   211         minimize_command timeout =
   200   generic_prover overlord
   212   generic_prover overlord
   201       (get_relevant_facts respect_no_atp relevance_threshold convergence
   213       (get_relevant_facts respect_no_atp relevance_threshold convergence
   202                           higher_order follow_defs max_new_clauses
   214                           higher_order follow_defs max_new_clauses
   203                           (the_default prefers_theory_relevant theory_relevant))
   215                           (the_default prefers_theory_relevant theory_relevant))
   204       (prepare_clauses higher_order false)
   216       (prepare_clauses higher_order false)
   205       (write_tptp_file (debug andalso overlord andalso not isar_proof)) home
   217       (write_tptp_file (debug andalso overlord andalso not isar_proof)) home
   206       executable (arguments timeout) known_failures name params minimize_command
   218       executable (arguments timeout) proof_delims known_failures name params
       
   219       minimize_command
   207 
   220 
   208 fun tptp_prover name p = (name, generic_tptp_prover (name, p));
   221 fun tptp_prover name p = (name, generic_tptp_prover (name, p));
   209 
   222 
   210 
   223 
   211 (** common provers **)
   224 (** common provers **)
   219 val vampire_config : prover_config =
   232 val vampire_config : prover_config =
   220   {home = getenv "VAMPIRE_HOME",
   233   {home = getenv "VAMPIRE_HOME",
   221    executable = "vampire",
   234    executable = "vampire",
   222    arguments = (fn timeout => "--output_syntax tptp --mode casc -t " ^
   235    arguments = (fn timeout => "--output_syntax tptp --mode casc -t " ^
   223                               string_of_int (generous_to_secs timeout)),
   236                               string_of_int (generous_to_secs timeout)),
       
   237    proof_delims = [("=========== Refutation ==========",
       
   238                     "======= End of refutation =======")],
   224    known_failures =
   239    known_failures =
   225      [(["Satisfiability detected", "CANNOT PROVE"],
   240      [(["Satisfiability detected", "CANNOT PROVE"],
   226        "The ATP problem is unprovable."),
   241        "The ATP problem is unprovable."),
   227       (["Refutation not found"],
   242       (["Refutation not found"],
   228        "The ATP failed to determine the problem's status.")],
   243        "The ATP failed to determine the problem's status.")],
   231 val vampire = tptp_prover "vampire" vampire_config
   246 val vampire = tptp_prover "vampire" vampire_config
   232 
   247 
   233 
   248 
   234 (* E prover *)
   249 (* E prover *)
   235 
   250 
       
   251 val tstp_proof_delims =
       
   252   ("# SZS output start CNFRefutation.", "# SZS output end CNFRefutation")
       
   253 
   236 val e_config : prover_config =
   254 val e_config : prover_config =
   237   {home = getenv "E_HOME",
   255   {home = getenv "E_HOME",
   238    executable = "eproof",
   256    executable = "eproof",
   239    arguments = (fn timeout => "--tstp-in --tstp-out -l5 -xAutoDev \
   257    arguments = (fn timeout => "--tstp-in --tstp-out -l5 -xAutoDev \
   240                               \-tAutoDev --silent --cpu-limit=" ^
   258                               \-tAutoDev --silent --cpu-limit=" ^
   241                               string_of_int (generous_to_secs timeout)),
   259                               string_of_int (generous_to_secs timeout)),
       
   260    proof_delims = [tstp_proof_delims],
   242    known_failures =
   261    known_failures =
   243        [(["SZS status: Satisfiable", "SZS status Satisfiable"],
   262        [(["SZS status: Satisfiable", "SZS status Satisfiable"],
   244          "The ATP problem is unprovable."),
   263          "The ATP problem is unprovable."),
   245         (["SZS status: ResourceOut", "SZS status ResourceOut"],
   264         (["SZS status: ResourceOut", "SZS status ResourceOut"],
   246          "The ATP ran out of resources."),
   265          "The ATP ran out of resources."),
   252 
   271 
   253 
   272 
   254 (* SPASS *)
   273 (* SPASS *)
   255 
   274 
   256 fun generic_dfg_prover
   275 fun generic_dfg_prover
   257         (name, {home, executable, arguments, known_failures, max_new_clauses,
   276         (name, {home, executable, arguments, proof_delims, known_failures,
   258                 prefers_theory_relevant})
   277                 max_new_clauses, prefers_theory_relevant})
   259         (params as {overlord, respect_no_atp, relevance_threshold, convergence,
   278         (params as {overlord, respect_no_atp, relevance_threshold, convergence,
   260                     theory_relevant, higher_order, follow_defs, ...})
   279                     theory_relevant, higher_order, follow_defs, ...})
   261         minimize_command timeout =
   280         minimize_command timeout =
   262   generic_prover overlord
   281   generic_prover overlord
   263       (get_relevant_facts respect_no_atp relevance_threshold convergence
   282       (get_relevant_facts respect_no_atp relevance_threshold convergence
   264                           higher_order follow_defs max_new_clauses
   283                           higher_order follow_defs max_new_clauses
   265                           (the_default prefers_theory_relevant theory_relevant))
   284                           (the_default prefers_theory_relevant theory_relevant))
   266       (prepare_clauses higher_order true) write_dfg_file home executable
   285       (prepare_clauses higher_order true) write_dfg_file home executable
   267       (arguments timeout) known_failures name params minimize_command
   286       (arguments timeout) proof_delims known_failures name params
       
   287       minimize_command
   268 
   288 
   269 fun dfg_prover name p = (name, generic_dfg_prover (name, p))
   289 fun dfg_prover name p = (name, generic_dfg_prover (name, p))
   270 
   290 
   271 (* The "-VarWeight=3" option helps the higher-order problems, probably by
   291 (* The "-VarWeight=3" option helps the higher-order problems, probably by
   272    counteracting the presence of "hAPP". *)
   292    counteracting the presence of "hAPP". *)
   274   {home = getenv "SPASS_HOME",
   294   {home = getenv "SPASS_HOME",
   275    executable = "SPASS",
   295    executable = "SPASS",
   276    arguments = (fn timeout => "-Auto -SOS=1 -PGiven=0 -PProblem=0 -Splits=0" ^
   296    arguments = (fn timeout => "-Auto -SOS=1 -PGiven=0 -PProblem=0 -Splits=0" ^
   277      " -FullRed=0 -DocProof -VarWeight=3 -TimeLimit=" ^
   297      " -FullRed=0 -DocProof -VarWeight=3 -TimeLimit=" ^
   278      string_of_int (generous_to_secs timeout)),
   298      string_of_int (generous_to_secs timeout)),
       
   299    proof_delims = [("Here is a proof", "Formulae used in the proof")],
   279    known_failures =
   300    known_failures =
   280      [(["SPASS beiseite: Completion found."], "The ATP problem is unprovable."),
   301      [(["SPASS beiseite: Completion found."], "The ATP problem is unprovable."),
   281       (["SPASS beiseite: Ran out of time."], "The ATP timed out."),
   302       (["SPASS beiseite: Ran out of time."], "The ATP timed out."),
   282       (["SPASS beiseite: Maximal number of loops exceeded."],
   303       (["SPASS beiseite: Maximal number of loops exceeded."],
   283        "The ATP hit its loop limit.")],
   304        "The ATP hit its loop limit.")],
   295 
   316 
   296 val spass_tptp_config =
   317 val spass_tptp_config =
   297   {home = #home spass_config,
   318   {home = #home spass_config,
   298    executable = #executable spass_config,
   319    executable = #executable spass_config,
   299    arguments = prefix "-TPTP " o #arguments spass_config,
   320    arguments = prefix "-TPTP " o #arguments spass_config,
       
   321    proof_delims = #proof_delims spass_config,
   300    known_failures =
   322    known_failures =
   301      #known_failures spass_config @
   323      #known_failures spass_config @
   302      [(["unrecognized option `-TPTP'", "Unrecognized option TPTP"],
   324      [(["unrecognized option `-TPTP'", "Unrecognized option TPTP"],
   303        "Warning: Sledgehammer requires a more recent version of SPASS with \
   325        "Warning: Sledgehammer requires a more recent version of SPASS with \
   304        \support for the TPTP syntax. To install it, download and untar the \
   326        \support for the TPTP syntax. To install it, download and untar the \
   341 val remote_known_failures =
   363 val remote_known_failures =
   342   [(["Remote-script could not extract proof"],
   364   [(["Remote-script could not extract proof"],
   343     "Error: The remote ATP proof is ill-formed.")]
   365     "Error: The remote ATP proof is ill-formed.")]
   344 
   366 
   345 fun remote_prover_config prover_prefix args
   367 fun remote_prover_config prover_prefix args
   346         ({known_failures, max_new_clauses, prefers_theory_relevant, ...}
   368         ({proof_delims, known_failures, max_new_clauses,
   347          : prover_config) : prover_config =
   369           prefers_theory_relevant, ...} : prover_config) : prover_config =
   348   {home = getenv "ISABELLE_ATP_MANAGER",
   370   {home = getenv "ISABELLE_ATP_MANAGER",
   349    executable = "SystemOnTPTP",
   371    executable = "SystemOnTPTP",
   350    arguments = (fn timeout =>
   372    arguments = (fn timeout =>
   351      args ^ " -t " ^ string_of_int (generous_to_secs timeout) ^ " -s " ^
   373      args ^ " -t " ^ string_of_int (generous_to_secs timeout) ^ " -s " ^
   352      the_system prover_prefix),
   374      the_system prover_prefix),
       
   375    proof_delims = insert (op =) tstp_proof_delims proof_delims,
   353    known_failures = remote_known_failures @ known_failures,
   376    known_failures = remote_known_failures @ known_failures,
   354    max_new_clauses = max_new_clauses,
   377    max_new_clauses = max_new_clauses,
   355    prefers_theory_relevant = prefers_theory_relevant}
   378    prefers_theory_relevant = prefers_theory_relevant}
   356 
   379 
   357 val remote_vampire =
   380 val remote_vampire =