src/HOL/Tools/ATP_Manager/atp_wrapper.ML
changeset 36370 a4f601daa175
parent 36369 d2cd0d04b8e6
child 36371 8c83ea1a7740
equal deleted inserted replaced
36369:d2cd0d04b8e6 36370:a4f601daa175
    41   Attrib.config_bool "atp_measure_runtime" (K false);
    41   Attrib.config_bool "atp_measure_runtime" (K false);
    42 
    42 
    43 
    43 
    44 (* prover configuration *)
    44 (* prover configuration *)
    45 
    45 
       
    46 type prover_config =
       
    47   {home: string,
       
    48    executable: string,
       
    49    arguments: Time.time -> string,
       
    50    proof_delims: (string * string) list,
       
    51    known_failures: (failure * string) list,
       
    52    max_new_clauses: int,
       
    53    prefers_theory_relevant: bool};
       
    54 
       
    55 
       
    56 (* basic template *)
       
    57 
    46 val remotify = prefix "remote_"
    58 val remotify = prefix "remote_"
    47 
       
    48 type prover_config =
       
    49  {home: string,
       
    50   executable: string,
       
    51   arguments: Time.time -> string,
       
    52   proof_delims: (string * string) list,
       
    53   known_failures: (string list * string) list,
       
    54   max_new_clauses: int,
       
    55   prefers_theory_relevant: bool};
       
    56 
       
    57 
       
    58 (* basic template *)
       
    59 
    59 
    60 fun with_path cleanup after f path =
    60 fun with_path cleanup after f path =
    61   Exn.capture f path
    61   Exn.capture f path
    62   |> tap (fn _ => cleanup path)
    62   |> tap (fn _ => cleanup path)
    63   |> Exn.release
    63   |> Exn.release
    70     (SOME begin_delim, SOME end_delim) =>
    70     (SOME begin_delim, SOME end_delim) =>
    71     output |> first_field begin_delim |> the |> snd
    71     output |> first_field begin_delim |> the |> snd
    72            |> first_field end_delim |> the |> fst
    72            |> first_field end_delim |> the |> fst
    73   | _ => ""
    73   | _ => ""
    74 
    74 
    75 fun extract_proof_or_failure proof_delims known_failures output =
    75 fun extract_proof_and_outcome res_code proof_delims known_failures output =
    76   case map_filter
    76   case map_filter (fn (failure, pattern) =>
    77            (fn (patterns, message) =>
    77                       if String.isSubstring pattern output then SOME failure
    78                if exists (fn p => String.isSubstring p output) patterns then
    78                       else NONE) known_failures of
    79                  SOME message
       
    80                else
       
    81                  NONE) known_failures of
       
    82     [] => (case extract_proof proof_delims output of
    79     [] => (case extract_proof proof_delims output of
    83              "" => ("", "Error: The ATP output is malformed.")
    80              "" => ("", SOME UnknownError)
    84            | proof => (proof, ""))
    81            | proof => if res_code = 0 then (proof, NONE)
    85   | (message :: _) => ("", message)
    82                       else ("", SOME UnknownError))
       
    83   | (failure :: _) => ("", SOME failure)
       
    84 
       
    85 fun string_for_failure Unprovable = "The ATP problem is unprovable."
       
    86   | string_for_failure TimedOut = "Timed out."
       
    87   | string_for_failure OutOfResources = "The ATP ran out of resources."
       
    88   | string_for_failure OldSpass =
       
    89     "Warning: Sledgehammer requires a more recent version of SPASS with \
       
    90     \support for the TPTP syntax. To install it, download and untar the \
       
    91     \package \"http://isabelle.in.tum.de/~blanchet/spass-3.7.tgz\" and add the \
       
    92     \\"spass-3.7\" directory's full path to \"" ^
       
    93     Path.implode (Path.expand (Path.appends
       
    94         (Path.variable "ISABELLE_HOME_USER" ::
       
    95          map Path.basic ["etc", "components"]))) ^
       
    96     "\" on a line of its own."
       
    97   | string_for_failure MalformedOutput = "Error: The ATP output is malformed."
       
    98   | string_for_failure UnknownError = "Error: An unknown ATP error occurred."
    86 
    99 
    87 fun generic_prover overlord get_facts prepare write_file home executable args
   100 fun generic_prover overlord get_facts prepare write_file home executable args
    88         proof_delims known_failures name
   101         proof_delims known_failures name
    89         ({debug, full_types, explicit_apply, isar_proof, modulus, sorts, ...}
   102         ({debug, full_types, explicit_apply, isar_proof, modulus, sorts, ...}
    90          : params) minimize_command
   103          : params) minimize_command
   174                        "% " ^ command_line probfile ^ "\n% " ^ timestamp () ^
   187                        "% " ^ command_line probfile ^ "\n% " ^ timestamp () ^
   175                        "\n"
   188                        "\n"
   176                      else
   189                      else
   177                         "") ^ output)
   190                         "") ^ output)
   178 
   191 
   179     val (((output, atp_run_time_in_msecs), rc), _) =
   192     val (((output, atp_run_time_in_msecs), res_code), _) =
   180       with_path cleanup export run_on (prob_pathname subgoal);
   193       with_path cleanup export run_on (prob_pathname subgoal);
   181 
   194 
   182     (* Check for success and print out some information on failure. *)
   195     (* Check for success and print out some information on failure. *)
   183     val (proof, failure) =
   196     val (proof, outcome) =
   184       extract_proof_or_failure proof_delims known_failures output
   197       extract_proof_and_outcome res_code proof_delims known_failures output
   185     val success = (rc = 0 andalso failure = "")
       
   186     val (message, relevant_thm_names) =
   198     val (message, relevant_thm_names) =
   187       if success then
   199       case outcome of
   188         proof_text isar_proof debug modulus sorts ctxt
   200         NONE => proof_text isar_proof debug modulus sorts ctxt
   189                    (minimize_command, proof, internal_thm_names, th, subgoal)
   201                            (minimize_command, proof, internal_thm_names, th,
   190       else if failure <> "" then
   202                             subgoal)
   191         (failure ^ "\n", [])
   203       | SOME failure => (string_for_failure failure ^ "\n", [])
   192       else
       
   193         ("Unknown ATP error: " ^ output ^ ".\n", [])
       
   194   in
   204   in
   195     {success = success, message = message,
   205     {outcome = outcome, message = message,
   196      relevant_thm_names = relevant_thm_names,
   206      relevant_thm_names = relevant_thm_names,
   197      atp_run_time_in_msecs = atp_run_time_in_msecs, output = output,
   207      atp_run_time_in_msecs = atp_run_time_in_msecs, output = output,
   198      proof = proof, internal_thm_names = internal_thm_names,
   208      proof = proof, internal_thm_names = internal_thm_names,
   199      filtered_clauses = the_filtered_clauses}
   209      filtered_clauses = the_filtered_clauses}
   200   end;
   210   end;
   235    arguments = (fn timeout => "--output_syntax tptp --mode casc -t " ^
   245    arguments = (fn timeout => "--output_syntax tptp --mode casc -t " ^
   236                               string_of_int (generous_to_secs timeout)),
   246                               string_of_int (generous_to_secs timeout)),
   237    proof_delims = [("=========== Refutation ==========",
   247    proof_delims = [("=========== Refutation ==========",
   238                     "======= End of refutation =======")],
   248                     "======= End of refutation =======")],
   239    known_failures =
   249    known_failures =
   240      [(["Satisfiability detected", "CANNOT PROVE"],
   250      [(Unprovable, "Satisfiability detected"),
   241        "The ATP problem is unprovable."),
   251       (OutOfResources, "CANNOT PROVE"),
   242       (["Refutation not found"],
   252       (OutOfResources, "Refutation not found")],
   243        "The ATP failed to determine the problem's status.")],
       
   244    max_new_clauses = 60,
   253    max_new_clauses = 60,
   245    prefers_theory_relevant = false}
   254    prefers_theory_relevant = false}
   246 val vampire = tptp_prover "vampire" vampire_config
   255 val vampire = tptp_prover "vampire" vampire_config
   247 
   256 
   248 
   257 
   257    arguments = (fn timeout => "--tstp-in --tstp-out -l5 -xAutoDev \
   266    arguments = (fn timeout => "--tstp-in --tstp-out -l5 -xAutoDev \
   258                               \-tAutoDev --silent --cpu-limit=" ^
   267                               \-tAutoDev --silent --cpu-limit=" ^
   259                               string_of_int (generous_to_secs timeout)),
   268                               string_of_int (generous_to_secs timeout)),
   260    proof_delims = [tstp_proof_delims],
   269    proof_delims = [tstp_proof_delims],
   261    known_failures =
   270    known_failures =
   262        [(["SZS status: Satisfiable", "SZS status Satisfiable"],
   271      [(Unprovable, "SZS status: Satisfiable"),
   263          "The ATP problem is unprovable."),
   272       (Unprovable, "SZS status Satisfiable"),
   264         (["SZS status: ResourceOut", "SZS status ResourceOut"],
   273       (TimedOut, "Failure: Resource limit exceeded (time)"),
   265          "The ATP ran out of resources."),
   274       (TimedOut, "time limit exceeded"),
   266         (["# Cannot determine problem status"],
   275       (OutOfResources,
   267          "The ATP failed to determine the problem's status.")],
   276        "# Cannot determine problem status within resource limit"),
       
   277       (OutOfResources, "SZS status: ResourceOut"),
       
   278       (OutOfResources, "SZS status ResourceOut")],
   268    max_new_clauses = 100,
   279    max_new_clauses = 100,
   269    prefers_theory_relevant = false}
   280    prefers_theory_relevant = false}
   270 val e = tptp_prover "e" e_config
   281 val e = tptp_prover "e" e_config
   271 
   282 
   272 
   283 
   296    arguments = (fn timeout => "-Auto -SOS=1 -PGiven=0 -PProblem=0 -Splits=0" ^
   307    arguments = (fn timeout => "-Auto -SOS=1 -PGiven=0 -PProblem=0 -Splits=0" ^
   297      " -FullRed=0 -DocProof -VarWeight=3 -TimeLimit=" ^
   308      " -FullRed=0 -DocProof -VarWeight=3 -TimeLimit=" ^
   298      string_of_int (generous_to_secs timeout)),
   309      string_of_int (generous_to_secs timeout)),
   299    proof_delims = [("Here is a proof", "Formulae used in the proof")],
   310    proof_delims = [("Here is a proof", "Formulae used in the proof")],
   300    known_failures =
   311    known_failures =
   301      [(["SPASS beiseite: Completion found."], "The ATP problem is unprovable."),
   312      [(Unprovable, "SPASS beiseite: Completion found"),
   302       (["SPASS beiseite: Ran out of time."], "The ATP timed out."),
   313       (TimedOut, "SPASS beiseite: Ran out of time"),
   303       (["SPASS beiseite: Maximal number of loops exceeded."],
   314       (OutOfResources, "SPASS beiseite: Maximal number of loops exceeded")],
   304        "The ATP hit its loop limit.")],
       
   305    max_new_clauses = 40,
   315    max_new_clauses = 40,
   306    prefers_theory_relevant = true}
   316    prefers_theory_relevant = true}
   307 val spass = dfg_prover "spass" spass_config
   317 val spass = dfg_prover "spass" spass_config
   308 
   318 
   309 (* SPASS 3.7 supports both the DFG and the TPTP syntax, whereas SPASS 3.0
   319 (* SPASS 3.7 supports both the DFG and the TPTP syntax, whereas SPASS 3.0
   319    executable = #executable spass_config,
   329    executable = #executable spass_config,
   320    arguments = prefix "-TPTP " o #arguments spass_config,
   330    arguments = prefix "-TPTP " o #arguments spass_config,
   321    proof_delims = #proof_delims spass_config,
   331    proof_delims = #proof_delims spass_config,
   322    known_failures =
   332    known_failures =
   323      #known_failures spass_config @
   333      #known_failures spass_config @
   324      [(["unrecognized option `-TPTP'", "Unrecognized option TPTP"],
   334      [(OldSpass, "unrecognized option `-TPTP'"),
   325        "Warning: Sledgehammer requires a more recent version of SPASS with \
   335       (OldSpass, "Unrecognized option TPTP")],
   326        \support for the TPTP syntax. To install it, download and untar the \
       
   327        \package \"http://isabelle.in.tum.de/~blanchet/spass-3.7.tgz\" and add \
       
   328        \the \"spass-3.7\" directory's full path to \"" ^
       
   329        Path.implode (Path.expand (Path.appends
       
   330            (Path.variable "ISABELLE_HOME_USER" ::
       
   331             map Path.basic ["etc", "components"]))) ^
       
   332        "\" on a line of its own.")],
       
   333    max_new_clauses = #max_new_clauses spass_config,
   336    max_new_clauses = #max_new_clauses spass_config,
   334    prefers_theory_relevant = #prefers_theory_relevant spass_config}
   337    prefers_theory_relevant = #prefers_theory_relevant spass_config}
   335 val spass_tptp = tptp_prover "spass_tptp" spass_tptp_config
   338 val spass_tptp = tptp_prover "spass_tptp" spass_tptp_config
   336 
   339 
   337 (* remote prover invocation via SystemOnTPTP *)
   340 (* remote prover invocation via SystemOnTPTP *)
   338 
   341 
   339 val systems = Synchronized.var "atp_wrapper_systems" ([]: string list);
   342 val systems = Synchronized.var "atp_wrapper_systems" ([]: string list);
   340 
   343 
   341 fun get_systems () =
   344 fun get_systems () =
   342   let
   345   case bash_output "\"$ISABELLE_ATP_MANAGER/SystemOnTPTP\" -w" of
   343     val (answer, rc) = bash_output "\"$ISABELLE_ATP_MANAGER/SystemOnTPTP\" -w"
   346     (answer, 0) => split_lines answer
   344   in
   347   | (answer, _) =>
   345     if rc <> 0 then
   348     error ("Failed to get available systems at SystemOnTPTP:\n" ^ answer)
   346       error ("Failed to get available systems at SystemOnTPTP:\n" ^ answer)
       
   347     else
       
   348       split_lines answer
       
   349   end;
       
   350 
   349 
   351 fun refresh_systems_on_tptp () =
   350 fun refresh_systems_on_tptp () =
   352   Synchronized.change systems (fn _ => get_systems ());
   351   Synchronized.change systems (fn _ => get_systems ());
   353 
   352 
   354 fun get_system prefix = Synchronized.change_result systems (fn systems =>
   353 fun get_system prefix = Synchronized.change_result systems (fn systems =>
   355   (if null systems then get_systems () else systems)
   354   (if null systems then get_systems () else systems)
   356   |> `(find_first (String.isPrefix prefix)));
   355   |> `(find_first (String.isPrefix prefix)));
   357 
   356 
   358 fun the_system prefix =
   357 fun the_system prefix =
   359   (case get_system prefix of
   358   (case get_system prefix of
   360     NONE => error ("System " ^ quote prefix ^ " not available at SystemOnTPTP")
   359     NONE => error ("System " ^ quote prefix ^
       
   360                    " not available at SystemOnTPTP.")
   361   | SOME sys => sys);
   361   | SOME sys => sys);
   362 
   362 
   363 val remote_known_failures =
   363 val remote_known_failures =
   364   [(["Remote-script could not extract proof"],
   364   [(TimedOut, "says Timeout"),
   365     "Error: The remote ATP proof is ill-formed.")]
   365    (MalformedOutput, "Remote-script could not extract proof")]
   366 
   366 
   367 fun remote_prover_config prover_prefix args
   367 fun remote_prover_config prover_prefix args
   368         ({proof_delims, known_failures, max_new_clauses,
   368         ({proof_delims, known_failures, max_new_clauses,
   369           prefers_theory_relevant, ...} : prover_config) : prover_config =
   369           prefers_theory_relevant, ...} : prover_config) : prover_config =
   370   {home = getenv "ISABELLE_ATP_MANAGER",
   370   {home = getenv "ISABELLE_ATP_MANAGER",