src/HOL/Tools/ATP/atp_systems.ML
author blanchet
Tue Aug 23 14:44:19 2011 +0200 (2011-08-23)
changeset 44418 800baa1b1406
parent 44417 c76c04d876ef
child 44420 3d0921b91a86
permissions -rw-r--r--
fixed TFF slicing
     1 (*  Title:      HOL/Tools/ATP/atp_systems.ML
     2     Author:     Fabian Immler, TU Muenchen
     3     Author:     Jasmin Blanchette, TU Muenchen
     4 
     5 Setup for supported ATPs.
     6 *)
     7 
     8 signature ATP_SYSTEMS =
     9 sig
    10   type format = ATP_Problem.format
    11   type formula_kind = ATP_Problem.formula_kind
    12   type failure = ATP_Proof.failure
    13 
    14   type atp_config =
    15     {exec : string * string,
    16      required_execs : (string * string) list,
    17      arguments :
    18        Proof.context -> bool -> string -> Time.time
    19        -> (unit -> (string * real) list) -> string,
    20      proof_delims : (string * string) list,
    21      known_failures : (failure * string) list,
    22      conj_sym_kind : formula_kind,
    23      prem_kind : formula_kind,
    24      best_slices :
    25        Proof.context -> (real * (bool * (int * format * string * string))) list}
    26 
    27   val force_sos : bool Config.T
    28   val e_smartN : string
    29   val e_autoN : string
    30   val e_fun_weightN : string
    31   val e_sym_offset_weightN : string
    32   val e_weight_method : string Config.T
    33   val e_default_fun_weight : real Config.T
    34   val e_fun_weight_base : real Config.T
    35   val e_fun_weight_span : real Config.T
    36   val e_default_sym_offs_weight : real Config.T
    37   val e_sym_offs_weight_base : real Config.T
    38   val e_sym_offs_weight_span : real Config.T
    39   val eN : string
    40   val spassN : string
    41   val vampireN : string
    42   val leo2N : string
    43   val satallaxN : string
    44   val e_sineN : string
    45   val snarkN : string
    46   val e_tofofN : string
    47   val waldmeisterN : string
    48   val z3_atpN : string
    49   val remote_prefix : string
    50   val remote_atp :
    51     string -> string -> string list -> (string * string) list
    52     -> (failure * string) list -> formula_kind -> formula_kind
    53     -> (Proof.context -> int * format * string) -> string * atp_config
    54   val add_atp : string * atp_config -> theory -> theory
    55   val get_atp : theory -> string -> atp_config
    56   val supported_atps : theory -> string list
    57   val is_atp_installed : theory -> string -> bool
    58   val refresh_systems_on_tptp : unit -> unit
    59   val setup : theory -> theory
    60 end;
    61 
    62 structure ATP_Systems : ATP_SYSTEMS =
    63 struct
    64 
    65 open ATP_Problem
    66 open ATP_Proof
    67 
    68 (* ATP configuration *)
    69 
    70 type atp_config =
    71   {exec : string * string,
    72    required_execs : (string * string) list,
    73    arguments :
    74      Proof.context -> bool -> string -> Time.time
    75      -> (unit -> (string * real) list) -> string,
    76    proof_delims : (string * string) list,
    77    known_failures : (failure * string) list,
    78    conj_sym_kind : formula_kind,
    79    prem_kind : formula_kind,
    80    best_slices :
    81      Proof.context -> (real * (bool * (int * format * string * string))) list}
    82 
    83 (* "best_slices" must be found empirically, taking a wholistic approach since
    84    the ATPs are run in parallel. The "real" components give the faction of the
    85    time available given to the slice and should add up to 1.0. The "bool"
    86    component indicates whether the slice's strategy is complete; the "int", the
    87    preferred number of facts to pass; the first "string", the preferred type
    88    system (which should be sound or quasi-sound); the second "string", extra
    89    information to the prover (e.g., SOS or no SOS).
    90 
    91    The last slice should be the most "normal" one, because it will get all the
    92    time available if the other slices fail early and also because it is used if
    93    slicing is disabled (e.g., by the minimizer). *)
    94 
    95 val known_perl_failures =
    96   [(CantConnect, "HTTP error"),
    97    (NoPerl, "env: perl"),
    98    (NoLibwwwPerl, "Can't locate HTTP")]
    99 
   100 (* named ATPs *)
   101 
   102 val eN = "e"
   103 val leo2N = "leo2"
   104 val satallaxN = "satallax"
   105 val spassN = "spass"
   106 val vampireN = "vampire"
   107 val z3_atpN = "z3_atp"
   108 val e_sineN = "e_sine"
   109 val snarkN = "snark"
   110 val e_tofofN = "e_tofof"
   111 val waldmeisterN = "waldmeister"
   112 val remote_prefix = "remote_"
   113 
   114 structure Data = Theory_Data
   115 (
   116   type T = (atp_config * stamp) Symtab.table
   117   val empty = Symtab.empty
   118   val extend = I
   119   fun merge data : T = Symtab.merge (eq_snd op =) data
   120     handle Symtab.DUP name => error ("Duplicate ATP: " ^ quote name ^ ".")
   121 )
   122 
   123 fun to_secs min time = Int.max (min, (Time.toMilliseconds time + 999) div 1000)
   124 
   125 val sosN = "sos"
   126 val no_sosN = "no_sos"
   127 
   128 val force_sos = Attrib.setup_config_bool @{binding atp_force_sos} (K false)
   129 
   130 
   131 (* E *)
   132 
   133 val tstp_proof_delims =
   134   [("# SZS output start CNFRefutation.", "# SZS output end CNFRefutation"),
   135    ("% SZS output start CNFRefutation", "% SZS output end CNFRefutation")]
   136 
   137 val e_smartN = "smart"
   138 val e_autoN = "auto"
   139 val e_fun_weightN = "fun_weight"
   140 val e_sym_offset_weightN = "sym_offset_weight"
   141 
   142 val e_weight_method =
   143   Attrib.setup_config_string @{binding atp_e_weight_method} (K e_smartN)
   144 (* FUDGE *)
   145 val e_default_fun_weight =
   146   Attrib.setup_config_real @{binding atp_e_default_fun_weight} (K 20.0)
   147 val e_fun_weight_base =
   148   Attrib.setup_config_real @{binding atp_e_fun_weight_base} (K 0.0)
   149 val e_fun_weight_span =
   150   Attrib.setup_config_real @{binding atp_e_fun_weight_span} (K 40.0)
   151 val e_default_sym_offs_weight =
   152   Attrib.setup_config_real @{binding atp_e_default_sym_offs_weight} (K 1.0)
   153 val e_sym_offs_weight_base =
   154   Attrib.setup_config_real @{binding atp_e_sym_offs_weight_base} (K ~20.0)
   155 val e_sym_offs_weight_span =
   156   Attrib.setup_config_real @{binding atp_e_sym_offs_weight_span} (K 60.0)
   157 
   158 fun e_weight_method_case method fw sow =
   159   if method = e_fun_weightN then fw
   160   else if method = e_sym_offset_weightN then sow
   161   else raise Fail ("unexpected " ^ quote method)
   162 
   163 fun scaled_e_weight ctxt method w =
   164   w * Config.get ctxt
   165           (e_weight_method_case method e_fun_weight_span e_sym_offs_weight_span)
   166   + Config.get ctxt
   167         (e_weight_method_case method e_fun_weight_base e_sym_offs_weight_base)
   168   |> Real.ceil |> signed_string_of_int
   169 
   170 fun e_weight_arguments ctxt method weights =
   171   if method = e_autoN then
   172     "-xAutoDev"
   173   else
   174     (* supplied by Stephan Schulz *)
   175     "--split-clauses=4 --split-reuse-defs --simul-paramod --forward-context-sr \
   176     \--destructive-er-aggressive --destructive-er --presat-simplify \
   177     \--prefer-initial-clauses -tKBO6 -winvfreqrank -c1 -Ginvfreqconjmax -F1 \
   178     \--delete-bad-limit=150000000 -WSelectMaxLComplexAvoidPosPred \
   179     \-H'(4*" ^ e_weight_method_case method "FunWeight" "SymOffsetWeight" ^
   180     "(SimulateSOS, " ^
   181     (e_weight_method_case method e_default_fun_weight e_default_sym_offs_weight
   182      |> Config.get ctxt |> Real.ceil |> signed_string_of_int) ^
   183     ",20,1.5,1.5,1" ^
   184     (weights ()
   185      |> map (fn (s, w) => "," ^ s ^ ":" ^ scaled_e_weight ctxt method w)
   186      |> implode) ^
   187     "),3*ConjectureGeneralSymbolWeight(PreferNonGoals,200,100,200,50,50,1,100,\
   188     \1.5,1.5,1),1*Clauseweight(PreferProcessed,1,1,1),1*\
   189     \FIFOWeight(PreferProcessed))'"
   190 
   191 fun is_old_e_version () = (string_ord (getenv "E_VERSION", "1.2w") = LESS)
   192 
   193 fun effective_e_weight_method ctxt =
   194   if is_old_e_version () then e_autoN else Config.get ctxt e_weight_method
   195 
   196 val e_config : atp_config =
   197   {exec = ("E_HOME", "eproof"),
   198    required_execs = [],
   199    arguments =
   200      fn ctxt => fn _ => fn method => fn timeout => fn weights =>
   201         "--tstp-in --tstp-out -l5 " ^ e_weight_arguments ctxt method weights ^
   202         " -tAutoDev --silent --cpu-limit=" ^ string_of_int (to_secs 2 timeout),
   203    proof_delims = tstp_proof_delims,
   204    known_failures =
   205      [(Unprovable, "SZS status: CounterSatisfiable"),
   206       (Unprovable, "SZS status CounterSatisfiable"),
   207       (ProofMissing, "SZS status Theorem"),
   208       (TimedOut, "Failure: Resource limit exceeded (time)"),
   209       (TimedOut, "time limit exceeded"),
   210       (OutOfResources, "# Cannot determine problem status"),
   211       (OutOfResources, "SZS status: ResourceOut"),
   212       (OutOfResources, "SZS status ResourceOut")],
   213    conj_sym_kind = Hypothesis,
   214    prem_kind = Conjecture,
   215    best_slices = fn ctxt =>
   216      let val method = effective_e_weight_method ctxt in
   217        (* FUDGE *)
   218        if method = e_smartN then
   219          [(0.333, (true, (500, FOF, "mangled_tags?", e_fun_weightN))),
   220           (0.334, (true, (50, FOF, "mangled_guards?", e_fun_weightN))),
   221           (0.333, (true, (1000, FOF, "mangled_tags?", e_sym_offset_weightN)))]
   222        else
   223          [(1.0, (true, (500, FOF, "mangled_tags?", method)))]
   224      end}
   225 
   226 val e = (eN, e_config)
   227 
   228 
   229 (* LEO-II *)
   230 
   231 val leo2_config : atp_config =
   232   {exec = ("LEO2_HOME", "leo"),
   233    required_execs = [],
   234    arguments =
   235      fn _ => fn _ => fn sos => fn timeout => fn _ =>
   236         "--proofoutput --timeout " ^ string_of_int (to_secs 1 timeout)
   237         |> sos = sosN ? prefix "--sos ",
   238    proof_delims = tstp_proof_delims,
   239    known_failures = [],
   240    conj_sym_kind = Axiom,
   241    prem_kind = Hypothesis,
   242    best_slices = fn ctxt =>
   243      (* FUDGE *)
   244      [(0.667, (false, (150, THF Without_Choice, "simple_higher", sosN))),
   245       (0.333, (true, (50, THF Without_Choice, "simple_higher", no_sosN)))]
   246      |> (if Config.get ctxt force_sos then hd #> apfst (K 1.0) #> single
   247          else I)}
   248 
   249 val leo2 = (leo2N, leo2_config)
   250 
   251 
   252 (* Satallax *)
   253 
   254 val satallax_config : atp_config =
   255   {exec = ("SATALLAX_HOME", "satallax"),
   256    required_execs = [],
   257    arguments =
   258      fn _ => fn _ => fn _ => fn timeout => fn _ =>
   259         "-t " ^ string_of_int (to_secs 1 timeout),
   260    proof_delims = tstp_proof_delims,
   261    known_failures = [(ProofMissing, "SZS status Theorem")],
   262    conj_sym_kind = Axiom,
   263    prem_kind = Hypothesis,
   264    best_slices =
   265      K [(1.0, (true, (100, THF With_Choice, "simple_higher", "")))] (* FUDGE *)}
   266 
   267 val satallax = (satallaxN, satallax_config)
   268 
   269 
   270 (* SPASS *)
   271 
   272 (* The "-VarWeight=3" option helps the higher-order problems, probably by
   273    counteracting the presence of "hAPP". *)
   274 val spass_config : atp_config =
   275   {exec = ("ISABELLE_ATP", "scripts/spass"),
   276    required_execs = [("SPASS_HOME", "SPASS"), ("SPASS_HOME", "tptp2dfg")],
   277    arguments = fn _ => fn _ => fn sos => fn timeout => fn _ =>
   278      ("-Auto -PGiven=0 -PProblem=0 -Splits=0 -FullRed=0 -DocProof \
   279       \-VarWeight=3 -TimeLimit=" ^ string_of_int (to_secs 1 timeout))
   280      |> sos = sosN ? prefix "-SOS=1 ",
   281    proof_delims = [("Here is a proof", "Formulae used in the proof")],
   282    known_failures =
   283      known_perl_failures @
   284      [(GaveUp, "SPASS beiseite: Completion found"),
   285       (TimedOut, "SPASS beiseite: Ran out of time"),
   286       (OutOfResources, "SPASS beiseite: Maximal number of loops exceeded"),
   287       (MalformedInput, "Undefined symbol"),
   288       (MalformedInput, "Free Variable"),
   289       (Unprovable, "No formulae and clauses found in input file"),
   290       (SpassTooOld, "tptp2dfg"),
   291       (InternalError, "Please report this error")],
   292    conj_sym_kind = Hypothesis,
   293    prem_kind = Conjecture,
   294    best_slices = fn ctxt =>
   295      (* FUDGE *)
   296      [(0.333, (false, (150, FOF, "mangled_tags", sosN))),
   297       (0.333, (false, (300, FOF, "poly_tags?", sosN))),
   298       (0.334, (true, (50, FOF, "mangled_tags?", no_sosN)))]
   299      |> (if Config.get ctxt force_sos then hd #> apfst (K 1.0) #> single
   300          else I)}
   301 
   302 val spass = (spassN, spass_config)
   303 
   304 
   305 (* Vampire *)
   306 
   307 val vampire_config : atp_config =
   308   {exec = ("VAMPIRE_HOME", "vampire"),
   309    required_execs = [],
   310    arguments = fn _ => fn _ => fn sos => fn timeout => fn _ =>
   311      "--mode casc -t " ^ string_of_int (to_secs 1 timeout) ^
   312      " --proof tptp --output_axiom_names on \
   313      \ --thanks \"Andrei and Krystof\" --input_file"
   314      |> sos = sosN ? prefix "--sos on ",
   315    proof_delims =
   316      [("=========== Refutation ==========",
   317        "======= End of refutation ======="),
   318       ("% SZS output start Refutation", "% SZS output end Refutation"),
   319       ("% SZS output start Proof", "% SZS output end Proof")],
   320    known_failures =
   321      [(GaveUp, "UNPROVABLE"),
   322       (GaveUp, "CANNOT PROVE"),
   323       (GaveUp, "SZS status GaveUp"),
   324       (ProofIncomplete, "predicate_definition_introduction,[]"),
   325       (TimedOut, "SZS status Timeout"),
   326       (Unprovable, "Satisfiability detected"),
   327       (Unprovable, "Termination reason: Satisfiable"),
   328       (VampireTooOld, "not a valid option"),
   329       (Interrupted, "Aborted by signal SIGINT")],
   330    conj_sym_kind = Conjecture,
   331    prem_kind = Conjecture,
   332    best_slices = fn ctxt =>
   333      (* FUDGE *)
   334      [(0.333, (false, (150, TFF, "poly_guards", sosN))),
   335       (0.334, (true, (50, TFF, "mangled_guards?", no_sosN))),
   336       (0.333, (false, (500, TFF, "mangled_tags?", sosN)))]
   337      |> (if Config.get ctxt force_sos then hd #> apfst (K 1.0) #> single
   338          else I)}
   339 
   340 val vampire = (vampireN, vampire_config)
   341 
   342 
   343 (* Z3 with TPTP syntax *)
   344 
   345 val z3_atp_config : atp_config =
   346   {exec = ("Z3_HOME", "z3"),
   347    required_execs = [],
   348    arguments = fn _ => fn _ => fn _ => fn timeout => fn _ =>
   349      "MBQI=true -p -t:" ^ string_of_int (to_secs 1 timeout),
   350    proof_delims = [],
   351    known_failures =
   352      [(Unprovable, "\nsat"),
   353       (GaveUp, "\nunknown"),
   354       (GaveUp, "SZS status Satisfiable"),
   355       (ProofMissing, "\nunsat"),
   356       (ProofMissing, "SZS status Unsatisfiable")],
   357    conj_sym_kind = Hypothesis,
   358    prem_kind = Hypothesis,
   359    best_slices =
   360      (* FUDGE (FIXME) *)
   361      K [(0.5, (false, (250, FOF, "mangled_guards?", ""))),
   362         (0.25, (false, (125, FOF, "mangled_guards?", ""))),
   363         (0.125, (false, (62, FOF, "mangled_guards?", ""))),
   364         (0.125, (false, (31, FOF, "mangled_guards?", "")))]}
   365 
   366 val z3_atp = (z3_atpN, z3_atp_config)
   367 
   368 
   369 (* Remote ATP invocation via SystemOnTPTP *)
   370 
   371 val systems = Synchronized.var "atp_systems" ([] : string list)
   372 
   373 fun get_systems () =
   374   case Isabelle_System.bash_output "\"$ISABELLE_ATP/scripts/remote_atp\" -w 2>&1" of
   375     (output, 0) => split_lines output
   376   | (output, _) =>
   377     error (case extract_known_failure known_perl_failures output of
   378              SOME failure => string_for_failure failure
   379            | NONE => perhaps (try (unsuffix "\n")) output ^ ".")
   380 
   381 fun find_system name [] systems =
   382     find_first (String.isPrefix (name ^ "---")) systems
   383   | find_system name (version :: versions) systems =
   384     case find_first (String.isPrefix (name ^ "---" ^ version)) systems of
   385       NONE => find_system name versions systems
   386     | res => res
   387 
   388 fun get_system name versions =
   389   Synchronized.change_result systems
   390       (fn systems => (if null systems then get_systems () else systems)
   391                      |> `(`(find_system name versions)))
   392 
   393 fun the_system name versions =
   394   case get_system name versions of
   395     (SOME sys, _) => sys
   396   | (NONE, []) => error ("SystemOnTPTP is currently not available.")
   397   | (NONE, syss) =>
   398     error ("System " ^ quote name ^ " is not available at SystemOnTPTP.\n" ^
   399            "(Available systems: " ^ commas_quote syss ^ ".)")
   400 
   401 val max_remote_secs = 240 (* give Geoff Sutcliffe's servers a break *)
   402 
   403 fun remote_config system_name system_versions proof_delims known_failures
   404                   conj_sym_kind prem_kind best_slice : atp_config =
   405   {exec = ("ISABELLE_ATP", "scripts/remote_atp"),
   406    required_execs = [],
   407    arguments = fn _ => fn _ => fn _ => fn timeout => fn _ =>
   408      "-t " ^ string_of_int (Int.min (max_remote_secs, to_secs 1 timeout))
   409      ^ " -s " ^ the_system system_name system_versions,
   410    proof_delims = union (op =) tstp_proof_delims proof_delims,
   411    known_failures = known_failures @ known_perl_failures @
   412      [(Unprovable, "says Satisfiable"),
   413       (Unprovable, "says CounterSatisfiable"),
   414       (GaveUp, "says Unknown"),
   415       (GaveUp, "says GaveUp"),
   416       (ProofMissing, "says Theorem"),
   417       (ProofMissing, "says Unsatisfiable"),
   418       (TimedOut, "says Timeout"),
   419       (Inappropriate, "says Inappropriate")],
   420    conj_sym_kind = conj_sym_kind,
   421    prem_kind = prem_kind,
   422    best_slices = fn ctxt =>
   423      let val (max_relevant, format, type_enc) = best_slice ctxt in
   424        [(1.0, (false, (max_relevant, format, type_enc, "")))]
   425      end}
   426 
   427 fun remotify_config system_name system_versions best_slice
   428         ({proof_delims, known_failures, conj_sym_kind, prem_kind, ...}
   429          : atp_config) : atp_config =
   430   remote_config system_name system_versions proof_delims known_failures
   431                 conj_sym_kind prem_kind best_slice
   432 
   433 fun remote_atp name system_name system_versions proof_delims known_failures
   434                conj_sym_kind prem_kind best_slice =
   435   (remote_prefix ^ name,
   436    remote_config system_name system_versions proof_delims known_failures
   437                  conj_sym_kind prem_kind best_slice)
   438 fun remotify_atp (name, config) system_name system_versions best_slice =
   439   (remote_prefix ^ name,
   440    remotify_config system_name system_versions best_slice config)
   441 
   442 val remote_e =
   443   remotify_atp e "EP" ["1.0", "1.1", "1.2"]
   444                (K (750, FOF, "mangled_tags?") (* FUDGE *))
   445 val remote_leo2 =
   446   remotify_atp leo2 "LEO-II" ["1.2.8", "1.2.6"]
   447                (K (100, THF Without_Choice, "simple_higher") (* FUDGE *))
   448 val remote_satallax =
   449   remotify_atp satallax "Satallax" ["2.1", "2.0", "2"]
   450                (K (100, THF With_Choice, "simple_higher") (* FUDGE *))
   451 val remote_vampire =
   452   remotify_atp vampire "Vampire" ["1.8"] (K (250, TFF, "simple") (* FUDGE *))
   453 val remote_z3_atp =
   454   remotify_atp z3_atp "Z3" ["2.18"]
   455                (K (250, FOF, "mangled_guards?") (* FUDGE *))
   456 val remote_e_sine =
   457   remote_atp e_sineN "SInE" ["0.4"] [] (#known_failures e_config) Axiom
   458              Conjecture (K (500, FOF, "mangled_guards?") (* FUDGE *))
   459 val remote_snark =
   460   remote_atp snarkN "SNARK" ["20080805r029", "20080805r024"]
   461              [("refutation.", "end_refutation.")] [] Hypothesis Hypothesis
   462              (K (100, TFF, "simple") (* FUDGE *))
   463 val remote_e_tofof =
   464   remote_atp e_tofofN "ToFoF" ["0.1"] [] (#known_failures e_config)
   465              Axiom Hypothesis (K (150, TFF, "simple") (* FUDGE *))
   466 val remote_waldmeister =
   467   remote_atp waldmeisterN "Waldmeister" ["710"]
   468              [("#START OF PROOF", "Proved Goals:")]
   469              [(OutOfResources, "Too many function symbols"),
   470               (Crashed, "Unrecoverable Segmentation Fault")]
   471              Hypothesis Hypothesis
   472              (K (50, CNF_UEQ, "mangled_tags?") (* FUDGE *))
   473 
   474 (* Setup *)
   475 
   476 fun add_atp (name, config) thy =
   477   Data.map (Symtab.update_new (name, (config, stamp ()))) thy
   478   handle Symtab.DUP name => error ("Duplicate ATP: " ^ quote name ^ ".")
   479 
   480 fun get_atp thy name =
   481   the (Symtab.lookup (Data.get thy) name) |> fst
   482   handle Option.Option => error ("Unknown ATP: " ^ name ^ ".")
   483 
   484 val supported_atps = Symtab.keys o Data.get
   485 
   486 fun is_atp_installed thy name =
   487   let val {exec, required_execs, ...} = get_atp thy name in
   488     forall (curry (op <>) "" o getenv o fst) (exec :: required_execs)
   489   end
   490 
   491 fun refresh_systems_on_tptp () =
   492   Synchronized.change systems (fn _ => get_systems ())
   493 
   494 val atps =
   495   [e, leo2, satallax, spass, vampire, z3_atp, remote_e, remote_leo2,
   496    remote_satallax, remote_vampire, remote_z3_atp, remote_e_sine, remote_snark,
   497    remote_e_tofof, remote_waldmeister]
   498 val setup = fold add_atp atps
   499 
   500 end;