src/HOL/Tools/ATP/atp_systems.ML
author blanchet
Mon Aug 22 15:02:45 2011 +0200 (2011-08-22)
changeset 44391 7b4104df2be6
parent 44235 85e9dad3c187
child 44416 cabd06b69c18
permissions -rw-r--r--
gracefully handle empty SPASS problems
     1 (*  Title:      HOL/Tools/ATP/atp_systems.ML
     2     Author:     Fabian Immler, TU Muenchen
     3     Author:     Jasmin Blanchette, TU Muenchen
     4 
     5 Setup for supported ATPs.
     6 *)
     7 
     8 signature ATP_SYSTEMS =
     9 sig
    10   type format = ATP_Problem.format
    11   type formula_kind = ATP_Problem.formula_kind
    12   type failure = ATP_Proof.failure
    13 
    14   type atp_config =
    15     {exec : string * string,
    16      required_execs : (string * string) list,
    17      arguments :
    18        Proof.context -> bool -> string -> Time.time
    19        -> (unit -> (string * real) list) -> string,
    20      proof_delims : (string * string) list,
    21      known_failures : (failure * string) list,
    22      conj_sym_kind : formula_kind,
    23      prem_kind : formula_kind,
    24      formats : format list,
    25      best_slices :
    26        Proof.context -> (real * (bool * (int * string * string))) list}
    27 
    28   val force_sos : bool Config.T
    29   val e_smartN : string
    30   val e_autoN : string
    31   val e_fun_weightN : string
    32   val e_sym_offset_weightN : string
    33   val e_weight_method : string Config.T
    34   val e_default_fun_weight : real Config.T
    35   val e_fun_weight_base : real Config.T
    36   val e_fun_weight_span : real Config.T
    37   val e_default_sym_offs_weight : real Config.T
    38   val e_sym_offs_weight_base : real Config.T
    39   val e_sym_offs_weight_span : real Config.T
    40   val eN : string
    41   val spassN : string
    42   val vampireN : string
    43   val leo2N : string
    44   val satallaxN : string
    45   val e_sineN : string
    46   val snarkN : string
    47   val e_tofofN : string
    48   val waldmeisterN : string
    49   val z3_atpN : string
    50   val remote_prefix : string
    51   val remote_atp :
    52     string -> string -> string list -> (string * string) list
    53     -> (failure * string) list -> formula_kind -> formula_kind -> format list
    54     -> (Proof.context -> int * string) -> string * atp_config
    55   val add_atp : string * atp_config -> theory -> theory
    56   val get_atp : theory -> string -> atp_config
    57   val supported_atps : theory -> string list
    58   val is_atp_installed : theory -> string -> bool
    59   val refresh_systems_on_tptp : unit -> unit
    60   val setup : theory -> theory
    61 end;
    62 
    63 structure ATP_Systems : ATP_SYSTEMS =
    64 struct
    65 
    66 open ATP_Problem
    67 open ATP_Proof
    68 
    69 (* ATP configuration *)
    70 
    71 type atp_config =
    72   {exec : string * string,
    73    required_execs : (string * string) list,
    74    arguments :
    75      Proof.context -> bool -> string -> Time.time
    76      -> (unit -> (string * real) list) -> string,
    77    proof_delims : (string * string) list,
    78    known_failures : (failure * string) list,
    79    conj_sym_kind : formula_kind,
    80    prem_kind : formula_kind,
    81    formats : format list,
    82    best_slices :
    83      Proof.context -> (real * (bool * (int * string * string))) list}
    84 
    85 (* "best_slices" must be found empirically, taking a wholistic approach since
    86    the ATPs are run in parallel. The "real" components give the faction of the
    87    time available given to the slice and should add up to 1.0. The "bool"
    88    component indicates whether the slice's strategy is complete; the "int", the
    89    preferred number of facts to pass; the first "string", the preferred type
    90    system (which should be sound or quasi-sound); the second "string", extra
    91    information to the prover (e.g., SOS or no SOS).
    92 
    93    The last slice should be the most "normal" one, because it will get all the
    94    time available if the other slices fail early and also because it is used if
    95    slicing is disabled (e.g., by the minimizer). *)
    96 
    97 val known_perl_failures =
    98   [(CantConnect, "HTTP error"),
    99    (NoPerl, "env: perl"),
   100    (NoLibwwwPerl, "Can't locate HTTP")]
   101 
   102 (* named ATPs *)
   103 
   104 val eN = "e"
   105 val leo2N = "leo2"
   106 val satallaxN = "satallax"
   107 val spassN = "spass"
   108 val vampireN = "vampire"
   109 val z3_atpN = "z3_atp"
   110 val e_sineN = "e_sine"
   111 val snarkN = "snark"
   112 val e_tofofN = "e_tofof"
   113 val waldmeisterN = "waldmeister"
   114 val remote_prefix = "remote_"
   115 
   116 structure Data = Theory_Data
   117 (
   118   type T = (atp_config * stamp) Symtab.table
   119   val empty = Symtab.empty
   120   val extend = I
   121   fun merge data : T = Symtab.merge (eq_snd op =) data
   122     handle Symtab.DUP name => error ("Duplicate ATP: " ^ quote name ^ ".")
   123 )
   124 
   125 fun to_secs min time = Int.max (min, (Time.toMilliseconds time + 999) div 1000)
   126 
   127 val sosN = "sos"
   128 val no_sosN = "no_sos"
   129 
   130 val force_sos = Attrib.setup_config_bool @{binding atp_force_sos} (K false)
   131 
   132 
   133 (* E *)
   134 
   135 val tstp_proof_delims =
   136   [("# SZS output start CNFRefutation.", "# SZS output end CNFRefutation"),
   137    ("% SZS output start CNFRefutation", "% SZS output end CNFRefutation")]
   138 
   139 val e_smartN = "smart"
   140 val e_autoN = "auto"
   141 val e_fun_weightN = "fun_weight"
   142 val e_sym_offset_weightN = "sym_offset_weight"
   143 
   144 val e_weight_method =
   145   Attrib.setup_config_string @{binding atp_e_weight_method} (K e_smartN)
   146 (* FUDGE *)
   147 val e_default_fun_weight =
   148   Attrib.setup_config_real @{binding atp_e_default_fun_weight} (K 20.0)
   149 val e_fun_weight_base =
   150   Attrib.setup_config_real @{binding atp_e_fun_weight_base} (K 0.0)
   151 val e_fun_weight_span =
   152   Attrib.setup_config_real @{binding atp_e_fun_weight_span} (K 40.0)
   153 val e_default_sym_offs_weight =
   154   Attrib.setup_config_real @{binding atp_e_default_sym_offs_weight} (K 1.0)
   155 val e_sym_offs_weight_base =
   156   Attrib.setup_config_real @{binding atp_e_sym_offs_weight_base} (K ~20.0)
   157 val e_sym_offs_weight_span =
   158   Attrib.setup_config_real @{binding atp_e_sym_offs_weight_span} (K 60.0)
   159 
   160 fun e_weight_method_case method fw sow =
   161   if method = e_fun_weightN then fw
   162   else if method = e_sym_offset_weightN then sow
   163   else raise Fail ("unexpected " ^ quote method)
   164 
   165 fun scaled_e_weight ctxt method w =
   166   w * Config.get ctxt
   167           (e_weight_method_case method e_fun_weight_span e_sym_offs_weight_span)
   168   + Config.get ctxt
   169         (e_weight_method_case method e_fun_weight_base e_sym_offs_weight_base)
   170   |> Real.ceil |> signed_string_of_int
   171 
   172 fun e_weight_arguments ctxt method weights =
   173   if method = e_autoN then
   174     "-xAutoDev"
   175   else
   176     (* supplied by Stephan Schulz *)
   177     "--split-clauses=4 --split-reuse-defs --simul-paramod --forward-context-sr \
   178     \--destructive-er-aggressive --destructive-er --presat-simplify \
   179     \--prefer-initial-clauses -tKBO6 -winvfreqrank -c1 -Ginvfreqconjmax -F1 \
   180     \--delete-bad-limit=150000000 -WSelectMaxLComplexAvoidPosPred \
   181     \-H'(4*" ^ e_weight_method_case method "FunWeight" "SymOffsetWeight" ^
   182     "(SimulateSOS, " ^
   183     (e_weight_method_case method e_default_fun_weight e_default_sym_offs_weight
   184      |> Config.get ctxt |> Real.ceil |> signed_string_of_int) ^
   185     ",20,1.5,1.5,1" ^
   186     (weights ()
   187      |> map (fn (s, w) => "," ^ s ^ ":" ^ scaled_e_weight ctxt method w)
   188      |> implode) ^
   189     "),3*ConjectureGeneralSymbolWeight(PreferNonGoals,200,100,200,50,50,1,100,\
   190     \1.5,1.5,1),1*Clauseweight(PreferProcessed,1,1,1),1*\
   191     \FIFOWeight(PreferProcessed))'"
   192 
   193 fun is_old_e_version () = (string_ord (getenv "E_VERSION", "1.2w") = LESS)
   194 
   195 fun effective_e_weight_method ctxt =
   196   if is_old_e_version () then e_autoN else Config.get ctxt e_weight_method
   197 
   198 val e_config : atp_config =
   199   {exec = ("E_HOME", "eproof"),
   200    required_execs = [],
   201    arguments =
   202      fn ctxt => fn _ => fn method => fn timeout => fn weights =>
   203         "--tstp-in --tstp-out -l5 " ^ e_weight_arguments ctxt method weights ^
   204         " -tAutoDev --silent --cpu-limit=" ^ string_of_int (to_secs 2 timeout),
   205    proof_delims = tstp_proof_delims,
   206    known_failures =
   207      [(Unprovable, "SZS status: CounterSatisfiable"),
   208       (Unprovable, "SZS status CounterSatisfiable"),
   209       (ProofMissing, "SZS status Theorem"),
   210       (TimedOut, "Failure: Resource limit exceeded (time)"),
   211       (TimedOut, "time limit exceeded"),
   212       (OutOfResources, "# Cannot determine problem status"),
   213       (OutOfResources, "SZS status: ResourceOut"),
   214       (OutOfResources, "SZS status ResourceOut")],
   215    conj_sym_kind = Hypothesis,
   216    prem_kind = Conjecture,
   217    formats = [FOF],
   218    best_slices = fn ctxt =>
   219      let val method = effective_e_weight_method ctxt in
   220        (* FUDGE *)
   221        if method = e_smartN then
   222          [(0.333, (true, (500, "mangled_tags?", e_fun_weightN))),
   223           (0.334, (true, (50, "mangled_guards?", e_fun_weightN))),
   224           (0.333, (true, (1000, "mangled_tags?", e_sym_offset_weightN)))]
   225        else
   226          [(1.0, (true, (500, "mangled_tags?", method)))]
   227      end}
   228 
   229 val e = (eN, e_config)
   230 
   231 
   232 (* LEO-II *)
   233 
   234 val leo2_config : atp_config =
   235   {exec = ("LEO2_HOME", "leo"),
   236    required_execs = [],
   237    arguments =
   238      fn _ => fn _ => fn sos => fn timeout => fn _ =>
   239         "--proofoutput --timeout " ^ string_of_int (to_secs 1 timeout)
   240         |> sos = sosN ? prefix "--sos ",
   241    proof_delims = tstp_proof_delims,
   242    known_failures = [],
   243    conj_sym_kind = Axiom,
   244    prem_kind = Hypothesis,
   245    formats = [THF Without_Choice, FOF],
   246    best_slices = fn ctxt =>
   247      (* FUDGE *)
   248      [(0.667, (false, (150, "simple_higher", sosN))),
   249       (0.333, (true, (50, "simple_higher", no_sosN)))]
   250      |> (if Config.get ctxt force_sos then hd #> apfst (K 1.0) #> single
   251          else I)}
   252 
   253 val leo2 = (leo2N, leo2_config)
   254 
   255 
   256 (* Satallax *)
   257 
   258 val satallax_config : atp_config =
   259   {exec = ("SATALLAX_HOME", "satallax"),
   260    required_execs = [],
   261    arguments =
   262      fn _ => fn _ => fn _ => fn timeout => fn _ =>
   263         "-t " ^ string_of_int (to_secs 1 timeout),
   264    proof_delims = tstp_proof_delims,
   265    known_failures = [(ProofMissing, "SZS status Theorem")],
   266    conj_sym_kind = Axiom,
   267    prem_kind = Hypothesis,
   268    formats = [THF With_Choice],
   269    best_slices = K [(1.0, (true, (100, "simple_higher", "")))] (* FUDGE *)}
   270 
   271 val satallax = (satallaxN, satallax_config)
   272 
   273 
   274 (* SPASS *)
   275 
   276 (* The "-VarWeight=3" option helps the higher-order problems, probably by
   277    counteracting the presence of "hAPP". *)
   278 val spass_config : atp_config =
   279   {exec = ("ISABELLE_ATP", "scripts/spass"),
   280    required_execs = [("SPASS_HOME", "SPASS"), ("SPASS_HOME", "tptp2dfg")],
   281    arguments = fn _ => fn _ => fn sos => fn timeout => fn _ =>
   282      ("-Auto -PGiven=0 -PProblem=0 -Splits=0 -FullRed=0 -DocProof \
   283       \-VarWeight=3 -TimeLimit=" ^ string_of_int (to_secs 1 timeout))
   284      |> sos = sosN ? prefix "-SOS=1 ",
   285    proof_delims = [("Here is a proof", "Formulae used in the proof")],
   286    known_failures =
   287      known_perl_failures @
   288      [(GaveUp, "SPASS beiseite: Completion found"),
   289       (TimedOut, "SPASS beiseite: Ran out of time"),
   290       (OutOfResources, "SPASS beiseite: Maximal number of loops exceeded"),
   291       (MalformedInput, "Undefined symbol"),
   292       (MalformedInput, "Free Variable"),
   293       (Unprovable, "No formulae and clauses found in input file"),
   294       (SpassTooOld, "tptp2dfg"),
   295       (InternalError, "Please report this error")],
   296    conj_sym_kind = Hypothesis,
   297    prem_kind = Conjecture,
   298    formats = [FOF],
   299    best_slices = fn ctxt =>
   300      (* FUDGE *)
   301      [(0.333, (false, (150, "mangled_tags", sosN))),
   302       (0.333, (false, (300, "poly_tags?", sosN))),
   303       (0.334, (true, (50, "mangled_tags?", no_sosN)))]
   304      |> (if Config.get ctxt force_sos then hd #> apfst (K 1.0) #> single
   305          else I)}
   306 
   307 val spass = (spassN, spass_config)
   308 
   309 
   310 (* Vampire *)
   311 
   312 val vampire_config : atp_config =
   313   {exec = ("VAMPIRE_HOME", "vampire"),
   314    required_execs = [],
   315    arguments = fn _ => fn _ => fn sos => fn timeout => fn _ =>
   316      "--proof tptp --mode casc -t " ^ string_of_int (to_secs 1 timeout) ^
   317      " --thanks \"Andrei and Krystof\" --input_file"
   318      |> sos = sosN ? prefix "--sos on ",
   319    proof_delims =
   320      [("=========== Refutation ==========",
   321        "======= End of refutation ======="),
   322       ("% SZS output start Refutation", "% SZS output end Refutation"),
   323       ("% SZS output start Proof", "% SZS output end Proof")],
   324    known_failures =
   325      [(GaveUp, "UNPROVABLE"),
   326       (GaveUp, "CANNOT PROVE"),
   327       (GaveUp, "SZS status GaveUp"),
   328       (ProofIncomplete, "predicate_definition_introduction,[]"),
   329       (TimedOut, "SZS status Timeout"),
   330       (Unprovable, "Satisfiability detected"),
   331       (Unprovable, "Termination reason: Satisfiable"),
   332       (VampireTooOld, "not a valid option"),
   333       (Interrupted, "Aborted by signal SIGINT")],
   334    conj_sym_kind = Conjecture,
   335    prem_kind = Conjecture,
   336    formats = [FOF],
   337    best_slices = fn ctxt =>
   338      (* FUDGE *)
   339      [(0.333, (false, (150, "poly_guards", sosN))),
   340       (0.334, (true, (50, "mangled_guards?", no_sosN))),
   341       (0.333, (false, (500, "mangled_tags?", sosN)))]
   342      |> (if Config.get ctxt force_sos then hd #> apfst (K 1.0) #> single
   343          else I)}
   344 
   345 val vampire = (vampireN, vampire_config)
   346 
   347 
   348 (* Z3 with TPTP syntax *)
   349 
   350 val z3_atp_config : atp_config =
   351   {exec = ("Z3_HOME", "z3"),
   352    required_execs = [],
   353    arguments = fn _ => fn _ => fn _ => fn timeout => fn _ =>
   354      "MBQI=true -p -t:" ^ string_of_int (to_secs 1 timeout),
   355    proof_delims = [],
   356    known_failures =
   357      [(Unprovable, "\nsat"),
   358       (GaveUp, "\nunknown"),
   359       (GaveUp, "SZS status Satisfiable"),
   360       (ProofMissing, "\nunsat"),
   361       (ProofMissing, "SZS status Unsatisfiable")],
   362    conj_sym_kind = Hypothesis,
   363    prem_kind = Hypothesis,
   364    formats = [FOF],
   365    best_slices =
   366      (* FUDGE (FIXME) *)
   367      K [(0.5, (false, (250, "mangled_guards?", ""))),
   368         (0.25, (false, (125, "mangled_guards?", ""))),
   369         (0.125, (false, (62, "mangled_guards?", ""))),
   370         (0.125, (false, (31, "mangled_guards?", "")))]}
   371 
   372 val z3_atp = (z3_atpN, z3_atp_config)
   373 
   374 
   375 (* Remote ATP invocation via SystemOnTPTP *)
   376 
   377 val systems = Synchronized.var "atp_systems" ([] : string list)
   378 
   379 fun get_systems () =
   380   case Isabelle_System.bash_output "\"$ISABELLE_ATP/scripts/remote_atp\" -w 2>&1" of
   381     (output, 0) => split_lines output
   382   | (output, _) =>
   383     error (case extract_known_failure known_perl_failures output of
   384              SOME failure => string_for_failure failure
   385            | NONE => perhaps (try (unsuffix "\n")) output ^ ".")
   386 
   387 fun find_system name [] systems =
   388     find_first (String.isPrefix (name ^ "---")) systems
   389   | find_system name (version :: versions) systems =
   390     case find_first (String.isPrefix (name ^ "---" ^ version)) systems of
   391       NONE => find_system name versions systems
   392     | res => res
   393 
   394 fun get_system name versions =
   395   Synchronized.change_result systems
   396       (fn systems => (if null systems then get_systems () else systems)
   397                      |> `(`(find_system name versions)))
   398 
   399 fun the_system name versions =
   400   case get_system name versions of
   401     (SOME sys, _) => sys
   402   | (NONE, []) => error ("SystemOnTPTP is currently not available.")
   403   | (NONE, syss) =>
   404     error ("System " ^ quote name ^ " is not available at SystemOnTPTP.\n" ^
   405            "(Available systems: " ^ commas_quote syss ^ ".)")
   406 
   407 val max_remote_secs = 240 (* give Geoff Sutcliffe's servers a break *)
   408 
   409 fun remote_config system_name system_versions proof_delims known_failures
   410                   conj_sym_kind prem_kind formats best_slice : atp_config =
   411   {exec = ("ISABELLE_ATP", "scripts/remote_atp"),
   412    required_execs = [],
   413    arguments = fn _ => fn _ => fn _ => fn timeout => fn _ =>
   414      "-t " ^ string_of_int (Int.min (max_remote_secs, to_secs 1 timeout))
   415      ^ " -s " ^ the_system system_name system_versions,
   416    proof_delims = union (op =) tstp_proof_delims proof_delims,
   417    known_failures = known_failures @ known_perl_failures @
   418      [(Unprovable, "says Satisfiable"),
   419       (Unprovable, "says CounterSatisfiable"),
   420       (GaveUp, "says Unknown"),
   421       (GaveUp, "says GaveUp"),
   422       (ProofMissing, "says Theorem"),
   423       (ProofMissing, "says Unsatisfiable"),
   424       (TimedOut, "says Timeout"),
   425       (Inappropriate, "says Inappropriate")],
   426    conj_sym_kind = conj_sym_kind,
   427    prem_kind = prem_kind,
   428    formats = formats,
   429    best_slices = fn ctxt =>
   430      let val (max_relevant, type_enc) = best_slice ctxt in
   431        [(1.0, (false, (max_relevant, type_enc, "")))]
   432      end}
   433 
   434 fun remotify_config system_name system_versions best_slice
   435         ({proof_delims, known_failures, conj_sym_kind, prem_kind, formats, ...}
   436          : atp_config) : atp_config =
   437   remote_config system_name system_versions proof_delims known_failures
   438                 conj_sym_kind prem_kind formats best_slice
   439 
   440 fun remote_atp name system_name system_versions proof_delims known_failures
   441         conj_sym_kind prem_kind formats best_slice =
   442   (remote_prefix ^ name,
   443    remote_config system_name system_versions proof_delims known_failures
   444                  conj_sym_kind prem_kind formats best_slice)
   445 fun remotify_atp (name, config) system_name system_versions best_slice =
   446   (remote_prefix ^ name,
   447    remotify_config system_name system_versions best_slice config)
   448 
   449 val remote_e =
   450   remotify_atp e "EP" ["1.0", "1.1", "1.2"]
   451                (K (750, "mangled_tags?") (* FUDGE *))
   452 val remote_leo2 =
   453   remotify_atp leo2 "LEO-II" ["1.2.8", "1.2.6"]
   454                (K (100, "simple_higher") (* FUDGE *))
   455 val remote_satallax =
   456   remotify_atp satallax "Satallax" ["2.1", "2.0", "2"]
   457                (K (100, "simple_higher") (* FUDGE *))
   458 val remote_vampire =
   459   remotify_atp vampire "Vampire" ["0.6", "9.0", "1.0"]
   460                (K (200, "mangled_guards?") (* FUDGE *))
   461 val remote_z3_atp =
   462   remotify_atp z3_atp "Z3" ["2.18"] (K (250, "mangled_guards?") (* FUDGE *))
   463 val remote_e_sine =
   464   remote_atp e_sineN "SInE" ["0.4"] [] (#known_failures e_config) Axiom
   465              Conjecture [FOF] (K (500, "mangled_guards?") (* FUDGE *))
   466 val remote_snark =
   467   remote_atp snarkN "SNARK" ["20080805r029", "20080805r024"]
   468              [("refutation.", "end_refutation.")] [] Hypothesis Hypothesis
   469              [TFF, FOF] (K (100, "simple") (* FUDGE *))
   470 val remote_e_tofof =
   471   remote_atp e_tofofN "ToFoF" ["0.1"] [] (#known_failures e_config)
   472              Axiom Hypothesis [TFF] (K (150, "simple") (* FUDGE *))
   473 val remote_waldmeister =
   474   remote_atp waldmeisterN "Waldmeister" ["710"]
   475              [("#START OF PROOF", "Proved Goals:")]
   476              [(OutOfResources, "Too many function symbols"),
   477               (Crashed, "Unrecoverable Segmentation Fault")]
   478              Hypothesis Hypothesis [CNF_UEQ]
   479              (K (50, "mangled_tags?") (* FUDGE *))
   480 
   481 (* Setup *)
   482 
   483 fun add_atp (name, config) thy =
   484   Data.map (Symtab.update_new (name, (config, stamp ()))) thy
   485   handle Symtab.DUP name => error ("Duplicate ATP: " ^ quote name ^ ".")
   486 
   487 fun get_atp thy name =
   488   the (Symtab.lookup (Data.get thy) name) |> fst
   489   handle Option.Option => error ("Unknown ATP: " ^ name ^ ".")
   490 
   491 val supported_atps = Symtab.keys o Data.get
   492 
   493 fun is_atp_installed thy name =
   494   let val {exec, required_execs, ...} = get_atp thy name in
   495     forall (curry (op <>) "" o getenv o fst) (exec :: required_execs)
   496   end
   497 
   498 fun refresh_systems_on_tptp () =
   499   Synchronized.change systems (fn _ => get_systems ())
   500 
   501 val atps =
   502   [e, leo2, satallax, spass, vampire, z3_atp, remote_e, remote_leo2,
   503    remote_satallax, remote_vampire, remote_z3_atp, remote_e_sine, remote_snark,
   504    remote_e_tofof, remote_waldmeister]
   505 val setup = fold add_atp atps
   506 
   507 end;