better error reporting: detect missing E proofs and remove Vampire native format error

(*  Title:      HOL/Tools/ATP/atp_proof.ML
    Author:     Lawrence C. Paulson, Cambridge University Computer Laboratory
    Author:     Claire Quigley, Cambridge University Computer Laboratory
    Author:     Jasmin Blanchette, TU Muenchen

Abstract representation of ATP proofs and TSTP/Vampire/SPASS syntax.
*)

signature ATP_PROOF =
  type 'a fo_term = 'a ATP_Problem.fo_term
  type ('a, 'b, 'c) formula = ('a, 'b, 'c) ATP_Problem.formula

  datatype failure =
    Unprovable |
    IncompleteUnprovable |
    ProofMissing |
    UnsoundProof of bool |
    CantConnect |
    TimedOut |
    OutOfResources |
    SpassTooOld |
    VampireTooOld |
    NoPerl |
    NoLibwwwPerl |
    NoRealZ3 |
    MalformedInput |
    MalformedOutput |
    Interrupted |
    Crashed |
    InternalError |
    UnknownError of string

  type step_name = string * string option

  datatype 'a step =
    Definition of step_name * 'a * 'a |
    Inference of step_name * 'a * step_name list

  type 'a proof = ('a, 'a, 'a fo_term) formula step list

  val strip_spaces : (char -> bool) -> string -> string
  val short_output : bool -> string -> string
  val string_for_failure : failure -> string
  val extract_important_message : string -> string
  val extract_known_failure :
    (failure * string) list -> string -> failure option
  val extract_tstplike_proof_and_outcome :
    bool -> bool -> bool -> int -> (string * string) list
    -> (failure * string) list -> string -> string * failure option
  val is_same_step : step_name * step_name -> bool
  val atp_proof_from_tstplike_proof : string -> string proof
  val map_term_names_in_atp_proof :
    (string -> string) -> string proof -> string proof
  val nasty_atp_proof : string Symtab.table -> string proof -> string proof

(* Implementation of ATP_PROOF. The constructors and types of ATP_Problem are
   opened so that they can be used unqualified below. *)
structure ATP_Proof : ATP_PROOF =
struct

open ATP_Problem

(* Reasons why a prover run did not yield a usable proof. The constructors are
   listed in the same order as in the ATP_PROOF signature. *)
datatype failure =
    Unprovable
  | IncompleteUnprovable
  | ProofMissing
  | UnsoundProof of bool  (* argument selects the message variant *)
  | CantConnect
  | TimedOut
  | OutOfResources
  | SpassTooOld
  | VampireTooOld
  | NoPerl
  | NoLibwwwPerl
  | NoRealZ3
  | MalformedInput
  | MalformedOutput
  | Interrupted
  | Crashed
  | InternalError
  | UnknownError of string  (* free-form details, possibly empty *)

(* Removes whitespace from a list of characters and returns the surviving
   characters as one-character strings. Leading and trailing whitespace and
   runs of whitespace are dropped; a single " " is kept only where whitespace
   separates two characters that both satisfy "is_evil". *)
fun strip_spaces_in_list _ [] = []
  | strip_spaces_in_list _ [c1] = if Char.isSpace c1 then [] else [str c1]
  | strip_spaces_in_list is_evil [c1, c2] =
    strip_spaces_in_list is_evil [c1] @ strip_spaces_in_list is_evil [c2]
  | strip_spaces_in_list is_evil (c1 :: c2 :: c3 :: cs) =
    if Char.isSpace c1 then
      (* leading whitespace: drop it *)
      strip_spaces_in_list is_evil (c2 :: c3 :: cs)
    else if Char.isSpace c2 then
      if Char.isSpace c3 then
        (* run of whitespace: drop c2 and re-examine c1 against c3 *)
        strip_spaces_in_list is_evil (c1 :: c3 :: cs)
      else
        (* single whitespace between c1 and c3: keep a space only if both
           neighbours satisfy "is_evil" *)
        str c1 :: (if is_evil c1 andalso is_evil c3 then [" "] else []) @
        strip_spaces_in_list is_evil (c3 :: cs)
    else
      str c1 :: strip_spaces_in_list is_evil (c2 :: c3 :: cs)
(* String-level wrapper around "strip_spaces_in_list": explode the string into
   characters, strip whitespace, and glue the pieces back together. *)
fun strip_spaces is_evil s =
  s |> String.explode
    |> strip_spaces_in_list is_evil
    |> implode

(* Identifier characters are letters, digits, and the underscore. *)
fun is_ident_char #"_" = true
  | is_ident_char c = Char.isAlphaNum c
(* "strip_spaces" specialised so that a space survives only between two
   identifier characters. *)
fun strip_spaces_except_between_ident_chars s = strip_spaces is_ident_char s

(* Shortens "s" to roughly "threshold" characters by keeping its beginning and
   its end and replacing the middle by " ...... ". Strings of at most
   "threshold" characters are returned unchanged.
   NOTE(review): for thresholds below 10 the head length becomes negative and
   "String.extract" raises Subscript; the only caller in this file uses 1000. *)
fun elide_string threshold s =
  if size s > threshold then
    String.extract (s, 0, SOME (threshold div 2 - 5)) ^ " ...... " ^
    String.extract (s, size s - (threshold + 1) div 2 + 6, NONE)
  else
    s
(* Produces the prover output to show to the user. In verbose mode this is the
   output elided to about 1000 characters (or a placeholder if the output is
   empty); otherwise it is the empty string, which "string_for_failure" turns
   into a hint to pass the "verbose" option. *)
fun short_output verbose output =
  if verbose then
    if output = "" then "No details available" else elide_string 1000 output
  else
    ""

(* common ending of the messages about missing remote-prover prerequisites *)
val missing_message_tail =
  " appears to be missing. You will need to install it if you want to invoke \
  \remote provers."

(* Maps each failure to the message shown to the user. Every constructor of
   "failure" has a clause, so the match is exhaustive. *)
fun string_for_failure Unprovable =
    "The problem is unprovable."
  | string_for_failure IncompleteUnprovable =
    "The prover gave up."
  | string_for_failure ProofMissing =
    "The prover claims the conjecture is a theorem but did not provide a proof."
  | string_for_failure (UnsoundProof false) =
    "The prover found a type-unsound proof (or, very unlikely, your axioms \
    \are inconsistent). Try passing the \"full_types\" option to Sledgehammer \
    \to avoid such spurious proofs."
  | string_for_failure (UnsoundProof true) =
    "The prover found a type-unsound proof even though a supposedly type-sound \
    \encoding was used (or, very unlikely, your axioms are inconsistent). You \
    \might want to report this to the Isabelle developers."
  | string_for_failure CantConnect = "Cannot connect to remote server."
  | string_for_failure TimedOut = "Timed out."
  | string_for_failure OutOfResources = "The prover ran out of resources."
  | string_for_failure SpassTooOld =
    (* NOTE(review): the package name between the escaped quotes is empty in
       this file; presumably a download location belongs there -- confirm. *)
    "Isabelle requires a more recent version of SPASS with support for the \
    \TPTP syntax. To install it, download and extract the package \
    \\"\" and add the \
    \\"spass-3.7\" directory's absolute path to " ^
    Path.print (Path.expand (Path.appends
               (Path.variable "ISABELLE_HOME_USER" ::
                map Path.basic ["etc", "components"]))) ^
    " on a line of its own."
  | string_for_failure VampireTooOld =
    "Isabelle requires a more recent version of Vampire. To install it, follow \
    \the instructions from the Sledgehammer manual (\"isabelle doc\
    \ sledgehammer\")."
  | string_for_failure NoPerl = "Perl" ^ missing_message_tail
  | string_for_failure NoLibwwwPerl =
    "The Perl module \"libwww-perl\"" ^ missing_message_tail
  | string_for_failure NoRealZ3 =
    "The environment variable \"Z3_REAL_SOLVER\" must be set to Z3's full path."
  | string_for_failure MalformedInput =
    "The generated problem is malformed. Please report this to the Isabelle \
    \developers."
  | string_for_failure MalformedOutput = "The prover output is malformed."
  | string_for_failure Interrupted = "The prover was interrupted."
  | string_for_failure Crashed = "The prover crashed."
  | string_for_failure InternalError = "An internal prover error occurred."
  | string_for_failure (UnknownError string) =
    "A prover error occurred" ^
    (if string = "" then ". (Pass the \"verbose\" option for details.)"
     else ":\n" ^ string)

(* Returns the part of "output" that lies between the first occurrence of
   "begin_delim" and the following "end_delim", minus everything up to and
   including the first newline of that part (i.e. the remainder of the line
   holding the begin delimiter). Yields "" if either delimiter or that newline
   cannot be found. *)
fun extract_delimited (begin_delim, end_delim) output =
  case first_field begin_delim output of
    NONE => ""
  | SOME (_, after_begin) =>
    (case first_field end_delim after_begin of
       NONE => ""
     | SOME (before_end, _) =>
       (case first_field "\n" before_end of
          NONE => ""
        | SOME (_, body) => body))

(* markers that enclose the "important message" section of the output *)
val tstp_important_message_delims =
  ("% SZS start RequiredInformation", "% SZS end RequiredInformation")

(* Extracts the section delimited by the markers above, drops empty lines,
   removes a leading "%" and then a leading " " from every remaining line, and
   returns the lines joined by "\n " and wrapped in quotes. Returns "" if there
   is no such section. *)
fun extract_important_message output =
  let
    val strip_comment_prefix =
      perhaps (try (unprefix " ")) o perhaps (try (unprefix "%"))
    fun is_blank line = (line = "")
  in
    case extract_delimited tstp_important_message_delims output of
      "" => ""
    | message =>
      message
      |> space_explode "\n"
      |> filter_out is_blank
      |> map strip_comment_prefix
      |> space_implode "\n "
      |> quote
  end

(* Extracts the proof text using the first begin delimiter and the first end
   delimiter (searched independently, in list order) that occur in "output".
   Returns "" if no begin delimiter or no end delimiter occurs. *)
fun extract_tstplike_proof delims output =
  let
    val (begin_delims, end_delims) = ListPair.unzip delims
    fun occurs_in_output delim = String.isSubstring delim output
  in
    case (find_first occurs_in_output begin_delims,
          find_first occurs_in_output end_delims) of
      (SOME begin_delim, SOME end_delim) =>
      extract_delimited (begin_delim, end_delim) output
    | _ => ""
  end

(* Returns the failure of the first (failure, pattern) entry in
   "known_failures" whose pattern occurs as a substring of "output", or NONE
   if no pattern matches. *)
fun extract_known_failure known_failures output =
  case List.find (fn (_, pattern) => String.isSubstring pattern output)
                 known_failures of
    SOME (failure, _) => SOME failure
  | NONE => NONE

(* Returns the proof text found in "output" together with an optional failure.
   If a proof is found, it is returned with NONE. Otherwise the proof text is
   "" and the failure is determined as follows:
   - exit code 0 and (not in debug mode, or no output at all): ProofMissing;
   - else the first known failure matching the output, where
     IncompleteUnprovable is strengthened to Unprovable if "complete" is set;
   - else UnknownError carrying the (possibly elided) output. *)
fun extract_tstplike_proof_and_outcome debug verbose complete res_code
                                       proof_delims known_failures output =
  case extract_tstplike_proof proof_delims output of
    "" =>
    ("", SOME (if res_code = 0 andalso (not debug orelse output = "") then
                 ProofMissing
               else case extract_known_failure known_failures output of
                 SOME failure =>
                 if failure = IncompleteUnprovable andalso complete then
                   Unprovable
                 else
                   failure
               | NONE => UnknownError (short_output verbose output)))
  | tstplike_proof => (tstplike_proof, NONE)

(* Negates a formula; negating a negation strips the outer negation instead of
   stacking a second one. *)
fun mk_anot phi =
  case phi of
    AConn (ANot, [negated]) => negated
  | _ => AConn (ANot, [phi])

(* Combines two formulas with the binary connective "c". *)
fun mk_aconn c (left, right) = AConn (c, [left, right])

(* step identifier paired with an optional second name *)
type step_name = string * string option

(* Two step names denote the same step iff their first components agree; the
   optional second components are ignored. *)
fun is_same_step (name1, name2) = (fst name1 = fst name2)

(* Orders step names by their first component. Names that read as integers
   (after removing an optional "f" prefix) are compared numerically and come
   after all other names; the other names are compared as strings. *)
fun step_name_ord p =
  let val q = pairself fst p in
    (* The "unprefix" part is to cope with remote Vampire's output. The proper
       solution would be to perform a topological sort, e.g. using the nice
       "Graph" functor. *)
    case pairself (Int.fromString o perhaps (try (unprefix "f"))) q of
      (NONE, NONE) => string_ord q
    | (NONE, SOME _) => LESS
    | (SOME _, NONE) => GREATER
    | (SOME i, SOME j) => int_ord (i, j)
  end

(* A proof step is either a definition (name and two formulas) or an inference
   (name, derived formula, and the names of the steps it depends on). *)
datatype 'a step =
    Definition of step_name * 'a * 'a
  | Inference of step_name * 'a * step_name list

type 'a proof = ('a, 'a, 'a fo_term) formula step list

(* Projects the name out of either kind of step. *)
fun step_name step =
  case step of
    Definition (name, _, _) => name
  | Inference (name, _, _) => name


(* Scans a general identifier: either a string enclosed in single quotes
   (e.g., a file name), returned without the quotes, or any number of "$"
   signs followed by at least one letter or digit. *)
val scan_general_id =
  let
    val scan_quoted = $$ "'" |-- Scan.repeat (~$$ "'") --| $$ "'" >> implode
    val scan_plain =
      Scan.repeat ($$ "$") -- Scan.many1 Symbol.is_letdig
      >> (fn (dollars, letdigs) => implode (dollars @ letdigs))
  in
    scan_quoted || scan_plain
  end

(* Generalized first-order terms, which include file names, numbers, etc.
   An annotation is a space-separated sequence of general identifiers followed
   by an optional further annotation, or a parenthesized or bracketed list of
   annotations. The result is the flat list of all identifiers encountered. *)
fun parse_annotation x =
  let
    val ids = scan_general_id ::: Scan.repeat ($$ " " |-- scan_general_id)
    fun enclosed (left, right) = $$ left |-- parse_annotations --| $$ right
  in
    (ids -- Scan.optional parse_annotation [] >> op @
     || enclosed ("(", ")")
     || enclosed ("[", "]")) x
  end
(* A possibly empty, comma-separated list of annotations, flattened. *)
and parse_annotations x =
  let
    val nonempty =
      parse_annotation ::: Scan.repeat ($$ "," |-- parse_annotation)
  in
    (Scan.optional nonempty [] >> flat) x
  end

(* Parses a first-order term: a general identifier, an optional ":type"
   suffix that is discarded, and an optional parenthesized argument list. *)
fun parse_term x =
  (scan_general_id
     --| Scan.option ($$ ":" -- scan_general_id) (* ignore TFF types for now *)
     -- Scan.optional ($$ "(" |-- parse_terms --| $$ ")") []
   >> ATerm) x
(* Parses a nonempty, comma-separated list of terms. *)
and parse_terms x = (parse_term ::: Scan.repeat ($$ "," |-- parse_term)) x

(* Parses an atom: a term optionally followed by "=" or "!=" and a second
   term. Equalities become applications of "c_equal"; "!=" yields the negated
   equality. *)
fun parse_atom x =
  let
    fun mk_equality (lhs, rhs) = AAtom (ATerm ("c_equal", [lhs, rhs]))
    fun mk_atom (u1, NONE) = AAtom u1
      | mk_atom (u1, SOME (NONE, u2)) = mk_equality (u1, u2)
      | mk_atom (u1, SOME (SOME _, u2)) = mk_anot (mk_equality (u1, u2))
  in
    (parse_term -- Scan.option (Scan.option ($$ "!") --| $$ "=" -- parse_term)
     >> mk_atom) x
  end

(* Returns the head symbol of a term, discarding its arguments. *)
fun fo_term_head term =
  case term of ATerm (head, _) => head

(* TPTP formulas are fully parenthesized, so we don't need to worry about
   operator precedence. *)

(* A literal is any number of "~" followed by a parenthesized formula, a
   quantified formula, or an atom; an odd number of "~" negates the result. *)
fun parse_literal x =
  ((Scan.repeat ($$ "~") >> length)
      -- ($$ "(" |-- parse_formula --| $$ ")"
          || parse_quantified_formula
          || parse_atom)
      >> (fn (n, phi) => phi |> n mod 2 = 1 ? mk_anot)) x
(* A formula is a literal, optionally followed by a binary connective and
   another formula. *)
and parse_formula x =
  (parse_literal
   -- Scan.option ((Scan.this_string "=>" >> K AImplies
                    || Scan.this_string "<=>" >> K AIff
                    || Scan.this_string "<~>" >> K ANotIff
                    || Scan.this_string "<=" >> K AIf
                    || $$ "|" >> K AOr
                    || $$ "&" >> K AAnd)
                   -- parse_formula)
   >> (fn (phi1, NONE) => phi1
        | (phi1, SOME (c, phi2)) => mk_aconn c (phi1, phi2))) x
(* "![vars]: literal" or "?[vars]: literal"; only the head symbol of each
   bound variable term is kept. *)
and parse_quantified_formula x =
  (($$ "!" >> K AForall || $$ "?" >> K AExists)
   --| $$ "[" -- parse_terms --| $$ "]" --| $$ ":" -- parse_literal
   >> (fn ((q, ts), phi) =>
          (* FIXME: TFF *)
          AQuant (q, map (rpair NONE o fo_term_head) ts, phi))) x

(* Parses the optional trailing arguments of a TSTP line: a comma and one
   annotation, whose identifiers are returned, optionally followed by a comma
   and further annotations, which are discarded. Yields [] if absent. *)
val parse_tstp_extra_arguments =
  let
    val kept_annotation = $$ "," |-- parse_annotation
    val discarded_annotations = Scan.option ($$ "," |-- parse_annotations)
  in
    Scan.optional (kept_annotation --| discarded_annotations) []
  end

(* fact name in a "file" annotation that stands for "no known fact" *)
val vampire_unknown_fact = "unknown"
(* prefix that is stripped from fact names in "file" annotations *)
val tofof_fact_prefix = "fof_"

(* Syntax: (cnf|fof|tff)\(<num>, <formula_role>, <formula> <extra_arguments>\).
   The <num> could be an identifier, but we assume integers.

   If the extra arguments have the form ["file", _, s], the step is named
   after the fact "s" (unless "s" is the unknown-fact marker) and has no
   dependencies; otherwise the extra arguments are its dependencies. Lines
   with role "definition" become Definition steps when the formula is an
   equivalence with an atom on the left; all other lines become Inference
   steps. Raises Fail for a definition of any other shape. *)
fun parse_tstp_line x =
  (((Scan.this_string "cnf" || Scan.this_string "fof" || Scan.this_string "tff")
       -- $$ "(")
     |-- scan_general_id --| $$ "," -- Symbol.scan_id --| $$ ","
     -- parse_formula -- parse_tstp_extra_arguments --| $$ ")" --| $$ "."
    >> (fn (((num, role), phi), deps) =>
           let
             val (name, deps) =
               case deps of
                 ["file", _, s] =>
                 ((num,
                   if s = vampire_unknown_fact then NONE
                   else SOME (s |> perhaps (try (unprefix tofof_fact_prefix)))),
                  [])
               | _ => ((num, NONE), deps)
           in
             case role of
               "definition" =>
               (case phi of
                  AConn (AIff, [phi1 as AAtom _, phi2]) =>
                  Definition (name, phi1, phi2)
                | AAtom (ATerm ("c_equal", _)) =>
                  (* Vampire's equality proxy axiom *)
                  Inference (name, phi, map (rpair NONE) deps)
                | _ => raise Fail "malformed definition")
             | _ => Inference (name, phi, map (rpair NONE) deps)
           end)) x


(* SPASS returns clause references of the form "x.y". We ignore "y", whose role
   is not clear anyway. *)
val parse_dot_name =
  let val ignored_suffix = $$ "." -- scan_general_id
  in scan_general_id --| ignored_suffix end

(* Parses an optional ":" followed by clause references, each optionally
   followed by a comma; yields [] if there is no ":". *)
val parse_spass_annotations =
  let
    val reference = parse_dot_name --| Scan.option ($$ ",")
  in
    Scan.optional ($$ ":" |-- Scan.repeat reference) []
  end

(* It is not clear why some literals are followed by sequences of stars and/or
   pluses. We ignore them (together with any interspersed spaces). *)
fun parse_decorated_atom x =
  let
    val decoration = $$ "*" || $$ "+" || $$ " "
  in
    (parse_atom --| Scan.repeat decoration) x
  end

(* Builds a formula from the negative and positive literals of a clause: the
   conjunction of the negative literals implies the disjunction of the
   positive ones. With no negative literals the result is just the
   disjunction, with no positive literals the negated conjunction, and with
   neither the atom "c_False". *)
fun mk_horn (neg_lits, pos_lits) =
  let
    fun conjunction lits = foldr1 (mk_aconn AAnd) lits
    fun disjunction lits = foldr1 (mk_aconn AOr) lits
  in
    case (neg_lits, pos_lits) of
      ([], []) => AAtom (ATerm ("c_False", []))
    | ([], _) => disjunction pos_lits
    | (_, []) => mk_anot (conjunction neg_lits)
    | _ => mk_aconn AImplies (conjunction neg_lits, disjunction pos_lits)
  end

(* Parses "<atoms> || <atoms> -> <atoms>". The atoms of the first two groups
   are concatenated and used as the negative literals, those of the third
   group as the positive literals. *)
fun parse_horn_clause x =
  let
    fun atoms y = Scan.repeat parse_decorated_atom y
    fun to_horn ((neg_lits1, neg_lits2), pos_lits) =
      mk_horn (neg_lits1 @ neg_lits2, pos_lits)
  in
    (atoms --| $$ "|" --| $$ "|"
       -- atoms --| $$ "-" --| $$ ">"
       -- atoms
     >> to_horn) x
  end

(* Syntax: <num>[0:<inference><annotations>]
   <atoms> || <atoms> -> <atoms>.
   The inference name is scanned but discarded; the annotations become the
   dependencies of the resulting Inference step. *)
fun parse_spass_line x =
  let
    fun header y =
      (scan_general_id --| $$ "[" --| $$ "0" --| $$ ":" --| Symbol.scan_id
         -- parse_spass_annotations --| $$ "]") y
    fun mk_step ((num, deps), clause) =
      Inference ((num, NONE), clause, map (fn dep => (dep, NONE)) deps)
  in
    (header -- parse_horn_clause --| $$ "." >> mk_step) x
  end

(* A proof line is either in TSTP or in SPASS syntax. *)
fun parse_line input = (parse_tstp_line || parse_spass_line) input

(* Strips insignificant whitespace from "s", explodes it into symbols, and
   parses one or more proof lines. Raises Fail "unrecognized ATP output" if
   the scanner reports an error. *)
fun parse_proof s =
  let
    fun scan_lines y =
      Scan.error (!! (fn _ => raise Fail "unrecognized ATP output")
                     (Scan.repeat1 parse_line)) y
    val symbols = raw_explode (strip_spaces_except_between_ident_chars s)
  in
    fst (Scan.finite Symbol.stopper scan_lines symbols)
  end

(* Looks up the already seen step name that denotes the same step as "dep". *)
fun clean_up_dependency seen dep =
  find_first (fn name => is_same_step (dep, name)) seen

(* Walks through the steps in order, replacing every dependency of an
   inference by the matching name of an earlier step and dropping
   dependencies that match no earlier step. Definitions are kept as they are. *)
fun clean_up_dependencies seen steps =
  case steps of
    [] => []
  | (step as Definition (name, _, _)) :: rest =>
    step :: clean_up_dependencies (name :: seen) rest
  | Inference (name, u, deps) :: rest =>
    Inference (name, u, map_filter (clean_up_dependency seen) deps) ::
    clean_up_dependencies (name :: seen) rest

(* Turns the textual proof into a list of steps: parse, sort by step name,
   and clean up the dependencies. The empty string yields the empty proof. *)
fun atp_proof_from_tstplike_proof tstplike_proof =
  if tstplike_proof = "" then
    []
  else
    let
      (* the $ sign acts as a sentinel (FIXME: needed?) *)
      val steps = parse_proof (tstplike_proof ^ "$")
      val sorted_steps = sort (step_name_ord o pairself step_name) steps
    in
      clean_up_dependencies [] sorted_steps
    end

(* Applies "f" to every symbol name of a term, including its arguments. *)
fun map_term_names_in_term f =
  let
    fun rename (ATerm (name, args)) = ATerm (f name, map rename args)
  in
    rename
  end

(* Applies "f" to the symbol names of all terms in a formula; quantifiers,
   bound variables, and connectives are left untouched. *)
fun map_term_names_in_formula f phi =
  case phi of
    AQuant (q, xs, body) => AQuant (q, xs, map_term_names_in_formula f body)
  | AConn (c, phis) => AConn (c, map (map_term_names_in_formula f) phis)
  | AAtom t => AAtom (map_term_names_in_term f t)

(* Applies "f" to the formulas of a step; step names and dependencies are
   kept. *)
fun map_term_names_in_step f step =
  case step of
    Definition (name, phi1, phi2) =>
    Definition (name, map_term_names_in_formula f phi1,
                map_term_names_in_formula f phi2)
  | Inference (name, phi, deps) =>
    Inference (name, map_term_names_in_formula f phi, deps)

(* Applies "f" to the symbol names in every step of a proof. *)
fun map_term_names_in_atp_proof f proof =
  map (map_term_names_in_step f) proof

(* Looks "s" up in "pool"; names without an entry are returned unchanged. *)
fun nasty_name pool s = s |> Symtab.lookup pool |> the_default s

(* Renames the symbols of a proof according to "pool". An empty pool leaves
   the proof untouched without traversing it. *)
fun nasty_atp_proof pool =
  if Symtab.is_empty pool then I
  else map_term_names_in_atp_proof (nasty_name pool)

(* closes structure ATP_Proof *)
end;
