src/Doc/Sledgehammer/document/root.tex
author blanchet
Tue Nov 07 15:16:41 2017 +0100 (19 months ago)
changeset 67021 41f1f8c4259b
parent 66735 5887ae5b95a8
child 68250 c45067867860
permissions -rw-r--r--
integrated Leo-III in Sledgehammer (thanks to Alexander Steen for the patch)
     1 \documentclass[a4paper,12pt]{article}
     2 \usepackage{lmodern}
     3 \usepackage[T1]{fontenc}
     4 \usepackage{amsmath}
     5 \usepackage{amssymb}
     6 \usepackage[english]{babel}
     7 \usepackage{color}
     8 \usepackage{footmisc}
     9 \usepackage{graphicx}
    10 %\usepackage{mathpazo}
    11 \usepackage{multicol}
    12 \usepackage{stmaryrd}
    13 %\usepackage[scaled=.85]{beramono}
    14 \usepackage{isabelle,iman,pdfsetup}
    15 
    16 \newcommand\download{\url{http://isabelle.in.tum.de/components/}}
    17 
    18 \let\oldS=\S
    19 \def\S{\oldS\,}
    20 
    21 \def\qty#1{\ensuremath{\left<\mathit{#1\/}\right>}}
    22 \def\qtybf#1{$\mathbf{\left<\textbf{\textit{#1\/}}\right>}$}
    23 
    24 \newcommand\const[1]{\textsf{#1}}
    25 
    26 %\oddsidemargin=4.6mm
    27 %\evensidemargin=4.6mm
    28 %\textwidth=150mm
    29 %\topmargin=4.6mm
    30 %\headheight=0mm
    31 %\headsep=0mm
    32 %\textheight=234mm
    33 
    34 \def\Colon{\mathord{:\mkern-1.5mu:}}
    35 %\def\lbrakk{\mathopen{\lbrack\mkern-3.25mu\lbrack}}
    36 %\def\rbrakk{\mathclose{\rbrack\mkern-3.255mu\rbrack}}
    37 \def\lparr{\mathopen{(\mkern-4mu\mid}}
    38 \def\rparr{\mathclose{\mid\mkern-4mu)}}
    39 
    40 \def\unk{{?}}
    41 \def\undef{(\lambda x.\; \unk)}
    42 %\def\unr{\textit{others}}
    43 \def\unr{\ldots}
    44 \def\Abs#1{\hbox{\rm{\flqq}}{\,#1\,}\hbox{\rm{\frqq}}}
    45 \def\Q{{\smash{\lower.2ex\hbox{$\scriptstyle?$}}}}
    46 
    47 \urlstyle{tt}
    48 
    49 \renewcommand\_{\hbox{\textunderscore\kern-.05ex}}
    50 
    51 \begin{document}
    52 
    53 %%% TYPESETTING
    54 %\renewcommand\labelitemi{$\bullet$}
    55 \renewcommand\labelitemi{\raise.065ex\hbox{\small\textbullet}}
    56 
    57 \title{\includegraphics[scale=0.5]{isabelle_sledgehammer} \\[4ex]
    58 Hammering Away \\[\smallskipamount]
    59 \Large A User's Guide to Sledgehammer for Isabelle/HOL}
    60 \author{\hbox{} \\
    61 Jasmin Christian Blanchette \\
    62 {\normalsize Institut f\"ur Informatik, Technische Universit\"at M\"unchen} \\[4\smallskipamount]
    63 {\normalsize with contributions from} \\[4\smallskipamount]
    64 Lawrence C. Paulson \\
    65 {\normalsize Computer Laboratory, University of Cambridge} \\
    66 \hbox{}}
    67 
    68 \maketitle
    69 
    70 \tableofcontents
    71 
    72 \setlength{\parskip}{.7em plus .2em minus .1em}
    73 \setlength{\parindent}{0pt}
    74 \setlength{\abovedisplayskip}{\parskip}
    75 \setlength{\abovedisplayshortskip}{.9\parskip}
    76 \setlength{\belowdisplayskip}{\parskip}
    77 \setlength{\belowdisplayshortskip}{.9\parskip}
    78 
    79 % general-purpose enum environment with correct spacing
    80 \newenvironment{enum}%
    81     {\begin{list}{}{%
    82         \setlength{\topsep}{.1\parskip}%
    83         \setlength{\partopsep}{.1\parskip}%
    84         \setlength{\itemsep}{\parskip}%
    85         \advance\itemsep by-\parsep}}
    86     {\end{list}}
    87 
    88 \def\pre{\begingroup\vskip0pt plus1ex\advance\leftskip by\leftmargin
    89 \advance\rightskip by\leftmargin}
    90 \def\post{\vskip0pt plus1ex\endgroup}
    91 
    92 \def\prew{\pre\advance\rightskip by-\leftmargin}
    93 \def\postw{\post}
    94 
    95 \section{Introduction}
    96 \label{introduction}
    97 
    98 Sledgehammer is a tool that applies automatic theorem provers (ATPs)
    99 and satisfiability-modulo-theories (SMT) solvers on the current goal.%
   100 \footnote{The distinction between ATPs and SMT solvers is convenient but mostly
   101 historical. The two communities are converging, with more and more ATPs
   102 supporting typical SMT features such as arithmetic and sorts, and a few SMT
   103 solvers parsing ATP syntaxes. There is also a strong technological connection
   104 between instantiation-based ATPs (such as iProver and iProver-Eq) and SMT
   105 solvers.}
   106 %
   107 The supported ATPs are AgsyHOL \cite{agsyHOL}, Alt-Ergo \cite{alt-ergo}, E
   108 \cite{schulz-2002}, E-SInE \cite{sine}, E-ToFoF \cite{tofof}, iProver
   109 \cite{korovin-2009}, iProver-Eq \cite{korovin-sticksel-2010}, LEO-II
   110 \cite{leo2}, Leo-III \cite{leo3}, Satallax \cite{satallax}, SNARK \cite{snark}, SPASS
   111 \cite{weidenbach-et-al-2009}, Vampire \cite{riazanov-voronkov-2002},
   112 Waldmeister \cite{waldmeister}, and Zipperposition \cite{cruanes-2014}.
   113 The ATPs are run either locally or remotely via the System\-On\-TPTP web service
   114 \cite{sutcliffe-2000}. The supported SMT solvers are CVC3 \cite{cvc3}, CVC4
   115 \cite{cvc4}, veriT \cite{bouton-et-al-2009}, and Z3 \cite{z3}. These are always
   116 run locally.
   117 
   118 The problem passed to the external provers (or solvers) consists of your current
   119 goal together with a heuristic selection of hundreds of facts (theorems) from the
   120 current theory context, filtered by relevance.
   121 
   122 The result of a successful proof search is some source text that usually (but
   123 not always) reconstructs the proof within Isabelle. For ATPs, the reconstructed
   124 proof typically relies on the general-purpose \textit{metis} proof method, which
   125 integrates the Metis ATP in Isabelle/HOL with explicit inferences going through
   126 the kernel. Thus its results are correct by construction.
   127 
   128 For Isabelle/jEdit users, Sledgehammer provides an automatic mode that can be
   129 enabled via the ``Auto Sledgehammer'' option under ``Plugins > Plugin Options >
   130 Isabelle > General.'' In this mode, a reduced version of Sledgehammer is run on
   131 every newly entered theorem for a few seconds.
   132 
   133 \newbox\boxA
   134 \setbox\boxA=\hbox{\texttt{NOSPAM}}
   135 
   136 \newcommand\authoremail{\texttt{blan{\color{white}NOSPAM}\kern-\wd\boxA{}chette@\allowbreak
   137 in.\allowbreak tum.\allowbreak de}}
   138 
   139 To run Sledgehammer, you must make sure that the theory \textit{Sledgehammer} is
   140 imported---this is rarely a problem in practice since it is part of
   141 \textit{Main}. Examples of Sledgehammer use can be found in Isabelle's
   142 \texttt{src/HOL/Metis\_Examples} directory.
   143 Comments and bug reports concerning Sledgehammer or this manual should be
   144 directed to the author at \authoremail.
   145 
   146 %\vskip2.5\smallskipamount
   147 %
   148 %\textbf{Acknowledgment.} The author would like to thank Mark Summerfield for
   149 %suggesting several textual improvements.
   150 
   151 \section{Installation}
   152 \label{installation}
   153 
   154 Sledgehammer is part of Isabelle, so you do not need to install it. However, it
   155 relies on third-party automatic provers (ATPs and SMT solvers).
   156 
   157 Among the ATPs, AgsyHOL, Alt-Ergo, E, LEO-II, Leo-III, Satallax, SPASS, Vampire, and
   158 Zipperposition can be run locally; in addition, AgsyHOL, E, E-SInE, E-ToFoF,
   159 iProver, iProver-Eq, LEO-II, Leo-III, Satallax, SNARK, Vampire, and Waldmeister are
   160 available remotely via System\-On\-TPTP \cite{sutcliffe-2000}. The SMT solvers
   161 CVC3, CVC4, veriT, and Z3 can be run locally.
   162 
   163 There are three main ways to install automatic provers on your machine:
   164 
   165 \begin{sloppy}
   166 \begin{enum}
   167 \item[\labelitemi] If you installed an official Isabelle package, it should
   168 already include properly setup executables for CVC4, E, SPASS, veriT, and Z3, ready to use.
   169 
   170 \item[\labelitemi] Alternatively, you can download the Isabelle-aware CVC3,
   171 CVC4, E, SPASS, and Z3 binary packages from \download. Extract the archives,
   172 then add a line to your \texttt{\$ISABELLE\_HOME\_USER\slash etc\slash
   173 components}%
   174 \footnote{The variable \texttt{\$ISABELLE\_HOME\_USER} is set by Isabelle at
   175 startup. Its value can be retrieved by executing \texttt{isabelle}
   176 \texttt{getenv} \texttt{ISABELLE\_HOME\_USER} on the command line.}
   177 file with the absolute path to CVC3, CVC4, E, SPASS, or Z3. For example, if the
   178 \texttt{components} file does not exist yet and you extracted SPASS to
   179 \texttt{/usr/local/spass-3.8ds}, create it with the single line
   180 
   181 \prew
   182 \texttt{/usr/local/spass-3.8ds}
   183 \postw
   184 
   185 in it.
   186 
   187 \item[\labelitemi] If you prefer to build AgsyHOL, Alt-Ergo, E, LEO-II, Leo-III, or
   188 Satallax manually, or found a Vampire executable somewhere (e.g.,
   189 \url{http://www.vprover.org/}), set the environment variable
   190 \texttt{AGSYHOL\_HOME}, \texttt{E\_HOME}, \texttt{LEO2\_HOME},
   191 \texttt{LEO3\_HOME}, \texttt{SATALLAX\_HOME}, or
   192 \texttt{VAMPIRE\_HOME} to the directory that contains the \texttt{agsyHOL},
   193 \texttt{eprover} (and/or \texttt{eproof} or \texttt{eproof\_ram}),
   194 \texttt{leo}, \texttt{leo3}, \texttt{satallax}, or \texttt{vampire} executable;
   195 for Alt-Ergo, set the
   196 environment variable \texttt{WHY3\_HOME} to the directory that contains the
   197 \texttt{why3} executable.
   198 Sledgehammer has been tested with AgsyHOL 1.0, Alt-Ergo 0.95.2, E 1.6 to 2.0,
   199 LEO-II 1.3.4, Leo-III 1.1, Satallax 2.2 to 2.7, and Vampire 0.6 to 4.0.%
   200 \footnote{Following the rewrite of Vampire, the counter for version numbers was
   201 reset to 0; hence the (new) Vampire versions 0.6, 1.0, 1.8, 2.6, and 3.0 are more
   202 recent than 9.0 or 11.5.}%
   203 Since the ATPs' output formats are neither documented nor stable, other
   204 versions might not work well with Sledgehammer. Ideally,
   205 you should also set \texttt{E\_VERSION}, \texttt{LEO2\_VERSION},
   206 \texttt{LEO3\_VERSION}, \texttt{SATALLAX\_VERSION}, or
   207 \texttt{VAMPIRE\_VERSION} to the prover's version number (e.g., ``4.0'').
   208 
   209 Similarly, if you want to install CVC3, CVC4, veriT, or Z3, set the environment
   210 variable \texttt{CVC3\_\allowbreak SOLVER}, \texttt{CVC4\_\allowbreak SOLVER},
   211 \texttt{VERIT\_\allowbreak SOLVER}, or \texttt{Z3\_SOLVER} to the complete path
   212 of the executable, \emph{including the file name}. Sledgehammer has been tested
   213 with CVC3 2.2 and 2.4.1, CVC4 1.5-prerelease, veriT smtcomp2014, and Z3 4.3.2.
   214 Since Z3's output format is somewhat unstable, other versions of the solver
   215 might not work well with Sledgehammer. Ideally, also set
   216 \texttt{CVC3\_VERSION}, \texttt{CVC4\_VERSION}, \texttt{VERIT\_VERSION}, or
   217 \texttt{Z3\_VERSION} to the solver's version number (e.g., ``4.4.0'').
   218 \end{enum}
   219 \end{sloppy}
   220 
   221 To check whether the provers are successfully installed, try out the example
   222 in \S\ref{first-steps}. If the remote versions of any of these provers is used
   223 (identified by the prefix ``\textit{remote\_\/}''), or if the local versions
   224 fail to solve the easy goal presented there, something must be wrong with the
   225 installation.
   226 
   227 Remote prover invocation requires Perl with the World Wide Web Library
   228 (\texttt{libwww-perl}) installed. If you must use a proxy server to access the
   229 Internet, set the \texttt{http\_proxy} environment variable to the proxy, either
   230 in the environment in which Isabelle is launched or in your
   231 \texttt{\$ISABELLE\_HOME\_USER/etc/settings} file. Here are a few
   232 examples:
   233 
   234 \prew
   235 \texttt{http\_proxy=http://proxy.example.org} \\
   236 \texttt{http\_proxy=http://proxy.example.org:8080} \\
   237 \texttt{http\_proxy=http://joeblow:pAsSwRd@proxy.example.org}
   238 \postw
   239 
   240 \section{First Steps}
   241 \label{first-steps}
   242 
   243 To illustrate Sledgehammer in context, let us start a theory file and
   244 attempt to prove a simple lemma:
   245 
   246 \prew
   247 \textbf{theory}~\textit{Scratch} \\
   248 \textbf{imports}~\textit{Main} \\
   249 \textbf{begin} \\[2\smallskipamount]
   250 %
   251 \textbf{lemma} ``$[a] = [b] \,\Longrightarrow\, a = b$'' \\
   252 \textbf{sledgehammer}
   253 \postw
   254 
   255 Instead of issuing the \textbf{sledgehammer} command, you can also use the
   256 Sledgehammer panel in Isabelle/jEdit. Sledgehammer produces the following output
   257 after a few seconds:
   258 
   259 \prew
   260 \slshape
   261 Proof found\ldots \\
   262 ``\textit{e\/}'': Try this: \textbf{by} \textit{simp} (0.3 ms) \\
   263 %
   264 ``\textit{cvc4\/}'': Try this: \textbf{by} \textit{simp} (0.4 ms) \\
   265 %
   266 ``\textit{z3\/}'': Try this: \textbf{by} \textit{simp} (0.5 ms) \\
   267 %
   268 ``\textit{spass\/}'': Try this: \textbf{by} \textit{simp} (0.3 ms)
   269 %
   270 \postw
   271 
   272 Sledgehammer ran CVC4, E, SPASS, and Z3 in parallel. Depending on which
   273 provers are installed and how many processor cores are available, some of the
   274 provers might be missing or present with a \textit{remote\_} prefix.
   275 
   276 For each successful prover, Sledgehammer gives a one-line \textit{metis} or
   277 \textit{smt} method call. Rough timings are shown in parentheses, indicating how
   278 fast the call is. You can click the proof to insert it into the theory text.
   279 
   280 In addition, you can ask Sledgehammer for an Isar text proof by enabling the
   281 \textit{isar\_proofs} option (\S\ref{output-format}):
   282 
   283 \prew
   284 \textbf{sledgehammer} [\textit{isar\_proofs}]
   285 \postw
   286 
   287 When Isar proof construction is successful, it can yield proofs that are more
   288 readable and also faster than the \textit{metis} or \textit{smt} one-line
   289 proofs. This feature is experimental and is only available for ATPs.
   290 
   291 \section{Hints}
   292 \label{hints}
   293 
   294 This section presents a few hints that should help you get the most out of
   295 Sledgehammer. Frequently asked questions are answered in
   296 \S\ref{frequently-asked-questions}.
   297 
   298 %\newcommand\point[1]{\medskip\par{\sl\bfseries#1}\par\nopagebreak}
   299 \newcommand\point[1]{\subsection{\emph{#1}}}
   300 
   301 \point{Presimplify the goal}
   302 
   303 For best results, first simplify your problem by calling \textit{auto} or at
   304 least \textit{safe} followed by \textit{simp\_all}. The SMT solvers provide
   305 arithmetic decision procedures, but the ATPs typically do not (or if they do,
   306 Sledgehammer does not use it yet). Apart from Waldmeister, they are not
   307 particularly good at heavy rewriting, but because they regard equations as
   308 undirected, they often prove theorems that require the reverse orientation of a
   309 \textit{simp} rule. Higher-order problems can be tackled, but the success rate
   310 is better for first-order problems. Hence, you may get better results if you
   311 first simplify the problem to remove higher-order features.
   312 
   313 \point{Familiarize yourself with the main options}
   314 
   315 Sledgehammer's options are fully documented in \S\ref{command-syntax}. Many of
   316 the options are very specialized, but serious users of the tool should at least
   317 familiarize themselves with the following options:
   318 
   319 \begin{enum}
   320 \item[\labelitemi] \textbf{\textit{provers}} (\S\ref{mode-of-operation}) specifies
   321 the automatic provers (ATPs and SMT solvers) that should be run whenever
   322 Sledgehammer is invoked (e.g., ``\textit{provers}~= \textit{e spass
   323 remote\_vampire\/}''). For convenience, you can omit ``\textit{provers}~=''
   324 and simply write the prover names as a space-separated list (e.g., ``\textit{e
   325 spass remote\_vampire\/}'').
   326 
   327 \item[\labelitemi] \textbf{\textit{max\_facts}} (\S\ref{relevance-filter})
   328 specifies the maximum number of facts that should be passed to the provers. By
   329 default, the value is prover-dependent but varies between about 50 and 1000. If
   330 the provers time out, you can try lowering this value to, say, 25 or 50 and see
   331 if that helps.
   332 
   333 \item[\labelitemi] \textbf{\textit{isar\_proofs}} (\S\ref{output-format}) specifies
   334 that Isar proofs should be generated, in addition to one-line \textit{metis} or
   335 \textit{smt} proofs. The length of the Isar proofs can be controlled by setting
   336 \textit{compress} (\S\ref{output-format}).
   337 
   338 \item[\labelitemi] \textbf{\textit{timeout}} (\S\ref{timeouts}) controls the
   339 provers' time limit. It is set to 30 seconds by default.
   340 \end{enum}
   341 
   342 Options can be set globally using \textbf{sledgehammer\_params}
   343 (\S\ref{command-syntax}). The command also prints the list of all available
   344 options with their current value. Fact selection can be influenced by specifying
   345 ``$(\textit{add}{:}~\textit{my\_facts})$'' after the \textbf{sledgehammer} call
   346 to ensure that certain facts are included, or simply ``$(\textit{my\_facts})$''
   347 to force Sledgehammer to run only with $\textit{my\_facts}$ (and any facts
   348 chained into the goal).
   349 
   350 \section{Frequently Asked Questions}
   351 \label{frequently-asked-questions}
   352 
   353 This sections answers frequently (and infrequently) asked questions about
   354 Sledgehammer. It is a good idea to skim over it now even if you do not have any
   355 questions at this stage. And if you have any further questions not listed here,
   356 send them to the author at \authoremail.
   357 
   358 \point{Which facts are passed to the automatic provers?}
   359 
   360 Sledgehammer heuristically selects a few hundred relevant lemmas from the
   361 currently loaded libraries. The component that performs this selection is
   362 called \emph{relevance filter} (\S\ref{relevance-filter}).
   363 
   364 \begin{enum}
   365 \item[\labelitemi]
   366 The traditional relevance filter, called \emph{MePo}
   367 (\underline{Me}ng--\underline{Pau}lson), assigns a score to every available fact
   368 (lemma, theorem, definition, or axiom) based upon how many constants that fact
   369 shares with the conjecture. This process iterates to include facts relevant to
   370 those just accepted. The constants are weighted to give unusual ones greater
   371 significance. MePo copes best when the conjecture contains some unusual
   372 constants; if all the constants are common, it is unable to discriminate among
   373 the hundreds of facts that are picked up. The filter is also memoryless: It has
   374 no information about how many times a particular fact has been used in a proof,
   375 and it cannot learn.
   376 
   377 \item[\labelitemi]
   378 An alternative to MePo is \emph{MaSh} (\underline{Ma}chine Learner for
   379 \underline{S}ledge\underline{h}ammer). It applies machine learning to the
   380 problem of finding relevant facts.
   381 
   382 \item[\labelitemi] The \emph{MeSh} filter combines MePo and MaSh. This is
   383 the default.
   384 \end{enum}
   385 
   386 The number of facts included in a problem varies from prover to prover, since
   387 some provers get overwhelmed more easily than others. You can show the number of
   388 facts given using the \textit{verbose} option (\S\ref{output-format}) and the
   389 actual facts using \textit{debug} (\S\ref{output-format}).
   390 
   391 Sledgehammer is good at finding short proofs combining a handful of existing
   392 lemmas. If you are looking for longer proofs, you must typically restrict the
   393 number of facts, by setting the \textit{max\_facts} option
   394 (\S\ref{relevance-filter}) to, say, 25 or 50.
   395 
   396 You can also influence which facts are actually selected in a number of ways. If
   397 you simply want to ensure that a fact is included, you can specify it using the
   398 ``$(\textit{add}{:}~\textit{my\_facts})$'' syntax. For example:
   399 %
   400 \prew
   401 \textbf{sledgehammer} (\textit{add}: \textit{hd.simps} \textit{tl.simps})
   402 \postw
   403 %
   404 The specified facts then replace the least relevant facts that would otherwise be
   405 included; the other selected facts remain the same.
   406 If you want to direct the selection in a particular direction, you can specify
   407 the facts via \textbf{using}:
   408 %
   409 \prew
   410 \textbf{using} \textit{hd.simps} \textit{tl.simps} \\
   411 \textbf{sledgehammer}
   412 \postw
   413 %
   414 The facts are then more likely to be selected than otherwise, and if they are
   415 selected at iteration $j$ they also influence which facts are selected at
   416 iterations $j + 1$, $j + 2$, etc. To give them even more weight, try
   417 %
   418 \prew
   419 \textbf{using} \textit{hd.simps} \textit{tl.simps} \\
   420 \textbf{apply}~\textbf{--} \\
   421 \textbf{sledgehammer}
   422 \postw
   423 
   424 \point{Why does Metis fail to reconstruct the proof?}
   425 
   426 There are many reasons. If Metis runs seemingly forever, that is a sign that the
   427 proof is too difficult for it. Metis's search is complete for first-order logic
   428 with equality, so if the proof was found by an ATP such as E, SPASS, or Vampire,
   429 Metis should eventually find it, but that's little consolation.
   430 
   431 In some rare cases, \textit{metis} fails fairly quickly, and you get the error
   432 message ``One-line proof reconstruction failed.'' This indicates that
   433 Sledgehammer determined that the goal is provable, but the proof is, for
   434 technical reasons, beyond \textit{metis}'s power. You can then try again with
   435 the \textit{strict} option (\S\ref{problem-encoding}).
   436 
   437 If the goal is actually unprovable and you did not specify an unsound encoding
   438 using \textit{type\_enc} (\S\ref{problem-encoding}), this is a bug, and you are
   439 strongly encouraged to report this to the author at \authoremail.
   440 
   441 \point{How can I tell whether a suggested proof is sound?}
   442 
   443 Earlier versions of Sledgehammer often suggested unsound proofs---either proofs
   444 of nontheorems or simply proofs that rely on type-unsound inferences. This
   445 is a thing of the past, unless you explicitly specify an unsound encoding
   446 using \textit{type\_enc} (\S\ref{problem-encoding}).
   447 %
   448 Officially, the only form of ``unsoundness'' that lurks in the sound
   449 encodings is related to missing characteristic theorems of datatypes. For
   450 example,
   451 
   452 \prew
   453 \textbf{lemma}~``$\exists \mathit{xs}.\; \mathit{xs} \neq []$'' \\
   454 \textbf{sledgehammer} ()
   455 \postw
   456 
   457 suggests an argumentless \textit{metis} call that fails. However, the conjecture
   458 does actually hold, and the \textit{metis} call can be repaired by adding
   459 \textit{list.distinct}.
   460 %
   461 We hope to address this problem in a future version of Isabelle. In the
   462 meantime, you can avoid it by passing the \textit{strict} option
   463 (\S\ref{problem-encoding}).
   464 
   465 \point{What are the \textit{full\_types}, \textit{no\_types}, and
   466 \textit{mono\_tags} arguments to Metis?}
   467 
   468 The \textit{metis}~(\textit{full\_types}) proof method
   469 and its cousin \textit{metis}~(\textit{mono\_tags}) are fully-typed
   470 versions of Metis. It is somewhat slower than \textit{metis}, but the proof
   471 search is fully typed, and it also includes more powerful rules such as the
   472 axiom ``$x = \const{True} \mathrel{\lor} x = \const{False}$'' for reasoning in
   473 higher-order places (e.g., in set comprehensions). The method is automatically
   474 tried as a fallback when \textit{metis} fails, and it is sometimes
   475 generated by Sledgehammer instead of \textit{metis} if the proof obviously
   476 requires type information or if \textit{metis} failed when Sledgehammer
   477 preplayed the proof. (By default, Sledgehammer tries to run \textit{metis} with
   478 various sets of option for up to 1~second each time to ensure that the generated
   479 one-line proofs actually work and to display timing information. This can be
   480 configured using the \textit{preplay\_timeout} and \textit{dont\_preplay}
   481 options (\S\ref{timeouts}).)
   482 %
   483 At the other end of the soundness spectrum, \textit{metis} (\textit{no\_types})
   484 uses no type information at all during the proof search, which is more efficient
   485 but often fails. Calls to \textit{metis} (\textit{no\_types}) are occasionally
   486 generated by Sledgehammer.
   487 %
   488 See the \textit{type\_enc} option (\S\ref{problem-encoding}) for details.
   489 
   490 Incidentally, if you ever see warnings such as
   491 
   492 \prew
   493 \slshape
   494 Metis: Falling back on ``\textit{metis} (\textit{full\_types})''
   495 \postw
   496 
   497 for a successful \textit{metis} proof, you can advantageously pass the
   498 \textit{full\_types} option to \textit{metis} directly.
   499 
   500 \point{And what are the \textit{lifting} and \textit{hide\_lams} arguments
   501 to Metis?}
   502 
   503 Orthogonally to the encoding of types, it is important to choose an appropriate
   504 translation of $\lambda$-abstractions. Metis supports three translation schemes,
   505 in decreasing order of power: Curry combinators (the default),
   506 $\lambda$-lifting, and a ``hiding'' scheme that disables all reasoning under
   507 $\lambda$-abstractions. The more powerful schemes also give the automatic
   508 provers more rope to hang themselves. See the \textit{lam\_trans} option (\S\ref{problem-encoding}) for details.
   509 
   510 \point{Are generated proofs minimal?}
   511 
   512 Automatic provers frequently use many more facts than are necessary.
   513 Sledgehammer includes a minimization tool that takes a set of facts returned by
   514 a given prover and repeatedly calls a prover or proof method with subsets of
   515 those facts to find a minimal set. Reducing the number of facts typically helps
   516 reconstruction, while also removing superfluous clutter from the proof scripts.
   517 
   518 In earlier versions of Sledgehammer, generated proofs were systematically
   519 accompanied by a suggestion to invoke the minimization tool. This step is now
   520 performed by default but can be disabled using the \textit{minimize} option
   521 (\S\ref{mode-of-operation}).
   522 
   523 \point{A strange error occurred---what should I do?}
   524 
   525 Sledgehammer tries to give informative error messages. Please report any strange
   526 error to the author at \authoremail.
   527 
   528 \point{Auto can solve it---why not Sledgehammer?}
   529 
   530 Problems can be easy for \textit{auto} and difficult for automatic provers, but
   531 the reverse is also true, so do not be discouraged if your first attempts fail.
   532 Because the system refers to all theorems known to Isabelle, it is particularly
   533 suitable when your goal has a short proof but requires lemmas that you do not
   534 know about.
   535 
   536 \point{Why are there so many options?}
   537 
   538 Sledgehammer's philosophy should work out of the box, without user guidance.
   539 Many of the options are meant to be used mostly by the Sledgehammer developers
   540 for experiments. Of course, feel free to try them out if you are so inclined.
   541 
   542 \section{Command Syntax}
   543 \label{command-syntax}
   544 
   545 \subsection{Sledgehammer}
   546 \label{sledgehammer}
   547 
   548 Sledgehammer can be invoked at any point when there is an open goal by entering
   549 the \textbf{sledgehammer} command in the theory file. Its general syntax is as
   550 follows:
   551 
   552 \prew
   553 \textbf{sledgehammer} \qty{subcommand}$^?$ \qty{options}$^?$ \qty{facts\_override}$^?$ \qty{num}$^?$
   554 \postw
   555 
   556 In the general syntax, the \qty{subcommand} may be any of the following:
   557 
   558 \begin{enum}
   559 \item[\labelitemi] \textbf{\textit{run} (the default):} Runs Sledgehammer on
   560 subgoal number \qty{num} (1 by default), with the given options and facts.
   561 
   562 \item[\labelitemi] \textbf{\textit{supported\_provers}:} Prints the list of
   563 automatic provers supported by Sledgehammer. See \S\ref{installation} and
   564 \S\ref{mode-of-operation} for more information on how to install automatic
   565 provers.
   566 
   567 \item[\labelitemi] \textbf{\textit{refresh\_tptp}:} Refreshes the list of remote
   568 ATPs available at System\-On\-TPTP \cite{sutcliffe-2000}.
   569 \end{enum}
   570 
   571 In addition, the following subcommands provide finer control over machine
   572 learning with MaSh:
   573 
   574 \begin{enum}
   575 \item[\labelitemi] \textbf{\textit{unlearn}:} Resets MaSh, erasing any
   576 persistent state.
   577 
   578 \item[\labelitemi] \textbf{\textit{learn\_isar}:} Invokes MaSh on the current
   579 theory to process all the available facts, learning from their Isabelle/Isar
   580 proofs. This happens automatically at Sledgehammer invocations if the
   581 \textit{learn} option (\S\ref{relevance-filter}) is enabled.
   582 
   583 \item[\labelitemi] \textbf{\textit{learn\_prover}:} Invokes MaSh on the current
   584 theory to process all the available facts, learning from proofs generated by
   585 automatic provers. The prover to use and its timeout can be set using the
   586 \textit{prover} (\S\ref{mode-of-operation}) and \textit{timeout}
   587 (\S\ref{timeouts}) options. It is recommended to perform learning using a
   588 first-order ATP (such as E, SPASS, and Vampire) as opposed to a
   589 higher-order ATP or an SMT solver.
   590 
   591 \item[\labelitemi] \textbf{\textit{relearn\_isar}:} Same as \textit{unlearn}
   592 followed by \textit{learn\_isar}.
   593 
   594 \item[\labelitemi] \textbf{\textit{relearn\_prover}:} Same as \textit{unlearn}
   595 followed by \textit{learn\_prover}.
   596 \end{enum}
   597 
   598 Sledgehammer's behavior can be influenced by various \qty{options}, which can be
   599 specified in brackets after the \textbf{sledgehammer} command. The
   600 \qty{options} are a list of key--value pairs of the form ``[$k_1 = v_1,
   601 \ldots, k_n = v_n$]''. For Boolean options, ``= \textit{true\/}'' is optional.
   602 For example:
   603 
   604 \prew
   605 \textbf{sledgehammer} [\textit{isar\_proofs}, \,\textit{timeout} = 120]
   606 \postw
   607 
   608 Default values can be set using \textbf{sledgehammer\_\allowbreak params}:
   609 
   610 \prew
   611 \textbf{sledgehammer\_params} \qty{options}
   612 \postw
   613 
   614 The supported options are described in \S\ref{option-reference}.
   615 
   616 The \qty{facts\_override} argument lets you alter the set of facts that go
   617 through the relevance filter. It may be of the form ``(\qty{facts})'', where
   618 \qty{facts} is a space-separated list of Isabelle facts (theorems, local
   619 assumptions, etc.), in which case the relevance filter is bypassed and the given
   620 facts are used. It may also be of the form ``(\textit{add}:\ \qty{facts\/_{\mathrm{1}}})'',
   621 ``(\textit{del}:\ \qty{facts\/_{\mathrm{2}}})'', or ``(\textit{add}:\ \qty{facts\/_{\mathrm{1}}}\
   622 \textit{del}:\ \qty{facts\/_{\mathrm{2}}})'', where the relevance filter is instructed to
   623 proceed as usual except that it should consider \qty{facts\/_{\mathrm{1}}}
   624 highly-relevant and \qty{facts\/_{\mathrm{2}}} fully irrelevant.
   625 
   626 If you use Isabelle/jEdit, Sledgehammer also provides an automatic mode that can
   627 be enabled via the ``Auto Sledgehammer'' option under ``Plugins > Plugin Options
   628 > Isabelle > General.'' For automatic runs, only the first prover set using
   629 \textit{provers} (\S\ref{mode-of-operation}) is considered (typically E),
   630 \textit{slice} (\S\ref{mode-of-operation}) is disabled,
   631 fewer facts are
   632 passed to the prover, \textit{fact\_filter} (\S\ref{relevance-filter}) is set to
   633 \textit{mepo}, \textit{strict} (\S\ref{problem-encoding}) is enabled,
   634 \textit{verbose} (\S\ref{output-format}) and \textit{debug}
   635 (\S\ref{output-format}) are disabled, and \textit{timeout} (\S\ref{timeouts}) is
   636 superseded by the ``Auto Time Limit'' option in jEdit. Sledgehammer's output is
   637 also more concise.
   638 
   639 \subsection{Metis}
   640 \label{metis}
   641 
   642 The \textit{metis} proof method has the syntax
   643 
   644 \prew
   645 \textbf{\textit{metis}}~(\qty{options})${}^?$~\qty{facts}${}^?$
   646 \postw
   647 
   648 where \qty{facts} is a list of arbitrary facts and \qty{options} is a
   649 comma-separated list consisting of at most one $\lambda$ translation scheme
   650 specification with the same semantics as Sledgehammer's \textit{lam\_trans}
   651 option (\S\ref{problem-encoding}) and at most one type encoding specification
   652 with the same semantics as Sledgehammer's \textit{type\_enc} option
   653 (\S\ref{problem-encoding}).
   654 %
   655 The supported $\lambda$ translation schemes are \textit{hide\_lams},
   656 \textit{lifting}, and \textit{combs} (the default).
   657 %
   658 All the untyped type encodings listed in \S\ref{problem-encoding} are supported.
   659 For convenience, the following aliases are provided:
   660 \begin{enum}
   661 \item[\labelitemi] \textbf{\textit{full\_types}:} Alias for \textit{poly\_guards\_query}.
   662 \item[\labelitemi] \textbf{\textit{partial\_types}:} Alias for \textit{poly\_args}.
   663 \item[\labelitemi] \textbf{\textit{no\_types}:} Alias for \textit{erased}.
   664 \end{enum}
   665 
   666 \section{Option Reference}
   667 \label{option-reference}
   668 
   669 \def\defl{\{}
   670 \def\defr{\}}
   671 
   672 \def\flushitem#1{\item[]\noindent\kern-\leftmargin \textbf{#1}}
   673 \def\optrueonly#1{\flushitem{\textit{#1} $\bigl[$= \textit{true}$\bigr]$\enskip}\nopagebreak\\[\parskip]}
   674 \def\optrue#1#2{\flushitem{\textit{#1} $\bigl[$= \qtybf{bool}$\bigr]$\enskip \defl\textit{true}\defr\hfill (neg.: \textit{#2})}\nopagebreak\\[\parskip]}
   675 \def\opfalse#1#2{\flushitem{\textit{#1} $\bigl[$= \qtybf{bool}$\bigr]$\enskip \defl\textit{false}\defr\hfill (neg.: \textit{#2})}\nopagebreak\\[\parskip]}
   676 \def\opsmart#1#2{\flushitem{\textit{#1} $\bigl[$= \qtybf{smart\_bool}$\bigr]$\enskip \defl\textit{smart}\defr\hfill (neg.: \textit{#2})}\nopagebreak\\[\parskip]}
   677 \def\opsmartx#1#2{\flushitem{\textit{#1} $\bigl[$= \qtybf{smart\_bool}$\bigr]$\enskip \defl\textit{smart}\defr\\\hbox{}\hfill (neg.: \textit{#2})}\nopagebreak\\[\parskip]}
   678 \def\opnodefault#1#2{\flushitem{\textit{#1} = \qtybf{#2}} \nopagebreak\\[\parskip]}
   679 \def\opnodefaultbrk#1#2{\flushitem{$\bigl[$\textit{#1} =$\bigr]$ \qtybf{#2}} \nopagebreak\\[\parskip]}
   680 \def\opdefault#1#2#3{\flushitem{\textit{#1} = \qtybf{#2}\enskip \defl\textit{#3}\defr} \nopagebreak\\[\parskip]}
   681 \def\oparg#1#2#3{\flushitem{\textit{#1} \qtybf{#2} = \qtybf{#3}} \nopagebreak\\[\parskip]}
   682 \def\opargbool#1#2#3{\flushitem{\textit{#1} \qtybf{#2} $\bigl[$= \qtybf{bool}$\bigr]$\hfill (neg.: \textit{#3})}\nopagebreak\\[\parskip]}
   683 \def\opargboolorsmart#1#2#3{\flushitem{\textit{#1} \qtybf{#2} $\bigl[$= \qtybf{smart\_bool}$\bigr]$\hfill (neg.: \textit{#3})}\nopagebreak\\[\parskip]}
   684 
   685 Sledgehammer's options are categorized as follows:\ mode of operation
   686 (\S\ref{mode-of-operation}), problem encoding (\S\ref{problem-encoding}),
   687 relevance filter (\S\ref{relevance-filter}), output format
   688 (\S\ref{output-format}), regression testing (\S\ref{regression-testing}),
   689 and timeouts (\S\ref{timeouts}).
   690 
   691 The descriptions below refer to the following syntactic quantities:
   692 
   693 \begin{enum}
   694 \item[\labelitemi] \qtybf{string}: A string.
   695 \item[\labelitemi] \qtybf{bool\/}: \textit{true} or \textit{false}.
   696 \item[\labelitemi] \qtybf{smart\_bool\/}: \textit{true}, \textit{false}, or
   697 \textit{smart}.
   698 \item[\labelitemi] \qtybf{int\/}: An integer.
   699 \item[\labelitemi] \qtybf{float}: A floating-point number (e.g., 2.5 or 60)
   700 expressing a number of seconds.
   701 \item[\labelitemi] \qtybf{float\_pair\/}: A pair of floating-point numbers
   702 (e.g., 0.6 0.95).
   703 \item[\labelitemi] \qtybf{smart\_int\/}: An integer or \textit{smart}.
   704 \end{enum}
   705 
   706 Default values are indicated in curly brackets (\textrm{\{\}}). Boolean options
   707 have a negative counterpart (e.g., \textit{minimize} vs.\
   708 \textit{dont\_minimize}). When setting Boolean options or their negative
   709 counterparts, ``= \textit{true\/}'' may be omitted.
   710 
   711 \subsection{Mode of Operation}
   712 \label{mode-of-operation}
   713 
   714 \begin{enum}
   715 \opnodefaultbrk{provers}{string}
   716 Specifies the automatic provers to use as a space-separated list (e.g.,
   717 ``\textit{e}~\textit{spass}~\textit{remote\_vampire\/}'').
   718 Provers can be run locally or remotely; see \S\ref{installation} for
   719 installation instructions.
   720 
   721 The following local provers are supported:
   722 
   723 \begin{sloppy}
   724 \begin{enum}
   725 \item[\labelitemi] \textbf{\textit{agsyhol}:} AgsyHOL is an automatic
   726 higher-order prover developed by Fredrik Lindblad \cite{agsyHOL},
   727 with support for the TPTP typed higher-order syntax (THF0). To use AgsyHOL, set
   728 the environment variable \texttt{AGSYHOL\_HOME} to the directory that contains
   729 the \texttt{agsyHOL} executable. Sledgehammer has been tested with version 1.0.
   730 
   731 \item[\labelitemi] \textbf{\textit{alt\_ergo}:} Alt-Ergo is a polymorphic
   732 ATP developed by Bobot et al.\ \cite{alt-ergo}.
   733 It supports the TPTP polymorphic typed first-order format (TFF1) via Why3
   734 \cite{why3}. To use Alt-Ergo, set the environment variable \texttt{WHY3\_HOME}
   735 to the directory that contains the \texttt{why3} executable. Sledgehammer
   736 requires Alt-Ergo 0.95.2 and Why3 0.83.
   737 
   738 \item[\labelitemi] \textbf{\textit{cvc3}:} CVC3 is an SMT solver developed by
   739 Clark Barrett, Cesare Tinelli, and their colleagues \cite{cvc3}. To use CVC3,
   740 set the environment variable \texttt{CVC3\_SOLVER} to the complete path of the
   741 executable, including the file name, or install the prebuilt CVC3 package from
   742 \download. Sledgehammer has been tested with versions 2.2 and 2.4.1.
   743 
   744 \item[\labelitemi] \textbf{\textit{cvc4}:} CVC4 \cite{cvc4} is the successor to
   745 CVC3. To use CVC4, set the environment variable \texttt{CVC4\_SOLVER} to the
   746 complete path of the executable, including the file name, or install the
   747 prebuilt CVC4 package from \download. Sledgehammer has been tested with version
   748 1.5-prerelease.
   749 
   750 \item[\labelitemi] \textbf{\textit{e}:} E is a first-order resolution prover
   751 developed by Stephan Schulz \cite{schulz-2002}. To use E, set the environment
   752 variable \texttt{E\_HOME} to the directory that contains the \texttt{eproof}
   753 executable and \texttt{E\_VERSION} to the version number (e.g., ``1.8''), or
   754 install the prebuilt E package from \download. Sledgehammer has been tested with
   755 versions 1.6 to 1.8.
   756 
   757 \item[\labelitemi] \textbf{\textit{e\_males}:} E-MaLeS is a metaprover developed
   758 by Daniel K\"uhlwein that implements strategy scheduling on top of E. To use
   759 E-MaLeS, set the environment variable \texttt{E\_MALES\_HOME} to the directory
   760 that contains the \texttt{emales.py} script. Sledgehammer has been tested with
   761 version 1.1.
   762 
   763 \item[\labelitemi] \textbf{\textit{e\_par}:} E-Par is an experimental metaprover
   764 developed by Josef Urban that implements strategy scheduling on top of E. To use
   765 E-Par, set the environment variable \texttt{E\_HOME} to the directory that
   766 contains the \texttt{runepar.pl} script and the \texttt{eprover} and
   767 \texttt{epclextract} executables, or use the prebuilt E package from \download.
   768 Be aware that E-Par is experimental software. It has been known to generate
   769 zombie processes. Use at your own risks.
   770 
   771 \item[\labelitemi] \textbf{\textit{iprover}:} iProver is a pure
   772 instantiation-based prover developed by Konstantin Korovin \cite{korovin-2009}.
   773 To use iProver, set the environment variable \texttt{IPROVER\_HOME} to the
   774 directory that contains the \texttt{iprover} and \texttt{vclausify\_rel}
   775 executables. Sledgehammer has been tested with version 0.99.
   776 
   777 \item[\labelitemi] \textbf{\textit{iprover\_eq}:} iProver-Eq is an
   778 instantiation-based prover with native support for equality developed by
   779 Konstantin Korovin and Christoph Sticksel \cite{korovin-sticksel-2010}. To use
   780 iProver-Eq, set the environment variable \texttt{IPROVER\_EQ\_HOME} to the
   781 directory that contains the \texttt{iprover-eq} and \texttt{vclausify\_rel}
   782 executables. Sledgehammer has been tested with version 0.8.
   783 
   784 \item[\labelitemi] \textbf{\textit{leo2}:} LEO-II is an automatic
   785 higher-order prover developed by Christoph Benzm\"uller et al.\ \cite{leo2},
   786 with support for the TPTP typed higher-order syntax (THF0). To use LEO-II, set
   787 the environment variable \texttt{LEO2\_HOME} to the directory that contains the
   788 \texttt{leo} executable. Sledgehammer requires version 1.3.4 or above.
   789 
   790 \item[\labelitemi] \textbf{\textit{leo3}:} Leo-III is an automatic
   791 higher-order prover developed by Alexander Steen, Max Wisniewski, Christoph
   792 Benzm\"uller et al.\ \cite{leo3},
   793 with support for the TPTP typed higher-order syntax (THF0). To use Leo-III, set
   794 the environment variable \texttt{LEO3\_HOME} to the directory that contains the
   795 \texttt{leo3} executable. Sledgehammer requires version 1.1 or above.
   796 
   797 \item[\labelitemi] \textbf{\textit{satallax}:} Satallax is an automatic
   798 higher-order prover developed by Chad Brown et al.\ \cite{satallax}, with
   799 support for the TPTP typed higher-order syntax (THF0). To use Satallax, set the
   800 environment variable \texttt{SATALLAX\_HOME} to the directory that contains the
   801 \texttt{satallax} executable. Sledgehammer requires version 2.2 or above.
   802 
   803 \item[\labelitemi] \textbf{\textit{spass}:} SPASS is a first-order resolution
   804 prover developed by Christoph Weidenbach et al.\ \cite{weidenbach-et-al-2009}.
   805 To use SPASS, set the environment variable \texttt{SPASS\_HOME} to the directory
   806 that contains the \texttt{SPASS} executable and \texttt{SPASS\_VERSION} to the
   807 version number (e.g., ``3.8ds''), or install the prebuilt SPASS package from
   808 \download. Sledgehammer requires version 3.8ds or above.
   809 
   810 \item[\labelitemi] \textbf{\textit{vampire}:} Vampire is a first-order
   811 resolution prover developed by Andrei Voronkov and his colleagues
   812 \cite{riazanov-voronkov-2002}. To use Vampire, set the environment variable
   813 \texttt{VAMPIRE\_HOME} to the directory that contains the \texttt{vampire}
   814 executable and \texttt{VAMPIRE\_VERSION} to the version number (e.g.,
   815 ``3.0''). Sledgehammer has been tested with versions 0.6 to 3.0.
   816 Versions strictly above 1.8 support the TPTP typed first-order format (TFF0).
   817 
   818 \item[\labelitemi] \textbf{\textit{verit}:} veriT \cite{bouton-et-al-2009} is an
   819 SMT solver developed by David D\'eharbe, Pascal Fontaine, and their colleagues.
   820 It is specifically designed to produce detailed proofs for reconstruction in
   821 proof assistants. To use veriT, set the environment variable
   822 \texttt{VERIT\_SOLVER} to the complete path of the executable, including the
   823 file name. Sledgehammer has been tested with version smtcomp2014.
   824 
   825 \item[\labelitemi] \textbf{\textit{z3}:} Z3 is an SMT solver developed at
   826 Microsoft Research \cite{z3}. To use Z3, set the environment variable
   827 \texttt{Z3\_SOLVER} to the complete path of the executable, including the
   828 file name. Sledgehammer has been tested with a pre-release version of 4.4.0.
   829 
   830 \item[\labelitemi] \textbf{\textit{z3\_tptp}:} This version of Z3 pretends to be
   831 an ATP, exploiting Z3's support for the TPTP untyped and typed first-order
   832 formats (FOF and TFF0). It is included for experimental purposes. It requires
   833 version 4.3.1 of Z3 or above. To use it, set the environment variable
   834 \texttt{Z3\_TPTP\_HOME} to the directory that contains the \texttt{z3\_tptp}
   835 executable.
   836 
   837 \item[\labelitemi] \textbf{\textit{zipperposition}:} Zipperposition
   838 \cite{cruanes-2014} is an experimental first-order resolution prover developed
   839 by Simon Cruane. To use Zipperposition, set the environment variable
   840 \texttt{ZIPPERPOSITION\_HOME} to the directory that contains the
   841 \texttt{zipperposition} executable.
   842 \end{enum}
   843 
   844 \end{sloppy}
   845 
   846 Moreover, the following remote provers are supported:
   847 
   848 \begin{enum}
   849 \item[\labelitemi] \textbf{\textit{remote\_agsyhol}:} The remote version of
   850 AgsyHOL runs on Geoff Sutcliffe's Miami servers \cite{sutcliffe-2000}.
   851 
   852 \item[\labelitemi] \textbf{\textit{remote\_e}:} The remote version of E runs
   853 on Geoff Sutcliffe's Miami servers \cite{sutcliffe-2000}.
   854 
   855 \item[\labelitemi] \textbf{\textit{remote\_e\_sine}:} E-SInE is a metaprover
   856 developed by Kry\v stof Hoder \cite{sine} based on E. It runs on Geoff
   857 Sutcliffe's Miami servers.
   858 
   859 \item[\labelitemi] \textbf{\textit{remote\_e\_tofof}:} E-ToFoF is a metaprover
   860 developed by Geoff Sutcliffe \cite{tofof} based on E running on his Miami
   861 servers. This ATP supports the TPTP typed first-order format (TFF0). The
   862 remote version of E-ToFoF runs on Geoff Sutcliffe's Miami servers.
   863 
   864 \item[\labelitemi] \textbf{\textit{remote\_iprover}:} The
   865 remote version of iProver runs on Geoff Sutcliffe's Miami servers
   866 \cite{sutcliffe-2000}.
   867 
   868 \item[\labelitemi] \textbf{\textit{remote\_iprover\_eq}:} The
   869 remote version of iProver-Eq runs on Geoff Sutcliffe's Miami servers
   870 \cite{sutcliffe-2000}.
   871 
   872 \item[\labelitemi] \textbf{\textit{remote\_leo2}:} The remote version of LEO-II
   873 runs on Geoff Sutcliffe's Miami servers \cite{sutcliffe-2000}.
   874 
   875 \item[\labelitemi] \textbf{\textit{remote\_leo3}:} The remote version of Leo-III
   876 runs on Geoff Sutcliffe's Miami servers \cite{sutcliffe-2000}.
   877 
   878 \item[\labelitemi] \textbf{\textit{remote\_pirate}:} Pirate is a
   879 highly experimental first-order resolution prover developed by Daniel Wand.
   880 The remote version of Pirate run on a private server he generously set up.
   881 
   882 \item[\labelitemi] \textbf{\textit{remote\_satallax}:} The remote version of
   883 Satallax runs on Geoff Sutcliffe's Miami servers \cite{sutcliffe-2000}.
   884 
   885 \item[\labelitemi] \textbf{\textit{remote\_snark}:} SNARK is a first-order
   886 resolution prover developed by Stickel et al.\ \cite{snark}. It supports the
   887 TPTP typed first-order format (TFF0). The remote version of SNARK runs on
   888 Geoff Sutcliffe's Miami servers.
   889 
   890 \item[\labelitemi] \textbf{\textit{remote\_vampire}:} The remote version of
   891 Vampire runs on Geoff Sutcliffe's Miami servers.
   892 
   893 \item[\labelitemi] \textbf{\textit{remote\_waldmeister}:} Waldmeister is a unit
   894 equality prover developed by Hillenbrand et al.\ \cite{waldmeister}. It can be
   895 used to prove universally quantified equations using unconditional equations,
   896 corresponding to the TPTP CNF UEQ division. The remote version of Waldmeister
   897 runs on Geoff Sutcliffe's Miami servers.
   898 \end{enum}
   899 
   900 By default, Sledgehammer runs a subset of CVC4, E, E-SInE, SPASS, Vampire,
   901 veriT, and Z3 in parallel, either locally or remotely---depending on the number
   902 of processor cores available and on which provers are actually installed. It is
   903 generally a good idea to run several provers in parallel.
   904 
   905 \opnodefault{prover}{string}
   906 Alias for \textit{provers}.
   907 
   908 \optrue{slice}{dont\_slice}
   909 Specifies whether the time allocated to a prover should be sliced into several
   910 segments, each of which has its own set of possibly prover-dependent options.
   911 For SPASS and Vampire, the first slice tries the fast but incomplete
   912 set-of-support (SOS) strategy, whereas the second slice runs without it. For E,
   913 up to three slices are tried, with different weighted search strategies and
   914 number of facts. For SMT solvers, several slices are tried with the same options
   915 each time but fewer and fewer facts. According to benchmarks with a timeout of
   916 30 seconds, slicing is a valuable optimization, and you should probably leave it
   917 enabled unless you are conducting experiments.
   918 
   919 \nopagebreak
   920 {\small See also \textit{verbose} (\S\ref{output-format}).}
   921 
   922 \optrue{minimize}{dont\_minimize}
   923 Specifies whether the minimization tool should be invoked automatically after
   924 proof search.
   925 
   926 \nopagebreak
   927 {\small See also \textit{preplay\_timeout} (\S\ref{timeouts})
   928 and \textit{dont\_preplay} (\S\ref{timeouts}).}
   929 
   930 \opfalse{spy}{dont\_spy}
   931 Specifies whether Sledgehammer should record statistics in
   932 \texttt{\$ISA\-BELLE\_\allowbreak HOME\_\allowbreak USER/\allowbreak spy\_\allowbreak sledgehammer}.
   933 These statistics can be useful to the developers of Sledgehammer. If you are willing to have your
   934 interactions recorded in the name of science, please enable this feature and send the statistics
   935 file every now and then to the author of this manual (\authoremail).
   936 To change the default value of this option globally, set the environment variable
   937 \texttt{SLEDGEHAMMER\_SPY} to \textit{yes}.
   938 
   939 \nopagebreak
   940 {\small See also \textit{debug} (\S\ref{output-format}).}
   941 
   942 \opfalse{overlord}{no\_overlord}
   943 Specifies whether Sledgehammer should put its temporary files in
   944 \texttt{\$ISA\-BELLE\_\allowbreak HOME\_\allowbreak USER}, which is useful for
   945 debugging Sledgehammer but also unsafe if several instances of the tool are run
   946 simultaneously. The files are identified by the prefixes \texttt{prob\_} and
   947 \texttt{mash\_}; you may safely remove them after Sledgehammer has run.
   948 
   949 \textbf{Warning:} This option is not thread-safe. Use at your own risks.
   950 
   951 \nopagebreak
   952 {\small See also \textit{debug} (\S\ref{output-format}).}
   953 \end{enum}
   954 
   955 \subsection{Relevance Filter}
   956 \label{relevance-filter}
   957 
   958 \begin{enum}
   959 \opdefault{fact\_filter}{string}{smart}
   960 Specifies the relevance filter to use. The following filters are available:
   961 
   962 \begin{enum}
   963 \item[\labelitemi] \textbf{\textit{mepo}:}
   964 The traditional memoryless MePo relevance filter.
   965 
   966 \item[\labelitemi] \textbf{\textit{mash}:}
   967 The MaSh machine learner. Three learning algorithms are provided:
   968 
   969 \begin{enum}
   970 \item[\labelitemi] \textbf{\textit{nb}} is an implementation of naive Bayes.
   971 
   972 \item[\labelitemi] \textbf{\textit{knn}} is an implementation of $k$-nearest
   973 neighbors.
   974 
   975 \item[\labelitemi] \textbf{\textit{nb\_knn}} (also called \textbf{\textit{yes}}
   976 and \textbf{\textit{sml}}) is a combination of naive Bayes and $k$-nearest
   977 neighbors.
   978 \end{enum}
   979 
   980 In addition, the special value \textit{none} is used to disable machine learning by
   981 default (cf.\ \textit{smart} below).
   982 
   983 The default algorithm is \textit{nb\_knn}. The algorithm can be selected by
   984 setting the ``MaSh'' option under ``Plugins > Plugin Options > Isabelle >
   985 General'' in Isabelle/jEdit. Persistent data for both algorithms is stored in
   986 the directory \texttt{\$ISABELLE\_\allowbreak HOME\_\allowbreak USER/\allowbreak
   987 mash}.
   988 
   989 \item[\labelitemi] \textbf{\textit{mesh}:} The MeSh filter, which combines the
   990 rankings from MePo and MaSh.
   991 
   992 \item[\labelitemi] \textbf{\textit{smart}:} A combination of MePo, MaSh, and
   993 MeSh. If the learning algorithm is set to be \textit{none}, \textit{smart}
   994 behaves like MePo.
   995 \end{enum}
   996 
   997 \opdefault{max\_facts}{smart\_int}{smart}
   998 Specifies the maximum number of facts that may be returned by the relevance
   999 filter. If the option is set to \textit{smart} (the default), it effectively
  1000 takes a value that was empirically found to be appropriate for the prover.
  1001 Typical values lie between 50 and 1000.
  1002 
  1003 \opdefault{fact\_thresholds}{float\_pair}{\upshape 0.45~0.85}
  1004 Specifies the thresholds above which facts are considered relevant by the
  1005 relevance filter. The first threshold is used for the first iteration of the
  1006 relevance filter and the second threshold is used for the last iteration (if it
  1007 is reached). The effective threshold is quadratically interpolated for the other
  1008 iterations. Each threshold ranges from 0 to 1, where 0 means that all theorems
  1009 are relevant and 1 only theorems that refer to previously seen constants.
  1010 
  1011 \optrue{learn}{dont\_learn}
  1012 Specifies whether MaSh should be run automatically by Sledgehammer to learn the
  1013 available theories (and hence provide more accurate results). Learning takes
  1014 place only if MaSh is enabled.
  1015 
  1016 \opdefault{max\_new\_mono\_instances}{int}{smart}
  1017 Specifies the maximum number of monomorphic instances to generate beyond
  1018 \textit{max\_facts}. The higher this limit is, the more monomorphic instances
  1019 are potentially generated. Whether monomorphization takes place depends on the
  1020 type encoding used. If the option is set to \textit{smart} (the default), it
  1021 takes a value that was empirically found to be appropriate for the prover. For
  1022 most provers, this value is 100.
  1023 
  1024 \nopagebreak
  1025 {\small See also \textit{type\_enc} (\S\ref{problem-encoding}).}
  1026 
  1027 \opdefault{max\_mono\_iters}{int}{smart}
  1028 Specifies the maximum number of iterations for the monomorphization fixpoint
  1029 construction. The higher this limit is, the more monomorphic instances are
  1030 potentially generated. Whether monomorphization takes place depends on the
  1031 type encoding used. If the option is set to \textit{smart} (the default), it
  1032 takes a value that was empirically found to be appropriate for the prover.
  1033 For most provers, this value is 3.
  1034 
  1035 \nopagebreak
  1036 {\small See also \textit{type\_enc} (\S\ref{problem-encoding}).}
  1037 \end{enum}
  1038 
  1039 \subsection{Problem Encoding}
  1040 \label{problem-encoding}
  1041 
  1042 \newcommand\comb[1]{\const{#1}}
  1043 
  1044 \begin{enum}
  1045 \opdefault{lam\_trans}{string}{smart}
  1046 Specifies the $\lambda$ translation scheme to use in ATP problems. The supported
  1047 translation schemes are listed below:
  1048 
  1049 \begin{enum}
  1050 \item[\labelitemi] \textbf{\textit{hide\_lams}:} Hide the $\lambda$-abstractions
  1051 by replacing them by unspecified fresh constants, effectively disabling all
  1052 reasoning under $\lambda$-abstractions.
  1053 
  1054 \item[\labelitemi] \textbf{\textit{lifting}:} Introduce a new
  1055 supercombinator \const{c} for each cluster of $n$~$\lambda$-abstractions,
  1056 defined using an equation $\const{c}~x_1~\ldots~x_n = t$ ($\lambda$-lifting).
  1057 
  1058 \item[\labelitemi] \textbf{\textit{combs}:} Rewrite lambdas to the Curry
  1059 combinators (\comb{I}, \comb{K}, \comb{S}, \comb{B}, \comb{C}). Combinators
  1060 enable the ATPs to synthesize $\lambda$-terms but tend to yield bulkier formulas
  1061 than $\lambda$-lifting: The translation is quadratic in the worst case, and the
  1062 equational definitions of the combinators are very prolific in the context of
  1063 resolution.
  1064 
  1065 \item[\labelitemi] \textbf{\textit{combs\_and\_lifting}:} Introduce a new
  1066 supercombinator \const{c} for each cluster of $\lambda$-abstractions and characterize it both using a
  1067 lifted equation $\const{c}~x_1~\ldots~x_n = t$ and via Curry combinators.
  1068 
  1069 \item[\labelitemi] \textbf{\textit{combs\_or\_lifting}:} For each cluster of
  1070 $\lambda$-abstractions, heuristically choose between $\lambda$-lifting and Curry
  1071 combinators.
  1072 
  1073 \item[\labelitemi] \textbf{\textit{keep\_lams}:}
  1074 Keep the $\lambda$-abstractions in the generated problems. This is available
  1075 only with provers that support the THF0 syntax.
  1076 
  1077 \item[\labelitemi] \textbf{\textit{smart}:} The actual translation scheme used
  1078 depends on the ATP and should be the most efficient scheme for that ATP.
  1079 \end{enum}
  1080 
  1081 For SMT solvers, the $\lambda$ translation scheme is always \textit{lifting},
  1082 irrespective of the value of this option.
  1083 
  1084 \opsmartx{uncurried\_aliases}{no\_uncurried\_aliases}
  1085 Specifies whether fresh function symbols should be generated as aliases for
  1086 applications of curried functions in ATP problems.
  1087 
  1088 \opdefault{type\_enc}{string}{smart}
  1089 Specifies the type encoding to use in ATP problems. Some of the type encodings
  1090 are unsound, meaning that they can give rise to spurious proofs
  1091 (unreconstructible using \textit{metis}). The type encodings are
  1092 listed below, with an indication of their soundness in parentheses.
  1093 An asterisk (*) indicates that the encoding is slightly incomplete for
  1094 reconstruction with \textit{metis}, unless the \textit{strict} option (described
  1095 below) is enabled.
  1096 
  1097 \begin{enum}
  1098 \item[\labelitemi] \textbf{\textit{erased} (unsound):} No type information is
  1099 supplied to the ATP, not even to resolve overloading. Types are simply erased.
  1100 
  1101 \item[\labelitemi] \textbf{\textit{poly\_guards} (sound):} Types are encoded using
  1102 a predicate \const{g}$(\tau, t)$ that guards bound
  1103 variables. Constants are annotated with their types, supplied as extra
  1104 arguments, to resolve overloading.
  1105 
  1106 \item[\labelitemi] \textbf{\textit{poly\_tags} (sound):} Each term and subterm is
  1107 tagged with its type using a function $\const{t\/}(\tau, t)$.
  1108 
  1109 \item[\labelitemi] \textbf{\textit{poly\_args} (unsound):}
  1110 Like for \textit{poly\_guards} constants are annotated with their types to
  1111 resolve overloading, but otherwise no type information is encoded. This
  1112 is the default encoding used by the \textit{metis} proof method.
  1113 
  1114 \item[\labelitemi]
  1115 \textbf{%
  1116 \textit{raw\_mono\_guards}, \textit{raw\_mono\_tags} (sound); \\
  1117 \textit{raw\_mono\_args} (unsound):} \\
  1118 Similar to \textit{poly\_guards}, \textit{poly\_tags}, and \textit{poly\_args},
  1119 respectively, but the problem is additionally monomorphized, meaning that type
  1120 variables are instantiated with heuristically chosen ground types.
  1121 Monomorphization can simplify reasoning but also leads to larger fact bases,
  1122 which can slow down the ATPs.
  1123 
  1124 \item[\labelitemi]
  1125 \textbf{%
  1126 \textit{mono\_guards}, \textit{mono\_tags} (sound);
  1127 \textit{mono\_args} (unsound):} \\
  1128 Similar to
  1129 \textit{raw\_mono\_guards}, \textit{raw\_mono\_tags}, and
  1130 \textit{raw\_mono\_args}, respectively but types are mangled in constant names
  1131 instead of being supplied as ground term arguments. The binary predicate
  1132 $\const{g}(\tau, t)$ becomes a unary predicate
  1133 $\const{g\_}\tau(t)$, and the binary function
  1134 $\const{t}(\tau, t)$ becomes a unary function
  1135 $\const{t\_}\tau(t)$.
  1136 
  1137 \item[\labelitemi] \textbf{\textit{mono\_native} (sound):} Exploits native
  1138 first-order types if the prover supports the TFF0, TFF1, or THF0 syntax;
  1139 otherwise, falls back on \textit{mono\_guards}. The problem is monomorphized.
  1140 
  1141 \item[\labelitemi] \textbf{\textit{mono\_native\_higher} (sound):} Exploits
  1142 native higher-order types if the prover supports the THF0 syntax; otherwise,
  1143 falls back on \textit{mono\_native} or \textit{mono\_guards}. The problem is
  1144 monomorphized.
  1145 
  1146 \item[\labelitemi] \textbf{\textit{poly\_native} (sound):} Exploits native
  1147 first-order polymorphic types if the prover supports the TFF1 syntax; otherwise,
  1148 falls back on \textit{mono\_native}.
  1149 
  1150 \item[\labelitemi]
  1151 \textbf{%
  1152 \textit{poly\_guards}?, \textit{poly\_tags}?, \textit{raw\_mono\_guards}?, \\
  1153 \textit{raw\_mono\_tags}?, \textit{mono\_guards}?, \textit{mono\_tags}?, \\
  1154 \textit{mono\_native}? (sound*):} \\
  1155 The type encodings \textit{poly\_guards}, \textit{poly\_tags},
  1156 \textit{raw\_mono\_guards}, \textit{raw\_mono\_tags}, \textit{mono\_guards},
  1157 \textit{mono\_tags}, and \textit{mono\_native} are fully typed and sound. For
  1158 each of these, Sledgehammer also provides a lighter variant identified by a
  1159 question mark (`\hbox{?}')\ that detects and erases monotonic types, notably
  1160 infinite types. (For \textit{mono\_native}, the types are not actually erased
  1161 but rather replaced by a shared uniform type of individuals.) As argument to the
  1162 \textit{metis} proof method, the question mark is replaced by a
  1163 \hbox{``\textit{\_query\/}''} suffix.
  1164 
  1165 \item[\labelitemi]
  1166 \textbf{%
  1167 \textit{poly\_guards}??, \textit{poly\_tags}??, \textit{raw\_mono\_guards}??, \\
  1168 \textit{raw\_mono\_tags}??, \textit{mono\_guards}??, \textit{mono\_tags}?? \\
  1169 (sound*):} \\
  1170 Even lighter versions of the `\hbox{?}' encodings. As argument to the
  1171 \textit{metis} proof method, the `\hbox{??}' suffix is replaced by
  1172 \hbox{``\textit{\_query\_query\/}''}.
  1173 
  1174 \item[\labelitemi]
  1175 \textbf{%
  1176 \textit{poly\_guards}@, \textit{poly\_tags}@, \textit{raw\_mono\_guards}@, \\
  1177 \textit{raw\_mono\_tags}@ (sound*):} \\
  1178 Alternative versions of the `\hbox{??}' encodings. As argument to the
  1179 \textit{metis} proof method, the `\hbox{@}' suffix is replaced by
  1180 \hbox{``\textit{\_at\/}''}.
  1181 
  1182 \item[\labelitemi] \textbf{\textit{poly\_args}?, \textit{raw\_mono\_args}? (unsound):} \\
  1183 Lighter versions of \textit{poly\_args} and \textit{raw\_mono\_args}.
  1184 
  1185 \item[\labelitemi] \textbf{\textit{smart}:} The actual encoding used depends on
  1186 the ATP and should be the most efficient sound encoding for that ATP.
  1187 \end{enum}
  1188 
  1189 For SMT solvers, the type encoding is always \textit{mono\_native}, irrespective
  1190 of the value of this option.
  1191 
  1192 \nopagebreak
  1193 {\small See also \textit{max\_new\_mono\_instances} (\S\ref{relevance-filter})
  1194 and \textit{max\_mono\_iters} (\S\ref{relevance-filter}).}
  1195 
  1196 \opfalse{strict}{non\_strict}
  1197 Specifies whether Sledgehammer should run in its strict mode. In that mode,
  1198 sound type encodings marked with an asterisk (*) above are made complete
  1199 for reconstruction with \textit{metis}, at the cost of some clutter in the
  1200 generated problems. This option has no effect if \textit{type\_enc} is
  1201 deliberately set to an unsound encoding.
  1202 \end{enum}
  1203 
  1204 \subsection{Output Format}
  1205 \label{output-format}
  1206 
  1207 \begin{enum}
  1208 
  1209 \opfalse{verbose}{quiet}
  1210 Specifies whether the \textbf{sledgehammer} command should explain what it does.
  1211 
  1212 \opfalse{debug}{no\_debug}
  1213 Specifies whether Sledgehammer should display additional debugging information
  1214 beyond what \textit{verbose} already displays. Enabling \textit{debug} also
  1215 enables \textit{verbose} behind the scenes.
  1216 
  1217 \nopagebreak
  1218 {\small See also \textit{spy} (\S\ref{mode-of-operation}) and
  1219 \textit{overlord} (\S\ref{mode-of-operation}).}
  1220 
  1221 \opsmart{isar\_proofs}{no\_isar\_proofs}
  1222 Specifies whether Isar proofs should be output in addition to one-line proofs.
  1223 The construction of Isar proof is still experimental and may sometimes fail;
  1224 however, when they succeed they are usually faster and more intelligible than
  1225 one-line proofs. If the option is set to \textit{smart} (the default), Isar
  1226 proofs are only generated when no working one-line proof is available.
  1227 
  1228 \opdefault{compress}{int}{smart}
  1229 Specifies the granularity of the generated Isar proofs if \textit{isar\_proofs}
  1230 is explicitly enabled. A value of $n$ indicates that each Isar proof step should
  1231 correspond to a group of up to $n$ consecutive proof steps in the ATP proof. If
  1232 the option is set to \textit{smart} (the default), the compression factor is 10
  1233 if the \textit{isar\_proofs} option is explicitly enabled; otherwise, it is
  1234 $\infty$.
  1235 
  1236 \optrueonly{dont\_compress}
  1237 Alias for ``\textit{compress} = 1''.
  1238 
  1239 \optrue{try0}{dont\_try0}
  1240 Specifies whether standard proof methods such as \textit{auto} and
  1241 \textit{blast} should be tried as alternatives to \textit{metis} in Isar proofs.
  1242 The collection of methods is roughly the same as for the \textbf{try0} command.
  1243 
  1244 \opsmart{smt\_proofs}{no\_smt\_proofs}
  1245 Specifies whether the \textit{smt} proof method should be tried in addition to
  1246 Isabelle's other proof methods. If the option is set to \textit{smart} (the
  1247 default), the \textit{smt} method is used for one-line proofs but not in Isar
  1248 proofs.
  1249 \end{enum}
  1250 
  1251 \subsection{Regression Testing}
  1252 \label{regression-testing}
  1253 
  1254 \begin{enum}
  1255 \opnodefault{expect}{string}
  1256 Specifies the expected outcome, which must be one of the following:
  1257 
  1258 \begin{enum}
  1259 \item[\labelitemi] \textbf{\textit{some}:} Sledgehammer found a proof.
  1260 \item[\labelitemi] \textbf{\textit{none}:} Sledgehammer found no proof.
  1261 \item[\labelitemi] \textbf{\textit{timeout}:} Sledgehammer timed out.
  1262 \item[\labelitemi] \textbf{\textit{unknown}:} Sledgehammer encountered some
  1263 problem.
  1264 \end{enum}
  1265 
  1266 Sledgehammer emits an error if the actual outcome differs from the expected outcome. This option is
  1267 useful for regression testing.
  1268 
  1269 \nopagebreak
  1270 {\small See also \textit{timeout} (\S\ref{timeouts}).}
  1271 \end{enum}
  1272 
  1273 \subsection{Timeouts}
  1274 \label{timeouts}
  1275 
  1276 \begin{enum}
  1277 \opdefault{timeout}{float}{\upshape 30}
  1278 Specifies the maximum number of seconds that the automatic provers should spend
  1279 searching for a proof. This excludes problem preparation and is a soft limit.
  1280 
  1281 \opdefault{preplay\_timeout}{float}{\upshape 1}
  1282 Specifies the maximum number of seconds that \textit{metis} or other proof
  1283 methods should spend trying to ``preplay'' the found proof. If this option
  1284 is set to 0, no preplaying takes place, and no timing information is displayed
  1285 next to the suggested proof method calls.
  1286 
  1287 \nopagebreak
  1288 {\small See also \textit{minimize} (\S\ref{mode-of-operation}).}
  1289 
  1290 \optrueonly{dont\_preplay}
  1291 Alias for ``\textit{preplay\_timeout} = 0''.
  1292 
  1293 \end{enum}
  1294 
  1295 \let\em=\sl
  1296 \bibliography{manual}{}
  1297 \bibliographystyle{abbrv}
  1298 
  1299 \end{document}