[mirabelle] add initial documentation in Sledgehammer's doc
authordesharna
Tue, 08 Sep 2020 11:32:57 +0200
changeset 72342 4195e75a92ef
parent 72341 0973a594be72
child 72343 478b7599a1a0
child 72344 728da67527b9
[mirabelle] add initial documentation in Sledgehammer's doc
src/Doc/Sledgehammer/document/root.tex
src/HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML
src/HOL/Mirabelle/lib/Tools/mirabelle
src/HOL/Sledgehammer.thy
src/HOL/Tools/Sledgehammer/sledgehammer_fact.ML
--- a/src/Doc/Sledgehammer/document/root.tex	Tue Sep 29 20:08:08 2020 +0200
+++ b/src/Doc/Sledgehammer/document/root.tex	Tue Sep 08 11:32:57 2020 +0200
@@ -60,6 +60,8 @@
 Jasmin Blanchette \\
 {\normalsize Institut f\"ur Informatik, Technische Universit\"at M\"unchen} \\[4\smallskipamount]
 {\normalsize with contributions from} \\[4\smallskipamount]
+Martin Desharnais \\
+{\normalsize Forschungsinstitut CODE, Universit\"at der Bundeswehr M\"unchen}  \\[4\smallskipamount]
 Lawrence C. Paulson \\
 {\normalsize Computer Laboratory, University of Cambridge} \\
 \hbox{}}
@@ -1286,6 +1288,94 @@
 
 \end{enum}
 
+\section{Mirabelle Testing Tool}
+\label{mirabelle}
+
+% using Sledgehammer or other advisory tools
+% proof tools or counterexample generator
+The \texttt{isabelle mirabelle} tool executes Sledgehammer, or other advisory
+tools (e.g. proof tools, counterexample generators) on most subgoals in a theory.
+It is typically used to quantify the success rate of a proof tool on a
+representative benchmark. Its command-line usage is:
+
+{\small
+\begin{verbatim}
+isabelle mirabelle [OPTIONS] ACTIONS FILES
+
+  Options are:
+    -L LOGIC     parent logic to use (default HOL)
+    -O DIR       output directory for test data (default None)
+    -S FILE      user-provided setup file (no actions required)
+    -T THEORY    parent theory to use (default Main)
+    -d DIR       include session directory
+    -q           be quiet (suppress output of Isabelle process)
+    -t TIMEOUT   timeout for each action in seconds (default 30)
+
+  Apply the given actions at all proof steps in the given theory
+  files.
+\end{verbatim}
+}
+
+Option \texttt{-L LOGIC} specifies the parent session to use. This is often a
+logic (e.g. Pure, HOL) but may be any session (e.g. from the AFP). Using
+multiple sessions is not supported at the moment. If a theory A needs to import
+from multiple sessions, this limitation can be overcome by
+
+\begin{enumerate}
+  \item defining a custom session S with a single theory B;
+  \item moving all imports from A to B;
+  \item building the heap image of S;
+  \item importing S.B from theory A;
+  \item executing Mirabelle with C as parent logic, i.e. with \texttt{-L S}.
+\end{enumerate}
+
+Option \texttt{-O DIR} specifies the output directory, which is created if not
+already existing, where the log will be written. In this directory, one log
+file per theory records the position of each tested subgoal and the result of
+executing the proof tool.
+
+Option \texttt{-t TIMEOUT} specifies a generic timeout that different actions
+may interpret in different ways.
+
+More specific documentation about parameters \texttt{ACTIONS}, \texttt{FILES},
+and their corresponding options may be found in the isabelle tool usage by
+entering \texttt{isabelle mirabelle -?} on the command line.
+
+\subsection{Example of Benchmarking Sledgehammer}
+
+\begin{verbatim}
+isabelle mirabelle -O output \
+  sledgehammer[prover=e,prover_timeout=10] Huffman.thy
+\end{verbatim}
+
+This command benchmarks sledgehammer when using \textbf{\textit{e}} as prover
+with a timeout of 10 seconds. The results are written to the file
+\texttt{output/Huffman.log}.
+
+\subsection{Example of Benchmarking Other Tools}
+
+\begin{verbatim}
+isabelle mirabelle -O output -t 10 \
+  try0 Huffman.thy
+\end{verbatim}
+
+This command benchmarks the \texttt{try0} tactic with a timeout of 10 seconds.
+The results are written to the file \texttt{output/Huffman.log}.
+
+\subsection{Example of Generating TPTP Files}
+
+\begin{verbatim}
+isabelle mirabelle -O output \
+  sledgehammer[prover_timeout=1,keep=tptp-files] Huffman.thy
+\end{verbatim}
+
+This command generates TPTP files with sledgehammer. Since the file is
+generated at the very beginning of every sledgehammer invocation, a timeout of
+1 second making the prover fail faster speeds handling the theory up.  The
+results are written in the \texttt{tptp-files} directory, which has to exist
+prior to the command invocation. A distinct TPTP file is generated for each
+subgoal with a file name ending with \texttt{.smt\_in}.
+
 \let\em=\sl
 \bibliography{manual}{}
 \bibliographystyle{abbrv}
--- a/src/HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML	Tue Sep 29 20:08:08 2020 +0200
+++ b/src/HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML	Tue Sep 08 11:32:57 2020 +0200
@@ -11,6 +11,7 @@
  has this pattern (provided it appears in a single line):
    val .*K = "PARAMETER" (*DESCRIPTION*)
 *)
+(* NOTE: Do not forget to update the Sledgehammer documentation to reflect changes here. *)
 
 val check_trivialK = "check_trivial" (*=BOOL: check if goals are "trivial"*)
 val e_selection_heuristicK = "e_selection_heuristic" (*=STRING: E clause selection heuristic*)
--- a/src/HOL/Mirabelle/lib/Tools/mirabelle	Tue Sep 29 20:08:08 2020 +0200
+++ b/src/HOL/Mirabelle/lib/Tools/mirabelle	Tue Sep 08 11:32:57 2020 +0200
@@ -21,6 +21,7 @@
 }
 
 function usage() {
+  # Do not forget to update the Sledgehammer documentation to reflect changes here.
   [ -n "$MIRABELLE_OUTPUT_PATH" ] && out="$MIRABELLE_OUTPUT_PATH" || out="None"
   timeout="$MIRABELLE_TIMEOUT"
   echo
@@ -35,8 +36,7 @@
   echo "    -q           be quiet (suppress output of Isabelle process)"
   echo "    -t TIMEOUT   timeout for each action in seconds (default $timeout)"
   echo
-  echo "  Apply the given actions (i.e., automated proof tools)"
-  echo "  at all proof steps in the given theory files."
+  echo "  Apply the given actions at all proof steps in the given theory files."
   echo
   echo "  ACTIONS is a colon-separated list of actions, where each action is"
   echo "  either NAME or NAME[OPTION,...,OPTION]. Available actions are:"
--- a/src/HOL/Sledgehammer.thy	Tue Sep 29 20:08:08 2020 +0200
+++ b/src/HOL/Sledgehammer.thy	Tue Sep 08 11:32:57 2020 +0200
@@ -16,7 +16,7 @@
 lemma size_ne_size_imp_ne: "size x \<noteq> size y \<Longrightarrow> x \<noteq> y"
   by (erule contrapos_nn) (rule arg_cong)
 
-ML_file \<open>Tools/ATP/atp_systems.ML\<close>
+ML_file \<open>Tools/ATP/atp_systems.ML\<close> (* TODO: rename and move to Tools/Sledgehammer *)
 ML_file \<open>Tools/Sledgehammer/async_manager_legacy.ML\<close>
 ML_file \<open>Tools/Sledgehammer/sledgehammer_util.ML\<close>
 ML_file \<open>Tools/Sledgehammer/sledgehammer_fact.ML\<close>
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_fact.ML	Tue Sep 29 20:08:08 2020 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_fact.ML	Tue Sep 08 11:32:57 2020 +0200
@@ -10,7 +10,7 @@
   type status = ATP_Problem_Generate.status
   type stature = ATP_Problem_Generate.stature
 
-  type raw_fact = ((unit -> string) * stature) * thm
+  type raw_fact = ((unit -> string) * stature) * thm (* TODO: rename to lazy_fact *)
   type fact = (string * stature) * thm
 
   type fact_override =