closer correspondence of document and session names, while maintaining document names for external reference
author haftmann
Sat Apr 05 11:37:00 2014 +0200 (2014-04-05)
changeset 56420 b266e7a86485
parent 56419 f47de9e82b0f
child 56431 4eb88149c7b2
closer correspondence of document and session names, while maintaining document names for external reference
src/Doc/Implementation/Base.thy
src/Doc/Implementation/Eq.thy
src/Doc/Implementation/Integration.thy
src/Doc/Implementation/Isar.thy
src/Doc/Implementation/Local_Theory.thy
src/Doc/Implementation/Logic.thy
src/Doc/Implementation/ML.thy
src/Doc/Implementation/Prelim.thy
src/Doc/Implementation/Proof.thy
src/Doc/Implementation/Syntax.thy
src/Doc/Implementation/Tactic.thy
src/Doc/Implementation/document/build
src/Doc/Implementation/document/root.tex
src/Doc/Implementation/document/style.sty
src/Doc/Isar-Ref/Base.thy
src/Doc/Isar-Ref/Document_Preparation.thy
src/Doc/Isar-Ref/First_Order_Logic.thy
src/Doc/Isar-Ref/Framework.thy
src/Doc/Isar-Ref/Generic.thy
src/Doc/Isar-Ref/HOL_Specific.thy
src/Doc/Isar-Ref/Inner_Syntax.thy
src/Doc/Isar-Ref/ML_Tactic.thy
src/Doc/Isar-Ref/Misc.thy
src/Doc/Isar-Ref/Outer_Syntax.thy
src/Doc/Isar-Ref/Preface.thy
src/Doc/Isar-Ref/Proof.thy
src/Doc/Isar-Ref/Quick_Reference.thy
src/Doc/Isar-Ref/Spec.thy
src/Doc/Isar-Ref/Symbols.thy
src/Doc/Isar-Ref/Synopsis.thy
src/Doc/Isar-Ref/document/build
src/Doc/Isar-Ref/document/isar-vm.pdf
src/Doc/Isar-Ref/document/isar-vm.svg
src/Doc/Isar-Ref/document/root.tex
src/Doc/Isar-Ref/document/showsymbols
src/Doc/Isar-Ref/document/style.sty
src/Doc/IsarImplementation/Base.thy
src/Doc/IsarImplementation/Eq.thy
src/Doc/IsarImplementation/Integration.thy
src/Doc/IsarImplementation/Isar.thy
src/Doc/IsarImplementation/Local_Theory.thy
src/Doc/IsarImplementation/Logic.thy
src/Doc/IsarImplementation/ML.thy
src/Doc/IsarImplementation/Prelim.thy
src/Doc/IsarImplementation/Proof.thy
src/Doc/IsarImplementation/Syntax.thy
src/Doc/IsarImplementation/Tactic.thy
src/Doc/IsarImplementation/document/build
src/Doc/IsarImplementation/document/root.tex
src/Doc/IsarImplementation/document/style.sty
src/Doc/IsarRef/Base.thy
src/Doc/IsarRef/Document_Preparation.thy
src/Doc/IsarRef/First_Order_Logic.thy
src/Doc/IsarRef/Framework.thy
src/Doc/IsarRef/Generic.thy
src/Doc/IsarRef/HOL_Specific.thy
src/Doc/IsarRef/Inner_Syntax.thy
src/Doc/IsarRef/ML_Tactic.thy
src/Doc/IsarRef/Misc.thy
src/Doc/IsarRef/Outer_Syntax.thy
src/Doc/IsarRef/Preface.thy
src/Doc/IsarRef/Proof.thy
src/Doc/IsarRef/Quick_Reference.thy
src/Doc/IsarRef/Spec.thy
src/Doc/IsarRef/Symbols.thy
src/Doc/IsarRef/Synopsis.thy
src/Doc/IsarRef/document/build
src/Doc/IsarRef/document/isar-vm.pdf
src/Doc/IsarRef/document/isar-vm.svg
src/Doc/IsarRef/document/root.tex
src/Doc/IsarRef/document/showsymbols
src/Doc/IsarRef/document/style.sty
src/Doc/JEdit/document/build
src/Doc/LaTeXsugar/Sugar.thy
src/Doc/LaTeXsugar/document/build
src/Doc/LaTeXsugar/document/mathpartir.sty
src/Doc/LaTeXsugar/document/root.bib
src/Doc/LaTeXsugar/document/root.tex
src/Doc/Logics-ZF/FOL_examples.thy
src/Doc/Logics-ZF/IFOL_examples.thy
src/Doc/Logics-ZF/If.thy
src/Doc/Logics-ZF/ZF_Isar.thy
src/Doc/Logics-ZF/ZF_examples.thy
src/Doc/Logics-ZF/document/FOL.tex
src/Doc/Logics-ZF/document/ZF.tex
src/Doc/Logics-ZF/document/build
src/Doc/Logics-ZF/document/logics.sty
src/Doc/Logics-ZF/document/root.tex
src/Doc/Prog-Prove/Basics.thy
src/Doc/Prog-Prove/Bool_nat_list.thy
src/Doc/Prog-Prove/Isar.thy
src/Doc/Prog-Prove/LaTeXsugar.thy
src/Doc/Prog-Prove/Logic.thy
src/Doc/Prog-Prove/MyList.thy
src/Doc/Prog-Prove/Types_and_funs.thy
src/Doc/Prog-Prove/document/bang.pdf
src/Doc/Prog-Prove/document/build
src/Doc/Prog-Prove/document/intro-isabelle.tex
src/Doc/Prog-Prove/document/mathpartir.sty
src/Doc/Prog-Prove/document/prelude.tex
src/Doc/Prog-Prove/document/root.bib
src/Doc/Prog-Prove/document/root.tex
src/Doc/Prog-Prove/document/svmono.cls
src/Doc/ProgProve/Basics.thy
src/Doc/ProgProve/Bool_nat_list.thy
src/Doc/ProgProve/Isar.thy
src/Doc/ProgProve/LaTeXsugar.thy
src/Doc/ProgProve/Logic.thy
src/Doc/ProgProve/MyList.thy
src/Doc/ProgProve/Types_and_funs.thy
src/Doc/ProgProve/document/bang.pdf
src/Doc/ProgProve/document/build
src/Doc/ProgProve/document/intro-isabelle.tex
src/Doc/ProgProve/document/mathpartir.sty
src/Doc/ProgProve/document/prelude.tex
src/Doc/ProgProve/document/root.bib
src/Doc/ProgProve/document/root.tex
src/Doc/ProgProve/document/svmono.cls
src/Doc/ROOT
src/Doc/Sugar/Sugar.thy
src/Doc/Sugar/document/build
src/Doc/Sugar/document/mathpartir.sty
src/Doc/Sugar/document/root.bib
src/Doc/Sugar/document/root.tex
src/Doc/System/document/build
src/Doc/ZF/FOL_examples.thy
src/Doc/ZF/IFOL_examples.thy
src/Doc/ZF/If.thy
src/Doc/ZF/ZF_Isar.thy
src/Doc/ZF/ZF_examples.thy
src/Doc/ZF/document/FOL.tex
src/Doc/ZF/document/ZF.tex
src/Doc/ZF/document/build
src/Doc/ZF/document/logics.sty
src/Doc/ZF/document/root.tex
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/Doc/Implementation/Base.thy	Sat Apr 05 11:37:00 2014 +0200
     1.3 @@ -0,0 +1,7 @@
     1.4 +theory Base
     1.5 +imports Main
     1.6 +begin
     1.7 +
     1.8 +ML_file "../antiquote_setup.ML"
     1.9 +
    1.10 +end
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/src/Doc/Implementation/Eq.thy	Sat Apr 05 11:37:00 2014 +0200
     2.3 @@ -0,0 +1,126 @@
     2.4 +theory Eq
     2.5 +imports Base
     2.6 +begin
     2.7 +
     2.8 +chapter {* Equational reasoning *}
     2.9 +
    2.10 +text {* Equality is one of the most fundamental concepts of
    2.11 +  mathematics.  The Isabelle/Pure logic (\chref{ch:logic}) provides a
    2.12 +  builtin relation @{text "\<equiv> :: \<alpha> \<Rightarrow> \<alpha> \<Rightarrow> prop"} that expresses equality
    2.13 +  of arbitrary terms (or propositions) at the framework level, as
    2.14 +  expressed by certain basic inference rules (\secref{sec:eq-rules}).
    2.15 +
    2.16 +  Equational reasoning means to replace equals by equals, using
    2.17 +  reflexivity and transitivity to form chains of replacement steps,
    2.18 +  and congruence rules to access sub-structures.  Conversions
    2.19 +  (\secref{sec:conv}) provide a convenient framework to compose basic
    2.20 +  equational steps to build specific equational reasoning tools.
    2.21 +
    2.22 +  Higher-order matching is able to provide suitable instantiations for
     2.23 +  given equality rules, which leads to the versatile concept of
    2.24 +  @{text "\<lambda>"}-term rewriting (\secref{sec:rewriting}).  Internally
    2.25 +  this is based on the general-purpose Simplifier engine of Isabelle,
    2.26 +  which is more specific and more efficient than plain conversions.
    2.27 +
    2.28 +  Object-logics usually introduce specific notions of equality or
     2.29 +  equivalence, and relate them to Pure equality.  This makes it possible
     2.30 +  to re-use the Pure tools for equational reasoning for particular
    2.31 +  object-logic connectives as well.
    2.32 +*}
    2.33 +
    2.34 +
    2.35 +section {* Basic equality rules \label{sec:eq-rules} *}
    2.36 +
    2.37 +text {* Isabelle/Pure uses @{text "\<equiv>"} for equality of arbitrary
    2.38 +  terms, which includes equivalence of propositions of the logical
    2.39 +  framework.  The conceptual axiomatization of the constant @{text "\<equiv>
    2.40 +  :: \<alpha> \<Rightarrow> \<alpha> \<Rightarrow> prop"} is given in \figref{fig:pure-equality}.  The
    2.41 +  inference kernel presents slightly different equality rules, which
    2.42 +  may be understood as derived rules from this minimal axiomatization.
    2.43 +  The Pure theory also provides some theorems that express the same
    2.44 +  reasoning schemes as theorems that can be composed like object-level
    2.45 +  rules as explained in \secref{sec:obj-rules}.
    2.46 +
    2.47 +  For example, @{ML Thm.symmetric} as Pure inference is an ML function
    2.48 +  that maps a theorem @{text "th"} stating @{text "t \<equiv> u"} to one
    2.49 +  stating @{text "u \<equiv> t"}.  In contrast, @{thm [source]
    2.50 +  Pure.symmetric} as Pure theorem expresses the same reasoning in
    2.51 +  declarative form.  If used like @{text "th [THEN Pure.symmetric]"}
    2.52 +  in Isar source notation, it achieves a similar effect as the ML
    2.53 +  inference function, although the rule attribute @{attribute THEN} or
    2.54 +  ML operator @{ML "op RS"} involve the full machinery of higher-order
    2.55 +  unification (modulo @{text "\<beta>\<eta>"}-conversion) and lifting of @{text
    2.56 +  "\<And>/\<Longrightarrow>"} contexts. *}
    2.57 +
    2.58 +text %mlref {*
    2.59 +  \begin{mldecls}
    2.60 +  @{index_ML Thm.reflexive: "cterm -> thm"} \\
    2.61 +  @{index_ML Thm.symmetric: "thm -> thm"} \\
    2.62 +  @{index_ML Thm.transitive: "thm -> thm -> thm"} \\
    2.63 +  @{index_ML Thm.abstract_rule: "string -> cterm -> thm -> thm"} \\
    2.64 +  @{index_ML Thm.combination: "thm -> thm -> thm"} \\[0.5ex]
    2.65 +  @{index_ML Thm.equal_intr: "thm -> thm -> thm"} \\
    2.66 +  @{index_ML Thm.equal_elim: "thm -> thm -> thm"} \\
    2.67 +  \end{mldecls}
    2.68 +
    2.69 +  See also @{file "~~/src/Pure/thm.ML" } for further description of
    2.70 +  these inference rules, and a few more for primitive @{text "\<beta>"} and
    2.71 +  @{text "\<eta>"} conversions.  Note that @{text "\<alpha>"} conversion is
     2.72 +  implicit due to the representation of terms with de Bruijn indices
    2.73 +  (\secref{sec:terms}). *}
    2.74 +
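          +text %mlex {* As a minimal sketch (the concrete terms are arbitrary and only
          +  serve as illustration), the primitive inferences above can be composed
          +  directly in ML: *}
          +
          +ML_val {*
          +  val t = @{cterm "t::'a"};
          +  val f = @{cterm "f::'a \<Rightarrow> 'b"};
          +  val th1 = Thm.reflexive t;                        (*t \<equiv> t*)
          +  val th2 = Thm.symmetric th1;                      (*turn an equation around*)
          +  val th3 = Thm.transitive th1 th2;                 (*chain two equations*)
          +  val th4 = Thm.combination (Thm.reflexive f) th1;  (*f t \<equiv> f t*)
          +*}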
    2.75 +
    2.76 +section {* Conversions \label{sec:conv} *}
    2.77 +
    2.78 +text {*
    2.79 +  %FIXME
    2.80 +
    2.81 +  The classic article that introduces the concept of conversion (for
    2.82 +  Cambridge LCF) is \cite{paulson:1983}.
    2.83 +*}
    2.84 +
    2.85 +
    2.86 +section {* Rewriting \label{sec:rewriting} *}
    2.87 +
    2.88 +text {* Rewriting normalizes a given term (theorem or goal) by
    2.89 +  replacing instances of given equalities @{text "t \<equiv> u"} in subterms.
    2.90 +  Rewriting continues until no rewrites are applicable to any subterm.
    2.91 +  This may be used to unfold simple definitions of the form @{text "f
    2.92 +  x\<^sub>1 \<dots> x\<^sub>n \<equiv> u"}, but is slightly more general than that.
    2.93 +*}
    2.94 +
    2.95 +text %mlref {*
    2.96 +  \begin{mldecls}
    2.97 +  @{index_ML rewrite_rule: "Proof.context -> thm list -> thm -> thm"} \\
    2.98 +  @{index_ML rewrite_goals_rule: "Proof.context -> thm list -> thm -> thm"} \\
    2.99 +  @{index_ML rewrite_goal_tac: "Proof.context -> thm list -> int -> tactic"} \\
   2.100 +  @{index_ML rewrite_goals_tac: "Proof.context -> thm list -> tactic"} \\
   2.101 +  @{index_ML fold_goals_tac: "Proof.context -> thm list -> tactic"} \\
   2.102 +  \end{mldecls}
   2.103 +
   2.104 +  \begin{description}
   2.105 +
   2.106 +  \item @{ML rewrite_rule}~@{text "ctxt rules thm"} rewrites the whole
   2.107 +  theorem by the given rules.
   2.108 +
   2.109 +  \item @{ML rewrite_goals_rule}~@{text "ctxt rules thm"} rewrites the
    2.110 +  outer premises of the given theorem.  Interpreting the theorem as a
    2.111 +  goal state (\secref{sec:tactical-goals}), this means that all subgoals
    2.112 +  are rewritten (in the same manner as @{ML rewrite_goals_tac}).
   2.113 +
   2.114 +  \item @{ML rewrite_goal_tac}~@{text "ctxt rules i"} rewrites subgoal
   2.115 +  @{text "i"} by the given rewrite rules.
   2.116 +
   2.117 +  \item @{ML rewrite_goals_tac}~@{text "ctxt rules"} rewrites all subgoals
   2.118 +  by the given rewrite rules.
   2.119 +
   2.120 +  \item @{ML fold_goals_tac}~@{text "ctxt rules"} essentially uses @{ML
   2.121 +  rewrite_goals_tac} with the symmetric form of each member of @{text
    2.122 +  "rules"}, re-ordered to fold longer expressions first.  This supports
    2.123 +  the idea of folding primitive definitions that appear in expanded form
   2.124 +  in the proof state.
   2.125 +
   2.126 +  \end{description}
   2.127 +*}
   2.128 +
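          +text %mlex {* The following contrived example uses @{ML rewrite_goals_tac} to
          +  unfold a local meta-level equation in a proof script; the chained fact is
          +  made available to the ML text as @{ML_text facts} by the @{method tactic}
          +  method. *}
          +
          +notepad
          +begin
          +  fix f :: "'a \<Rightarrow> 'a" and x :: 'a
          +  assume f: "f \<equiv> (\<lambda>x. x)"
          +
          +  have "f x = x"
          +    using f
          +    apply (tactic {* rewrite_goals_tac @{context} facts *})
          +    apply (rule refl)
          +    done
          +end
          +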
   2.129 +end
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/src/Doc/Implementation/Integration.thy	Sat Apr 05 11:37:00 2014 +0200
     3.3 @@ -0,0 +1,291 @@
     3.4 +theory Integration
     3.5 +imports Base
     3.6 +begin
     3.7 +
     3.8 +chapter {* System integration *}
     3.9 +
    3.10 +section {* Isar toplevel \label{sec:isar-toplevel} *}
    3.11 +
    3.12 +text {* The Isar toplevel may be considered the central hub of the
    3.13 +  Isabelle/Isar system, where all key components and sub-systems are
    3.14 +  integrated into a single read-eval-print loop of Isar commands,
    3.15 +  which also incorporates the underlying ML compiler.
    3.16 +
    3.17 +  Isabelle/Isar departs from the original ``LCF system architecture''
    3.18 +  where ML was really The Meta Language for defining theories and
    3.19 +  conducting proofs.  Instead, ML now only serves as the
    3.20 +  implementation language for the system (and user extensions), while
    3.21 +  the specific Isar toplevel supports the concepts of theory and proof
    3.22 +  development natively.  This includes the graph structure of theories
    3.23 +  and the block structure of proofs, support for unlimited undo,
    3.24 +  facilities for tracing, debugging, timing, profiling etc.
    3.25 +
    3.26 +  \medskip The toplevel maintains an implicit state, which is
    3.27 +  transformed by a sequence of transitions -- either interactively or
    3.28 +  in batch-mode.
    3.29 +
    3.30 +  The toplevel state is a disjoint sum of empty @{text toplevel}, or
    3.31 +  @{text theory}, or @{text proof}.  On entering the main Isar loop we
    3.32 +  start with an empty toplevel.  A theory is commenced by giving a
    3.33 +  @{text \<THEORY>} header; within a theory we may issue theory
    3.34 +  commands such as @{text \<DEFINITION>}, or state a @{text
    3.35 +  \<THEOREM>} to be proven.  Now we are within a proof state, with a
    3.36 +  rich collection of Isar proof commands for structured proof
    3.37 +  composition, or unstructured proof scripts.  When the proof is
    3.38 +  concluded we get back to the theory, which is then updated by
    3.39 +  storing the resulting fact.  Further theory declarations or theorem
    3.40 +  statements with proofs may follow, until we eventually conclude the
    3.41 +  theory development by issuing @{text \<END>}.  The resulting theory
    3.42 +  is then stored within the theory database and we are back to the
    3.43 +  empty toplevel.
    3.44 +
    3.45 +  In addition to these proper state transformations, there are also
    3.46 +  some diagnostic commands for peeking at the toplevel state without
    3.47 +  modifying it (e.g.\ \isakeyword{thm}, \isakeyword{term},
    3.48 +  \isakeyword{print-cases}).
    3.49 +*}
    3.50 +
    3.51 +text %mlref {*
    3.52 +  \begin{mldecls}
    3.53 +  @{index_ML_type Toplevel.state} \\
    3.54 +  @{index_ML_exception Toplevel.UNDEF} \\
    3.55 +  @{index_ML Toplevel.is_toplevel: "Toplevel.state -> bool"} \\
    3.56 +  @{index_ML Toplevel.theory_of: "Toplevel.state -> theory"} \\
    3.57 +  @{index_ML Toplevel.proof_of: "Toplevel.state -> Proof.state"} \\
    3.58 +  @{index_ML Toplevel.timing: "bool Unsynchronized.ref"} \\
    3.59 +  @{index_ML Toplevel.profiling: "int Unsynchronized.ref"} \\
    3.60 +  \end{mldecls}
    3.61 +
    3.62 +  \begin{description}
    3.63 +
    3.64 +  \item Type @{ML_type Toplevel.state} represents Isar toplevel
    3.65 +  states, which are normally manipulated through the concept of
    3.66 +  toplevel transitions only (\secref{sec:toplevel-transition}).  Also
    3.67 +  note that a raw toplevel state is subject to the same linearity
    3.68 +  restrictions as a theory context (cf.~\secref{sec:context-theory}).
    3.69 +
    3.70 +  \item @{ML Toplevel.UNDEF} is raised for undefined toplevel
    3.71 +  operations.  Many operations work only partially for certain cases,
    3.72 +  since @{ML_type Toplevel.state} is a sum type.
    3.73 +
    3.74 +  \item @{ML Toplevel.is_toplevel}~@{text "state"} checks for an empty
    3.75 +  toplevel state.
    3.76 +
    3.77 +  \item @{ML Toplevel.theory_of}~@{text "state"} selects the
    3.78 +  background theory of @{text "state"}, raises @{ML Toplevel.UNDEF}
    3.79 +  for an empty toplevel state.
    3.80 +
    3.81 +  \item @{ML Toplevel.proof_of}~@{text "state"} selects the Isar proof
    3.82 +  state if available, otherwise raises @{ML Toplevel.UNDEF}.
    3.83 +
    3.84 +  \item @{ML "Toplevel.timing := true"} makes the toplevel print timing
    3.85 +  information for each Isar command being executed.
    3.86 +
    3.87 +  \item @{ML Toplevel.profiling}~@{ML_text ":="}~@{text "n"} controls
    3.88 +  low-level profiling of the underlying ML runtime system.  For
    3.89 +  Poly/ML, @{text "n = 1"} means time and @{text "n = 2"} space
    3.90 +  profiling.
    3.91 +
    3.92 +  \end{description}
    3.93 +*}
    3.94 +
    3.95 +text %mlantiq {*
    3.96 +  \begin{matharray}{rcl}
    3.97 +  @{ML_antiquotation_def "Isar.state"} & : & @{text ML_antiquotation} \\
    3.98 +  \end{matharray}
    3.99 +
   3.100 +  \begin{description}
   3.101 +
   3.102 +  \item @{text "@{Isar.state}"} refers to Isar toplevel state at that
   3.103 +  point --- as abstract value.
   3.104 +
   3.105 +  This only works for diagnostic ML commands, such as @{command
   3.106 +  ML_val} or @{command ML_command}.
   3.107 +
   3.108 +  \end{description}
   3.109 +*}
   3.110 +
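          +text %mlex {* The following snippet illustrates this in the body of a theory,
          +  using the diagnostic command @{command ML_val}: *}
          +
          +ML_val {*
          +  val state = @{Isar.state};
          +  @{assert} (not (Toplevel.is_toplevel state));  (*we are inside a theory*)
          +  val thy = Toplevel.theory_of state;
          +  writeln ("current theory: " ^ Context.theory_name thy);
          +*}
          +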
   3.111 +
   3.112 +subsection {* Toplevel transitions \label{sec:toplevel-transition} *}
   3.113 +
   3.114 +text {*
   3.115 +  An Isar toplevel transition consists of a partial function on the
   3.116 +  toplevel state, with additional information for diagnostics and
   3.117 +  error reporting: there are fields for command name, source position,
   3.118 +  optional source text, as well as flags for interactive-only commands
   3.119 +  (which issue a warning in batch-mode), printing of result state,
   3.120 +  etc.
   3.121 +
   3.122 +  The operational part is represented as the sequential union of a
   3.123 +  list of partial functions, which are tried in turn until the first
   3.124 +  one succeeds.  This acts like an outer case-expression for various
   3.125 +  alternative state transitions.  For example, \isakeyword{qed} works
    3.126 +  differently for a local proof vs.\ the global ending of the main
   3.127 +  proof.
   3.128 +
   3.129 +  Toplevel transitions are composed via transition transformers.
   3.130 +  Internally, Isar commands are put together from an empty transition
   3.131 +  extended by name and source position.  It is then left to the
   3.132 +  individual command parser to turn the given concrete syntax into a
   3.133 +  suitable transition transformer that adjoins actual operations on a
   3.134 +  theory or proof state etc.
   3.135 +*}
   3.136 +
   3.137 +text %mlref {*
   3.138 +  \begin{mldecls}
   3.139 +  @{index_ML Toplevel.print: "Toplevel.transition -> Toplevel.transition"} \\
   3.140 +  @{index_ML Toplevel.keep: "(Toplevel.state -> unit) ->
   3.141 +  Toplevel.transition -> Toplevel.transition"} \\
   3.142 +  @{index_ML Toplevel.theory: "(theory -> theory) ->
   3.143 +  Toplevel.transition -> Toplevel.transition"} \\
   3.144 +  @{index_ML Toplevel.theory_to_proof: "(theory -> Proof.state) ->
   3.145 +  Toplevel.transition -> Toplevel.transition"} \\
   3.146 +  @{index_ML Toplevel.proof: "(Proof.state -> Proof.state) ->
   3.147 +  Toplevel.transition -> Toplevel.transition"} \\
   3.148 +  @{index_ML Toplevel.proofs: "(Proof.state -> Proof.state Seq.result Seq.seq) ->
   3.149 +  Toplevel.transition -> Toplevel.transition"} \\
   3.150 +  @{index_ML Toplevel.end_proof: "(bool -> Proof.state -> Proof.context) ->
   3.151 +  Toplevel.transition -> Toplevel.transition"} \\
   3.152 +  \end{mldecls}
   3.153 +
   3.154 +  \begin{description}
   3.155 +
   3.156 +  \item @{ML Toplevel.print}~@{text "tr"} sets the print flag, which
   3.157 +  causes the toplevel loop to echo the result state (in interactive
   3.158 +  mode).
   3.159 +
   3.160 +  \item @{ML Toplevel.keep}~@{text "tr"} adjoins a diagnostic
   3.161 +  function.
   3.162 +
   3.163 +  \item @{ML Toplevel.theory}~@{text "tr"} adjoins a theory
   3.164 +  transformer.
   3.165 +
   3.166 +  \item @{ML Toplevel.theory_to_proof}~@{text "tr"} adjoins a global
   3.167 +  goal function, which turns a theory into a proof state.  The theory
   3.168 +  may be changed before entering the proof; the generic Isar goal
   3.169 +  setup includes an argument that specifies how to apply the proven
   3.170 +  result to the theory, when the proof is finished.
   3.171 +
   3.172 +  \item @{ML Toplevel.proof}~@{text "tr"} adjoins a deterministic
   3.173 +  proof command, with a singleton result.
   3.174 +
   3.175 +  \item @{ML Toplevel.proofs}~@{text "tr"} adjoins a general proof
   3.176 +  command, with zero or more result states (represented as a lazy
   3.177 +  list).
   3.178 +
   3.179 +  \item @{ML Toplevel.end_proof}~@{text "tr"} adjoins a concluding
    3.180 +  proof command that returns the resulting theory, after storing the
   3.181 +  resulting facts in the context etc.
   3.182 +
   3.183 +  \end{description}
   3.184 +*}
   3.185 +
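          +text %mlex {* As a minimal sketch, a diagnostic transition can be composed as
          +  a plain ML value; turning it into a proper Isar command additionally
          +  requires a keyword declaration and the outer syntax machinery, which is
          +  not covered here. *}
          +
          +ML_val {*
          +  val tr =
          +    Toplevel.empty
          +    |> Toplevel.keep (fn st =>
          +        writeln ("current theory: " ^
          +          Context.theory_name (Toplevel.theory_of st)))
          +    |> Toplevel.print;
          +*}
          +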
   3.186 +
   3.187 +section {* Theory database \label{sec:theory-database} *}
   3.188 +
   3.189 +text {*
   3.190 +  The theory database maintains a collection of theories, together
   3.191 +  with some administrative information about their original sources,
   3.192 +  which are held in an external store (i.e.\ some directory within the
   3.193 +  regular file system).
   3.194 +
   3.195 +  The theory database is organized as a directed acyclic graph;
   3.196 +  entries are referenced by theory name.  Although some additional
    3.197 +  interfaces allow a directory specification to be included as well, this
   3.198 +  is only a hint to the underlying theory loader.  The internal theory
   3.199 +  name space is flat!
   3.200 +
   3.201 +  Theory @{text A} is associated with the main theory file @{text
   3.202 +  A}\verb,.thy,, which needs to be accessible through the theory
   3.203 +  loader path.  Any number of additional ML source files may be
   3.204 +  associated with each theory, by declaring these dependencies in the
   3.205 +  theory header as @{text \<USES>}, and loading them consecutively
   3.206 +  within the theory context.  The system keeps track of incoming ML
   3.207 +  sources and associates them with the current theory.
   3.208 +
   3.209 +  The basic internal actions of the theory database are @{text
   3.210 +  "update"} and @{text "remove"}:
   3.211 +
   3.212 +  \begin{itemize}
   3.213 +
   3.214 +  \item @{text "update A"} introduces a link of @{text "A"} with a
   3.215 +  @{text "theory"} value of the same name; it asserts that the theory
   3.216 +  sources are now consistent with that value;
   3.217 +
   3.218 +  \item @{text "remove A"} deletes entry @{text "A"} from the theory
   3.219 +  database.
   3.220 +  
   3.221 +  \end{itemize}
   3.222 +
   3.223 +  These actions are propagated to sub- or super-graphs of a theory
   3.224 +  entry as expected, in order to preserve global consistency of the
   3.225 +  state of all loaded theories with the sources of the external store.
   3.226 +  This implies certain causalities between actions: @{text "update"}
   3.227 +  or @{text "remove"} of an entry will @{text "remove"} all
   3.228 +  descendants.
   3.229 +
   3.230 +  \medskip There are separate user-level interfaces to operate on the
   3.231 +  theory database directly or indirectly.  The primitive actions then
   3.232 +  just happen automatically while working with the system.  In
   3.233 +  particular, processing a theory header @{text "\<THEORY> A
   3.234 +  \<IMPORTS> B\<^sub>1 \<dots> B\<^sub>n \<BEGIN>"} ensures that the
   3.235 +  sub-graph of the collective imports @{text "B\<^sub>1 \<dots> B\<^sub>n"}
   3.236 +  is up-to-date, too.  Earlier theories are reloaded as required, with
   3.237 +  @{text update} actions proceeding in topological order according to
    3.238 +  theory dependencies.  There may also be a wave of implied @{text
   3.239 +  remove} actions for derived theory nodes until a stable situation
   3.240 +  is achieved eventually.
   3.241 +*}
   3.242 +
   3.243 +text %mlref {*
   3.244 +  \begin{mldecls}
   3.245 +  @{index_ML use_thy: "string -> unit"} \\
   3.246 +  @{index_ML use_thys: "string list -> unit"} \\
   3.247 +  @{index_ML Thy_Info.get_theory: "string -> theory"} \\
   3.248 +  @{index_ML Thy_Info.remove_thy: "string -> unit"} \\[1ex]
   3.249 +  @{index_ML Thy_Info.register_thy: "theory -> unit"} \\[1ex]
   3.250 +  @{ML_text "datatype action = Update | Remove"} \\
   3.251 +  @{index_ML Thy_Info.add_hook: "(Thy_Info.action -> string -> unit) -> unit"} \\
   3.252 +  \end{mldecls}
   3.253 +
   3.254 +  \begin{description}
   3.255 +
   3.256 +  \item @{ML use_thy}~@{text A} ensures that theory @{text A} is fully
   3.257 +  up-to-date wrt.\ the external file store, reloading outdated
   3.258 +  ancestors as required.  In batch mode, the simultaneous @{ML
   3.259 +  use_thys} should be used exclusively.
   3.260 +
   3.261 +  \item @{ML use_thys} is similar to @{ML use_thy}, but handles
   3.262 +  several theories simultaneously.  Thus it acts like processing the
   3.263 +  import header of a theory, without performing the merge of the
   3.264 +  result.  By loading a whole sub-graph of theories like that, the
    3.265 +  intrinsic parallelism can be exploited by the system, to speed up
   3.266 +  loading.
   3.267 +
   3.268 +  \item @{ML Thy_Info.get_theory}~@{text A} retrieves the theory value
   3.269 +  presently associated with name @{text A}.  Note that the result
   3.270 +  might be outdated.
   3.271 +
   3.272 +  \item @{ML Thy_Info.remove_thy}~@{text A} deletes theory @{text A} and all
   3.273 +  descendants from the theory database.
   3.274 +
   3.275 +  \item @{ML Thy_Info.register_thy}~@{text "text thy"} registers an
   3.276 +  existing theory value with the theory loader database and updates
   3.277 +  source version information according to the current file-system
   3.278 +  state.
   3.279 +
   3.280 +  \item @{ML "Thy_Info.add_hook"}~@{text f} registers function @{text
   3.281 +  f} as a hook for theory database actions.  The function will be
   3.282 +  invoked with the action and theory name being involved; thus derived
   3.283 +  actions may be performed in associated system components, e.g.\
   3.284 +  maintaining the state of an editor for the theory sources.
   3.285 +
    3.286 +  The kind and order of actions occurring in practice depend both on
   3.287 +  user interactions and the internal process of resolving theory
   3.288 +  imports.  Hooks should not rely on a particular policy here!  Any
   3.289 +  exceptions raised by the hook are ignored.
   3.290 +
   3.291 +  \end{description}
   3.292 +*}
   3.293 +
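          +text %mlex {* As a small illustration, a hook that merely traces theory
          +  database actions can be registered like this: *}
          +
          +ML_val {*
          +  Thy_Info.add_hook (fn action => fn name =>
          +    writeln ("theory " ^ quote name ^ ": " ^
          +      (case action of
          +        Thy_Info.Update => "update"
          +      | Thy_Info.Remove => "remove")));
          +*}
          +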
   3.294 +end
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/src/Doc/Implementation/Isar.thy	Sat Apr 05 11:37:00 2014 +0200
     4.3 @@ -0,0 +1,585 @@
     4.4 +theory Isar
     4.5 +imports Base
     4.6 +begin
     4.7 +
     4.8 +chapter {* Isar language elements *}
     4.9 +
    4.10 +text {* The Isar proof language (see also
    4.11 +  \cite[\S2]{isabelle-isar-ref}) consists of three main categories of
    4.12 +  language elements as follows.
    4.13 +
    4.14 +  \begin{enumerate}
    4.15 +
    4.16 +  \item Proof \emph{commands} define the primary language of
    4.17 +  transactions of the underlying Isar/VM interpreter.  Typical
    4.18 +  examples are @{command "fix"}, @{command "assume"}, @{command
    4.19 +  "show"}, @{command "proof"}, and @{command "qed"}.
    4.20 +
    4.21 +  Composing proof commands according to the rules of the Isar/VM leads
    4.22 +  to expressions of structured proof text, such that both the machine
    4.23 +  and the human reader can give it a meaning as formal reasoning.
    4.24 +
    4.25 +  \item Proof \emph{methods} define a secondary language of mixed
    4.26 +  forward-backward refinement steps involving facts and goals.
    4.27 +  Typical examples are @{method rule}, @{method unfold}, and @{method
    4.28 +  simp}.
    4.29 +
    4.30 +  Methods can occur in certain well-defined parts of the Isar proof
    4.31 +  language, say as arguments to @{command "proof"}, @{command "qed"},
    4.32 +  or @{command "by"}.
    4.33 +
    4.34 +  \item \emph{Attributes} define a tertiary language of small
    4.35 +  annotations to theorems being defined or referenced.  Attributes can
    4.36 +  modify both the context and the theorem.
    4.37 +
    4.38 +  Typical examples are @{attribute intro} (which affects the context),
    4.39 +  and @{attribute symmetric} (which affects the theorem).
    4.40 +
    4.41 +  \end{enumerate}
    4.42 +*}
    4.43 +
    4.44 +
    4.45 +section {* Proof commands *}
    4.46 +
     4.47 +text {* A \emph{proof command} is a state transition of the Isar/VM
    4.48 +  proof interpreter.
    4.49 +
    4.50 +  In principle, Isar proof commands could be defined in user-space as
    4.51 +  well.  The system is built like that in the first place: one part of
     4.52 +  the commands is primitive, the other part is defined as derived
    4.53 +  elements.  Adding to the genuine structured proof language requires
    4.54 +  profound understanding of the Isar/VM machinery, though, so this is
    4.55 +  beyond the scope of this manual.
    4.56 +
    4.57 +  What can be done realistically is to define some diagnostic commands
    4.58 +  that inspect the general state of the Isar/VM, and report some
    4.59 +  feedback to the user.  Typically this involves checking of the
    4.60 +  linguistic \emph{mode} of a proof state, or peeking at the pending
    4.61 +  goals (if available).
    4.62 +
    4.63 +  Another common application is to define a toplevel command that
     4.64 +  poses a problem to the user as an Isar proof state and processes the
     4.65 +  final result relative to the context.  Thus a proof can be
    4.66 +  incorporated into the context of some user-space tool, without
    4.67 +  modifying the Isar proof language itself.  *}
    4.68 +
    4.69 +text %mlref {*
    4.70 +  \begin{mldecls}
    4.71 +  @{index_ML_type Proof.state} \\
    4.72 +  @{index_ML Proof.assert_forward: "Proof.state -> Proof.state"} \\
    4.73 +  @{index_ML Proof.assert_chain: "Proof.state -> Proof.state"} \\
    4.74 +  @{index_ML Proof.assert_backward: "Proof.state -> Proof.state"} \\
    4.75 +  @{index_ML Proof.simple_goal: "Proof.state -> {context: Proof.context, goal: thm}"} \\
    4.76 +  @{index_ML Proof.goal: "Proof.state ->
    4.77 +  {context: Proof.context, facts: thm list, goal: thm}"} \\
    4.78 +  @{index_ML Proof.raw_goal: "Proof.state ->
    4.79 +  {context: Proof.context, facts: thm list, goal: thm}"} \\
    4.80 +  @{index_ML Proof.theorem: "Method.text option ->
    4.81 +  (thm list list -> Proof.context -> Proof.context) ->
    4.82 +  (term * term list) list list -> Proof.context -> Proof.state"} \\
    4.83 +  \end{mldecls}
    4.84 +
    4.85 +  \begin{description}
    4.86 +
    4.87 +  \item Type @{ML_type Proof.state} represents Isar proof states.
    4.88 +  This is a block-structured configuration with proof context,
    4.89 +  linguistic mode, and optional goal.  The latter consists of goal
    4.90 +  context, goal facts (``@{text "using"}''), and tactical goal state
    4.91 +  (see \secref{sec:tactical-goals}).
    4.92 +
    4.93 +  The general idea is that the facts shall contribute to the
    4.94 +  refinement of some parts of the tactical goal --- how exactly is
    4.95 +  defined by the proof method that is applied in that situation.
    4.96 +
    4.97 +  \item @{ML Proof.assert_forward}, @{ML Proof.assert_chain}, @{ML
    4.98 +  Proof.assert_backward} are partial identity functions that fail
    4.99 +  unless a certain linguistic mode is active, namely ``@{text
   4.100 +  "proof(state)"}'', ``@{text "proof(chain)"}'', ``@{text
   4.101 +  "proof(prove)"}'', respectively (using the terminology of
   4.102 +  \cite{isabelle-isar-ref}).
   4.103 +
    4.104 +  It is advisable to study the implementations of existing proof commands
   4.105 +  for suitable modes to be asserted.
   4.106 +
   4.107 +  \item @{ML Proof.simple_goal}~@{text "state"} returns the structured
   4.108 +  Isar goal (if available) in the form seen by ``simple'' methods
   4.109 +  (like @{method simp} or @{method blast}).  The Isar goal facts are
   4.110 +  already inserted as premises into the subgoals, which are presented
   4.111 +  individually as in @{ML Proof.goal}.
   4.112 +
   4.113 +  \item @{ML Proof.goal}~@{text "state"} returns the structured Isar
   4.114 +  goal (if available) in the form seen by regular methods (like
   4.115 +  @{method rule}).  The auxiliary internal encoding of Pure
   4.116 +  conjunctions is split into individual subgoals as usual.
   4.117 +
   4.118 +  \item @{ML Proof.raw_goal}~@{text "state"} returns the structured
   4.119 +  Isar goal (if available) in the raw internal form seen by ``raw''
   4.120 +  methods (like @{method induct}).  This form is rarely appropriate
    4.121 +  for diagnostic tools; @{ML Proof.simple_goal} or @{ML Proof.goal}
   4.122 +  should be used in most situations.
   4.123 +
   4.124 +  \item @{ML Proof.theorem}~@{text "before_qed after_qed statement ctxt"}
   4.125 +  initializes a toplevel Isar proof state within a given context.
   4.126 +
   4.127 +  The optional @{text "before_qed"} method is applied at the end of
   4.128 +  the proof, just before extracting the result (this feature is rarely
   4.129 +  used).
   4.130 +
   4.131 +  The @{text "after_qed"} continuation receives the extracted result
   4.132 +  in order to apply it to the final context in a suitable way (e.g.\
   4.133 +  storing named facts).  Note that at this generic level the target
   4.134 +  context is specified as @{ML_type Proof.context}, but the usual
   4.135 +  wrapping of toplevel proofs into command transactions will provide a
   4.136 +  @{ML_type local_theory} here (\chref{ch:local-theory}).  This
    4.137 +  affects the way results are stored.
   4.138 +
   4.139 +  The @{text "statement"} is given as a nested list of terms, each
   4.140 +  associated with optional @{keyword "is"} patterns as usual in the
   4.141 +  Isar source language.  The original nested list structure over terms
   4.142 +  is turned into one over theorems when @{text "after_qed"} is
   4.143 +  invoked.
   4.144 +
   4.145 +  \end{description}
   4.146 +*}
   4.147 +
   4.148 +
   4.149 +text %mlantiq {*
   4.150 +  \begin{matharray}{rcl}
   4.151 +  @{ML_antiquotation_def "Isar.goal"} & : & @{text ML_antiquotation} \\
   4.152 +  \end{matharray}
   4.153 +
   4.154 +  \begin{description}
   4.155 +
   4.156 +  \item @{text "@{Isar.goal}"} refers to the regular goal state (if
   4.157 +  available) of the current proof state managed by the Isar toplevel
   4.158 +  --- as abstract value.
   4.159 +
   4.160 +  This only works for diagnostic ML commands, such as @{command
   4.161 +  ML_val} or @{command ML_command}.
   4.162 +
   4.163 +  \end{description}
   4.164 +*}
   4.165 +
   4.166 +text %mlex {* The following example peeks at a certain goal configuration. *}
   4.167 +
   4.168 +notepad
   4.169 +begin
   4.170 +  have A and B and C
   4.171 +    ML_val {*
   4.172 +      val n = Thm.nprems_of (#goal @{Isar.goal});
   4.173 +      @{assert} (n = 3);
   4.174 +    *}
   4.175 +    oops
   4.176 +
   4.177 +text {* Here we see 3 individual subgoals in the same way as regular
   4.178 +  proof methods would do.  *}
   4.179 +
   4.180 +
   4.181 +section {* Proof methods *}
   4.182 +
   4.183 +text {* A @{text "method"} is a function @{text "context \<rightarrow> thm\<^sup>* \<rightarrow> goal
   4.184 +  \<rightarrow> (cases \<times> goal)\<^sup>*\<^sup>*"} that operates on the full Isar goal
   4.185 +  configuration with context, goal facts, and tactical goal state and
   4.186 +  enumerates possible follow-up goal states, with the potential
   4.187 +  addition of named extensions of the proof context (\emph{cases}).
   4.188 +  The latter feature is rarely used.
   4.189 +
   4.190 +  This means a proof method is like a structurally enhanced tactic
   4.191 +  (cf.\ \secref{sec:tactics}).  The well-formedness conditions for
   4.192 +  tactics need to hold for methods accordingly, with the following
   4.193 +  additions.
   4.194 +
   4.195 +  \begin{itemize}
   4.196 +
    4.197 +  \item Goal addressing is further limited to operate either
   4.198 +  uniformly on \emph{all} subgoals, or specifically on the
   4.199 +  \emph{first} subgoal.
   4.200 +
   4.201 +  Exception: old-style tactic emulations that are embedded into the
   4.202 +  method space, e.g.\ @{method rule_tac}.
   4.203 +
   4.204 +  \item A non-trivial method always needs to make progress: an
   4.205 +  identical follow-up goal state has to be avoided.\footnote{This
   4.206 +  enables the user to write method expressions like @{text "meth\<^sup>+"}
   4.207 +  without looping, while the trivial do-nothing case can be recovered
   4.208 +  via @{text "meth\<^sup>?"}.}
   4.209 +
   4.210 +  Exception: trivial stuttering steps, such as ``@{method -}'' or
   4.211 +  @{method succeed}.
   4.212 +
   4.213 +  \item Goal facts passed to the method must not be ignored.  If there
   4.214 +  is no sensible use of facts outside the goal state, facts should be
   4.215 +  inserted into the subgoals that are addressed by the method.
   4.216 +
   4.217 +  \end{itemize}
   4.218 +
   4.219 +  \medskip Syntactically, the language of proof methods appears as
   4.220 +  arguments to Isar commands like @{command "by"} or @{command apply}.
   4.221 +  User-space additions are reasonably easy by plugging suitable
   4.222 +  method-valued parser functions into the framework, using the
   4.223 +  @{command method_setup} command, for example.
   4.224 +
   4.225 +  To get a better idea about the range of possibilities, consider the
   4.226 +  following Isar proof schemes.  This is the general form of
   4.227 +  structured proof text:
   4.228 +
   4.229 +  \medskip
   4.230 +  \begin{tabular}{l}
   4.231 +  @{command from}~@{text "facts\<^sub>1"}~@{command have}~@{text "props"}~@{command using}~@{text "facts\<^sub>2"} \\
   4.232 +  @{command proof}~@{text "(initial_method)"} \\
   4.233 +  \quad@{text "body"} \\
   4.234 +  @{command qed}~@{text "(terminal_method)"} \\
   4.235 +  \end{tabular}
   4.236 +  \medskip
   4.237 +
   4.238 +  The goal configuration consists of @{text "facts\<^sub>1"} and
   4.239 +  @{text "facts\<^sub>2"} appended in that order, and various @{text
   4.240 +  "props"} being claimed.  The @{text "initial_method"} is invoked
   4.241 +  with facts and goals together and refines the problem to something
   4.242 +  that is handled recursively in the proof @{text "body"}.  The @{text
   4.243 +  "terminal_method"} has another chance to finish any remaining
   4.244 +  subgoals, but it does not see the facts of the initial step.
   4.245 +
   4.246 +  \medskip This pattern illustrates unstructured proof scripts:
   4.247 +
   4.248 +  \medskip
   4.249 +  \begin{tabular}{l}
   4.250 +  @{command have}~@{text "props"} \\
   4.251 +  \quad@{command using}~@{text "facts\<^sub>1"}~@{command apply}~@{text "method\<^sub>1"} \\
   4.252 +  \quad@{command apply}~@{text "method\<^sub>2"} \\
   4.253 +  \quad@{command using}~@{text "facts\<^sub>3"}~@{command apply}~@{text "method\<^sub>3"} \\
   4.254 +  \quad@{command done} \\
   4.255 +  \end{tabular}
   4.256 +  \medskip
   4.257 +
   4.258 +  The @{text "method\<^sub>1"} operates on the original claim while
   4.259 +  using @{text "facts\<^sub>1"}.  Since the @{command apply} command
   4.260 +  structurally resets the facts, the @{text "method\<^sub>2"} will
   4.261 +  operate on the remaining goal state without facts.  The @{text
   4.262 +  "method\<^sub>3"} will see again a collection of @{text
   4.263 +  "facts\<^sub>3"} that has been inserted into the script explicitly.
   4.264 +
   4.265 +  \medskip Empirically, any Isar proof method can be categorized as
   4.266 +  follows.
   4.267 +
   4.268 +  \begin{enumerate}
   4.269 +
   4.270 +  \item \emph{Special method with cases} with named context additions
   4.271 +  associated with the follow-up goal state.
   4.272 +
   4.273 +  Example: @{method "induct"}, which is also a ``raw'' method since it
   4.274 +  operates on the internal representation of simultaneous claims as
   4.275 +  Pure conjunction (\secref{sec:logic-aux}), instead of separate
   4.276 +  subgoals (\secref{sec:tactical-goals}).
   4.277 +
   4.278 +  \item \emph{Structured method} with strong emphasis on facts outside
   4.279 +  the goal state.
   4.280 +
   4.281 +  Example: @{method "rule"}, which captures the key ideas behind
   4.282 +  structured reasoning in Isar in purest form.
   4.283 +
   4.284 +  \item \emph{Simple method} with weaker emphasis on facts, which are
    4.285 +  inserted into subgoals to emulate old-style tactical reasoning with
    4.286 +  ``premises''.
   4.287 +
   4.288 +  Examples: @{method "simp"}, @{method "blast"}, @{method "auto"}.
   4.289 +
   4.290 +  \item \emph{Old-style tactic emulation} with detailed numeric goal
   4.291 +  addressing and explicit references to entities of the internal goal
   4.292 +  state (which are otherwise invisible from proper Isar proof text).
   4.293 +  The naming convention @{text "foo_tac"} makes this special
   4.294 +  non-standard status clear.
   4.295 +
   4.296 +  Example: @{method "rule_tac"}.
   4.297 +
   4.298 +  \end{enumerate}
   4.299 +
   4.300 +  When implementing proof methods, it is advisable to study existing
   4.301 +  implementations carefully and imitate the typical ``boiler plate''
    4.302 +  for context-sensitive parsing and further combinators to wrap up
   4.303 +  tactic expressions as methods.\footnote{Aliases or abbreviations of
   4.304 +  the standard method combinators should be avoided.  Note that from
   4.305 +  Isabelle99 until Isabelle2009 the system did provide various odd
   4.306 +  combinations of method wrappers that made user applications more
   4.307 +  complicated than necessary.}
   4.308 +*}
   4.309 +
   4.310 +text %mlref {*
   4.311 +  \begin{mldecls}
   4.312 +  @{index_ML_type Proof.method} \\
   4.313 +  @{index_ML METHOD_CASES: "(thm list -> cases_tactic) -> Proof.method"} \\
   4.314 +  @{index_ML METHOD: "(thm list -> tactic) -> Proof.method"} \\
   4.315 +  @{index_ML SIMPLE_METHOD: "tactic -> Proof.method"} \\
   4.316 +  @{index_ML SIMPLE_METHOD': "(int -> tactic) -> Proof.method"} \\
   4.317 +  @{index_ML Method.insert_tac: "thm list -> int -> tactic"} \\
   4.318 +  @{index_ML Method.setup: "binding -> (Proof.context -> Proof.method) context_parser ->
   4.319 +  string -> theory -> theory"} \\
   4.320 +  \end{mldecls}
   4.321 +
   4.322 +  \begin{description}
   4.323 +
   4.324 +  \item Type @{ML_type Proof.method} represents proof methods as
   4.325 +  abstract type.
   4.326 +
   4.327 +  \item @{ML METHOD_CASES}~@{text "(fn facts => cases_tactic)"} wraps
   4.328 +  @{text cases_tactic} depending on goal facts as proof method with
   4.329 +  cases; the goal context is passed via method syntax.
   4.330 +
   4.331 +  \item @{ML METHOD}~@{text "(fn facts => tactic)"} wraps @{text
   4.332 +  tactic} depending on goal facts as regular proof method; the goal
   4.333 +  context is passed via method syntax.
   4.334 +
   4.335 +  \item @{ML SIMPLE_METHOD}~@{text "tactic"} wraps a tactic that
   4.336 +  addresses all subgoals uniformly as simple proof method.  Goal facts
   4.337 +  are already inserted into all subgoals before @{text "tactic"} is
   4.338 +  applied.
   4.339 +
   4.340 +  \item @{ML SIMPLE_METHOD'}~@{text "tactic"} wraps a tactic that
   4.341 +  addresses a specific subgoal as simple proof method that operates on
    4.342 +  subgoal 1.  Goal facts are inserted into the subgoal before the @{text
   4.343 +  "tactic"} is applied.
   4.344 +
   4.345 +  \item @{ML Method.insert_tac}~@{text "facts i"} inserts @{text
   4.346 +  "facts"} into subgoal @{text "i"}.  This is convenient to reproduce
   4.347 +  part of the @{ML SIMPLE_METHOD} or @{ML SIMPLE_METHOD'} wrapping
   4.348 +  within regular @{ML METHOD}, for example.
   4.349 +
   4.350 +  \item @{ML Method.setup}~@{text "name parser description"} provides
   4.351 +  the functionality of the Isar command @{command method_setup} as ML
   4.352 +  function.
   4.353 +
   4.354 +  \end{description}
   4.355 +*}
   4.356 +
   4.357 +text %mlex {* See also @{command method_setup} in
   4.358 +  \cite{isabelle-isar-ref} which includes some abstract examples.
   4.359 +
   4.360 +  \medskip The following toy examples illustrate how the goal facts
   4.361 +  and state are passed to proof methods.  The pre-defined proof method
   4.362 +  called ``@{method tactic}'' wraps ML source of type @{ML_type
   4.363 +  tactic} (abstracted over @{ML_text facts}).  This allows immediate
   4.364 +  experimentation without parsing of concrete syntax. *}
   4.365 +
   4.366 +notepad
   4.367 +begin
   4.368 +  assume a: A and b: B
   4.369 +
   4.370 +  have "A \<and> B"
   4.371 +    apply (tactic {* rtac @{thm conjI} 1 *})
   4.372 +    using a apply (tactic {* resolve_tac facts 1 *})
   4.373 +    using b apply (tactic {* resolve_tac facts 1 *})
   4.374 +    done
   4.375 +
   4.376 +  have "A \<and> B"
   4.377 +    using a and b
   4.378 +    ML_val "@{Isar.goal}"
   4.379 +    apply (tactic {* Method.insert_tac facts 1 *})
   4.380 +    apply (tactic {* (rtac @{thm conjI} THEN_ALL_NEW atac) 1 *})
   4.381 +    done
   4.382 +end
   4.383 +
   4.384 +text {* \medskip The next example implements a method that simplifies
   4.385 +  the first subgoal by rewrite rules given as arguments.  *}
   4.386 +
   4.387 +method_setup my_simp = {*
   4.388 +  Attrib.thms >> (fn thms => fn ctxt =>
   4.389 +    SIMPLE_METHOD' (fn i =>
   4.390 +      CHANGED (asm_full_simp_tac
   4.391 +        (put_simpset HOL_basic_ss ctxt addsimps thms) i)))
   4.392 +*} "rewrite subgoal by given rules"
   4.393 +
   4.394 +text {* The concrete syntax wrapping of @{command method_setup} always
    4.395 +  passes through the proof context at the end of parsing, but it is
   4.396 +  not used in this example.
   4.397 +
   4.398 +  The @{ML Attrib.thms} parser produces a list of theorems from the
   4.399 +  usual Isar syntax involving attribute expressions etc.\ (syntax
   4.400 +  category @{syntax thmrefs}) \cite{isabelle-isar-ref}.  The resulting
   4.401 +  @{ML_text thms} are added to @{ML HOL_basic_ss} which already
   4.402 +  contains the basic Simplifier setup for HOL.
   4.403 +
   4.404 +  The tactic @{ML asm_full_simp_tac} is the one that is also used in
   4.405 +  method @{method simp} by default.  The extra wrapping by the @{ML
   4.406 +  CHANGED} tactical ensures progress of simplification: identical goal
   4.407 +  states are filtered out explicitly to make the raw tactic conform to
   4.408 +  standard Isar method behaviour.
   4.409 +
   4.410 +  \medskip Method @{method my_simp} can be used in Isar proofs like
   4.411 +  this:
   4.412 +*}
   4.413 +
   4.414 +notepad
   4.415 +begin
   4.416 +  fix a b c
   4.417 +  assume a: "a = b"
   4.418 +  assume b: "b = c"
   4.419 +  have "a = c" by (my_simp a b)
   4.420 +end
   4.421 +
   4.422 +text {* Here is a similar method that operates on all subgoals,
   4.423 +  instead of just the first one. *}
   4.424 +
   4.425 +method_setup my_simp_all = {*
   4.426 +  Attrib.thms >> (fn thms => fn ctxt =>
   4.427 +    SIMPLE_METHOD
   4.428 +      (CHANGED
   4.429 +        (ALLGOALS (asm_full_simp_tac
   4.430 +          (put_simpset HOL_basic_ss ctxt addsimps thms)))))
   4.431 +*} "rewrite all subgoals by given rules"
   4.432 +
   4.433 +notepad
   4.434 +begin
   4.435 +  fix a b c
   4.436 +  assume a: "a = b"
   4.437 +  assume b: "b = c"
   4.438 +  have "a = c" and "c = b" by (my_simp_all a b)
   4.439 +end
   4.440 +
   4.441 +text {* \medskip Apart from explicit arguments, common proof methods
   4.442 +  typically work with a default configuration provided by the context.
   4.443 +  As a shortcut to rule management we use a cheap solution via functor
   4.444 +  @{ML_functor Named_Thms} (see also @{file
   4.445 +  "~~/src/Pure/Tools/named_thms.ML"}).  *}
   4.446 +
   4.447 +ML {*
   4.448 +  structure My_Simps =
   4.449 +    Named_Thms
   4.450 +      (val name = @{binding my_simp} val description = "my_simp rule")
   4.451 +*}
   4.452 +setup My_Simps.setup
   4.453 +
   4.454 +text {* This provides ML access to a list of theorems in canonical
   4.455 +  declaration order via @{ML My_Simps.get}.  The user can add or
   4.456 +  delete rules via the attribute @{attribute my_simp}.  The actual
   4.457 +  proof method is now defined as before, but we append the explicit
   4.458 +  arguments and the rules from the context.  *}
   4.459 +
   4.460 +method_setup my_simp' = {*
   4.461 +  Attrib.thms >> (fn thms => fn ctxt =>
   4.462 +    SIMPLE_METHOD' (fn i =>
   4.463 +      CHANGED (asm_full_simp_tac
   4.464 +        (put_simpset HOL_basic_ss ctxt
   4.465 +          addsimps (thms @ My_Simps.get ctxt)) i)))
   4.466 +*} "rewrite subgoal by given rules and my_simp rules from the context"
   4.467 +
   4.468 +text {*
   4.469 +  \medskip Method @{method my_simp'} can be used in Isar proofs
   4.470 +  like this:
   4.471 +*}
   4.472 +
   4.473 +notepad
   4.474 +begin
   4.475 +  fix a b c
   4.476 +  assume [my_simp]: "a \<equiv> b"
   4.477 +  assume [my_simp]: "b \<equiv> c"
   4.478 +  have "a \<equiv> c" by my_simp'
   4.479 +end
   4.480 +
   4.481 +text {* \medskip The @{method my_simp} variants defined above are
   4.482 +  ``simple'' methods, i.e.\ the goal facts are merely inserted as goal
   4.483 +  premises by the @{ML SIMPLE_METHOD'} or @{ML SIMPLE_METHOD} wrapper.
   4.484 +  For proof methods that are similar to the standard collection of
   4.485 +  @{method simp}, @{method blast}, @{method fast}, @{method auto}
   4.486 +  there is little more that can be done.
   4.487 +
   4.488 +  Note that using the primary goal facts in the same manner as the
   4.489 +  method arguments obtained via concrete syntax or the context does
   4.490 +  not meet the requirement of ``strong emphasis on facts'' of regular
   4.491 +  proof methods, because rewrite rules as used above can be easily
   4.492 +  ignored.  A proof text ``@{command using}~@{text "foo"}~@{command
   4.493 +  "by"}~@{text "my_simp"}'' where @{text "foo"} is not used would
   4.494 +  deceive the reader.
   4.495 +
   4.496 +  \medskip The technical treatment of rules from the context requires
   4.497 +  further attention.  Above we rebuild a fresh @{ML_type simpset} from
   4.498 +  the arguments and \emph{all} rules retrieved from the context on
   4.499 +  every invocation of the method.  This does not scale to really large
    4.500 +  collections of rules, which easily emerge in the context of a big
   4.501 +  theory library, for example.
   4.502 +
   4.503 +  This is an inherent limitation of the simplistic rule management via
   4.504 +  functor @{ML_functor Named_Thms}, because it lacks tool-specific
   4.505 +  storage and retrieval.  More realistic applications require
   4.506 +  efficient index-structures that organize theorems in a customized
   4.507 +  manner, such as a discrimination net that is indexed by the
   4.508 +  left-hand sides of rewrite rules.  For variations on the Simplifier,
   4.509 +  re-use of the existing type @{ML_type simpset} is adequate, but
   4.510 +  scalability would require it be maintained statically within the
   4.511 +  context data, not dynamically on each tool invocation.  *}
   4.512 +
   4.513 +
   4.514 +section {* Attributes \label{sec:attributes} *}
   4.515 +
   4.516 +text {* An \emph{attribute} is a function @{text "context \<times> thm \<rightarrow>
   4.517 +  context \<times> thm"}, which means both a (generic) context and a theorem
   4.518 +  can be modified simultaneously.  In practice this mixed form is very
   4.519 +  rare, instead attributes are presented either as \emph{declaration
   4.520 +  attribute:} @{text "thm \<rightarrow> context \<rightarrow> context"} or \emph{rule
   4.521 +  attribute:} @{text "context \<rightarrow> thm \<rightarrow> thm"}.
   4.522 +
   4.523 +  Attributes can have additional arguments via concrete syntax.  There
   4.524 +  is a collection of context-sensitive parsers for various logical
   4.525 +  entities (types, terms, theorems).  These already take care of
   4.526 +  applying morphisms to the arguments when attribute expressions are
   4.527 +  moved into a different context (see also \secref{sec:morphisms}).
   4.528 +
   4.529 +  When implementing declaration attributes, it is important to operate
   4.530 +  exactly on the variant of the generic context that is provided by
   4.531 +  the system, which is either global theory context or local proof
   4.532 +  context.  In particular, the background theory of a local context
   4.533 +  must not be modified in this situation! *}
   4.534 +
   4.535 +text %mlref {*
   4.536 +  \begin{mldecls}
   4.537 +  @{index_ML_type attribute} \\
   4.538 +  @{index_ML Thm.rule_attribute: "(Context.generic -> thm -> thm) -> attribute"} \\
   4.539 +  @{index_ML Thm.declaration_attribute: "
   4.540 +  (thm -> Context.generic -> Context.generic) -> attribute"} \\
   4.541 +  @{index_ML Attrib.setup: "binding -> attribute context_parser ->
   4.542 +  string -> theory -> theory"} \\
   4.543 +  \end{mldecls}
   4.544 +
   4.545 +  \begin{description}
   4.546 +
   4.547 +  \item Type @{ML_type attribute} represents attributes as concrete
   4.548 +  type alias.
   4.549 +
   4.550 +  \item @{ML Thm.rule_attribute}~@{text "(fn context => rule)"} wraps
   4.551 +  a context-dependent rule (mapping on @{ML_type thm}) as attribute.
   4.552 +
   4.553 +  \item @{ML Thm.declaration_attribute}~@{text "(fn thm => decl)"}
   4.554 +  wraps a theorem-dependent declaration (mapping on @{ML_type
   4.555 +  Context.generic}) as attribute.
   4.556 +
   4.557 +  \item @{ML Attrib.setup}~@{text "name parser description"} provides
   4.558 +  the functionality of the Isar command @{command attribute_setup} as
   4.559 +  ML function.
   4.560 +
   4.561 +  \end{description}
   4.562 +*}
   4.563 +
   4.564 +text %mlantiq {*
   4.565 +  \begin{matharray}{rcl}
   4.566 +  @{ML_antiquotation_def attributes} & : & @{text ML_antiquotation} \\
   4.567 +  \end{matharray}
   4.568 +
   4.569 +  @{rail \<open>
   4.570 +  @@{ML_antiquotation attributes} attributes
   4.571 +  \<close>}
   4.572 +
   4.573 +  \begin{description}
   4.574 +
   4.575 +  \item @{text "@{attributes [\<dots>]}"} embeds attribute source
   4.576 +  representation into the ML text, which is particularly useful with
   4.577 +  declarations like @{ML Local_Theory.note}.  Attribute names are
   4.578 +  internalized at compile time, but the source is unevaluated.  This
   4.579 +  means attributes with formal arguments (types, terms, theorems) may
   4.580 +  be subject to odd effects of dynamic scoping!
   4.581 +
   4.582 +  \end{description}
   4.583 +*}
   4.584 +
   4.585 +text %mlex {* See also @{command attribute_setup} in
   4.586 +  \cite{isabelle-isar-ref} which includes some abstract examples. *}
   4.587 +
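          +text {* \medskip As a small concrete sketch (the attribute name @{text
          +  "my_sym"} is invented for this example), a rule attribute based on the HOL
          +  theorem @{thm [source] sym} can be set up and used as follows: *}
          +
          +attribute_setup my_sym = {*
          +  Scan.succeed (Thm.rule_attribute (fn _ => fn th => th RS @{thm sym}))
          +*} "resolve theorem with the HOL symmetry rule"
          +
          +notepad
          +begin
          +  fix a b :: 'a
          +  assume eq: "a = b"
          +  have "b = a" by (rule eq [my_sym])
          +end
          +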
   4.588 +end
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/src/Doc/Implementation/Local_Theory.thy	Sat Apr 05 11:37:00 2014 +0200
     5.3 @@ -0,0 +1,168 @@
     5.4 +theory Local_Theory
     5.5 +imports Base
     5.6 +begin
     5.7 +
     5.8 +chapter {* Local theory specifications \label{ch:local-theory} *}
     5.9 +
    5.10 +text {*
    5.11 +  A \emph{local theory} combines aspects of both theory and proof
    5.12 +  context (cf.\ \secref{sec:context}), such that definitional
     5.13 +  specifications may be given relative to parameters and
    5.14 +  assumptions.  A local theory is represented as a regular proof
    5.15 +  context, augmented by administrative data about the \emph{target
    5.16 +  context}.
    5.17 +
    5.18 +  The target is usually derived from the background theory by adding
    5.19 +  local @{text "\<FIX>"} and @{text "\<ASSUME>"} elements, plus
    5.20 +  suitable modifications of non-logical context data (e.g.\ a special
    5.21 +  type-checking discipline).  Once initialized, the target is ready to
    5.22 +  absorb definitional primitives: @{text "\<DEFINE>"} for terms and
    5.23 +  @{text "\<NOTE>"} for theorems.  Such definitions may get
    5.24 +  transformed in a target-specific way, but the programming interface
    5.25 +  hides such details.
    5.26 +
    5.27 +  Isabelle/Pure provides target mechanisms for locales, type-classes,
    5.28 +  type-class instantiations, and general overloading.  In principle,
    5.29 +  users can implement new targets as well, but this rather arcane
    5.30 +  discipline is beyond the scope of this manual.  In contrast,
    5.31 +  implementing derived definitional packages to be used within a local
    5.32 +  theory context is quite easy: the interfaces are even simpler and
    5.33 +  more abstract than the underlying primitives for raw theories.
    5.34 +
    5.35 +  Many definitional packages for local theories are available in
    5.36 +  Isabelle.  Although a few old packages only work for global
    5.37 +  theories, the standard way of implementing definitional packages in
    5.38 +  Isabelle is via the local theory interface.
    5.39 +*}
    5.40 +
    5.41 +
    5.42 +section {* Definitional elements *}
    5.43 +
    5.44 +text {*
    5.45 +  There are separate elements @{text "\<DEFINE> c \<equiv> t"} for terms, and
    5.46 +  @{text "\<NOTE> b = thm"} for theorems.  Types are treated
    5.47 +  implicitly, according to Hindley-Milner discipline (cf.\
    5.48 +  \secref{sec:variables}).  These definitional primitives essentially
    5.49 +  act like @{text "let"}-bindings within a local context that may
    5.50 +  already contain earlier @{text "let"}-bindings and some initial
    5.51 +  @{text "\<lambda>"}-bindings.  Thus we gain \emph{dependent definitions}
    5.52 +  that are relative to an initial axiomatic context.  The following
    5.53 +  diagram illustrates this idea of axiomatic elements versus
    5.54 +  definitional elements:
    5.55 +
    5.56 +  \begin{center}
    5.57 +  \begin{tabular}{|l|l|l|}
    5.58 +  \hline
    5.59 +  & @{text "\<lambda>"}-binding & @{text "let"}-binding \\
    5.60 +  \hline
    5.61 +  types & fixed @{text "\<alpha>"} & arbitrary @{text "\<beta>"} \\
    5.62 +  terms & @{text "\<FIX> x :: \<tau>"} & @{text "\<DEFINE> c \<equiv> t"} \\
    5.63 +  theorems & @{text "\<ASSUME> a: A"} & @{text "\<NOTE> b = \<^BG>B\<^EN>"} \\
    5.64 +  \hline
    5.65 +  \end{tabular}
    5.66 +  \end{center}
    5.67 +
    5.68 +  A user package merely needs to produce suitable @{text "\<DEFINE>"}
    5.69 +  and @{text "\<NOTE>"} elements according to the application.  For
    5.70 +  example, a package for inductive definitions might first @{text
    5.71 +  "\<DEFINE>"} a certain predicate as some fixed-point construction,
    5.72 +  then @{text "\<NOTE>"} a proven result about monotonicity of the
    5.73 +  functor involved here, and then produce further derived concepts via
    5.74 +  additional @{text "\<DEFINE>"} and @{text "\<NOTE>"} elements.
    5.75 +
    5.76 +  The cumulative sequence of @{text "\<DEFINE>"} and @{text "\<NOTE>"}
    5.77 +  produced at package runtime is managed by the local theory
    5.78 +  infrastructure by means of an \emph{auxiliary context}.  Thus the
    5.79 +  system holds up the impression of working within a fully abstract
    5.80 +  situation with hypothetical entities: @{text "\<DEFINE> c \<equiv> t"}
    5.81 +  always results in a literal fact @{text "\<^BG>c \<equiv> t\<^EN>"}, where
     5.82 +  @{text "c"} is a fixed variable.  The details about
     5.83 +  global constants, name spaces etc.\ are handled internally.
    5.84 +
    5.85 +  So the general structure of a local theory is a sandwich of three
    5.86 +  layers:
    5.87 +
    5.88 +  \begin{center}
    5.89 +  \framebox{\quad auxiliary context \quad\framebox{\quad target context \quad\framebox{\quad background theory\quad}}}
    5.90 +  \end{center}
    5.91 +
    5.92 +  When a definitional package is finished, the auxiliary context is
    5.93 +  reset to the target context.  The target now holds definitions for
    5.94 +  terms and theorems that stem from the hypothetical @{text
    5.95 +  "\<DEFINE>"} and @{text "\<NOTE>"} elements, transformed by the
    5.96 +  particular target policy (see \cite[\S4--5]{Haftmann-Wenzel:2009}
    5.97 +  for details).  *}
    5.98 +
    5.99 +text %mlref {*
   5.100 +  \begin{mldecls}
   5.101 +  @{index_ML_type local_theory: Proof.context} \\
   5.102 +  @{index_ML Named_Target.init: "(local_theory -> local_theory) ->
   5.103 +    string -> theory -> local_theory"} \\[1ex]
   5.104 +  @{index_ML Local_Theory.define: "(binding * mixfix) * (Attrib.binding * term) ->
   5.105 +    local_theory -> (term * (string * thm)) * local_theory"} \\
   5.106 +  @{index_ML Local_Theory.note: "Attrib.binding * thm list ->
   5.107 +    local_theory -> (string * thm list) * local_theory"} \\
   5.108 +  \end{mldecls}
   5.109 +
   5.110 +  \begin{description}
   5.111 +
   5.112 +  \item Type @{ML_type local_theory} represents local theories.
   5.113 +  Although this is merely an alias for @{ML_type Proof.context}, it is
   5.114 +  semantically a subtype of the same: a @{ML_type local_theory} holds
   5.115 +  target information as special context data.  Subtyping means that
    5.116 +  any value @{text "lthy:"}~@{ML_type local_theory} can also be used
    5.117 +  with operations expecting a regular @{text "ctxt:"}~@{ML_type
   5.118 +  Proof.context}.
   5.119 +
   5.120 +  \item @{ML Named_Target.init}~@{text "before_exit name thy"}
   5.121 +  initializes a local theory derived from the given background theory.
   5.122 +  An empty name refers to a \emph{global theory} context, and a
   5.123 +  non-empty name refers to a @{command locale} or @{command class}
   5.124 +  context (a fully-qualified internal name is expected here).  This is
   5.125 +  useful for experimentation --- normally the Isar toplevel already
   5.126 +  takes care to initialize the local theory context.  The given @{text
   5.127 +  "before_exit"} function is invoked before leaving the context; in
   5.128 +  most situations plain identity @{ML I} is sufficient.
   5.129 +
   5.130 +  \item @{ML Local_Theory.define}~@{text "((b, mx), (a, rhs))
   5.131 +  lthy"} defines a local entity according to the specification that is
    5.132 +  given relative to the current @{text "lthy"} context.  In
   5.133 +  particular the term of the RHS may refer to earlier local entities
   5.134 +  from the auxiliary context, or hypothetical parameters from the
   5.135 +  target context.  The result is the newly defined term (which is
   5.136 +  always a fixed variable with exactly the same name as specified for
   5.137 +  the LHS), together with an equational theorem that states the
   5.138 +  definition as a hypothetical fact.
   5.139 +
   5.140 +  Unless an explicit name binding is given for the RHS, the resulting
   5.141 +  fact will be called @{text "b_def"}.  Any given attributes are
   5.142 +  applied to that same fact --- immediately in the auxiliary context
   5.143 +  \emph{and} in any transformed versions stemming from target-specific
   5.144 +  policies or any later interpretations of results from the target
   5.145 +  context (think of @{command locale} and @{command interpretation},
    5.146 +  for example).  This means that attributes should usually be plain
   5.147 +  declarations such as @{attribute simp}, while non-trivial rules like
   5.148 +  @{attribute simplified} are better avoided.
   5.149 +
   5.150 +  \item @{ML Local_Theory.note}~@{text "(a, ths) lthy"} is
   5.151 +  analogous to @{ML Local_Theory.define}, but defines facts instead of
   5.152 +  terms.  There is also a slightly more general variant @{ML
   5.153 +  Local_Theory.notes} that defines several facts (with attribute
   5.154 +  expressions) simultaneously.
   5.155 +
   5.156 +  This is essentially the internal version of the @{command lemmas}
   5.157 +  command, or @{command declare} if an empty name binding is given.
   5.158 +
   5.159 +  \end{description}
   5.160 +*}
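          +
          +text %mlex {* The following ML text sketches the interplay of these
          +  operations on a throw-away local theory derived from the current
          +  background theory; the binding @{text "seven"} and the intermediate ML
          +  names are invented for illustration, and nothing is committed to the
          +  enclosing theory. *}
          +
          +ML {*
          +  (*enter an ad-hoc global target of the current background theory*)
          +  val lthy = Named_Target.init I "" @{theory};
          +
          +  (*define "seven" as 7; the result is a fixed variable together with the
          +    defining fact, which gets the default name "seven_def"*)
          +  val ((seven, (_, seven_def)), lthy') =
          +    Local_Theory.define ((@{binding seven}, NoSyn),
          +      ((Binding.empty, []), @{term "7 :: nat"})) lthy;
          +
          +  (*note the same fact once more under an explicit name*)
          +  val ((_, facts), lthy'') =
          +    Local_Theory.note ((@{binding seven_def_again}, []), [seven_def]) lthy';
          +*}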
   5.161 +
   5.162 +
   5.163 +section {* Morphisms and declarations \label{sec:morphisms} *}
   5.164 +
   5.165 +text {*
   5.166 +  %FIXME
   5.167 +
   5.168 +  See also \cite{Chaieb-Wenzel:2007}.
   5.169 +*}
   5.170 +
   5.171 +end
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/src/Doc/Implementation/Logic.thy	Sat Apr 05 11:37:00 2014 +0200
     6.3 @@ -0,0 +1,1462 @@
     6.4 +theory Logic
     6.5 +imports Base
     6.6 +begin
     6.7 +
     6.8 +chapter {* Primitive logic \label{ch:logic} *}
     6.9 +
    6.10 +text {*
     6.11 +  The logical foundations of Isabelle/Isar are those of the Pure logic,
    6.12 +  which has been introduced as a Natural Deduction framework in
    6.13 +  \cite{paulson700}.  This is essentially the same logic as ``@{text
    6.14 +  "\<lambda>HOL"}'' in the more abstract setting of Pure Type Systems (PTS)
    6.15 +  \cite{Barendregt-Geuvers:2001}, although there are some key
    6.16 +  differences in the specific treatment of simple types in
    6.17 +  Isabelle/Pure.
    6.18 +
    6.19 +  Following type-theoretic parlance, the Pure logic consists of three
    6.20 +  levels of @{text "\<lambda>"}-calculus with corresponding arrows, @{text
    6.21 +  "\<Rightarrow>"} for syntactic function space (terms depending on terms), @{text
    6.22 +  "\<And>"} for universal quantification (proofs depending on terms), and
    6.23 +  @{text "\<Longrightarrow>"} for implication (proofs depending on proofs).
    6.24 +
    6.25 +  Derivations are relative to a logical theory, which declares type
    6.26 +  constructors, constants, and axioms.  Theory declarations support
    6.27 +  schematic polymorphism, which is strictly speaking outside the
     6.28 +  logic.\footnote{This is the deeper logical reason why the theory
    6.29 +  context @{text "\<Theta>"} is separate from the proof context @{text "\<Gamma>"}
    6.30 +  of the core calculus: type constructors, term constants, and facts
    6.31 +  (proof constants) may involve arbitrary type schemes, but the type
    6.32 +  of a locally fixed term parameter is also fixed!}
    6.33 +*}
    6.34 +
    6.35 +
    6.36 +section {* Types \label{sec:types} *}
    6.37 +
    6.38 +text {*
    6.39 +  The language of types is an uninterpreted order-sorted first-order
    6.40 +  algebra; types are qualified by ordered type classes.
    6.41 +
    6.42 +  \medskip A \emph{type class} is an abstract syntactic entity
    6.43 +  declared in the theory context.  The \emph{subclass relation} @{text
    6.44 +  "c\<^sub>1 \<subseteq> c\<^sub>2"} is specified by stating an acyclic
    6.45 +  generating relation; the transitive closure is maintained
    6.46 +  internally.  The resulting relation is an ordering: reflexive,
    6.47 +  transitive, and antisymmetric.
    6.48 +
    6.49 +  A \emph{sort} is a list of type classes written as @{text "s = {c\<^sub>1,
     6.50 +  \<dots>, c\<^sub>m}"}; it represents a symbolic intersection.  Notationally, the
    6.51 +  curly braces are omitted for singleton intersections, i.e.\ any
    6.52 +  class @{text "c"} may be read as a sort @{text "{c}"}.  The ordering
    6.53 +  on type classes is extended to sorts according to the meaning of
     6.54 +  intersections: @{text "{c\<^sub>1, \<dots>, c\<^sub>m} \<subseteq> {d\<^sub>1, \<dots>, d\<^sub>n}"} iff @{text
    6.55 +  "\<forall>j. \<exists>i. c\<^sub>i \<subseteq> d\<^sub>j"}.  The empty intersection @{text "{}"} refers to
    6.56 +  the universal sort, which is the largest element wrt.\ the sort
    6.57 +  order.  Thus @{text "{}"} represents the ``full sort'', not the
    6.58 +  empty one!  The intersection of all (finitely many) classes declared
    6.59 +  in the current theory is the least element wrt.\ the sort ordering.
    6.60 +
    6.61 +  \medskip A \emph{fixed type variable} is a pair of a basic name
    6.62 +  (starting with a @{text "'"} character) and a sort constraint, e.g.\
    6.63 +  @{text "('a, s)"} which is usually printed as @{text "\<alpha>\<^sub>s"}.
    6.64 +  A \emph{schematic type variable} is a pair of an indexname and a
    6.65 +  sort constraint, e.g.\ @{text "(('a, 0), s)"} which is usually
    6.66 +  printed as @{text "?\<alpha>\<^sub>s"}.
    6.67 +
    6.68 +  Note that \emph{all} syntactic components contribute to the identity
    6.69 +  of type variables: basic name, index, and sort constraint.  The core
    6.70 +  logic handles type variables with the same name but different sorts
    6.71 +  as different, although the type-inference layer (which is outside
    6.72 +  the core) rejects anything like that.
    6.73 +
    6.74 +  A \emph{type constructor} @{text "\<kappa>"} is a @{text "k"}-ary operator
    6.75 +  on types declared in the theory.  Type constructor application is
    6.76 +  written postfix as @{text "(\<alpha>\<^sub>1, \<dots>, \<alpha>\<^sub>k)\<kappa>"}.  For
    6.77 +  @{text "k = 0"} the argument tuple is omitted, e.g.\ @{text "prop"}
    6.78 +  instead of @{text "()prop"}.  For @{text "k = 1"} the parentheses
    6.79 +  are omitted, e.g.\ @{text "\<alpha> list"} instead of @{text "(\<alpha>)list"}.
    6.80 +  Further notation is provided for specific constructors, notably the
    6.81 +  right-associative infix @{text "\<alpha> \<Rightarrow> \<beta>"} instead of @{text "(\<alpha>,
    6.82 +  \<beta>)fun"}.
    6.83 +  
    6.84 +  The logical category \emph{type} is defined inductively over type
    6.85 +  variables and type constructors as follows: @{text "\<tau> = \<alpha>\<^sub>s | ?\<alpha>\<^sub>s |
    6.86 +  (\<tau>\<^sub>1, \<dots>, \<tau>\<^sub>k)\<kappa>"}.
    6.87 +
    6.88 +  A \emph{type abbreviation} is a syntactic definition @{text
    6.89 +  "(\<^vec>\<alpha>)\<kappa> = \<tau>"} of an arbitrary type expression @{text "\<tau>"} over
    6.90 +  variables @{text "\<^vec>\<alpha>"}.  Type abbreviations appear as type
    6.91 +  constructors in the syntax, but are expanded before entering the
    6.92 +  logical core.
    6.93 +
    6.94 +  A \emph{type arity} declares the image behavior of a type
    6.95 +  constructor wrt.\ the algebra of sorts: @{text "\<kappa> :: (s\<^sub>1, \<dots>,
    6.96 +  s\<^sub>k)s"} means that @{text "(\<tau>\<^sub>1, \<dots>, \<tau>\<^sub>k)\<kappa>"} is
    6.97 +  of sort @{text "s"} if every argument type @{text "\<tau>\<^sub>i"} is
    6.98 +  of sort @{text "s\<^sub>i"}.  Arity declarations are implicitly
    6.99 +  completed, i.e.\ @{text "\<kappa> :: (\<^vec>s)c"} entails @{text "\<kappa> ::
   6.100 +  (\<^vec>s)c'"} for any @{text "c' \<supseteq> c"}.
   6.101 +
   6.102 +  \medskip The sort algebra is always maintained as \emph{coregular},
   6.103 +  which means that type arities are consistent with the subclass
    6.104 +  relation: for any type constructor @{text "\<kappa>"}, classes @{text
    6.105 +  "c\<^sub>1 \<subseteq> c\<^sub>2"}, and arities @{text "\<kappa> ::
    6.106 +  (\<^vec>s\<^sub>1)c\<^sub>1"} and @{text "\<kappa> ::
    6.107 +  (\<^vec>s\<^sub>2)c\<^sub>2"}, the relation @{text "\<^vec>s\<^sub>1 \<subseteq>
    6.108 +  \<^vec>s\<^sub>2"} holds component-wise.
   6.109 +
   6.110 +  The key property of a coregular order-sorted algebra is that sort
   6.111 +  constraints can be solved in a most general fashion: for each type
   6.112 +  constructor @{text "\<kappa>"} and sort @{text "s"} there is a most general
   6.113 +  vector of argument sorts @{text "(s\<^sub>1, \<dots>, s\<^sub>k)"} such
   6.114 +  that a type scheme @{text "(\<alpha>\<^bsub>s\<^sub>1\<^esub>, \<dots>,
   6.115 +  \<alpha>\<^bsub>s\<^sub>k\<^esub>)\<kappa>"} is of sort @{text "s"}.
   6.116 +  Consequently, type unification has most general solutions (modulo
   6.117 +  equivalence of sorts), so type-inference produces primary types as
   6.118 +  expected \cite{nipkow-prehofer}.
   6.119 +*}
   6.120 +
   6.121 +text %mlref {*
   6.122 +  \begin{mldecls}
   6.123 +  @{index_ML_type class: string} \\
   6.124 +  @{index_ML_type sort: "class list"} \\
   6.125 +  @{index_ML_type arity: "string * sort list * sort"} \\
   6.126 +  @{index_ML_type typ} \\
   6.127 +  @{index_ML Term.map_atyps: "(typ -> typ) -> typ -> typ"} \\
   6.128 +  @{index_ML Term.fold_atyps: "(typ -> 'a -> 'a) -> typ -> 'a -> 'a"} \\
   6.129 +  \end{mldecls}
   6.130 +  \begin{mldecls}
   6.131 +  @{index_ML Sign.subsort: "theory -> sort * sort -> bool"} \\
   6.132 +  @{index_ML Sign.of_sort: "theory -> typ * sort -> bool"} \\
   6.133 +  @{index_ML Sign.add_type: "Proof.context -> binding * int * mixfix -> theory -> theory"} \\
   6.134 +  @{index_ML Sign.add_type_abbrev: "Proof.context ->
   6.135 +  binding * string list * typ -> theory -> theory"} \\
   6.136 +  @{index_ML Sign.primitive_class: "binding * class list -> theory -> theory"} \\
   6.137 +  @{index_ML Sign.primitive_classrel: "class * class -> theory -> theory"} \\
   6.138 +  @{index_ML Sign.primitive_arity: "arity -> theory -> theory"} \\
   6.139 +  \end{mldecls}
   6.140 +
   6.141 +  \begin{description}
   6.142 +
   6.143 +  \item Type @{ML_type class} represents type classes.
   6.144 +
   6.145 +  \item Type @{ML_type sort} represents sorts, i.e.\ finite
   6.146 +  intersections of classes.  The empty list @{ML "[]: sort"} refers to
   6.147 +  the empty class intersection, i.e.\ the ``full sort''.
   6.148 +
   6.149 +  \item Type @{ML_type arity} represents type arities.  A triple
   6.150 +  @{text "(\<kappa>, \<^vec>s, s) : arity"} represents @{text "\<kappa> ::
   6.151 +  (\<^vec>s)s"} as described above.
   6.152 +
   6.153 +  \item Type @{ML_type typ} represents types; this is a datatype with
   6.154 +  constructors @{ML TFree}, @{ML TVar}, @{ML Type}.
   6.155 +
   6.156 +  \item @{ML Term.map_atyps}~@{text "f \<tau>"} applies the mapping @{text
   6.157 +  "f"} to all atomic types (@{ML TFree}, @{ML TVar}) occurring in
   6.158 +  @{text "\<tau>"}.
   6.159 +
   6.160 +  \item @{ML Term.fold_atyps}~@{text "f \<tau>"} iterates the operation
   6.161 +  @{text "f"} over all occurrences of atomic types (@{ML TFree}, @{ML
   6.162 +  TVar}) in @{text "\<tau>"}; the type structure is traversed from left to
   6.163 +  right.
   6.164 +
   6.165 +  \item @{ML Sign.subsort}~@{text "thy (s\<^sub>1, s\<^sub>2)"}
   6.166 +  tests the subsort relation @{text "s\<^sub>1 \<subseteq> s\<^sub>2"}.
   6.167 +
   6.168 +  \item @{ML Sign.of_sort}~@{text "thy (\<tau>, s)"} tests whether type
   6.169 +  @{text "\<tau>"} is of sort @{text "s"}.
   6.170 +
   6.171 +  \item @{ML Sign.add_type}~@{text "ctxt (\<kappa>, k, mx)"} declares a
    6.172 +  new type constructor @{text "\<kappa>"} with @{text "k"} arguments and
   6.173 +  optional mixfix syntax.
   6.174 +
   6.175 +  \item @{ML Sign.add_type_abbrev}~@{text "ctxt (\<kappa>, \<^vec>\<alpha>, \<tau>)"}
   6.176 +  defines a new type abbreviation @{text "(\<^vec>\<alpha>)\<kappa> = \<tau>"}.
   6.177 +
   6.178 +  \item @{ML Sign.primitive_class}~@{text "(c, [c\<^sub>1, \<dots>,
   6.179 +  c\<^sub>n])"} declares a new class @{text "c"}, together with class
   6.180 +  relations @{text "c \<subseteq> c\<^sub>i"}, for @{text "i = 1, \<dots>, n"}.
   6.181 +
   6.182 +  \item @{ML Sign.primitive_classrel}~@{text "(c\<^sub>1,
   6.183 +  c\<^sub>2)"} declares the class relation @{text "c\<^sub>1 \<subseteq>
   6.184 +  c\<^sub>2"}.
   6.185 +
   6.186 +  \item @{ML Sign.primitive_arity}~@{text "(\<kappa>, \<^vec>s, s)"} declares
   6.187 +  the arity @{text "\<kappa> :: (\<^vec>s)s"}.
   6.188 +
   6.189 +  \end{description}
   6.190 +*}
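          +
          +text %mlex {* A small sketch of these operations, evaluated at compile
          +  time against the current background theory (the examples rely on the
          +  usual HOL classes and types): *}
          +
          +ML {*
          +  val thy = @{theory};
          +
          +  (*sort membership and subsort tests*)
          +  val _ = @{assert} (Sign.of_sort thy (@{typ nat}, @{sort type}));
          +  val _ = @{assert} (Sign.subsort thy (@{sort linorder}, @{sort order}));
          +
          +  (*map over atomic types: replace the type variable 'a by nat*)
          +  val ty = Term.map_atyps (fn TFree _ => @{typ nat} | T => T)
          +    @{typ "'a list \<times> bool"};
          +*}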
   6.191 +
   6.192 +text %mlantiq {*
   6.193 +  \begin{matharray}{rcl}
   6.194 +  @{ML_antiquotation_def "class"} & : & @{text ML_antiquotation} \\
   6.195 +  @{ML_antiquotation_def "sort"} & : & @{text ML_antiquotation} \\
   6.196 +  @{ML_antiquotation_def "type_name"} & : & @{text ML_antiquotation} \\
   6.197 +  @{ML_antiquotation_def "type_abbrev"} & : & @{text ML_antiquotation} \\
   6.198 +  @{ML_antiquotation_def "nonterminal"} & : & @{text ML_antiquotation} \\
   6.199 +  @{ML_antiquotation_def "typ"} & : & @{text ML_antiquotation} \\
   6.200 +  \end{matharray}
   6.201 +
   6.202 +  @{rail \<open>
   6.203 +  @@{ML_antiquotation class} nameref
   6.204 +  ;
   6.205 +  @@{ML_antiquotation sort} sort
   6.206 +  ;
   6.207 +  (@@{ML_antiquotation type_name} |
   6.208 +   @@{ML_antiquotation type_abbrev} |
   6.209 +   @@{ML_antiquotation nonterminal}) nameref
   6.210 +  ;
   6.211 +  @@{ML_antiquotation typ} type
   6.212 +  \<close>}
   6.213 +
   6.214 +  \begin{description}
   6.215 +
   6.216 +  \item @{text "@{class c}"} inlines the internalized class @{text
   6.217 +  "c"} --- as @{ML_type string} literal.
   6.218 +
   6.219 +  \item @{text "@{sort s}"} inlines the internalized sort @{text "s"}
   6.220 +  --- as @{ML_type "string list"} literal.
   6.221 +
   6.222 +  \item @{text "@{type_name c}"} inlines the internalized type
   6.223 +  constructor @{text "c"} --- as @{ML_type string} literal.
   6.224 +
   6.225 +  \item @{text "@{type_abbrev c}"} inlines the internalized type
   6.226 +  abbreviation @{text "c"} --- as @{ML_type string} literal.
   6.227 +
   6.228 +  \item @{text "@{nonterminal c}"} inlines the internalized syntactic
   6.229 +  type~/ grammar nonterminal @{text "c"} --- as @{ML_type string}
   6.230 +  literal.
   6.231 +
   6.232 +  \item @{text "@{typ \<tau>}"} inlines the internalized type @{text "\<tau>"}
   6.233 +  --- as constructor term for datatype @{ML_type typ}.
   6.234 +
   6.235 +  \end{description}
   6.236 +*}
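          +
          +text %mlex {* For example (a small sketch; the inspected entities are the
          +  usual ones from HOL): *}
          +
          +ML {*
          +  val list_name = @{type_name list};   (*fully qualified name as string*)
          +  val ord_sort = @{sort ord};          (*sort as list of class names*)
          +  val fun_ty = @{typ "'a \<Rightarrow> bool"};    (*value of datatype typ*)
          +*}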
   6.237 +
   6.238 +
   6.239 +section {* Terms \label{sec:terms} *}
   6.240 +
   6.241 +text {*
   6.242 +  The language of terms is that of simply-typed @{text "\<lambda>"}-calculus
   6.243 +  with de-Bruijn indices for bound variables (cf.\ \cite{debruijn72}
   6.244 +  or \cite{paulson-ml2}), with the types being determined by the
   6.245 +  corresponding binders.  In contrast, free variables and constants
   6.246 +  have an explicit name and type in each occurrence.
   6.247 +
   6.248 +  \medskip A \emph{bound variable} is a natural number @{text "b"},
   6.249 +  which accounts for the number of intermediate binders between the
   6.250 +  variable occurrence in the body and its binding position.  For
   6.251 +  example, the de-Bruijn term @{text "\<lambda>\<^bsub>bool\<^esub>. \<lambda>\<^bsub>bool\<^esub>. 1 \<and> 0"} would
   6.252 +  correspond to @{text "\<lambda>x\<^bsub>bool\<^esub>. \<lambda>y\<^bsub>bool\<^esub>. x \<and> y"} in a named
   6.253 +  representation.  Note that a bound variable may be represented by
   6.254 +  different de-Bruijn indices at different occurrences, depending on
   6.255 +  the nesting of abstractions.
   6.256 +
   6.257 +  A \emph{loose variable} is a bound variable that is outside the
   6.258 +  scope of local binders.  The types (and names) for loose variables
    6.259 +  can be managed as a separate context that is maintained as a stack
   6.260 +  of hypothetical binders.  The core logic operates on closed terms,
   6.261 +  without any loose variables.
   6.262 +
   6.263 +  A \emph{fixed variable} is a pair of a basic name and a type, e.g.\
   6.264 +  @{text "(x, \<tau>)"} which is usually printed @{text "x\<^sub>\<tau>"} here.  A
   6.265 +  \emph{schematic variable} is a pair of an indexname and a type,
   6.266 +  e.g.\ @{text "((x, 0), \<tau>)"} which is likewise printed as @{text
   6.267 +  "?x\<^sub>\<tau>"}.
   6.268 +
   6.269 +  \medskip A \emph{constant} is a pair of a basic name and a type,
   6.270 +  e.g.\ @{text "(c, \<tau>)"} which is usually printed as @{text "c\<^sub>\<tau>"}
   6.271 +  here.  Constants are declared in the context as polymorphic families
   6.272 +  @{text "c :: \<sigma>"}, meaning that all substitution instances @{text
   6.273 +  "c\<^sub>\<tau>"} for @{text "\<tau> = \<sigma>\<vartheta>"} are valid.
   6.274 +
   6.275 +  The vector of \emph{type arguments} of constant @{text "c\<^sub>\<tau>"} wrt.\
   6.276 +  the declaration @{text "c :: \<sigma>"} is defined as the codomain of the
   6.277 +  matcher @{text "\<vartheta> = {?\<alpha>\<^sub>1 \<mapsto> \<tau>\<^sub>1, \<dots>, ?\<alpha>\<^sub>n \<mapsto> \<tau>\<^sub>n}"} presented in
   6.278 +  canonical order @{text "(\<tau>\<^sub>1, \<dots>, \<tau>\<^sub>n)"}, corresponding to the
   6.279 +  left-to-right occurrences of the @{text "\<alpha>\<^sub>i"} in @{text "\<sigma>"}.
   6.280 +  Within a given theory context, there is a one-to-one correspondence
   6.281 +  between any constant @{text "c\<^sub>\<tau>"} and the application @{text "c(\<tau>\<^sub>1,
   6.282 +  \<dots>, \<tau>\<^sub>n)"} of its type arguments.  For example, with @{text "plus :: \<alpha>
   6.283 +  \<Rightarrow> \<alpha> \<Rightarrow> \<alpha>"}, the instance @{text "plus\<^bsub>nat \<Rightarrow> nat \<Rightarrow> nat\<^esub>"} corresponds to
   6.284 +  @{text "plus(nat)"}.
   6.285 +
   6.286 +  Constant declarations @{text "c :: \<sigma>"} may contain sort constraints
   6.287 +  for type variables in @{text "\<sigma>"}.  These are observed by
   6.288 +  type-inference as expected, but \emph{ignored} by the core logic.
   6.289 +  This means the primitive logic is able to reason with instances of
   6.290 +  polymorphic constants that the user-level type-checker would reject
   6.291 +  due to violation of type class restrictions.
   6.292 +
   6.293 +  \medskip An \emph{atomic term} is either a variable or constant.
   6.294 +  The logical category \emph{term} is defined inductively over atomic
   6.295 +  terms, with abstraction and application as follows: @{text "t = b |
   6.296 +  x\<^sub>\<tau> | ?x\<^sub>\<tau> | c\<^sub>\<tau> | \<lambda>\<^sub>\<tau>. t | t\<^sub>1 t\<^sub>2"}.  Parsing and printing takes care of
    6.297 +  converting between the internal de-Bruijn representation and an
    6.298 +  external representation with named bound variables.  Subsequently,
    6.299 +  we shall use the latter notation instead of the internal one.
   6.300 +
   6.301 +  The inductive relation @{text "t :: \<tau>"} assigns a (unique) type to a
   6.302 +  term according to the structure of atomic terms, abstractions, and
    6.303 +  applications:
   6.304 +  \[
   6.305 +  \infer{@{text "a\<^sub>\<tau> :: \<tau>"}}{}
   6.306 +  \qquad
   6.307 +  \infer{@{text "(\<lambda>x\<^sub>\<tau>. t) :: \<tau> \<Rightarrow> \<sigma>"}}{@{text "t :: \<sigma>"}}
   6.308 +  \qquad
   6.309 +  \infer{@{text "t u :: \<sigma>"}}{@{text "t :: \<tau> \<Rightarrow> \<sigma>"} & @{text "u :: \<tau>"}}
   6.310 +  \]
   6.311 +  A \emph{well-typed term} is a term that can be typed according to these rules.
   6.312 +
   6.313 +  Typing information can be omitted: type-inference is able to
   6.314 +  reconstruct the most general type of a raw term, while assigning
   6.315 +  most general types to all of its variables and constants.
   6.316 +  Type-inference depends on a context of type constraints for fixed
   6.317 +  variables, and declarations for polymorphic constants.
   6.318 +
   6.319 +  The identity of atomic terms consists both of the name and the type
   6.320 +  component.  This means that different variables @{text
   6.321 +  "x\<^bsub>\<tau>\<^sub>1\<^esub>"} and @{text "x\<^bsub>\<tau>\<^sub>2\<^esub>"} may become the same after
   6.322 +  type instantiation.  Type-inference rejects variables of the same
   6.323 +  name, but different types.  In contrast, mixed instances of
   6.324 +  polymorphic constants occur routinely.
   6.325 +
   6.326 +  \medskip The \emph{hidden polymorphism} of a term @{text "t :: \<sigma>"}
   6.327 +  is the set of type variables occurring in @{text "t"}, but not in
   6.328 +  its type @{text "\<sigma>"}.  This means that the term implicitly depends
   6.329 +  on type arguments that are not accounted in the result type, i.e.\
   6.330 +  there are different type instances @{text "t\<vartheta> :: \<sigma>"} and
   6.331 +  @{text "t\<vartheta>' :: \<sigma>"} with the same type.  This slightly
   6.332 +  pathological situation notoriously demands additional care.
   6.333 +
   6.334 +  \medskip A \emph{term abbreviation} is a syntactic definition @{text
   6.335 +  "c\<^sub>\<sigma> \<equiv> t"} of a closed term @{text "t"} of type @{text "\<sigma>"},
   6.336 +  without any hidden polymorphism.  A term abbreviation looks like a
   6.337 +  constant in the syntax, but is expanded before entering the logical
   6.338 +  core.  Abbreviations are usually reverted when printing terms, using
   6.339 +  @{text "t \<rightarrow> c\<^sub>\<sigma>"} as rules for higher-order rewriting.
   6.340 +
   6.341 +  \medskip Canonical operations on @{text "\<lambda>"}-terms include @{text
   6.342 +  "\<alpha>\<beta>\<eta>"}-conversion: @{text "\<alpha>"}-conversion refers to capture-free
   6.343 +  renaming of bound variables; @{text "\<beta>"}-conversion contracts an
   6.344 +  abstraction applied to an argument term, substituting the argument
   6.345 +  in the body: @{text "(\<lambda>x. b)a"} becomes @{text "b[a/x]"}; @{text
   6.346 +  "\<eta>"}-conversion contracts vacuous application-abstraction: @{text
   6.347 +  "\<lambda>x. f x"} becomes @{text "f"}, provided that the bound variable
   6.348 +  does not occur in @{text "f"}.
   6.349 +
   6.350 +  Terms are normally treated modulo @{text "\<alpha>"}-conversion, which is
   6.351 +  implicit in the de-Bruijn representation.  Names for bound variables
   6.352 +  in abstractions are maintained separately as (meaningless) comments,
   6.353 +  mostly for parsing and printing.  Full @{text "\<alpha>\<beta>\<eta>"}-conversion is
   6.354 +  commonplace in various standard operations (\secref{sec:obj-rules})
   6.355 +  that are based on higher-order unification and matching.
   6.356 +*}
   6.357 +
   6.358 +text %mlref {*
   6.359 +  \begin{mldecls}
   6.360 +  @{index_ML_type term} \\
   6.361 +  @{index_ML_op "aconv": "term * term -> bool"} \\
   6.362 +  @{index_ML Term.map_types: "(typ -> typ) -> term -> term"} \\
   6.363 +  @{index_ML Term.fold_types: "(typ -> 'a -> 'a) -> term -> 'a -> 'a"} \\
   6.364 +  @{index_ML Term.map_aterms: "(term -> term) -> term -> term"} \\
   6.365 +  @{index_ML Term.fold_aterms: "(term -> 'a -> 'a) -> term -> 'a -> 'a"} \\
   6.366 +  \end{mldecls}
   6.367 +  \begin{mldecls}
   6.368 +  @{index_ML fastype_of: "term -> typ"} \\
   6.369 +  @{index_ML lambda: "term -> term -> term"} \\
   6.370 +  @{index_ML betapply: "term * term -> term"} \\
   6.371 +  @{index_ML incr_boundvars: "int -> term -> term"} \\
   6.372 +  @{index_ML Sign.declare_const: "Proof.context ->
   6.373 +  (binding * typ) * mixfix -> theory -> term * theory"} \\
   6.374 +  @{index_ML Sign.add_abbrev: "string -> binding * term ->
   6.375 +  theory -> (term * term) * theory"} \\
   6.376 +  @{index_ML Sign.const_typargs: "theory -> string * typ -> typ list"} \\
   6.377 +  @{index_ML Sign.const_instance: "theory -> string * typ list -> typ"} \\
   6.378 +  \end{mldecls}
   6.379 +
   6.380 +  \begin{description}
   6.381 +
   6.382 +  \item Type @{ML_type term} represents de-Bruijn terms, with comments
   6.383 +  in abstractions, and explicitly named free variables and constants;
   6.384 +  this is a datatype with constructors @{index_ML Bound}, @{index_ML
   6.385 +  Free}, @{index_ML Var}, @{index_ML Const}, @{index_ML Abs},
   6.386 +  @{index_ML_op "$"}.
   6.387 +
   6.388 +  \item @{text "t"}~@{ML_text aconv}~@{text "u"} checks @{text
   6.389 +  "\<alpha>"}-equivalence of two terms.  This is the basic equality relation
   6.390 +  on type @{ML_type term}; raw datatype equality should only be used
   6.391 +  for operations related to parsing or printing!
   6.392 +
   6.393 +  \item @{ML Term.map_types}~@{text "f t"} applies the mapping @{text
   6.394 +  "f"} to all types occurring in @{text "t"}.
   6.395 +
   6.396 +  \item @{ML Term.fold_types}~@{text "f t"} iterates the operation
   6.397 +  @{text "f"} over all occurrences of types in @{text "t"}; the term
   6.398 +  structure is traversed from left to right.
   6.399 +
   6.400 +  \item @{ML Term.map_aterms}~@{text "f t"} applies the mapping @{text
   6.401 +  "f"} to all atomic terms (@{ML Bound}, @{ML Free}, @{ML Var}, @{ML
   6.402 +  Const}) occurring in @{text "t"}.
   6.403 +
   6.404 +  \item @{ML Term.fold_aterms}~@{text "f t"} iterates the operation
   6.405 +  @{text "f"} over all occurrences of atomic terms (@{ML Bound}, @{ML
   6.406 +  Free}, @{ML Var}, @{ML Const}) in @{text "t"}; the term structure is
   6.407 +  traversed from left to right.
   6.408 +
   6.409 +  \item @{ML fastype_of}~@{text "t"} determines the type of a
   6.410 +  well-typed term.  This operation is relatively slow, despite the
   6.411 +  omission of any sanity checks.
   6.412 +
   6.413 +  \item @{ML lambda}~@{text "a b"} produces an abstraction @{text
   6.414 +  "\<lambda>a. b"}, where occurrences of the atomic term @{text "a"} in the
   6.415 +  body @{text "b"} are replaced by bound variables.
   6.416 +
   6.417 +  \item @{ML betapply}~@{text "(t, u)"} produces an application @{text
   6.418 +  "t u"}, with topmost @{text "\<beta>"}-conversion if @{text "t"} is an
   6.419 +  abstraction.
   6.420 +
   6.421 +  \item @{ML incr_boundvars}~@{text "j"} increments a term's dangling
   6.422 +  bound variables by the offset @{text "j"}.  This is required when
   6.423 +  moving a subterm into a context where it is enclosed by a different
   6.424 +  number of abstractions.  Bound variables with a matching abstraction
   6.425 +  are unaffected.
   6.426 +
   6.427 +  \item @{ML Sign.declare_const}~@{text "ctxt ((c, \<sigma>), mx)"} declares
   6.428 +  a new constant @{text "c :: \<sigma>"} with optional mixfix syntax.
   6.429 +
   6.430 +  \item @{ML Sign.add_abbrev}~@{text "print_mode (c, t)"}
   6.431 +  introduces a new term abbreviation @{text "c \<equiv> t"}.
   6.432 +
   6.433 +  \item @{ML Sign.const_typargs}~@{text "thy (c, \<tau>)"} and @{ML
   6.434 +  Sign.const_instance}~@{text "thy (c, [\<tau>\<^sub>1, \<dots>, \<tau>\<^sub>n])"}
   6.435 +  convert between two representations of polymorphic constants: full
   6.436 +  type instance vs.\ compact type arguments form.
   6.437 +
   6.438 +  \end{description}
   6.439 +*}
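          +
          +text %mlex {* A minimal sketch of term construction and inspection; the
          +  term @{text "x + 1"} over type @{text "nat"} is chosen ad hoc: *}
          +
          +ML {*
          +  (*build the abstraction over the free variable x*)
          +  val t = lambda @{term "x :: nat"} @{term "x + 1 :: nat"};
          +  val _ = @{assert} (t aconv @{term "\<lambda>x::nat. x + 1"});
          +
          +  (*apply it to 2 with topmost beta-contraction and compute the type*)
          +  val u = betapply (t, @{term "2 :: nat"});
          +  val _ = @{assert} (fastype_of u = @{typ nat});
          +*}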
   6.440 +
   6.441 +text %mlantiq {*
   6.442 +  \begin{matharray}{rcl}
   6.443 +  @{ML_antiquotation_def "const_name"} & : & @{text ML_antiquotation} \\
   6.444 +  @{ML_antiquotation_def "const_abbrev"} & : & @{text ML_antiquotation} \\
   6.445 +  @{ML_antiquotation_def "const"} & : & @{text ML_antiquotation} \\
   6.446 +  @{ML_antiquotation_def "term"} & : & @{text ML_antiquotation} \\
   6.447 +  @{ML_antiquotation_def "prop"} & : & @{text ML_antiquotation} \\
   6.448 +  \end{matharray}
   6.449 +
   6.450 +  @{rail \<open>
   6.451 +  (@@{ML_antiquotation const_name} |
   6.452 +   @@{ML_antiquotation const_abbrev}) nameref
   6.453 +  ;
   6.454 +  @@{ML_antiquotation const} ('(' (type + ',') ')')?
   6.455 +  ;
   6.456 +  @@{ML_antiquotation term} term
   6.457 +  ;
   6.458 +  @@{ML_antiquotation prop} prop
   6.459 +  \<close>}
   6.460 +
   6.461 +  \begin{description}
   6.462 +
   6.463 +  \item @{text "@{const_name c}"} inlines the internalized logical
   6.464 +  constant name @{text "c"} --- as @{ML_type string} literal.
   6.465 +
   6.466 +  \item @{text "@{const_abbrev c}"} inlines the internalized
   6.467 +  abbreviated constant name @{text "c"} --- as @{ML_type string}
   6.468 +  literal.
   6.469 +
   6.470 +  \item @{text "@{const c(\<^vec>\<tau>)}"} inlines the internalized
   6.471 +  constant @{text "c"} with precise type instantiation in the sense of
   6.472 +  @{ML Sign.const_instance} --- as @{ML Const} constructor term for
   6.473 +  datatype @{ML_type term}.
   6.474 +
   6.475 +  \item @{text "@{term t}"} inlines the internalized term @{text "t"}
   6.476 +  --- as constructor term for datatype @{ML_type term}.
   6.477 +
   6.478 +  \item @{text "@{prop \<phi>}"} inlines the internalized proposition
   6.479 +  @{text "\<phi>"} --- as constructor term for datatype @{ML_type term}.
   6.480 +
   6.481 +  \end{description}
   6.482 +*}
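          +
          +text %mlex {* For example (a sketch over the usual HOL vocabulary): *}
          +
          +ML {*
          +  val plus_name = @{const_name plus};        (*fully qualified name*)
          +  val incr = @{term "\<lambda>x::nat. x + 1"};       (*term value*)
          +  val less_prop = @{prop "x < (y::nat)"};    (*proposition of type prop*)
          +*}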
   6.483 +
   6.484 +
   6.485 +section {* Theorems \label{sec:thms} *}
   6.486 +
   6.487 +text {*
   6.488 +  A \emph{proposition} is a well-typed term of type @{text "prop"}, a
   6.489 +  \emph{theorem} is a proven proposition (depending on a context of
   6.490 +  hypotheses and the background theory).  Primitive inferences include
   6.491 +  plain Natural Deduction rules for the primary connectives @{text
   6.492 +  "\<And>"} and @{text "\<Longrightarrow>"} of the framework.  There is also a builtin
   6.493 +  notion of equality/equivalence @{text "\<equiv>"}.
   6.494 +*}
   6.495 +
   6.496 +
   6.497 +subsection {* Primitive connectives and rules \label{sec:prim-rules} *}
   6.498 +
   6.499 +text {*
   6.500 +  The theory @{text "Pure"} contains constant declarations for the
   6.501 +  primitive connectives @{text "\<And>"}, @{text "\<Longrightarrow>"}, and @{text "\<equiv>"} of
   6.502 +  the logical framework, see \figref{fig:pure-connectives}.  The
   6.503 +  derivability judgment @{text "A\<^sub>1, \<dots>, A\<^sub>n \<turnstile> B"} is
   6.504 +  defined inductively by the primitive inferences given in
   6.505 +  \figref{fig:prim-rules}, with the global restriction that the
   6.506 +  hypotheses must \emph{not} contain any schematic variables.  The
   6.507 +  builtin equality is conceptually axiomatized as shown in
   6.508 +  \figref{fig:pure-equality}, although the implementation works
   6.509 +  directly with derived inferences.
   6.510 +
   6.511 +  \begin{figure}[htb]
   6.512 +  \begin{center}
   6.513 +  \begin{tabular}{ll}
   6.514 +  @{text "all :: (\<alpha> \<Rightarrow> prop) \<Rightarrow> prop"} & universal quantification (binder @{text "\<And>"}) \\
   6.515 +  @{text "\<Longrightarrow> :: prop \<Rightarrow> prop \<Rightarrow> prop"} & implication (right associative infix) \\
   6.516 +  @{text "\<equiv> :: \<alpha> \<Rightarrow> \<alpha> \<Rightarrow> prop"} & equality relation (infix) \\
   6.517 +  \end{tabular}
   6.518 +  \caption{Primitive connectives of Pure}\label{fig:pure-connectives}
   6.519 +  \end{center}
   6.520 +  \end{figure}
   6.521 +
   6.522 +  \begin{figure}[htb]
   6.523 +  \begin{center}
   6.524 +  \[
   6.525 +  \infer[@{text "(axiom)"}]{@{text "\<turnstile> A"}}{@{text "A \<in> \<Theta>"}}
   6.526 +  \qquad
   6.527 +  \infer[@{text "(assume)"}]{@{text "A \<turnstile> A"}}{}
   6.528 +  \]
   6.529 +  \[
   6.530 +  \infer[@{text "(\<And>\<hyphen>intro)"}]{@{text "\<Gamma> \<turnstile> \<And>x. B[x]"}}{@{text "\<Gamma> \<turnstile> B[x]"} & @{text "x \<notin> \<Gamma>"}}
   6.531 +  \qquad
   6.532 +  \infer[@{text "(\<And>\<hyphen>elim)"}]{@{text "\<Gamma> \<turnstile> B[a]"}}{@{text "\<Gamma> \<turnstile> \<And>x. B[x]"}}
   6.533 +  \]
   6.534 +  \[
   6.535 +  \infer[@{text "(\<Longrightarrow>\<hyphen>intro)"}]{@{text "\<Gamma> - A \<turnstile> A \<Longrightarrow> B"}}{@{text "\<Gamma> \<turnstile> B"}}
   6.536 +  \qquad
   6.537 +  \infer[@{text "(\<Longrightarrow>\<hyphen>elim)"}]{@{text "\<Gamma>\<^sub>1 \<union> \<Gamma>\<^sub>2 \<turnstile> B"}}{@{text "\<Gamma>\<^sub>1 \<turnstile> A \<Longrightarrow> B"} & @{text "\<Gamma>\<^sub>2 \<turnstile> A"}}
   6.538 +  \]
   6.539 +  \caption{Primitive inferences of Pure}\label{fig:prim-rules}
   6.540 +  \end{center}
   6.541 +  \end{figure}
   6.542 +
   6.543 +  \begin{figure}[htb]
   6.544 +  \begin{center}
   6.545 +  \begin{tabular}{ll}
   6.546 +  @{text "\<turnstile> (\<lambda>x. b[x]) a \<equiv> b[a]"} & @{text "\<beta>"}-conversion \\
   6.547 +  @{text "\<turnstile> x \<equiv> x"} & reflexivity \\
   6.548 +  @{text "\<turnstile> x \<equiv> y \<Longrightarrow> P x \<Longrightarrow> P y"} & substitution \\
   6.549 +  @{text "\<turnstile> (\<And>x. f x \<equiv> g x) \<Longrightarrow> f \<equiv> g"} & extensionality \\
   6.550 +  @{text "\<turnstile> (A \<Longrightarrow> B) \<Longrightarrow> (B \<Longrightarrow> A) \<Longrightarrow> A \<equiv> B"} & logical equivalence \\
   6.551 +  \end{tabular}
   6.552 +  \caption{Conceptual axiomatization of Pure equality}\label{fig:pure-equality}
   6.553 +  \end{center}
   6.554 +  \end{figure}
   6.555 +
   6.556 +  The introduction and elimination rules for @{text "\<And>"} and @{text
   6.557 +  "\<Longrightarrow>"} are analogous to formation of dependently typed @{text
   6.558 +  "\<lambda>"}-terms representing the underlying proof objects.  Proof terms
   6.559 +  are irrelevant in the Pure logic, though; they cannot occur within
   6.560 +  propositions.  The system provides a runtime option to record
   6.561 +  explicit proof terms for primitive inferences, see also
   6.562 +  \secref{sec:proof-terms}.  Thus all three levels of @{text
   6.563 +  "\<lambda>"}-calculus become explicit: @{text "\<Rightarrow>"} for terms, and @{text
   6.564 +  "\<And>/\<Longrightarrow>"} for proofs (cf.\ \cite{Berghofer-Nipkow:2000:TPHOL}).
   6.565 +
   6.566 +  Observe that locally fixed parameters (as in @{text
   6.567 +  "\<And>\<hyphen>intro"}) need not be recorded in the hypotheses, because
   6.568 +  the simple syntactic types of Pure are always inhabitable.
   6.569 +  ``Assumptions'' @{text "x :: \<tau>"} for type-membership are only
   6.570 +  present as long as some @{text "x\<^sub>\<tau>"} occurs in the statement
   6.571 +  body.\footnote{This is the key difference to ``@{text "\<lambda>HOL"}'' in
   6.572 +  the PTS framework \cite{Barendregt-Geuvers:2001}, where hypotheses
   6.573 +  @{text "x : A"} are treated uniformly for propositions and types.}
   6.574 +
   6.575 +  \medskip The axiomatization of a theory is implicitly closed by
   6.576 +  forming all instances of type and term variables: @{text "\<turnstile>
   6.577 +  A\<vartheta>"} holds for any substitution instance of an axiom
   6.578 +  @{text "\<turnstile> A"}.  By pushing substitutions through derivations
   6.579 +  inductively, we also get admissible @{text "generalize"} and @{text
   6.580 +  "instantiate"} rules as shown in \figref{fig:subst-rules}.
   6.581 +
   6.582 +  \begin{figure}[htb]
   6.583 +  \begin{center}
   6.584 +  \[
   6.585 +  \infer{@{text "\<Gamma> \<turnstile> B[?\<alpha>]"}}{@{text "\<Gamma> \<turnstile> B[\<alpha>]"} & @{text "\<alpha> \<notin> \<Gamma>"}}
   6.586 +  \quad
   6.587 +  \infer[\quad@{text "(generalize)"}]{@{text "\<Gamma> \<turnstile> B[?x]"}}{@{text "\<Gamma> \<turnstile> B[x]"} & @{text "x \<notin> \<Gamma>"}}
   6.588 +  \]
   6.589 +  \[
   6.590 +  \infer{@{text "\<Gamma> \<turnstile> B[\<tau>]"}}{@{text "\<Gamma> \<turnstile> B[?\<alpha>]"}}
   6.591 +  \quad
   6.592 +  \infer[\quad@{text "(instantiate)"}]{@{text "\<Gamma> \<turnstile> B[t]"}}{@{text "\<Gamma> \<turnstile> B[?x]"}}
   6.593 +  \]
   6.594 +  \caption{Admissible substitution rules}\label{fig:subst-rules}
   6.595 +  \end{center}
   6.596 +  \end{figure}
   6.597 +
   6.598 +  Note that @{text "instantiate"} does not require an explicit
   6.599 +  side-condition, because @{text "\<Gamma>"} may never contain schematic
   6.600 +  variables.
   6.601 +
   6.602 +  In principle, variables could be substituted in hypotheses as well,
   6.603 +  but this would disrupt the monotonicity of reasoning: deriving
   6.604 +  @{text "\<Gamma>\<vartheta> \<turnstile> B\<vartheta>"} from @{text "\<Gamma> \<turnstile> B"} is
   6.605 +  correct, but @{text "\<Gamma>\<vartheta> \<supseteq> \<Gamma>"} does not necessarily hold:
   6.606 +  the result belongs to a different proof context.
   6.607 +
   6.608 +  \medskip An \emph{oracle} is a function that produces axioms on the
   6.609 +  fly.  Logically, this is an instance of the @{text "axiom"} rule
   6.610 +  (\figref{fig:prim-rules}), but there is an operational difference.
   6.611 +  The system always records oracle invocations within derivations of
   6.612 +  theorems by a unique tag.
   6.613 +
   6.614 +  Axiomatizations should be limited to the bare minimum, typically as
   6.615 +  part of the initial logical basis of an object-logic formalization.
   6.616 +  Later on, theories are usually developed in a strictly definitional
   6.617 +  fashion, by stating only certain equalities over new constants.
   6.618 +
   6.619 +  A \emph{simple definition} consists of a constant declaration @{text
   6.620 +  "c :: \<sigma>"} together with an axiom @{text "\<turnstile> c \<equiv> t"}, where @{text "t
   6.621 +  :: \<sigma>"} is a closed term without any hidden polymorphism.  The RHS
   6.622 +  may depend on further defined constants, but not @{text "c"} itself.
   6.623 +  Definitions of functions may be presented as @{text "c \<^vec>x \<equiv>
   6.624 +  t"} instead of the puristic @{text "c \<equiv> \<lambda>\<^vec>x. t"}.
   6.625 +
   6.626 +  An \emph{overloaded definition} consists of a collection of axioms
   6.627 +  for the same constant, with zero or one equations @{text
   6.628 +  "c((\<^vec>\<alpha>)\<kappa>) \<equiv> t"} for each type constructor @{text "\<kappa>"} (for
   6.629 +  distinct variables @{text "\<^vec>\<alpha>"}).  The RHS may mention
   6.630 +  previously defined constants as above, or arbitrary constants @{text
   6.631 +  "d(\<alpha>\<^sub>i)"} for some @{text "\<alpha>\<^sub>i"} projected from @{text
   6.632 +  "\<^vec>\<alpha>"}.  Thus overloaded definitions essentially work by
   6.633 +  primitive recursion over the syntactic structure of a single type
   6.634 +  argument.  See also \cite[\S4.3]{Haftmann-Wenzel:2006:classes}.
   6.635 +*}
   6.636 +
   6.637 +text %mlref {*
   6.638 +  \begin{mldecls}
   6.639 +  @{index_ML Logic.all: "term -> term -> term"} \\
   6.640 +  @{index_ML Logic.mk_implies: "term * term -> term"} \\
   6.641 +  \end{mldecls}
   6.642 +  \begin{mldecls}
   6.643 +  @{index_ML_type ctyp} \\
   6.644 +  @{index_ML_type cterm} \\
   6.645 +  @{index_ML Thm.ctyp_of: "theory -> typ -> ctyp"} \\
   6.646 +  @{index_ML Thm.cterm_of: "theory -> term -> cterm"} \\
   6.647 +  @{index_ML Thm.apply: "cterm -> cterm -> cterm"} \\
   6.648 +  @{index_ML Thm.lambda: "cterm -> cterm -> cterm"} \\
   6.649 +  @{index_ML Thm.all: "cterm -> cterm -> cterm"} \\
   6.650 +  @{index_ML Drule.mk_implies: "cterm * cterm -> cterm"} \\
   6.651 +  \end{mldecls}
   6.652 +  \begin{mldecls}
   6.653 +  @{index_ML_type thm} \\
   6.654 +  @{index_ML Thm.peek_status: "thm -> {oracle: bool, unfinished: bool, failed: bool}"} \\
   6.655 +  @{index_ML Thm.transfer: "theory -> thm -> thm"} \\
   6.656 +  @{index_ML Thm.assume: "cterm -> thm"} \\
   6.657 +  @{index_ML Thm.forall_intr: "cterm -> thm -> thm"} \\
   6.658 +  @{index_ML Thm.forall_elim: "cterm -> thm -> thm"} \\
   6.659 +  @{index_ML Thm.implies_intr: "cterm -> thm -> thm"} \\
   6.660 +  @{index_ML Thm.implies_elim: "thm -> thm -> thm"} \\
   6.661 +  @{index_ML Thm.generalize: "string list * string list -> int -> thm -> thm"} \\
   6.662 +  @{index_ML Thm.instantiate: "(ctyp * ctyp) list * (cterm * cterm) list -> thm -> thm"} \\
   6.663 +  @{index_ML Thm.add_axiom: "Proof.context ->
   6.664 +  binding * term -> theory -> (string * thm) * theory"} \\
   6.665 +  @{index_ML Thm.add_oracle: "binding * ('a -> cterm) -> theory ->
   6.666 +  (string * ('a -> thm)) * theory"} \\
   6.667 +  @{index_ML Thm.add_def: "Proof.context -> bool -> bool ->
   6.668 +  binding * term -> theory -> (string * thm) * theory"} \\
   6.669 +  \end{mldecls}
   6.670 +  \begin{mldecls}
   6.671 +  @{index_ML Theory.add_deps: "Proof.context -> string ->
   6.672 +  string * typ -> (string * typ) list -> theory -> theory"} \\
   6.673 +  \end{mldecls}
   6.674 +
   6.675 +  \begin{description}
   6.676 +
   6.677 +  \item @{ML Thm.peek_status}~@{text "thm"} informs about the current
   6.678 +  status of the derivation object behind the given theorem.  This is a
   6.679 +  snapshot of a potentially ongoing (parallel) evaluation of proofs.
   6.680 +  The three Boolean values indicate the following: @{verbatim oracle}
   6.681 +  if the finished part contains some oracle invocation; @{verbatim
   6.682 +  unfinished} if some future proofs are still pending; @{verbatim
   6.683 +  failed} if some future proof has failed, rendering the theorem
   6.684 +  invalid!
   6.685 +
   6.686 +  \item @{ML Logic.all}~@{text "a B"} produces a Pure quantification
   6.687 +  @{text "\<And>a. B"}, where occurrences of the atomic term @{text "a"} in
   6.688 +  the body proposition @{text "B"} are replaced by bound variables.
   6.689 +  (See also @{ML lambda} on terms.)
   6.690 +
   6.691 +  \item @{ML Logic.mk_implies}~@{text "(A, B)"} produces a Pure
   6.692 +  implication @{text "A \<Longrightarrow> B"}.
   6.693 +
   6.694 +  \item Types @{ML_type ctyp} and @{ML_type cterm} represent certified
   6.695 +  types and terms, respectively.  These are abstract datatypes that
   6.696 +  guarantee that its values have passed the full well-formedness (and
   6.697 +  well-typedness) checks, relative to the declarations of type
   6.698 +  constructors, constants etc.\ in the background theory.  The
   6.699 +  abstract types @{ML_type ctyp} and @{ML_type cterm} are part of the
   6.700 +  same inference kernel that is mainly responsible for @{ML_type thm}.
   6.701 +  Thus syntactic operations on @{ML_type ctyp} and @{ML_type cterm}
   6.702 +  are located in the @{ML_structure Thm} module, even though theorems are
   6.703 +  not yet involved at that stage.
   6.704 +
   6.705 +  \item @{ML Thm.ctyp_of}~@{text "thy \<tau>"} and @{ML
    6.706 +  Thm.cterm_of}~@{text "thy t"} explicitly check types and terms,
    6.707 +  respectively.  This also involves some basic normalizations, such as
    6.708 +  expansion of type and term abbreviations from the theory context.
   6.709 +  Full re-certification is relatively slow and should be avoided in
   6.710 +  tight reasoning loops.
   6.711 +
   6.712 +  \item @{ML Thm.apply}, @{ML Thm.lambda}, @{ML Thm.all}, @{ML
   6.713 +  Drule.mk_implies} etc.\ compose certified terms (or propositions)
   6.714 +  incrementally.  This is equivalent to @{ML Thm.cterm_of} after
   6.715 +  unchecked @{ML_op "$"}, @{ML lambda}, @{ML Logic.all}, @{ML
   6.716 +  Logic.mk_implies} etc., but there can be a big difference in
   6.717 +  performance when large existing entities are composed by a few extra
   6.718 +  constructions on top.  There are separate operations to decompose
   6.719 +  certified terms and theorems to produce certified terms again.
   6.720 +
   6.721 +  \item Type @{ML_type thm} represents proven propositions.  This is
   6.722 +  an abstract datatype that guarantees that its values have been
   6.723 +  constructed by basic principles of the @{ML_structure Thm} module.
    6.724 +  Every @{ML_type thm} value refers to its background theory,
   6.725 +  cf.\ \secref{sec:context-theory}.
   6.726 +
   6.727 +  \item @{ML Thm.transfer}~@{text "thy thm"} transfers the given
   6.728 +  theorem to a \emph{larger} theory, see also \secref{sec:context}.
   6.729 +  This formal adjustment of the background context has no logical
   6.730 +  significance, but is occasionally required for formal reasons, e.g.\
   6.731 +  when theorems that are imported from more basic theories are used in
   6.732 +  the current situation.
   6.733 +
   6.734 +  \item @{ML Thm.assume}, @{ML Thm.forall_intr}, @{ML
   6.735 +  Thm.forall_elim}, @{ML Thm.implies_intr}, and @{ML Thm.implies_elim}
   6.736 +  correspond to the primitive inferences of \figref{fig:prim-rules}.
   6.737 +
   6.738 +  \item @{ML Thm.generalize}~@{text "(\<^vec>\<alpha>, \<^vec>x)"}
   6.739 +  corresponds to the @{text "generalize"} rules of
   6.740 +  \figref{fig:subst-rules}.  Here collections of type and term
   6.741 +  variables are generalized simultaneously, specified by the given
   6.742 +  basic names.
   6.743 +
   6.744 +  \item @{ML Thm.instantiate}~@{text "(\<^vec>\<alpha>\<^sub>s,
   6.745 +  \<^vec>x\<^sub>\<tau>)"} corresponds to the @{text "instantiate"} rules
   6.746 +  of \figref{fig:subst-rules}.  Type variables are substituted before
   6.747 +  term variables.  Note that the types in @{text "\<^vec>x\<^sub>\<tau>"}
   6.748 +  refer to the instantiated versions.
   6.749 +
   6.750 +  \item @{ML Thm.add_axiom}~@{text "ctxt (name, A)"} declares an
   6.751 +  arbitrary proposition as axiom, and retrieves it as a theorem from
   6.752 +  the resulting theory, cf.\ @{text "axiom"} in
   6.753 +  \figref{fig:prim-rules}.  Note that the low-level representation in
   6.754 +  the axiom table may differ slightly from the returned theorem.
   6.755 +
   6.756 +  \item @{ML Thm.add_oracle}~@{text "(binding, oracle)"} produces a named
   6.757 +  oracle rule, essentially generating arbitrary axioms on the fly,
   6.758 +  cf.\ @{text "axiom"} in \figref{fig:prim-rules}.
   6.759 +
   6.760 +  \item @{ML Thm.add_def}~@{text "ctxt unchecked overloaded (name, c
   6.761 +  \<^vec>x \<equiv> t)"} states a definitional axiom for an existing constant
   6.762 +  @{text "c"}.  Dependencies are recorded via @{ML Theory.add_deps},
   6.763 +  unless the @{text "unchecked"} option is set.  Note that the
   6.764 +  low-level representation in the axiom table may differ slightly from
   6.765 +  the returned theorem.
   6.766 +
   6.767 +  \item @{ML Theory.add_deps}~@{text "ctxt name c\<^sub>\<tau> \<^vec>d\<^sub>\<sigma>"}
   6.768 +  declares dependencies of a named specification for constant @{text
   6.769 +  "c\<^sub>\<tau>"}, relative to existing specifications for constants @{text
   6.770 +  "\<^vec>d\<^sub>\<sigma>"}.
   6.771 +
   6.772 +  \end{description}
   6.773 +*}
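          +
          +text %mlex {* The following ML text performs a few primitive inferences
          +  by hand, deriving the trivial implication @{text "A \<Longrightarrow> A"} and then
          +  generalizing over the (arbitrarily chosen) free variable @{text "A"};
          +  this is merely an illustrative sketch of the kernel interface: *}
          +
          +ML {*
          +  val thy = @{theory};
          +  val A = Thm.cterm_of thy @{prop "A"};    (*certified proposition*)
          +  val asm = Thm.assume A;                  (*A |- A*)
          +  val A_imp_A = Thm.implies_intr A asm;    (*|- A ==> A*)
          +  val gen =                                (*|- !!A. A ==> A*)
          +    Thm.forall_intr (Thm.cterm_of thy @{term "A :: bool"}) A_imp_A;
          +*}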
   6.774 +
   6.775 +
   6.776 +text %mlantiq {*
   6.777 +  \begin{matharray}{rcl}
   6.778 +  @{ML_antiquotation_def "ctyp"} & : & @{text ML_antiquotation} \\
   6.779 +  @{ML_antiquotation_def "cterm"} & : & @{text ML_antiquotation} \\
   6.780 +  @{ML_antiquotation_def "cprop"} & : & @{text ML_antiquotation} \\
   6.781 +  @{ML_antiquotation_def "thm"} & : & @{text ML_antiquotation} \\
   6.782 +  @{ML_antiquotation_def "thms"} & : & @{text ML_antiquotation} \\
   6.783 +  @{ML_antiquotation_def "lemma"} & : & @{text ML_antiquotation} \\
   6.784 +  \end{matharray}
   6.785 +
   6.786 +  @{rail \<open>
   6.787 +  @@{ML_antiquotation ctyp} typ
   6.788 +  ;
   6.789 +  @@{ML_antiquotation cterm} term
   6.790 +  ;
   6.791 +  @@{ML_antiquotation cprop} prop
   6.792 +  ;
   6.793 +  @@{ML_antiquotation thm} thmref
   6.794 +  ;
   6.795 +  @@{ML_antiquotation thms} thmrefs
   6.796 +  ;
   6.797 +  @@{ML_antiquotation lemma} ('(' @'open' ')')? ((prop +) + @'and') \<newline>
   6.798 +    @'by' method method?
   6.799 +  \<close>}
   6.800 +
   6.801 +  \begin{description}
   6.802 +
   6.803 +  \item @{text "@{ctyp \<tau>}"} produces a certified type wrt.\ the
   6.804 +  current background theory --- as abstract value of type @{ML_type
   6.805 +  ctyp}.
   6.806 +
   6.807 +  \item @{text "@{cterm t}"} and @{text "@{cprop \<phi>}"} produce a
   6.808 +  certified term wrt.\ the current background theory --- as abstract
   6.809 +  value of type @{ML_type cterm}.
   6.810 +
   6.811 +  \item @{text "@{thm a}"} produces a singleton fact --- as abstract
   6.812 +  value of type @{ML_type thm}.
   6.813 +
   6.814 +  \item @{text "@{thms a}"} produces a general fact --- as abstract
   6.815 +  value of type @{ML_type "thm list"}.
   6.816 +
   6.817 +  \item @{text "@{lemma \<phi> by meth}"} produces a fact that is proven on
   6.818 +  the spot according to the minimal proof, which imitates a terminal
   6.819 +  Isar proof.  The result is an abstract value of type @{ML_type thm}
   6.820 +  or @{ML_type "thm list"}, depending on the number of propositions
   6.821 +  given here.
   6.822 +
   6.823 +  The internal derivation object lacks a proper theorem name, but it
   6.824 +  is formally closed, unless the @{text "(open)"} option is specified
   6.825 +  (this may impact performance of applications with proof terms).
   6.826 +
   6.827 +  Since ML antiquotations are always evaluated at compile-time, there
   6.828 +  is no run-time overhead even for non-trivial proofs.  Nonetheless,
   6.829 +  the justification is syntactically limited to a single @{command
   6.830 +  "by"} step.  More complex Isar proofs should be done in regular
   6.831 +  theory source, before compiling the corresponding ML text that uses
   6.832 +  the result.
   6.833 +
   6.834 +  \end{description}
   6.835 +
   6.836 +*}
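         +
         +text %mlex {* As a minimal illustrative sketch (assuming the usual
         +  Isabelle/HOL facts @{thm [source] conjI} and @{thm [source] conjE}
         +  and the @{text blast} method are available in the current context),
         +  the above antiquotations may be used in ML text as follows: *}
         +
         +ML {*
         +  val ct = @{cterm "A \<and> B"};
         +  val th = @{thm conjI};
         +  val ths = @{thms conjI conjE};
         +  val th' = @{lemma "A \<and> B \<longrightarrow> B \<and> A" by blast};
         +*}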
   6.837 +
   6.838 +
   6.839 +subsection {* Auxiliary connectives \label{sec:logic-aux} *}
   6.840 +
   6.841 +text {* Theory @{text "Pure"} provides a few auxiliary connectives
   6.842 +  that are defined on top of the primitive ones, see
   6.843 +  \figref{fig:pure-aux}.  These special constants are useful in
   6.844 +  certain internal encodings, and are normally not directly exposed to
   6.845 +  the user.
   6.846 +
   6.847 +  \begin{figure}[htb]
   6.848 +  \begin{center}
   6.849 +  \begin{tabular}{ll}
   6.850 +  @{text "conjunction :: prop \<Rightarrow> prop \<Rightarrow> prop"} & (infix @{text "&&&"}) \\
   6.851 +  @{text "\<turnstile> A &&& B \<equiv> (\<And>C. (A \<Longrightarrow> B \<Longrightarrow> C) \<Longrightarrow> C)"} \\[1ex]
   6.852 +  @{text "prop :: prop \<Rightarrow> prop"} & (prefix @{text "#"}, suppressed) \\
   6.853 +  @{text "#A \<equiv> A"} \\[1ex]
   6.854 +  @{text "term :: \<alpha> \<Rightarrow> prop"} & (prefix @{text "TERM"}) \\
   6.855 +  @{text "term x \<equiv> (\<And>A. A \<Longrightarrow> A)"} \\[1ex]
   6.856 +  @{text "type :: \<alpha> itself"} & (prefix @{text "TYPE"}) \\
   6.857 +  @{text "(unspecified)"} \\
   6.858 +  \end{tabular}
   6.859 +  \caption{Definitions of auxiliary connectives}\label{fig:pure-aux}
   6.860 +  \end{center}
   6.861 +  \end{figure}
   6.862 +
   6.863 +  The introduction @{text "A \<Longrightarrow> B \<Longrightarrow> A &&& B"}, and eliminations
   6.864 +  (projections) @{text "A &&& B \<Longrightarrow> A"} and @{text "A &&& B \<Longrightarrow> B"} are
   6.865 +  available as derived rules.  Conjunction makes it possible to treat
   6.866 +  simultaneous assumptions and conclusions uniformly, e.g.\ consider
   6.867 +  @{text "A \<Longrightarrow> B \<Longrightarrow> C &&& D"}.  In particular, the goal mechanism
   6.868 +  represents multiple claims as explicit conjunction internally, but
   6.869 +  this is refined (via backwards introduction) into separate sub-goals
   6.870 +  before the user commences the proof; the final result is projected
   6.871 +  into a list of theorems using eliminations (cf.\
   6.872 +  \secref{sec:tactical-goals}).
   6.873 +
   6.874 +  The @{text "prop"} marker (@{text "#"}) makes arbitrarily complex
   6.875 +  propositions appear as atomic, without changing the meaning: @{text
   6.876 +  "\<Gamma> \<turnstile> A"} and @{text "\<Gamma> \<turnstile> #A"} are interchangeable.  See
   6.877 +  \secref{sec:tactical-goals} for specific operations.
   6.878 +
   6.879 +  The @{text "term"} marker turns any well-typed term into a derivable
   6.880 +  proposition: @{text "\<turnstile> TERM t"} holds unconditionally.  Although
   6.881 +  this is logically vacuous, it makes it possible to treat terms and proofs
   6.882 +  uniformly, similar to a type-theoretic framework.
   6.883 +
   6.884 +  The @{text "TYPE"} constructor is the canonical representative of
   6.885 +  the unspecified type @{text "\<alpha> itself"}; it essentially injects the
   6.886 +  language of types into that of terms.  There is specific notation
   6.887 +  @{text "TYPE(\<tau>)"} for @{text "TYPE\<^bsub>\<tau> itself\<^esub>"}.
   6.888 +  Although devoid of any particular meaning, the term @{text
   6.889 +  "TYPE(\<tau>)"} accounts for the type @{text "\<tau>"} within the term
   6.890 +  language.  In particular, @{text "TYPE(\<alpha>)"} may be used as formal
   6.891 +  argument in primitive definitions, in order to circumvent hidden
   6.892 +  polymorphism (cf.\ \secref{sec:terms}).  For example, @{text "c
   6.893 +  TYPE(\<alpha>) \<equiv> A[\<alpha>]"} defines @{text "c :: \<alpha> itself \<Rightarrow> prop"} in terms of
   6.894 +  a proposition @{text "A"} that depends on an additional type
   6.895 +  argument, which is essentially a predicate on types.
   6.896 +*}
   6.897 +
   6.898 +text %mlref {*
   6.899 +  \begin{mldecls}
   6.900 +  @{index_ML Conjunction.intr: "thm -> thm -> thm"} \\
   6.901 +  @{index_ML Conjunction.elim: "thm -> thm * thm"} \\
   6.902 +  @{index_ML Drule.mk_term: "cterm -> thm"} \\
   6.903 +  @{index_ML Drule.dest_term: "thm -> cterm"} \\
   6.904 +  @{index_ML Logic.mk_type: "typ -> term"} \\
   6.905 +  @{index_ML Logic.dest_type: "term -> typ"} \\
   6.906 +  \end{mldecls}
   6.907 +
   6.908 +  \begin{description}
   6.909 +
   6.910 +  \item @{ML Conjunction.intr} derives @{text "A &&& B"} from @{text
   6.911 +  "A"} and @{text "B"}.
   6.912 +
   6.913 +  \item @{ML Conjunction.elim} derives @{text "A"} and @{text "B"}
   6.914 +  from @{text "A &&& B"}.
   6.915 +
   6.916 +  \item @{ML Drule.mk_term} derives @{text "TERM t"}.
   6.917 +
   6.918 +  \item @{ML Drule.dest_term} recovers term @{text "t"} from @{text
   6.919 +  "TERM t"}.
   6.920 +
   6.921 +  \item @{ML Logic.mk_type}~@{text "\<tau>"} produces the term @{text
   6.922 +  "TYPE(\<tau>)"}.
   6.923 +
   6.924 +  \item @{ML Logic.dest_type}~@{text "TYPE(\<tau>)"} recovers the type
   6.925 +  @{text "\<tau>"}.
   6.926 +
   6.927 +  \end{description}
   6.928 +*}
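         +
         +text %mlex {* A brief sketch of these operations (using the Isabelle/HOL
         +  facts @{thm [source] TrueI} and @{thm [source] refl} merely as
         +  convenient sample theorems): *}
         +
         +ML {*
         +  (*introduction and elimination of &&&*)
         +  val conj = Conjunction.intr @{thm TrueI} @{thm refl};
         +  val (th1, th2) = Conjunction.elim conj;
         +
         +  (*turn a term into the derivable proposition "TERM t" and back*)
         +  val term_th = Drule.mk_term @{cterm "0 :: nat"};
         +  val ct = Drule.dest_term term_th;
         +
         +  (*represent a type within the term language and recover it*)
         +  val type_tm = Logic.mk_type @{typ nat};
         +  val T = Logic.dest_type type_tm;
         +*}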
   6.929 +
   6.930 +
   6.931 +subsection {* Sort hypotheses *}
   6.932 +
   6.933 +text {* Type variables are decorated with sorts, as explained in
   6.934 +  \secref{sec:types}.  This constrains type instantiation to certain
   6.935 +  ranges of types: variable @{text "\<alpha>\<^sub>s"} may only be assigned to types
   6.936 +  @{text "\<tau>"} that belong to sort @{text "s"}.  Within the logic, sort
   6.937 +  constraints act like implicit preconditions on the result @{text
   6.938 +  "\<lparr>\<alpha>\<^sub>1 : s\<^sub>1\<rparr>, \<dots>, \<lparr>\<alpha>\<^sub>n : s\<^sub>n\<rparr>, \<Gamma> \<turnstile> \<phi>"} where the type variables @{text
   6.939 +  "\<alpha>\<^sub>1, \<dots>, \<alpha>\<^sub>n"} cover the propositions @{text "\<Gamma>"}, @{text "\<phi>"}, as
   6.940 +  well as the proof of @{text "\<Gamma> \<turnstile> \<phi>"}.
   6.941 +
   6.942 +  These \emph{sort hypotheses} of a theorem are passed monotonically
   6.943 +  through further derivations.  They are redundant, as long as the
   6.944 +  statement of a theorem still contains the type variables that are
   6.945 +  accounted for here.  The logical significance of sort hypotheses is
   6.946 +  limited to the boundary case where type variables disappear from the
   6.947 +  proposition, e.g.\ @{text "\<lparr>\<alpha>\<^sub>s : s\<rparr> \<turnstile> \<phi>"}.  Since such dangling type
   6.948 +  variables can be renamed arbitrarily without changing the
   6.949 +  proposition @{text "\<phi>"}, the inference kernel maintains sort
   6.950 +  hypotheses in anonymous form @{text "s \<turnstile> \<phi>"}.
   6.951 +
   6.952 +  In most practical situations, such extra sort hypotheses may be
   6.953 +  stripped in a final bookkeeping step, e.g.\ at the end of a proof:
   6.954 +  they are typically left over from intermediate reasoning with type
   6.955 +  classes that can be satisfied by some concrete type @{text "\<tau>"} of
   6.956 +  sort @{text "s"} to replace the hypothetical type variable @{text
   6.957 +  "\<alpha>\<^sub>s"}.  *}
   6.958 +
   6.959 +text %mlref {*
   6.960 +  \begin{mldecls}
   6.961 +  @{index_ML Thm.extra_shyps: "thm -> sort list"} \\
   6.962 +  @{index_ML Thm.strip_shyps: "thm -> thm"} \\
   6.963 +  \end{mldecls}
   6.964 +
   6.965 +  \begin{description}
   6.966 +
   6.967 +  \item @{ML Thm.extra_shyps}~@{text "thm"} determines the extraneous
   6.968 +  sort hypotheses of the given theorem, i.e.\ the sorts that are not
   6.969 +  present within type variables of the statement.
   6.970 +
   6.971 +  \item @{ML Thm.strip_shyps}~@{text "thm"} removes any extraneous
   6.972 +  sort hypotheses that can be witnessed from the type signature.
   6.973 +
   6.974 +  \end{description}
   6.975 +*}
   6.976 +
   6.977 +text %mlex {* The following artificial example demonstrates the
   6.978 +  derivation of @{prop False} with a pending sort hypothesis involving
   6.979 +  a logically empty sort.  *}
   6.980 +
   6.981 +class empty =
   6.982 +  assumes bad: "\<And>(x::'a) y. x \<noteq> y"
   6.983 +
   6.984 +theorem (in empty) false: False
   6.985 +  using bad by blast
   6.986 +
   6.987 +ML {*
   6.988 +  @{assert} (Thm.extra_shyps @{thm false} = [@{sort empty}])
   6.989 +*}
   6.990 +
   6.991 +text {* Thanks to the inference kernel managing sort hypotheses
   6.992 +  according to their logical significance, this example is merely an
   6.993 +  instance of \emph{ex falso quodlibet consequitur} --- not a collapse
   6.994 +  of the logical framework! *}
   6.995 +
   6.996 +
   6.997 +section {* Object-level rules \label{sec:obj-rules} *}
   6.998 +
   6.999 +text {*
  6.1000 +  The primitive inferences covered so far mostly serve foundational
  6.1001 +  purposes.  User-level reasoning usually works via object-level rules
  6.1002 +  that are represented as theorems of Pure.  Composition of rules
  6.1003 +  involves \emph{backchaining}, \emph{higher-order unification} modulo
  6.1004 +  @{text "\<alpha>\<beta>\<eta>"}-conversion of @{text "\<lambda>"}-terms, and so-called
  6.1005 +  \emph{lifting} of rules into a context of @{text "\<And>"} and @{text
  6.1006 +  "\<Longrightarrow>"} connectives.  Thus the full power of higher-order Natural
  6.1007 +  Deduction in Isabelle/Pure becomes readily available.
  6.1008 +*}
  6.1009 +
  6.1010 +
  6.1011 +subsection {* Hereditary Harrop Formulae *}
  6.1012 +
  6.1013 +text {*
  6.1014 +  The idea of object-level rules is to model Natural Deduction
  6.1015 +  inferences in the style of Gentzen \cite{Gentzen:1935}, but we allow
  6.1016 +  arbitrary nesting similar to \cite{extensions91}.  The most basic
  6.1017 +  rule format is that of a \emph{Horn Clause}:
  6.1018 +  \[
  6.1019 +  \infer{@{text "A"}}{@{text "A\<^sub>1"} & @{text "\<dots>"} & @{text "A\<^sub>n"}}
  6.1020 +  \]
  6.1021 +  where @{text "A, A\<^sub>1, \<dots>, A\<^sub>n"} are atomic propositions
  6.1022 +  of the framework, usually of the form @{text "Trueprop B"}, where
  6.1023 +  @{text "B"} is a (compound) object-level statement.  This
  6.1024 +  object-level inference corresponds to an iterated implication in
  6.1025 +  Pure like this:
  6.1026 +  \[
  6.1027 +  @{text "A\<^sub>1 \<Longrightarrow> \<dots> A\<^sub>n \<Longrightarrow> A"}
  6.1028 +  \]
  6.1029 +  As an example consider conjunction introduction: @{text "A \<Longrightarrow> B \<Longrightarrow> A \<and>
  6.1030 +  B"}.  Any parameters occurring in such rule statements are
  6.1031 +  conceptually treated as arbitrary:
  6.1032 +  \[
  6.1033 +  @{text "\<And>x\<^sub>1 \<dots> x\<^sub>m. A\<^sub>1 x\<^sub>1 \<dots> x\<^sub>m \<Longrightarrow> \<dots> A\<^sub>n x\<^sub>1 \<dots> x\<^sub>m \<Longrightarrow> A x\<^sub>1 \<dots> x\<^sub>m"}
  6.1034 +  \]
  6.1035 +
  6.1036 +  Nesting of rules means that the positions of @{text "A\<^sub>i"} may
  6.1037 +  again hold compound rules, not just atomic propositions.
  6.1038 +  Propositions of this format are called \emph{Hereditary Harrop
  6.1039 +  Formulae} in the literature \cite{Miller:1991}.  Here we give an
  6.1040 +  inductive characterization as follows:
  6.1041 +
  6.1042 +  \medskip
  6.1043 +  \begin{tabular}{ll}
  6.1044 +  @{text "\<^bold>x"} & set of variables \\
  6.1045 +  @{text "\<^bold>A"} & set of atomic propositions \\
  6.1046 +  @{text "\<^bold>H  =  \<And>\<^bold>x\<^sup>*. \<^bold>H\<^sup>* \<Longrightarrow> \<^bold>A"} & set of Hereditary Harrop Formulas \\
  6.1047 +  \end{tabular}
  6.1048 +  \medskip
  6.1049 +
  6.1050 +  Thus we essentially impose nesting levels on propositions formed
  6.1051 +  from @{text "\<And>"} and @{text "\<Longrightarrow>"}.  At each level there is a prefix
  6.1052 +  of parameters and compound premises, concluding an atomic
  6.1053 +  proposition.  Typical examples are @{text "\<longrightarrow>"}-introduction @{text
  6.1054 +  "(A \<Longrightarrow> B) \<Longrightarrow> A \<longrightarrow> B"} or mathematical induction @{text "P 0 \<Longrightarrow> (\<And>n. P n
  6.1055 +  \<Longrightarrow> P (Suc n)) \<Longrightarrow> P n"}.  Even deeper nesting occurs in well-founded
  6.1056 +  induction @{text "(\<And>x. (\<And>y. y \<prec> x \<Longrightarrow> P y) \<Longrightarrow> P x) \<Longrightarrow> P x"}, but this
  6.1057 +  already marks the limit of rule complexity that is usually seen in
  6.1058 +  practice.
  6.1059 +
  6.1060 +  \medskip Regular user-level inferences in Isabelle/Pure always
  6.1061 +  maintain the following canonical form of results:
  6.1062 +
  6.1063 +  \begin{itemize}
  6.1064 +
  6.1065 +  \item Normalization by @{text "(A \<Longrightarrow> (\<And>x. B x)) \<equiv> (\<And>x. A \<Longrightarrow> B x)"},
  6.1066 +  which is a theorem of Pure, means that quantifiers are pushed in
  6.1067 +  front of implication at each level of nesting.  The normal form is a
  6.1068 +  Hereditary Harrop Formula.
  6.1069 +
  6.1070 +  \item The outermost prefix of parameters is represented via
  6.1071 +  schematic variables: instead of @{text "\<And>\<^vec>x. \<^vec>H \<^vec>x
  6.1072 +  \<Longrightarrow> A \<^vec>x"} we have @{text "\<^vec>H ?\<^vec>x \<Longrightarrow> A ?\<^vec>x"}.
  6.1073 +  Note that this representation loses information about the order of
  6.1074 +  parameters, and vacuous quantifiers vanish automatically.
  6.1075 +
  6.1076 +  \end{itemize}
  6.1077 +*}
  6.1078 +
  6.1079 +text %mlref {*
  6.1080 +  \begin{mldecls}
  6.1081 +  @{index_ML Simplifier.norm_hhf: "Proof.context -> thm -> thm"} \\
  6.1082 +  \end{mldecls}
  6.1083 +
  6.1084 +  \begin{description}
  6.1085 +
  6.1086 +  \item @{ML Simplifier.norm_hhf}~@{text "ctxt thm"} normalizes the given
  6.1087 +  theorem according to the canonical form specified above.  This is
  6.1088 +  occasionally helpful to repair some low-level tools that do not
  6.1089 +  handle Hereditary Harrop Formulae properly.
  6.1090 +
  6.1091 +  \end{description}
  6.1092 +*}
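         +
         +text %mlex {* A small sketch (the proposition and its @{text simp} proof
         +  are merely illustrative): *}
         +
         +ML {*
         +  (*statement with a quantifier nested inside an implication*)
         +  val th = @{lemma "A \<Longrightarrow> (\<And>x. x = x)" by simp};
         +
         +  (*normalize into the canonical Hereditary Harrop form described above*)
         +  val th' = Simplifier.norm_hhf @{context} th;
         +*}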
  6.1093 +
  6.1094 +
  6.1095 +subsection {* Rule composition *}
  6.1096 +
  6.1097 +text {*
  6.1098 +  The rule calculus of Isabelle/Pure provides two main inferences:
  6.1099 +  @{inference resolution} (i.e.\ back-chaining of rules) and
  6.1100 +  @{inference assumption} (i.e.\ closing a branch), both modulo
  6.1101 +  higher-order unification.  There are also combined variants, notably
  6.1102 +  @{inference elim_resolution} and @{inference dest_resolution}.
  6.1103 +
  6.1104 +  To understand the all-important @{inference resolution} principle,
  6.1105 +  we first consider raw @{inference_def composition} (modulo
  6.1106 +  higher-order unification with substitution @{text "\<vartheta>"}):
  6.1107 +  \[
  6.1108 +  \infer[(@{inference_def composition})]{@{text "\<^vec>A\<vartheta> \<Longrightarrow> C\<vartheta>"}}
  6.1109 +  {@{text "\<^vec>A \<Longrightarrow> B"} & @{text "B' \<Longrightarrow> C"} & @{text "B\<vartheta> = B'\<vartheta>"}}
  6.1110 +  \]
  6.1111 +  Here the conclusion of the first rule is unified with the premise of
  6.1112 +  the second; the resulting rule instance inherits the premises of the
  6.1113 +  first and conclusion of the second.  Note that @{text "C"} can again
  6.1114 +  consist of iterated implications.  We can also permute the premises
  6.1115 +  of the second rule back-and-forth in order to compose with @{text
  6.1116 +  "B'"} in any position (subsequently we shall always refer to
  6.1117 +  position 1 w.l.o.g.).
  6.1118 +
  6.1119 +  In @{inference composition} the internal structure of the common
  6.1120 +  part @{text "B"} and @{text "B'"} is not taken into account.  For
  6.1121 +  proper @{inference resolution} we require @{text "B"} to be atomic,
  6.1122 +  and explicitly observe the structure @{text "\<And>\<^vec>x. \<^vec>H
  6.1123 +  \<^vec>x \<Longrightarrow> B' \<^vec>x"} of the premise of the second rule.  The
  6.1124 +  idea is to adapt the first rule by ``lifting'' it into this context,
  6.1125 +  by means of iterated application of the following inferences:
  6.1126 +  \[
  6.1127 +  \infer[(@{inference_def imp_lift})]{@{text "(\<^vec>H \<Longrightarrow> \<^vec>A) \<Longrightarrow> (\<^vec>H \<Longrightarrow> B)"}}{@{text "\<^vec>A \<Longrightarrow> B"}}
  6.1128 +  \]
  6.1129 +  \[
  6.1130 +  \infer[(@{inference_def all_lift})]{@{text "(\<And>\<^vec>x. \<^vec>A (?\<^vec>a \<^vec>x)) \<Longrightarrow> (\<And>\<^vec>x. B (?\<^vec>a \<^vec>x))"}}{@{text "\<^vec>A ?\<^vec>a \<Longrightarrow> B ?\<^vec>a"}}
  6.1131 +  \]
  6.1132 +  By combining raw composition with lifting, we get full @{inference
  6.1133 +  resolution} as follows:
  6.1134 +  \[
  6.1135 +  \infer[(@{inference_def resolution})]
  6.1136 +  {@{text "(\<And>\<^vec>x. \<^vec>H \<^vec>x \<Longrightarrow> \<^vec>A (?\<^vec>a \<^vec>x))\<vartheta> \<Longrightarrow> C\<vartheta>"}}
  6.1137 +  {\begin{tabular}{l}
  6.1138 +    @{text "\<^vec>A ?\<^vec>a \<Longrightarrow> B ?\<^vec>a"} \\
  6.1139 +    @{text "(\<And>\<^vec>x. \<^vec>H \<^vec>x \<Longrightarrow> B' \<^vec>x) \<Longrightarrow> C"} \\
  6.1140 +    @{text "(\<lambda>\<^vec>x. B (?\<^vec>a \<^vec>x))\<vartheta> = B'\<vartheta>"} \\
  6.1141 +   \end{tabular}}
  6.1142 +  \]
  6.1143 +
  6.1144 +  Continued resolution of rules makes it possible to back-chain a problem
  6.1145 +  into more and more sub-problems.  Branches are closed either by resolving with
  6.1146 +  a rule of 0 premises, or by producing a ``short-circuit'' within a
  6.1147 +  solved situation (again modulo unification):
  6.1148 +  \[
  6.1149 +  \infer[(@{inference_def assumption})]{@{text "C\<vartheta>"}}
  6.1150 +  {@{text "(\<And>\<^vec>x. \<^vec>H \<^vec>x \<Longrightarrow> A \<^vec>x) \<Longrightarrow> C"} & @{text "A\<vartheta> = H\<^sub>i\<vartheta>"}~~\text{(for some~@{text i})}}
  6.1151 +  \]
  6.1152 +
  6.1153 +  %FIXME @{inference_def elim_resolution}, @{inference_def dest_resolution}
  6.1154 +*}
  6.1155 +
  6.1156 +text %mlref {*
  6.1157 +  \begin{mldecls}
  6.1158 +  @{index_ML_op "RSN": "thm * (int * thm) -> thm"} \\
  6.1159 +  @{index_ML_op "RS": "thm * thm -> thm"} \\
  6.1160 +
  6.1161 +  @{index_ML_op "RLN": "thm list * (int * thm list) -> thm list"} \\
  6.1162 +  @{index_ML_op "RL": "thm list * thm list -> thm list"} \\
  6.1163 +
  6.1164 +  @{index_ML_op "MRS": "thm list * thm -> thm"} \\
  6.1165 +  @{index_ML_op "OF": "thm * thm list -> thm"} \\
  6.1166 +  \end{mldecls}
  6.1167 +
  6.1168 +  \begin{description}
  6.1169 +
  6.1170 +  \item @{text "rule\<^sub>1 RSN (i, rule\<^sub>2)"} resolves the conclusion of
  6.1171 +  @{text "rule\<^sub>1"} with the @{text i}-th premise of @{text "rule\<^sub>2"},
  6.1172 +  according to the @{inference resolution} principle explained above.
  6.1173 +  Unless there is precisely one resolvent it raises exception @{ML
  6.1174 +  THM}.
  6.1175 +
  6.1176 +  This corresponds to the rule attribute @{attribute THEN} in the Isar
  6.1177 +  source language.
  6.1178 +
  6.1179 +  \item @{text "rule\<^sub>1 RS rule\<^sub>2"} abbreviates @{text "rule\<^sub>1 RSN (1,
  6.1180 +  rule\<^sub>2)"}.
  6.1181 +
  6.1182 +  \item @{text "rules\<^sub>1 RLN (i, rules\<^sub>2)"} joins lists of rules.  For
  6.1183 +  every @{text "rule\<^sub>1"} in @{text "rules\<^sub>1"} and @{text "rule\<^sub>2"} in
  6.1184 +  @{text "rules\<^sub>2"}, it resolves the conclusion of @{text "rule\<^sub>1"} with
  6.1185 +  the @{text "i"}-th premise of @{text "rule\<^sub>2"}, accumulating multiple
  6.1186 +  results in one big list.  Note that such strict enumerations of
  6.1187 +  higher-order unifications can be inefficient compared to the lazy
  6.1188 +  variant seen in elementary tactics like @{ML resolve_tac}.
  6.1189 +
  6.1190 +  \item @{text "rules\<^sub>1 RL rules\<^sub>2"} abbreviates @{text "rules\<^sub>1 RLN (1,
  6.1191 +  rules\<^sub>2)"}.
  6.1192 +
  6.1193 +  \item @{text "[rule\<^sub>1, \<dots>, rule\<^sub>n] MRS rule"} resolves @{text "rule\<^sub>i"}
  6.1194 +  against premise @{text "i"} of @{text "rule"}, for @{text "i = n, \<dots>,
  6.1195 +  1"}.  By working from right to left, newly emerging premises are
  6.1196 +  concatenated in the result, without interfering.
  6.1197 +
  6.1198 +  \item @{text "rule OF rules"} is an alternative notation for @{text
  6.1199 +  "rules MRS rule"}, which makes rule composition look more like
  6.1200 +  function application.  Note that the argument @{text "rules"} need
  6.1201 +  not be atomic.
  6.1202 +
  6.1203 +  This corresponds to the rule attribute @{attribute OF} in the Isar
  6.1204 +  source language.
  6.1205 +
  6.1206 +  \end{description}
  6.1207 +*}
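         +
         +text %mlex {* A brief sketch (using the Isabelle/HOL rules
         +  @{thm [source] conjI}, @{thm [source] disjI1}, and @{thm [source] refl}
         +  merely as familiar examples): *}
         +
         +ML {*
         +  (*resolve the conclusion of conjI with the first premise of disjI1*)
         +  val rule1 = @{thm conjI} RS @{thm disjI1};
         +
         +  (*instantiate both premises of conjI by refl*)
         +  val rule2 = @{thm conjI} OF [@{thm refl}, @{thm refl}];
         +*}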
  6.1208 +
  6.1209 +
  6.1210 +section {* Proof terms \label{sec:proof-terms} *}
  6.1211 +
  6.1212 +text {* The Isabelle/Pure inference kernel can record the proof of
  6.1213 +  each theorem as a proof term that contains all logical inferences in
  6.1214 +  detail.  Rule composition by resolution (\secref{sec:obj-rules}) and
  6.1215 +  type-class reasoning is broken down to primitive rules of the
  6.1216 +  logical framework.  The proof term can be inspected by a separate
  6.1217 +  proof-checker, for example.
  6.1218 +
  6.1219 +  According to the well-known \emph{Curry-Howard isomorphism}, a proof
  6.1220 +  can be viewed as a @{text "\<lambda>"}-term. Following this idea, proofs in
  6.1221 +  Isabelle are internally represented by a datatype similar to the one
  6.1222 +  for terms described in \secref{sec:terms}.  On top of these
  6.1223 +  syntactic terms, two more layers of @{text "\<lambda>"}-calculus are added,
  6.1224 +  which correspond to @{text "\<And>x :: \<alpha>. B x"} and @{text "A \<Longrightarrow> B"}
  6.1225 +  according to the propositions-as-types principle.  The resulting
  6.1226 +  3-level @{text "\<lambda>"}-calculus resembles ``@{text "\<lambda>HOL"}'' in the
  6.1227 +  more abstract setting of Pure Type Systems (PTS)
  6.1228 +  \cite{Barendregt-Geuvers:2001}, if some fine points like schematic
  6.1229 +  polymorphism and type classes are ignored.
  6.1230 +
  6.1231 +  \medskip\emph{Proof abstractions} of the form @{text "\<^bold>\<lambda>x :: \<alpha>. prf"}
  6.1232 +  or @{text "\<^bold>\<lambda>p : A. prf"} correspond to introduction of @{text
  6.1233 +  "\<And>"}/@{text "\<Longrightarrow>"}, and \emph{proof applications} of the form @{text
  6.1234 +  "p \<cdot> t"} or @{text "p \<bullet> q"} correspond to elimination of @{text
  6.1235 +  "\<And>"}/@{text "\<Longrightarrow>"}.  Actual types @{text "\<alpha>"}, propositions @{text
  6.1236 +  "A"}, and terms @{text "t"} might be suppressed and reconstructed
  6.1237 +  from the overall proof term.
  6.1238 +
  6.1239 +  \medskip Various atomic proofs indicate special situations within
  6.1240 +  the proof construction as follows.
  6.1241 +
  6.1242 +  A \emph{bound proof variable} is a natural number @{text "b"} that
  6.1243 +  acts as a de-Bruijn index for proof term abstractions.
  6.1244 +
  6.1245 +  A \emph{minimal proof} ``@{text "?"}'' is a dummy proof term.  This
  6.1246 +  indicates some unrecorded part of the proof.
  6.1247 +
  6.1248 +  @{text "Hyp A"} refers to some pending hypothesis by giving its
  6.1249 +  proposition.  This indicates an open context of implicit hypotheses,
  6.1250 +  similar to loose bound variables or free variables within a term
  6.1251 +  (\secref{sec:terms}).
  6.1252 +
  6.1253 +  An \emph{axiom} or \emph{oracle} @{text "a : A[\<^vec>\<tau>]"} refers to
  6.1254 +  some postulated @{text "proof constant"}, which is subject to
  6.1255 +  schematic polymorphism of theory content, and the particular type
  6.1256 +  instantiation may be given explicitly.  The vector of types @{text
  6.1257 +  "\<^vec>\<tau>"} refers to the schematic type variables in the generic
  6.1258 +  proposition @{text "A"} in canonical order.
  6.1259 +
  6.1260 +  A \emph{proof promise} @{text "a : A[\<^vec>\<tau>]"} is a placeholder
  6.1261 +  for some proof of polymorphic proposition @{text "A"}, with explicit
  6.1262 +  type instantiation as given by the vector @{text "\<^vec>\<tau>"}, as
  6.1263 +  above.  Unlike axioms or oracles, proof promises may be
  6.1264 +  \emph{fulfilled} eventually, by substituting @{text "a"} by some
  6.1265 +  particular proof @{text "q"} at the corresponding type instance.
  6.1266 +  This acts like Hindley-Milner @{text "let"}-polymorphism: a generic
  6.1267 +  local proof definition may get used at different type instances, and
  6.1268 +  is replaced by the concrete instance eventually.
  6.1269 +
  6.1270 +  A \emph{named theorem} wraps up some concrete proof as a closed
  6.1271 +  formal entity, in the manner of constant definitions for proof
  6.1272 +  terms.  The \emph{proof body} of such boxed theorems involves some
  6.1273 +  digest about oracles and promises occurring in the original proof.
  6.1274 +  This allows the inference kernel to manage this critical information
  6.1275 +  without the full overhead of explicit proof terms.
  6.1276 +*}
  6.1277 +
  6.1278 +
  6.1279 +subsection {* Reconstructing and checking proof terms *}
  6.1280 +
  6.1281 +text {* Fully explicit proof terms can be large, but most of this
  6.1282 +  information is redundant and can be reconstructed from the context.
  6.1283 +  Therefore, the Isabelle/Pure inference kernel records only
  6.1284 +  \emph{implicit} proof terms, by omitting all typing information in
  6.1285 +  terms, all term and type labels of proof abstractions, and some
  6.1286 +  argument terms of applications @{text "p \<cdot> t"} (if possible).
  6.1287 +
  6.1288 +  There are separate operations to reconstruct the full proof term
  6.1289 +  later on, using \emph{higher-order pattern unification}
  6.1290 +  \cite{nipkow-patterns,Berghofer-Nipkow:2000:TPHOL}.
  6.1291 +
  6.1292 +  The \emph{proof checker} expects a fully reconstructed proof term,
  6.1293 +  and can turn it into a theorem by replaying its primitive inferences
  6.1294 +  within the kernel.  *}
  6.1295 +
  6.1296 +
  6.1297 +subsection {* Concrete syntax of proof terms *}
  6.1298 +
  6.1299 +text {* The concrete syntax of proof terms is a slight extension of
  6.1300 +  the regular inner syntax of Isabelle/Pure \cite{isabelle-isar-ref}.
  6.1301 +  Its main syntactic category @{syntax (inner) proof} is defined as
  6.1302 +  follows:
  6.1303 +
  6.1304 +  \begin{center}
  6.1305 +  \begin{supertabular}{rclr}
  6.1306 +
  6.1307 +  @{syntax_def (inner) proof} & = & @{verbatim Lam} @{text params} @{verbatim "."} @{text proof} \\
  6.1308 +    & @{text "|"} & @{text "\<^bold>\<lambda>"} @{text "params"} @{verbatim "."} @{text proof} \\
  6.1309 +    & @{text "|"} & @{text proof} @{verbatim "%"} @{text any} \\
  6.1310 +    & @{text "|"} & @{text proof} @{text "\<cdot>"} @{text any} \\
  6.1311 +    & @{text "|"} & @{text proof} @{verbatim "%%"} @{text proof} \\
  6.1312 +    & @{text "|"} & @{text proof} @{text "\<bullet>"} @{text proof} \\
  6.1313 +    & @{text "|"} & @{text "id  |  longid"} \\
  6.1314 +  \\
  6.1315 +
  6.1316 +  @{text param} & = & @{text idt} \\
  6.1317 +    & @{text "|"} & @{text idt} @{verbatim ":"} @{text prop} \\
  6.1318 +    & @{text "|"} & @{verbatim "("} @{text param} @{verbatim ")"} \\
  6.1319 +  \\
  6.1320 +
  6.1321 +  @{text params} & = & @{text param} \\
  6.1322 +    & @{text "|"} & @{text param} @{text params} \\
  6.1323 +
  6.1324 +  \end{supertabular}
  6.1325 +  \end{center}
  6.1326 +
  6.1327 +  Implicit term arguments in partial proofs are indicated by ``@{text
  6.1328 +  "_"}''.  Type arguments for theorems and axioms may be specified
  6.1329 +  using @{text "p \<cdot> TYPE(type)"} (they must appear before any other
  6.1330 +  term argument of a theorem or axiom, but may be omitted altogether).
  6.1331 +
  6.1332 +  \medskip There are separate read and print operations for proof
  6.1333 +  terms, in order to avoid conflicts with the regular term language.
  6.1334 +*}
  6.1335 +
  6.1336 +text %mlref {*
  6.1337 +  \begin{mldecls}
  6.1338 +  @{index_ML_type proof} \\
  6.1339 +  @{index_ML_type proof_body} \\
  6.1340 +  @{index_ML proofs: "int Unsynchronized.ref"} \\
  6.1341 +  @{index_ML Reconstruct.reconstruct_proof:
  6.1342 +  "theory -> term -> proof -> proof"} \\
  6.1343 +  @{index_ML Reconstruct.expand_proof: "theory ->
  6.1344 +  (string * term option) list -> proof -> proof"} \\
  6.1345 +  @{index_ML Proof_Checker.thm_of_proof: "theory -> proof -> thm"} \\
  6.1346 +  @{index_ML Proof_Syntax.read_proof: "theory -> bool -> bool -> string -> proof"} \\
  6.1347 +  @{index_ML Proof_Syntax.pretty_proof: "Proof.context -> proof -> Pretty.T"} \\
  6.1348 +  \end{mldecls}
  6.1349 +
  6.1350 +  \begin{description}
  6.1351 +
  6.1352 +  \item Type @{ML_type proof} represents proof terms; this is a
  6.1353 +  datatype with constructors @{index_ML Abst}, @{index_ML AbsP},
  6.1354 +  @{index_ML_op "%"}, @{index_ML_op "%%"}, @{index_ML PBound},
  6.1355 +  @{index_ML MinProof}, @{index_ML Hyp}, @{index_ML PAxm}, @{index_ML
  6.1356 +  Oracle}, @{index_ML Promise}, @{index_ML PThm} as explained above.
  6.1357 +  %FIXME OfClass (!?)
  6.1358 +
  6.1359 +  \item Type @{ML_type proof_body} represents the nested proof
  6.1360 +  information of a named theorem, consisting of a digest of oracles
  6.1361 +  and named theorems over some proof term.  The digest only covers the
  6.1362 +  directly visible part of the proof: in order to get the full
  6.1363 +  information, the implicit graph of nested theorems needs to be
  6.1364 +  traversed (e.g.\ using @{ML Proofterm.fold_body_thms}).
  6.1365 +
  6.1366 +  \item @{ML Thm.proof_of}~@{text "thm"} and @{ML
  6.1367 +  Thm.proof_body_of}~@{text "thm"} produce the proof term or proof
  6.1368 +  body (with digest of oracles and theorems) from a given theorem.
  6.1369 +  Note that this involves a full join of internal futures that fulfill
  6.1370 +  pending proof promises, and thus disrupts the natural bottom-up
  6.1371 +  construction of proofs by introducing dynamic ad-hoc dependencies.
  6.1372 +  Parallel performance may suffer by inspecting proof terms at
  6.1373 +  run-time.
  6.1374 +
  6.1375 +  \item @{ML proofs} specifies the detail of proof recording within
  6.1376 +  @{ML_type thm} values produced by the inference kernel: @{ML 0}
  6.1377 +  records only the names of oracles, @{ML 1} records oracle names and
  6.1378 +  propositions, @{ML 2} additionally records full proof terms.
  6.1379 +  Officially named theorems that contribute to a result are recorded
  6.1380 +  in any case.
  6.1381 +
  6.1382 +  \item @{ML Reconstruct.reconstruct_proof}~@{text "thy prop prf"}
  6.1383 +  turns the implicit proof term @{text "prf"} into a full proof of the
  6.1384 +  given proposition.
  6.1385 +
  6.1386 +  Reconstruction may fail if @{text "prf"} is not a proof of @{text
  6.1387 +  "prop"}, or if it does not contain sufficient information for
  6.1388 +  reconstruction.  Failure may only happen for proofs that are
  6.1389 +  constructed manually, but not for those produced automatically by
  6.1390 +  the inference kernel.
  6.1391 +
  6.1392 +  \item @{ML Reconstruct.expand_proof}~@{text "thy [thm\<^sub>1, \<dots>, thm\<^sub>n]
  6.1393 +  prf"} expands and reconstructs the proofs of all specified theorems,
  6.1394 +  with the given (full) proof.  Theorems that are not unique specified
  6.1395 +  via their name may be disambiguated by giving their proposition.
  6.1396 +
  6.1397 +  \item @{ML Proof_Checker.thm_of_proof}~@{text "thy prf"} turns the
  6.1398 +  given (full) proof into a theorem, by replaying it using only
  6.1399 +  primitive rules of the inference kernel.
  6.1400 +
  6.1401 +  \item @{ML Proof_Syntax.read_proof}~@{text "thy b\<^sub>1 b\<^sub>2 s"} reads in a
  6.1402 +  proof term. The Boolean flags indicate the use of sort and type
  6.1403 +  information.  Usually, typing information is left implicit and is
  6.1404 +  inferred during proof reconstruction.  %FIXME eliminate flags!?
  6.1405 +
  6.1406 +  \item @{ML Proof_Syntax.pretty_proof}~@{text "ctxt prf"}
  6.1407 +  pretty-prints the given proof term.
  6.1408 +
  6.1409 +  \end{description}
  6.1410 +*}
  6.1411 +
  6.1412 +text %mlex {* Detailed proof information of a theorem may be retrieved
  6.1413 +  as follows: *}
  6.1414 +
  6.1415 +lemma ex: "A \<and> B \<longrightarrow> B \<and> A"
  6.1416 +proof
  6.1417 +  assume "A \<and> B"
  6.1418 +  then obtain B and A ..
  6.1419 +  then show "B \<and> A" ..
  6.1420 +qed
  6.1421 +
  6.1422 +ML_val {*
  6.1423 +  (*proof body with digest*)
  6.1424 +  val body = Proofterm.strip_thm (Thm.proof_body_of @{thm ex});
  6.1425 +
  6.1426 +  (*proof term only*)
  6.1427 +  val prf = Proofterm.proof_of body;
  6.1428 +  Pretty.writeln (Proof_Syntax.pretty_proof @{context} prf);
  6.1429 +
  6.1430 +  (*all theorems used in the graph of nested proofs*)
  6.1431 +  val all_thms =
  6.1432 +    Proofterm.fold_body_thms
  6.1433 +      (fn (name, _, _) => insert (op =) name) [body] [];
  6.1434 +*}
  6.1435 +
  6.1436 +text {* The result refers to various basic facts of Isabelle/HOL:
  6.1437 +  @{thm [source] HOL.impI}, @{thm [source] HOL.conjE}, @{thm [source]
  6.1438 +  HOL.conjI} etc.  The combinator @{ML Proofterm.fold_body_thms}
  6.1439 +  recursively explores the graph of the proofs of all theorems being
  6.1440 +  used here.
  6.1441 +
  6.1442 +  \medskip Alternatively, we may produce a proof term manually, and
  6.1443 +  turn it into a theorem as follows: *}
  6.1444 +
  6.1445 +ML_val {*
  6.1446 +  val thy = @{theory};
  6.1447 +  val prf =
  6.1448 +    Proof_Syntax.read_proof thy true false
  6.1449 +      "impI \<cdot> _ \<cdot> _ \<bullet> \
  6.1450 +      \   (\<^bold>\<lambda>H: _. \
  6.1451 +      \     conjE \<cdot> _ \<cdot> _ \<cdot> _ \<bullet> H \<bullet> \
  6.1452 +      \       (\<^bold>\<lambda>(H: _) Ha: _. conjI \<cdot> _ \<cdot> _ \<bullet> Ha \<bullet> H))";
  6.1453 +  val thm =
  6.1454 +    prf
  6.1455 +    |> Reconstruct.reconstruct_proof thy @{prop "A \<and> B \<longrightarrow> B \<and> A"}
  6.1456 +    |> Proof_Checker.thm_of_proof thy
  6.1457 +    |> Drule.export_without_context;
  6.1458 +*}
  6.1459 +
  6.1460 +text {* \medskip See also @{file "~~/src/HOL/Proofs/ex/XML_Data.thy"}
  6.1461 +  for further examples, with export and import of proof terms via
  6.1462 +  XML/ML data representation.
  6.1463 +*}
  6.1464 +
  6.1465 +end
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/src/Doc/Implementation/ML.thy	Sat Apr 05 11:37:00 2014 +0200
     7.3 @@ -0,0 +1,2116 @@
     7.4 +theory "ML"
     7.5 +imports Base
     7.6 +begin
     7.7 +
     7.8 +chapter {* Isabelle/ML *}
     7.9 +
    7.10 +text {* Isabelle/ML is best understood as a certain culture based on
    7.11 +  Standard ML.  Thus it is not a new programming language, but a
    7.12 +  certain way to use SML at an advanced level within the Isabelle
    7.13 +  environment.  This covers a variety of aspects that are geared
    7.14 +  towards an efficient and robust platform for applications of formal
    7.15 +  logic with fully foundational proof construction --- according to
    7.16 +  the well-known \emph{LCF principle}.  There is specific
    7.17 +  infrastructure with library modules to address the needs of this
    7.18 +  difficult task.  For example, the raw parallel programming model of
    7.19 +  Poly/ML is presented as the considerably more abstract concept of
    7.20 +  \emph{future values}, which is then used to augment the inference
    7.21 +  kernel, proof interpreter, and theory loader accordingly.
    7.22 +
    7.23 +  The main aspects of Isabelle/ML are introduced below.  These
    7.24 +  first-hand explanations should help to understand how proper
    7.25 +  Isabelle/ML is to be read and written, and to get access to the
    7.26 +  wealth of experience that is expressed in the source text and its
    7.27 +  history of changes.\footnote{See
    7.28 +  @{url "http://isabelle.in.tum.de/repos/isabelle"} for the full
    7.29 +  Mercurial history.  There are symbolic tags to refer to official
    7.30 +  Isabelle releases, as opposed to arbitrary \emph{tip} versions that
    7.31 +  merely reflect snapshots that are never really up-to-date.}  *}
    7.32 +
    7.33 +
    7.34 +section {* Style and orthography *}
    7.35 +
    7.36 +text {* The sources of Isabelle/Isar are optimized for
    7.37 +  \emph{readability} and \emph{maintainability}.  The main purpose is
    7.38 +  to tell an informed reader what is really going on and how things
    7.39 +  really work.  This is a non-trivial aim, but it is supported by a
    7.40 +  certain style of writing Isabelle/ML that has emerged from long
    7.41 +  years of system development.\footnote{See also the interesting style
    7.42 +  guide for OCaml
    7.43 +  @{url "http://caml.inria.fr/resources/doc/guides/guidelines.en.html"}
    7.44 +  which shares many of our means and ends.}
    7.45 +
    7.46 +  The main principle behind any coding style is \emph{consistency}.
    7.47 +  For a single author of a small program this merely means ``choose
    7.48 +  your style and stick to it''.  A complex project like Isabelle, with
    7.49 +  long years of development and different contributors, requires more
    7.50 +  standardization.  A coding style that is changed every few years or
    7.51 +  with every new contributor is no style at all, because consistency
    7.52 +  is quickly lost.  Global consistency is hard to achieve, though.
    7.53 +  Nonetheless, one should always strive at least for local consistency
    7.54 +  of modules and sub-systems, without deviating from some general
    7.55 +  principles of how to write Isabelle/ML.
    7.56 +
    7.57 +  In a sense, good coding style is like an \emph{orthography} for the
    7.58 +  sources: it helps to read quickly over the text and see through the
    7.59 +  main points, without getting distracted by accidental presentation
    7.60 +  of free-style code.
    7.61 +*}
    7.62 +
    7.63 +
    7.64 +subsection {* Header and sectioning *}
    7.65 +
    7.66 +text {* Isabelle source files have a certain standardized header
    7.67 +  format (with precise spacing) that follows ancient traditions
    7.68 +  reaching back to the earliest versions of the system by Larry
    7.69 +  Paulson.  See @{file "~~/src/Pure/thm.ML"}, for example.
    7.70 +
    7.71 +  The header includes at least @{verbatim Title} and @{verbatim
    7.72 +  Author} entries, followed by a prose description of the purpose of
    7.73 +  the module.  The latter can range from a single line to several
    7.74 +  paragraphs of explanations.
    7.75 +
    7.76 +  The rest of the file is divided into sections, subsections,
    7.77 +  subsubsections, paragraphs etc.\ using a simple layout via ML
    7.78 +  comments as follows.
    7.79 +
    7.80 +\begin{verbatim}
    7.81 +(*** section ***)
    7.82 +
    7.83 +(** subsection **)
    7.84 +
    7.85 +(* subsubsection *)
    7.86 +
    7.87 +(*short paragraph*)
    7.88 +
    7.89 +(*
    7.90 +  long paragraph,
    7.91 +  with more text
    7.92 +*)
    7.93 +\end{verbatim}
    7.94 +
    7.95 +  As in regular typography, there is some extra space \emph{before}
    7.96 +  section headings that are adjacent to plain text (not other headings
    7.97 +  as in the example above).
    7.98 +
    7.99 +  \medskip The precise wording of the prose text given in these
   7.100 +  headings is chosen carefully to introduce the main theme of the
   7.101 +  subsequent formal ML text.
   7.102 +*}
   7.103 +
   7.104 +
   7.105 +subsection {* Naming conventions *}
   7.106 +
   7.107 +text {* Since ML is the primary medium to express the meaning of the
   7.108 +  source text, naming of ML entities requires special care.
   7.109 +
   7.110 +  \paragraph{Notation.}  A name consists of 1--3 \emph{words} (rarely
   7.111 +  4, but not more) that are separated by underscores.  There are three
   7.112 +  variants concerning upper or lower case letters, which are used for
   7.113 +  certain ML categories as follows:
   7.114 +
   7.115 +  \medskip
   7.116 +  \begin{tabular}{lll}
   7.117 +  variant & example & ML categories \\\hline
   7.118 +  lower-case & @{ML_text foo_bar} & values, types, record fields \\
   7.119 +  capitalized & @{ML_text Foo_Bar} & datatype constructors, structures, functors \\
   7.120 +  upper-case & @{ML_text FOO_BAR} & special values, exception constructors, signatures \\
   7.121 +  \end{tabular}
   7.122 +  \medskip
   7.123 +
   7.124 +  For historical reasons, many capitalized names omit underscores,
   7.125 +  e.g.\ old-style @{ML_text FooBar} instead of @{ML_text Foo_Bar}.
   7.126 +  Genuine mixed-case names are \emph{not} used, because clear division
   7.127 +  of words is essential for readability.\footnote{Camel-case was
   7.128 +  invented to work around the lack of underscore in some early
   7.129 +  non-ASCII character sets.  Later it became habitual in some language
   7.130 +  communities that are now strong in numbers.}
   7.131 +
   7.132 +  A single (capital) character does not count as a ``word'' in this
   7.133 +  respect: some Isabelle/ML names are suffixed by extra markers like
   7.134 +  this: @{ML_text foo_barT}.
   7.135 +
   7.136 +  Name variants are produced by adding 1--3 primes, e.g.\ @{ML_text
   7.137 +  foo'}, @{ML_text foo''}, or @{ML_text foo'''}, but not @{ML_text
   7.138 +  foo''''} or more.  Decimal digits scale better to larger numbers,
   7.139 +  e.g.\ @{ML_text foo0}, @{ML_text foo1}, @{ML_text foo42}.
   7.140 +
   7.141 +  \paragraph{Scopes.}  Apart from very basic library modules, ML
   7.142 +  structures are not ``opened'', but names are referenced with
   7.143 +  explicit qualification, as in @{ML Syntax.string_of_term} for
   7.144 +  example.  When devising names for structures and their components it
   7.145 +  is important to aim at eye-catching compositions of both parts, because
   7.146 +  this is how they are seen in the sources and documentation.  For the
   7.147 +  same reasons, aliases of well-known library functions should be
   7.148 +  avoided.
   7.149 +
   7.150 +  Local names of function abstractions or case/let bindings are
   7.151 +  typically shorter, sometimes using only rudiments of ``words'',
   7.152 +  while still avoiding cryptic shorthands.  An auxiliary function
   7.153 +  called @{ML_text helper}, @{ML_text aux}, or @{ML_text f} is
   7.154 +  considered bad style.
   7.155 +
   7.156 +  Example:
   7.157 +
   7.158 +  \begin{verbatim}
   7.159 +  (* RIGHT *)
   7.160 +
   7.161 +  fun print_foo ctxt foo =
   7.162 +    let
   7.163 +      fun print t = ... Syntax.string_of_term ctxt t ...
   7.164 +    in ... end;
   7.165 +
   7.166 +
   7.167 +  (* RIGHT *)
   7.168 +
   7.169 +  fun print_foo ctxt foo =
   7.170 +    let
   7.171 +      val string_of_term = Syntax.string_of_term ctxt;
   7.172 +      fun print t = ... string_of_term t ...
   7.173 +    in ... end;
   7.174 +
   7.175 +
   7.176 +  (* WRONG *)
   7.177 +
   7.178 +  val string_of_term = Syntax.string_of_term;
   7.179 +
   7.180 +  fun print_foo ctxt foo =
   7.181 +    let
   7.182 +      fun aux t = ... string_of_term ctxt t ...
   7.183 +    in ... end;
   7.184 +
   7.185 +  \end{verbatim}
   7.186 +
   7.187 +
   7.188 +  \paragraph{Specific conventions.} Here are some specific name forms
   7.189 +  that occur frequently in the sources.
   7.190 +
   7.191 +  \begin{itemize}
   7.192 +
   7.193 +  \item A function that maps @{ML_text foo} to @{ML_text bar} is
   7.194 +  called @{ML_text foo_to_bar} or @{ML_text bar_of_foo} (never
   7.195 +  @{ML_text foo2bar}, @{ML_text bar_from_foo}, @{ML_text
   7.196 +  bar_for_foo}, or @{ML_text bar4foo}).
   7.197 +
   7.198 +  \item The name component @{ML_text legacy} means that the operation
   7.199 +  is about to be discontinued soon.
   7.200 +
   7.201 +  \item The name component @{ML_text old} means that this is historic
   7.202 +  material that might disappear at some later stage.
   7.203 +
   7.204 +  \item The name component @{ML_text global} means that this works
   7.205 +  with the background theory instead of the regular local context
   7.206 +  (\secref{sec:context}), sometimes for historical reasons, sometimes
   7.207 +  due to a genuine lack of locality of the concept involved, sometimes as
   7.208 +  a fall-back for the lack of a proper context in the application
   7.209 +  code.  Whenever there is a non-global variant available, the
   7.210 +  application should be migrated to use it with a proper local
   7.211 +  context.
   7.212 +
   7.213 +  \item Variables of the main context types of the Isabelle/Isar
   7.214 +  framework (\secref{sec:context} and \chref{ch:local-theory}) have
   7.215 +  firm naming conventions as follows:
   7.216 +
   7.217 +  \begin{itemize}
   7.218 +
   7.219 +  \item theories are called @{ML_text thy}, rarely @{ML_text theory}
   7.220 +  (never @{ML_text thry})
   7.221 +
   7.222 +  \item proof contexts are called @{ML_text ctxt}, rarely @{ML_text
   7.223 +  context} (never @{ML_text ctx})
   7.224 +
   7.225 +  \item generic contexts are called @{ML_text context}, rarely
   7.226 +  @{ML_text ctxt}
   7.227 +
   7.228 +  \item local theories are called @{ML_text lthy}, except for local
   7.229 +  theories that are treated as proof context (which is a semantic
   7.230 +  super-type)
   7.231 +
   7.232 +  \end{itemize}
   7.233 +
   7.234 +  Variations with primed or decimal numbers are always possible, as
   7.235 +  well as semantic prefixes like @{ML_text foo_thy} or @{ML_text
   7.236 +  bar_ctxt}, but the base conventions above need to be preserved.
   7.237 +  This makes it possible to visualize their data flow via plain regular
   7.238 +  expressions in the editor.
   7.239 +
   7.240 +  \item The main logical entities (\secref{ch:logic}) have established
   7.241 +  naming conventions as follows:
   7.242 +
   7.243 +  \begin{itemize}
   7.244 +
   7.245 +  \item sorts are called @{ML_text S}
   7.246 +
   7.247 +  \item types are called @{ML_text T}, @{ML_text U}, or @{ML_text
   7.248 +  ty} (never @{ML_text t})
   7.249 +
   7.250 +  \item terms are called @{ML_text t}, @{ML_text u}, or @{ML_text
   7.251 +  tm} (never @{ML_text trm})
   7.252 +
   7.253 +  \item certified types are called @{ML_text cT}, rarely @{ML_text
   7.254 +  T}, with variants as for types
   7.255 +
   7.256 +  \item certified terms are called @{ML_text ct}, rarely @{ML_text
   7.257 +  t}, with variants as for terms (never @{ML_text ctrm})
   7.258 +
   7.259 +  \item theorems are called @{ML_text th}, or @{ML_text thm}
   7.260 +
   7.261 +  \end{itemize}
   7.262 +
   7.263 +  Proper semantic names override these conventions completely.  For
   7.264 +  example, the left-hand side of an equation (as a term) can be called
   7.265 +  @{ML_text lhs} (not @{ML_text lhs_tm}).  Or a term that is known
   7.266 +  to be a variable can be called @{ML_text v} or @{ML_text x}.
   7.267 +
   7.268 +  \item Tactics (\secref{sec:tactics}) are sufficiently important to
   7.269 +  have specific naming conventions.  The name of a basic tactic
   7.270 +  definition always has a @{ML_text "_tac"} suffix, the subgoal index
   7.271 +  (if applicable) is always called @{ML_text i}, and the goal state
   7.272 +  (if made explicit) is usually called @{ML_text st} instead of the
   7.273 +  somewhat misleading @{ML_text thm}.  Any other arguments are given
   7.274 +  before the latter two, and the general context is given first.
   7.275 +  Example:
   7.276 +
   7.277 +  \begin{verbatim}
   7.278 +  fun my_tac ctxt arg1 arg2 i st = ...
   7.279 +  \end{verbatim}
   7.280 +
   7.281 +  Note that the goal state @{ML_text st} above is rarely made
   7.282 +  explicit, if tactic combinators (tacticals) are used as usual.
   7.283 +
   7.284 +  \end{itemize}
   7.285 +*}
   7.286 +
   7.287 +
   7.288 +subsection {* General source layout *}
   7.289 +
   7.290 +text {* The general Isabelle/ML source layout imitates regular
   7.291 +  type-setting to some extent, augmented by the requirements for
   7.292 +  deeply nested expressions that are commonplace in functional
   7.293 +  programming.
   7.294 +
   7.295 +  \paragraph{Line length} is 80 characters according to ancient
   7.296 +  standards, but we allow as much as 100 characters (not
   7.297 +  more).\footnote{Readability requires keeping the beginning of a line
   7.298 +  in view while watching its end.  Modern wide-screen displays do not
   7.299 +  change the way the human brain works.  Sources also need to be
   7.300 +  printable on plain paper with reasonable font-size.} The extra 20
   7.301 +  characters acknowledge the space requirements due to qualified
   7.302 +  library references in Isabelle/ML.
   7.303 +
   7.304 +  \paragraph{White-space} is used to emphasize the structure of
   7.305 +  expressions, following mostly standard conventions for mathematical
   7.306 +  typesetting, as can be seen in plain {\TeX} or {\LaTeX}.  This
   7.307 +  defines positioning of spaces for parentheses, punctuation, and
   7.308 +  infixes as illustrated here:
   7.309 +
   7.310 +  \begin{verbatim}
   7.311 +  val x = y + z * (a + b);
   7.312 +  val pair = (a, b);
   7.313 +  val record = {foo = 1, bar = 2};
   7.314 +  \end{verbatim}
   7.315 +
   7.316 +  Lines are normally broken \emph{after} an infix operator or
   7.317 +  punctuation character.  For example:
   7.318 +
   7.319 +  \begin{verbatim}
   7.320 +  val x =
   7.321 +    a +
   7.322 +    b +
   7.323 +    c;
   7.324 +
   7.325 +  val tuple =
   7.326 +   (a,
   7.327 +    b,
   7.328 +    c);
   7.329 +  \end{verbatim}
   7.330 +
   7.331 +  Some special infixes (e.g.\ @{ML_text "|>"}) work better at the
   7.332 +  start of the line, but punctuation is always at the end.
   7.333 +
   7.334 +  Function application follows the tradition of @{text "\<lambda>"}-calculus,
   7.335 +  not informal mathematics.  For example: @{ML_text "f a b"} for a
   7.336 +  curried function, or @{ML_text "g (a, b)"} for a tupled function.
   7.337 +  Note that the space between @{ML_text g} and the pair @{ML_text
   7.338 +  "(a, b)"} follows the important principle of
   7.339 +  \emph{compositionality}: the layout of @{ML_text "g p"} does not
   7.340 +  change when @{ML_text "p"} is refined to the concrete pair
   7.341 +  @{ML_text "(a, b)"}.
   7.342 +
   7.343 +  \paragraph{Indentation} uses plain spaces, never hard
   7.344 +  tabulators.\footnote{Tabulators were invented to move the carriage
   7.345 +  of a type-writer to certain predefined positions.  In software they
   7.346 +  could be used as a primitive run-length compression of consecutive
   7.347 +  spaces, but the precise result would depend on non-standardized
   7.348 +  editor configuration.}
   7.349 +
   7.350 +  Each level of nesting is indented by 2 spaces, sometimes 1, very
   7.351 +  rarely 4, never 8 or any other odd number.
   7.352 +
   7.353 +  Indentation follows a simple logical format that only depends on the
   7.354 +  nesting depth, not the accidental length of the text that initiates
   7.355 +  a level of nesting.  Example:
   7.356 +
   7.357 +  \begin{verbatim}
   7.358 +  (* RIGHT *)
   7.359 +
   7.360 +  if b then
   7.361 +    expr1_part1
   7.362 +    expr1_part2
   7.363 +  else
   7.364 +    expr2_part1
   7.365 +    expr2_part2
   7.366 +
   7.367 +
   7.368 +  (* WRONG *)
   7.369 +
   7.370 +  if b then expr1_part1
   7.371 +            expr1_part2
   7.372 +  else expr2_part1
   7.373 +       expr2_part2
   7.374 +  \end{verbatim}
   7.375 +
   7.376 +  The second form has many problems: it assumes a fixed-width font
   7.377 +  when viewing the sources, it uses more space on the line and thus
   7.378 +  makes it hard to observe its strict length limit (working against
   7.379 +  \emph{readability}), it requires extra editing to adapt the layout
   7.380 +  to changes of the initial text (working against
   7.381 +  \emph{maintainability}) etc.
   7.382 +
   7.383 +  \medskip For similar reasons, any kind of two-dimensional or tabular
   7.384 +  layouts, ASCII-art with lines or boxes of asterisks etc.\ should be
   7.385 +  avoided.
   7.386 +
   7.387 +  \paragraph{Complex expressions} that consist of multi-clausal
   7.388 +  function definitions, @{ML_text handle}, @{ML_text case},
   7.389 +  @{ML_text let} (and combinations) require special attention.  The
   7.390 +  syntax of Standard ML is quite ambitious and admits a lot of
   7.391 +  variance that can distort the meaning of the text.
   7.392 +
   7.393 +  Clauses of @{ML_text fun}, @{ML_text fn}, @{ML_text handle},
   7.394 +  @{ML_text case} get extra indentation to indicate the nesting
   7.395 +  clearly.  Example:
   7.396 +
   7.397 +  \begin{verbatim}
   7.398 +  (* RIGHT *)
   7.399 +
   7.400 +  fun foo p1 =
   7.401 +        expr1
   7.402 +    | foo p2 =
   7.403 +        expr2
   7.404 +
   7.405 +
   7.406 +  (* WRONG *)
   7.407 +
   7.408 +  fun foo p1 =
   7.409 +    expr1
   7.410 +    | foo p2 =
   7.411 +    expr2
   7.412 +  \end{verbatim}
   7.413 +
   7.414 +  Body expressions consisting of @{ML_text case} or @{ML_text let}
   7.415 +  require care to maintain compositionality, to prevent loss of
   7.416 +  logical indentation where it is especially important to see the
   7.417 +  structure of the text.  Example:
   7.418 +
   7.419 +  \begin{verbatim}
   7.420 +  (* RIGHT *)
   7.421 +
   7.422 +  fun foo p1 =
   7.423 +        (case e of
   7.424 +          q1 => ...
   7.425 +        | q2 => ...)
   7.426 +    | foo p2 =
   7.427 +        let
   7.428 +          ...
   7.429 +        in
   7.430 +          ...
   7.431 +        end
   7.432 +
   7.433 +
   7.434 +  (* WRONG *)
   7.435 +
   7.436 +  fun foo p1 = case e of
   7.437 +      q1 => ...
   7.438 +    | q2 => ...
   7.439 +    | foo p2 =
   7.440 +    let
   7.441 +      ...
   7.442 +    in
   7.443 +      ...
   7.444 +    end
   7.445 +  \end{verbatim}
   7.446 +
   7.447 +  Extra parentheses around @{ML_text case} expressions are optional,
   7.448 +  but help to analyse the nesting based on character matching in the
   7.449 +  editor.
   7.450 +
   7.451 +  \medskip There are two main exceptions to the overall principle of
   7.452 +  compositionality in the layout of complex expressions.
   7.453 +
   7.454 +  \begin{enumerate}
   7.455 +
   7.456 +  \item @{ML_text "if"} expressions are iterated as if there were
   7.457 +  a multi-branch conditional in SML, e.g.
   7.458 +
   7.459 +  \begin{verbatim}
   7.460 +  (* RIGHT *)
   7.461 +
   7.462 +  if b1 then e1
   7.463 +  else if b2 then e2
   7.464 +  else e3
   7.465 +  \end{verbatim}
   7.466 +
   7.467 +  \item @{ML_text fn} abstractions are often laid out as if they
   7.468 +  lacked any structure by themselves.  This traditional form is
   7.469 +  motivated by the possibility to shift function arguments back and
   7.470 +  forth wrt.\ additional combinators.  Example:
   7.471 +
   7.472 +  \begin{verbatim}
   7.473 +  (* RIGHT *)
   7.474 +
   7.475 +  fun foo x y = fold (fn z =>
   7.476 +    expr)
   7.477 +  \end{verbatim}
   7.478 +
   7.479 +  Here the visual appearance is that of three arguments @{ML_text x},
   7.480 +  @{ML_text y}, @{ML_text z}.
   7.481 +
   7.482 +  \end{enumerate}
   7.483 +
   7.484 +  Such weakly structured layout should be used with great care.  Here
   7.485 +  are some counter-examples involving @{ML_text let} expressions:
   7.486 +
   7.487 +  \begin{verbatim}
   7.488 +  (* WRONG *)
   7.489 +
   7.490 +  fun foo x = let
   7.491 +      val y = ...
   7.492 +    in ... end
   7.493 +
   7.494 +
   7.495 +  (* WRONG *)
   7.496 +
   7.497 +  fun foo x = let
   7.498 +    val y = ...
   7.499 +  in ... end
   7.500 +
   7.501 +
   7.502 +  (* WRONG *)
   7.503 +
   7.504 +  fun foo x =
   7.505 +  let
   7.506 +    val y = ...
   7.507 +  in ... end
   7.508 +  \end{verbatim}
   7.509 +
   7.510 +  \medskip In general the source layout is meant to emphasize the
   7.511 +  structure of complex language expressions, not to pretend that SML
   7.512 +  had a completely different syntax (say that of Haskell or Java).
   7.513 +*}
   7.514 +
   7.515 +
   7.516 +section {* SML embedded into Isabelle/Isar *}
   7.517 +
   7.518 +text {* ML and Isar are intertwined via an open-ended bootstrap
   7.519 +  process that provides more and more programming facilities and
   7.520 +  logical content in an alternating manner.  Bootstrapping starts from
   7.521 +  the raw environment of existing implementations of Standard ML
   7.522 +  (mainly Poly/ML, but also SML/NJ).
   7.523 +
   7.524 +  Isabelle/Pure marks the point where the original ML toplevel is
   7.525 +  superseded by the Isar toplevel that maintains a uniform context for
   7.526 +  arbitrary ML values (see also \secref{sec:context}).  This formal
   7.527 +  environment holds ML compiler bindings, logical entities, and many
   7.528 +  other things.  Raw SML is never encountered again after the initial
   7.529 +  bootstrap of Isabelle/Pure.
   7.530 +
   7.531 +  Object-logics like Isabelle/HOL are built within the
   7.532 +  Isabelle/ML/Isar environment by introducing suitable theories with
   7.533 +  associated ML modules, either inlined or as separate files.  Thus
   7.534 +  Isabelle/HOL is defined as a regular user-space application within
   7.535 +  the Isabelle framework.  Further add-on tools can be implemented in
   7.536 +  ML within the Isar context in the same manner: ML is part of the
   7.537 +  standard repertoire of Isabelle, and there is no distinction between
   7.538 +  ``user'' and ``developer'' in this respect.
   7.539 +*}
   7.540 +
   7.541 +
   7.542 +subsection {* Isar ML commands *}
   7.543 +
   7.544 +text {* The primary Isar source language provides facilities to ``open
   7.545 +  a window'' to the underlying ML compiler.  Especially see the Isar
   7.546 +  commands @{command_ref "ML_file"} and @{command_ref "ML"}: both work the
   7.547 +  same way, only the source text is provided via a file vs.\ inlined,
   7.548 +  respectively.  Apart from embedding ML into the main theory
   7.549 +  definition like that, there are many more commands that refer to ML
   7.550 +  source, such as @{command_ref setup} or @{command_ref declaration}.
   7.551 +  Even more fine-grained embedding of ML into Isar is encountered in
   7.552 +  the proof method @{method_ref tactic}, which refines the pending
   7.553 +  goal state via a given expression of type @{ML_type tactic}.
   7.554 +*}
   7.555 +
   7.556 +text %mlex {* The following artificial example demonstrates some ML
   7.557 +  toplevel declarations within the implicit Isar theory context.  This
   7.558 +  is regular functional programming without referring to logical
   7.559 +  entities yet.
   7.560 +*}
   7.561 +
   7.562 +ML {*
   7.563 +  fun factorial 0 = 1
   7.564 +    | factorial n = n * factorial (n - 1)
   7.565 +*}
   7.566 +
   7.567 +text {* Here the ML environment is already managed by Isabelle, i.e.\
   7.568 +  the @{ML factorial} function is not yet accessible in the preceding
   7.569 +  paragraph, nor in a different theory that is independent from the
   7.570 +  current one in the import hierarchy.
   7.571 +
   7.572 +  Removing the above ML declaration from the source text will remove
   7.573 +  any trace of this definition as expected.  The Isabelle/ML toplevel
   7.574 +  environment is managed in a \emph{stateless} way: unlike the raw ML
   7.575 +  toplevel there are no global side-effects involved
   7.576 +  here.\footnote{Such a stateless compilation environment is also a
   7.577 +  prerequisite for robust parallel compilation within independent
   7.578 +  nodes of the implicit theory development graph.}
   7.579 +
   7.580 +  \medskip The next example shows how to embed ML into Isar proofs, using
    7.581 +  @{command_ref "ML_prf"} instead of @{command_ref "ML"}.
   7.582 +  As illustrated below, the effect on the ML environment is local to
   7.583 +  the whole proof body, ignoring the block structure.
   7.584 +*}
   7.585 +
   7.586 +notepad
   7.587 +begin
   7.588 +  ML_prf %"ML" {* val a = 1 *}
   7.589 +  {
   7.590 +    ML_prf %"ML" {* val b = a + 1 *}
   7.591 +  } -- {* Isar block structure ignored by ML environment *}
   7.592 +  ML_prf %"ML" {* val c = b + 1 *}
   7.593 +end
   7.594 +
   7.595 +text {* By side-stepping the normal scoping rules for Isar proof
   7.596 +  blocks, embedded ML code can refer to the different contexts and
   7.597 +  manipulate corresponding entities, e.g.\ export a fact from a block
   7.598 +  context.
   7.599 +
   7.600 +  \medskip Two further ML commands are useful in certain situations:
   7.601 +  @{command_ref ML_val} and @{command_ref ML_command} are
   7.602 +  \emph{diagnostic} in the sense that there is no effect on the
    7.603 +  underlying environment, and can thus be used anywhere (even outside a
   7.604 +  theory).  The examples below produce long strings of digits by
   7.605 +  invoking @{ML factorial}: @{command ML_val} already takes care of
   7.606 +  printing the ML toplevel result, but @{command ML_command} is silent
   7.607 +  so we produce an explicit output message.  *}
   7.608 +
   7.609 +ML_val {* factorial 100 *}
   7.610 +ML_command {* writeln (string_of_int (factorial 100)) *}
   7.611 +
   7.612 +notepad
   7.613 +begin
   7.614 +  ML_val {* factorial 100 *}
   7.615 +  ML_command {* writeln (string_of_int (factorial 100)) *}
   7.616 +end
   7.617 +
   7.618 +
   7.619 +subsection {* Compile-time context *}
   7.620 +
   7.621 +text {* Whenever the ML compiler is invoked within Isabelle/Isar, the
   7.622 +  formal context is passed as a thread-local reference variable.  Thus
   7.623 +  ML code may access the theory context during compilation, by reading
   7.624 +  or writing the (local) theory under construction.  Note that such
   7.625 +  direct access to the compile-time context is rare.  In practice it
   7.626 +  is typically done via some derived ML functions instead.
   7.627 +*}
   7.628 +
   7.629 +text %mlref {*
   7.630 +  \begin{mldecls}
   7.631 +  @{index_ML ML_Context.the_generic_context: "unit -> Context.generic"} \\
   7.632 +  @{index_ML "Context.>>": "(Context.generic -> Context.generic) -> unit"} \\
   7.633 +  @{index_ML ML_Thms.bind_thms: "string * thm list -> unit"} \\
   7.634 +  @{index_ML ML_Thms.bind_thm: "string * thm -> unit"} \\
   7.635 +  \end{mldecls}
   7.636 +
   7.637 +  \begin{description}
   7.638 +
   7.639 +  \item @{ML "ML_Context.the_generic_context ()"} refers to the theory
   7.640 +  context of the ML toplevel --- at compile time.  ML code needs to
   7.641 +  take care to refer to @{ML "ML_Context.the_generic_context ()"}
   7.642 +  correctly.  Recall that evaluation of a function body is delayed
   7.643 +  until actual run-time.
   7.644 +
   7.645 +  \item @{ML "Context.>>"}~@{text f} applies context transformation
   7.646 +  @{text f} to the implicit context of the ML toplevel.
   7.647 +
   7.648 +  \item @{ML ML_Thms.bind_thms}~@{text "(name, thms)"} stores a list of
   7.649 +  theorems produced in ML both in the (global) theory context and the
   7.650 +  ML toplevel, associating it with the provided name.  Theorems are
   7.651 +  put into a global ``standard'' format before being stored.
   7.652 +
   7.653 +  \item @{ML ML_Thms.bind_thm} is similar to @{ML ML_Thms.bind_thms} but refers to a
   7.654 +  singleton fact.
   7.655 +
   7.656 +  \end{description}
   7.657 +
   7.658 +  It is important to note that the above functions are really
   7.659 +  restricted to the compile time, even though the ML compiler is
   7.660 +  invoked at run-time.  The majority of ML code either uses static
   7.661 +  antiquotations (\secref{sec:ML-antiq}) or refers to the theory or
   7.662 +  proof context at run-time, by explicit functional abstraction.
   7.663 +*}
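          +
          +text %mlex {* The following artificial sketch reads the compile-time
          +  context via @{ML "ML_Context.the_generic_context"}.  It assumes the
          +  usual Isabelle/Pure operations @{ML Context.theory_of} and
          +  @{ML Context.theory_name}, which merely serve to produce some
          +  visible output here.
          +*}
          +
          +ML {*
          +  (*the implicit compile-time context, retrieved eagerly at this point*)
          +  val context = ML_Context.the_generic_context ();
          +  val thy = Context.theory_of context;
          +
          +  writeln ("ML compilation within theory: " ^ Context.theory_name thy);
          +*}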
   7.664 +
   7.665 +
   7.666 +subsection {* Antiquotations \label{sec:ML-antiq} *}
   7.667 +
   7.668 +text {* A very important consequence of embedding SML into Isar is the
   7.669 +  concept of \emph{ML antiquotation}.  The standard token language of
   7.670 +  ML is augmented by special syntactic entities of the following form:
   7.671 +
   7.672 +  @{rail \<open>
   7.673 +  @{syntax_def antiquote}: '@{' nameref args '}'
   7.674 +  \<close>}
   7.675 +
   7.676 +  Here @{syntax nameref} and @{syntax args} are regular outer syntax
   7.677 +  categories \cite{isabelle-isar-ref}.  Attributes and proof methods
   7.678 +  use similar syntax.
   7.679 +
   7.680 +  \medskip A regular antiquotation @{text "@{name args}"} processes
   7.681 +  its arguments by the usual means of the Isar source language, and
   7.682 +  produces corresponding ML source text, either as literal
   7.683 +  \emph{inline} text (e.g. @{text "@{term t}"}) or abstract
   7.684 +  \emph{value} (e.g. @{text "@{thm th}"}).  This pre-compilation
    7.685 +  scheme makes it possible to refer to formal entities in a robust manner, with
   7.686 +  proper static scoping and with some degree of logical checking of
   7.687 +  small portions of the code.
   7.688 +*}
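          +
          +text %mlex {* As a minimal sketch, the following ML block uses the
          +  antiquotations @{text "@{term t}"} and @{text "@{thm th}"} mentioned
          +  above; the particular term and the fact @{text refl} are arbitrary
          +  examples.
          +*}
          +
          +ML {*
          +  (*inline text: ML source that constructs the term is inserted here*)
          +  val t = @{term "f x"};
          +
          +  (*abstract value: the theorem is resolved at compile time*)
          +  val th = @{thm refl};
          +*}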
   7.689 +
   7.690 +
   7.691 +subsection {* Printing ML values *}
   7.692 +
   7.693 +text {* The ML compiler knows about the structure of values according
   7.694 +  to their static type, and can print them in the manner of the
   7.695 +  toplevel loop, although the details are non-portable.  The
   7.696 +  antiquotations @{ML_antiquotation_def "make_string"} and
   7.697 +  @{ML_antiquotation_def "print"} provide a quasi-portable way to
   7.698 +  refer to this potential capability of the underlying ML system in
   7.699 +  generic Isabelle/ML sources.
   7.700 +
   7.701 +  This is occasionally useful for diagnostic or demonstration
   7.702 +  purposes.  Note that production-quality tools require proper
   7.703 +  user-level error messages. *}
   7.704 +
   7.705 +text %mlantiq {*
   7.706 +  \begin{matharray}{rcl}
   7.707 +  @{ML_antiquotation_def "make_string"} & : & @{text ML_antiquotation} \\
   7.708 +  @{ML_antiquotation_def "print"} & : & @{text ML_antiquotation} \\
   7.709 +  \end{matharray}
   7.710 +
   7.711 +  @{rail \<open>
   7.712 +  @@{ML_antiquotation make_string}
   7.713 +  ;
   7.714 +  @@{ML_antiquotation print} @{syntax name}?
   7.715 +  \<close>}
   7.716 +
   7.717 +  \begin{description}
   7.718 +
   7.719 +  \item @{text "@{make_string}"} inlines a function to print arbitrary
   7.720 +  values similar to the ML toplevel.  The result is compiler dependent
   7.721 +  and may fall back on "?" in certain situations.
   7.722 +
   7.723 +  \item @{text "@{print f}"} uses the ML function @{text "f: string ->
   7.724 +  unit"} to output the result of @{text "@{make_string}"} above,
   7.725 +  together with the source position of the antiquotation.  The default
   7.726 +  output function is @{ML writeln}.
   7.727 +
   7.728 +  \end{description}
   7.729 +*}
   7.730 +
   7.731 +text %mlex {* The following artificial examples show how to produce
   7.732 +  adhoc output of ML values for debugging purposes. *}
   7.733 +
   7.734 +ML {*
   7.735 +  val x = 42;
   7.736 +  val y = true;
   7.737 +
   7.738 +  writeln (@{make_string} {x = x, y = y});
   7.739 +
   7.740 +  @{print} {x = x, y = y};
   7.741 +  @{print tracing} {x = x, y = y};
   7.742 +*}
   7.743 +
   7.744 +
   7.745 +section {* Canonical argument order \label{sec:canonical-argument-order} *}
   7.746 +
   7.747 +text {* Standard ML is a language in the tradition of @{text
   7.748 +  "\<lambda>"}-calculus and \emph{higher-order functional programming},
   7.749 +  similar to OCaml, Haskell, or Isabelle/Pure and HOL as logical
   7.750 +  languages.  Getting acquainted with the native style of representing
   7.751 +  functions in that setting can save a lot of extra boiler-plate of
   7.752 +  redundant shuffling of arguments, auxiliary abstractions etc.
   7.753 +
   7.754 +  Functions are usually \emph{curried}: the idea of turning arguments
    7.755 +  of type @{text "\<tau>\<^sub>i"} (for @{text "i \<in> {1, \<dots>, n}"}) into a result of
   7.756 +  type @{text "\<tau>"} is represented by the iterated function space
   7.757 +  @{text "\<tau>\<^sub>1 \<rightarrow> \<dots> \<rightarrow> \<tau>\<^sub>n \<rightarrow> \<tau>"}.  This is isomorphic to the well-known
   7.758 +  encoding via tuples @{text "\<tau>\<^sub>1 \<times> \<dots> \<times> \<tau>\<^sub>n \<rightarrow> \<tau>"}, but the curried
   7.759 +  version fits more smoothly into the basic calculus.\footnote{The
   7.760 +  difference is even more significant in higher-order logic, because
   7.761 +  the redundant tuple structure needs to be accommodated by formal
   7.762 +  reasoning.}
   7.763 +
    7.764 +  Currying gives some flexibility due to \emph{partial application}.  A
   7.765 +  function @{text "f: \<tau>\<^sub>1 \<rightarrow> \<tau>\<^sub>2 \<rightarrow> \<tau>"} can be applied to @{text "x: \<tau>\<^sub>1"}
   7.766 +  and the remaining @{text "(f x): \<tau>\<^sub>2 \<rightarrow> \<tau>"} passed to another function
   7.767 +  etc.  How well this works in practice depends on the order of
   7.768 +  arguments.  In the worst case, arguments are arranged erratically,
   7.769 +  and using a function in a certain situation always requires some
    7.770 +  glue code.  Thus we would get exponentially many opportunities to
   7.771 +  decorate the code with meaningless permutations of arguments.
   7.772 +
   7.773 +  This can be avoided by \emph{canonical argument order}, which
   7.774 +  observes certain standard patterns and minimizes adhoc permutations
   7.775 +  in their application.  In Isabelle/ML, large portions of text can be
   7.776 +  written without auxiliary operations like @{text "swap: \<alpha> \<times> \<beta> \<rightarrow> \<beta> \<times>
   7.777 +  \<alpha>"} or @{text "C: (\<alpha> \<rightarrow> \<beta> \<rightarrow> \<gamma>) \<rightarrow> (\<beta> \<rightarrow> \<alpha> \<rightarrow> \<gamma>)"} (the latter not
   7.778 +  present in the Isabelle/ML library).
   7.779 +
   7.780 +  \medskip The basic idea is that arguments that vary less are moved
   7.781 +  further to the left than those that vary more.  Two particularly
   7.782 +  important categories of functions are \emph{selectors} and
   7.783 +  \emph{updates}.
   7.784 +
   7.785 +  The subsequent scheme is based on a hypothetical set-like container
   7.786 +  of type @{text "\<beta>"} that manages elements of type @{text "\<alpha>"}.  Both
   7.787 +  the names and types of the associated operations are canonical for
   7.788 +  Isabelle/ML.
   7.789 +
   7.790 +  \begin{center}
   7.791 +  \begin{tabular}{ll}
   7.792 +  kind & canonical name and type \\\hline
   7.793 +  selector & @{text "member: \<beta> \<rightarrow> \<alpha> \<rightarrow> bool"} \\
   7.794 +  update & @{text "insert: \<alpha> \<rightarrow> \<beta> \<rightarrow> \<beta>"} \\
   7.795 +  \end{tabular}
   7.796 +  \end{center}
   7.797 +
   7.798 +  Given a container @{text "B: \<beta>"}, the partially applied @{text
   7.799 +  "member B"} is a predicate over elements @{text "\<alpha> \<rightarrow> bool"}, and
   7.800 +  thus represents the intended denotation directly.  It is customary
   7.801 +  to pass the abstract predicate to further operations, not the
   7.802 +  concrete container.  The argument order makes it easy to use other
   7.803 +  combinators: @{text "forall (member B) list"} will check a list of
   7.804 +  elements for membership in @{text "B"} etc. Often the explicit
   7.805 +  @{text "list"} is pointless and can be contracted to @{text "forall
   7.806 +  (member B)"} to get directly a predicate again.
   7.807 +
   7.808 +  In contrast, an update operation varies the container, so it moves
   7.809 +  to the right: @{text "insert a"} is a function @{text "\<beta> \<rightarrow> \<beta>"} to
   7.810 +  insert a value @{text "a"}.  These can be composed naturally as
   7.811 +  @{text "insert c \<circ> insert b \<circ> insert a"}.  The slightly awkward
   7.812 +  inversion of the composition order is due to conventional
   7.813 +  mathematical notation, which can be easily amended as explained
   7.814 +  below.
   7.815 +*}
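          +
          +text %mlex {* The following minimal sketch instantiates this scheme with
          +  plain Isabelle/ML lists as the set-like container, using the library
          +  operations @{ML member}, @{ML insert}, and @{ML forall} (see also
          +  the list operations described under basic data types below).
          +*}
          +
          +ML {*
          +  val B = [1, 2, 3, 4];
          +
          +  (*partial application yields the membership predicate of B*)
          +  val is_elem = member (op =) B;
          +  @{assert} (forall is_elem [2, 3]);
          +
          +  (*updates vary the container and compose as functions on it*)
          +  val B' = insert (op =) 1 (insert (op =) 5 B);
          +  @{assert} (B' = [5, 1, 2, 3, 4]);
          +*}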
   7.816 +
   7.817 +
   7.818 +subsection {* Forward application and composition *}
   7.819 +
   7.820 +text {* Regular function application and infix notation works best for
   7.821 +  relatively deeply structured expressions, e.g.\ @{text "h (f x y + g
   7.822 +  z)"}.  The important special case of \emph{linear transformation}
   7.823 +  applies a cascade of functions @{text "f\<^sub>n (\<dots> (f\<^sub>1 x))"}.  This
   7.824 +  becomes hard to read and maintain if the functions are themselves
   7.825 +  given as complex expressions.  The notation can be significantly
   7.826 +  improved by introducing \emph{forward} versions of application and
   7.827 +  composition as follows:
   7.828 +
   7.829 +  \medskip
   7.830 +  \begin{tabular}{lll}
   7.831 +  @{text "x |> f"} & @{text "\<equiv>"} & @{text "f x"} \\
   7.832 +  @{text "(f #> g) x"} & @{text "\<equiv>"} & @{text "x |> f |> g"} \\
   7.833 +  \end{tabular}
   7.834 +  \medskip
   7.835 +
    7.836 +  This makes it convenient to write @{text "x |> f\<^sub>1 |> \<dots> |> f\<^sub>n"} or
   7.837 +  @{text "f\<^sub>1 #> \<dots> #> f\<^sub>n"} for its functional abstraction over @{text
   7.838 +  "x"}.
   7.839 +
   7.840 +  \medskip There is an additional set of combinators to accommodate
   7.841 +  multiple results (via pairs) that are passed on as multiple
   7.842 +  arguments (via currying).
   7.843 +
   7.844 +  \medskip
   7.845 +  \begin{tabular}{lll}
   7.846 +  @{text "(x, y) |-> f"} & @{text "\<equiv>"} & @{text "f x y"} \\
   7.847 +  @{text "(f #-> g) x"} & @{text "\<equiv>"} & @{text "x |> f |-> g"} \\
   7.848 +  \end{tabular}
   7.849 +  \medskip
   7.850 +*}
   7.851 +
   7.852 +text %mlref {*
   7.853 +  \begin{mldecls}
   7.854 +  @{index_ML_op "|> ": "'a * ('a -> 'b) -> 'b"} \\
   7.855 +  @{index_ML_op "|-> ": "('c * 'a) * ('c -> 'a -> 'b) -> 'b"} \\
   7.856 +  @{index_ML_op "#> ": "('a -> 'b) * ('b -> 'c) -> 'a -> 'c"} \\
   7.857 +  @{index_ML_op "#-> ": "('a -> 'c * 'b) * ('c -> 'b -> 'd) -> 'a -> 'd"} \\
   7.858 +  \end{mldecls}
   7.859 +*}
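          +
          +text %mlex {* A minimal sketch of these combinators, using the
          +  elementary operations @{ML Integer.add} and @{ML Integer.mult} of
          +  structure @{ML_structure Integer}.
          +*}
          +
          +ML {*
          +  (*forward application: a linear transformation of the value 1*)
          +  val n = 1 |> Integer.add 2 |> Integer.mult 3;
          +  @{assert} (n = 9);
          +
          +  (*forward composition: the same transformation as a function*)
          +  val f = Integer.add 2 #> Integer.mult 3;
          +  @{assert} (f 1 = 9);
          +
          +  (*a pair result passed on as two curried arguments*)
          +  val m = (2, 3) |-> Integer.add;
          +  @{assert} (m = 5);
          +*}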
   7.860 +
   7.861 +
   7.862 +subsection {* Canonical iteration *}
   7.863 +
   7.864 +text {* As explained above, a function @{text "f: \<alpha> \<rightarrow> \<beta> \<rightarrow> \<beta>"} can be
   7.865 +  understood as update on a configuration of type @{text "\<beta>"},
   7.866 +  parametrized by arguments of type @{text "\<alpha>"}.  Given @{text "a: \<alpha>"}
   7.867 +  the partial application @{text "(f a): \<beta> \<rightarrow> \<beta>"} operates
   7.868 +  homogeneously on @{text "\<beta>"}.  This can be iterated naturally over a
   7.869 +  list of parameters @{text "[a\<^sub>1, \<dots>, a\<^sub>n]"} as @{text "f a\<^sub>1 #> \<dots> #> f a\<^sub>n"}.
   7.870 +  The latter expression is again a function @{text "\<beta> \<rightarrow> \<beta>"}.
   7.871 +  It can be applied to an initial configuration @{text "b: \<beta>"} to
   7.872 +  start the iteration over the given list of arguments: each @{text
   7.873 +  "a"} in @{text "a\<^sub>1, \<dots>, a\<^sub>n"} is applied consecutively by updating a
   7.874 +  cumulative configuration.
   7.875 +
   7.876 +  The @{text fold} combinator in Isabelle/ML lifts a function @{text
   7.877 +  "f"} as above to its iterated version over a list of arguments.
   7.878 +  Lifting can be repeated, e.g.\ @{text "(fold \<circ> fold) f"} iterates
   7.879 +  over a list of lists as expected.
   7.880 +
   7.881 +  The variant @{text "fold_rev"} works inside-out over the list of
   7.882 +  arguments, such that @{text "fold_rev f \<equiv> fold f \<circ> rev"} holds.
   7.883 +
   7.884 +  The @{text "fold_map"} combinator essentially performs @{text
   7.885 +  "fold"} and @{text "map"} simultaneously: each application of @{text
   7.886 +  "f"} produces an updated configuration together with a side-result;
   7.887 +  the iteration collects all such side-results as a separate list.
   7.888 +*}
   7.889 +
   7.890 +text %mlref {*
   7.891 +  \begin{mldecls}
   7.892 +  @{index_ML fold: "('a -> 'b -> 'b) -> 'a list -> 'b -> 'b"} \\
   7.893 +  @{index_ML fold_rev: "('a -> 'b -> 'b) -> 'a list -> 'b -> 'b"} \\
   7.894 +  @{index_ML fold_map: "('a -> 'b -> 'c * 'b) -> 'a list -> 'b -> 'c list * 'b"} \\
   7.895 +  \end{mldecls}
   7.896 +
   7.897 +  \begin{description}
   7.898 +
   7.899 +  \item @{ML fold}~@{text f} lifts the parametrized update function
   7.900 +  @{text "f"} to a list of parameters.
   7.901 +
   7.902 +  \item @{ML fold_rev}~@{text "f"} is similar to @{ML fold}~@{text
   7.903 +  "f"}, but works inside-out.
   7.904 +
   7.905 +  \item @{ML fold_map}~@{text "f"} lifts the parametrized update
   7.906 +  function @{text "f"} (with side-result) to a list of parameters and
   7.907 +  cumulative side-results.
   7.908 +
   7.909 +  \end{description}
   7.910 +
   7.911 +  \begin{warn}
   7.912 +  The literature on functional programming provides a multitude of
   7.913 +  combinators called @{text "foldl"}, @{text "foldr"} etc.  SML97
   7.914 +  provides its own variations as @{ML List.foldl} and @{ML
   7.915 +  List.foldr}, while the classic Isabelle library also has the
   7.916 +  historic @{ML Library.foldl} and @{ML Library.foldr}.  To avoid
   7.917 +  unnecessary complication and confusion, all these historical
   7.918 +  versions should be ignored, and @{ML fold} (or @{ML fold_rev}) used
   7.919 +  exclusively.
   7.920 +  \end{warn}
   7.921 +*}
   7.922 +
   7.923 +text %mlex {* The following example shows how to fill a text buffer
   7.924 +  incrementally by adding strings, either individually or from a given
   7.925 +  list.
   7.926 +*}
   7.927 +
   7.928 +ML {*
   7.929 +  val s =
   7.930 +    Buffer.empty
   7.931 +    |> Buffer.add "digits: "
   7.932 +    |> fold (Buffer.add o string_of_int) (0 upto 9)
   7.933 +    |> Buffer.content;
   7.934 +
   7.935 +  @{assert} (s = "digits: 0123456789");
   7.936 +*}
   7.937 +
   7.938 +text {* Note how @{ML "fold (Buffer.add o string_of_int)"} above saves
   7.939 +  an extra @{ML "map"} over the given list.  This kind of peephole
   7.940 +  optimization reduces both the code size and the tree structures in
   7.941 +  memory (``deforestation''), but it requires some practice to read
   7.942 +  and write fluently.
   7.943 +
   7.944 +  \medskip The next example elaborates the idea of canonical
   7.945 +  iteration, demonstrating fast accumulation of tree content using a
   7.946 +  text buffer.
   7.947 +*}
   7.948 +
   7.949 +ML {*
   7.950 +  datatype tree = Text of string | Elem of string * tree list;
   7.951 +
   7.952 +  fun slow_content (Text txt) = txt
   7.953 +    | slow_content (Elem (name, ts)) =
   7.954 +        "<" ^ name ^ ">" ^
   7.955 +        implode (map slow_content ts) ^
   7.956 +        "</" ^ name ^ ">"
   7.957 +
   7.958 +  fun add_content (Text txt) = Buffer.add txt
   7.959 +    | add_content (Elem (name, ts)) =
   7.960 +        Buffer.add ("<" ^ name ^ ">") #>
   7.961 +        fold add_content ts #>
   7.962 +        Buffer.add ("</" ^ name ^ ">");
   7.963 +
   7.964 +  fun fast_content tree =
   7.965 +    Buffer.empty |> add_content tree |> Buffer.content;
   7.966 +*}
   7.967 +
   7.968 +text {* The slow part of @{ML slow_content} is the @{ML implode} of
   7.969 +  the recursive results, because it copies previously produced strings
   7.970 +  again.
   7.971 +
   7.972 +  The incremental @{ML add_content} avoids this by operating on a
   7.973 +  buffer that is passed through in a linear fashion.  Using @{ML_text
   7.974 +  "#>"} and contraction over the actual buffer argument saves some
   7.975 +  additional boiler-plate.  Of course, the two @{ML "Buffer.add"}
   7.976 +  invocations with concatenated strings could have been split into
   7.977 +  smaller parts, but this would have obfuscated the source without
   7.978 +  making a big difference in allocations.  Here we have done some
   7.979 +  peephole-optimization for the sake of readability.
   7.980 +
   7.981 +  Another benefit of @{ML add_content} is its ``open'' form as a
   7.982 +  function on buffers that can be continued in further linear
   7.983 +  transformations, folding etc.  Thus it is more compositional than
    7.984 +  the naive @{ML slow_content}.  As a realistic example, compare the
   7.985 +  old-style @{ML "Term.maxidx_of_term: term -> int"} with the newer
   7.986 +  @{ML "Term.maxidx_term: term -> int -> int"} in Isabelle/Pure.
   7.987 +
    7.988 +  Note that @{ML fast_content} above is only defined as an example.  In
   7.989 +  many practical situations, it is customary to provide the
   7.990 +  incremental @{ML add_content} only and leave the initialization and
   7.991 +  termination to the concrete application by the user.
   7.992 +*}
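          +
          +text %mlex {* The following artificial example sketches @{ML fold_map}: a
          +  counter serves as the cumulative configuration, while the numbered
          +  elements are collected as side-results.
          +*}
          +
          +ML {*
          +  fun number x i = ((i, x), i + 1);
          +
          +  val (numbered, next) = fold_map number ["a", "b", "c"] 1;
          +
          +  @{assert} (numbered = [(1, "a"), (2, "b"), (3, "c")]);
          +  @{assert} (next = 4);
          +*}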
   7.993 +
   7.994 +
   7.995 +section {* Message output channels \label{sec:message-channels} *}
   7.996 +
   7.997 +text {* Isabelle provides output channels for different kinds of
   7.998 +  messages: regular output, high-volume tracing information, warnings,
   7.999 +  and errors.
  7.1000 +
  7.1001 +  Depending on the user interface involved, these messages may appear
  7.1002 +  in different text styles or colours.  The standard output for
  7.1003 +  terminal sessions prefixes each line of warnings by @{verbatim
  7.1004 +  "###"} and errors by @{verbatim "***"}, but leaves anything else
  7.1005 +  unchanged.
  7.1006 +
  7.1007 +  Messages are associated with the transaction context of the running
  7.1008 +  Isar command.  This enables the front-end to manage commands and
  7.1009 +  resulting messages together.  For example, after deleting a command
  7.1010 +  from a given theory document version, the corresponding message
  7.1011 +  output can be retracted from the display.
  7.1012 +*}
  7.1013 +
  7.1014 +text %mlref {*
  7.1015 +  \begin{mldecls}
  7.1016 +  @{index_ML writeln: "string -> unit"} \\
  7.1017 +  @{index_ML tracing: "string -> unit"} \\
  7.1018 +  @{index_ML warning: "string -> unit"} \\
  7.1019 +  @{index_ML error: "string -> 'a"} \\
  7.1020 +  \end{mldecls}
  7.1021 +
  7.1022 +  \begin{description}
  7.1023 +
  7.1024 +  \item @{ML writeln}~@{text "text"} outputs @{text "text"} as regular
  7.1025 +  message.  This is the primary message output operation of Isabelle
  7.1026 +  and should be used by default.
  7.1027 +
  7.1028 +  \item @{ML tracing}~@{text "text"} outputs @{text "text"} as special
  7.1029 +  tracing message, indicating potential high-volume output to the
  7.1030 +  front-end (hundreds or thousands of messages issued by a single
  7.1031 +  command).  The idea is to allow the user-interface to downgrade the
  7.1032 +  quality of message display to achieve higher throughput.
  7.1033 +
  7.1034 +  Note that the user might have to take special actions to see tracing
  7.1035 +  output, e.g.\ switch to a different output window.  So this channel
  7.1036 +  should not be used for regular output.
  7.1037 +
  7.1038 +  \item @{ML warning}~@{text "text"} outputs @{text "text"} as
  7.1039 +  warning, which typically means some extra emphasis on the front-end
  7.1040 +  side (color highlighting, icons, etc.).
  7.1041 +
  7.1042 +  \item @{ML error}~@{text "text"} raises exception @{ML ERROR}~@{text
  7.1043 +  "text"} and thus lets the Isar toplevel print @{text "text"} on the
  7.1044 +  error channel, which typically means some extra emphasis on the
  7.1045 +  front-end side (color highlighting, icons, etc.).
  7.1046 +
  7.1047 +  This assumes that the exception is not handled before the command
  7.1048 +  terminates.  Handling exception @{ML ERROR}~@{text "text"} is a
  7.1049 +  perfectly legal alternative: it means that the error is absorbed
  7.1050 +  without any message output.
  7.1051 +
  7.1052 +  \begin{warn}
  7.1053 +  The actual error channel is accessed via @{ML Output.error_message}, but
  7.1054 +  the old interaction protocol of Proof~General \emph{crashes} if that
  7.1055 +  function is used in regular ML code: error output and toplevel
  7.1056 +  command failure always need to coincide in classic TTY interaction.
  7.1057 +  \end{warn}
  7.1058 +
  7.1059 +  \end{description}
  7.1060 +
  7.1061 +  \begin{warn}
  7.1062 +  Regular Isabelle/ML code should output messages exclusively by the
  7.1063 +  official channels.  Using raw I/O on \emph{stdout} or \emph{stderr}
  7.1064 +  instead (e.g.\ via @{ML TextIO.output}) is apt to cause problems in
  7.1065 +  the presence of parallel and asynchronous processing of Isabelle
  7.1066 +  theories.  Such raw output might be displayed by the front-end in
  7.1067 +  some system console log, with a low chance that the user will ever
  7.1068 +  see it.  Moreover, as a genuine side-effect on global process
  7.1069 +  channels, there is no proper way to retract output when Isar command
  7.1070 +  transactions are reset by the system.
  7.1071 +  \end{warn}
  7.1072 +
  7.1073 +  \begin{warn}
  7.1074 +  The message channels should be used in a message-oriented manner.
  7.1075 +  This means that multi-line output that logically belongs together is
  7.1076 +  issued by a \emph{single} invocation of @{ML writeln} etc.\ with the
  7.1077 +  functional concatenation of all message constituents.
  7.1078 +  \end{warn}
  7.1079 +*}
  7.1080 +
  7.1081 +text %mlex {* The following example demonstrates a multi-line
  7.1082 +  warning.  Note that in some situations the user sees only the first
  7.1083 +  line, so the most important point should be made first.
  7.1084 +*}
  7.1085 +
  7.1086 +ML_command {*
  7.1087 +  warning (cat_lines
  7.1088 +   ["Beware the Jabberwock, my son!",
  7.1089 +    "The jaws that bite, the claws that catch!",
  7.1090 +    "Beware the Jubjub Bird, and shun",
  7.1091 +    "The frumious Bandersnatch!"]);
  7.1092 +*}
  7.1093 +
  7.1094 +
  7.1095 +section {* Exceptions \label{sec:exceptions} *}
  7.1096 +
  7.1097 +text {* The Standard ML semantics of strict functional evaluation
  7.1098 +  together with exceptions is rather well defined, but some delicate
   7.1099 +  points need to be observed to prevent ML programs from going wrong
  7.1100 +  despite static type-checking.  Exceptions in Isabelle/ML are
  7.1101 +  subsequently categorized as follows.
  7.1102 +
  7.1103 +  \paragraph{Regular user errors.}  These are meant to provide
  7.1104 +  informative feedback about malformed input etc.
  7.1105 +
  7.1106 +  The \emph{error} function raises the corresponding \emph{ERROR}
  7.1107 +  exception, with a plain text message as argument.  \emph{ERROR}
  7.1108 +  exceptions can be handled internally, in order to be ignored, turned
  7.1109 +  into other exceptions, or cascaded by appending messages.  If the
  7.1110 +  corresponding Isabelle/Isar command terminates with an \emph{ERROR}
  7.1111 +  exception state, the toplevel will print the result on the error
  7.1112 +  channel (see \secref{sec:message-channels}).
  7.1113 +
  7.1114 +  It is considered bad style to refer to internal function names or
  7.1115 +  values in ML source notation in user error messages.
  7.1116 +
  7.1117 +  Grammatical correctness of error messages can be improved by
  7.1118 +  \emph{omitting} final punctuation: messages are often concatenated
  7.1119 +  or put into a larger context (e.g.\ augmented with source position).
   7.1120 +  By not insisting on the final word at the origin of the error, the
  7.1121 +  system can perform its administrative tasks more easily and
  7.1122 +  robustly.
  7.1123 +
  7.1124 +  \paragraph{Program failures.}  There is a handful of standard
  7.1125 +  exceptions that indicate general failure situations, or failures of
  7.1126 +  core operations on logical entities (types, terms, theorems,
  7.1127 +  theories, see \chref{ch:logic}).
  7.1128 +
  7.1129 +  These exceptions indicate a genuine breakdown of the program, so the
  7.1130 +  main purpose is to determine quickly what has happened where.
  7.1131 +  Traditionally, the (short) exception message would include the name
  7.1132 +  of an ML function, although this is no longer necessary, because the
  7.1133 +  ML runtime system prints a detailed source position of the
  7.1134 +  corresponding @{ML_text raise} keyword.
  7.1135 +
  7.1136 +  \medskip User modules can always introduce their own custom
  7.1137 +  exceptions locally, e.g.\ to organize internal failures robustly
  7.1138 +  without overlapping with existing exceptions.  Exceptions that are
  7.1139 +  exposed in module signatures require extra care, though, and should
  7.1140 +  \emph{not} be introduced by default.  Surprise by users of a module
   7.1141 +  can often be minimized by using plain user errors instead.
  7.1142 +
  7.1143 +  \paragraph{Interrupts.}  These indicate arbitrary system events:
  7.1144 +  both the ML runtime system and the Isabelle/ML infrastructure signal
  7.1145 +  various exceptional situations by raising the special
  7.1146 +  \emph{Interrupt} exception in user code.
  7.1147 +
   7.1148 +  This is the one and only way that physical events can intrude on an
  7.1149 +  Isabelle/ML program.  Such an interrupt can mean out-of-memory,
  7.1150 +  stack overflow, timeout, internal signaling of threads, or the user
  7.1151 +  producing a console interrupt manually etc.  An Isabelle/ML program
  7.1152 +  that intercepts interrupts becomes dependent on physical effects of
  7.1153 +  the environment.  Even worse, exception handling patterns that are
   7.1154 +  too general by accident, e.g.\ by misspelled exception constructors,
  7.1155 +  will cover interrupts unintentionally and thus render the program
  7.1156 +  semantics ill-defined.
  7.1157 +
  7.1158 +  Note that the Interrupt exception dates back to the original SML90
  7.1159 +  language definition.  It was excluded from the SML97 version to
  7.1160 +  avoid its malign impact on ML program semantics, but without
  7.1161 +  providing a viable alternative.  Isabelle/ML recovers physical
  7.1162 +  interruptibility (which is an indispensable tool to implement
  7.1163 +  managed evaluation of command transactions), but requires user code
  7.1164 +  to be strictly transparent wrt.\ interrupts.
  7.1165 +
  7.1166 +  \begin{warn}
  7.1167 +  Isabelle/ML user code needs to terminate promptly on interruption,
  7.1168 +  without guessing at its meaning to the system infrastructure.
  7.1169 +  Temporary handling of interrupts for cleanup of global resources
  7.1170 +  etc.\ needs to be followed immediately by re-raising of the original
  7.1171 +  exception.
  7.1172 +  \end{warn}
  7.1173 +*}
  7.1174 +
  7.1175 +text %mlref {*
  7.1176 +  \begin{mldecls}
  7.1177 +  @{index_ML try: "('a -> 'b) -> 'a -> 'b option"} \\
  7.1178 +  @{index_ML can: "('a -> 'b) -> 'a -> bool"} \\
  7.1179 +  @{index_ML_exception ERROR: string} \\
  7.1180 +  @{index_ML_exception Fail: string} \\
  7.1181 +  @{index_ML Exn.is_interrupt: "exn -> bool"} \\
  7.1182 +  @{index_ML reraise: "exn -> 'a"} \\
  7.1183 +  @{index_ML Runtime.exn_trace: "(unit -> 'a) -> 'a"} \\
  7.1184 +  \end{mldecls}
  7.1185 +
  7.1186 +  \begin{description}
  7.1187 +
  7.1188 +  \item @{ML try}~@{text "f x"} makes the partiality of evaluating
  7.1189 +  @{text "f x"} explicit via the option datatype.  Interrupts are
  7.1190 +  \emph{not} handled here, i.e.\ this form serves as safe replacement
  7.1191 +  for the \emph{unsafe} version @{ML_text "(SOME"}~@{text "f
  7.1192 +  x"}~@{ML_text "handle _ => NONE)"} that is occasionally seen in
  7.1193 +  books about SML97, not Isabelle/ML.
  7.1194 +
  7.1195 +  \item @{ML can} is similar to @{ML try} with more abstract result.
  7.1196 +
  7.1197 +  \item @{ML ERROR}~@{text "msg"} represents user errors; this
  7.1198 +  exception is normally raised indirectly via the @{ML error} function
  7.1199 +  (see \secref{sec:message-channels}).
  7.1200 +
  7.1201 +  \item @{ML Fail}~@{text "msg"} represents general program failures.
  7.1202 +
  7.1203 +  \item @{ML Exn.is_interrupt} identifies interrupts robustly, without
  7.1204 +  mentioning concrete exception constructors in user code.  Handled
  7.1205 +  interrupts need to be re-raised promptly!
  7.1206 +
  7.1207 +  \item @{ML reraise}~@{text "exn"} raises exception @{text "exn"}
  7.1208 +  while preserving its implicit position information (if possible,
  7.1209 +  depending on the ML platform).
  7.1210 +
  7.1211 +  \item @{ML Runtime.exn_trace}~@{ML_text "(fn () =>"}~@{text
  7.1212 +  "e"}@{ML_text ")"} evaluates expression @{text "e"} while printing
  7.1213 +  a full trace of its stack of nested exceptions (if possible,
  7.1214 +  depending on the ML platform).
  7.1215 +
  7.1216 +  Inserting @{ML Runtime.exn_trace} into ML code temporarily is
  7.1217 +  useful for debugging, but not suitable for production code.
  7.1218 +
  7.1219 +  \end{description}
  7.1220 +*}
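          +
          +text %mlex {* The following artificial sketch combines regular user
          +  errors with @{ML try} and @{ML can}.  The auxiliary function
          +  @{ML_text int_of_string} is made up for this example; it merely
          +  wraps @{ML Int.fromString} of the SML basis library.
          +*}
          +
          +ML {*
          +  fun int_of_string s =
          +    (case Int.fromString s of
          +      SOME i => i
          +    | NONE => error ("Not a number: " ^ quote s));
          +
          +  @{assert} (try int_of_string "42" = SOME 42);
          +  @{assert} (try int_of_string "foo" = NONE);
          +  @{assert} (can int_of_string "0" andalso not (can int_of_string "?"));
          +*}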
  7.1221 +
  7.1222 +text %mlantiq {*
  7.1223 +  \begin{matharray}{rcl}
  7.1224 +  @{ML_antiquotation_def "assert"} & : & @{text ML_antiquotation} \\
  7.1225 +  \end{matharray}
  7.1226 +
  7.1227 +  \begin{description}
  7.1228 +
  7.1229 +  \item @{text "@{assert}"} inlines a function
  7.1230 +  @{ML_type "bool -> unit"} that raises @{ML Fail} if the argument is
  7.1231 +  @{ML false}.  Due to inlining the source position of failed
  7.1232 +  assertions is included in the error output.
  7.1233 +
  7.1234 +  \end{description}
  7.1235 +*}
  7.1236 +
  7.1237 +
  7.1238 +section {* Strings of symbols \label{sec:symbols} *}
  7.1239 +
  7.1240 +text {* A \emph{symbol} constitutes the smallest textual unit in
  7.1241 +  Isabelle/ML --- raw ML characters are normally not encountered at
  7.1242 +  all!  Isabelle strings consist of a sequence of symbols, represented
  7.1243 +  as a packed string or an exploded list of strings.  Each symbol is
  7.1244 +  in itself a small string, which has either one of the following
  7.1245 +  forms:
  7.1246 +
  7.1247 +  \begin{enumerate}
  7.1248 +
  7.1249 +  \item a single ASCII character ``@{text "c"}'', for example
  7.1250 +  ``\verb,a,'',
  7.1251 +
  7.1252 +  \item a codepoint according to UTF8 (non-ASCII byte sequence),
  7.1253 +
  7.1254 +  \item a regular symbol ``\verb,\,\verb,<,@{text "ident"}\verb,>,'',
  7.1255 +  for example ``\verb,\,\verb,<alpha>,'',
  7.1256 +
  7.1257 +  \item a control symbol ``\verb,\,\verb,<^,@{text "ident"}\verb,>,'',
  7.1258 +  for example ``\verb,\,\verb,<^bold>,'',
  7.1259 +
  7.1260 +  \item a raw symbol ``\verb,\,\verb,<^raw:,@{text text}\verb,>,''
  7.1261 +  where @{text text} consists of printable characters excluding
  7.1262 +  ``\verb,.,'' and ``\verb,>,'', for example
  7.1263 +  ``\verb,\,\verb,<^raw:$\sum_{i = 1}^n$>,'',
  7.1264 +
  7.1265 +  \item a numbered raw control symbol ``\verb,\,\verb,<^raw,@{text
   7.1266 +  n}\verb,>,'' where @{text n} consists of digits, for example
  7.1267 +  ``\verb,\,\verb,<^raw42>,''.
  7.1268 +
  7.1269 +  \end{enumerate}
  7.1270 +
  7.1271 +  The @{text "ident"} syntax for symbol names is @{text "letter
  7.1272 +  (letter | digit)\<^sup>*"}, where @{text "letter = A..Za..z"} and @{text
  7.1273 +  "digit = 0..9"}.  There are infinitely many regular symbols and
  7.1274 +  control symbols, but a fixed collection of standard symbols is
  7.1275 +  treated specifically.  For example, ``\verb,\,\verb,<alpha>,'' is
  7.1276 +  classified as a letter, which means it may occur within regular
  7.1277 +  Isabelle identifiers.
  7.1278 +
  7.1279 +  The character set underlying Isabelle symbols is 7-bit ASCII, but
  7.1280 +  8-bit character sequences are passed-through unchanged.  Unicode/UCS
  7.1281 +  data in UTF-8 encoding is processed in a non-strict fashion, such
  7.1282 +  that well-formed code sequences are recognized
  7.1283 +  accordingly.\footnote{Note that ISO-Latin-1 differs from UTF-8 only
  7.1284 +  in some special punctuation characters that even have replacements
  7.1285 +  within the standard collection of Isabelle symbols.  Text consisting
  7.1286 +  of ASCII plus accented letters can be processed in either encoding.}
  7.1287 +  Unicode provides its own collection of mathematical symbols, but
  7.1288 +  within the core Isabelle/ML world there is no link to the standard
  7.1289 +  collection of Isabelle regular symbols.
  7.1290 +
  7.1291 +  \medskip Output of Isabelle symbols depends on the print mode
  7.1292 +  \cite{isabelle-isar-ref}.  For example, the standard {\LaTeX}
  7.1293 +  setup of the Isabelle document preparation system would present
  7.1294 +  ``\verb,\,\verb,<alpha>,'' as @{text "\<alpha>"}, and
  7.1295 +  ``\verb,\,\verb,<^bold>,\verb,\,\verb,<alpha>,'' as @{text "\<^bold>\<alpha>"}.
  7.1296 +  On-screen rendering usually works by mapping a finite subset of
  7.1297 +  Isabelle symbols to suitable Unicode characters.
  7.1298 +*}
  7.1299 +
  7.1300 +text %mlref {*
  7.1301 +  \begin{mldecls}
  7.1302 +  @{index_ML_type "Symbol.symbol": string} \\
  7.1303 +  @{index_ML Symbol.explode: "string -> Symbol.symbol list"} \\
  7.1304 +  @{index_ML Symbol.is_letter: "Symbol.symbol -> bool"} \\
  7.1305 +  @{index_ML Symbol.is_digit: "Symbol.symbol -> bool"} \\
  7.1306 +  @{index_ML Symbol.is_quasi: "Symbol.symbol -> bool"} \\
  7.1307 +  @{index_ML Symbol.is_blank: "Symbol.symbol -> bool"} \\
  7.1308 +  \end{mldecls}
  7.1309 +  \begin{mldecls}
  7.1310 +  @{index_ML_type "Symbol.sym"} \\
  7.1311 +  @{index_ML Symbol.decode: "Symbol.symbol -> Symbol.sym"} \\
  7.1312 +  \end{mldecls}
  7.1313 +
  7.1314 +  \begin{description}
  7.1315 +
  7.1316 +  \item Type @{ML_type "Symbol.symbol"} represents individual Isabelle
  7.1317 +  symbols.
  7.1318 +
  7.1319 +  \item @{ML "Symbol.explode"}~@{text "str"} produces a symbol list
  7.1320 +  from the packed form.  This function supersedes @{ML
  7.1321 +  "String.explode"} for virtually all purposes of manipulating text in
  7.1322 +  Isabelle!\footnote{The runtime overhead for exploded strings is
  7.1323 +  mainly that of the list structure: individual symbols that happen to
  7.1324 +  be a singleton string do not require extra memory in Poly/ML.}
  7.1325 +
  7.1326 +  \item @{ML "Symbol.is_letter"}, @{ML "Symbol.is_digit"}, @{ML
  7.1327 +  "Symbol.is_quasi"}, @{ML "Symbol.is_blank"} classify standard
  7.1328 +  symbols according to fixed syntactic conventions of Isabelle, cf.\
  7.1329 +  \cite{isabelle-isar-ref}.
  7.1330 +
  7.1331 +  \item Type @{ML_type "Symbol.sym"} is a concrete datatype that
  7.1332 +  represents the different kinds of symbols explicitly, with
  7.1333 +  constructors @{ML "Symbol.Char"}, @{ML "Symbol.Sym"}, @{ML
  7.1334 +  "Symbol.UTF8"}, @{ML "Symbol.Ctrl"}, @{ML "Symbol.Raw"}.
  7.1335 +
  7.1336 +  \item @{ML "Symbol.decode"} converts the string representation of a
  7.1337 +  symbol into the datatype version.
  7.1338 +
  7.1339 +  \end{description}
  7.1340 +
  7.1341 +  \paragraph{Historical note.} In the original SML90 standard the
   7.1342 +  primitive ML type @{ML_type char} did not exist, and @{ML_text
  7.1343 +  "explode: string -> string list"} produced a list of singleton
  7.1344 +  strings like @{ML "raw_explode: string -> string list"} in
  7.1345 +  Isabelle/ML today.  When SML97 came out, Isabelle did not adopt its
  7.1346 +  somewhat anachronistic 8-bit or 16-bit characters, but the idea of
  7.1347 +  exploding a string into a list of small strings was extended to
  7.1348 +  ``symbols'' as explained above.  Thus Isabelle sources can refer to
  7.1349 +  an infinite store of user-defined symbols, without having to worry
  7.1350 +  about the multitude of Unicode encodings that have emerged over the
  7.1351 +  years.  *}
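          +
          +text %mlex {* The following minimal sketch explodes and classifies a few
          +  symbols; recall that string literals in Isabelle/ML sources may
          +  contain symbols like ``\verb,\,\verb,<alpha>,'' literally.
          +*}
          +
          +ML {*
          +  val syms = Symbol.explode "a\<alpha>\<^bold>1";
          +
          +  @{assert} (length syms = 4);
          +  @{assert} (Symbol.is_letter "a" andalso Symbol.is_letter "\<alpha>");
          +  @{assert} (Symbol.is_digit "1" andalso not (Symbol.is_digit "\<alpha>"));
          +*}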
  7.1352 +
  7.1353 +
  7.1354 +section {* Basic data types *}
  7.1355 +
  7.1356 +text {* The basis library proposal of SML97 needs to be treated with
  7.1357 +  caution.  Many of its operations simply do not fit with important
  7.1358 +  Isabelle/ML conventions (like ``canonical argument order'', see
  7.1359 +  \secref{sec:canonical-argument-order}), others cause problems with
  7.1360 +  the parallel evaluation model of Isabelle/ML (such as @{ML
  7.1361 +  TextIO.print} or @{ML OS.Process.system}).
  7.1362 +
  7.1363 +  Subsequently we give a brief overview of important operations on
  7.1364 +  basic ML data types.
  7.1365 +*}
  7.1366 +
  7.1367 +
  7.1368 +subsection {* Characters *}
  7.1369 +
  7.1370 +text %mlref {*
  7.1371 +  \begin{mldecls}
  7.1372 +  @{index_ML_type char} \\
  7.1373 +  \end{mldecls}
  7.1374 +
  7.1375 +  \begin{description}
  7.1376 +
  7.1377 +  \item Type @{ML_type char} is \emph{not} used.  The smallest textual
  7.1378 +  unit in Isabelle is represented as a ``symbol'' (see
  7.1379 +  \secref{sec:symbols}).
  7.1380 +
  7.1381 +  \end{description}
  7.1382 +*}
  7.1383 +
  7.1384 +
  7.1385 +subsection {* Strings *}
  7.1386 +
  7.1387 +text %mlref {*
  7.1388 +  \begin{mldecls}
  7.1389 +  @{index_ML_type string} \\
  7.1390 +  \end{mldecls}
  7.1391 +
  7.1392 +  \begin{description}
  7.1393 +
  7.1394 +  \item Type @{ML_type string} represents immutable vectors of 8-bit
  7.1395 +  characters.  There are operations in SML to convert back and forth
  7.1396 +  to actual byte vectors, which are seldom used.
  7.1397 +
  7.1398 +  This historically important raw text representation is used for
  7.1399 +  Isabelle-specific purposes with the following implicit substructures
  7.1400 +  packed into the string content:
  7.1401 +
  7.1402 +  \begin{enumerate}
  7.1403 +
  7.1404 +  \item sequence of Isabelle symbols (see also \secref{sec:symbols}),
  7.1405 +  with @{ML Symbol.explode} as key operation;
  7.1406 +
  7.1407 +  \item XML tree structure via YXML (see also \cite{isabelle-sys}),
  7.1408 +  with @{ML YXML.parse_body} as key operation.
  7.1409 +
  7.1410 +  \end{enumerate}
  7.1411 +
   7.1412 +  Note that Isabelle/ML string literals may refer to Isabelle symbols
  7.1413 +  like ``\verb,\,\verb,<alpha>,'' natively, \emph{without} escaping
  7.1414 +  the backslash.  This is a consequence of Isabelle treating all
  7.1415 +  source text as strings of symbols, instead of raw characters.
  7.1416 +
  7.1417 +  \end{description}
  7.1418 +*}
  7.1419 +
  7.1420 +text %mlex {* The subsequent example illustrates the difference of
  7.1421 +  physical addressing of bytes versus logical addressing of symbols in
  7.1422 +  Isabelle strings.
  7.1423 +*}
  7.1424 +
  7.1425 +ML_val {*
  7.1426 +  val s = "\<A>";
  7.1427 +
  7.1428 +  @{assert} (length (Symbol.explode s) = 1);
  7.1429 +  @{assert} (size s = 4);
  7.1430 +*}
  7.1431 +
  7.1432 +text {* Note that in Unicode renderings of the symbol @{text "\<A>"},
  7.1433 +  variations of encodings like UTF-8 or UTF-16 pose delicate questions
   7.1434 +  about the multi-byte representation of its codepoint, which lies
   7.1435 +  outside the 16-bit address space of the original Unicode standard
   7.1436 +  from the 1990s.  In Isabelle/ML it is just ``\verb,\,\verb,<A>,''
  7.1437 +  literally, using plain ASCII characters beyond any doubts. *}
  7.1438 +
  7.1439 +
  7.1440 +subsection {* Integers *}
  7.1441 +
  7.1442 +text %mlref {*
  7.1443 +  \begin{mldecls}
  7.1444 +  @{index_ML_type int} \\
  7.1445 +  \end{mldecls}
  7.1446 +
  7.1447 +  \begin{description}
  7.1448 +
  7.1449 +  \item Type @{ML_type int} represents regular mathematical integers,
  7.1450 +  which are \emph{unbounded}.  Overflow never happens in
  7.1451 +  practice.\footnote{The size limit for integer bit patterns in memory
  7.1452 +  is 64\,MB for 32-bit Poly/ML, and much higher for 64-bit systems.}
  7.1453 +  This works uniformly for all supported ML platforms (Poly/ML and
  7.1454 +  SML/NJ).
  7.1455 +
  7.1456 +  Literal integers in ML text are forced to be of this one true
  7.1457 +  integer type --- adhoc overloading of SML97 is disabled.
  7.1458 +
  7.1459 +  Structure @{ML_structure IntInf} of SML97 is obsolete and superseded by
  7.1460 +  @{ML_structure Int}.  Structure @{ML_structure Integer} in @{file
  7.1461 +  "~~/src/Pure/General/integer.ML"} provides some additional
  7.1462 +  operations.
  7.1463 +
  7.1464 +  \end{description}
  7.1465 +*}
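          +
          +text %mlex {* A small sketch with unbounded integers, using @{ML fold}
          +  together with @{ML Integer.add} and the library operation
          +  @{ML_text upto} for generating an interval of integers.
          +*}
          +
          +ML {*
          +  val n = fold Integer.add (1 upto 100) 0;
          +  @{assert} (n = 5050);
          +*}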
  7.1466 +
  7.1467 +
  7.1468 +subsection {* Time *}
  7.1469 +
  7.1470 +text %mlref {*
  7.1471 +  \begin{mldecls}
  7.1472 +  @{index_ML_type Time.time} \\
  7.1473 +  @{index_ML seconds: "real -> Time.time"} \\
  7.1474 +  \end{mldecls}
  7.1475 +
  7.1476 +  \begin{description}
  7.1477 +
  7.1478 +  \item Type @{ML_type Time.time} represents time abstractly according
  7.1479 +  to the SML97 basis library definition.  This is adequate for
  7.1480 +  internal ML operations, but awkward in concrete time specifications.
  7.1481 +
  7.1482 +  \item @{ML seconds}~@{text "s"} turns the concrete scalar @{text
  7.1483 +  "s"} (measured in seconds) into an abstract time value.  Floating
  7.1484 +  point numbers are easy to use as configuration options in the
  7.1485 +  context (see \secref{sec:config-options}) or system preferences that
  7.1486 +  are maintained externally.
  7.1487 +
  7.1488 +  \end{description}
  7.1489 +*}
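          +
          +text %mlex {* A minimal sketch of @{ML seconds}, assuming the SML97
          +  basis operation @{ML Time.toMilliseconds} for inspecting the result.
          +*}
          +
          +ML {*
          +  val timeout = seconds 1.5;
          +  @{assert} (Time.toMilliseconds timeout = 1500);
          +*}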
  7.1490 +
  7.1491 +
  7.1492 +subsection {* Options *}
  7.1493 +
  7.1494 +text %mlref {*
  7.1495 +  \begin{mldecls}
  7.1496 +  @{index_ML Option.map: "('a -> 'b) -> 'a option -> 'b option"} \\
  7.1497 +  @{index_ML is_some: "'a option -> bool"} \\
  7.1498 +  @{index_ML is_none: "'a option -> bool"} \\
  7.1499 +  @{index_ML the: "'a option -> 'a"} \\
  7.1500 +  @{index_ML these: "'a list option -> 'a list"} \\
  7.1501 +  @{index_ML the_list: "'a option -> 'a list"} \\
  7.1502 +  @{index_ML the_default: "'a -> 'a option -> 'a"} \\
  7.1503 +  \end{mldecls}
  7.1504 +*}
  7.1505 +
  7.1506 +text {* Apart from @{ML Option.map} most other operations defined in
   7.1507 +  structure @{ML_structure Option} are alien to Isabelle/ML and never
  7.1508 +  used.  The operations shown above are defined in @{file
  7.1509 +  "~~/src/Pure/General/basics.ML"}.  *}
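          +
          +text %mlex {* Some elementary cases of the above operations: *}
          +
          +ML {*
          +  @{assert} (the_default 0 NONE = 0);
          +  @{assert} (the_default 0 (SOME 42) = 42);
          +  @{assert} (the_list (SOME "a") = ["a"]);
          +  @{assert} (these NONE = ([]: int list));
          +*}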
  7.1510 +
  7.1511 +
  7.1512 +subsection {* Lists *}
  7.1513 +
  7.1514 +text {* Lists are ubiquitous in ML as simple and light-weight
  7.1515 +  ``collections'' for many everyday programming tasks.  Isabelle/ML
  7.1516 +  provides important additions and improvements over operations that
  7.1517 +  are predefined in the SML97 library. *}
  7.1518 +
  7.1519 +text %mlref {*
  7.1520 +  \begin{mldecls}
  7.1521 +  @{index_ML cons: "'a -> 'a list -> 'a list"} \\
  7.1522 +  @{index_ML member: "('b * 'a -> bool) -> 'a list -> 'b -> bool"} \\
  7.1523 +  @{index_ML insert: "('a * 'a -> bool) -> 'a -> 'a list -> 'a list"} \\
  7.1524 +  @{index_ML remove: "('b * 'a -> bool) -> 'b -> 'a list -> 'a list"} \\
  7.1525 +  @{index_ML update: "('a * 'a -> bool) -> 'a -> 'a list -> 'a list"} \\
  7.1526 +  \end{mldecls}
  7.1527 +
  7.1528 +  \begin{description}
  7.1529 +
  7.1530 +  \item @{ML cons}~@{text "x xs"} evaluates to @{text "x :: xs"}.
  7.1531 +
  7.1532 +  Tupled infix operators are a historical accident in Standard ML.
  7.1533 +  The curried @{ML cons} amends this, but it should be only used when
  7.1534 +  partial application is required.
  7.1535 +
  7.1536 +  \item @{ML member}, @{ML insert}, @{ML remove}, @{ML update} treat
  7.1537 +  lists as a set-like container that maintains the order of elements.
  7.1538 +  See @{file "~~/src/Pure/library.ML"} for the full specifications
  7.1539 +  (written in ML).  There are some further derived operations like
  7.1540 +  @{ML union} or @{ML inter}.
  7.1541 +
  7.1542 +  Note that @{ML insert} is conservative about elements that are
  7.1543 +  already a @{ML member} of the list, while @{ML update} ensures that
  7.1544 +  the latest entry is always put in front.  The latter discipline is
  7.1545 +  often more appropriate in declarations of context data
  7.1546 +  (\secref{sec:context-data}) that are issued by the user in Isar
  7.1547 +  source: later declarations take precedence over earlier ones.
  7.1548 +
  7.1549 +  \end{description}
  7.1550 +*}
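          +
          +text %mlex {* A small example of the different treatment of existing
          +  elements by @{ML insert}, @{ML update}, and @{ML remove}: *}
          +
          +ML {*
          +  val xs = [3, 2];
          +
          +  (*insert is conservative: 2 is already a member*)
          +  @{assert} (insert (op =) 2 xs = [3, 2]);
          +
          +  (*update puts the latest entry in front*)
          +  @{assert} (update (op =) 2 xs = [2, 3]);
          +
          +  (*remove eliminates matching elements*)
          +  @{assert} (remove (op =) 2 xs = [3]);
          +*}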
  7.1551 +
  7.1552 +text %mlex {* Using canonical @{ML fold} together with @{ML cons} (or
  7.1553 +  similar standard operations) alternates the orientation of data.
   7.1554 +  This is quite natural and should not be altered forcibly by inserting
   7.1555 +  extra applications of @{ML rev}.  The alternative @{ML fold_rev} can
   7.1556 +  be used in the few situations where alternation should be
  7.1557 +  prevented.
  7.1558 +*}
  7.1559 +
  7.1560 +ML {*
  7.1561 +  val items = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
  7.1562 +
  7.1563 +  val list1 = fold cons items [];
  7.1564 +  @{assert} (list1 = rev items);
  7.1565 +
  7.1566 +  val list2 = fold_rev cons items [];
  7.1567 +  @{assert} (list2 = items);
  7.1568 +*}
  7.1569 +
  7.1570 +text {* The subsequent example demonstrates how to \emph{merge} two
  7.1571 +  lists in a natural way. *}
  7.1572 +
  7.1573 +ML {*
  7.1574 +  fun merge_lists eq (xs, ys) = fold_rev (insert eq) ys xs;
  7.1575 +*}
  7.1576 +
  7.1577 +text {* Here the first list is treated conservatively: only the new
  7.1578 +  elements from the second list are inserted.  The inside-out order of
  7.1579 +  insertion via @{ML fold_rev} attempts to preserve the order of
  7.1580 +  elements in the result.
  7.1581 +
  7.1582 +  This way of merging lists is typical for context data
  7.1583 +  (\secref{sec:context-data}).  See also @{ML merge} as defined in
  7.1584 +  @{file "~~/src/Pure/library.ML"}.
  7.1585 +*}
  7.1586 +
  7.1587 +
  7.1588 +subsection {* Association lists *}
  7.1589 +
  7.1590 +text {* The operations for association lists interpret a concrete list
  7.1591 +  of pairs as a finite function from keys to values.  Redundant
  7.1592 +  representations with multiple occurrences of the same key are
  7.1593 +  implicitly normalized: lookup and update only take the first
  7.1594 +  occurrence into account.
  7.1595 +*}
  7.1596 +
  7.1597 +text {*
  7.1598 +  \begin{mldecls}
  7.1599 +  @{index_ML AList.lookup: "('a * 'b -> bool) -> ('b * 'c) list -> 'a -> 'c option"} \\
  7.1600 +  @{index_ML AList.defined: "('a * 'b -> bool) -> ('b * 'c) list -> 'a -> bool"} \\
  7.1601 +  @{index_ML AList.update: "('a * 'a -> bool) -> 'a * 'b -> ('a * 'b) list -> ('a * 'b) list"} \\
  7.1602 +  \end{mldecls}
  7.1603 +
  7.1604 +  \begin{description}
  7.1605 +
  7.1606 +  \item @{ML AList.lookup}, @{ML AList.defined}, @{ML AList.update}
  7.1607 +  implement the main ``framework operations'' for mappings in
  7.1608 +  Isabelle/ML, following standard conventions for their names and
  7.1609 +  types.
  7.1610 +
  7.1611 +  Note that a function called @{text lookup} is obliged to express its
  7.1612 +  partiality via an explicit option element.  There is no choice to
  7.1613 +  raise an exception, without changing the name to something like
  7.1614 +  @{text "the_element"} or @{text "get"}.
  7.1615 +
  7.1616 +  The @{text "defined"} operation is essentially a contraction of @{ML
  7.1617 +  is_some} and @{text "lookup"}, but this is sufficiently frequent to
  7.1618 +  justify its independent existence.  This also gives the
  7.1619 +  implementation some opportunity for peep-hole optimization.
  7.1620 +
  7.1621 +  \end{description}
  7.1622 +
  7.1623 +  Association lists are adequate as simple and light-weight
  7.1624 +  implementation of finite mappings in many practical situations.  A
  7.1625 +  more heavy-duty table structure is defined in @{file
  7.1626 +  "~~/src/Pure/General/table.ML"}; that version scales easily to
  7.1627 +  thousands or millions of elements.
  7.1628 +*}
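          +
          +text %mlex {* A small sketch of the association list operations: *}
          +
          +ML {*
          +  val tab = [("a", 1), ("b", 2)];
          +
          +  @{assert} (AList.lookup (op =) tab "a" = SOME 1);
          +  @{assert} (not (AList.defined (op =) tab "c"));
          +
          +  val tab' = AList.update (op =) ("a", 42) tab;
          +  @{assert} (AList.lookup (op =) tab' "a" = SOME 42);
          +*}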
  7.1629 +
  7.1630 +
  7.1631 +subsection {* Unsynchronized references *}
  7.1632 +
  7.1633 +text %mlref {*
  7.1634 +  \begin{mldecls}
  7.1635 +  @{index_ML_type "'a Unsynchronized.ref"} \\
  7.1636 +  @{index_ML Unsynchronized.ref: "'a -> 'a Unsynchronized.ref"} \\
  7.1637 +  @{index_ML "!": "'a Unsynchronized.ref -> 'a"} \\
  7.1638 +  @{index_ML_op ":=": "'a Unsynchronized.ref * 'a -> unit"} \\
  7.1639 +  \end{mldecls}
  7.1640 +*}
  7.1641 +
  7.1642 +text {* Due to ubiquitous parallelism in Isabelle/ML (see also
  7.1643 +  \secref{sec:multi-threading}), the mutable reference cells of
  7.1644 +  Standard ML are notorious for causing problems.  In a highly
  7.1645 +  parallel system, both correctness \emph{and} performance are easily
  7.1646 +  degraded when using mutable data.
  7.1647 +
  7.1648 +  The unwieldy name of @{ML Unsynchronized.ref} for the constructor
  7.1649 +  for references in Isabelle/ML emphasizes the inconveniences caused by
  7.1650 +  mutability.  Existing operations @{ML "!"}  and @{ML_op ":="} are
  7.1651 +  unchanged, but should be used with special precautions, say in a
  7.1652 +  strictly local situation that is guaranteed to be restricted to
  7.1653 +  sequential evaluation --- now and in the future.
  7.1654 +
  7.1655 +  \begin{warn}
  7.1656 +  Never @{ML_text "open Unsynchronized"}, not even in a local scope!
  7.1657 +  Pretending that mutable state is no problem is a very bad idea.
  7.1658 +  \end{warn}
  7.1659 +*}
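
text %mlex {* As a minimal sketch of the ``strictly local'' discipline
  mentioned above, the following reference never escapes its enclosing
  @{ML_text let} block and is only ever used sequentially:
*}

ML {*
  val result =
    let
      val counter = Unsynchronized.ref 0;  (*local, sequential use only*)
      val _ = counter := !counter + 1;
      val _ = counter := !counter + 1;
    in !counter end;

  @{assert} (result = 2);
*}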
  7.1660 +
  7.1661 +
  7.1662 +section {* Thread-safe programming \label{sec:multi-threading} *}
  7.1663 +
  7.1664 +text {* Multi-threaded execution has become an everyday reality in
  7.1665 +  Isabelle since Poly/ML 5.2.1 and Isabelle2008.  Isabelle/ML provides
  7.1666 +  implicit and explicit parallelism by default, and there is no way
  7.1667 +  for user-space tools to ``opt out''.  ML programs that are purely
  7.1668 +  functional, output messages only via the official channels
  7.1669 +  (\secref{sec:message-channels}), and do not intercept interrupts
  7.1670 +  (\secref{sec:exceptions}) can participate in the multi-threaded
  7.1671 +  environment immediately without further ado.
  7.1672 +
  7.1673 +  More ambitious tools with more fine-grained interaction with the
  7.1674 +  environment need to observe the principles explained below.
  7.1675 +*}
  7.1676 +
  7.1677 +
  7.1678 +subsection {* Multi-threading with shared memory *}
  7.1679 +
  7.1680 +text {* Multiple threads help to organize advanced operations of the
  7.1681 +  system, such as real-time conditions on command transactions,
  7.1682 +  sub-components with explicit communication, general asynchronous
  7.1683 +  interaction etc.  Moreover, parallel evaluation is a prerequisite to
  7.1684 +  make adequate use of the CPU resources that are available on
  7.1685 +  multi-core systems.\footnote{Multi-core computing does not mean that
  7.1686 +  there are ``spare cycles'' to be wasted.  It means that the
  7.1687 +  continued exponential speedup of CPU performance due to ``Moore's
  7.1688 +  Law'' follows different rules: clock frequency has reached its peak
  7.1689 +  around 2005, and applications need to be parallelized in order to
  7.1690 +  avoid a perceived loss of performance.  See also
  7.1691 +  \cite{Sutter:2005}.}
  7.1692 +
  7.1693 +  Isabelle/Isar exploits the inherent structure of theories and proofs
  7.1694 +  to support \emph{implicit parallelism} to a large extent.  LCF-style
  7.1695 +  theorem proving provides almost ideal conditions for that, see also
  7.1696 +  \cite{Wenzel:2009}.  This means that significant parts of theory and
  7.1697 +  proof checking are parallelized by default.  In Isabelle2013, a
  7.1698 +  maximum speedup-factor of 3.5 on 4 cores and 6.5 on 8 cores can be
  7.1699 +  expected.
  7.1700 +
  7.1701 +  \medskip ML threads lack the memory protection of separate
  7.1702 +  processes, and operate concurrently on shared heap memory.  This has
  7.1703 +  the advantage that results of independent computations are directly
  7.1704 +  available to other threads: abstract values can be passed without
  7.1705 +  copying or awkward serialization that is typically required for
  7.1706 +  separate processes.
  7.1707 +
  7.1708 +  To make shared-memory multi-threading work robustly and efficiently,
  7.1709 +  some programming guidelines need to be observed.  While the ML
  7.1710 +  system is responsible for maintaining the basic integrity of the
  7.1711 +  representation of ML values in memory, the application programmer
  7.1712 +  needs to ensure that multi-threaded execution does not break the
  7.1713 +  intended semantics.
  7.1714 +
  7.1715 +  \begin{warn}
  7.1716 +  To participate in implicit parallelism, tools need to be
  7.1717 +  thread-safe.  A single ill-behaved tool can affect the stability and
  7.1718 +  performance of the whole system.
  7.1719 +  \end{warn}
  7.1720 +
  7.1721 +  Apart from observing the principles of thread-safeness passively,
  7.1722 +  advanced tools may also exploit parallelism actively, e.g.\ by using
  7.1723 +  ``future values'' (\secref{sec:futures}) or the more basic library
  7.1724 +  functions for parallel list operations (\secref{sec:parlist}).
  7.1725 +
  7.1726 +  \begin{warn}
  7.1727 +  Parallel computing resources are managed centrally by the
  7.1728 +  Isabelle/ML infrastructure.  User programs must not fork their own
  7.1729 +  ML threads to perform computations.
  7.1730 +  \end{warn}
  7.1731 +*}
  7.1732 +
  7.1733 +
  7.1734 +subsection {* Critical shared resources *}
  7.1735 +
  7.1736 +text {* Thread-safeness is mainly concerned with concurrent
  7.1737 +  read/write access to shared resources, which are outside the purely
  7.1738 +  functional world of ML.  This covers the following in particular.
  7.1739 +
  7.1740 +  \begin{itemize}
  7.1741 +
  7.1742 +  \item Global references (or arrays), i.e.\ mutable memory cells that
  7.1743 +  persist over several invocations of associated
  7.1744 +  operations.\footnote{This is independent of the visibility of such
  7.1745 +  mutable values in the toplevel scope.}
  7.1746 +
  7.1747 +  \item Global state of the running Isabelle/ML process, i.e.\ raw I/O
  7.1748 +  channels, environment variables, current working directory.
  7.1749 +
  7.1750 +  \item Writable resources in the file-system that are shared among
  7.1751 +  different threads or external processes.
  7.1752 +
  7.1753 +  \end{itemize}
  7.1754 +
  7.1755 +  Isabelle/ML provides various mechanisms to avoid critical shared
  7.1756 +  resources in most situations.  As a last resort there are some
  7.1757 +  mechanisms for explicit synchronization.  The following guidelines
  7.1758 +  help to make Isabelle/ML programs work smoothly in a concurrent
  7.1759 +  environment.
  7.1760 +
  7.1761 +  \begin{itemize}
  7.1762 +
  7.1763 +  \item Avoid global references altogether.  Isabelle/Isar maintains a
  7.1764 +  uniform context that incorporates arbitrary data declared by user
  7.1765 +  programs (\secref{sec:context-data}).  This context is passed as
  7.1766 +  plain value and user tools can get/map their own data in a purely
  7.1767 +  functional manner.  Configuration options within the context
  7.1768 +  (\secref{sec:config-options}) provide simple drop-in replacements
  7.1769 +  for historic reference variables.
  7.1770 +
  7.1771 +  \item Keep components with local state information re-entrant.
  7.1772 +  Instead of poking initial values into (private) global references, a
  7.1773 +  new state record can be created on each invocation, and passed
  7.1774 +  through any auxiliary functions of the component.  The state record
  7.1775 +  may well contain mutable references, without requiring any special
  7.1776 +  synchronizations, as long as each invocation gets its own copy (see the sketch below).
  7.1777 +
  7.1778 +  \item Avoid raw output on @{text "stdout"} or @{text "stderr"}.  The
  7.1779 +  Poly/ML library is thread-safe for each individual output operation,
  7.1780 +  but the ordering of parallel invocations is arbitrary.  This means
  7.1781 +  raw output will appear on some system console with unpredictable
  7.1782 +  interleaving of atomic chunks.
  7.1783 +
  7.1784 +  Note that this does not affect regular message output channels
  7.1785 +  (\secref{sec:message-channels}).  An official message is associated
  7.1786 +  with the command transaction from where it originates, independently
  7.1787 +  of other transactions.  This means each running Isar command has
  7.1788 +  effectively its own set of message channels, and interleaving can
  7.1789 +  only happen when commands use parallelism internally (and only at
  7.1790 +  message boundaries).
  7.1791 +
  7.1792 +  \item Treat environment variables and the current working directory
  7.1793 +  of the running process as strictly read-only.
  7.1794 +
  7.1795 +  \item Restrict writing to the file-system to unique temporary files.
  7.1796 +  Isabelle already provides a temporary directory that is unique for
  7.1797 +  the running process, and there is a centralized source of unique
  7.1798 +  serial numbers in Isabelle/ML.  Thus temporary files that are passed
  7.1799 +  to some external process will always be disjoint, and thus
  7.1800 +  thread-safe.
  7.1801 +
  7.1802 +  \end{itemize}
  7.1803 +*}
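
text %mlex {* The re-entrant state discipline of the second guideline
  above may be sketched as follows; the component and all of its names
  are invented for illustration.  Each invocation creates its own state
  record, so the embedded reference requires no synchronization.
*}

ML {*
  (*hypothetical component with per-invocation state*)
  type state = {count: int Unsynchronized.ref};
  fun init_state () : state = {count = Unsynchronized.ref 0};
  fun step ({count}: state) = (count := !count + 1; !count);

  val st1 = init_state ();
  val st2 = init_state ();
  @{assert} (step st1 = 1);
  @{assert} (step st1 = 2);
  @{assert} (step st2 = 1);
*}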
  7.1804 +
  7.1805 +text %mlref {*
  7.1806 +  \begin{mldecls}
  7.1807 +  @{index_ML File.tmp_path: "Path.T -> Path.T"} \\
  7.1808 +  @{index_ML serial_string: "unit -> string"} \\
  7.1809 +  \end{mldecls}
  7.1810 +
  7.1811 +  \begin{description}
  7.1812 +
  7.1813 +  \item @{ML File.tmp_path}~@{text "path"} relocates the base
  7.1814 +  component of @{text "path"} into the unique temporary directory of
  7.1815 +  the running Isabelle/ML process.
  7.1816 +
  7.1817 +  \item @{ML serial_string}~@{text "()"} creates a new serial number
  7.1818 +  that is unique over the runtime of the Isabelle/ML process.
  7.1819 +
  7.1820 +  \end{description}
  7.1821 +*}
  7.1822 +
  7.1823 +text %mlex {* The following example shows how to create unique
  7.1824 +  temporary file names.
  7.1825 +*}
  7.1826 +
  7.1827 +ML {*
  7.1828 +  val tmp1 = File.tmp_path (Path.basic ("foo" ^ serial_string ()));
  7.1829 +  val tmp2 = File.tmp_path (Path.basic ("foo" ^ serial_string ()));
  7.1830 +  @{assert} (tmp1 <> tmp2);
  7.1831 +*}
  7.1832 +
  7.1833 +
  7.1834 +subsection {* Explicit synchronization *}
  7.1835 +
  7.1836 +text {* Isabelle/ML also provides some explicit synchronization
  7.1837 +  mechanisms, for the rare situations where mutable shared resources
  7.1838 +  are really required.  These are based on the synchronization
  7.1839 +  primitives of Poly/ML, which have been adapted to the specific
  7.1840 +  assumptions of the concurrent Isabelle/ML environment.  User code
  7.1841 +  must not use the Poly/ML primitives directly!
  7.1842 +
  7.1843 +  \medskip The most basic synchronization concept is a single
  7.1844 +  \emph{critical section} (also called ``monitor'' in the literature).
  7.1845 +  A thread that enters the critical section prevents all other threads
  7.1846 +  from doing the same.  A thread that is already within the critical
  7.1847 +  section may re-enter it in an idempotent manner.
  7.1848 +
  7.1849 +  Such centralized locking is convenient, because it prevents
  7.1850 +  deadlocks by construction.
  7.1851 +
  7.1852 +  \medskip More fine-grained locking works via \emph{synchronized
  7.1853 +  variables}.  An explicit state component is associated with
  7.1854 +  mechanisms for locking and signaling.  There are operations to
  7.1855 +  await a condition, change the state, and signal the change to all
  7.1856 +  other waiting threads.
  7.1857 +
  7.1858 +  Here the synchronized access to the state variable is \emph{not}
  7.1859 +  re-entrant: direct or indirect nesting within the same thread will
  7.1860 +  cause a deadlock!
  7.1861 +*}
  7.1862 +
  7.1863 +text %mlref {*
  7.1864 +  \begin{mldecls}
  7.1865 +  @{index_ML NAMED_CRITICAL: "string -> (unit -> 'a) -> 'a"} \\
  7.1866 +  @{index_ML CRITICAL: "(unit -> 'a) -> 'a"} \\
  7.1867 +  \end{mldecls}
  7.1868 +  \begin{mldecls}
  7.1869 +  @{index_ML_type "'a Synchronized.var"} \\
  7.1870 +  @{index_ML Synchronized.var: "string -> 'a -> 'a Synchronized.var"} \\
  7.1871 +  @{index_ML Synchronized.guarded_access: "'a Synchronized.var ->
  7.1872 +  ('a -> ('b * 'a) option) -> 'b"} \\
  7.1873 +  \end{mldecls}
  7.1874 +
  7.1875 +  \begin{description}
  7.1876 +
  7.1877 +  \item @{ML NAMED_CRITICAL}~@{text "name e"} evaluates @{text "e ()"}
  7.1878 +  within the central critical section of Isabelle/ML.  No other thread
  7.1879 +  may do so at the same time, but non-critical parallel execution will
  7.1880 +  continue.  The @{text "name"} argument is used for tracing and might
  7.1881 +  help to spot sources of congestion.
  7.1882 +
  7.1883 +  Entering the critical section without contention is very fast.  Each
  7.1884 +  thread should stay within the critical section only very briefly,
  7.1885 +  otherwise parallel performance may degrade.
  7.1886 +
  7.1887 +  \item @{ML CRITICAL} is the same as @{ML NAMED_CRITICAL} with empty
  7.1888 +  name argument.
  7.1889 +
  7.1890 +  \item Type @{ML_type "'a Synchronized.var"} represents synchronized
  7.1891 +  variables with state of type @{ML_type 'a}.
  7.1892 +
  7.1893 +  \item @{ML Synchronized.var}~@{text "name x"} creates a synchronized
  7.1894 +  variable that is initialized with value @{text "x"}.  The @{text
  7.1895 +  "name"} is used for tracing.
  7.1896 +
  7.1897 +  \item @{ML Synchronized.guarded_access}~@{text "var f"} lets the
  7.1898 +  function @{text "f"} operate within a critical section on the state
  7.1899 +  @{text "x"} as follows: if @{text "f x"} produces @{ML NONE}, it
  7.1900 +  continues to wait on the internal condition variable, expecting that
  7.1901 +  some other thread will eventually change the content in a suitable
  7.1902 +  manner; if @{text "f x"} produces @{ML SOME}~@{text "(y, x')"} it is
  7.1903 +  satisfied and assigns the new state value @{text "x'"}, broadcasts a
  7.1904 +  signal to all waiting threads on the associated condition variable,
  7.1905 +  and returns the result @{text "y"}.
  7.1906 +
  7.1907 +  \end{description}
  7.1908 +
  7.1909 +  There are some further variants of the @{ML
  7.1910 +  Synchronized.guarded_access} combinator, see @{file
  7.1911 +  "~~/src/Pure/Concurrent/synchronized.ML"} for details.
  7.1912 +*}
  7.1913 +
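text %mlex {* As a trivial sketch, a computation may be wrapped into the
  central critical section like this; realistic applications keep such
  sections as brief as possible.
*}

ML {*
  val x = NAMED_CRITICAL "example" (fn () => 1 + 1);
  @{assert} (x = 2);
*}
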
  7.1914 +text %mlex {* The following example implements a counter that produces
  7.1915 +  positive integers that are unique over the runtime of the Isabelle
  7.1916 +  process:
  7.1917 +*}
  7.1918 +
  7.1919 +ML {*
  7.1920 +  local
  7.1921 +    val counter = Synchronized.var "counter" 0;
  7.1922 +  in
  7.1923 +    fun next () =
  7.1924 +      Synchronized.guarded_access counter
  7.1925 +        (fn i =>
  7.1926 +          let val j = i + 1
  7.1927 +          in SOME (j, j) end);
  7.1928 +  end;
  7.1929 +*}
  7.1930 +
  7.1931 +ML {*
  7.1932 +  val a = next ();
  7.1933 +  val b = next ();
  7.1934 +  @{assert} (a <> b);
  7.1935 +*}
  7.1936 +
  7.1937 +text {* \medskip See @{file "~~/src/Pure/Concurrent/mailbox.ML"} for how
  7.1938 +  to implement a mailbox as a synchronized variable over a purely
  7.1939 +  functional queue. *}
  7.1940 +
  7.1941 +
  7.1942 +section {* Managed evaluation *}
  7.1943 +
  7.1944 +text {* Execution of Standard ML follows the model of strict
  7.1945 +  functional evaluation with optional exceptions.  Evaluation happens
  7.1946 +  whenever some function is applied to (sufficiently many)
  7.1947 +  arguments. The result is either an explicit value or an implicit
  7.1948 +  exception.
  7.1949 +
  7.1950 +  \emph{Managed evaluation} in Isabelle/ML organizes expressions and
  7.1951 +  results to control certain physical side-conditions, i.e.\ to determine
  7.1952 +  more specifically when and how evaluation happens.  For example, the
  7.1953 +  Isabelle/ML library supports lazy evaluation with memoing, parallel
  7.1954 +  evaluation via futures, asynchronous evaluation via promises,
  7.1955 +  evaluation with time limit etc.
  7.1956 +
  7.1957 +  \medskip An \emph{unevaluated expression} is represented either as
  7.1958 +  unit abstraction @{verbatim "fn () => a"} of type
  7.1959 +  @{verbatim "unit -> 'a"} or as regular function
  7.1960 +  @{verbatim "fn a => b"} of type @{verbatim "'a -> 'b"}.  Both forms
  7.1961 +  occur routinely, and special care is required to tell them apart ---
  7.1962 +  the static type-system of SML is only of limited help here.
  7.1963 +
  7.1964 +  The first form is more intuitive: some combinator @{text "(unit ->
  7.1965 +  'a) -> 'a"} applies the given function to @{text "()"} to initiate
  7.1966 +  the postponed evaluation process.  The second form is more flexible:
  7.1967 +  some combinator @{text "('a -> 'b) -> 'a -> 'b"} acts like a
  7.1968 +  modified form of function application; several such combinators may
  7.1969 +  be cascaded to modify a given function, before it is ultimately
  7.1970 +  applied to some argument.
  7.1971 +
  7.1972 +  \medskip \emph{Reified results} make the disjoint sum of regular
  7.1973 +  values versus exceptional situations explicit as an ML datatype:
  7.1974 +  @{text "'a result = Res of 'a | Exn of exn"}.  This is typically
  7.1975 +  used for administrative purposes, to store the overall outcome of an
  7.1976 +  evaluation process.
  7.1977 +
  7.1978 +  \emph{Parallel exceptions} aggregate reified results, such that
  7.1979 +  multiple exceptions are digested as a collection in canonical form
  7.1980 +  that identifies exceptions according to their original occurrence.
  7.1981 +  This is particularly important for parallel evaluation via futures
  7.1982 +  (\secref{sec:futures}), which are organized as an acyclic graph of
  7.1983 +  evaluations that depend on other evaluations: exceptions stemming
  7.1984 +  from shared sub-graphs are exposed exactly once and in the order of
  7.1985 +  their original occurrence (e.g.\ when printed at the toplevel).
  7.1986 +  Interrupt counts as neutral element here: it is treated as minimal
  7.1987 +  information about some canceled evaluation process, and is absorbed
  7.1988 +  by the presence of regular program exceptions.  *}
  7.1989 +
  7.1990 +text %mlref {*
  7.1991 +  \begin{mldecls}
  7.1992 +  @{index_ML_type "'a Exn.result"} \\
  7.1993 +  @{index_ML Exn.capture: "('a -> 'b) -> 'a -> 'b Exn.result"} \\
  7.1994 +  @{index_ML Exn.interruptible_capture: "('a -> 'b) -> 'a -> 'b Exn.result"} \\
  7.1995 +  @{index_ML Exn.release: "'a Exn.result -> 'a"} \\
  7.1996 +  @{index_ML Par_Exn.release_all: "'a Exn.result list -> 'a list"} \\
  7.1997 +  @{index_ML Par_Exn.release_first: "'a Exn.result list -> 'a list"} \\
  7.1998 +  \end{mldecls}
  7.1999 +
  7.2000 +  \begin{description}
  7.2001 +
  7.2002 +  \item Type @{ML_type "'a Exn.result"} represents the disjoint sum of
  7.2003 +  ML results explicitly, with constructor @{ML Exn.Res} for regular
  7.2004 +  values and @{ML "Exn.Exn"} for exceptions.
  7.2005 +
  7.2006 +  \item @{ML Exn.capture}~@{text "f x"} manages the evaluation of
  7.2007 +  @{text "f x"} such that exceptions are made explicit as @{ML
  7.2008 +  "Exn.Exn"}.  Note that this includes physical interrupts (see also
  7.2009 +  \secref{sec:exceptions}), so the same precautions apply to user
  7.2010 +  code: interrupts must not be absorbed accidentally!
  7.2011 +
  7.2012 +  \item @{ML Exn.interruptible_capture} is similar to @{ML
  7.2013 +  Exn.capture}, but interrupts are immediately re-raised as required
  7.2014 +  for user code.
  7.2015 +
  7.2016 +  \item @{ML Exn.release}~@{text "result"} releases the original
  7.2017 +  runtime result, exposing its regular value or raising the reified
  7.2018 +  exception.
  7.2019 +
  7.2020 +  \item @{ML Par_Exn.release_all}~@{text "results"} combines results
  7.2021 +  that were produced independently (e.g.\ by parallel evaluation).  If
  7.2022 +  all results are regular values, that list is returned.  Otherwise,
  7.2023 +  the collection of all exceptions is raised, wrapped-up as collective
  7.2024 +  parallel exception.  Note that the latter prevents access to
  7.2025 +  individual exceptions by conventional @{verbatim "handle"} of SML.
  7.2026 +
  7.2027 +  \item @{ML Par_Exn.release_first} is similar to @{ML
  7.2028 +  Par_Exn.release_all}, but only the first exception that has occurred
  7.2029 +  in the original evaluation process is raised again; the others are
  7.2030 +  ignored.  That single exception may get handled by conventional
  7.2031 +  means in SML.
  7.2032 +
  7.2033 +  \end{description}
  7.2034 +*}
  7.2035 +
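text %mlex {* The following sketch captures exceptions as reified
  results and releases them again; the concrete computations are
  arbitrary.
*}

ML {*
  val bad = Exn.capture (fn n => 100 div n) 0;
  @{assert} (case bad of Exn.Exn _ => true | Exn.Res _ => false);

  val good = Exn.capture (fn n => 100 div n) 4;
  @{assert} (Exn.release good = 25);

  val results = map (Exn.capture (fn n => 100 div n)) [1, 2, 4];
  @{assert} (Par_Exn.release_all results = [100, 50, 25]);
*}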
  7.2036 +
  7.2037 +subsection {* Parallel skeletons \label{sec:parlist} *}
  7.2038 +
  7.2039 +text {*
  7.2040 +  Algorithmic skeletons are combinators that operate on lists in
  7.2041 +  parallel, in the manner of well-known @{text map}, @{text exists},
  7.2042 +  @{text forall} etc.  Management of futures (\secref{sec:futures})
  7.2043 +  and their results as reified exceptions is wrapped up into simple
  7.2044 +  programming interfaces that resemble the sequential versions.
  7.2045 +
  7.2046 +  What remains is the application-specific problem to present
  7.2047 +  expressions with suitable \emph{granularity}: each list element
  7.2048 +  corresponds to one evaluation task.  If the granularity is too
  7.2049 +  coarse, the available CPUs are not saturated.  If it is too
  7.2050 +  fine-grained, CPU cycles are wasted due to the overhead of
  7.2051 +  organizing parallel processing.  In the worst case, parallel
  7.2052 +  performance will be less than the sequential counterpart!
  7.2053 +*}
  7.2054 +
  7.2055 +text %mlref {*
  7.2056 +  \begin{mldecls}
  7.2057 +  @{index_ML Par_List.map: "('a -> 'b) -> 'a list -> 'b list"} \\
  7.2058 +  @{index_ML Par_List.get_some: "('a -> 'b option) -> 'a list -> 'b option"} \\
  7.2059 +  \end{mldecls}
  7.2060 +
  7.2061 +  \begin{description}
  7.2062 +
  7.2063 +  \item @{ML Par_List.map}~@{text "f [x\<^sub>1, \<dots>, x\<^sub>n]"} is like @{ML
  7.2064 +  "map"}~@{text "f [x\<^sub>1, \<dots>, x\<^sub>n]"}, but the evaluation of @{text "f x\<^sub>i"}
  7.2065 +  for @{text "i = 1, \<dots>, n"} is performed in parallel.
  7.2066 +
  7.2067 +  An exception in any @{text "f x\<^sub>i"} cancels the overall evaluation
  7.2068 +  process.  The final result is produced via @{ML
  7.2069 +  Par_Exn.release_first} as explained above, which means the first
  7.2070 +  program exception that happened to occur in the parallel evaluation
  7.2071 +  is propagated, and all other failures are ignored.
  7.2072 +
  7.2073 +  \item @{ML Par_List.get_some}~@{text "f [x\<^sub>1, \<dots>, x\<^sub>n]"} produces some
  7.2074 +  @{text "f x\<^sub>i"} that is of the form @{text "SOME y\<^sub>i"}, if that
  7.2075 +  exists, otherwise @{text "NONE"}.  Thus it is similar to @{ML
  7.2076 +  Library.get_first}, but subject to a non-deterministic parallel
  7.2077 +  choice process.  The first successful result cancels the overall
  7.2078 +  evaluation process; other exceptions are propagated as for @{ML
  7.2079 +  Par_List.map}.
  7.2080 +
  7.2081 +  This generic parallel choice combinator is the basis for derived
  7.2082 +  forms, such as @{ML Par_List.find_some}, @{ML Par_List.exists}, @{ML
  7.2083 +  Par_List.forall}.
  7.2084 +
  7.2085 +  \end{description}
  7.2086 +*}
  7.2087 +
  7.2088 +text %mlex {* Subsequently, the Ackermann function is evaluated in
  7.2089 +  parallel for some ranges of arguments. *}
  7.2090 +
  7.2091 +ML_val {*
  7.2092 +  fun ackermann 0 n = n + 1
  7.2093 +    | ackermann m 0 = ackermann (m - 1) 1
  7.2094 +    | ackermann m n = ackermann (m - 1) (ackermann m (n - 1));
  7.2095 +
  7.2096 +  Par_List.map (ackermann 2) (500 upto 1000);
  7.2097 +  Par_List.map (ackermann 3) (5 upto 10);
  7.2098 +*}
  7.2099 +
  7.2100 +
  7.2101 +subsection {* Lazy evaluation *}
  7.2102 +
  7.2103 +text {*
  7.2104 +  %FIXME
  7.2105 +
  7.2106 +  See also @{file "~~/src/Pure/Concurrent/lazy.ML"}.
  7.2107 +*}
  7.2108 +
  7.2109 +
  7.2110 +subsection {* Future values \label{sec:futures} *}
  7.2111 +
  7.2112 +text {*
  7.2113 +  %FIXME
  7.2114 +
  7.2115 +  See also @{file "~~/src/Pure/Concurrent/future.ML"}.
  7.2116 +*}
  7.2117 +
  7.2118 +
  7.2119 +end
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/src/Doc/Implementation/Prelim.thy	Sat Apr 05 11:37:00 2014 +0200
     8.3 @@ -0,0 +1,1069 @@
     8.4 +theory Prelim
     8.5 +imports Base
     8.6 +begin
     8.7 +
     8.8 +chapter {* Preliminaries *}
     8.9 +
    8.10 +section {* Contexts \label{sec:context} *}
    8.11 +
    8.12 +text {*
    8.13 +  A logical context represents the background that is required for
    8.14 +  formulating statements and composing proofs.  It acts as a medium to
    8.15 +  produce formal content, depending on earlier material (declarations,
    8.16 +  results etc.).
    8.17 +
    8.18 +  For example, derivations within the Isabelle/Pure logic can be
    8.19 +  described as a judgment @{text "\<Gamma> \<turnstile>\<^sub>\<Theta> \<phi>"}, which means that a
    8.20 +  proposition @{text "\<phi>"} is derivable from hypotheses @{text "\<Gamma>"}
    8.21 +  within the theory @{text "\<Theta>"}.  There are logical reasons for
    8.22 +  keeping @{text "\<Theta>"} and @{text "\<Gamma>"} separate: theories can be
    8.23 +  liberal about supporting type constructors and schematic
    8.24 +  polymorphism of constants and axioms, while the inner calculus of
    8.25 +  @{text "\<Gamma> \<turnstile> \<phi>"} is strictly limited to Simple Type Theory (with
    8.26 +  fixed type variables in the assumptions).
    8.27 +
    8.28 +  \medskip Contexts and derivations are linked by the following key
    8.29 +  principles:
    8.30 +
    8.31 +  \begin{itemize}
    8.32 +
    8.33 +  \item Transfer: monotonicity of derivations admits results to be
    8.34 +  transferred into a \emph{larger} context, i.e.\ @{text "\<Gamma> \<turnstile>\<^sub>\<Theta>
    8.35 +  \<phi>"} implies @{text "\<Gamma>' \<turnstile>\<^sub>\<Theta>\<^sub>' \<phi>"} for contexts @{text "\<Theta>'
    8.36 +  \<supseteq> \<Theta>"} and @{text "\<Gamma>' \<supseteq> \<Gamma>"}.
    8.37 +
    8.38 +  \item Export: discharge of hypotheses admits results to be exported
    8.39 +  into a \emph{smaller} context, i.e.\ @{text "\<Gamma>' \<turnstile>\<^sub>\<Theta> \<phi>"}
    8.40 +  implies @{text "\<Gamma> \<turnstile>\<^sub>\<Theta> \<Delta> \<Longrightarrow> \<phi>"} where @{text "\<Gamma>' \<supseteq> \<Gamma>"} and
    8.41 +  @{text "\<Delta> = \<Gamma>' - \<Gamma>"}.  Note that @{text "\<Theta>"} remains unchanged here,
    8.42 +  only the @{text "\<Gamma>"} part is affected.
    8.43 +
    8.44 +  \end{itemize}
    8.45 +
    8.46 +  \medskip By modeling the main characteristics of the primitive
    8.47 +  @{text "\<Theta>"} and @{text "\<Gamma>"} above, and abstracting over any
    8.48 +  particular logical content, we arrive at the fundamental notions of
    8.49 +  \emph{theory context} and \emph{proof context} in Isabelle/Isar.
    8.50 +  These implement a certain policy to manage arbitrary \emph{context
    8.51 +  data}.  There is a strongly-typed mechanism to declare new kinds of
    8.52 +  data at compile time.
    8.53 +
    8.54 +  The internal bootstrap process of Isabelle/Pure eventually reaches a
    8.55 +  stage where certain data slots provide the logical content of @{text
    8.56 +  "\<Theta>"} and @{text "\<Gamma>"} sketched above, but this does not stop there!
    8.57 +  Various additional data slots support all kinds of mechanisms that
    8.58 +  are not necessarily part of the core logic.
    8.59 +
    8.60 +  For example, there would be data for canonical introduction and
    8.61 +  elimination rules for arbitrary operators (depending on the
    8.62 +  object-logic and application), which enables users to perform
    8.63 +  standard proof steps implicitly (cf.\ the @{text "rule"} method
    8.64 +  \cite{isabelle-isar-ref}).
    8.65 +
    8.66 +  \medskip Thus Isabelle/Isar is able to bring forth more and more
    8.67 +  concepts successively.  In particular, an object-logic like
    8.68 +  Isabelle/HOL continues the Isabelle/Pure setup by adding specific
    8.69 +  components for automated reasoning (classical reasoner, tableau
    8.70 +  prover, structured induction etc.) and derived specification
    8.71 +  mechanisms (inductive predicates, recursive functions etc.).  All of
    8.72 +  this is ultimately based on the generic data management by theory
    8.73 +  and proof contexts introduced here.
    8.74 +*}
    8.75 +
    8.76 +
    8.77 +subsection {* Theory context \label{sec:context-theory} *}
    8.78 +
    8.79 +text {* A \emph{theory} is a data container with explicit name and
    8.80 +  unique identifier.  Theories are related by a (nominal) sub-theory
    8.81 +  relation, which corresponds to the dependency graph of the original
    8.82 +  construction; each theory is derived from a certain sub-graph of
    8.83 +  ancestor theories.  To this end, the system maintains a set of
    8.84 +  symbolic ``identification stamps'' within each theory.
    8.85 +
    8.86 +  The @{text "merge"} operation produces the least upper bound of two
    8.87 +  theories, which actually degenerates into absorption of one theory
    8.88 +  into the other (according to the nominal sub-theory relation).
    8.89 +
    8.90 +  The @{text "begin"} operation starts a new theory by importing
    8.91 +  several parent theories and entering a special mode of nameless
    8.92 +  incremental updates, until the final @{text "end"} operation is
    8.93 +  performed.
    8.94 +
    8.95 +  \medskip The example in \figref{fig:ex-theory} below shows a theory
    8.96 +  graph derived from @{text "Pure"}, with theory @{text "Length"}
    8.97 +  importing @{text "Nat"} and @{text "List"}.  The body of @{text
    8.98 +  "Length"} consists of a sequence of updates, resulting locally in a
    8.99 +  linear sub-theory relation for each intermediate step.
   8.100 +
   8.101 +  \begin{figure}[htb]
   8.102 +  \begin{center}
   8.103 +  \begin{tabular}{rcccl}
   8.104 +        &            & @{text "Pure"} \\
   8.105 +        &            & @{text "\<down>"} \\
   8.106 +        &            & @{text "FOL"} \\
   8.107 +        & $\swarrow$ &              & $\searrow$ & \\
   8.108 +  @{text "Nat"} &    &              &            & @{text "List"} \\
   8.109 +        & $\searrow$ &              & $\swarrow$ \\
   8.110 +        &            & @{text "Length"} \\
   8.111 +        &            & \multicolumn{3}{l}{~~@{keyword "begin"}} \\
   8.112 +        &            & $\vdots$~~ \\
   8.113 +        &            & \multicolumn{3}{l}{~~@{command "end"}} \\
   8.114 +  \end{tabular}
   8.115 +  \caption{A theory definition depending on ancestors}\label{fig:ex-theory}
   8.116 +  \end{center}
   8.117 +  \end{figure}
   8.118 +
   8.119 +  \medskip Derived formal entities may retain a reference to the
   8.120 +  background theory in order to indicate the formal context from which
   8.121 +  they were produced.  This provides an immutable certificate of the
   8.122 +  background theory.  *}
   8.123 +
   8.124 +text %mlref {*
   8.125 +  \begin{mldecls}
   8.126 +  @{index_ML_type theory} \\
   8.127 +  @{index_ML Theory.eq_thy: "theory * theory -> bool"} \\
   8.128 +  @{index_ML Theory.subthy: "theory * theory -> bool"} \\
   8.129 +  @{index_ML Theory.merge: "theory * theory -> theory"} \\
   8.130 +  @{index_ML Theory.begin_theory: "string * Position.T -> theory list -> theory"} \\
   8.131 +  @{index_ML Theory.parents_of: "theory -> theory list"} \\
   8.132 +  @{index_ML Theory.ancestors_of: "theory -> theory list"} \\
   8.133 +  \end{mldecls}
   8.134 +
   8.135 +  \begin{description}
   8.136 +
   8.137 +  \item Type @{ML_type theory} represents theory contexts.
   8.138 +
   8.139 +  \item @{ML "Theory.eq_thy"}~@{text "(thy\<^sub>1, thy\<^sub>2)"} checks strict
   8.140 +  identity of two theories.
   8.141 +
   8.142 +  \item @{ML "Theory.subthy"}~@{text "(thy\<^sub>1, thy\<^sub>2)"} compares theories
   8.143 +  according to the intrinsic graph structure of the construction.
   8.144 +  This sub-theory relation is a nominal approximation of inclusion
   8.145 +  (@{text "\<subseteq>"}) of the corresponding content (according to the
   8.146 +  semantics of the ML modules that implement the data).
   8.147 +
   8.148 +  \item @{ML "Theory.merge"}~@{text "(thy\<^sub>1, thy\<^sub>2)"} absorbs one theory
   8.149 +  into the other.  This version of ad-hoc theory merge fails for
   8.150 +  unrelated theories!
   8.151 +
   8.152 +  \item @{ML "Theory.begin_theory"}~@{text "name parents"} constructs
   8.153 +  a new theory based on the given parents.  This ML function is
   8.154 +  normally not invoked directly.
   8.155 +
   8.156 +  \item @{ML "Theory.parents_of"}~@{text "thy"} returns the direct
   8.157 +  ancestors of @{text thy}.
   8.158 +
   8.159 +  \item @{ML "Theory.ancestors_of"}~@{text "thy"} returns all
   8.160 +  ancestors of @{text thy} (not including @{text thy} itself).
   8.161 +
   8.162 +  \end{description}
   8.163 +*}
   8.164 +
   8.165 +text %mlantiq {*
   8.166 +  \begin{matharray}{rcl}
   8.167 +  @{ML_antiquotation_def "theory"} & : & @{text ML_antiquotation} \\
   8.168 +  @{ML_antiquotation_def "theory_context"} & : & @{text ML_antiquotation} \\
   8.169 +  \end{matharray}
   8.170 +
   8.171 +  @{rail \<open>
   8.172 +  @@{ML_antiquotation theory} nameref?
   8.173 +  ;
   8.174 +  @@{ML_antiquotation theory_context} nameref
   8.175 +  \<close>}
   8.176 +
   8.177 +  \begin{description}
   8.178 +
   8.179 +  \item @{text "@{theory}"} refers to the background theory of the
   8.180 +  current context --- as abstract value.
   8.181 +
   8.182 +  \item @{text "@{theory A}"} refers to an explicitly named ancestor
   8.183 +  theory @{text "A"} of the background theory of the current context
   8.184 +  --- as abstract value.
   8.185 +
   8.186 +  \item @{text "@{theory_context A}"} is similar to @{text "@{theory
   8.187 +  A}"}, but presents the result as initial @{ML_type Proof.context}
   8.188 +  (see also @{ML Proof_Context.init_global}).
   8.189 +
   8.190 +  \end{description}
   8.191 +*}
   8.192 +
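text %mlex {* A small sketch of these operations and antiquotations,
  assuming that @{text "Pure"} is among the ancestors of the present
  background theory:
*}

ML_val {*
  @{assert} (Theory.subthy (@{theory Pure}, @{theory}));
  @{assert} (Theory.eq_thy (@{theory}, @{theory}));
  @{assert} (not (null (Theory.ancestors_of @{theory})));
*}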
   8.193 +
   8.194 +subsection {* Proof context \label{sec:context-proof} *}
   8.195 +
   8.196 +text {* A proof context is a container for pure data that refers to
   8.197 +  the theory from which it is derived. The @{text "init"} operation
   8.198 +  creates a proof context from a given theory. There is an explicit
   8.199 +  @{text "transfer"} operation to force resynchronization with updates
   8.200 +  to the background theory -- this is rarely required in practice.
   8.201 +
   8.202 +  Entities derived in a proof context need to record logical
   8.203 +  requirements explicitly, since there is no separate context
   8.204 +  identification or symbolic inclusion as for theories.  For example,
   8.205 +  hypotheses used in primitive derivations (cf.\ \secref{sec:thms})
   8.206 +  are recorded separately within the sequent @{text "\<Gamma> \<turnstile> \<phi>"}, just to
   8.207 +  make double sure.  Results could still leak into an alien proof
   8.208 +  context due to programming errors, but Isabelle/Isar includes some
   8.209 +  extra validity checks in critical positions, notably at the end of a
   8.210 +  sub-proof.
   8.211 +
   8.212 +  Proof contexts may be manipulated arbitrarily, although the common
   8.213 +  discipline is to follow block structure as a mental model: a given
   8.214 +  context is extended consecutively, and results are exported back
   8.215 +  into the original context.  Note that an Isar proof state models
   8.216 +  block-structured reasoning explicitly, using a stack of proof
   8.217 +  contexts internally.  For various technical reasons, the background
   8.218 +  theory of an Isar proof state must not be changed while the proof is
   8.219 +  still under construction!
   8.220 +*}
   8.221 +
   8.222 +text %mlref {*
   8.223 +  \begin{mldecls}
   8.224 +  @{index_ML_type Proof.context} \\
   8.225 +  @{index_ML Proof_Context.init_global: "theory -> Proof.context"} \\
   8.226 +  @{index_ML Proof_Context.theory_of: "Proof.context -> theory"} \\
   8.227 +  @{index_ML Proof_Context.transfer: "theory -> Proof.context -> Proof.context"} \\
   8.228 +  \end{mldecls}
   8.229 +
   8.230 +  \begin{description}
   8.231 +
   8.232 +  \item Type @{ML_type Proof.context} represents proof contexts.
   8.233 +
   8.234 +  \item @{ML Proof_Context.init_global}~@{text "thy"} produces a proof
   8.235 +  context derived from @{text "thy"}, initializing all data.
   8.236 +
   8.237 +  \item @{ML Proof_Context.theory_of}~@{text "ctxt"} selects the
   8.238 +  background theory from @{text "ctxt"}.
   8.239 +
   8.240 +  \item @{ML Proof_Context.transfer}~@{text "thy ctxt"} promotes the
   8.241 +  background theory of @{text "ctxt"} to the super theory @{text
   8.242 +  "thy"}.
   8.243 +
   8.244 +  \end{description}
   8.245 +*}
   8.246 +
   8.247 +text %mlantiq {*
   8.248 +  \begin{matharray}{rcl}
   8.249 +  @{ML_antiquotation_def "context"} & : & @{text ML_antiquotation} \\
   8.250 +  \end{matharray}
   8.251 +
   8.252 +  \begin{description}
   8.253 +
   8.254 +  \item @{text "@{context}"} refers to \emph{the} context at
   8.255 +  compile-time --- as abstract value.  Independently of (local) theory
   8.256 +  or proof mode, this always produces a meaningful result.
   8.257 +
   8.258 +  This is probably the most common antiquotation in interactive
   8.259 +  experimentation with ML inside Isar.
   8.260 +
   8.261 +  \end{description}
   8.262 +*}
   8.263 +
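text %mlex {* The following minimal sketch connects the operations
  above: a proof context initialized from the background theory refers
  back to that very theory.
*}

ML_val {*
  val ctxt = Proof_Context.init_global @{theory};
  @{assert} (Theory.eq_thy (Proof_Context.theory_of ctxt, @{theory}));
  @{assert} (Theory.eq_thy (Proof_Context.theory_of @{context}, @{theory}));
*}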
   8.264 +
   8.265 +subsection {* Generic contexts \label{sec:generic-context} *}
   8.266 +
   8.267 +text {*
   8.268 +  A generic context is the disjoint sum of either a theory or proof
   8.269 +  context.  Occasionally, this enables uniform treatment of generic
   8.270 +  context data, typically extra-logical information.  Operations on
   8.271 +  generic contexts include the usual injections, partial selections,
   8.272 +  and combinators for lifting operations on either component of the
   8.273 +  disjoint sum.
   8.274 +
   8.275 +  Moreover, there are total operations @{text "theory_of"} and @{text
   8.276 +  "proof_of"} to convert a generic context into either kind: a theory
   8.277 +  can always be selected from the sum, while a proof context might
   8.278 +  have to be constructed by an ad-hoc @{text "init"} operation, which
   8.279 +  incurs a small runtime overhead.
   8.280 +*}
   8.281 +
   8.282 +text %mlref {*
   8.283 +  \begin{mldecls}
   8.284 +  @{index_ML_type Context.generic} \\
   8.285 +  @{index_ML Context.theory_of: "Context.generic -> theory"} \\
   8.286 +  @{index_ML Context.proof_of: "Context.generic -> Proof.context"} \\
   8.287 +  \end{mldecls}
   8.288 +
   8.289 +  \begin{description}
   8.290 +
   8.291 +  \item Type @{ML_type Context.generic} is the direct sum of @{ML_type
   8.292 +  "theory"} and @{ML_type "Proof.context"}, with the datatype
   8.293 +  constructors @{ML "Context.Theory"} and @{ML "Context.Proof"}.
   8.294 +
   8.295 +  \item @{ML Context.theory_of}~@{text "context"} always produces a
   8.296 +  theory from the generic @{text "context"}, using @{ML
   8.297 +  "Proof_Context.theory_of"} as required.
   8.298 +
   8.299 +  \item @{ML Context.proof_of}~@{text "context"} always produces a
   8.300 +  proof context from the generic @{text "context"}, using @{ML
   8.301 +  "Proof_Context.init_global"} as required (note that this re-initializes the
   8.302 +  context data with each invocation).
   8.303 +
   8.304 +  \end{description}
   8.305 +*}
   8.306 +
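text %mlex {* A small sketch of wrapping either kind of context into the
  disjoint sum and projecting the background theory out again:
*}

ML_val {*
  val generic = Context.Proof @{context};
  @{assert} (Theory.eq_thy (Context.theory_of generic, @{theory}));

  val generic' = Context.Theory @{theory};
  @{assert} (Theory.eq_thy (Context.theory_of generic', @{theory}));
*}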
   8.307 +
   8.308 +subsection {* Context data \label{sec:context-data} *}
   8.309 +
   8.310 +text {* The main purpose of theory and proof contexts is to manage
   8.311 +  arbitrary (pure) data.  New data types can be declared incrementally
   8.312 +  at compile time.  There are separate declaration mechanisms for any
   8.313 +  of the three kinds of contexts: theory, proof, generic.
   8.314 +
   8.315 +  \paragraph{Theory data} declarations need to implement the following
   8.316 +  SML signature:
   8.317 +
   8.318 +  \medskip
   8.319 +  \begin{tabular}{ll}
   8.320 +  @{text "\<type> T"} & representing type \\
   8.321 +  @{text "\<val> empty: T"} & empty default value \\
   8.322 +  @{text "\<val> extend: T \<rightarrow> T"} & re-initialize on import \\
   8.323 +  @{text "\<val> merge: T \<times> T \<rightarrow> T"} & join on import \\
   8.324 +  \end{tabular}
   8.325 +  \medskip
   8.326 +
   8.327 +  The @{text "empty"} value acts as initial default for \emph{any}
   8.328 +  theory that does not declare actual data content; @{text "extend"}
   8.329 +  is acts like a unitary version of @{text "merge"}.
   8.330 +
   8.331 +  Implementing @{text "merge"} can be tricky.  The general idea is
   8.332 +  that @{text "merge (data\<^sub>1, data\<^sub>2)"} inserts those parts of @{text
   8.333 +  "data\<^sub>2"} into @{text "data\<^sub>1"} that are not yet present, while
   8.334 +  keeping the general order of things.  The @{ML Library.merge}
   8.335 +  function on plain lists may serve as canonical template.
   8.336 +
   8.337 +  Particularly note that shared parts of the data must not be
   8.338 +  duplicated by naive concatenation, or a theory graph that is like a
   8.339 +  chain of diamonds would cause an exponential blowup!
   8.340 +
   8.341 +  \paragraph{Proof context data} declarations need to implement the
   8.342 +  following SML signature:
   8.343 +
   8.344 +  \medskip
   8.345 +  \begin{tabular}{ll}
   8.346 +  @{text "\<type> T"} & representing type \\
   8.347 +  @{text "\<val> init: theory \<rightarrow> T"} & produce initial value \\
   8.348 +  \end{tabular}
   8.349 +  \medskip
   8.350 +
   8.351 +  The @{text "init"} operation is supposed to produce a pure value
   8.352 +  from the given background theory and should be somehow
   8.353 +  ``immediate''.  Whenever a proof context is initialized, which
   8.354 +  happens frequently, the system invokes the @{text "init"}
   8.355 +  operation of \emph{all} proof data slots ever declared.  This also
   8.356 +  means that one needs to be economical about the total number of proof
   8.357 +  data declarations in the system, i.e.\ each ML module should declare
   8.358 +  at most one, sometimes two data slots for its internal use.
   8.359 +  Repeated data declarations to simulate a record type should be
   8.360 +  avoided!
   8.361 +
   8.362 +  \paragraph{Generic data} provides a hybrid interface for both theory
   8.363 +  and proof data.  The @{text "init"} operation for proof contexts is
   8.364 +  predefined to select the current data value from the background
   8.365 +  theory.
   8.366 +
   8.367 +  \bigskip Any of the above data declarations over type @{text "T"}
   8.368 +  results in an ML structure with the following signature:
   8.369 +
   8.370 +  \medskip
   8.371 +  \begin{tabular}{ll}
   8.372 +  @{text "get: context \<rightarrow> T"} \\
   8.373 +  @{text "put: T \<rightarrow> context \<rightarrow> context"} \\
   8.374 +  @{text "map: (T \<rightarrow> T) \<rightarrow> context \<rightarrow> context"} \\
   8.375 +  \end{tabular}
   8.376 +  \medskip
   8.377 +
   8.378 +  These operations provide exclusive access for the particular
   8.379 +  kind of context (theory, proof, or generic context).  This interface
   8.380 +  observes the ML discipline for types and scopes: there is no other
   8.381 +  way to access the corresponding data slot of a context.  By keeping
   8.382 +  these operations private, an Isabelle/ML module may maintain
   8.383 +  abstract values authentically.  *}
   8.384 +
   8.385 +text %mlref {*
   8.386 +  \begin{mldecls}
   8.387 +  @{index_ML_functor Theory_Data} \\
   8.388 +  @{index_ML_functor Proof_Data} \\
   8.389 +  @{index_ML_functor Generic_Data} \\
   8.390 +  \end{mldecls}
   8.391 +
   8.392 +  \begin{description}
   8.393 +
   8.394 +  \item @{ML_functor Theory_Data}@{text "(spec)"} declares data for
   8.395 +  type @{ML_type theory} according to the specification provided as
   8.396 +  argument structure.  The resulting structure provides data init and
   8.397 +  access operations as described above.
   8.398 +
   8.399 +  \item @{ML_functor Proof_Data}@{text "(spec)"} is analogous to
   8.400 +  @{ML_functor Theory_Data} for type @{ML_type Proof.context}.
   8.401 +
   8.402 +  \item @{ML_functor Generic_Data}@{text "(spec)"} is analogous to
   8.403 +  @{ML_functor Theory_Data} for type @{ML_type Context.generic}.
   8.404 +
   8.405 +  \end{description}
   8.406 +*}
   8.407 +
   8.408 +text %mlex {*
   8.409 +  The following artificial example demonstrates theory
   8.410 +  data: we maintain a set of terms that are supposed to be wellformed
   8.411 +  wrt.\ the enclosing theory.  The public interface is as follows:
   8.412 +*}
   8.413 +
   8.414 +ML {*
   8.415 +  signature WELLFORMED_TERMS =
   8.416 +  sig
   8.417 +    val get: theory -> term list
   8.418 +    val add: term -> theory -> theory
   8.419 +  end;
   8.420 +*}
   8.421 +
   8.422 +text {* The implementation uses private theory data internally, and
   8.423 +  only exposes an operation that involves explicit argument checking
   8.424 +  wrt.\ the given theory. *}
   8.425 +
   8.426 +ML {*
   8.427 +  structure Wellformed_Terms: WELLFORMED_TERMS =
   8.428 +  struct
   8.429 +
   8.430 +  structure Terms = Theory_Data
   8.431 +  (
   8.432 +    type T = term Ord_List.T;
   8.433 +    val empty = [];
   8.434 +    val extend = I;
   8.435 +    fun merge (ts1, ts2) =
   8.436 +      Ord_List.union Term_Ord.fast_term_ord ts1 ts2;
   8.437 +  );
   8.438 +
   8.439 +  val get = Terms.get;
   8.440 +
   8.441 +  fun add raw_t thy =
   8.442 +    let
   8.443 +      val t = Sign.cert_term thy raw_t;
   8.444 +    in
   8.445 +      Terms.map (Ord_List.insert Term_Ord.fast_term_ord t) thy
   8.446 +    end;
   8.447 +
   8.448 +  end;
   8.449 +*}
   8.450 +
   8.451 +text {* Type @{ML_type "term Ord_List.T"} is used for reasonably
   8.452 +  efficient representation of a set of terms: all operations are
   8.453 +  linear in the number of stored elements.  Here we assume that users
   8.454 +  of this module do not care about the declaration order, since that
   8.455 +  data structure forces its own arrangement of elements.
   8.456 +
   8.457 +  Observe how the @{ML_text merge} operation joins the data slots of
   8.458 +  the two constituents: @{ML Ord_List.union} prevents duplication of
   8.459 +  common data from different branches, thus avoiding the danger of
   8.460 +  exponential blowup.  Plain list append etc.\ must never be used for
   8.461 +  theory data merges!
   8.462 +
   8.463 +  \medskip Our intended invariant is achieved as follows:
   8.464 +  \begin{enumerate}
   8.465 +
   8.466 +  \item @{ML Wellformed_Terms.add} only admits terms that have passed
   8.467 +  the @{ML Sign.cert_term} check of the given theory at that point.
   8.468 +
   8.469 +  \item Wellformedness in the sense of @{ML Sign.cert_term} is
   8.470 +  monotonic wrt.\ the sub-theory relation.  So our data can move
   8.471 +  upwards in the hierarchy (via extension or merges), and maintain
   8.472 +  wellformedness without further checks.
   8.473 +
   8.474 +  \end{enumerate}
   8.475 +
   8.476 +  Note that all basic operations of the inference kernel (which
   8.477 +  includes @{ML Sign.cert_term}) observe this monotonicity principle,
   8.478 +  but other user-space tools don't.  For example, fully-featured
   8.479 +  type-inference via @{ML Syntax.check_term} (cf.\
   8.480 +  \secref{sec:term-check}) is not necessarily monotonic wrt.\ the
   8.481 +  background theory, since constraints of term constants can be
   8.482 +  modified by later declarations, for example.
   8.483 +
   8.484 +  In most cases, user-space context data does not have to take such
   8.485 +  invariants too seriously.  The situation is different in the
   8.486 +  implementation of the inference kernel itself, which uses the very
   8.487 +  same data mechanisms for types, constants, axioms etc.
   8.488 +*}
   8.489 +
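text %mlex {* For comparison with the theory data example above, the
  following minimal sketch declares proof context data: a list of
  strings that starts out empty for every proof context.  The structure
  name is invented for illustration.
*}

ML {*
  structure Notes = Proof_Data
  (
    type T = string list;
    fun init _ = [];
  );

  val ctxt0 = Proof_Context.init_global @{theory};
  val ctxt1 = Notes.map (cons "hello") ctxt0;
  @{assert} (Notes.get ctxt1 = ["hello"]);
  @{assert} (Notes.get ctxt0 = []);
*}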
   8.490 +
   8.491 +subsection {* Configuration options \label{sec:config-options} *}
   8.492 +
   8.493 +text {* A \emph{configuration option} is a named optional value of
   8.494 +  some basic type (Boolean, integer, string) that is stored in the
   8.495 +  context.  It is a simple application of general context data
   8.496 +  (\secref{sec:context-data}) that is sufficiently common to justify
   8.497 +  customized setup, which includes some concrete declarations for
   8.498 +  end-users using existing notation for attributes (cf.\
   8.499 +  \secref{sec:attributes}).
   8.500 +
   8.501 +  For example, the predefined configuration option @{attribute
   8.502 +  show_types} controls output of explicit type constraints for
   8.503 +  variables in printed terms (cf.\ \secref{sec:read-print}).  Its
   8.504 +  value can be modified within Isar text like this:
   8.505 +*}
   8.506 +
   8.507 +declare [[show_types = false]]
   8.508 +  -- {* declaration within (local) theory context *}
   8.509 +
   8.510 +notepad
   8.511 +begin
   8.512 +  note [[show_types = true]]
   8.513 +    -- {* declaration within proof (forward mode) *}
   8.514 +  term x
   8.515 +
   8.516 +  have "x = x"
   8.517 +    using [[show_types = false]]
   8.518 +      -- {* declaration within proof (backward mode) *}
   8.519 +    ..
   8.520 +end
   8.521 +
   8.522 +text {* Configuration options that are not set explicitly hold a
   8.523 +  default value that can depend on the application context.  This makes
   8.524 +  it possible to retrieve the value from another slot within the
   8.525 +  context, or to fall back on a global preference mechanism, for example.
   8.526 +
   8.527 +  The operations to declare configuration options and get/map their
   8.528 +  values are modeled as direct replacements for historic global
   8.529 +  references, only that the context is made explicit.  This allows
   8.530 +  easy configuration of tools, without relying on the execution order
   8.531 +  as required for old-style mutable references.  *}
   8.532 +
   8.533 +text %mlref {*
   8.534 +  \begin{mldecls}
   8.535 +  @{index_ML Config.get: "Proof.context -> 'a Config.T -> 'a"} \\
   8.536 +  @{index_ML Config.map: "'a Config.T -> ('a -> 'a) -> Proof.context -> Proof.context"} \\
   8.537 +  @{index_ML Attrib.setup_config_bool: "binding -> (Context.generic -> bool) ->
   8.538 +  bool Config.T"} \\
   8.539 +  @{index_ML Attrib.setup_config_int: "binding -> (Context.generic -> int) ->
   8.540 +  int Config.T"} \\
   8.541 +  @{index_ML Attrib.setup_config_real: "binding -> (Context.generic -> real) ->
   8.542 +  real Config.T"} \\
   8.543 +  @{index_ML Attrib.setup_config_string: "binding -> (Context.generic -> string) ->
   8.544 +  string Config.T"} \\
   8.545 +  \end{mldecls}
   8.546 +
   8.547 +  \begin{description}
   8.548 +
   8.549 +  \item @{ML Config.get}~@{text "ctxt config"} gets the value of
   8.550 +  @{text "config"} in the given context.
   8.551 +
   8.552 +  \item @{ML Config.map}~@{text "config f ctxt"} updates the context
   8.553 +  by updating the value of @{text "config"}.
   8.554 +
   8.555 +  \item @{text "config ="}~@{ML Attrib.setup_config_bool}~@{text "name
   8.556 +  default"} creates a named configuration option of type @{ML_type
   8.557 +  bool}, with the given @{text "default"} depending on the application
   8.558 +  context.  The resulting @{text "config"} can be used to get/map its
   8.559 +  value in a given context.  There is an implicit update of the
   8.560 +  background theory that registers the option as attribute with some
   8.561 +  concrete syntax.
   8.562 +
   8.563 +  \item @{ML Attrib.setup_config_int}, @{ML Attrib.setup_config_real}, and @{ML
   8.564 +  Attrib.setup_config_string} work like @{ML Attrib.setup_config_bool}, but
   8.565 +  for types @{ML_type int}, @{ML_type real}, and @{ML_type string}, respectively.
   8.566 +
   8.567 +  \end{description}
   8.568 +*}
   8.569 +
   8.570 +text %mlex {* The following example shows how to declare and use a
   8.571 +  Boolean configuration option called @{text "my_flag"} with constant
   8.572 +  default value @{ML false}.  *}
   8.573 +
   8.574 +ML {*
   8.575 +  val my_flag =
   8.576 +    Attrib.setup_config_bool @{binding my_flag} (K false)
   8.577 +*}
   8.578 +
   8.579 +text {* Now the user can refer to @{attribute my_flag} in
   8.580 +  declarations, while ML tools can retrieve the current value from the
   8.581 +  context via @{ML Config.get}.  *}
   8.582 +
   8.583 +ML_val {* @{assert} (Config.get @{context} my_flag = false) *}
   8.584 +
   8.585 +declare [[my_flag = true]]
   8.586 +
   8.587 +ML_val {* @{assert} (Config.get @{context} my_flag = true) *}
   8.588 +
   8.589 +notepad
   8.590 +begin
   8.591 +  {
   8.592 +    note [[my_flag = false]]
   8.593 +    ML_val {* @{assert} (Config.get @{context} my_flag = false) *}
   8.594 +  }
   8.595 +  ML_val {* @{assert} (Config.get @{context} my_flag = true) *}
   8.596 +end
   8.597 +
   8.598 +text {* Here is another example involving ML type @{ML_type real}
   8.599 +  (floating-point numbers). *}
   8.600 +
   8.601 +ML {*
   8.602 +  val airspeed_velocity =
   8.603 +    Attrib.setup_config_real @{binding airspeed_velocity} (K 0.0)
   8.604 +*}
   8.605 +
   8.606 +declare [[airspeed_velocity = 10]]
   8.607 +declare [[airspeed_velocity = 9.9]]
   8.608 +
   8.609 +
   8.610 +section {* Names \label{sec:names} *}
   8.611 +
   8.612 +text {* In principle, a name is just a string, but there are various
   8.613 +  conventions for representing additional structure.  For example,
   8.614 +  ``@{text "Foo.bar.baz"}'' is considered as a long name consisting of
   8.615 +  qualifier @{text "Foo.bar"} and base name @{text "baz"}.  The
   8.616 +  individual constituents of a name may have further substructure,
   8.617 +  e.g.\ the string ``\verb,\,\verb,<alpha>,'' encodes as a single
   8.618 +  symbol (\secref{sec:symbols}).
   8.619 +
   8.620 +  \medskip Subsequently, we shall introduce specific categories of
   8.621 +  names.  Roughly speaking these correspond to logical entities as
   8.622 +  follows:
   8.623 +  \begin{itemize}
   8.624 +
   8.625 +  \item Basic names (\secref{sec:basic-name}): free and bound
   8.626 +  variables.
   8.627 +
   8.628 +  \item Indexed names (\secref{sec:indexname}): schematic variables.
   8.629 +
   8.630 +  \item Long names (\secref{sec:long-name}): constants of any kind
   8.631 +  (type constructors, term constants, other concepts defined in user
   8.632 +  space).  Such entities are typically managed via name spaces
   8.633 +  (\secref{sec:name-space}).
   8.634 +
   8.635 +  \end{itemize}
   8.636 +*}
   8.637 +
   8.638 +
   8.639 +subsection {* Basic names \label{sec:basic-name} *}
   8.640 +
   8.641 +text {*
   8.642 +  A \emph{basic name} essentially consists of a single Isabelle
   8.643 +  identifier.  There are conventions to mark separate classes of basic
   8.644 +  names, by attaching a suffix of underscores: one underscore means
   8.645 +  \emph{internal name}, two underscores means \emph{Skolem name},
   8.646 +  three underscores means \emph{internal Skolem name}.
   8.647 +
   8.648 +  For example, the basic name @{text "foo"} has the internal version
   8.649 +  @{text "foo_"}, with Skolem versions @{text "foo__"} and @{text
   8.650 +  "foo___"}, respectively.
   8.651 +
   8.652 +  These special versions provide copies of the basic name space, apart
   8.653 +  from anything that normally appears in the user text.  For example,
   8.654 +  system generated variables in Isar proof contexts are usually marked
   8.655 +  as internal, which prevents mysterious names like @{text "xaa"} from
   8.656 +  appearing in human-readable text.
   8.657 +
   8.658 +  \medskip Manipulating binding scopes often requires on-the-fly
   8.659 +  renamings.  A \emph{name context} contains a collection of already
   8.660 +  used names.  The @{text "declare"} operation adds names to the
   8.661 +  context.
   8.662 +
   8.663 +  The @{text "invents"} operation derives a number of fresh names from
   8.664 +  a given starting point.  For example, the first three names derived
   8.665 +  from @{text "a"} are @{text "a"}, @{text "b"}, @{text "c"}.
   8.666 +
   8.667 +  The @{text "variants"} operation produces fresh names by
   8.668 +  incrementing tentative names as base-26 numbers (with digits @{text
   8.669 +  "a..z"}) until all clashes are resolved.  For example, name @{text
   8.670 +  "foo"} results in variants @{text "fooa"}, @{text "foob"}, @{text
   8.671 +  "fooc"}, \dots, @{text "fooaa"}, @{text "fooab"} etc.; each renaming
   8.672 +  step picks the next unused variant from this sequence.
   8.673 +*}
   8.674 +
   8.675 +text %mlref {*
   8.676 +  \begin{mldecls}
   8.677 +  @{index_ML Name.internal: "string -> string"} \\
   8.678 +  @{index_ML Name.skolem: "string -> string"} \\
   8.679 +  \end{mldecls}
   8.680 +  \begin{mldecls}
   8.681 +  @{index_ML_type Name.context} \\
   8.682 +  @{index_ML Name.context: Name.context} \\
   8.683 +  @{index_ML Name.declare: "string -> Name.context -> Name.context"} \\
   8.684 +  @{index_ML Name.invent: "Name.context -> string -> int -> string list"} \\
   8.685 +  @{index_ML Name.variant: "string -> Name.context -> string * Name.context"} \\
   8.686 +  \end{mldecls}
   8.687 +  \begin{mldecls}
   8.688 +  @{index_ML Variable.names_of: "Proof.context -> Name.context"} \\
   8.689 +  \end{mldecls}
   8.690 +
   8.691 +  \begin{description}
   8.692 +
   8.693 +  \item @{ML Name.internal}~@{text "name"} produces an internal name
   8.694 +  by adding one underscore.
   8.695 +
   8.696 +  \item @{ML Name.skolem}~@{text "name"} produces a Skolem name by
   8.697 +  adding two underscores.
   8.698 +
   8.699 +  \item Type @{ML_type Name.context} represents the context of already
   8.700 +  used names; the initial value is @{ML "Name.context"}.
   8.701 +
   8.702 +  \item @{ML Name.declare}~@{text "name"} enters a used name into the
   8.703 +  context.
   8.704 +
   8.705 +  \item @{ML Name.invent}~@{text "context name n"} produces @{text
   8.706 +  "n"} fresh names derived from @{text "name"}.
   8.707 +
   8.708 +  \item @{ML Name.variant}~@{text "name context"} produces a fresh
   8.709 +  variant of @{text "name"}; the result is declared to the context.
   8.710 +
   8.711 +  \item @{ML Variable.names_of}~@{text "ctxt"} retrieves the context
   8.712 +  of declared type and term variable names.  Projecting a proof
   8.713 +  context down to a primitive name context is occasionally useful when
   8.714 +  invoking lower-level operations.  Regular management of ``fresh
   8.715 +  variables'' is done by suitable operations of structure @{ML_structure
   8.716 +  Variable}, which is also able to provide an official status of
   8.717 +  ``locally fixed variable'' within the logical environment (cf.\
   8.718 +  \secref{sec:variables}).
   8.719 +
   8.720 +  \end{description}
   8.721 +*}
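          +
          +text %mlex {* As a minimal sketch, the underscore conventions for
          +  internal and Skolem names can be checked directly on the @{text
          +  "foo"} example above. *}
          +
          +ML {*
          +  (*one underscore: internal name; two underscores: Skolem name*)
          +  @{assert} (Name.internal "foo" = "foo_");
          +  @{assert} (Name.skolem "foo" = "foo__");
          +*}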
   8.722 +
   8.723 +text %mlex {* The following simple examples demonstrate how to produce
   8.724 +  fresh names from the initial @{ML Name.context}. *}
   8.725 +
   8.726 +ML {*
   8.727 +  val list1 = Name.invent Name.context "a" 5;
   8.728 +  @{assert} (list1 = ["a", "b", "c", "d", "e"]);
   8.729 +
   8.730 +  val list2 =
   8.731 +    #1 (fold_map Name.variant ["x", "x", "a", "a", "'a", "'a"] Name.context);
   8.732 +  @{assert} (list2 = ["x", "xa", "a", "aa", "'a", "'aa"]);
   8.733 +*}
   8.734 +
    8.735 +text {* \medskip The same works relative to the formal context as
   8.736 +  follows. *}
   8.737 +
   8.738 +locale ex = fixes a b c :: 'a
   8.739 +begin
   8.740 +
   8.741 +ML {*
   8.742 +  val names = Variable.names_of @{context};
   8.743 +
   8.744 +  val list1 = Name.invent names "a" 5;
   8.745 +  @{assert} (list1 = ["d", "e", "f", "g", "h"]);
   8.746 +
   8.747 +  val list2 =
   8.748 +    #1 (fold_map Name.variant ["x", "x", "a", "a", "'a", "'a"] names);
   8.749 +  @{assert} (list2 = ["x", "xa", "aa", "ab", "'aa", "'ab"]);
   8.750 +*}
   8.751 +
   8.752 +end
   8.753 +
   8.754 +
   8.755 +subsection {* Indexed names \label{sec:indexname} *}
   8.756 +
   8.757 +text {*
   8.758 +  An \emph{indexed name} (or @{text "indexname"}) is a pair of a basic
   8.759 +  name and a natural number.  This representation allows efficient
   8.760 +  renaming by incrementing the second component only.  The canonical
   8.761 +  way to rename two collections of indexnames apart from each other is
   8.762 +  this: determine the maximum index @{text "maxidx"} of the first
   8.763 +  collection, then increment all indexes of the second collection by
   8.764 +  @{text "maxidx + 1"}; the maximum index of an empty collection is
   8.765 +  @{text "-1"}.
   8.766 +
   8.767 +  Occasionally, basic names are injected into the same pair type of
   8.768 +  indexed names: then @{text "(x, -1)"} is used to encode the basic
   8.769 +  name @{text "x"}.
   8.770 +
   8.771 +  \medskip Isabelle syntax observes the following rules for
   8.772 +  representing an indexname @{text "(x, i)"} as a packed string:
   8.773 +
   8.774 +  \begin{itemize}
   8.775 +
   8.776 +  \item @{text "?x"} if @{text "x"} does not end with a digit and @{text "i = 0"},
   8.777 +
   8.778 +  \item @{text "?xi"} if @{text "x"} does not end with a digit,
   8.779 +
   8.780 +  \item @{text "?x.i"} otherwise.
   8.781 +
   8.782 +  \end{itemize}
   8.783 +
   8.784 +  Indexnames may acquire large index numbers after several maxidx
   8.785 +  shifts have been applied.  Results are usually normalized towards
   8.786 +  @{text "0"} at certain checkpoints, notably at the end of a proof.
   8.787 +  This works by producing variants of the corresponding basic name
   8.788 +  components.  For example, the collection @{text "?x1, ?x7, ?x42"}
   8.789 +  becomes @{text "?x, ?xa, ?xb"}.
   8.790 +*}
   8.791 +
   8.792 +text %mlref {*
   8.793 +  \begin{mldecls}
   8.794 +  @{index_ML_type indexname: "string * int"} \\
   8.795 +  \end{mldecls}
   8.796 +
   8.797 +  \begin{description}
   8.798 +
   8.799 +  \item Type @{ML_type indexname} represents indexed names.  This is
   8.800 +  an abbreviation for @{ML_type "string * int"}.  The second component
   8.801 +  is usually non-negative, except for situations where @{text "(x,
   8.802 +  -1)"} is used to inject basic names into this type.  Other negative
   8.803 +  indexes should not be used.
   8.804 +
   8.805 +  \end{description}
   8.806 +*}
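          +
          +text %mlex {* The packed string representation follows the rules
          +  above; a small sketch, with the expected renderings indicated in
          +  comments: *}
          +
          +ML {*
          +  val a: indexname = ("x", 0);   (*rendered as ?x*)
          +  val b: indexname = ("x", 2);   (*rendered as ?x2*)
          +  val c: indexname = ("x1", 0);  (*name ends with a digit: ?x1.0*)
          +*}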
   8.807 +
   8.808 +
   8.809 +subsection {* Long names \label{sec:long-name} *}
   8.810 +
   8.811 +text {* A \emph{long name} consists of a sequence of non-empty name
   8.812 +  components.  The packed representation uses a dot as separator, as
   8.813 +  in ``@{text "A.b.c"}''.  The last component is called \emph{base
   8.814 +  name}, the remaining prefix is called \emph{qualifier} (which may be
   8.815 +  empty).  The qualifier can be understood as the access path to the
   8.816 +  named entity while passing through some nested block-structure,
   8.817 +  although our free-form long names do not really enforce any strict
   8.818 +  discipline.
   8.819 +
   8.820 +  For example, an item named ``@{text "A.b.c"}'' may be understood as
   8.821 +  a local entity @{text "c"}, within a local structure @{text "b"},
   8.822 +  within a global structure @{text "A"}.  In practice, long names
   8.823 +  usually represent 1--3 levels of qualification.  User ML code should
   8.824 +  not make any assumptions about the particular structure of long
   8.825 +  names!
   8.826 +
   8.827 +  The empty name is commonly used as an indication of unnamed
   8.828 +  entities, or entities that are not entered into the corresponding
   8.829 +  name space, whenever this makes any sense.  The basic operations on
   8.830 +  long names map empty names again to empty names.
   8.831 +*}
   8.832 +
   8.833 +text %mlref {*
   8.834 +  \begin{mldecls}
   8.835 +  @{index_ML Long_Name.base_name: "string -> string"} \\
   8.836 +  @{index_ML Long_Name.qualifier: "string -> string"} \\
   8.837 +  @{index_ML Long_Name.append: "string -> string -> string"} \\
   8.838 +  @{index_ML Long_Name.implode: "string list -> string"} \\
   8.839 +  @{index_ML Long_Name.explode: "string -> string list"} \\
   8.840 +  \end{mldecls}
   8.841 +
   8.842 +  \begin{description}
   8.843 +
   8.844 +  \item @{ML Long_Name.base_name}~@{text "name"} returns the base name
   8.845 +  of a long name.
   8.846 +
   8.847 +  \item @{ML Long_Name.qualifier}~@{text "name"} returns the qualifier
   8.848 +  of a long name.
   8.849 +
   8.850 +  \item @{ML Long_Name.append}~@{text "name\<^sub>1 name\<^sub>2"} appends two long
   8.851 +  names.
   8.852 +
   8.853 +  \item @{ML Long_Name.implode}~@{text "names"} and @{ML
   8.854 +  Long_Name.explode}~@{text "name"} convert between the packed string
   8.855 +  representation and the explicit list form of long names.
   8.856 +
   8.857 +  \end{description}
   8.858 +*}
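          +
          +text %mlex {* The following minimal sketch illustrates these
          +  operations on the example name @{text "A.b.c"} from above. *}
          +
          +ML {*
          +  @{assert} (Long_Name.base_name "A.b.c" = "c");
          +  @{assert} (Long_Name.qualifier "A.b.c" = "A.b");
          +  @{assert} (Long_Name.append "A.b" "c" = "A.b.c");
          +  @{assert} (Long_Name.explode "A.b.c" = ["A", "b", "c"]);
          +  @{assert} (Long_Name.implode ["A", "b", "c"] = "A.b.c");
          +*}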
   8.859 +
   8.860 +
   8.861 +subsection {* Name spaces \label{sec:name-space} *}
   8.862 +
   8.863 +text {* A @{text "name space"} manages a collection of long names,
   8.864 +  together with a mapping between partially qualified external names
   8.865 +  and fully qualified internal names (in both directions).  Note that
   8.866 +  the corresponding @{text "intern"} and @{text "extern"} operations
   8.867 +  are mostly used for parsing and printing only!  The @{text
   8.868 +  "declare"} operation augments a name space according to the accesses
   8.869 +  determined by a given binding, and a naming policy from the context.
   8.870 +
   8.871 +  \medskip A @{text "binding"} specifies details about the prospective
   8.872 +  long name of a newly introduced formal entity.  It consists of a
   8.873 +  base name, prefixes for qualification (separate ones for system
   8.874 +  infrastructure and user-space mechanisms), a slot for the original
   8.875 +  source position, and some additional flags.
   8.876 +
   8.877 +  \medskip A @{text "naming"} provides some additional details for
   8.878 +  producing a long name from a binding.  Normally, the naming is
   8.879 +  implicit in the theory or proof context.  The @{text "full"}
   8.880 +  operation (and its variants for different context types) produces a
   8.881 +  fully qualified internal name to be entered into a name space.  The
   8.882 +  main equation of this ``chemical reaction'' when binding new
   8.883 +  entities in a context is as follows:
   8.884 +
   8.885 +  \medskip
   8.886 +  \begin{tabular}{l}
   8.887 +  @{text "binding + naming \<longrightarrow> long name + name space accesses"}
   8.888 +  \end{tabular}
   8.889 +
   8.890 +  \bigskip As a general principle, there is a separate name space for
   8.891 +  each kind of formal entity, e.g.\ fact, logical constant, type
   8.892 +  constructor, type class.  It is usually clear from the occurrence in
   8.893 +  concrete syntax (or from the scope) which kind of entity a name
   8.894 +  refers to.  For example, the very same name @{text "c"} may be used
   8.895 +  uniformly for a constant, type constructor, and type class.
   8.896 +
   8.897 +  There are common schemes to name derived entities systematically
   8.898 +  according to the name of the main logical entity involved, e.g.\
   8.899 +  fact @{text "c.intro"} for a canonical introduction rule related to
   8.900 +  constant @{text "c"}.  This technique of mapping names from one
   8.901 +  space into another requires some care in order to avoid conflicts.
   8.902 +  In particular, theorem names derived from a type constructor or type
   8.903 +  class should get an additional suffix in addition to the usual
   8.904 +  qualification.  This leads to the following conventions for derived
   8.905 +  names:
   8.906 +
   8.907 +  \medskip
   8.908 +  \begin{tabular}{ll}
   8.909 +  logical entity & fact name \\\hline
   8.910 +  constant @{text "c"} & @{text "c.intro"} \\
   8.911 +  type @{text "c"} & @{text "c_type.intro"} \\
   8.912 +  class @{text "c"} & @{text "c_class.intro"} \\
   8.913 +  \end{tabular}
   8.914 +*}
   8.915 +
   8.916 +text %mlref {*
   8.917 +  \begin{mldecls}
   8.918 +  @{index_ML_type binding} \\
   8.919 +  @{index_ML Binding.empty: binding} \\
   8.920 +  @{index_ML Binding.name: "string -> binding"} \\
   8.921 +  @{index_ML Binding.qualify: "bool -> string -> binding -> binding"} \\
   8.922 +  @{index_ML Binding.prefix: "bool -> string -> binding -> binding"} \\
   8.923 +  @{index_ML Binding.conceal: "binding -> binding"} \\
   8.924 +  @{index_ML Binding.print: "binding -> string"} \\
   8.925 +  \end{mldecls}
   8.926 +  \begin{mldecls}
   8.927 +  @{index_ML_type Name_Space.naming} \\
   8.928 +  @{index_ML Name_Space.default_naming: Name_Space.naming} \\
   8.929 +  @{index_ML Name_Space.add_path: "string -> Name_Space.naming -> Name_Space.naming"} \\
   8.930 +  @{index_ML Name_Space.full_name: "Name_Space.naming -> binding -> string"} \\
   8.931 +  \end{mldecls}
   8.932 +  \begin{mldecls}
   8.933 +  @{index_ML_type Name_Space.T} \\
   8.934 +  @{index_ML Name_Space.empty: "string -> Name_Space.T"} \\
   8.935 +  @{index_ML Name_Space.merge: "Name_Space.T * Name_Space.T -> Name_Space.T"} \\
   8.936 +  @{index_ML Name_Space.declare: "Context.generic -> bool ->
   8.937 +  binding -> Name_Space.T -> string * Name_Space.T"} \\
   8.938 +  @{index_ML Name_Space.intern: "Name_Space.T -> string -> string"} \\
   8.939 +  @{index_ML Name_Space.extern: "Proof.context -> Name_Space.T -> string -> string"} \\
   8.940 +  @{index_ML Name_Space.is_concealed: "Name_Space.T -> string -> bool"}
   8.941 +  \end{mldecls}
   8.942 +
   8.943 +  \begin{description}
   8.944 +
   8.945 +  \item Type @{ML_type binding} represents the abstract concept of
   8.946 +  name bindings.
   8.947 +
   8.948 +  \item @{ML Binding.empty} is the empty binding.
   8.949 +
   8.950 +  \item @{ML Binding.name}~@{text "name"} produces a binding with base
   8.951 +  name @{text "name"}.  Note that this lacks proper source position
   8.952 +  information; see also the ML antiquotation @{ML_antiquotation
   8.953 +  binding}.
   8.954 +
   8.955 +  \item @{ML Binding.qualify}~@{text "mandatory name binding"}
   8.956 +  prefixes qualifier @{text "name"} to @{text "binding"}.  The @{text
   8.957 +  "mandatory"} flag tells if this name component always needs to be
   8.958 +  given in name space accesses --- this is mostly @{text "false"} in
   8.959 +  practice.  Note that this part of qualification is typically used in
   8.960 +  derived specification mechanisms.
   8.961 +
   8.962 +  \item @{ML Binding.prefix} is similar to @{ML Binding.qualify}, but
   8.963 +  affects the system prefix.  This part of extra qualification is
   8.964 +  typically used in the infrastructure for modular specifications,
   8.965 +  notably ``local theory targets'' (see also \chref{ch:local-theory}).
   8.966 +
   8.967 +  \item @{ML Binding.conceal}~@{text "binding"} indicates that the
   8.968 +  binding shall refer to an entity that serves foundational purposes
   8.969 +  only.  This flag helps to mark implementation details of
    8.970 +  specification mechanisms etc.  Other tools should not depend on the
   8.971 +  particulars of concealed entities (cf.\ @{ML
   8.972 +  Name_Space.is_concealed}).
   8.973 +
   8.974 +  \item @{ML Binding.print}~@{text "binding"} produces a string
   8.975 +  representation for human-readable output, together with some formal
   8.976 +  markup that might get used in GUI front-ends, for example.
   8.977 +
   8.978 +  \item Type @{ML_type Name_Space.naming} represents the abstract
   8.979 +  concept of a naming policy.
   8.980 +
   8.981 +  \item @{ML Name_Space.default_naming} is the default naming policy.
   8.982 +  In a theory context, this is usually augmented by a path prefix
   8.983 +  consisting of the theory name.
   8.984 +
   8.985 +  \item @{ML Name_Space.add_path}~@{text "path naming"} augments the
   8.986 +  naming policy by extending its path component.
   8.987 +
   8.988 +  \item @{ML Name_Space.full_name}~@{text "naming binding"} turns a
   8.989 +  name binding (usually a basic name) into the fully qualified
   8.990 +  internal name, according to the given naming policy.
   8.991 +
   8.992 +  \item Type @{ML_type Name_Space.T} represents name spaces.
   8.993 +
   8.994 +  \item @{ML Name_Space.empty}~@{text "kind"} and @{ML Name_Space.merge}~@{text
   8.995 +  "(space\<^sub>1, space\<^sub>2)"} are the canonical operations for
   8.996 +  maintaining name spaces according to theory data management
   8.997 +  (\secref{sec:context-data}); @{text "kind"} is a formal comment
   8.998 +  to characterize the purpose of a name space.
   8.999 +
  8.1000 +  \item @{ML Name_Space.declare}~@{text "context strict binding
  8.1001 +  space"} enters a name binding as fully qualified internal name into
  8.1002 +  the name space, using the naming of the context.
  8.1003 +
  8.1004 +  \item @{ML Name_Space.intern}~@{text "space name"} internalizes a
  8.1005 +  (partially qualified) external name.
  8.1006 +
  8.1007 +  This operation is mostly for parsing!  Note that fully qualified
  8.1008 +  names stemming from declarations are produced via @{ML
  8.1009 +  "Name_Space.full_name"} and @{ML "Name_Space.declare"}
  8.1010 +  (or their derivatives for @{ML_type theory} and
  8.1011 +  @{ML_type Proof.context}).
  8.1012 +
  8.1013 +  \item @{ML Name_Space.extern}~@{text "ctxt space name"} externalizes a
  8.1014 +  (fully qualified) internal name.
  8.1015 +
  8.1016 +  This operation is mostly for printing!  User code should not rely on
  8.1017 +  the precise result too much.
  8.1018 +
  8.1019 +  \item @{ML Name_Space.is_concealed}~@{text "space name"} indicates
  8.1020 +  whether @{text "name"} refers to a strictly private entity that
  8.1021 +  other tools are supposed to ignore!
  8.1022 +
  8.1023 +  \end{description}
  8.1024 +*}
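          +
          +text %mlex {* The ``chemical reaction'' of binding and naming can be
          +  sketched as follows; the path prefix @{text "A"} and base name
          +  @{text "c"} are arbitrary examples chosen here. *}
          +
          +ML {*
          +  val naming = Name_Space.add_path "A" Name_Space.default_naming;
          +  val binding = Binding.name "c";
          +
          +  (*fully qualified internal name according to the naming policy*)
          +  @{assert} (Name_Space.full_name naming binding = "A.c");
          +*}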
  8.1025 +
  8.1026 +text %mlantiq {*
  8.1027 +  \begin{matharray}{rcl}
  8.1028 +  @{ML_antiquotation_def "binding"} & : & @{text ML_antiquotation} \\
  8.1029 +  \end{matharray}
  8.1030 +
  8.1031 +  @{rail \<open>
  8.1032 +  @@{ML_antiquotation binding} name
  8.1033 +  \<close>}
  8.1034 +
  8.1035 +  \begin{description}
  8.1036 +
  8.1037 +  \item @{text "@{binding name}"} produces a binding with base name
  8.1038 +  @{text "name"} and the source position taken from the concrete
  8.1039 +  syntax of this antiquotation.  In many situations this is more
  8.1040 +  appropriate than the more basic @{ML Binding.name} function.
  8.1041 +
  8.1042 +  \end{description}
  8.1043 +*}
  8.1044 +
  8.1045 +text %mlex {* The following example yields the source position of some
  8.1046 +  concrete binding inlined into the text:
  8.1047 +*}
  8.1048 +
  8.1049 +ML {* Binding.pos_of @{binding here} *}
  8.1050 +
  8.1051 +text {* \medskip That position can be also printed in a message as
  8.1052 +  follows: *}
  8.1053 +
  8.1054 +ML_command {*
  8.1055 +  writeln
  8.1056 +    ("Look here" ^ Position.here (Binding.pos_of @{binding here}))
  8.1057 +*}
  8.1058 +
  8.1059 +text {* This illustrates a key virtue of formalized bindings as
  8.1060 +  opposed to raw specifications of base names: the system can use this
  8.1061 +  additional information for feedback given to the user (error
  8.1062 +  messages etc.).
  8.1063 +
  8.1064 +  \medskip The following example refers to its source position
  8.1065 +  directly, which is occasionally useful for experimentation and
  8.1066 +  diagnostic purposes: *}
  8.1067 +
  8.1068 +ML_command {*
  8.1069 +  warning ("Look here" ^ Position.here @{here})
  8.1070 +*}
  8.1071 +
  8.1072 +end
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/src/Doc/Implementation/Proof.thy	Sat Apr 05 11:37:00 2014 +0200
     9.3 @@ -0,0 +1,492 @@
     9.4 +theory Proof
     9.5 +imports Base
     9.6 +begin
     9.7 +
     9.8 +chapter {* Structured proofs *}
     9.9 +
    9.10 +section {* Variables \label{sec:variables} *}
    9.11 +
    9.12 +text {*
    9.13 +  Any variable that is not explicitly bound by @{text "\<lambda>"}-abstraction
    9.14 +  is considered as ``free''.  Logically, free variables act like
    9.15 +  outermost universal quantification at the sequent level: @{text
    9.16 +  "A\<^sub>1(x), \<dots>, A\<^sub>n(x) \<turnstile> B(x)"} means that the result
    9.17 +  holds \emph{for all} values of @{text "x"}.  Free variables for
    9.18 +  terms (not types) can be fully internalized into the logic: @{text
    9.19 +  "\<turnstile> B(x)"} and @{text "\<turnstile> \<And>x. B(x)"} are interchangeable, provided
    9.20 +  that @{text "x"} does not occur elsewhere in the context.
    9.21 +  Inspecting @{text "\<turnstile> \<And>x. B(x)"} more closely, we see that inside the
    9.22 +  quantifier, @{text "x"} is essentially ``arbitrary, but fixed'',
    9.23 +  while from outside it appears as a place-holder for instantiation
    9.24 +  (thanks to @{text "\<And>"} elimination).
    9.25 +
    9.26 +  The Pure logic represents the idea of variables being either inside
    9.27 +  or outside the current scope by providing separate syntactic
    9.28 +  categories for \emph{fixed variables} (e.g.\ @{text "x"}) vs.\
     9.29 +  \emph{schematic variables} (e.g.\ @{text "?x"}).  Incidentally, a
    9.30 +  universal result @{text "\<turnstile> \<And>x. B(x)"} has the HHF normal form @{text
    9.31 +  "\<turnstile> B(?x)"}, which represents its generality without requiring an
    9.32 +  explicit quantifier.  The same principle works for type variables:
    9.33 +  @{text "\<turnstile> B(?\<alpha>)"} represents the idea of ``@{text "\<turnstile> \<forall>\<alpha>. B(\<alpha>)"}''
    9.34 +  without demanding a truly polymorphic framework.
    9.35 +
    9.36 +  \medskip Additional care is required to treat type variables in a
    9.37 +  way that facilitates type-inference.  In principle, term variables
    9.38 +  depend on type variables, which means that type variables would have
    9.39 +  to be declared first.  For example, a raw type-theoretic framework
    9.40 +  would demand the context to be constructed in stages as follows:
    9.41 +  @{text "\<Gamma> = \<alpha>: type, x: \<alpha>, a: A(x\<^sub>\<alpha>)"}.
    9.42 +
    9.43 +  We allow a slightly less formalistic mode of operation: term
    9.44 +  variables @{text "x"} are fixed without specifying a type yet
    9.45 +  (essentially \emph{all} potential occurrences of some instance
    9.46 +  @{text "x\<^sub>\<tau>"} are fixed); the first occurrence of @{text "x"}
    9.47 +  within a specific term assigns its most general type, which is then
    9.48 +  maintained consistently in the context.  The above example becomes
    9.49 +  @{text "\<Gamma> = x: term, \<alpha>: type, A(x\<^sub>\<alpha>)"}, where type @{text
    9.50 +  "\<alpha>"} is fixed \emph{after} term @{text "x"}, and the constraint
    9.51 +  @{text "x :: \<alpha>"} is an implicit consequence of the occurrence of
    9.52 +  @{text "x\<^sub>\<alpha>"} in the subsequent proposition.
    9.53 +
    9.54 +  This twist of dependencies is also accommodated by the reverse
    9.55 +  operation of exporting results from a context: a type variable
    9.56 +  @{text "\<alpha>"} is considered fixed as long as it occurs in some fixed
    9.57 +  term variable of the context.  For example, exporting @{text "x:
    9.58 +  term, \<alpha>: type \<turnstile> x\<^sub>\<alpha> \<equiv> x\<^sub>\<alpha>"} produces in the first step @{text "x: term
    9.59 +  \<turnstile> x\<^sub>\<alpha> \<equiv> x\<^sub>\<alpha>"} for fixed @{text "\<alpha>"}, and only in the second step
    9.60 +  @{text "\<turnstile> ?x\<^sub>?\<^sub>\<alpha> \<equiv> ?x\<^sub>?\<^sub>\<alpha>"} for schematic @{text "?x"} and @{text "?\<alpha>"}.
    9.61 +  The following Isar source text illustrates this scenario.
    9.62 +*}
    9.63 +
    9.64 +notepad
    9.65 +begin
    9.66 +  {
    9.67 +    fix x  -- {* all potential occurrences of some @{text "x::\<tau>"} are fixed *}
    9.68 +    {
     9.69 +      have "x::'a \<equiv> x"  -- {* implicit type assignment by concrete occurrence *}
    9.70 +        by (rule reflexive)
    9.71 +    }
    9.72 +    thm this  -- {* result still with fixed type @{text "'a"} *}
    9.73 +  }
    9.74 +  thm this  -- {* fully general result for arbitrary @{text "?x::?'a"} *}
    9.75 +end
    9.76 +
    9.77 +text {* The Isabelle/Isar proof context manages the details of term
    9.78 +  vs.\ type variables, with high-level principles for moving the
    9.79 +  frontier between fixed and schematic variables.
    9.80 +
     9.81 +  The @{text "add_fixes"} operation explicitly declares fixed
    9.82 +  variables; the @{text "declare_term"} operation absorbs a term into
    9.83 +  a context by fixing new type variables and adding syntactic
    9.84 +  constraints.
    9.85 +
    9.86 +  The @{text "export"} operation is able to perform the main work of
     9.87 +  generalizing term and type variables as sketched above, assuming
     9.88 +  that variables have been fixed and terms declared properly beforehand.
    9.89 +
     9.90 +  The @{text "import"} operation makes a generalized fact a genuine
    9.91 +  part of the context, by inventing fixed variables for the schematic
    9.92 +  ones.  The effect can be reversed by using @{text "export"} later,
    9.93 +  potentially with an extended context; the result is equivalent to
    9.94 +  the original modulo renaming of schematic variables.
    9.95 +
    9.96 +  The @{text "focus"} operation provides a variant of @{text "import"}
    9.97 +  for nested propositions (with explicit quantification): @{text
    9.98 +  "\<And>x\<^sub>1 \<dots> x\<^sub>n. B(x\<^sub>1, \<dots>, x\<^sub>n)"} is
    9.99 +  decomposed by inventing fixed variables @{text "x\<^sub>1, \<dots>,
   9.100 +  x\<^sub>n"} for the body.
   9.101 +*}
   9.102 +
   9.103 +text %mlref {*
   9.104 +  \begin{mldecls}
   9.105 +  @{index_ML Variable.add_fixes: "
   9.106 +  string list -> Proof.context -> string list * Proof.context"} \\
   9.107 +  @{index_ML Variable.variant_fixes: "
   9.108 +  string list -> Proof.context -> string list * Proof.context"} \\
   9.109 +  @{index_ML Variable.declare_term: "term -> Proof.context -> Proof.context"} \\
   9.110 +  @{index_ML Variable.declare_constraints: "term -> Proof.context -> Proof.context"} \\
   9.111 +  @{index_ML Variable.export: "Proof.context -> Proof.context -> thm list -> thm list"} \\
   9.112 +  @{index_ML Variable.polymorphic: "Proof.context -> term list -> term list"} \\
   9.113 +  @{index_ML Variable.import: "bool -> thm list -> Proof.context ->
   9.114 +  (((ctyp * ctyp) list * (cterm * cterm) list) * thm list) * Proof.context"} \\
   9.115 +  @{index_ML Variable.focus: "term -> Proof.context ->
   9.116 +  ((string * (string * typ)) list * term) * Proof.context"} \\
   9.117 +  \end{mldecls}
   9.118 +
   9.119 +  \begin{description}
   9.120 +
   9.121 +  \item @{ML Variable.add_fixes}~@{text "xs ctxt"} fixes term
   9.122 +  variables @{text "xs"}, returning the resulting internal names.  By
   9.123 +  default, the internal representation coincides with the external
   9.124 +  one, which also means that the given variables must not be fixed
   9.125 +  already.  There is a different policy within a local proof body: the
   9.126 +  given names are just hints for newly invented Skolem variables.
   9.127 +
   9.128 +  \item @{ML Variable.variant_fixes} is similar to @{ML
   9.129 +  Variable.add_fixes}, but always produces fresh variants of the given
   9.130 +  names.
   9.131 +
   9.132 +  \item @{ML Variable.declare_term}~@{text "t ctxt"} declares term
   9.133 +  @{text "t"} to belong to the context.  This automatically fixes new
   9.134 +  type variables, but not term variables.  Syntactic constraints for
   9.135 +  type and term variables are declared uniformly, though.
   9.136 +
   9.137 +  \item @{ML Variable.declare_constraints}~@{text "t ctxt"} declares
   9.138 +  syntactic constraints from term @{text "t"}, without making it part
   9.139 +  of the context yet.
   9.140 +
   9.141 +  \item @{ML Variable.export}~@{text "inner outer thms"} generalizes
   9.142 +  fixed type and term variables in @{text "thms"} according to the
   9.143 +  difference of the @{text "inner"} and @{text "outer"} context,
   9.144 +  following the principles sketched above.
   9.145 +
   9.146 +  \item @{ML Variable.polymorphic}~@{text "ctxt ts"} generalizes type
   9.147 +  variables in @{text "ts"} as far as possible, even those occurring
   9.148 +  in fixed term variables.  The default policy of type-inference is to
   9.149 +  fix newly introduced type variables, which is essentially reversed
   9.150 +  with @{ML Variable.polymorphic}: here the given terms are detached
   9.151 +  from the context as far as possible.
   9.152 +
   9.153 +  \item @{ML Variable.import}~@{text "open thms ctxt"} invents fixed
   9.154 +  type and term variables for the schematic ones occurring in @{text
   9.155 +  "thms"}.  The @{text "open"} flag indicates whether the fixed names
   9.156 +  should be accessible to the user, otherwise newly introduced names
   9.157 +  are marked as ``internal'' (\secref{sec:names}).
   9.158 +
   9.159 +  \item @{ML Variable.focus}~@{text B} decomposes the outermost @{text
   9.160 +  "\<And>"} prefix of proposition @{text "B"}.
   9.161 +
   9.162 +  \end{description}
   9.163 +*}
   9.164 +
   9.165 +text %mlex {* The following example shows how to work with fixed term
   9.166 +  and type parameters and with type-inference.  *}
   9.167 +
   9.168 +ML {*
   9.169 +  (*static compile-time context -- for testing only*)
   9.170 +  val ctxt0 = @{context};
   9.171 +
   9.172 +  (*locally fixed parameters -- no type assignment yet*)
   9.173 +  val ([x, y], ctxt1) = ctxt0 |> Variable.add_fixes ["x", "y"];
   9.174 +
   9.175 +  (*t1: most general fixed type; t1': most general arbitrary type*)
   9.176 +  val t1 = Syntax.read_term ctxt1 "x";
   9.177 +  val t1' = singleton (Variable.polymorphic ctxt1) t1;
   9.178 +
   9.179 +  (*term u enforces specific type assignment*)
   9.180 +  val u = Syntax.read_term ctxt1 "(x::nat) \<equiv> y";
   9.181 +
   9.182 +  (*official declaration of u -- propagates constraints etc.*)
   9.183 +  val ctxt2 = ctxt1 |> Variable.declare_term u;
   9.184 +  val t2 = Syntax.read_term ctxt2 "x";  (*x::nat is enforced*)
   9.185 +*}
   9.186 +
   9.187 +text {* In the above example, the starting context is derived from the
   9.188 +  toplevel theory, which means that fixed variables are internalized
   9.189 +  literally: @{text "x"} is mapped again to @{text "x"}, and
   9.190 +  attempting to fix it again in the subsequent context is an error.
   9.191 +  Alternatively, fixed parameters can be renamed explicitly as
   9.192 +  follows: *}
   9.193 +
   9.194 +ML {*
   9.195 +  val ctxt0 = @{context};
   9.196 +  val ([x1, x2, x3], ctxt1) =
   9.197 +    ctxt0 |> Variable.variant_fixes ["x", "x", "x"];
   9.198 +*}
   9.199 +
   9.200 +text {* The following ML code can now work with the invented names of
   9.201 +  @{text x1}, @{text x2}, @{text x3}, without depending on
    9.202 +  the details of the system policy for introducing these variants.
   9.203 +  Recall that within a proof body the system always invents fresh
    9.204 +  ``Skolem constants'', e.g.\ as follows: *}
   9.205 +
   9.206 +notepad
   9.207 +begin
   9.208 +  ML_prf %"ML" {*
   9.209 +    val ctxt0 = @{context};
   9.210 +
   9.211 +    val ([x1], ctxt1) = ctxt0 |> Variable.add_fixes ["x"];
   9.212 +    val ([x2], ctxt2) = ctxt1 |> Variable.add_fixes ["x"];
   9.213 +    val ([x3], ctxt3) = ctxt2 |> Variable.add_fixes ["x"];
   9.214 +
   9.215 +    val ([y1, y2], ctxt4) =
   9.216 +      ctxt3 |> Variable.variant_fixes ["y", "y"];
   9.217 +  *}
   9.218 +end
   9.219 +
   9.220 +text {* In this situation @{ML Variable.add_fixes} and @{ML
   9.221 +  Variable.variant_fixes} are very similar, but identical name
   9.222 +  proposals given in a row are only accepted by the second version.
   9.223 +  *}
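          +
          +text %mlex {* The @{ML Variable.focus} operation decomposes a
          +  quantified proposition by inventing fixed variables for the
          +  outermost parameters; a minimal sketch with an arbitrary example
          +  proposition: *}
          +
          +ML {*
          +  val ctxt0 = @{context};
          +
          +  (*parameters of the \<And> prefix and the remaining body*)
          +  val ((params, body), ctxt1) =
          +    Variable.focus @{prop "\<And>x y. x = (y::nat)"} ctxt0;
          +*}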
   9.224 +
   9.225 +
   9.226 +section {* Assumptions \label{sec:assumptions} *}
   9.227 +
   9.228 +text {*
    9.229 +  An \emph{assumption} is a proposition that is postulated in the
   9.230 +  current context.  Local conclusions may use assumptions as
   9.231 +  additional facts, but this imposes implicit hypotheses that weaken
   9.232 +  the overall statement.
   9.233 +
   9.234 +  Assumptions are restricted to fixed non-schematic statements, i.e.\
   9.235 +  all generality needs to be expressed by explicit quantifiers.
   9.236 +  Nevertheless, the result will be in HHF normal form with outermost
   9.237 +  quantifiers stripped.  For example, by assuming @{text "\<And>x :: \<alpha>. P
   9.238 +  x"} we get @{text "\<And>x :: \<alpha>. P x \<turnstile> P ?x"} for schematic @{text "?x"}
   9.239 +  of fixed type @{text "\<alpha>"}.  Local derivations accumulate more and
   9.240 +  more explicit references to hypotheses: @{text "A\<^sub>1, \<dots>,
   9.241 +  A\<^sub>n \<turnstile> B"} where @{text "A\<^sub>1, \<dots>, A\<^sub>n"} needs to
   9.242 +  be covered by the assumptions of the current context.
   9.243 +
   9.244 +  \medskip The @{text "add_assms"} operation augments the context by
   9.245 +  local assumptions, which are parameterized by an arbitrary @{text
   9.246 +  "export"} rule (see below).
   9.247 +
   9.248 +  The @{text "export"} operation moves facts from a (larger) inner
   9.249 +  context into a (smaller) outer context, by discharging the
   9.250 +  difference of the assumptions as specified by the associated export
   9.251 +  rules.  Note that the discharged portion is determined by the
   9.252 +  difference of contexts, not the facts being exported!  There is a
   9.253 +  separate flag to indicate a goal context, where the result is meant
   9.254 +  to refine an enclosing sub-goal of a structured proof state.
   9.255 +
   9.256 +  \medskip The most basic export rule discharges assumptions directly
   9.257 +  by means of the @{text "\<Longrightarrow>"} introduction rule:
   9.258 +  \[
   9.259 +  \infer[(@{text "\<Longrightarrow>\<hyphen>intro"})]{@{text "\<Gamma> - A \<turnstile> A \<Longrightarrow> B"}}{@{text "\<Gamma> \<turnstile> B"}}
   9.260 +  \]
   9.261 +
   9.262 +  The variant for goal refinements marks the newly introduced
   9.263 +  premises, which causes the canonical Isar goal refinement scheme to
   9.264 +  enforce unification with local premises within the goal:
   9.265 +  \[
   9.266 +  \infer[(@{text "#\<Longrightarrow>\<hyphen>intro"})]{@{text "\<Gamma> - A \<turnstile> #A \<Longrightarrow> B"}}{@{text "\<Gamma> \<turnstile> B"}}
   9.267 +  \]
   9.268 +
   9.269 +  \medskip Alternative versions of assumptions may perform arbitrary
   9.270 +  transformations on export, as long as the corresponding portion of
   9.271 +  hypotheses is removed from the given facts.  For example, a local
   9.272 +  definition works by fixing @{text "x"} and assuming @{text "x \<equiv> t"},
   9.273 +  with the following export rule to reverse the effect:
   9.274 +  \[
   9.275 +  \infer[(@{text "\<equiv>\<hyphen>expand"})]{@{text "\<Gamma> - (x \<equiv> t) \<turnstile> B t"}}{@{text "\<Gamma> \<turnstile> B x"}}
   9.276 +  \]
   9.277 +  This works, because the assumption @{text "x \<equiv> t"} was introduced in
   9.278 +  a context with @{text "x"} being fresh, so @{text "x"} does not
   9.279 +  occur in @{text "\<Gamma>"} here.
   9.280 +*}
   9.281 +
   9.282 +text %mlref {*
   9.283 +  \begin{mldecls}
   9.284 +  @{index_ML_type Assumption.export} \\
   9.285 +  @{index_ML Assumption.assume: "Proof.context -> cterm -> thm"} \\
   9.286 +  @{index_ML Assumption.add_assms:
   9.287 +    "Assumption.export ->
   9.288 +  cterm list -> Proof.context -> thm list * Proof.context"} \\
   9.289 +  @{index_ML Assumption.add_assumes: "
   9.290 +  cterm list -> Proof.context -> thm list * Proof.context"} \\
   9.291 +  @{index_ML Assumption.export: "bool -> Proof.context -> Proof.context -> thm -> thm"} \\
   9.292 +  \end{mldecls}
   9.293 +
   9.294 +  \begin{description}
   9.295 +
   9.296 +  \item Type @{ML_type Assumption.export} represents arbitrary export
   9.297 +  rules, which is any function of type @{ML_type "bool -> cterm list
   9.298 +  -> thm -> thm"}, where the @{ML_type "bool"} indicates goal mode,
   9.299 +  and the @{ML_type "cterm list"} the collection of assumptions to be
   9.300 +  discharged simultaneously.
   9.301 +
   9.302 +  \item @{ML Assumption.assume}~@{text "ctxt A"} turns proposition @{text
   9.303 +  "A"} into a primitive assumption @{text "A \<turnstile> A'"}, where the
   9.304 +  conclusion @{text "A'"} is in HHF normal form.
   9.305 +
   9.306 +  \item @{ML Assumption.add_assms}~@{text "r As"} augments the context
   9.307 +  by assumptions @{text "As"} with export rule @{text "r"}.  The
   9.308 +  resulting facts are hypothetical theorems as produced by the raw
   9.309 +  @{ML Assumption.assume}.
   9.310 +
   9.311 +  \item @{ML Assumption.add_assumes}~@{text "As"} is a special case of
   9.312 +  @{ML Assumption.add_assms} where the export rule performs @{text
   9.313 +  "\<Longrightarrow>\<hyphen>intro"} or @{text "#\<Longrightarrow>\<hyphen>intro"}, depending on goal
   9.314 +  mode.
   9.315 +
   9.316 +  \item @{ML Assumption.export}~@{text "is_goal inner outer thm"}
    9.317 +  exports result @{text "thm"} from the @{text "inner"} context
   9.318 +  back into the @{text "outer"} one; @{text "is_goal = true"} means
   9.319 +  this is a goal context.  The result is in HHF normal form.  Note
   9.320 +  that @{ML "Proof_Context.export"} combines @{ML "Variable.export"}
   9.321 +  and @{ML "Assumption.export"} in the canonical way.
   9.322 +
   9.323 +  \end{description}
   9.324 +*}
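          +
          +text %mlex {* A minimal sketch of a primitive assumption in HHF
          +  normal form, corresponding to the @{text "\<And>x :: \<alpha>. P x"} example
          +  above: *}
          +
          +ML {*
          +  val ctxt = @{context};
          +
          +  (*hypothetical theorem "\<And>x. P x \<turnstile> P ?x"*)
          +  val assm = Assumption.assume ctxt @{cprop "\<And>x::'a. P x"};
          +*}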
   9.325 +
   9.326 +text %mlex {* The following example demonstrates how rules can be
   9.327 +  derived by building up a context of assumptions first, and exporting
   9.328 +  some local fact afterwards.  We refer to @{theory Pure} equality
   9.329 +  here for testing purposes.
   9.330 +*}
   9.331 +
   9.332 +ML {*
   9.333 +  (*static compile-time context -- for testing only*)
   9.334 +  val ctxt0 = @{context};
   9.335 +
   9.336 +  val ([eq], ctxt1) =
   9.337 +    ctxt0 |> Assumption.add_assumes [@{cprop "x \<equiv> y"}];
   9.338 +  val eq' = Thm.symmetric eq;
   9.339 +
   9.340 +  (*back to original context -- discharges assumption*)
   9.341 +  val r = Assumption.export false ctxt1 ctxt0 eq';
   9.342 +*}
   9.343 +
   9.344 +text {* Note that the variables of the resulting rule are not
    9.345 +  generalized.  This would have required fixing them properly in the
   9.346 +  context beforehand, and export wrt.\ variables afterwards (cf.\ @{ML
   9.347 +  Variable.export} or the combined @{ML "Proof_Context.export"}).  *}
   9.348 +
   9.349 +
   9.350 +section {* Structured goals and results \label{sec:struct-goals} *}
   9.351 +
   9.352 +text {*
   9.353 +  Local results are established by monotonic reasoning from facts
   9.354 +  within a context.  This allows common combinations of theorems,
   9.355 +  e.g.\ via @{text "\<And>/\<Longrightarrow>"} elimination, resolution rules, or equational
   9.356 +  reasoning, see \secref{sec:thms}.  Unaccounted context manipulations
   9.357 +  should be avoided, notably raw @{text "\<And>/\<Longrightarrow>"} introduction or ad-hoc
   9.358 +  references to free variables or assumptions not present in the proof
   9.359 +  context.
   9.360 +
    9.361 +  \medskip The @{text "SUBPROOF"} combinator allows one to structure a
   9.362 +  tactical proof recursively by decomposing a selected sub-goal:
   9.363 +  @{text "(\<And>x. A(x) \<Longrightarrow> B(x)) \<Longrightarrow> \<dots>"} is turned into @{text "B(x) \<Longrightarrow> \<dots>"}
   9.364 +  after fixing @{text "x"} and assuming @{text "A(x)"}.  This means
   9.365 +  the tactic needs to solve the conclusion, but may use the premise as
   9.366 +  a local fact, for locally fixed variables.
   9.367 +
   9.368 +  The family of @{text "FOCUS"} combinators is similar to @{text
   9.369 +  "SUBPROOF"}, but allows to retain schematic variables and pending
   9.370 +  subgoals in the resulting goal state.
   9.371 +
   9.372 +  The @{text "prove"} operation provides an interface for structured
   9.373 +  backwards reasoning under program control, with some explicit sanity
   9.374 +  checks of the result.  The goal context can be augmented by
   9.375 +  additional fixed variables (cf.\ \secref{sec:variables}) and
   9.376 +  assumptions (cf.\ \secref{sec:assumptions}), which will be available
   9.377 +  as local facts during the proof and discharged into implications in
   9.378 +  the result.  Type and term variables are generalized as usual,
   9.379 +  according to the context.
   9.380 +
   9.381 +  The @{text "obtain"} operation produces results by eliminating
   9.382 +  existing facts by means of a given tactic.  This acts like a dual
   9.383 +  conclusion: the proof demonstrates that the context may be augmented
   9.384 +  by parameters and assumptions, without affecting any conclusions
   9.385 +  that do not mention these parameters.  See also
   9.386 +  \cite{isabelle-isar-ref} for the user-level @{command obtain} and
   9.387 +  @{command guess} elements.  Final results, which may not refer to
    9.388 +  the parameters in the conclusion, need to be exported explicitly into
   9.389 +  the original context.  *}
   9.390 +
   9.391 +text %mlref {*
   9.392 +  \begin{mldecls}
   9.393 +  @{index_ML SUBPROOF: "(Subgoal.focus -> tactic) ->
   9.394 +  Proof.context -> int -> tactic"} \\
   9.395 +  @{index_ML Subgoal.FOCUS: "(Subgoal.focus -> tactic) ->
   9.396 +  Proof.context -> int -> tactic"} \\
   9.397 +  @{index_ML Subgoal.FOCUS_PREMS: "(Subgoal.focus -> tactic) ->
   9.398 +  Proof.context -> int -> tactic"} \\
   9.399 +  @{index_ML Subgoal.FOCUS_PARAMS: "(Subgoal.focus -> tactic) ->
   9.400 +  Proof.context -> int -> tactic"} \\
   9.401 +  @{index_ML Subgoal.focus: "Proof.context -> int -> thm -> Subgoal.focus * thm"} \\
   9.402 +  @{index_ML Subgoal.focus_prems: "Proof.context -> int -> thm -> Subgoal.focus * thm"} \\
   9.403 +  @{index_ML Subgoal.focus_params: "Proof.context -> int -> thm -> Subgoal.focus * thm"} \\
   9.404 +  \end{mldecls}
   9.405 +
   9.406 +  \begin{mldecls}
   9.407 +  @{index_ML Goal.prove: "Proof.context -> string list -> term list -> term ->
   9.408 +  ({prems: thm list, context: Proof.context} -> tactic) -> thm"} \\
   9.409 +  @{index_ML Goal.prove_multi: "Proof.context -> string list -> term list -> term list ->
   9.410 +  ({prems: thm list, context: Proof.context} -> tactic) -> thm list"} \\
   9.411 +  \end{mldecls}
   9.412 +  \begin{mldecls}
   9.413 +  @{index_ML Obtain.result: "(Proof.context -> tactic) -> thm list ->
   9.414 +  Proof.context -> ((string * cterm) list * thm list) * Proof.context"} \\
   9.415 +  \end{mldecls}
   9.416 +
   9.417 +  \begin{description}
   9.418 +
   9.419 +  \item @{ML SUBPROOF}~@{text "tac ctxt i"} decomposes the structure
   9.420 +  of the specified sub-goal, producing an extended context and a
   9.421 +  reduced goal, which needs to be solved by the given tactic.  All
   9.422 +  schematic parameters of the goal are imported into the context as
   9.423 +  fixed ones, which may not be instantiated in the sub-proof.
   9.424 +
   9.425 +  \item @{ML Subgoal.FOCUS}, @{ML Subgoal.FOCUS_PREMS}, and @{ML
   9.426 +  Subgoal.FOCUS_PARAMS} are similar to @{ML SUBPROOF}, but are
   9.427 +  slightly more flexible: only the specified parts of the subgoal are
   9.428 +  imported into the context, and the body tactic may introduce new
   9.429 +  subgoals and schematic variables.
   9.430 +
   9.431 +  \item @{ML Subgoal.focus}, @{ML Subgoal.focus_prems}, @{ML
   9.432 +  Subgoal.focus_params} extract the focus information from a goal
   9.433 +  state in the same way as the corresponding tacticals above.  This is
    9.434 +  occasionally useful for experimentation without writing actual tactics
   9.435 +  yet.
   9.436 +
   9.437 +  \item @{ML Goal.prove}~@{text "ctxt xs As C tac"} states goal @{text
   9.438 +  "C"} in the context augmented by fixed variables @{text "xs"} and
   9.439 +  assumptions @{text "As"}, and applies tactic @{text "tac"} to solve
   9.440 +  it.  The latter may depend on the local assumptions being presented
   9.441 +  as facts.  The result is in HHF normal form.
   9.442 +
    9.443 +  \item @{ML Goal.prove_multi} is similar to @{ML Goal.prove}, but
   9.444 +  states several conclusions simultaneously.  The goal is encoded by
   9.445 +  means of Pure conjunction; @{ML Goal.conjunction_tac} will turn this
   9.446 +  into a collection of individual subgoals.
   9.447 +
   9.448 +  \item @{ML Obtain.result}~@{text "tac thms ctxt"} eliminates the
   9.449 +  given facts using a tactic, which results in additional fixed
   9.450 +  variables and assumptions in the context.  Final results need to be
   9.451 +  exported explicitly.
   9.452 +
   9.453 +  \end{description}
   9.454 +*}
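          +
          +text %mlex {* The following sketch uses @{ML Goal.prove} under
          +  program control; the predicate @{text "P"}, the statement, and the
          +  tactic are arbitrary examples, relying on standard HOL rules. *}
          +
          +ML {*
          +  val ctxt0 = @{context};
          +
          +  (*prove "P x \<and> P x" from assumption "P x", for locally fixed x;
          +    the exported result is "P ?x \<Longrightarrow> P ?x \<and> P ?x"*)
          +  val thm =
          +    Goal.prove ctxt0 ["x"] [@{prop "P (x::nat)"}] @{prop "P (x::nat) \<and> P x"}
          +      (fn {prems, ...} =>
          +        rtac @{thm conjI} 1 THEN ALLGOALS (resolve_tac prems));
          +*}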
   9.455 +
   9.456 +text %mlex {* The following minimal example illustrates how to access
   9.457 +  the focus information of a structured goal state. *}
   9.458 +
   9.459 +notepad
   9.460 +begin
   9.461 +  fix A B C :: "'a \<Rightarrow> bool"
   9.462 +
   9.463 +  have "\<And>x. A x \<Longrightarrow> B x \<Longrightarrow> C x"
   9.464 +    ML_val
   9.465 +    {*
   9.466 +      val {goal, context = goal_ctxt, ...} = @{Isar.goal};
   9.467 +      val (focus as {params, asms, concl, ...}, goal') =
   9.468 +        Subgoal.focus goal_ctxt 1 goal;
   9.469 +      val [A, B] = #prems focus;
   9.470 +      val [(_, x)] = #params focus;
   9.471 +    *}
   9.472 +    oops
   9.473 +
   9.474 +text {* \medskip The next example demonstrates forward-elimination in
   9.475 +  a local context, using @{ML Obtain.result}.  *}
   9.476 +
   9.477 +notepad
   9.478 +begin
   9.479 +  assume ex: "\<exists>x. B x"
   9.480 +
   9.481 +  ML_prf %"ML" {*
   9.482 +    val ctxt0 = @{context};
   9.483 +    val (([(_, x)], [B]), ctxt1) = ctxt0
   9.484 +      |> Obtain.result (fn _ => etac @{thm exE} 1) [@{thm ex}];
   9.485 +  *}
   9.486 +  ML_prf %"ML" {*
   9.487 +    singleton (Proof_Context.export ctxt1 ctxt0) @{thm refl};
   9.488 +  *}
   9.489 +  ML_prf %"ML" {*
   9.490 +    Proof_Context.export ctxt1 ctxt0 [Thm.reflexive x]
   9.491 +      handle ERROR msg => (warning msg; []);
   9.492 +  *}
   9.493 +end
   9.494 +
   9.495 +end
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/src/Doc/Implementation/Syntax.thy	Sat Apr 05 11:37:00 2014 +0200
    10.3 @@ -0,0 +1,151 @@
    10.4 +theory Syntax
    10.5 +imports Base
    10.6 +begin
    10.7 +
    10.8 +chapter {* Concrete syntax and type-checking *}
    10.9 +
   10.10 +text {* Pure @{text "\<lambda>"}-calculus as introduced in \chref{ch:logic} is
   10.11 +  an adequate foundation for logical languages --- in the tradition of
   10.12 +  \emph{higher-order abstract syntax} --- but end-users require
   10.13 +  additional means for reading and printing of terms and types.  This
   10.14 +  important add-on outside the logical core is called \emph{inner
   10.15 +  syntax} in Isabelle jargon, as opposed to the \emph{outer syntax} of
   10.16 +  the theory and proof language (cf.\ \cite{isabelle-isar-ref}).
   10.17 +
   10.18 +  For example, according to \cite{church40} quantifiers are
   10.19 +  represented as higher-order constants @{text "All :: ('a \<Rightarrow> bool) \<Rightarrow>
   10.20 +  bool"} such that @{text "All (\<lambda>x::'a. B x)"} faithfully represents
   10.21 +  the idea that is displayed as @{text "\<forall>x::'a. B x"} via @{keyword
   10.22 +  "binder"} notation.  Moreover, type-inference in the style of
   10.23 +  Hindley-Milner \cite{hindleymilner} (and extensions) enables users
   10.24 +  to write @{text "\<forall>x. B x"} concisely, when the type @{text "'a"} is
   10.25 +  already clear from the context.\footnote{Type-inference taken to the
   10.26 +  extreme can easily confuse users, though.  Beginners often stumble
   10.27 +  over unexpectedly general types inferred by the system.}
   10.28 +
   10.29 +  \medskip The main inner syntax operations are \emph{read} for
   10.30 +  parsing together with type-checking, and \emph{pretty} for formatted
   10.31 +  output.  See also \secref{sec:read-print}.
   10.32 +
   10.33 +  Furthermore, the input and output syntax layers are sub-divided into
   10.34 +  separate phases for \emph{concrete syntax} versus \emph{abstract
   10.35 +  syntax}, see also \secref{sec:parse-unparse} and
   10.36 +  \secref{sec:term-check}, respectively.  This results in the
   10.37 +  following decomposition of the main operations:
   10.38 +
   10.39 +  \begin{itemize}
   10.40 +
   10.41 +  \item @{text "read = parse; check"}
   10.42 +
   10.43 +  \item @{text "pretty = uncheck; unparse"}
   10.44 +
   10.45 +  \end{itemize}
   10.46 +
   10.47 +  Some specification package might thus intercept syntax processing at
    10.48 +  a well-defined stage after @{text "parse"}, to augment the
   10.49 +  resulting pre-term before full type-reconstruction is performed by
   10.50 +  @{text "check"}, for example.  Note that the formal status of bound
   10.51 +  variables, versus free variables, versus constants must not be
   10.52 +  changed here! *}
   10.53 +
   10.54 +
   10.55 +section {* Reading and pretty printing \label{sec:read-print} *}
   10.56 +
   10.57 +text {* Read and print operations are roughly dual to each other, such
   10.58 +  that for the user @{text "s' = pretty (read s)"} looks similar to
   10.59 +  the original source text @{text "s"}, but the details depend on many
   10.60 +  side-conditions.  There are also explicit options to control
    10.61 +  suppression of type information in the output.  The default
    10.62 +  configuration routinely loses information, so @{text "t' = read
   10.63 +  (pretty t)"} might fail, produce a differently typed term, or a
   10.64 +  completely different term in the face of syntactic overloading!  *}
   10.65 +
   10.66 +text %mlref {*
   10.67 +  \begin{mldecls}
   10.68 +  @{index_ML Syntax.read_typ: "Proof.context -> string -> typ"} \\
   10.69 +  @{index_ML Syntax.read_term: "Proof.context -> string -> term"} \\
   10.70 +  @{index_ML Syntax.read_prop: "Proof.context -> string -> term"} \\
   10.71 +  @{index_ML Syntax.pretty_typ: "Proof.context -> typ -> Pretty.T"} \\
   10.72 +  @{index_ML Syntax.pretty_term: "Proof.context -> term -> Pretty.T"} \\
   10.73 +  \end{mldecls}
   10.74 +
   10.75 +  %FIXME description
   10.76 +*}
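          +
          +text %mlex {* A minimal sketch of reading and pretty printing a term
          +  within the static context of this theory: *}
          +
          +ML {*
          +  val ctxt = @{context};
          +
          +  (*parse and type-check*)
          +  val t = Syntax.read_term ctxt "\<lambda>x::nat. x";
          +
          +  (*formatted output, rendered as a plain string here*)
          +  val s = Pretty.string_of (Syntax.pretty_term ctxt t);
          +*}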
   10.77 +
   10.78 +
   10.79 +section {* Parsing and unparsing \label{sec:parse-unparse} *}
   10.80 +
   10.81 +text {* Parsing and unparsing converts between actual source text and
   10.82 +  a certain \emph{pre-term} format, where all bindings and scopes are
   10.83 +  resolved faithfully.  Thus the names of free variables or constants
   10.84 +  are already determined in the sense of the logical context, but type
    10.85 +  information might still be missing.  Pre-terms support an explicit
   10.86 +  language of \emph{type constraints} that may be augmented by user
   10.87 +  code to guide the later \emph{check} phase, for example.
   10.88 +
   10.89 +  Actual parsing is based on traditional lexical analysis and Earley
   10.90 +  parsing for arbitrary context-free grammars.  The user can specify
   10.91 +  this via mixfix annotations.  Moreover, there are \emph{syntax
   10.92 +  translations} that can be augmented by the user, either
   10.93 +  declaratively via @{command translations} or programmatically via
   10.94 +  @{command parse_translation}, @{command print_translation} etc.  The
   10.95 +  final scope resolution is performed by the system, according to name
   10.96 +  spaces for types, constants etc.\ determined by the context.
   10.97 +*}
   10.98 +
   10.99 +text %mlref {*
  10.100 +  \begin{mldecls}
  10.101 +  @{index_ML Syntax.parse_typ: "Proof.context -> string -> typ"} \\
  10.102 +  @{index_ML Syntax.parse_term: "Proof.context -> string -> term"} \\
  10.103 +  @{index_ML Syntax.parse_prop: "Proof.context -> string -> term"} \\
  10.104 +  @{index_ML Syntax.unparse_typ: "Proof.context -> typ -> Pretty.T"} \\
  10.105 +  @{index_ML Syntax.unparse_term: "Proof.context -> term -> Pretty.T"} \\
  10.106 +  \end{mldecls}
  10.107 +
  10.108 +  %FIXME description
  10.109 +*}
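          +
          +text %mlex {* The decomposition @{text "read = parse; check"} can be
          +  sketched as follows, using the check operations of
          +  \secref{sec:term-check}: *}
          +
          +ML {*
          +  val ctxt = @{context};
          +
          +  (*pre-term: bindings and scopes resolved, types still missing*)
          +  val pre = Syntax.parse_term ctxt "x = x";
          +
          +  (*full type-reconstruction*)
          +  val t = singleton (Syntax.check_terms ctxt) pre;
          +*}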
  10.110 +
  10.111 +
  10.112 +section {* Checking and unchecking \label{sec:term-check} *}
  10.113 +
   10.114 +text {* These operations define the transition from pre-terms to
  10.115 +  fully-annotated terms in the sense of the logical core
  10.116 +  (\chref{ch:logic}).
  10.117 +
  10.118 +  The \emph{check} phase is meant to subsume a variety of mechanisms
  10.119 +  in the manner of ``type-inference'' or ``type-reconstruction'' or
  10.120 +  ``type-improvement'', not just type-checking in the narrow sense.
  10.121 +  The \emph{uncheck} phase is roughly dual, it prunes type-information
  10.122 +  before pretty printing.
  10.123 +
  10.124 +  A typical add-on for the check/uncheck syntax layer is the @{command
  10.125 +  abbreviation} mechanism.  Here the user specifies syntactic
  10.126 +  definitions that are managed by the system as polymorphic @{text
  10.127 +  "let"} bindings.  These are expanded during the @{text "check"}
  10.128 +  phase, and contracted during the @{text "uncheck"} phase, without
  10.129 +  affecting the type-assignment of the given terms.
  10.130 +
  10.131 +  \medskip The precise meaning of type checking depends on the context
  10.132 +  --- additional check/uncheck plugins might be defined in user space!
  10.133 +
  10.134 +  For example, the @{command class} command defines a context where
  10.135 +  @{text "check"} treats certain type instances of overloaded
  10.136 +  constants according to the ``dictionary construction'' of its
  10.137 +  logical foundation.  This involves ``type improvement''
  10.138 +  (specialization of slightly too general types) and replacement by
  10.139 +  certain locale parameters.  See also \cite{Haftmann-Wenzel:2009}.
  10.140 +*}
  10.141 +
  10.142 +text %mlref {*
  10.143 +  \begin{mldecls}
  10.144 +  @{index_ML Syntax.check_typs: "Proof.context -> typ list -> typ list"} \\
  10.145 +  @{index_ML Syntax.check_terms: "Proof.context -> term list -> term list"} \\
  10.146 +  @{index_ML Syntax.check_props: "Proof.context -> term list -> term list"} \\
  10.147 +  @{index_ML Syntax.uncheck_typs: "Proof.context -> typ list -> typ list"} \\
  10.148 +  @{index_ML Syntax.uncheck_terms: "Proof.context -> term list -> term list"} \\
  10.149 +  \end{mldecls}
  10.150 +
  10.151 +  %FIXME description
  10.152 +*}
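          +
          +text %mlex {* The reverse decomposition @{text "pretty = uncheck;
          +  unparse"} admits a similar sketch for plain terms: *}
          +
          +ML {*
          +  val ctxt = @{context};
          +  val t = Syntax.read_term ctxt "\<lambda>x::nat. x";
          +
          +  (*prune type information, then produce concrete output*)
          +  val p = Syntax.unparse_term ctxt (singleton (Syntax.uncheck_terms ctxt) t);
          +*}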
  10.153 +
  10.154 +end
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/src/Doc/Implementation/Tactic.thy	Sat Apr 05 11:37:00 2014 +0200
    11.3 @@ -0,0 +1,939 @@
    11.4 +theory Tactic
    11.5 +imports Base
    11.6 +begin
    11.7 +
    11.8 +chapter {* Tactical reasoning *}
    11.9 +
   11.10 +text {* Tactical reasoning works by refining an initial claim in a
   11.11 +  backwards fashion, until a solved form is reached.  A @{text "goal"}
   11.12 +  consists of several subgoals that need to be solved in order to
   11.13 +  achieve the main statement; zero subgoals means that the proof may
   11.14 +  be finished.  A @{text "tactic"} is a refinement operation that maps
   11.15 +  a goal to a lazy sequence of potential successors.  A @{text
   11.16 +  "tactical"} is a combinator for composing tactics.  *}
   11.17 +
   11.18 +
   11.19 +section {* Goals \label{sec:tactical-goals} *}
   11.20 +
   11.21 +text {*
   11.22 +  Isabelle/Pure represents a goal as a theorem stating that the
   11.23 +  subgoals imply the main goal: @{text "A\<^sub>1 \<Longrightarrow> \<dots> \<Longrightarrow> A\<^sub>n \<Longrightarrow>
   11.24 +  C"}.  The outermost goal structure is that of a Horn Clause: i.e.\
   11.25 +  an iterated implication without any quantifiers\footnote{Recall that
   11.26 +  outermost @{text "\<And>x. \<phi>[x]"} is always represented via schematic
   11.27 +  variables in the body: @{text "\<phi>[?x]"}.  These variables may get
   11.28 +  instantiated during the course of reasoning.}.  For @{text "n = 0"}
   11.29 +  a goal is called ``solved''.
   11.30 +
   11.31 +  The structure of each subgoal @{text "A\<^sub>i"} is that of a
   11.32 +  general Hereditary Harrop Formula @{text "\<And>x\<^sub>1 \<dots>
   11.33 +  \<And>x\<^sub>k. H\<^sub>1 \<Longrightarrow> \<dots> \<Longrightarrow> H\<^sub>m \<Longrightarrow> B"}.  Here @{text
   11.34 +  "x\<^sub>1, \<dots>, x\<^sub>k"} are goal parameters, i.e.\
   11.35 +  arbitrary-but-fixed entities of certain types, and @{text
   11.36 +  "H\<^sub>1, \<dots>, H\<^sub>m"} are goal hypotheses, i.e.\ facts that may
   11.37 +  be assumed locally.  Together, this forms the goal context of the
   11.38 +  conclusion @{text B} to be established.  The goal hypotheses may be
   11.39 +  again arbitrary Hereditary Harrop Formulas, although the level of
   11.40 +  nesting rarely exceeds 1--2 in practice.
   11.41 +
   11.42 +  The main conclusion @{text C} is internally marked as a protected
   11.43 +  proposition, which is represented explicitly by the notation @{text
   11.44 +  "#C"} here.  This ensures that the decomposition into subgoals and
   11.45 +  main conclusion is well-defined for arbitrarily structured claims.
   11.46 +
   11.47 +  \medskip Basic goal management is performed via the following
   11.48 +  Isabelle/Pure rules:
   11.49 +
   11.50 +  \[
   11.51 +  \infer[@{text "(init)"}]{@{text "C \<Longrightarrow> #C"}}{} \qquad
   11.52 +  \infer[@{text "(finish)"}]{@{text "C"}}{@{text "#C"}}
   11.53 +  \]
   11.54 +
   11.55 +  \medskip The following low-level variants admit general reasoning
   11.56 +  with protected propositions:
   11.57 +
   11.58 +  \[
   11.59 +  \infer[@{text "(protect n)"}]{@{text "A\<^sub>1 \<Longrightarrow> \<dots> \<Longrightarrow> A\<^sub>n \<Longrightarrow> #C"}}{@{text "A\<^sub>1 \<Longrightarrow> \<dots> \<Longrightarrow> A\<^sub>n \<Longrightarrow> C"}}
   11.60 +  \]
   11.61 +  \[
   11.62 +  \infer[@{text "(conclude)"}]{@{text "A \<Longrightarrow> \<dots> \<Longrightarrow> C"}}{@{text "A \<Longrightarrow> \<dots> \<Longrightarrow> #C"}}
   11.63 +  \]
   11.64 +*}
   11.65 +
   11.66 +text %mlref {*
   11.67 +  \begin{mldecls}
   11.68 +  @{index_ML Goal.init: "cterm -> thm"} \\
   11.69 +  @{index_ML Goal.finish: "Proof.context -> thm -> thm"} \\
   11.70 +  @{index_ML Goal.protect: "int -> thm -> thm"} \\
   11.71 +  @{index_ML Goal.conclude: "thm -> thm"} \\
   11.72 +  \end{mldecls}
   11.73 +
   11.74 +  \begin{description}
   11.75 +
   11.76 +  \item @{ML "Goal.init"}~@{text C} initializes a tactical goal from
   11.77 +  the well-formed proposition @{text C}.
   11.78 +
   11.79 +  \item @{ML "Goal.finish"}~@{text "ctxt thm"} checks whether theorem
   11.80 +  @{text "thm"} is a solved goal (no subgoals), and concludes the
   11.81 +  result by removing the goal protection.  The context is only
   11.82 +  required for printing error messages.
   11.83 +
   11.84 +  \item @{ML "Goal.protect"}~@{text "n thm"} protects the statement
   11.85 +  of theorem @{text "thm"}.  The parameter @{text n} indicates the
   11.86 +  number of premises to be retained.
   11.87 +
   11.88 +  \item @{ML "Goal.conclude"}~@{text "thm"} removes the goal
   11.89 +  protection, even if there are pending subgoals.
   11.90 +
   11.91 +  \end{description}
   11.92 +*}
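          +
          +text {* As a minimal sketch (using a trivial HOL proposition and the fact
          +  @{thm refl}; the resolution step is explained in
          +  \secref{sec:resolve-assume-tac} below), a goal may be initialized,
          +  refined, and concluded on the ML level as follows: *}
          +
          +ML {*
          +  val goal = Goal.init @{cprop "x = (x::nat)"};   (*state: (x = x) ==> #(x = x)*)
          +  val solved =
          +    (case Seq.pull (resolve_tac [@{thm refl}] 1 goal) of
          +      SOME (st, _) => st                          (*no subgoals left: #(x = x)*)
          +    | NONE => error "Tactic failed");
          +  val result = Goal.finish @{context} solved;     (*protection removed: x = x*)
          +*}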
   11.93 +
   11.94 +
   11.95 +section {* Tactics\label{sec:tactics} *}
   11.96 +
   11.97 +text {* A @{text "tactic"} is a function @{text "goal \<rightarrow> goal\<^sup>*\<^sup>*"} that
   11.98 +  maps a given goal state (represented as a theorem, cf.\
   11.99 +  \secref{sec:tactical-goals}) to a lazy sequence of potential
  11.100 +  successor states.  The underlying sequence implementation is lazy
  11.101 +  both in head and tail, and is purely functional in \emph{not}
  11.102 +  supporting memoing.\footnote{The lack of memoing and the strict
  11.103 +  nature of SML requires some care when working with low-level
  11.104 +  sequence operations, to avoid duplicate or premature evaluation of
  11.105 +  results.  It also means that modified runtime behavior, such as
  11.106 +  timeout, is very hard to achieve for general tactics.}
  11.107 +
  11.108 +  An \emph{empty result sequence} means that the tactic has failed: in
  11.109 +  a compound tactic expression other tactics might be tried instead,
  11.110 +  or the whole refinement step might fail outright, producing a
  11.111 +  toplevel error message in the end.  When implementing tactics from
  11.112 +  scratch, one should take care to observe the basic protocol of
  11.113 +  mapping regular error conditions to an empty result; only serious
  11.114 +  faults should emerge as exceptions.
  11.115 +
  11.116 +  By enumerating \emph{multiple results}, a tactic can easily express
  11.117 +  the potential outcome of an internal search process.  There are also
  11.118 +  combinators for building proof tools that involve search
   11.119 +  systematically; see also \secref{sec:tacticals}.
  11.120 +
  11.121 +  \medskip As explained before, a goal state essentially consists of a
  11.122 +  list of subgoals that imply the main goal (conclusion).  Tactics may
   11.123 +  operate on all subgoals or on a particular subgoal specified explicitly, but
  11.124 +  must not change the main conclusion (apart from instantiating
  11.125 +  schematic goal variables).
  11.126 +
  11.127 +  Tactics with explicit \emph{subgoal addressing} are of the form
  11.128 +  @{text "int \<rightarrow> tactic"} and may be applied to a particular subgoal
  11.129 +  (counting from 1).  If the subgoal number is out of range, the
  11.130 +  tactic should fail with an empty result sequence, but must not raise
  11.131 +  an exception!
  11.132 +
   11.133 +  Operating on a particular subgoal means replacing it by an interval
  11.134 +  of zero or more subgoals in the same place; other subgoals must not
  11.135 +  be affected, apart from instantiating schematic variables ranging
  11.136 +  over the whole goal state.
  11.137 +
  11.138 +  A common pattern of composing tactics with subgoal addressing is to
  11.139 +  try the first one, and then the second one only if the subgoal has
  11.140 +  not been solved yet.  Special care is required here to avoid bumping
  11.141 +  into unrelated subgoals that happen to come after the original
  11.142 +  subgoal.  Assuming that there is only a single initial subgoal is a
  11.143 +  very common error when implementing tactics!
  11.144 +
  11.145 +  Tactics with internal subgoal addressing should expose the subgoal
  11.146 +  index as @{text "int"} argument in full generality; a hardwired
  11.147 +  subgoal 1 is not acceptable.
  11.148 +  
  11.149 +  \medskip The main well-formedness conditions for proper tactics are
  11.150 +  summarized as follows.
  11.151 +
  11.152 +  \begin{itemize}
  11.153 +
   11.154 +  \item General tactic failure is indicated by an empty result; only
  11.155 +  serious faults may produce an exception.
  11.156 +
  11.157 +  \item The main conclusion must not be changed, apart from
  11.158 +  instantiating schematic variables.
  11.159 +
  11.160 +  \item A tactic operates either uniformly on all subgoals, or
  11.161 +  specifically on a selected subgoal (without bumping into unrelated
  11.162 +  subgoals).
  11.163 +
  11.164 +  \item Range errors in subgoal addressing produce an empty result.
  11.165 +
  11.166 +  \end{itemize}
  11.167 +
  11.168 +  Some of these conditions are checked by higher-level goal
  11.169 +  infrastructure (\secref{sec:struct-goals}); others are not checked
  11.170 +  explicitly, and violating them merely results in ill-behaved tactics
   11.171 +  experienced by the user (e.g.\ tactics that insist on being
  11.172 +  applicable only to singleton goals, or prevent composition via
  11.173 +  standard tacticals such as @{ML REPEAT}).
  11.174 +*}
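          +
          +text {* The following is a minimal sketch of a hand-written tactic that
          +  observes this protocol (the name is hypothetical): it succeeds with the
          +  unchanged goal state if there are at least @{text "n"} subgoals, and
          +  fails with an empty result sequence otherwise. *}
          +
          +ML {*
          +  fun at_least_subgoals_tac n : tactic = fn st =>
          +    if Thm.nprems_of st >= n
          +    then Seq.single st    (*one successor: the unchanged goal state*)
          +    else Seq.empty;       (*regular failure: empty result sequence*)
          +*}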
  11.175 +
  11.176 +text %mlref {*
  11.177 +  \begin{mldecls}
  11.178 +  @{index_ML_type tactic: "thm -> thm Seq.seq"} \\
  11.179 +  @{index_ML no_tac: tactic} \\
  11.180 +  @{index_ML all_tac: tactic} \\
  11.181 +  @{index_ML print_tac: "string -> tactic"} \\[1ex]
  11.182 +  @{index_ML PRIMITIVE: "(thm -> thm) -> tactic"} \\[1ex]
  11.183 +  @{index_ML SUBGOAL: "(term * int -> tactic) -> int -> tactic"} \\
  11.184 +  @{index_ML CSUBGOAL: "(cterm * int -> tactic) -> int -> tactic"} \\
  11.185 +  @{index_ML SELECT_GOAL: "tactic -> int -> tactic"} \\
  11.186 +  @{index_ML PREFER_GOAL: "tactic -> int -> tactic"} \\
  11.187 +  \end{mldecls}
  11.188 +
  11.189 +  \begin{description}
  11.190 +
  11.191 +  \item Type @{ML_type tactic} represents tactics.  The
  11.192 +  well-formedness conditions described above need to be observed.  See
  11.193 +  also @{file "~~/src/Pure/General/seq.ML"} for the underlying
  11.194 +  implementation of lazy sequences.
  11.195 +
  11.196 +  \item Type @{ML_type "int -> tactic"} represents tactics with
  11.197 +  explicit subgoal addressing, with well-formedness conditions as
  11.198 +  described above.
  11.199 +
  11.200 +  \item @{ML no_tac} is a tactic that always fails, returning the
  11.201 +  empty sequence.
  11.202 +
  11.203 +  \item @{ML all_tac} is a tactic that always succeeds, returning a
  11.204 +  singleton sequence with unchanged goal state.
  11.205 +
  11.206 +  \item @{ML print_tac}~@{text "message"} is like @{ML all_tac}, but
  11.207 +  prints a message together with the goal state on the tracing
  11.208 +  channel.
  11.209 +
  11.210 +  \item @{ML PRIMITIVE}~@{text rule} turns a primitive inference rule
  11.211 +  into a tactic with unique result.  Exception @{ML THM} is considered
  11.212 +  a regular tactic failure and produces an empty result; other
  11.213 +  exceptions are passed through.
  11.214 +
  11.215 +  \item @{ML SUBGOAL}~@{text "(fn (subgoal, i) => tactic)"} is the
  11.216 +  most basic form to produce a tactic with subgoal addressing.  The
   11.217 +  given abstraction over the subgoal term and subgoal number makes it
   11.218 +  possible to peek at the relevant information of the full goal state.  The
  11.219 +  subgoal range is checked as required above.
  11.220 +
  11.221 +  \item @{ML CSUBGOAL} is similar to @{ML SUBGOAL}, but passes the
  11.222 +  subgoal as @{ML_type cterm} instead of raw @{ML_type term}.  This
  11.223 +  avoids expensive re-certification in situations where the subgoal is
  11.224 +  used directly for primitive inferences.
  11.225 +
  11.226 +  \item @{ML SELECT_GOAL}~@{text "tac i"} confines a tactic to the
  11.227 +  specified subgoal @{text "i"}.  This rearranges subgoals and the
  11.228 +  main goal protection (\secref{sec:tactical-goals}), while retaining
  11.229 +  the syntactic context of the overall goal state (concerning
  11.230 +  schematic variables etc.).
  11.231 +
  11.232 +  \item @{ML PREFER_GOAL}~@{text "tac i"} rearranges subgoals to put
  11.233 +  @{text "i"} in front.  This is similar to @{ML SELECT_GOAL}, but
  11.234 +  without changing the main goal protection.
  11.235 +
  11.236 +  \end{description}
  11.237 +*}
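          +
          +text {* As a hypothetical illustration of explicit subgoal addressing, the
          +  following sketch uses @{ML SUBGOAL} to peek at the selected subgoal: it
          +  attempts proof by assumption (cf.\ \secref{sec:resolve-assume-tac}) only
          +  if that subgoal has local premises, and behaves like @{ML no_tac}
          +  otherwise. *}
          +
          +ML {*
          +  val assume_if_premises_tac =
          +    SUBGOAL (fn (subgoal, i) =>
          +      if null (Logic.strip_assums_hyp subgoal)
          +      then no_tac          (*no local premises: nothing to assume*)
          +      else assume_tac i);  (*try to close subgoal i by assumption*)
          +*}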
  11.238 +
  11.239 +
  11.240 +subsection {* Resolution and assumption tactics \label{sec:resolve-assume-tac} *}
  11.241 +
  11.242 +text {* \emph{Resolution} is the most basic mechanism for refining a
  11.243 +  subgoal using a theorem as object-level rule.
  11.244 +  \emph{Elim-resolution} is particularly suited for elimination rules:
  11.245 +  it resolves with a rule, proves its first premise by assumption, and
  11.246 +  finally deletes that assumption from any new subgoals.
  11.247 +  \emph{Destruct-resolution} is like elim-resolution, but the given
  11.248 +  destruction rules are first turned into canonical elimination
  11.249 +  format.  \emph{Forward-resolution} is like destruct-resolution, but
  11.250 +  without deleting the selected assumption.  The @{text "r/e/d/f"}
  11.251 +  naming convention is maintained for several different kinds of
  11.252 +  resolution rules and tactics.
  11.253 +
  11.254 +  Assumption tactics close a subgoal by unifying some of its premises
  11.255 +  against its conclusion.
  11.256 +
  11.257 +  \medskip All the tactics in this section operate on a subgoal
  11.258 +  designated by a positive integer.  Other subgoals might be affected
  11.259 +  indirectly, due to instantiation of schematic variables.
  11.260 +
   11.261 +  There are various sources of non-determinism; the tactic result
  11.262 +  sequence enumerates all possibilities of the following choices (if
  11.263 +  applicable):
  11.264 +
  11.265 +  \begin{enumerate}
  11.266 +
  11.267 +  \item selecting one of the rules given as argument to the tactic;
  11.268 +
  11.269 +  \item selecting a subgoal premise to eliminate, unifying it against
  11.270 +  the first premise of the rule;
  11.271 +
  11.272 +  \item unifying the conclusion of the subgoal to the conclusion of
  11.273 +  the rule.
  11.274 +
  11.275 +  \end{enumerate}
  11.276 +
  11.277 +  Recall that higher-order unification may produce multiple results
  11.278 +  that are enumerated here.
  11.279 +*}
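          +
          +text {* The following hypothetical auxiliary function makes this enumeration
          +  explicit: it applies a tactic to a goal state and returns at most
          +  @{text "n"} successor states as a list. *}
          +
          +ML {*
          +  fun first_results n (tac: tactic) st =
          +    Seq.list_of (Seq.take n (tac st));
          +*}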
  11.280 +
  11.281 +text %mlref {*
  11.282 +  \begin{mldecls}
  11.283 +  @{index_ML resolve_tac: "thm list -> int -> tactic"} \\
  11.284 +  @{index_ML eresolve_tac: "thm list -> int -> tactic"} \\
  11.285 +  @{index_ML dresolve_tac: "thm list -> int -> tactic"} \\
  11.286 +  @{index_ML forward_tac: "thm list -> int -> tactic"} \\
  11.287 +  @{index_ML biresolve_tac: "(bool * thm) list -> int -> tactic"} \\[1ex]
  11.288 +  @{index_ML assume_tac: "int -> tactic"} \\
  11.289 +  @{index_ML eq_assume_tac: "int -> tactic"} \\[1ex]
  11.290 +  @{index_ML match_tac: "thm list -> int -> tactic"} \\
  11.291 +  @{index_ML ematch_tac: "thm list -> int -> tactic"} \\
  11.292 +  @{index_ML dmatch_tac: "thm list -> int -> tactic"} \\
  11.293 +  @{index_ML bimatch_tac: "(bool * thm) list -> int -> tactic"} \\
  11.294 +  \end{mldecls}
  11.295 +
  11.296 +  \begin{description}
  11.297 +
  11.298 +  \item @{ML resolve_tac}~@{text "thms i"} refines the goal state
  11.299 +  using the given theorems, which should normally be introduction
  11.300 +  rules.  The tactic resolves a rule's conclusion with subgoal @{text
  11.301 +  i}, replacing it by the corresponding versions of the rule's
  11.302 +  premises.
  11.303 +
  11.304 +  \item @{ML eresolve_tac}~@{text "thms i"} performs elim-resolution
   11.305 +  with the given theorems, which should normally be elimination rules.
  11.306 +
  11.307 +  Note that @{ML "eresolve_tac [asm_rl]"} is equivalent to @{ML
  11.308 +  assume_tac}, which facilitates mixing of assumption steps with
  11.309 +  genuine eliminations.
  11.310 +
  11.311 +  \item @{ML dresolve_tac}~@{text "thms i"} performs
  11.312 +  destruct-resolution with the given theorems, which should normally
  11.313 +  be destruction rules.  This replaces an assumption by the result of
  11.314 +  applying one of the rules.
  11.315 +
  11.316 +  \item @{ML forward_tac} is like @{ML dresolve_tac} except that the
  11.317 +  selected assumption is not deleted.  It applies a rule to an
  11.318 +  assumption, adding the result as a new assumption.
  11.319 +
  11.320 +  \item @{ML biresolve_tac}~@{text "brls i"} refines the proof state
  11.321 +  by resolution or elim-resolution on each rule, as indicated by its
  11.322 +  flag.  It affects subgoal @{text "i"} of the proof state.
  11.323 +
  11.324 +  For each pair @{text "(flag, rule)"}, it applies resolution if the
  11.325 +  flag is @{text "false"} and elim-resolution if the flag is @{text
  11.326 +  "true"}.  A single tactic call handles a mixture of introduction and
  11.327 +  elimination rules, which is useful to organize the search process
  11.328 +  systematically in proof tools.
  11.329 +
  11.330 +  \item @{ML assume_tac}~@{text i} attempts to solve subgoal @{text i}
  11.331 +  by assumption (modulo higher-order unification).
  11.332 +
  11.333 +  \item @{ML eq_assume_tac} is similar to @{ML assume_tac}, but checks
  11.334 +  only for immediate @{text "\<alpha>"}-convertibility instead of using
  11.335 +  unification.  It succeeds (with a unique next state) if one of the
  11.336 +  assumptions is equal to the subgoal's conclusion.  Since it does not
  11.337 +  instantiate variables, it cannot make other subgoals unprovable.
  11.338 +
  11.339 +  \item @{ML match_tac}, @{ML ematch_tac}, @{ML dmatch_tac}, and @{ML
  11.340 +  bimatch_tac} are similar to @{ML resolve_tac}, @{ML eresolve_tac},
  11.341 +  @{ML dresolve_tac}, and @{ML biresolve_tac}, respectively, but do
  11.342 +  not instantiate schematic variables in the goal state.%
  11.343 +\footnote{Strictly speaking, matching means to treat the unknowns in the goal
  11.344 +  state as constants, but these tactics merely discard unifiers that would
  11.345 +  update the goal state. In rare situations (where the conclusion and 
  11.346 +  goal state have flexible terms at the same position), the tactic
  11.347 +  will fail even though an acceptable unifier exists.}
  11.348 +  These tactics were written for a specific application within the classical reasoner.
  11.349 +
  11.350 +  Flexible subgoals are not updated at will, but are left alone.
  11.351 +  \end{description}
  11.352 +*}
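          +
          +text {* For example, a conjunctive subgoal whose parts already occur among
          +  its premises can be closed as follows.  This is merely a sketch, assuming
          +  the HOL rule @{thm conjI}; the combinator @{ML THEN} is explained in
          +  \secref{sec:tacticals}. *}
          +
          +ML {*
          +  (*split the conjunction in subgoal 1, then close the two resulting
          +    subgoals by assumption -- after the first is solved, the second
          +    becomes subgoal 1 itself*)
          +  val conj_by_assumption_tac =
          +    resolve_tac [@{thm conjI}] 1 THEN assume_tac 1 THEN assume_tac 1;
          +*}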
  11.353 +
  11.354 +
  11.355 +subsection {* Explicit instantiation within a subgoal context *}
  11.356 +
  11.357 +text {* The main resolution tactics (\secref{sec:resolve-assume-tac})
  11.358 +  use higher-order unification, which works well in many practical
  11.359 +  situations despite its daunting theoretical properties.
  11.360 +  Nonetheless, there are important problem classes where unguided
  11.361 +  higher-order unification is not so useful.  This typically involves
  11.362 +  rules like universal elimination, existential introduction, or
  11.363 +  equational substitution.  Here the unification problem involves
  11.364 +  fully flexible @{text "?P ?x"} schemes, which are hard to manage
  11.365 +  without further hints.
  11.366 +
  11.367 +  By providing a (small) rigid term for @{text "?x"} explicitly, the
  11.368 +  remaining unification problem is to assign a (large) term to @{text
  11.369 +  "?P"}, according to the shape of the given subgoal.  This is
  11.370 +  sufficiently well-behaved in most practical situations.
  11.371 +
  11.372 +  \medskip Isabelle provides separate versions of the standard @{text
  11.373 +  "r/e/d/f"} resolution tactics that allow to provide explicit
  11.374 +  instantiations of unknowns of the given rule, wrt.\ terms that refer
  11.375 +  to the implicit context of the selected subgoal.
  11.376 +
  11.377 +  An instantiation consists of a list of pairs of the form @{text
  11.378 +  "(?x, t)"}, where @{text ?x} is a schematic variable occurring in
  11.379 +  the given rule, and @{text t} is a term from the current proof
  11.380 +  context, augmented by the local goal parameters of the selected
  11.381 +  subgoal; cf.\ the @{text "focus"} operation described in
  11.382 +  \secref{sec:variables}.
  11.383 +
  11.384 +  Entering the syntactic context of a subgoal is a brittle operation,
  11.385 +  because its exact form is somewhat accidental, and the choice of
  11.386 +  bound variable names depends on the presence of other local and
  11.387 +  global names.  Explicit renaming of subgoal parameters prior to
  11.388 +  explicit instantiation might help to achieve a bit more robustness.
  11.389 +
  11.390 +  Type instantiations may be given as well, via pairs like @{text
  11.391 +  "(?'a, \<tau>)"}.  Type instantiations are distinguished from term
  11.392 +  instantiations by the syntactic form of the schematic variable.
  11.393 +  Types are instantiated before terms are.  Since term instantiation
   11.394 +  already performs simple type-inference, explicit type
  11.395 +  instantiations are seldom necessary.
  11.396 +*}
  11.397 +
  11.398 +text %mlref {*
  11.399 +  \begin{mldecls}
  11.400 +  @{index_ML res_inst_tac: "Proof.context -> (indexname * string) list -> thm -> int -> tactic"} \\
  11.401 +  @{index_ML eres_inst_tac: "Proof.context -> (indexname * string) list -> thm -> int -> tactic"} \\
  11.402 +  @{index_ML dres_inst_tac: "Proof.context -> (indexname * string) list -> thm -> int -> tactic"} \\
  11.403 +  @{index_ML forw_inst_tac: "Proof.context -> (indexname * string) list -> thm -> int -> tactic"} \\
  11.404 +  @{index_ML subgoal_tac: "Proof.context -> string -> int -> tactic"} \\
  11.405 +  @{index_ML thin_tac: "Proof.context -> string -> int -> tactic"} \\
  11.406 +  @{index_ML rename_tac: "string list -> int -> tactic"} \\
  11.407 +  \end{mldecls}
  11.408 +
  11.409 +  \begin{description}
  11.410 +
  11.411 +  \item @{ML res_inst_tac}~@{text "ctxt insts thm i"} instantiates the
  11.412 +  rule @{text thm} with the instantiations @{text insts}, as described
  11.413 +  above, and then performs resolution on subgoal @{text i}.
  11.414 +  
  11.415 +  \item @{ML eres_inst_tac} is like @{ML res_inst_tac}, but performs
  11.416 +  elim-resolution.
  11.417 +
  11.418 +  \item @{ML dres_inst_tac} is like @{ML res_inst_tac}, but performs
  11.419 +  destruct-resolution.
  11.420 +
  11.421 +  \item @{ML forw_inst_tac} is like @{ML dres_inst_tac} except that
  11.422 +  the selected assumption is not deleted.
  11.423 +
  11.424 +  \item @{ML subgoal_tac}~@{text "ctxt \<phi> i"} adds the proposition
  11.425 +  @{text "\<phi>"} as local premise to subgoal @{text "i"}, and poses the
  11.426 +  same as a new subgoal @{text "i + 1"} (in the original context).
  11.427 +
  11.428 +  \item @{ML thin_tac}~@{text "ctxt \<phi> i"} deletes the specified
  11.429 +  premise from subgoal @{text i}.  Note that @{text \<phi>} may contain
  11.430 +  schematic variables, to abbreviate the intended proposition; the
  11.431 +  first matching subgoal premise will be deleted.  Removing useless
  11.432 +  premises from a subgoal increases its readability and can make
  11.433 +  search tactics run faster.
  11.434 +
  11.435 +  \item @{ML rename_tac}~@{text "names i"} renames the innermost
  11.436 +  parameters of subgoal @{text i} according to the provided @{text
   11.437 +  names} (which need to be distinct identifiers).
  11.438 +
  11.439 +  \end{description}
  11.440 +
  11.441 +  For historical reasons, the above instantiation tactics take
  11.442 +  unparsed string arguments, which makes them hard to use in general
  11.443 +  ML code.  The slightly more advanced @{ML Subgoal.FOCUS} combinator
   11.444 +  of \secref{sec:struct-goals} makes it possible to refer to internal goal
  11.445 +  structure with explicit context management.
  11.446 +*}
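          +
          +text {* For example, subgoal parameters may be renamed explicitly before
          +  entering the brittle string-based instantiation.  The following is only a
          +  hypothetical sketch, assuming the HOL rule @{thm exI}: *}
          +
          +ML {*
          +  (*rename the two innermost parameters of subgoal 1, then use the
          +    renamed parameter "u" as witness for the unknown ?x of rule exI*)
          +  fun witness_tac ctxt =
          +    rename_tac ["u", "v"] 1 THEN
          +    res_inst_tac ctxt [(("x", 0), "u")] @{thm exI} 1;
          +*}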
  11.447 +
  11.448 +
  11.449 +subsection {* Rearranging goal states *}
  11.450 +
  11.451 +text {* In rare situations there is a need to rearrange goal states:
  11.452 +  either the overall collection of subgoals, or the local structure of
   11.453 +  a subgoal.  Various administrative tactics make it possible to operate on
   11.454 +  the concrete presentation of these conceptual sets of formulae. *}
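          +
          +text {* For instance, a hypothetical cleanup tactic might rotate the premises
          +  of the first subgoal by one position and remove duplicate subgoals
          +  afterwards: *}
          +
          +ML {*
          +  val tidy_tac = rotate_tac 1 1 THEN distinct_subgoals_tac;
          +*}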
  11.455 +
  11.456 +text %mlref {*
  11.457 +  \begin{mldecls}
  11.458 +  @{index_ML rotate_tac: "int -> int -> tactic"} \\
  11.459 +  @{index_ML distinct_subgoals_tac: tactic} \\
  11.460 +  @{index_ML flexflex_tac: tactic} \\