# HG changeset patch
# User wenzelm
# Date 1236216766 -3600
# Node ID 44832d5036598dc35d4caa5f893daade32956cc5
# Parent ecd6f0ca62ea58dfb517e89fa9c8aed6f820f30a
# Parent 2d612824e642bff09a1b07231a2668e2a21ef1d4
merged

diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Base.thy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Base.thy Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,6 @@
+theory Base
+imports Pure
+uses "../../antiquote_setup.ML"
+begin
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Integration.thy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Integration.thy Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,425 @@
+theory Integration
+imports Base
+begin
+
+chapter {* System integration *}
+
+section {* Isar toplevel \label{sec:isar-toplevel} *}
+
+text {* The Isar toplevel may be considered the central hub of the Isabelle/Isar system, where all key components and sub-systems are integrated into a single read-eval-print loop of Isar commands.  We shall even incorporate the existing {\ML} toplevel of the compiler and run-time system (cf.\ \secref{sec:ML-toplevel}).
+
+  Isabelle/Isar departs from the original ``LCF system architecture'' where {\ML} was really The Meta Language for defining theories and conducting proofs.  Instead, {\ML} now only serves as the implementation language for the system (and user extensions), while the specific Isar toplevel supports the concepts of theory and proof development natively.  This includes the graph structure of theories and the block structure of proofs, support for unlimited undo, facilities for tracing, debugging, timing, profiling etc.
+
+  \medskip The toplevel maintains an implicit state, which is transformed by a sequence of transitions -- either interactively or in batch-mode.  In interactive mode, Isar state transitions are encapsulated as safe transactions, such that both failure and undo are handled conveniently without destroying the underlying draft theory (cf.~\secref{sec:context-theory}).  In batch mode, transitions operate in a linear (destructive) fashion, such that error conditions abort the present attempt to construct a theory or proof altogether.
+
+  The toplevel state is a disjoint sum of empty @{text toplevel}, or @{text theory}, or @{text proof}.  On entering the main Isar loop we start with an empty toplevel.  A theory is commenced by giving a @{text \} header; within a theory we may issue theory commands such as @{text \}, or state a @{text \} to be proven.  Now we are within a proof state, with a rich collection of Isar proof commands for structured proof composition, or unstructured proof scripts.  When the proof is concluded we get back to the theory, which is then updated by storing the resulting fact.  Further theory declarations or theorem statements with proofs may follow, until we eventually conclude the theory development by issuing @{text \}.  The resulting theory is then stored within the theory database and we are back to the empty toplevel.
+
+  In addition to these proper state transformations, there are also some diagnostic commands for peeking at the toplevel state without modifying it (e.g.\ \isakeyword{thm}, \isakeyword{term}, \isakeyword{print-cases}).
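+
+  \medskip As a minimal illustration (a hypothetical interactive session sketch, using only the interfaces documented in this chapter), the toplevel state may be inspected from {\ML} after dropping out of the Isar loop (cf.\ \secref{sec:ML-toplevel}):
+
+\begin{verbatim}
+  val st = Isar.state ();            (*current toplevel state*)
+  Toplevel.is_toplevel st;           (*true at the empty toplevel*)
+  val thy = Toplevel.theory_of st;   (*raises Toplevel.UNDEF if no theory*)
+\end{verbatim}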
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_type Toplevel.state} \\
+  @{index_ML Toplevel.UNDEF: "exn"} \\
+  @{index_ML Toplevel.is_toplevel: "Toplevel.state -> bool"} \\
+  @{index_ML Toplevel.theory_of: "Toplevel.state -> theory"} \\
+  @{index_ML Toplevel.proof_of: "Toplevel.state -> Proof.state"} \\
+  @{index_ML Toplevel.debug: "bool ref"} \\
+  @{index_ML Toplevel.timing: "bool ref"} \\
+  @{index_ML Toplevel.profiling: "int ref"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_type Toplevel.state} represents Isar toplevel states, which are normally manipulated through the concept of toplevel transitions only (\secref{sec:toplevel-transition}).  Also note that a raw toplevel state is subject to the same linearity restrictions as a theory context (cf.~\secref{sec:context-theory}).
+
+  \item @{ML Toplevel.UNDEF} is raised for undefined toplevel operations.  Many operations work only partially for certain cases, since @{ML_type Toplevel.state} is a sum type.
+
+  \item @{ML Toplevel.is_toplevel}~@{text "state"} checks for an empty toplevel state.
+
+  \item @{ML Toplevel.theory_of}~@{text "state"} selects the theory of a theory or proof (!), otherwise raises @{ML Toplevel.UNDEF}.
+
+  \item @{ML Toplevel.proof_of}~@{text "state"} selects the Isar proof state if available, otherwise raises @{ML Toplevel.UNDEF}.
+
+  \item @{ML "set Toplevel.debug"} makes the toplevel print further details about internal error conditions, exceptions being raised etc.
+
+  \item @{ML "set Toplevel.timing"} makes the toplevel print timing information for each Isar command being executed.
+
+  \item @{ML Toplevel.profiling}~@{verbatim ":="}~@{text "n"} controls low-level profiling of the underlying {\ML} runtime system.  For Poly/ML, @{text "n = 1"} means time and @{text "n = 2"} space profiling.
+
+  \end{description}
+*}
+
+
+subsection {* Toplevel transitions \label{sec:toplevel-transition} *}
+
+text {*
+  An Isar toplevel transition consists of a partial function on the toplevel state, with additional information for diagnostics and error reporting: there are fields for command name, source position, optional source text, as well as flags for interactive-only commands (which issue a warning in batch-mode), printing of result state, etc.
+
+  The operational part is represented as the sequential union of a list of partial functions, which are tried in turn until the first one succeeds.  This acts like an outer case-expression for various alternative state transitions.  For example, \isakeyword{qed} acts differently for a local proof vs.\ the global ending of the main proof.
+
+  Toplevel transitions are composed via transition transformers. Internally, Isar commands are put together from an empty transition extended by name and source position (and optional source text).  It is then left to the individual command parser to turn the given concrete syntax into a suitable transition transformer that adjoins actual operations on a theory or proof state etc.
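+
+  \medskip As a minimal sketch (hypothetical examples, built from the combinators documented below; @{ML "Context.theory_name"} and @{ML "Sign.add_path"} are assumed from the surrounding library), transition transformers for a diagnostic and a proper theory command might look as follows:
+
+\begin{verbatim}
+  (*diagnostic: print the name of the current theory*)
+  val report_thy = Toplevel.keep (fn state =>
+    writeln (Context.theory_name (Toplevel.theory_of state)));
+
+  (*proper update of the theory under construction*)
+  val add_path_foo = Toplevel.theory (Sign.add_path "foo");
+\end{verbatim}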
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML Toplevel.print: "Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.no_timing: "Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.keep: "(Toplevel.state -> unit) -> Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.theory: "(theory -> theory) -> Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.theory_to_proof: "(theory -> Proof.state) -> Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.proof: "(Proof.state -> Proof.state) -> Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.proofs: "(Proof.state -> Proof.state Seq.seq) -> Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.end_proof: "(bool -> Proof.state -> Proof.context) -> Toplevel.transition -> Toplevel.transition"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML Toplevel.print}~@{text "tr"} sets the print flag, which causes the toplevel loop to echo the result state (in interactive mode).
+
+  \item @{ML Toplevel.no_timing}~@{text "tr"} indicates that the transition should never show timing information, e.g.\ because it is a diagnostic command.
+
+  \item @{ML Toplevel.keep}~@{text "tr"} adjoins a diagnostic function.
+
+  \item @{ML Toplevel.theory}~@{text "tr"} adjoins a theory transformer.
+
+  \item @{ML Toplevel.theory_to_proof}~@{text "tr"} adjoins a global goal function, which turns a theory into a proof state.  The theory may be changed before entering the proof; the generic Isar goal setup includes an argument that specifies how to apply the proven result to the theory, when the proof is finished.
+
+  \item @{ML Toplevel.proof}~@{text "tr"} adjoins a deterministic proof command, with a singleton result.
+
+  \item @{ML Toplevel.proofs}~@{text "tr"} adjoins a general proof command, with zero or more result states (represented as a lazy list).
+
+  \item @{ML Toplevel.end_proof}~@{text "tr"} adjoins a concluding proof command that returns the resulting theory, after storing the resulting facts in the context etc.
+
+  \end{description}
+*}
+
+
+subsection {* Toplevel control *}
+
+text {*
+  There are a few special control commands that modify the behavior of the toplevel itself, and only make sense in interactive mode.  Under normal circumstances, the user encounters these only implicitly as part of the protocol between the Isabelle/Isar system and a user-interface such as ProofGeneral.
+
+  \begin{description}
+
+  \item \isacommand{undo} follows the three-level hierarchy of empty toplevel vs.\ theory vs.\ proof: undo within a proof reverts to the previous proof context, undo after a proof reverts to the theory before the initial goal statement, undo of a theory command reverts to the previous theory value, undo of a theory header discontinues the current theory development and removes it from the theory database (\secref{sec:theory-database}).
+
+  \item \isacommand{kill} aborts the current level of development: kill in a proof context reverts to the theory before the initial goal statement, kill in a theory context aborts the current theory development, removing it from the database.
+
+  \item \isacommand{exit} drops out of the Isar toplevel into the underlying {\ML} toplevel (\secref{sec:ML-toplevel}).  The Isar toplevel state is preserved and may be continued later.
+
+  \item \isacommand{quit} terminates the Isabelle/Isar process without saving.
+
+  \end{description}
+*}
+
+
+section {* ML toplevel \label{sec:ML-toplevel} *}
+
+text {*
+  The {\ML} toplevel provides a read-compile-eval-print loop for {\ML} values, types, structures, and functors.  {\ML} declarations operate on the global system state, which consists of the compiler environment plus the values of {\ML} reference variables.  There is no clean way to undo {\ML} declarations, except for reverting to a previously saved state of the whole Isabelle process.  {\ML} input is either read interactively from a TTY, or from a string (usually within a theory text), or from a source file (usually loaded from a theory).
+
+  Whenever the {\ML} toplevel is active, the current Isabelle theory context is passed as an internal reference variable.  Thus {\ML} code may access the theory context during compilation; it may even change the value of a theory under construction --- while observing the usual linearity restrictions (cf.~\secref{sec:context-theory}).
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML the_context: "unit -> theory"} \\
+  @{index_ML "Context.>> ": "(Context.generic -> Context.generic) -> unit"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML "the_context ()"} refers to the theory context of the {\ML} toplevel --- at compile time!  {\ML} code needs to take care to refer to @{ML "the_context ()"} correctly.  Recall that evaluation of a function body is delayed until actual runtime.  Moreover, persistent {\ML} toplevel bindings to an unfinished theory should be avoided: code should either project out the desired information immediately, or produce an explicit @{ML_type theory_ref} (cf.\ \secref{sec:context-theory}).
+
+  \item @{ML "Context.>>"}~@{text f} applies context transformation @{text f} to the implicit context of the {\ML} toplevel.
+
+  \end{description}
+
+  It is very important to note that the above functions are really restricted to compile time, even though the {\ML} compiler is invoked at runtime!  The majority of {\ML} code uses explicit functional arguments of a theory or proof context instead.  Thus it may be invoked for an arbitrary context later on, without having to worry about any operational details.
+
+  \bigskip
+
+  \begin{mldecls}
+  @{index_ML Isar.main: "unit -> unit"} \\
+  @{index_ML Isar.loop: "unit -> unit"} \\
+  @{index_ML Isar.state: "unit -> Toplevel.state"} \\
+  @{index_ML Isar.exn: "unit -> (exn * string) option"} \\
+  @{index_ML Isar.context: "unit -> Proof.context"} \\
+  @{index_ML Isar.goal: "unit -> thm"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML "Isar.main ()"} invokes the Isar toplevel from {\ML}, initializing an empty toplevel state.
+
+  \item @{ML "Isar.loop ()"} continues the Isar toplevel with the current state, after having dropped out of the Isar toplevel loop.
+
+  \item @{ML "Isar.state ()"} and @{ML "Isar.exn ()"} get current toplevel state and error condition, respectively.  This only works after having dropped out of the Isar toplevel loop.
+
+  \item @{ML "Isar.context ()"} produces the proof context from @{ML "Isar.state ()"}, analogous to @{ML Context.proof_of} (\secref{sec:generic-context}).
+
+  \item @{ML "Isar.goal ()"} picks the tactical goal from @{ML "Isar.state ()"}, represented as a theorem according to \secref{sec:tactical-goals}.
+
+  \end{description}
+*}
+
+
+section {* Theory database \label{sec:theory-database} *}
+
+text {*
+  The theory database maintains a collection of theories, together with some administrative information about their original sources, which are held in an external store (i.e.\ some directory within the regular file system).
+
+  The theory database is organized as a directed acyclic graph; entries are referenced by theory name.  Although some additional interfaces allow a directory specification to be included as well, this is only a hint to the underlying theory loader.  The internal theory name space is flat!
+
+  Theory @{text A} is associated with the main theory file @{text A}\verb,.thy,, which needs to be accessible through the theory loader path.  Any number of additional {\ML} source files may be associated with each theory, by declaring these dependencies in the theory header as @{text \}, and loading them consecutively within the theory context.  The system keeps track of incoming {\ML} sources and associates them with the current theory.  The file @{text A}\verb,.ML, is loaded after a theory has been concluded, in order to support legacy {\ML} proof scripts.
+
+  The basic internal actions of the theory database are @{text "update"}, @{text "outdate"}, and @{text "remove"}:
+
+  \begin{itemize}
+
+  \item @{text "update A"} introduces a link of @{text "A"} with a @{text "theory"} value of the same name; it asserts that the theory sources are now consistent with that value;
+
+  \item @{text "outdate A"} invalidates the link of a theory database entry to its sources, but retains the present theory value;
+
+  \item @{text "remove A"} deletes entry @{text "A"} from the theory database.
+
+  \end{itemize}
+
+  These actions are propagated to sub- or super-graphs of a theory entry as expected, in order to preserve global consistency of the state of all loaded theories with the sources of the external store.  This implies certain causalities between actions: @{text "update"} or @{text "outdate"} of an entry will @{text "outdate"} all descendants; @{text "remove"} will @{text "remove"} all descendants.
+
+  \medskip There are separate user-level interfaces to operate on the theory database directly or indirectly.  The primitive actions then just happen automatically while working with the system.  In particular, processing a theory header @{text "\ A \ B\<^sub>1 \ B\<^sub>n \"} ensures that the sub-graph of the collective imports @{text "B\<^sub>1 \ B\<^sub>n"} is up-to-date, too.  Earlier theories are reloaded as required, with @{text update} actions proceeding in topological order according to theory dependencies.  There may also be a wave of implied @{text outdate} actions for derived theory nodes until a stable situation is achieved eventually.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML theory: "string -> theory"} \\
+  @{index_ML use_thy: "string -> unit"} \\
+  @{index_ML use_thys: "string list -> unit"} \\
+  @{index_ML ThyInfo.touch_thy: "string -> unit"} \\
+  @{index_ML ThyInfo.remove_thy: "string -> unit"} \\[1ex]
+  @{index_ML ThyInfo.begin_theory}@{verbatim ": ... -> bool -> theory"} \\
+  @{index_ML ThyInfo.end_theory: "theory -> unit"} \\
+  @{index_ML ThyInfo.register_theory: "theory -> unit"} \\[1ex]
+  @{verbatim "datatype action = Update | Outdate | Remove"} \\
+  @{index_ML ThyInfo.add_hook: "(ThyInfo.action -> string -> unit) -> unit"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML theory}~@{text A} retrieves the theory value presently associated with name @{text A}.  Note that the result might be outdated.
+
+  \item @{ML use_thy}~@{text A} ensures that theory @{text A} is fully up-to-date wrt.\ the external file store, reloading outdated ancestors as required.
+
+  \item @{ML use_thys} is similar to @{ML use_thy}, but handles several theories simultaneously.  Thus it acts like processing the import header of a theory, but without performing the merge of the result.
+
+  \item @{ML ThyInfo.touch_thy}~@{text A} performs an @{text outdate} action on theory @{text A} and all descendants.
+
+  \item @{ML ThyInfo.remove_thy}~@{text A} deletes theory @{text A} and all descendants from the theory database.
+
+  \item @{ML ThyInfo.begin_theory} is the basic operation behind a @{text \} header declaration.  This {\ML} function is normally not invoked directly.
+
+  \item @{ML ThyInfo.end_theory} concludes the loading of a theory proper and stores the result in the theory database.
+
+  \item @{ML ThyInfo.register_theory}~@{text "text thy"} registers an existing theory value with the theory loader database.  There is no management of associated sources.
+
+  \item @{ML "ThyInfo.add_hook"}~@{text f} registers function @{text f} as a hook for theory database actions.  The function will be invoked with the action and theory name being involved; thus derived actions may be performed in associated system components, e.g.\ maintaining the state of an editor for the theory sources.
+
+  The kind and order of actions occurring in practice depend both on user interactions and the internal process of resolving theory imports.  Hooks should not rely on a particular policy here!  Any exceptions raised by the hook are ignored.
+
+  \end{description}
+*}
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Isar.thy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Isar.thy Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,37 @@
+theory Isar
+imports Base
+begin
+
+chapter {* Isar language elements *}
+
+text {*
+  The primary Isar language consists of three main categories of language elements:
+
+  \begin{enumerate}
+
+  \item Proof commands
+
+  \item Proof methods
+
+  \item Attributes
+
+  \end{enumerate}
+*}
+
+
+section {* Proof commands *}
+
+text FIXME
+
+
+section {* Proof methods *}
+
+text FIXME
+
+
+section {* Attributes *}
+
+text FIXME
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Local_Theory.thy
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Logic.thy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Logic.thy Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,909 @@
+theory Logic
+imports Base
+begin
+
+chapter {* Primitive logic \label{ch:logic} *}
+
+text {*
+  The logical foundations of Isabelle/Isar are those of the Pure logic, which has been introduced as a Natural Deduction framework in \cite{paulson700}.
This is essentially the same logic as ``@{text + "\HOL"}'' in the more abstract setting of Pure Type Systems (PTS) + \cite{Barendregt-Geuvers:2001}, although there are some key + differences in the specific treatment of simple types in + Isabelle/Pure. + + Following type-theoretic parlance, the Pure logic consists of three + levels of @{text "\"}-calculus with corresponding arrows, @{text + "\"} for syntactic function space (terms depending on terms), @{text + "\"} for universal quantification (proofs depending on terms), and + @{text "\"} for implication (proofs depending on proofs). + + Derivations are relative to a logical theory, which declares type + constructors, constants, and axioms. Theory declarations support + schematic polymorphism, which is strictly speaking outside the + logic.\footnote{This is the deeper logical reason, why the theory + context @{text "\"} is separate from the proof context @{text "\"} + of the core calculus.} +*} + + +section {* Types \label{sec:types} *} + +text {* + The language of types is an uninterpreted order-sorted first-order + algebra; types are qualified by ordered type classes. + + \medskip A \emph{type class} is an abstract syntactic entity + declared in the theory context. The \emph{subclass relation} @{text + "c\<^isub>1 \ c\<^isub>2"} is specified by stating an acyclic + generating relation; the transitive closure is maintained + internally. The resulting relation is an ordering: reflexive, + transitive, and antisymmetric. + + A \emph{sort} is a list of type classes written as @{text "s = + {c\<^isub>1, \, c\<^isub>m}"}, which represents symbolic + intersection. Notationally, the curly braces are omitted for + singleton intersections, i.e.\ any class @{text "c"} may be read as + a sort @{text "{c}"}. The ordering on type classes is extended to + sorts according to the meaning of intersections: @{text + "{c\<^isub>1, \ c\<^isub>m} \ {d\<^isub>1, \, d\<^isub>n}"} iff + @{text "\j. \i. c\<^isub>i \ d\<^isub>j"}. The empty intersection + @{text "{}"} refers to the universal sort, which is the largest + element wrt.\ the sort order. The intersections of all (finitely + many) classes declared in the current theory are the minimal + elements wrt.\ the sort order. + + \medskip A \emph{fixed type variable} is a pair of a basic name + (starting with a @{text "'"} character) and a sort constraint, e.g.\ + @{text "('a, s)"} which is usually printed as @{text "\\<^isub>s"}. + A \emph{schematic type variable} is a pair of an indexname and a + sort constraint, e.g.\ @{text "(('a, 0), s)"} which is usually + printed as @{text "?\\<^isub>s"}. + + Note that \emph{all} syntactic components contribute to the identity + of type variables, including the sort constraint. The core logic + handles type variables with the same name but different sorts as + different, although some outer layers of the system make it hard to + produce anything like this. + + A \emph{type constructor} @{text "\"} is a @{text "k"}-ary operator + on types declared in the theory. Type constructor application is + written postfix as @{text "(\\<^isub>1, \, \\<^isub>k)\"}. For + @{text "k = 0"} the argument tuple is omitted, e.g.\ @{text "prop"} + instead of @{text "()prop"}. For @{text "k = 1"} the parentheses + are omitted, e.g.\ @{text "\ list"} instead of @{text "(\)list"}. + Further notation is provided for specific constructors, notably the + right-associative infix @{text "\ \ \"} instead of @{text "(\, + \)fun"}. 
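+
+  For illustration, here is a minimal {\ML} sketch (in terms of the raw @{ML_type typ} datatype described below) that builds the type @{text "'a \<Rightarrow> prop"} directly from the constructors; @{verbatim "fun"} and @{verbatim "prop"} are the internal names mentioned above, and the empty sort @{verbatim "[]"} stands for the universal sort @{text "{}"}:
+
+\begin{verbatim}
+  (*the type "'a => prop", i.e. ('a, prop)fun in postfix notation*)
+  val T = Type ("fun", [TFree ("'a", []), Type ("prop", [])]);
+\end{verbatim}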
+ + A \emph{type} is defined inductively over type variables and type + constructors as follows: @{text "\ = \\<^isub>s | ?\\<^isub>s | + (\\<^sub>1, \, \\<^sub>k)\"}. + + A \emph{type abbreviation} is a syntactic definition @{text + "(\<^vec>\)\ = \"} of an arbitrary type expression @{text "\"} over + variables @{text "\<^vec>\"}. Type abbreviations appear as type + constructors in the syntax, but are expanded before entering the + logical core. + + A \emph{type arity} declares the image behavior of a type + constructor wrt.\ the algebra of sorts: @{text "\ :: (s\<^isub>1, \, + s\<^isub>k)s"} means that @{text "(\\<^isub>1, \, \\<^isub>k)\"} is + of sort @{text "s"} if every argument type @{text "\\<^isub>i"} is + of sort @{text "s\<^isub>i"}. Arity declarations are implicitly + completed, i.e.\ @{text "\ :: (\<^vec>s)c"} entails @{text "\ :: + (\<^vec>s)c'"} for any @{text "c' \ c"}. + + \medskip The sort algebra is always maintained as \emph{coregular}, + which means that type arities are consistent with the subclass + relation: for any type constructor @{text "\"}, and classes @{text + "c\<^isub>1 \ c\<^isub>2"}, and arities @{text "\ :: + (\<^vec>s\<^isub>1)c\<^isub>1"} and @{text "\ :: + (\<^vec>s\<^isub>2)c\<^isub>2"} holds @{text "\<^vec>s\<^isub>1 \ + \<^vec>s\<^isub>2"} component-wise. + + The key property of a coregular order-sorted algebra is that sort + constraints can be solved in a most general fashion: for each type + constructor @{text "\"} and sort @{text "s"} there is a most general + vector of argument sorts @{text "(s\<^isub>1, \, s\<^isub>k)"} such + that a type scheme @{text "(\\<^bsub>s\<^isub>1\<^esub>, \, + \\<^bsub>s\<^isub>k\<^esub>)\"} is of sort @{text "s"}. + Consequently, type unification has most general solutions (modulo + equivalence of sorts), so type-inference produces primary types as + expected \cite{nipkow-prehofer}. +*} + +text %mlref {* + \begin{mldecls} + @{index_ML_type class} \\ + @{index_ML_type sort} \\ + @{index_ML_type arity} \\ + @{index_ML_type typ} \\ + @{index_ML map_atyps: "(typ -> typ) -> typ -> typ"} \\ + @{index_ML fold_atyps: "(typ -> 'a -> 'a) -> typ -> 'a -> 'a"} \\ + \end{mldecls} + \begin{mldecls} + @{index_ML Sign.subsort: "theory -> sort * sort -> bool"} \\ + @{index_ML Sign.of_sort: "theory -> typ * sort -> bool"} \\ + @{index_ML Sign.add_types: "(string * int * mixfix) list -> theory -> theory"} \\ + @{index_ML Sign.add_tyabbrs_i: " + (string * string list * typ * mixfix) list -> theory -> theory"} \\ + @{index_ML Sign.primitive_class: "string * class list -> theory -> theory"} \\ + @{index_ML Sign.primitive_classrel: "class * class -> theory -> theory"} \\ + @{index_ML Sign.primitive_arity: "arity -> theory -> theory"} \\ + \end{mldecls} + + \begin{description} + + \item @{ML_type class} represents type classes; this is an alias for + @{ML_type string}. + + \item @{ML_type sort} represents sorts; this is an alias for + @{ML_type "class list"}. + + \item @{ML_type arity} represents type arities; this is an alias for + triples of the form @{text "(\, \<^vec>s, s)"} for @{text "\ :: + (\<^vec>s)s"} described above. + + \item @{ML_type typ} represents types; this is a datatype with + constructors @{ML TFree}, @{ML TVar}, @{ML Type}. + + \item @{ML map_atyps}~@{text "f \"} applies the mapping @{text "f"} + to all atomic types (@{ML TFree}, @{ML TVar}) occurring in @{text + "\"}. 
+
+  \item @{ML fold_atyps}~@{text "f \"} iterates the operation @{text "f"} over all occurrences of atomic types (@{ML TFree}, @{ML TVar}) in @{text "\"}; the type structure is traversed from left to right.
+
+  \item @{ML Sign.subsort}~@{text "thy (s\<^isub>1, s\<^isub>2)"} tests the subsort relation @{text "s\<^isub>1 \ s\<^isub>2"}.
+
+  \item @{ML Sign.of_sort}~@{text "thy (\, s)"} tests whether type @{text "\"} is of sort @{text "s"}.
+
+  \item @{ML Sign.add_types}~@{text "[(\, k, mx), \]"} declares new type constructors @{text "\"} with @{text "k"} arguments and optional mixfix syntax.
+
+  \item @{ML Sign.add_tyabbrs_i}~@{text "[(\, \<^vec>\, \, mx), \]"} defines a new type abbreviation @{text "(\<^vec>\)\ = \"} with optional mixfix syntax.
+
+  \item @{ML Sign.primitive_class}~@{text "(c, [c\<^isub>1, \, c\<^isub>n])"} declares a new class @{text "c"}, together with class relations @{text "c \ c\<^isub>i"}, for @{text "i = 1, \, n"}.
+
+  \item @{ML Sign.primitive_classrel}~@{text "(c\<^isub>1, c\<^isub>2)"} declares the class relation @{text "c\<^isub>1 \ c\<^isub>2"}.
+
+  \item @{ML Sign.primitive_arity}~@{text "(\, \<^vec>s, s)"} declares the arity @{text "\ :: (\<^vec>s)s"}.
+
+  \end{description}
+*}
+
+
+section {* Terms \label{sec:terms} *}
+
+text {*
+  The language of terms is that of simply-typed @{text "\"}-calculus with de-Bruijn indices for bound variables (cf.\ \cite{debruijn72} or \cite{paulson-ml2}), with the types being determined by the corresponding binders.  In contrast, free variables and constants have an explicit name and type in each occurrence.
+
+  \medskip A \emph{bound variable} is a natural number @{text "b"}, which accounts for the number of intermediate binders between the variable occurrence in the body and its binding position.  For example, the de-Bruijn term @{text "\\<^bsub>nat\<^esub>. \\<^bsub>nat\<^esub>. 1 + 0"} would correspond to @{text "\x\<^bsub>nat\<^esub>. \y\<^bsub>nat\<^esub>. x + y"} in a named representation.  Note that a bound variable may be represented by different de-Bruijn indices at different occurrences, depending on the nesting of abstractions.
+
+  A \emph{loose variable} is a bound variable that is outside the scope of local binders.  The types (and names) for loose variables can be managed as a separate context that is maintained as a stack of hypothetical binders.  The core logic operates on closed terms, without any loose variables.
+
+  A \emph{fixed variable} is a pair of a basic name and a type, e.g.\ @{text "(x, \)"} which is usually printed as @{text "x\<^isub>\"}.  A \emph{schematic variable} is a pair of an indexname and a type, e.g.\ @{text "((x, 0), \)"} which is usually printed as @{text "?x\<^isub>\"}.
+
+  \medskip A \emph{constant} is a pair of a basic name and a type, e.g.\ @{text "(c, \)"} which is usually printed as @{text "c\<^isub>\"}.  Constants are declared in the context as polymorphic families @{text "c :: \"}, meaning that all substitution instances @{text "c\<^isub>\"} for @{text "\ = \\"} are valid.
+
+  The vector of \emph{type arguments} of constant @{text "c\<^isub>\"} wrt.\ the declaration @{text "c :: \"} is defined as the codomain of the matcher @{text "\ = {?\\<^isub>1 \ \\<^isub>1, \, ?\\<^isub>n \ \\<^isub>n}"} presented in canonical order @{text "(\\<^isub>1, \, \\<^isub>n)"}.  Within a given theory context, there is a one-to-one correspondence between any constant @{text "c\<^isub>\"} and the application @{text "c(\\<^isub>1, \, \\<^isub>n)"} of its type arguments.  For example, with @{text "plus :: \ \ \ \ \"}, the instance @{text "plus\<^bsub>nat \ nat \ nat\<^esub>"} corresponds to @{text "plus(nat)"}.
+
+  Constant declarations @{text "c :: \"} may contain sort constraints for type variables in @{text "\"}.  These are observed by type-inference as expected, but \emph{ignored} by the core logic.  This means the primitive logic is able to reason with instances of polymorphic constants that the user-level type-checker would reject due to violation of type class restrictions.
+
+  \medskip An \emph{atomic} term is either a variable or constant.  A \emph{term} is defined inductively over atomic terms, with abstraction and application as follows: @{text "t = b | x\<^isub>\ | ?x\<^isub>\ | c\<^isub>\ | \\<^isub>\. t | t\<^isub>1 t\<^isub>2"}.  Parsing and printing takes care of converting between the internal de-Bruijn representation and an external representation with named bound variables.  Subsequently, we shall use the latter notation instead of the internal one.
+
+  The inductive relation @{text "t :: \"} assigns a (unique) type to a term according to the structure of atomic terms, abstractions, and applications:
+  \[
+  \infer{@{text "a\<^isub>\ :: \"}}{}
+  \qquad
+  \infer{@{text "(\x\<^sub>\. t) :: \ \ \"}}{@{text "t :: \"}}
+  \qquad
+  \infer{@{text "t u :: \"}}{@{text "t :: \ \ \"} & @{text "u :: \"}}
+  \]
+  A \emph{well-typed term} is a term that can be typed according to these rules.
+
+  Typing information can be omitted: type-inference is able to reconstruct the most general type of a raw term, while assigning most general types to all of its variables and constants.  Type-inference depends on a context of type constraints for fixed variables, and declarations for polymorphic constants.
+
+  The identity of atomic terms consists of both the name and the type component.  This means that different variables @{text "x\<^bsub>\\<^isub>1\<^esub>"} and @{text "x\<^bsub>\\<^isub>2\<^esub>"} may become the same after type instantiation.  Some outer layers of the system make it hard to produce variables of the same name, but different types.  In contrast, mixed instances of polymorphic constants occur frequently.
+
+  \medskip The \emph{hidden polymorphism} of a term @{text "t :: \"} is the set of type variables occurring in @{text "t"}, but not in @{text "\"}.  This means that the term implicitly depends on type arguments that are not accounted for in the result type, i.e.\ there are different type instances @{text "t\ :: \"} and @{text "t\' :: \"} with the same type.  This slightly pathological situation notoriously demands additional care.
+
+  \medskip A \emph{term abbreviation} is a syntactic definition @{text "c\<^isub>\ \ t"} of a closed term @{text "t"} of type @{text "\"}, without any hidden polymorphism.  A term abbreviation looks like a constant in the syntax, but is expanded before entering the logical core.  Abbreviations are usually reverted when printing terms, using @{text "t \ c\<^isub>\"} as rules for higher-order rewriting.
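+
+  As a concrete illustration (a minimal {\ML} sketch, using the raw @{ML_type term} constructors and the @{ML lambda} combinator described below), the identity abstraction @{text "\<lambda>x :: prop. x"} may be built in two equivalent ways:
+
+\begin{verbatim}
+  val x  = Free ("x", Type ("prop", []));
+  val t1 = Abs ("x", Type ("prop", []), Bound 0);
+  val t2 = lambda x x;   (*same term: the Free becomes Bound 0*)
+\end{verbatim}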
+ + \medskip Canonical operations on @{text "\"}-terms include @{text + "\\\"}-conversion: @{text "\"}-conversion refers to capture-free + renaming of bound variables; @{text "\"}-conversion contracts an + abstraction applied to an argument term, substituting the argument + in the body: @{text "(\x. b)a"} becomes @{text "b[a/x]"}; @{text + "\"}-conversion contracts vacuous application-abstraction: @{text + "\x. f x"} becomes @{text "f"}, provided that the bound variable + does not occur in @{text "f"}. + + Terms are normally treated modulo @{text "\"}-conversion, which is + implicit in the de-Bruijn representation. Names for bound variables + in abstractions are maintained separately as (meaningless) comments, + mostly for parsing and printing. Full @{text "\\\"}-conversion is + commonplace in various standard operations (\secref{sec:obj-rules}) + that are based on higher-order unification and matching. +*} + +text %mlref {* + \begin{mldecls} + @{index_ML_type term} \\ + @{index_ML "op aconv": "term * term -> bool"} \\ + @{index_ML map_types: "(typ -> typ) -> term -> term"} \\ + @{index_ML fold_types: "(typ -> 'a -> 'a) -> term -> 'a -> 'a"} \\ + @{index_ML map_aterms: "(term -> term) -> term -> term"} \\ + @{index_ML fold_aterms: "(term -> 'a -> 'a) -> term -> 'a -> 'a"} \\ + \end{mldecls} + \begin{mldecls} + @{index_ML fastype_of: "term -> typ"} \\ + @{index_ML lambda: "term -> term -> term"} \\ + @{index_ML betapply: "term * term -> term"} \\ + @{index_ML Sign.declare_const: "Properties.T -> (binding * typ) * mixfix -> + theory -> term * theory"} \\ + @{index_ML Sign.add_abbrev: "string -> Properties.T -> binding * term -> + theory -> (term * term) * theory"} \\ + @{index_ML Sign.const_typargs: "theory -> string * typ -> typ list"} \\ + @{index_ML Sign.const_instance: "theory -> string * typ list -> typ"} \\ + \end{mldecls} + + \begin{description} + + \item @{ML_type term} represents de-Bruijn terms, with comments in + abstractions, and explicitly named free variables and constants; + this is a datatype with constructors @{ML Bound}, @{ML Free}, @{ML + Var}, @{ML Const}, @{ML Abs}, @{ML "op $"}. + + \item @{text "t"}~@{ML aconv}~@{text "u"} checks @{text + "\"}-equivalence of two terms. This is the basic equality relation + on type @{ML_type term}; raw datatype equality should only be used + for operations related to parsing or printing! + + \item @{ML map_types}~@{text "f t"} applies the mapping @{text + "f"} to all types occurring in @{text "t"}. + + \item @{ML fold_types}~@{text "f t"} iterates the operation @{text + "f"} over all occurrences of types in @{text "t"}; the term + structure is traversed from left to right. + + \item @{ML map_aterms}~@{text "f t"} applies the mapping @{text "f"} + to all atomic terms (@{ML Bound}, @{ML Free}, @{ML Var}, @{ML + Const}) occurring in @{text "t"}. + + \item @{ML fold_aterms}~@{text "f t"} iterates the operation @{text + "f"} over all occurrences of atomic terms (@{ML Bound}, @{ML Free}, + @{ML Var}, @{ML Const}) in @{text "t"}; the term structure is + traversed from left to right. + + \item @{ML fastype_of}~@{text "t"} determines the type of a + well-typed term. This operation is relatively slow, despite the + omission of any sanity checks. + + \item @{ML lambda}~@{text "a b"} produces an abstraction @{text + "\a. b"}, where occurrences of the atomic term @{text "a"} in the + body @{text "b"} are replaced by bound variables. 
+ + \item @{ML betapply}~@{text "(t, u)"} produces an application @{text + "t u"}, with topmost @{text "\"}-conversion if @{text "t"} is an + abstraction. + + \item @{ML Sign.declare_const}~@{text "properties ((c, \), mx)"} + declares a new constant @{text "c :: \"} with optional mixfix + syntax. + + \item @{ML Sign.add_abbrev}~@{text "print_mode properties (c, t)"} + introduces a new term abbreviation @{text "c \ t"}. + + \item @{ML Sign.const_typargs}~@{text "thy (c, \)"} and @{ML + Sign.const_instance}~@{text "thy (c, [\\<^isub>1, \, \\<^isub>n])"} + convert between two representations of polymorphic constants: full + type instance vs.\ compact type arguments form. + + \end{description} +*} + + +section {* Theorems \label{sec:thms} *} + +text {* + A \emph{proposition} is a well-typed term of type @{text "prop"}, a + \emph{theorem} is a proven proposition (depending on a context of + hypotheses and the background theory). Primitive inferences include + plain Natural Deduction rules for the primary connectives @{text + "\"} and @{text "\"} of the framework. There is also a builtin + notion of equality/equivalence @{text "\"}. +*} + + +subsection {* Primitive connectives and rules \label{sec:prim-rules} *} + +text {* + The theory @{text "Pure"} contains constant declarations for the + primitive connectives @{text "\"}, @{text "\"}, and @{text "\"} of + the logical framework, see \figref{fig:pure-connectives}. The + derivability judgment @{text "A\<^isub>1, \, A\<^isub>n \ B"} is + defined inductively by the primitive inferences given in + \figref{fig:prim-rules}, with the global restriction that the + hypotheses must \emph{not} contain any schematic variables. The + builtin equality is conceptually axiomatized as shown in + \figref{fig:pure-equality}, although the implementation works + directly with derived inferences. + + \begin{figure}[htb] + \begin{center} + \begin{tabular}{ll} + @{text "all :: (\ \ prop) \ prop"} & universal quantification (binder @{text "\"}) \\ + @{text "\ :: prop \ prop \ prop"} & implication (right associative infix) \\ + @{text "\ :: \ \ \ \ prop"} & equality relation (infix) \\ + \end{tabular} + \caption{Primitive connectives of Pure}\label{fig:pure-connectives} + \end{center} + \end{figure} + + \begin{figure}[htb] + \begin{center} + \[ + \infer[@{text "(axiom)"}]{@{text "\ A"}}{@{text "A \ \"}} + \qquad + \infer[@{text "(assume)"}]{@{text "A \ A"}}{} + \] + \[ + \infer[@{text "(\_intro)"}]{@{text "\ \ \x. b[x]"}}{@{text "\ \ b[x]"} & @{text "x \ \"}} + \qquad + \infer[@{text "(\_elim)"}]{@{text "\ \ b[a]"}}{@{text "\ \ \x. b[x]"}} + \] + \[ + \infer[@{text "(\_intro)"}]{@{text "\ - A \ A \ B"}}{@{text "\ \ B"}} + \qquad + \infer[@{text "(\_elim)"}]{@{text "\\<^sub>1 \ \\<^sub>2 \ B"}}{@{text "\\<^sub>1 \ A \ B"} & @{text "\\<^sub>2 \ A"}} + \] + \caption{Primitive inferences of Pure}\label{fig:prim-rules} + \end{center} + \end{figure} + + \begin{figure}[htb] + \begin{center} + \begin{tabular}{ll} + @{text "\ (\x. b[x]) a \ b[a]"} & @{text "\"}-conversion \\ + @{text "\ x \ x"} & reflexivity \\ + @{text "\ x \ y \ P x \ P y"} & substitution \\ + @{text "\ (\x. 
f x \ g x) \ f \ g"} & extensionality \\ + @{text "\ (A \ B) \ (B \ A) \ A \ B"} & logical equivalence \\ + \end{tabular} + \caption{Conceptual axiomatization of Pure equality}\label{fig:pure-equality} + \end{center} + \end{figure} + + The introduction and elimination rules for @{text "\"} and @{text + "\"} are analogous to formation of dependently typed @{text + "\"}-terms representing the underlying proof objects. Proof terms + are irrelevant in the Pure logic, though; they cannot occur within + propositions. The system provides a runtime option to record + explicit proof terms for primitive inferences. Thus all three + levels of @{text "\"}-calculus become explicit: @{text "\"} for + terms, and @{text "\/\"} for proofs (cf.\ + \cite{Berghofer-Nipkow:2000:TPHOL}). + + Observe that locally fixed parameters (as in @{text "\_intro"}) need + not be recorded in the hypotheses, because the simple syntactic + types of Pure are always inhabitable. ``Assumptions'' @{text "x :: + \"} for type-membership are only present as long as some @{text + "x\<^isub>\"} occurs in the statement body.\footnote{This is the key + difference to ``@{text "\HOL"}'' in the PTS framework + \cite{Barendregt-Geuvers:2001}, where hypotheses @{text "x : A"} are + treated uniformly for propositions and types.} + + \medskip The axiomatization of a theory is implicitly closed by + forming all instances of type and term variables: @{text "\ + A\"} holds for any substitution instance of an axiom + @{text "\ A"}. By pushing substitutions through derivations + inductively, we also get admissible @{text "generalize"} and @{text + "instance"} rules as shown in \figref{fig:subst-rules}. + + \begin{figure}[htb] + \begin{center} + \[ + \infer{@{text "\ \ B[?\]"}}{@{text "\ \ B[\]"} & @{text "\ \ \"}} + \quad + \infer[\quad@{text "(generalize)"}]{@{text "\ \ B[?x]"}}{@{text "\ \ B[x]"} & @{text "x \ \"}} + \] + \[ + \infer{@{text "\ \ B[\]"}}{@{text "\ \ B[?\]"}} + \quad + \infer[\quad@{text "(instantiate)"}]{@{text "\ \ B[t]"}}{@{text "\ \ B[?x]"}} + \] + \caption{Admissible substitution rules}\label{fig:subst-rules} + \end{center} + \end{figure} + + Note that @{text "instantiate"} does not require an explicit + side-condition, because @{text "\"} may never contain schematic + variables. + + In principle, variables could be substituted in hypotheses as well, + but this would disrupt the monotonicity of reasoning: deriving + @{text "\\ \ B\"} from @{text "\ \ B"} is + correct, but @{text "\\ \ \"} does not necessarily hold: + the result belongs to a different proof context. + + \medskip An \emph{oracle} is a function that produces axioms on the + fly. Logically, this is an instance of the @{text "axiom"} rule + (\figref{fig:prim-rules}), but there is an operational difference. + The system always records oracle invocations within derivations of + theorems by a unique tag. + + Axiomatizations should be limited to the bare minimum, typically as + part of the initial logical basis of an object-logic formalization. + Later on, theories are usually developed in a strictly definitional + fashion, by stating only certain equalities over new constants. + + A \emph{simple definition} consists of a constant declaration @{text + "c :: \"} together with an axiom @{text "\ c \ t"}, where @{text "t + :: \"} is a closed term without any hidden polymorphism. The RHS + may depend on further defined constants, but not @{text "c"} itself. 
+  Definitions of functions may be presented as @{text "c \<^vec>x \ t"} instead of the puristic @{text "c \ \\<^vec>x. t"}.
+
+  An \emph{overloaded definition} consists of a collection of axioms for the same constant, with zero or one equations @{text "c((\<^vec>\)\) \ t"} for each type constructor @{text "\"} (for distinct variables @{text "\<^vec>\"}).  The RHS may mention previously defined constants as above, or arbitrary constants @{text "d(\\<^isub>i)"} for some @{text "\\<^isub>i"} projected from @{text "\<^vec>\"}.  Thus overloaded definitions essentially work by primitive recursion over the syntactic structure of a single type argument.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_type ctyp} \\
+  @{index_ML_type cterm} \\
+  @{index_ML Thm.ctyp_of: "theory -> typ -> ctyp"} \\
+  @{index_ML Thm.cterm_of: "theory -> term -> cterm"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML_type thm} \\
+  @{index_ML proofs: "int ref"} \\
+  @{index_ML Thm.assume: "cterm -> thm"} \\
+  @{index_ML Thm.forall_intr: "cterm -> thm -> thm"} \\
+  @{index_ML Thm.forall_elim: "cterm -> thm -> thm"} \\
+  @{index_ML Thm.implies_intr: "cterm -> thm -> thm"} \\
+  @{index_ML Thm.implies_elim: "thm -> thm -> thm"} \\
+  @{index_ML Thm.generalize: "string list * string list -> int -> thm -> thm"} \\
+  @{index_ML Thm.instantiate: "(ctyp * ctyp) list * (cterm * cterm) list -> thm -> thm"} \\
+  @{index_ML Thm.axiom: "theory -> string -> thm"} \\
+  @{index_ML Thm.add_oracle: "bstring * ('a -> cterm) -> theory -> (string * ('a -> thm)) * theory"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML Theory.add_axioms_i: "(binding * term) list -> theory -> theory"} \\
+  @{index_ML Theory.add_deps: "string -> string * typ -> (string * typ) list -> theory -> theory"} \\
+  @{index_ML Theory.add_defs_i: "bool -> bool -> (binding * term) list -> theory -> theory"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_type ctyp} and @{ML_type cterm} represent certified types and terms, respectively.  These are abstract datatypes that guarantee that their values have passed the full well-formedness (and well-typedness) checks, relative to the declarations of type constructors, constants etc. in the theory.
+
+  \item @{ML Thm.ctyp_of}~@{text "thy \"} and @{ML Thm.cterm_of}~@{text "thy t"} explicitly check types and terms, respectively.  This also involves some basic normalizations, such as expansion of type and term abbreviations from the theory context.
+
+  Re-certification is relatively slow and should be avoided in tight reasoning loops.  There are separate operations to decompose certified entities (including actual theorems).
+
+  \item @{ML_type thm} represents proven propositions.  This is an abstract datatype that guarantees that its values have been constructed by basic principles of the @{ML_struct Thm} module.  Every @{ML thm} value contains a sliding back-reference to the enclosing theory, cf.\ \secref{sec:context-theory}.
+
+  \item @{ML proofs} determines the detail of proof recording within @{ML_type thm} values: @{ML 0} records only the names of oracles, @{ML 1} records oracle names and propositions, @{ML 2} additionally records full proof terms.  Officially named theorems that contribute to a result are always recorded.
+
+  \item @{ML Thm.assume}, @{ML Thm.forall_intr}, @{ML Thm.forall_elim}, @{ML Thm.implies_intr}, and @{ML Thm.implies_elim} correspond to the primitive inferences of \figref{fig:prim-rules}.
+ + \item @{ML Thm.generalize}~@{text "(\<^vec>\, \<^vec>x)"} + corresponds to the @{text "generalize"} rules of + \figref{fig:subst-rules}. Here collections of type and term + variables are generalized simultaneously, specified by the given + basic names. + + \item @{ML Thm.instantiate}~@{text "(\<^vec>\\<^isub>s, + \<^vec>x\<^isub>\)"} corresponds to the @{text "instantiate"} rules + of \figref{fig:subst-rules}. Type variables are substituted before + term variables. Note that the types in @{text "\<^vec>x\<^isub>\"} + refer to the instantiated versions. + + \item @{ML Thm.axiom}~@{text "thy name"} retrieves a named + axiom, cf.\ @{text "axiom"} in \figref{fig:prim-rules}. + + \item @{ML Thm.add_oracle}~@{text "(name, oracle)"} produces a named + oracle rule, essentially generating arbitrary axioms on the fly, + cf.\ @{text "axiom"} in \figref{fig:prim-rules}. + + \item @{ML Theory.add_axioms_i}~@{text "[(name, A), \]"} declares + arbitrary propositions as axioms. + + \item @{ML Theory.add_deps}~@{text "name c\<^isub>\ + \<^vec>d\<^isub>\"} declares dependencies of a named specification + for constant @{text "c\<^isub>\"}, relative to existing + specifications for constants @{text "\<^vec>d\<^isub>\"}. + + \item @{ML Theory.add_defs_i}~@{text "unchecked overloaded [(name, c + \<^vec>x \ t), \]"} states a definitional axiom for an existing + constant @{text "c"}. Dependencies are recorded (cf.\ @{ML + Theory.add_deps}), unless the @{text "unchecked"} option is set. + + \end{description} +*} + + +subsection {* Auxiliary definitions *} + +text {* + Theory @{text "Pure"} provides a few auxiliary definitions, see + \figref{fig:pure-aux}. These special constants are normally not + exposed to the user, but appear in internal encodings. + + \begin{figure}[htb] + \begin{center} + \begin{tabular}{ll} + @{text "conjunction :: prop \ prop \ prop"} & (infix @{text "&"}) \\ + @{text "\ A & B \ (\C. (A \ B \ C) \ C)"} \\[1ex] + @{text "prop :: prop \ prop"} & (prefix @{text "#"}, suppressed) \\ + @{text "#A \ A"} \\[1ex] + @{text "term :: \ \ prop"} & (prefix @{text "TERM"}) \\ + @{text "term x \ (\A. A \ A)"} \\[1ex] + @{text "TYPE :: \ itself"} & (prefix @{text "TYPE"}) \\ + @{text "(unspecified)"} \\ + \end{tabular} + \caption{Definitions of auxiliary connectives}\label{fig:pure-aux} + \end{center} + \end{figure} + + Derived conjunction rules include introduction @{text "A \ B \ A & + B"}, and destructions @{text "A & B \ A"} and @{text "A & B \ B"}. + Conjunction allows to treat simultaneous assumptions and conclusions + uniformly. For example, multiple claims are intermediately + represented as explicit conjunction, but this is refined into + separate sub-goals before the user continues the proof; the final + result is projected into a list of theorems (cf.\ + \secref{sec:tactical-goals}). + + The @{text "prop"} marker (@{text "#"}) makes arbitrarily complex + propositions appear as atomic, without changing the meaning: @{text + "\ \ A"} and @{text "\ \ #A"} are interchangeable. See + \secref{sec:tactical-goals} for specific operations. + + The @{text "term"} marker turns any well-typed term into a derivable + proposition: @{text "\ TERM t"} holds unconditionally. Although + this is logically vacuous, it allows to treat terms and proofs + uniformly, similar to a type-theoretic framework. + + The @{text "TYPE"} constructor is the canonical representative of + the unspecified type @{text "\ itself"}; it essentially injects the + language of types into that of terms. 
There is specific notation @{text "TYPE(\)"} for @{text "TYPE\<^bsub>\ itself\<^esub>"}.  Although devoid of any particular meaning, @{text "TYPE(\)"} accounts for the type @{text "\"} within the term language.  In particular, @{text "TYPE(\)"} may be used as formal argument in primitive definitions, in order to circumvent hidden polymorphism (cf.\ \secref{sec:terms}).  For example, @{text "c TYPE(\) \ A[\]"} defines @{text "c :: \ itself \ prop"} in terms of a proposition @{text "A"} that depends on an additional type argument, which is essentially a predicate on types.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML Conjunction.intr: "thm -> thm -> thm"} \\
+  @{index_ML Conjunction.elim: "thm -> thm * thm"} \\
+  @{index_ML Drule.mk_term: "cterm -> thm"} \\
+  @{index_ML Drule.dest_term: "thm -> cterm"} \\
+  @{index_ML Logic.mk_type: "typ -> term"} \\
+  @{index_ML Logic.dest_type: "term -> typ"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML Conjunction.intr} derives @{text "A & B"} from @{text "A"} and @{text "B"}.
+
+  \item @{ML Conjunction.elim} derives @{text "A"} and @{text "B"} from @{text "A & B"}.
+
+  \item @{ML Drule.mk_term} derives @{text "TERM t"}.
+
+  \item @{ML Drule.dest_term} recovers term @{text "t"} from @{text "TERM t"}.
+
+  \item @{ML Logic.mk_type}~@{text "\"} produces the term @{text "TYPE(\)"}.
+
+  \item @{ML Logic.dest_type}~@{text "TYPE(\)"} recovers the type @{text "\"}.
+
+  \end{description}
+*}
+
+
+section {* Object-level rules \label{sec:obj-rules} *}
+
+text {*
+  The primitive inferences covered so far mostly serve foundational purposes.  User-level reasoning usually works via object-level rules that are represented as theorems of Pure.  Composition of rules involves \emph{backchaining}, \emph{higher-order unification} modulo @{text "\\\"}-conversion of @{text "\"}-terms, and so-called \emph{lifting} of rules into a context of @{text "\"} and @{text "\"} connectives.  Thus the full power of higher-order Natural Deduction in Isabelle/Pure becomes readily available.
+*}
+
+
+subsection {* Hereditary Harrop Formulae *}
+
+text {*
+  The idea of object-level rules is to model Natural Deduction inferences in the style of Gentzen \cite{Gentzen:1935}, but we allow arbitrary nesting similar to \cite{extensions91}.  The most basic rule format is that of a \emph{Horn Clause}:
+  \[
+  \infer{@{text "A"}}{@{text "A\<^sub>1"} & @{text "\"} & @{text "A\<^sub>n"}}
+  \]
+  where @{text "A, A\<^sub>1, \, A\<^sub>n"} are atomic propositions of the framework, usually of the form @{text "Trueprop B"}, where @{text "B"} is a (compound) object-level statement.  This object-level inference corresponds to an iterated implication in Pure like this:
+  \[
+  @{text "A\<^sub>1 \ \ A\<^sub>n \ A"}
+  \]
+  As an example consider conjunction introduction: @{text "A \ B \ A \ B"}.  Any parameters occurring in such rule statements are conceptually treated as arbitrary:
+  \[
+  @{text "\x\<^sub>1 \ x\<^sub>m. A\<^sub>1 x\<^sub>1 \ x\<^sub>m \ \ A\<^sub>n x\<^sub>1 \ x\<^sub>m \ A x\<^sub>1 \ x\<^sub>m"}
+  \]
+
+  Nesting of rules means that the positions of @{text "A\<^sub>i"} may again hold compound rules, not just atomic propositions.  Propositions of this format are called \emph{Hereditary Harrop Formulae} in the literature \cite{Miller:1991}.
Here we give an + inductive characterization as follows: + + \medskip + \begin{tabular}{ll} + @{text "\<^bold>x"} & set of variables \\ + @{text "\<^bold>A"} & set of atomic propositions \\ + @{text "\<^bold>H = \\<^bold>x\<^sup>*. \<^bold>H\<^sup>* \ \<^bold>A"} & set of Hereditary Harrop Formulas \\ + \end{tabular} + \medskip + + \noindent Thus we essentially impose nesting levels on propositions + formed from @{text "\"} and @{text "\"}. At each level there is a + prefix of parameters and compound premises, concluding an atomic + proposition. Typical examples are @{text "\"}-introduction @{text + "(A \ B) \ A \ B"} or mathematical induction @{text "P 0 \ (\n. P n + \ P (Suc n)) \ P n"}. Even deeper nesting occurs in well-founded + induction @{text "(\x. (\y. y \ x \ P y) \ P x) \ P x"}, but this + already marks the limit of rule complexity seen in practice. + + \medskip Regular user-level inferences in Isabelle/Pure always + maintain the following canonical form of results: + + \begin{itemize} + + \item Normalization by @{text "(A \ (\x. B x)) \ (\x. A \ B x)"}, + which is a theorem of Pure, means that quantifiers are pushed in + front of implication at each level of nesting. The normal form is a + Hereditary Harrop Formula. + + \item The outermost prefix of parameters is represented via + schematic variables: instead of @{text "\\<^vec>x. \<^vec>H \<^vec>x + \ A \<^vec>x"} we have @{text "\<^vec>H ?\<^vec>x \ A ?\<^vec>x"}. + Note that this representation looses information about the order of + parameters, and vacuous quantifiers vanish automatically. + + \end{itemize} +*} + +text %mlref {* + \begin{mldecls} + @{index_ML MetaSimplifier.norm_hhf: "thm -> thm"} \\ + \end{mldecls} + + \begin{description} + + \item @{ML MetaSimplifier.norm_hhf}~@{text thm} normalizes the given + theorem according to the canonical form specified above. This is + occasionally helpful to repair some low-level tools that do not + handle Hereditary Harrop Formulae properly. + + \end{description} +*} + + +subsection {* Rule composition *} + +text {* + The rule calculus of Isabelle/Pure provides two main inferences: + @{inference resolution} (i.e.\ back-chaining of rules) and + @{inference assumption} (i.e.\ closing a branch), both modulo + higher-order unification. There are also combined variants, notably + @{inference elim_resolution} and @{inference dest_resolution}. + + To understand the all-important @{inference resolution} principle, + we first consider raw @{inference_def composition} (modulo + higher-order unification with substitution @{text "\"}): + \[ + \infer[(@{inference_def composition})]{@{text "\<^vec>A\ \ C\"}} + {@{text "\<^vec>A \ B"} & @{text "B' \ C"} & @{text "B\ = B'\"}} + \] + Here the conclusion of the first rule is unified with the premise of + the second; the resulting rule instance inherits the premises of the + first and conclusion of the second. Note that @{text "C"} can again + consist of iterated implications. We can also permute the premises + of the second rule back-and-forth in order to compose with @{text + "B'"} in any position (subsequently we shall always refer to + position 1 w.l.o.g.). + + In @{inference composition} the internal structure of the common + part @{text "B"} and @{text "B'"} is not taken into account. For + proper @{inference resolution} we require @{text "B"} to be atomic, + and explicitly observe the structure @{text "\\<^vec>x. \<^vec>H + \<^vec>x \ B' \<^vec>x"} of the premise of the second rule. 
+  The idea is to adapt the first rule by ``lifting'' it into this
+  context, by means of iterated application of the following
+  inferences:
+  \[
+  \infer[(@{inference_def imp_lift})]{@{text "(\<^vec>H \ \<^vec>A) \ (\<^vec>H \ B)"}}{@{text "\<^vec>A \ B"}}
+  \]
+  \[
+  \infer[(@{inference_def all_lift})]{@{text "(\\<^vec>x. \<^vec>A (?\<^vec>a \<^vec>x)) \ (\\<^vec>x. B (?\<^vec>a \<^vec>x))"}}{@{text "\<^vec>A ?\<^vec>a \ B ?\<^vec>a"}}
+  \]
+  By combining raw composition with lifting, we get full @{inference
+  resolution} as follows:
+  \[
+  \infer[(@{inference_def resolution})]
+  {@{text "(\\<^vec>x. \<^vec>H \<^vec>x \ \<^vec>A (?\<^vec>a \<^vec>x))\ \ C\"}}
+  {\begin{tabular}{l}
+    @{text "\<^vec>A ?\<^vec>a \ B ?\<^vec>a"} \\
+    @{text "(\\<^vec>x. \<^vec>H \<^vec>x \ B' \<^vec>x) \ C"} \\
+    @{text "(\\<^vec>x. B (?\<^vec>a \<^vec>x))\ = B'\"} \\
+   \end{tabular}}
+  \]
+
+  Continued resolution of rules allows us to back-chain a problem
+  towards more and more sub-problems.  Branches are closed either by
+  resolving with a rule of 0 premises, or by producing a
+  ``short-circuit'' within a solved situation (again modulo
+  unification):
+  \[
+  \infer[(@{inference_def assumption})]{@{text "C\"}}
+  {@{text "(\\<^vec>x. \<^vec>H \<^vec>x \ A \<^vec>x) \ C"} & @{text "A\ = H\<^sub>i\"}~~\text{(for some~@{text i})}}
+  \]
+
+  FIXME @{inference_def elim_resolution}, @{inference_def dest_resolution}
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML "op RS": "thm * thm -> thm"} \\
+  @{index_ML "op OF": "thm * thm list -> thm"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{text "rule\<^sub>1 RS rule\<^sub>2"} resolves @{text
+  "rule\<^sub>1"} with @{text "rule\<^sub>2"} according to the
+  @{inference resolution} principle explained above.  Note that the
+  corresponding attribute in the Isar language is called @{attribute
+  THEN}.
+
+  \item @{text "rule OF rules"} resolves a list of rules with the
+  first rule, addressing its premises @{text "1, \, length rules"}
+  (operating from last to first).  This means the newly emerging
+  premises are all concatenated, without interfering.  Also note that
+  compared to @{text "RS"}, the rule argument order is swapped: @{text
+  "rule\<^sub>1 RS rule\<^sub>2 = rule\<^sub>2 OF [rule\<^sub>1]"}.
+
+  \end{description}
+*}
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/ML.thy
--- a/doc-src/IsarImplementation/Thy/ML.thy	Wed Mar 04 17:12:23 2009 -0800
+++ b/doc-src/IsarImplementation/Thy/ML.thy	Thu Mar 05 02:32:46 2009 +0100
@@ -631,4 +631,4 @@
   Most table functions correspond to those of association lists.
 *}
 
-end
+end
\ No newline at end of file
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Prelim.thy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Prelim.thy	Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,764 @@
+theory Prelim
+imports Base
+begin
+
+chapter {* Preliminaries *}
+
+section {* Contexts \label{sec:context} *}
+
+text {*
+  A logical context represents the background that is required for
+  formulating statements and composing proofs.  It acts as a medium to
+  produce formal content, depending on earlier material (declarations,
+  results etc.).
+
+  For example, derivations within the Isabelle/Pure logic can be
+  described as a judgment @{text "\ \\<^sub>\ \"}, which means that a
+  proposition @{text "\"} is derivable from hypotheses @{text "\"}
+  within the theory @{text "\"}.
There are logical reasons for + keeping @{text "\"} and @{text "\"} separate: theories can be + liberal about supporting type constructors and schematic + polymorphism of constants and axioms, while the inner calculus of + @{text "\ \ \"} is strictly limited to Simple Type Theory (with + fixed type variables in the assumptions). + + \medskip Contexts and derivations are linked by the following key + principles: + + \begin{itemize} + + \item Transfer: monotonicity of derivations admits results to be + transferred into a \emph{larger} context, i.e.\ @{text "\ \\<^sub>\ + \"} implies @{text "\' \\<^sub>\\<^sub>' \"} for contexts @{text "\' + \ \"} and @{text "\' \ \"}. + + \item Export: discharge of hypotheses admits results to be exported + into a \emph{smaller} context, i.e.\ @{text "\' \\<^sub>\ \"} + implies @{text "\ \\<^sub>\ \ \ \"} where @{text "\' \ \"} and + @{text "\ = \' - \"}. Note that @{text "\"} remains unchanged here, + only the @{text "\"} part is affected. + + \end{itemize} + + \medskip By modeling the main characteristics of the primitive + @{text "\"} and @{text "\"} above, and abstracting over any + particular logical content, we arrive at the fundamental notions of + \emph{theory context} and \emph{proof context} in Isabelle/Isar. + These implement a certain policy to manage arbitrary \emph{context + data}. There is a strongly-typed mechanism to declare new kinds of + data at compile time. + + The internal bootstrap process of Isabelle/Pure eventually reaches a + stage where certain data slots provide the logical content of @{text + "\"} and @{text "\"} sketched above, but this does not stop there! + Various additional data slots support all kinds of mechanisms that + are not necessarily part of the core logic. + + For example, there would be data for canonical introduction and + elimination rules for arbitrary operators (depending on the + object-logic and application), which enables users to perform + standard proof steps implicitly (cf.\ the @{text "rule"} method + \cite{isabelle-isar-ref}). + + \medskip Thus Isabelle/Isar is able to bring forth more and more + concepts successively. In particular, an object-logic like + Isabelle/HOL continues the Isabelle/Pure setup by adding specific + components for automated reasoning (classical reasoner, tableau + prover, structured induction etc.) and derived specification + mechanisms (inductive predicates, recursive functions etc.). All of + this is ultimately based on the generic data management by theory + and proof contexts introduced here. +*} + + +subsection {* Theory context \label{sec:context-theory} *} + +text {* + A \emph{theory} is a data container with explicit name and unique + identifier. Theories are related by a (nominal) sub-theory + relation, which corresponds to the dependency graph of the original + construction; each theory is derived from a certain sub-graph of + ancestor theories. + + The @{text "merge"} operation produces the least upper bound of two + theories, which actually degenerates into absorption of one theory + into the other (due to the nominal sub-theory relation). + + The @{text "begin"} operation starts a new theory by importing + several parent theories and entering a special @{text "draft"} mode, + which is sustained until the final @{text "end"} operation. A draft + theory acts like a linear type, where updates invalidate earlier + versions. An invalidated draft is called ``stale''. 
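+
+  For example, here is a hypothetical {\ML} sketch of the nominal
+  relations described in this subsection (using the interface
+  summarized below; the @{verbatim "@{theory}"} antiquotation refers
+  to the enclosing theory):
+
+  \begin{verbatim}
+  val thy = @{theory};
+  Theory.subthy (thy, Theory.merge (thy, thy));
+  (* true: merging a theory with itself degenerates into absorption *)
+  \end{verbatim}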
+ + The @{text "checkpoint"} operation produces an intermediate stepping + stone that will survive the next update: both the original and the + changed theory remain valid and are related by the sub-theory + relation. Checkpointing essentially recovers purely functional + theory values, at the expense of some extra internal bookkeeping. + + The @{text "copy"} operation produces an auxiliary version that has + the same data content, but is unrelated to the original: updates of + the copy do not affect the original, neither does the sub-theory + relation hold. + + \medskip The example in \figref{fig:ex-theory} below shows a theory + graph derived from @{text "Pure"}, with theory @{text "Length"} + importing @{text "Nat"} and @{text "List"}. The body of @{text + "Length"} consists of a sequence of updates, working mostly on + drafts. Intermediate checkpoints may occur as well, due to the + history mechanism provided by the Isar top-level, cf.\ + \secref{sec:isar-toplevel}. + + \begin{figure}[htb] + \begin{center} + \begin{tabular}{rcccl} + & & @{text "Pure"} \\ + & & @{text "\"} \\ + & & @{text "FOL"} \\ + & $\swarrow$ & & $\searrow$ & \\ + @{text "Nat"} & & & & @{text "List"} \\ + & $\searrow$ & & $\swarrow$ \\ + & & @{text "Length"} \\ + & & \multicolumn{3}{l}{~~@{keyword "imports"}} \\ + & & \multicolumn{3}{l}{~~@{keyword "begin"}} \\ + & & $\vdots$~~ \\ + & & @{text "\"}~~ \\ + & & $\vdots$~~ \\ + & & @{text "\"}~~ \\ + & & $\vdots$~~ \\ + & & \multicolumn{3}{l}{~~@{command "end"}} \\ + \end{tabular} + \caption{A theory definition depending on ancestors}\label{fig:ex-theory} + \end{center} + \end{figure} + + \medskip There is a separate notion of \emph{theory reference} for + maintaining a live link to an evolving theory context: updates on + drafts are propagated automatically. Dynamic updating stops after + an explicit @{text "end"} only. + + Derived entities may store a theory reference in order to indicate + the context they belong to. This implicitly assumes monotonic + reasoning, because the referenced context may become larger without + further notice. +*} + +text %mlref {* + \begin{mldecls} + @{index_ML_type theory} \\ + @{index_ML Theory.subthy: "theory * theory -> bool"} \\ + @{index_ML Theory.merge: "theory * theory -> theory"} \\ + @{index_ML Theory.checkpoint: "theory -> theory"} \\ + @{index_ML Theory.copy: "theory -> theory"} \\ + \end{mldecls} + \begin{mldecls} + @{index_ML_type theory_ref} \\ + @{index_ML Theory.deref: "theory_ref -> theory"} \\ + @{index_ML Theory.check_thy: "theory -> theory_ref"} \\ + \end{mldecls} + + \begin{description} + + \item @{ML_type theory} represents theory contexts. This is + essentially a linear type! Most operations destroy the original + version, which then becomes ``stale''. + + \item @{ML "Theory.subthy"}~@{text "(thy\<^sub>1, thy\<^sub>2)"} + compares theories according to the inherent graph structure of the + construction. This sub-theory relation is a nominal approximation + of inclusion (@{text "\"}) of the corresponding content. + + \item @{ML "Theory.merge"}~@{text "(thy\<^sub>1, thy\<^sub>2)"} + absorbs one theory into the other. This fails for unrelated + theories! + + \item @{ML "Theory.checkpoint"}~@{text "thy"} produces a safe + stepping stone in the linear development of @{text "thy"}. The next + update will result in two related, valid theories. + + \item @{ML "Theory.copy"}~@{text "thy"} produces a variant of @{text + "thy"} that holds a copy of the same data. 
The result is not + related to the original; the original is unchanged. + + \item @{ML_type theory_ref} represents a sliding reference to an + always valid theory; updates on the original are propagated + automatically. + + \item @{ML "Theory.deref"}~@{text "thy_ref"} turns a @{ML_type + "theory_ref"} into an @{ML_type "theory"} value. As the referenced + theory evolves monotonically over time, later invocations of @{ML + "Theory.deref"} may refer to a larger context. + + \item @{ML "Theory.check_thy"}~@{text "thy"} produces a @{ML_type + "theory_ref"} from a valid @{ML_type "theory"} value. + + \end{description} +*} + + +subsection {* Proof context \label{sec:context-proof} *} + +text {* + A proof context is a container for pure data with a back-reference + to the theory it belongs to. The @{text "init"} operation creates a + proof context from a given theory. Modifications to draft theories + are propagated to the proof context as usual, but there is also an + explicit @{text "transfer"} operation to force resynchronization + with more substantial updates to the underlying theory. The actual + context data does not require any special bookkeeping, thanks to the + lack of destructive features. + + Entities derived in a proof context need to record inherent logical + requirements explicitly, since there is no separate context + identification as for theories. For example, hypotheses used in + primitive derivations (cf.\ \secref{sec:thms}) are recorded + separately within the sequent @{text "\ \ \"}, just to make double + sure. Results could still leak into an alien proof context due to + programming errors, but Isabelle/Isar includes some extra validity + checks in critical positions, notably at the end of a sub-proof. + + Proof contexts may be manipulated arbitrarily, although the common + discipline is to follow block structure as a mental model: a given + context is extended consecutively, and results are exported back + into the original context. Note that the Isar proof states model + block-structured reasoning explicitly, using a stack of proof + contexts internally. +*} + +text %mlref {* + \begin{mldecls} + @{index_ML_type Proof.context} \\ + @{index_ML ProofContext.init: "theory -> Proof.context"} \\ + @{index_ML ProofContext.theory_of: "Proof.context -> theory"} \\ + @{index_ML ProofContext.transfer: "theory -> Proof.context -> Proof.context"} \\ + \end{mldecls} + + \begin{description} + + \item @{ML_type Proof.context} represents proof contexts. Elements + of this type are essentially pure values, with a sliding reference + to the background theory. + + \item @{ML ProofContext.init}~@{text "thy"} produces a proof context + derived from @{text "thy"}, initializing all data. + + \item @{ML ProofContext.theory_of}~@{text "ctxt"} selects the + background theory from @{text "ctxt"}, dereferencing its internal + @{ML_type theory_ref}. + + \item @{ML ProofContext.transfer}~@{text "thy ctxt"} promotes the + background theory of @{text "ctxt"} to the super theory @{text + "thy"}. + + \end{description} +*} + + +subsection {* Generic contexts \label{sec:generic-context} *} + +text {* + A generic context is the disjoint sum of either a theory or proof + context. Occasionally, this enables uniform treatment of generic + context data, typically extra-logical information. Operations on + generic contexts include the usual injections, partial selections, + and combinators for lifting operations on either component of the + disjoint sum. 
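+
+  For example, a hypothetical {\ML} sketch of the injections and
+  selections (using the interface described below; @{verbatim
+  "@{theory}"} is an antiquotation for the enclosing theory):
+
+  \begin{verbatim}
+  val generic = Context.Theory @{theory};  (* inject into the sum *)
+  val thy  = Context.theory_of generic;    (* trivial selection *)
+  val ctxt = Context.proof_of generic;     (* ad-hoc init *)
+  \end{verbatim}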
+
+  Moreover, there are total operations @{text "theory_of"} and @{text
+  "proof_of"} to convert a generic context into either kind: a theory
+  can always be selected from the sum, while a proof context might
+  have to be constructed by an ad-hoc @{text "init"} operation.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_type Context.generic} \\
+  @{index_ML Context.theory_of: "Context.generic -> theory"} \\
+  @{index_ML Context.proof_of: "Context.generic -> Proof.context"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_type Context.generic} is the direct sum of @{ML_type
+  "theory"} and @{ML_type "Proof.context"}, with the datatype
+  constructors @{ML "Context.Theory"} and @{ML "Context.Proof"}.
+
+  \item @{ML Context.theory_of}~@{text "context"} always produces a
+  theory from the generic @{text "context"}, using @{ML
+  "ProofContext.theory_of"} as required.
+
+  \item @{ML Context.proof_of}~@{text "context"} always produces a
+  proof context from the generic @{text "context"}, using @{ML
+  "ProofContext.init"} as required (note that this re-initializes the
+  context data with each invocation).
+
+  \end{description}
+*}
+
+
+subsection {* Context data \label{sec:context-data} *}
+
+text {*
+  The main purpose of theory and proof contexts is to manage arbitrary
+  data.  New data types can be declared incrementally at compile time.
+  There are separate declaration mechanisms for each of the three
+  kinds of contexts: theory, proof, generic.
+
+  \paragraph{Theory data} may refer to destructive entities, which are
+  maintained in direct correspondence to the linear evolution of
+  theory values, including explicit copies.\footnote{Most existing
+  instances of destructive theory data are merely historical relics
+  (e.g.\ the destructive theorem storage, and destructive hints for
+  the Simplifier and Classical rules).}  A theory data declaration
+  needs to implement the following SML signature:
+
+  \medskip
+  \begin{tabular}{ll}
+  @{text "\ T"} & representing type \\
+  @{text "\ empty: T"} & empty default value \\
+  @{text "\ copy: T \ T"} & refresh impure data \\
+  @{text "\ extend: T \ T"} & re-initialize on import \\
+  @{text "\ merge: T \ T \ T"} & join on import \\
+  \end{tabular}
+  \medskip
+
+  \noindent The @{text "empty"} value acts as initial default for
+  \emph{any} theory that does not declare actual data content; @{text
+  "copy"} maintains persistent integrity for impure data; it is just
+  the identity for pure values; @{text "extend"} acts like a unitary
+  version of @{text "merge"}; both operations should also include the
+  functionality of @{text "copy"} for impure data.
+
+  \paragraph{Proof context data} is purely functional.  A declaration
+  needs to implement the following SML signature:
+
+  \medskip
+  \begin{tabular}{ll}
+  @{text "\ T"} & representing type \\
+  @{text "\ init: theory \ T"} & produce initial value \\
+  \end{tabular}
+  \medskip
+
+  \noindent The @{text "init"} operation is supposed to produce a pure
+  value from the given background theory.
+
+  \paragraph{Generic data} provides a hybrid interface for both theory
+  and proof data.  The declaration is essentially the same as for
+  (pure) theory data, without @{text "copy"}.  The @{text "init"}
+  operation for proof contexts merely selects the current data value
+  from the background theory.
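+
+  For example, here is a hypothetical declaration of theory data
+  holding a list of strings (the structure name and content are made
+  up for illustration; the resulting interface is summarized next):
+
+  \begin{verbatim}
+  structure Data = TheoryDataFun
+  (
+    type T = string list;                (* representing type *)
+    val empty = [];                      (* default value *)
+    val copy = I;                        (* pure data: identity *)
+    val extend = I;                      (* pure data: identity *)
+    fun merge _ = Library.merge (op =);  (* union on import *)
+  );
+  \end{verbatim}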
+
+  \bigskip A data declaration of type @{text "T"} results in the
+  following interface:
+
+  \medskip
+  \begin{tabular}{ll}
+  @{text "init: theory \ T"} \\
+  @{text "get: context \ T"} \\
+  @{text "put: T \ context \ context"} \\
+  @{text "map: (T \ T) \ context \ context"} \\
+  \end{tabular}
+  \medskip
+
+  \noindent Here @{text "init"} is only applicable to impure theory
+  data to install a fresh copy persistently (destructive update on
+  uninitialized data has no permanent effect).  The other operations
+  provide access for the particular kind of context (theory, proof, or
+  generic context).  Note that this is a safe interface: there is no
+  other way to access the corresponding data slot of a context.  By
+  keeping these operations private, a component may maintain abstract
+  values authentically, without other components interfering.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_functor TheoryDataFun} \\
+  @{index_ML_functor ProofDataFun} \\
+  @{index_ML_functor GenericDataFun} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_functor TheoryDataFun}@{text "(spec)"} declares data for
+  type @{ML_type theory} according to the specification provided as
+  argument structure.  The resulting structure provides data init and
+  access operations as described above.
+
+  \item @{ML_functor ProofDataFun}@{text "(spec)"} is analogous to
+  @{ML_functor TheoryDataFun} for type @{ML_type Proof.context}.
+
+  \item @{ML_functor GenericDataFun}@{text "(spec)"} is analogous to
+  @{ML_functor TheoryDataFun} for type @{ML_type Context.generic}.
+
+  \end{description}
+*}
+
+
+section {* Names \label{sec:names} *}
+
+text {*
+  In principle, a name is just a string, but there are various
+  conventions for encoding additional structure.  For example,
+  ``@{text "Foo.bar.baz"}'' is considered a qualified name consisting
+  of three basic name components.  The individual constituents of a
+  name may have further substructure, e.g.\ the string
+  ``\verb,\,\verb,,'' encodes as a single symbol.
+*}
+
+
+subsection {* Strings of symbols *}
+
+text {*
+  A \emph{symbol} constitutes the smallest textual unit in Isabelle
+  --- raw characters are normally not encountered at all.  Isabelle
+  strings consist of a sequence of symbols, represented as a packed
+  string or a list of strings.  Each symbol is in itself a small
+  string, which has one of the following forms:
+
+  \begin{enumerate}
+
+  \item a single ASCII character ``@{text "c"}'', for example
+  ``\verb,a,'',
+
+  \item a regular symbol ``\verb,\,\verb,<,@{text "ident"}\verb,>,'',
+  for example ``\verb,\,\verb,,'',
+
+  \item a control symbol ``\verb,\,\verb,<^,@{text "ident"}\verb,>,'',
+  for example ``\verb,\,\verb,<^bold>,'',
+
+  \item a raw symbol ``\verb,\,\verb,<^raw:,@{text text}\verb,>,''
+  where @{text text} consists of printable characters excluding
+  ``\verb,.,'' and ``\verb,>,'', for example
+  ``\verb,\,\verb,<^raw:$\sum_{i = 1}^n$>,'',
+
+  \item a numbered raw control symbol ``\verb,\,\verb,<^raw,@{text
+  n}\verb,>,'' where @{text n} consists of digits, for example
+  ``\verb,\,\verb,<^raw42>,''.
+
+  \end{enumerate}
+
+  \noindent The @{text "ident"} syntax for symbol names is @{text
+  "letter (letter | digit)\<^sup>*"}, where @{text "letter =
+  A..Za..z"} and @{text "digit = 0..9"}.  There are infinitely many
+  regular symbols and control symbols, but a fixed collection of
+  standard symbols is treated specifically.  For example,
+  ``\verb,\,\verb,,'' is classified as a letter, which means it
+  may occur within regular Isabelle identifiers.
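+
+  For example, a hypothetical {\ML} sketch using @{ML
+  "Symbol.explode"} (described below; the expected result is given in
+  the comment):
+
+  \begin{verbatim}
+  Symbol.explode "a\<alpha>\<^bold>b";
+  (* ["a", "\<alpha>", "\<^bold>", "b"] *)
+  \end{verbatim}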
+
+  Since the character set underlying Isabelle symbols is 7-bit ASCII
+  and 8-bit characters are passed through transparently, Isabelle may
+  also process Unicode/UCS data in UTF-8 encoding.  Unicode provides
+  its own collection of mathematical symbols, but there is no built-in
+  link to the standard collection of Isabelle.
+
+  \medskip Output of Isabelle symbols depends on the print mode
+  (\secref{print-mode}).  For example, the standard {\LaTeX} setup of
+  the Isabelle document preparation system would present
+  ``\verb,\,\verb,,'' as @{text "\"}, and
+  ``\verb,\,\verb,<^bold>,\verb,\,\verb,,'' as @{text
+  "\<^bold>\"}.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_type "Symbol.symbol"} \\
+  @{index_ML Symbol.explode: "string -> Symbol.symbol list"} \\
+  @{index_ML Symbol.is_letter: "Symbol.symbol -> bool"} \\
+  @{index_ML Symbol.is_digit: "Symbol.symbol -> bool"} \\
+  @{index_ML Symbol.is_quasi: "Symbol.symbol -> bool"} \\
+  @{index_ML Symbol.is_blank: "Symbol.symbol -> bool"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML_type "Symbol.sym"} \\
+  @{index_ML Symbol.decode: "Symbol.symbol -> Symbol.sym"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_type "Symbol.symbol"} represents individual Isabelle
+  symbols; this is an alias for @{ML_type "string"}.
+
+  \item @{ML "Symbol.explode"}~@{text "str"} produces a symbol list
+  from the packed form.  This function supersedes @{ML
+  "String.explode"} for virtually all purposes of manipulating text in
+  Isabelle!
+
+  \item @{ML "Symbol.is_letter"}, @{ML "Symbol.is_digit"}, @{ML
+  "Symbol.is_quasi"}, @{ML "Symbol.is_blank"} classify standard
+  symbols according to fixed syntactic conventions of Isabelle, cf.\
+  \cite{isabelle-isar-ref}.
+
+  \item @{ML_type "Symbol.sym"} is a concrete datatype that represents
+  the different kinds of symbols explicitly, with constructors @{ML
+  "Symbol.Char"}, @{ML "Symbol.Sym"}, @{ML "Symbol.Ctrl"}, @{ML
+  "Symbol.Raw"}.
+
+  \item @{ML "Symbol.decode"} converts the string representation of a
+  symbol into the datatype version.
+
+  \end{description}
+*}
+
+
+subsection {* Basic names \label{sec:basic-names} *}
+
+text {*
+  A \emph{basic name} essentially consists of a single Isabelle
+  identifier.  There are conventions to mark separate classes of basic
+  names, by attaching a suffix of underscores: one underscore means
+  \emph{internal name}, two underscores mean \emph{Skolem name},
+  three underscores mean \emph{internal Skolem name}.
+
+  For example, the basic name @{text "foo"} has the internal version
+  @{text "foo_"}, with Skolem versions @{text "foo__"} and @{text
+  "foo___"}, respectively.
+
+  These special versions provide copies of the basic name space, apart
+  from anything that normally appears in the user text.  For example,
+  system-generated variables in Isar proof contexts are usually marked
+  as internal, which prevents mysterious name references like @{text
+  "xaa"} from appearing in the text.
+
+  \medskip Manipulating binding scopes often requires on-the-fly
+  renamings.  A \emph{name context} contains a collection of already
+  used names.  The @{text "declare"} operation adds names to the
+  context.
+
+  The @{text "invents"} operation derives a number of fresh names from
+  a given starting point.  For example, the first three names derived
+  from @{text "a"} are @{text "a"}, @{text "b"}, @{text "c"}.
+
+  The @{text "variants"} operation produces fresh names by
+  incrementing tentative names as base-26 numbers (with digits @{text
+  "a..z"}) until all clashes are resolved.
For example, name @{text + "foo"} results in variants @{text "fooa"}, @{text "foob"}, @{text + "fooc"}, \dots, @{text "fooaa"}, @{text "fooab"} etc.; each renaming + step picks the next unused variant from this sequence. +*} + +text %mlref {* + \begin{mldecls} + @{index_ML Name.internal: "string -> string"} \\ + @{index_ML Name.skolem: "string -> string"} \\ + \end{mldecls} + \begin{mldecls} + @{index_ML_type Name.context} \\ + @{index_ML Name.context: Name.context} \\ + @{index_ML Name.declare: "string -> Name.context -> Name.context"} \\ + @{index_ML Name.invents: "Name.context -> string -> int -> string list"} \\ + @{index_ML Name.variants: "string list -> Name.context -> string list * Name.context"} \\ + \end{mldecls} + + \begin{description} + + \item @{ML Name.internal}~@{text "name"} produces an internal name + by adding one underscore. + + \item @{ML Name.skolem}~@{text "name"} produces a Skolem name by + adding two underscores. + + \item @{ML_type Name.context} represents the context of already used + names; the initial value is @{ML "Name.context"}. + + \item @{ML Name.declare}~@{text "name"} enters a used name into the + context. + + \item @{ML Name.invents}~@{text "context name n"} produces @{text + "n"} fresh names derived from @{text "name"}. + + \item @{ML Name.variants}~@{text "names context"} produces fresh + variants of @{text "names"}; the result is entered into the context. + + \end{description} +*} + + +subsection {* Indexed names *} + +text {* + An \emph{indexed name} (or @{text "indexname"}) is a pair of a basic + name and a natural number. This representation allows efficient + renaming by incrementing the second component only. The canonical + way to rename two collections of indexnames apart from each other is + this: determine the maximum index @{text "maxidx"} of the first + collection, then increment all indexes of the second collection by + @{text "maxidx + 1"}; the maximum index of an empty collection is + @{text "-1"}. + + Occasionally, basic names and indexed names are injected into the + same pair type: the (improper) indexname @{text "(x, -1)"} is used + to encode basic names. + + \medskip Isabelle syntax observes the following rules for + representing an indexname @{text "(x, i)"} as a packed string: + + \begin{itemize} + + \item @{text "?x"} if @{text "x"} does not end with a digit and @{text "i = 0"}, + + \item @{text "?xi"} if @{text "x"} does not end with a digit, + + \item @{text "?x.i"} otherwise. + + \end{itemize} + + Indexnames may acquire large index numbers over time. Results are + normalized towards @{text "0"} at certain checkpoints, notably at + the end of a proof. This works by producing variants of the + corresponding basic name components. For example, the collection + @{text "?x1, ?x7, ?x42"} becomes @{text "?x, ?xa, ?xb"}. +*} + +text %mlref {* + \begin{mldecls} + @{index_ML_type indexname} \\ + \end{mldecls} + + \begin{description} + + \item @{ML_type indexname} represents indexed names. This is an + abbreviation for @{ML_type "string * int"}. The second component is + usually non-negative, except for situations where @{text "(x, -1)"} + is used to embed basic names into this type. + + \end{description} +*} + + +subsection {* Qualified names and name spaces *} + +text {* + A \emph{qualified name} consists of a non-empty sequence of basic + name components. The packed representation uses a dot as separator, + as in ``@{text "A.b.c"}''. 
+  The last component is called the \emph{base} name; the remaining
+  prefix is the \emph{qualifier} (which may be empty).  The idea of
+  qualified names is to encode nested structures by recording the
+  access paths as qualifiers.  For example, an item named ``@{text
+  "A.b.c"}'' may be understood as a local entity @{text "c"}, within a
+  local structure @{text "b"}, within a global structure @{text "A"}.
+  Typically, name space hierarchies consist of 1--2 levels of
+  qualification, but this need not always be so.
+
+  The empty name is commonly used as an indication of unnamed
+  entities, whenever this makes any sense.  The basic operations on
+  qualified names are smart enough to pass through such improper names
+  unchanged.
+
+  \medskip A @{text "naming"} policy tells how to turn a name
+  specification into a fully qualified internal name (by the @{text
+  "full"} operation), and how fully qualified names may be accessed
+  externally.  For example, the default naming policy is to prefix an
+  implicit path: @{text "full x"} produces @{text "path.x"}, and the
+  standard accesses for @{text "path.x"} include both @{text "x"} and
+  @{text "path.x"}.  Normally, the naming is implicit in the theory or
+  proof context; there are separate versions of the corresponding
+  operations.
+
+  \medskip A @{text "name space"} manages a collection of fully
+  internalized names, together with a mapping between external names
+  and internal names (in both directions).  The corresponding @{text
+  "intern"} and @{text "extern"} operations are mostly used for
+  parsing and printing only!  The @{text "declare"} operation augments
+  a name space according to the accesses determined by the naming
+  policy.
+
+  \medskip As a general principle, there is a separate name space for
+  each kind of formal entity, e.g.\ logical constant, type
+  constructor, type class, theorem.  It is usually clear from the
+  occurrence in concrete syntax (or from the scope) which kind of
+  entity a name refers to.  For example, the very same name @{text
+  "c"} may be used uniformly for a constant, type constructor, and
+  type class.
+
+  There are common schemes to name theorems systematically, according
+  to the name of the main logical entity involved, e.g.\ @{text
+  "c.intro"} for a canonical theorem related to constant @{text "c"}.
+  This technique of mapping names from one space into another requires
+  some care in order to avoid conflicts.  In particular, theorem names
+  derived from a type constructor or type class are better suffixed in
+  addition to the usual qualification, e.g.\ @{text "c_type.intro"}
+  and @{text "c_class.intro"} for theorems related to type @{text "c"}
+  and class @{text "c"}, respectively.
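+
+  \medskip To illustrate the basic operations on qualified names, here
+  is a hypothetical {\ML} sketch (expected results in comments; see
+  the interface below):
+
+  \begin{verbatim}
+  NameSpace.explode "A.b.c";     (* ["A", "b", "c"] *)
+  NameSpace.base "A.b.c";        (* "c" *)
+  NameSpace.qualifier "A.b.c";   (* "A.b" *)
+  NameSpace.append "A.b" "c";    (* "A.b.c" *)
+  \end{verbatim}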
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML NameSpace.base: "string -> string"} \\
+  @{index_ML NameSpace.qualifier: "string -> string"} \\
+  @{index_ML NameSpace.append: "string -> string -> string"} \\
+  @{index_ML NameSpace.implode: "string list -> string"} \\
+  @{index_ML NameSpace.explode: "string -> string list"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML_type NameSpace.naming} \\
+  @{index_ML NameSpace.default_naming: NameSpace.naming} \\
+  @{index_ML NameSpace.add_path: "string -> NameSpace.naming -> NameSpace.naming"} \\
+  @{index_ML NameSpace.full_name: "NameSpace.naming -> binding -> string"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML_type NameSpace.T} \\
+  @{index_ML NameSpace.empty: NameSpace.T} \\
+  @{index_ML NameSpace.merge: "NameSpace.T * NameSpace.T -> NameSpace.T"} \\
+  @{index_ML NameSpace.declare: "NameSpace.naming -> binding -> NameSpace.T -> string * NameSpace.T"} \\
+  @{index_ML NameSpace.intern: "NameSpace.T -> string -> string"} \\
+  @{index_ML NameSpace.extern: "NameSpace.T -> string -> string"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML NameSpace.base}~@{text "name"} returns the base name of a
+  qualified name.
+
+  \item @{ML NameSpace.qualifier}~@{text "name"} returns the qualifier
+  of a qualified name.
+
+  \item @{ML NameSpace.append}~@{text "name\<^isub>1 name\<^isub>2"}
+  appends two qualified names.
+
+  \item @{ML NameSpace.implode}~@{text "names"} and @{ML
+  NameSpace.explode}~@{text "name"} convert between the packed string
+  representation and the explicit list form of qualified names.
+
+  \item @{ML_type NameSpace.naming} represents the abstract concept of
+  a naming policy.
+
+  \item @{ML NameSpace.default_naming} is the default naming policy.
+  In a theory context, this is usually augmented by a path prefix
+  consisting of the theory name.
+
+  \item @{ML NameSpace.add_path}~@{text "path naming"} augments the
+  naming policy by extending its path component.
+
+  \item @{ML NameSpace.full_name}~@{text "naming binding"} turns a
+  name binding (usually a basic name) into the fully qualified
+  internal name, according to the given naming policy.
+
+  \item @{ML_type NameSpace.T} represents name spaces.
+
+  \item @{ML NameSpace.empty} and @{ML NameSpace.merge}~@{text
+  "(space\<^isub>1, space\<^isub>2)"} are the canonical operations for
+  maintaining name spaces according to theory data management
+  (\secref{sec:context-data}).
+
+  \item @{ML NameSpace.declare}~@{text "naming binding space"} enters
+  a name binding as a fully qualified internal name into the name
+  space, with external accesses determined by the naming policy.
+
+  \item @{ML NameSpace.intern}~@{text "space name"} internalizes a
+  (partially qualified) external name.
+
+  This operation is mostly for parsing!  Note that fully qualified
+  names stemming from declarations are produced via @{ML
+  "NameSpace.full_name"} and @{ML "NameSpace.declare"}
+  (or their derivatives for @{ML_type theory} and
+  @{ML_type Proof.context}).
+
+  \item @{ML NameSpace.extern}~@{text "space name"} externalizes a
+  (fully qualified) internal name.
+
+  This operation is mostly for printing!  Note that unqualified names
+  are produced via @{ML NameSpace.base}.
+
+  \end{description}
+*}
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Proof.thy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Proof.thy	Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,330 @@
+theory Proof
+imports Base
+begin
+
+chapter {* Structured proofs *}
+
+section {* Variables \label{sec:variables} *}
+
+text {*
+  Any variable that is not explicitly bound by @{text "\"}-abstraction
+  is considered ``free''.  Logically, free variables act like
+  outermost universal quantification at the sequent level: @{text
+  "A\<^isub>1(x), \, A\<^isub>n(x) \ B(x)"} means that the result
+  holds \emph{for all} values of @{text "x"}.  Free variables for
+  terms (not types) can be fully internalized into the logic: @{text
+  "\ B(x)"} and @{text "\ \x. B(x)"} are interchangeable, provided
+  that @{text "x"} does not occur elsewhere in the context.
+  Inspecting @{text "\ \x. B(x)"} more closely, we see that inside the
+  quantifier, @{text "x"} is essentially ``arbitrary, but fixed'',
+  while from outside it appears as a place-holder for instantiation
+  (thanks to @{text "\"} elimination).
+
+  The Pure logic represents the idea of variables being either inside
+  or outside the current scope by providing separate syntactic
+  categories for \emph{fixed variables} (e.g.\ @{text "x"}) vs.\
+  \emph{schematic variables} (e.g.\ @{text "?x"}).  Incidentally, a
+  universal result @{text "\ \x. B(x)"} has the HHF normal form @{text
+  "\ B(?x)"}, which represents its generality nicely without requiring
+  an explicit quantifier.  The same principle works for type
+  variables: @{text "\ B(?\)"} represents the idea of ``@{text "\
+  \\. B(\)"}'' without demanding a truly polymorphic framework.
+
+  \medskip Additional care is required to treat type variables in a
+  way that facilitates type-inference.  In principle, term variables
+  depend on type variables, which means that type variables would have
+  to be declared first.  For example, a raw type-theoretic framework
+  would demand the context to be constructed in stages as follows:
+  @{text "\ = \: type, x: \, a: A(x\<^isub>\)"}.
+
+  We allow a slightly less formalistic mode of operation: term
+  variables @{text "x"} are fixed without specifying a type yet
+  (essentially \emph{all} potential occurrences of some instance
+  @{text "x\<^isub>\"} are fixed); the first occurrence of @{text "x"}
+  within a specific term assigns its most general type, which is then
+  maintained consistently in the context.  The above example becomes
+  @{text "\ = x: term, \: type, A(x\<^isub>\)"}, where type @{text
+  "\"} is fixed \emph{after} term @{text "x"}, and the constraint
+  @{text "x :: \"} is an implicit consequence of the occurrence of
+  @{text "x\<^isub>\"} in the subsequent proposition.
+
+  This twist of dependencies is also accommodated by the reverse
+  operation of exporting results from a context: a type variable
+  @{text "\"} is considered fixed as long as it occurs in some fixed
+  term variable of the context.  For example, exporting @{text "x:
+  term, \: type \ x\<^isub>\ = x\<^isub>\"} produces in the first step
+  @{text "x: term \ x\<^isub>\ = x\<^isub>\"} for fixed @{text "\"},
+  and only in the second step @{text "\ ?x\<^isub>?\<^isub>\ =
+  ?x\<^isub>?\<^isub>\"} for schematic @{text "?x"} and @{text "?\"}.
+
+  \medskip The Isabelle/Isar proof context manages the gory details of
+  term vs.\ type variables, with high-level principles for moving the
+  frontier between fixed and schematic variables.
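+
+  To give a first impression, here is a hypothetical {\ML} sketch of
+  fixing and exporting (using the operations described below; @{ML
+  "Thm.cterm_of"} and the antiquotations @{verbatim "@{context}"} and
+  @{verbatim "@{typ 'a}"} are assumed from the underlying {\ML}
+  environment):
+
+  \begin{verbatim}
+  val ctxt0 = @{context};
+  val ([x], ctxt1) = Variable.add_fixes ["x"] ctxt0;
+  (* a trivial fact about the fixed x *)
+  val th = Thm.reflexive
+    (Thm.cterm_of (ProofContext.theory_of ctxt1) (Free (x, @{typ 'a})));
+  (* export generalizes the fixed x to schematic ?x *)
+  val [th'] = Variable.export ctxt1 ctxt0 [th];
+  \end{verbatim}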
+
+  The @{text "add_fixes"} operation explicitly declares fixed
+  variables; the @{text "declare_term"} operation absorbs a term into
+  a context by fixing new type variables and adding syntactic
+  constraints.
+
+  The @{text "export"} operation is able to perform the main work of
+  generalizing term and type variables as sketched above, assuming
+  that fixed variables and terms have been declared properly.
+
+  The @{text "import"} operation makes a generalized fact a genuine
+  part of the context, by inventing fixed variables for the schematic
+  ones.  The effect can be reversed by using @{text "export"} later,
+  potentially with an extended context; the result is equivalent to
+  the original modulo renaming of schematic variables.
+
+  The @{text "focus"} operation provides a variant of @{text "import"}
+  for nested propositions (with explicit quantification): @{text
+  "\x\<^isub>1 \ x\<^isub>n. B(x\<^isub>1, \, x\<^isub>n)"} is
+  decomposed by inventing fixed variables @{text "x\<^isub>1, \,
+  x\<^isub>n"} for the body.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML Variable.add_fixes: "
+  string list -> Proof.context -> string list * Proof.context"} \\
+  @{index_ML Variable.variant_fixes: "
+  string list -> Proof.context -> string list * Proof.context"} \\
+  @{index_ML Variable.declare_term: "term -> Proof.context -> Proof.context"} \\
+  @{index_ML Variable.declare_constraints: "term -> Proof.context -> Proof.context"} \\
+  @{index_ML Variable.export: "Proof.context -> Proof.context -> thm list -> thm list"} \\
+  @{index_ML Variable.polymorphic: "Proof.context -> term list -> term list"} \\
+  @{index_ML Variable.import_thms: "bool -> thm list -> Proof.context ->
+  ((ctyp list * cterm list) * thm list) * Proof.context"} \\
+  @{index_ML Variable.focus: "cterm -> Proof.context -> (cterm list * cterm) * Proof.context"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML Variable.add_fixes}~@{text "xs ctxt"} fixes term
+  variables @{text "xs"}, returning the resulting internal names.  By
+  default, the internal representation coincides with the external
+  one, which also means that the given variables must not be fixed
+  already.  There is a different policy within a local proof body: the
+  given names are just hints for newly invented Skolem variables.
+
+  \item @{ML Variable.variant_fixes} is similar to @{ML
+  Variable.add_fixes}, but always produces fresh variants of the given
+  names.
+
+  \item @{ML Variable.declare_term}~@{text "t ctxt"} declares term
+  @{text "t"} to belong to the context.  This automatically fixes new
+  type variables, but not term variables.  Syntactic constraints for
+  type and term variables are declared uniformly, though.
+
+  \item @{ML Variable.declare_constraints}~@{text "t ctxt"} declares
+  syntactic constraints from term @{text "t"}, without making it part
+  of the context yet.
+
+  \item @{ML Variable.export}~@{text "inner outer thms"} generalizes
+  fixed type and term variables in @{text "thms"} according to the
+  difference of the @{text "inner"} and @{text "outer"} context,
+  following the principles sketched above.
+
+  \item @{ML Variable.polymorphic}~@{text "ctxt ts"} generalizes type
+  variables in @{text "ts"} as far as possible, even those occurring
+  in fixed term variables.  The default policy of type-inference is to
+  fix newly introduced type variables, which is essentially reversed
+  with @{ML Variable.polymorphic}: here the given terms are detached
+  from the context as far as possible.
+
+  \item @{ML Variable.import_thms}~@{text "open thms ctxt"} invents fixed
+  type and term variables for the schematic ones occurring in @{text
+  "thms"}.  The @{text "open"} flag indicates whether the fixed names
+  should be accessible to the user, otherwise newly introduced names
+  are marked as ``internal'' (\secref{sec:names}).
+
+  \item @{ML Variable.focus}~@{text B} decomposes the outermost @{text
+  "\"} prefix of proposition @{text "B"}.
+
+  \end{description}
+*}
+
+
+section {* Assumptions \label{sec:assumptions} *}
+
+text {*
+  An \emph{assumption} is a proposition that is postulated in the
+  current context.  Local conclusions may use assumptions as
+  additional facts, but this imposes implicit hypotheses that weaken
+  the overall statement.
+
+  Assumptions are restricted to fixed non-schematic statements, i.e.\
+  all generality needs to be expressed by explicit quantifiers.
+  Nevertheless, the result will be in HHF normal form with outermost
+  quantifiers stripped.  For example, by assuming @{text "\x :: \. P
+  x"} we get @{text "\x :: \. P x \ P ?x"} for schematic @{text "?x"}
+  of fixed type @{text "\"}.  Local derivations accumulate more and
+  more explicit references to hypotheses: @{text "A\<^isub>1, \,
+  A\<^isub>n \ B"} where @{text "A\<^isub>1, \, A\<^isub>n"} need to
+  be covered by the assumptions of the current context.
+
+  \medskip The @{text "add_assms"} operation augments the context by
+  local assumptions, which are parameterized by an arbitrary @{text
+  "export"} rule (see below).
+
+  The @{text "export"} operation moves facts from a (larger) inner
+  context into a (smaller) outer context, by discharging the
+  difference of the assumptions as specified by the associated export
+  rules.  Note that the discharged portion is determined by the
+  difference of the contexts, not the facts being exported!  There is
+  a separate flag to indicate a goal context, where the result is
+  meant to refine an enclosing sub-goal of a structured proof state.
+
+  \medskip The most basic export rule discharges assumptions directly
+  by means of the @{text "\"} introduction rule:
+  \[
+  \infer[(@{text "\_intro"})]{@{text "\ \\ A \ A \ B"}}{@{text "\ \ B"}}
+  \]
+
+  The variant for goal refinements marks the newly introduced
+  premises, which causes the canonical Isar goal refinement scheme to
+  enforce unification with local premises within the goal:
+  \[
+  \infer[(@{text "#\_intro"})]{@{text "\ \\ A \ #A \ B"}}{@{text "\ \ B"}}
+  \]
+
+  \medskip Alternative versions of assumptions may perform arbitrary
+  transformations on export, as long as the corresponding portion of
+  hypotheses is removed from the given facts.  For example, a local
+  definition works by fixing @{text "x"} and assuming @{text "x \ t"},
+  with the following export rule to reverse the effect:
+  \[
+  \infer[(@{text "\-expand"})]{@{text "\ \\ x \ t \ B t"}}{@{text "\ \ B x"}}
+  \]
+  This works because the assumption @{text "x \ t"} was introduced in
+  a context with @{text "x"} being fresh, so @{text "x"} does not
+  occur in @{text "\"} here.
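+
+  \medskip For example, here is a hypothetical {\ML} sketch of
+  assuming and discharging (using the interface below; @{verbatim
+  "@{cprop ...}"} is an antiquotation producing a certified
+  proposition):
+
+  \begin{verbatim}
+  val ctxt0 = @{context};
+  val ([th], ctxt1) =
+    Assumption.add_assumes [@{cprop "PROP A"}] ctxt0;
+  (* th is the hypothetical theorem "PROP A |- PROP A" *)
+  val th' = Assumption.export false ctxt1 ctxt0 th;
+  (* th' is "PROP A ==> PROP A", the hypothesis discharged *)
+  \end{verbatim}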
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_type Assumption.export} \\
+  @{index_ML Assumption.assume: "cterm -> thm"} \\
+  @{index_ML Assumption.add_assms:
+  "Assumption.export ->
+  cterm list -> Proof.context -> thm list * Proof.context"} \\
+  @{index_ML Assumption.add_assumes: "
+  cterm list -> Proof.context -> thm list * Proof.context"} \\
+  @{index_ML Assumption.export: "bool -> Proof.context -> Proof.context -> thm -> thm"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_type Assumption.export} represents arbitrary export
+  rules: any function of type @{ML_type "bool -> cterm list -> thm -> thm"},
+  where the @{ML_type "bool"} indicates goal mode, and the @{ML_type
+  "cterm list"} the collection of assumptions to be discharged
+  simultaneously.
+
+  \item @{ML Assumption.assume}~@{text "A"} turns proposition @{text
+  "A"} into a raw assumption @{text "A \ A'"}, where the conclusion
+  @{text "A'"} is in HHF normal form.
+
+  \item @{ML Assumption.add_assms}~@{text "r As"} augments the context
+  by assumptions @{text "As"} with export rule @{text "r"}.  The
+  resulting facts are hypothetical theorems as produced by the raw
+  @{ML Assumption.assume}.
+
+  \item @{ML Assumption.add_assumes}~@{text "As"} is a special case of
+  @{ML Assumption.add_assms} where the export rule performs @{text
+  "\_intro"} or @{text "#\_intro"}, depending on goal mode.
+
+  \item @{ML Assumption.export}~@{text "is_goal inner outer thm"}
+  exports result @{text "thm"} from the @{text "inner"} context
+  back into the @{text "outer"} one; @{text "is_goal = true"} means
+  this is a goal context.  The result is in HHF normal form.  Note
+  that @{ML "ProofContext.export"} combines @{ML "Variable.export"}
+  and @{ML "Assumption.export"} in the canonical way.
+
+  \end{description}
+*}
+
+
+section {* Results \label{sec:results} *}
+
+text {*
+  Local results are established by monotonic reasoning from facts
+  within a context.  This allows common combinations of theorems,
+  e.g.\ via @{text "\/\"} elimination, resolution rules, or equational
+  reasoning; see \secref{sec:thms}.  Unaccounted context manipulations
+  should be avoided, notably raw @{text "\/\"} introduction or ad-hoc
+  references to free variables or assumptions not present in the proof
+  context.
+
+  \medskip The @{text "SUBPROOF"} combinator makes it possible to
+  structure a tactical proof recursively by decomposing a selected
+  sub-goal: @{text "(\x. A(x) \ B(x)) \ \"} is turned into @{text
+  "B(x) \ \"} after fixing @{text "x"} and assuming @{text "A(x)"}.
+  This means the tactic needs to solve the conclusion, but may use the
+  premise as a local fact, for locally fixed variables.
+
+  The @{text "prove"} operation provides an interface for structured
+  backwards reasoning under program control, with some explicit sanity
+  checks of the result.  The goal context can be augmented by
+  additional fixed variables (cf.\ \secref{sec:variables}) and
+  assumptions (cf.\ \secref{sec:assumptions}), which will be available
+  as local facts during the proof and discharged into implications in
+  the result.  Type and term variables are generalized as usual,
+  according to the context.
+
+  The @{text "obtain"} operation produces results by eliminating
+  existing facts by means of a given tactic.  This acts like a dual
+  conclusion: the proof demonstrates that the context may be augmented
+  by certain fixed variables and assumptions.  See also
+  \cite{isabelle-isar-ref} for the user-level @{text "\"} and
+  @{text "\"} elements.
+  Final results, which may not refer to the parameters in the
+  conclusion, need to be exported explicitly into the original
+  context.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML SUBPROOF:
+  "({context: Proof.context, schematics: ctyp list * cterm list,
+    params: cterm list, asms: cterm list, concl: cterm,
+    prems: thm list} -> tactic) -> Proof.context -> int -> tactic"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML Goal.prove: "Proof.context -> string list -> term list -> term ->
+  ({prems: thm list, context: Proof.context} -> tactic) -> thm"} \\
+  @{index_ML Goal.prove_multi: "Proof.context -> string list -> term list -> term list ->
+  ({prems: thm list, context: Proof.context} -> tactic) -> thm list"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML Obtain.result: "(Proof.context -> tactic) ->
+  thm list -> Proof.context -> (cterm list * thm list) * Proof.context"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML SUBPROOF}~@{text "tac ctxt i"} decomposes the structure
+  of the specified sub-goal, producing an extended context and a
+  reduced goal, which needs to be solved by the given tactic.  All
+  schematic parameters of the goal are imported into the context as
+  fixed ones, which may not be instantiated in the sub-proof.
+
+  \item @{ML Goal.prove}~@{text "ctxt xs As C tac"} states goal @{text
+  "C"} in the context augmented by fixed variables @{text "xs"} and
+  assumptions @{text "As"}, and applies tactic @{text "tac"} to solve
+  it.  The latter may depend on the local assumptions being presented
+  as facts.  The result is in HHF normal form.
+
+  \item @{ML Goal.prove_multi} is similar to @{ML Goal.prove}, but
+  states several conclusions simultaneously.  The goal is encoded by
+  means of Pure conjunction; @{ML Goal.conjunction_tac} will turn this
+  into a collection of individual subgoals.
+
+  \item @{ML Obtain.result}~@{text "tac thms ctxt"} eliminates the
+  given facts using a tactic, which results in additional fixed
+  variables and assumptions in the context.  Final results need to be
+  exported explicitly.
+
+  \end{description}
+*}
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Syntax.thy
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Tactic.thy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Tactic.thy	Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,405 @@
+theory Tactic
+imports Base
+begin
+
+chapter {* Tactical reasoning *}
+
+text {*
+  Tactical reasoning works by refining the initial claim in a
+  backwards fashion, until a solved form is reached.  A @{text "goal"}
+  consists of several subgoals that need to be solved in order to
+  achieve the main statement; zero subgoals means that the proof may
+  be finished.  A @{text "tactic"} is a refinement operation that maps
+  a goal to a lazy sequence of potential successors.  A @{text
+  "tactical"} is a combinator for composing tactics.
+*}
+
+
+section {* Goals \label{sec:tactical-goals} *}
+
+text {*
+  Isabelle/Pure represents a goal as a theorem stating that the
+  subgoals imply the main goal: @{text "A\<^sub>1 \ \ \ A\<^sub>n \
+  C"}.  The outermost goal structure is that of a Horn Clause, i.e.\
+  an iterated implication without any quantifiers\footnote{Recall that
+  outermost @{text "\x. \[x]"} is always represented via schematic
+  variables in the body: @{text "\[?x]"}.  These variables may get
+  instantiated during the course of reasoning.}.  For @{text "n = 0"}
+  a goal is called ``solved''.
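+
+  For example, here is a hypothetical {\ML} sketch of the initial
+  goal state for a claim @{text "C"} (using the operations described
+  below; the protective marker @{text "#"} is explained shortly):
+
+  \begin{verbatim}
+  val goal = Goal.init @{cprop "PROP C"};
+  (* "PROP C ==> #(PROP C)": one subgoal, protected conclusion *)
+  \end{verbatim}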
+
+  The structure of each subgoal @{text "A\<^sub>i"} is that of a
+  general Hereditary Harrop Formula @{text "\x\<^sub>1 \
+  \x\<^sub>k. H\<^sub>1 \ \ \ H\<^sub>m \ B"}.  Here @{text
+  "x\<^sub>1, \, x\<^sub>k"} are goal parameters, i.e.\
+  arbitrary-but-fixed entities of certain types, and @{text
+  "H\<^sub>1, \, H\<^sub>m"} are goal hypotheses, i.e.\ facts that may
+  be assumed locally.  Together, this forms the goal context of the
+  conclusion @{text B} to be established.  The goal hypotheses may be
+  again arbitrary Hereditary Harrop Formulas, although the level of
+  nesting rarely exceeds 1--2 in practice.
+
+  The main conclusion @{text C} is internally marked as a protected
+  proposition, which is represented explicitly by the notation @{text
+  "#C"}.  This ensures that the decomposition into subgoals and main
+  conclusion is well-defined for arbitrarily structured claims.
+
+  \medskip Basic goal management is performed via the following
+  Isabelle/Pure rules:
+
+  \[
+  \infer[@{text "(init)"}]{@{text "C \ #C"}}{} \qquad
+  \infer[@{text "(finish)"}]{@{text "C"}}{@{text "#C"}}
+  \]
+
+  \medskip The following low-level variants admit general reasoning
+  with protected propositions:
+
+  \[
+  \infer[@{text "(protect)"}]{@{text "#C"}}{@{text "C"}} \qquad
+  \infer[@{text "(conclude)"}]{@{text "A\<^sub>1 \ \ \ A\<^sub>n \ C"}}{@{text "A\<^sub>1 \ \ \ A\<^sub>n \ #C"}}
+  \]
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML Goal.init: "cterm -> thm"} \\
+  @{index_ML Goal.finish: "thm -> thm"} \\
+  @{index_ML Goal.protect: "thm -> thm"} \\
+  @{index_ML Goal.conclude: "thm -> thm"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML "Goal.init"}~@{text C} initializes a tactical goal from
+  the well-formed proposition @{text C}.
+
+  \item @{ML "Goal.finish"}~@{text "thm"} checks whether theorem
+  @{text "thm"} is a solved goal (no subgoals), and concludes the
+  result by removing the goal protection.
+
+  \item @{ML "Goal.protect"}~@{text "thm"} protects the full statement
+  of theorem @{text "thm"}.
+
+  \item @{ML "Goal.conclude"}~@{text "thm"} removes the goal
+  protection, even if there are pending subgoals.
+
+  \end{description}
+*}
+
+
+section {* Tactics *}
+
+text {* A @{text "tactic"} is a function @{text "goal \ goal\<^sup>*\<^sup>*"} that
+  maps a given goal state (represented as a theorem, cf.\
+  \secref{sec:tactical-goals}) to a lazy sequence of potential
+  successor states.  The underlying sequence implementation is lazy
+  both in head and tail, and is purely functional in \emph{not}
+  supporting memoing.\footnote{The lack of memoing and the strict
+  nature of SML require some care when working with low-level
+  sequence operations, to avoid duplicate or premature evaluation of
+  results.}
+
+  An \emph{empty result sequence} means that the tactic has failed: in
+  a compound tactic expression, other tactics might be tried instead,
+  or the whole refinement step might fail outright, producing a
+  toplevel error message.  When implementing tactics from scratch, one
+  should take care to observe the basic protocol of mapping regular
+  error conditions to an empty result; only serious faults should
+  emerge as exceptions.
+
+  By enumerating \emph{multiple results}, a tactic can easily express
+  the potential outcome of an internal search process.  There are also
+  combinators for building proof tools that involve search
+  systematically; see also \secref{sec:tacticals}.
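+
+  For example, the two extreme cases can be written directly as
+  sequence operations (a hypothetical sketch; the canonical versions
+  @{ML all_tac} and @{ML no_tac} are described below):
+
+  \begin{verbatim}
+  val succeed : tactic = fn st => Seq.single st;  (* one result *)
+  val fail    : tactic = fn st => Seq.empty;      (* no results *)
+  \end{verbatim}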
+
+  \medskip As explained in \secref{sec:tactical-goals}, a goal state
+  essentially consists of a list of subgoals that imply the main goal
+  (conclusion).  Tactics may operate on all subgoals or on a specific
+  subgoal, but must not change the main conclusion (apart from
+  instantiating schematic goal variables).
+
+  Tactics with explicit \emph{subgoal addressing} are of the form
+  @{text "int \ tactic"} and may be applied to a particular subgoal
+  (counting from 1).  If the subgoal number is out of range, the
+  tactic should fail with an empty result sequence, but must not raise
+  an exception!
+
+  Operating on a particular subgoal means replacing it by an interval
+  of zero or more subgoals in the same place; other subgoals must not
+  be affected, apart from instantiating schematic variables ranging
+  over the whole goal state.
+
+  A common pattern of composing tactics with subgoal addressing is to
+  try the first one, and then the second one only if the subgoal has
+  not been solved yet.  Special care is required here to avoid bumping
+  into unrelated subgoals that happen to come after the original
+  subgoal.  Assuming that there is only a single initial subgoal is a
+  very common error when implementing tactics!
+
+  Tactics with internal subgoal addressing should expose the subgoal
+  index as @{text "int"} argument in full generality; a hardwired
+  subgoal 1 is inappropriate.
+
+  \medskip The main well-formedness conditions for proper tactics are
+  summarized as follows.
+
+  \begin{itemize}
+
+  \item General tactic failure is indicated by an empty result; only
+  serious faults may produce an exception.
+
+  \item The main conclusion must not be changed, apart from
+  instantiating schematic variables.
+
+  \item A tactic operates either uniformly on all subgoals, or
+  specifically on a selected subgoal (without bumping into unrelated
+  subgoals).
+
+  \item Range errors in subgoal addressing produce an empty result.
+
+  \end{itemize}
+
+  Some of these conditions are checked by higher-level goal
+  infrastructure (\secref{sec:results}); others are not checked
+  explicitly, and violating them merely results in ill-behaved tactics
+  experienced by the user (e.g.\ tactics that insist on being
+  applicable only to singleton goals, or disallow composition with
+  basic tacticals).
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_type tactic: "thm -> thm Seq.seq"} \\
+  @{index_ML no_tac: tactic} \\
+  @{index_ML all_tac: tactic} \\
+  @{index_ML print_tac: "string -> tactic"} \\[1ex]
+  @{index_ML PRIMITIVE: "(thm -> thm) -> tactic"} \\[1ex]
+  @{index_ML SUBGOAL: "(term * int -> tactic) -> int -> tactic"} \\
+  @{index_ML CSUBGOAL: "(cterm * int -> tactic) -> int -> tactic"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_type tactic} represents tactics.  The well-formedness
+  conditions described above need to be observed.  See also @{"file"
+  "~~/src/Pure/General/seq.ML"} for the underlying implementation of
+  lazy sequences.
+
+  \item @{ML_type "int -> tactic"} represents tactics with explicit
+  subgoal addressing, with well-formedness conditions as described
+  above.
+
+  \item @{ML no_tac} is a tactic that always fails, returning the
+  empty sequence.
+
+  \item @{ML all_tac} is a tactic that always succeeds, returning a
+  singleton sequence with unchanged goal state.
+
+  \item @{ML print_tac}~@{text "message"} is like @{ML all_tac}, but
+  prints a message together with the goal state on the tracing
+  channel.
+
+
+subsection {* Resolution and assumption tactics \label{sec:resolve-assume-tac} *}
+
+text {* \emph{Resolution} is the most basic mechanism for refining a
+  subgoal using a theorem as an object-level rule.
+  \emph{Elim-resolution} is particularly suited for elimination rules:
+  it resolves with a rule, proves its first premise by assumption, and
+  finally deletes that assumption from any new subgoals.
+  \emph{Destruct-resolution} is like elim-resolution, but the given
+  destruction rules are first turned into canonical elimination
+  format.  \emph{Forward-resolution} is like destruct-resolution, but
+  without deleting the selected assumption.  The @{text "r/e/d/f"}
+  naming convention is maintained for several different kinds of
+  resolution rules and tactics.
+
+  Assumption tactics close a subgoal by unifying some of its premises
+  against its conclusion.
+
+  \medskip All the tactics in this section operate on a subgoal
+  designated by a positive integer.  Other subgoals might be affected
+  indirectly, due to instantiation of schematic variables.
+
+  There are various sources of non-determinism; the tactic result
+  sequence enumerates all possibilities of the following choices (if
+  applicable):
+
+  \begin{enumerate}
+
+  \item selecting one of the rules given as argument to the tactic;
+
+  \item selecting a subgoal premise to eliminate, unifying it against
+  the first premise of the rule;
+
+  \item unifying the conclusion of the subgoal against the conclusion
+  of the rule.
+
+  \end{enumerate}
+
+  Recall that higher-order unification may produce multiple results
+  that are enumerated here.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML resolve_tac: "thm list -> int -> tactic"} \\
+  @{index_ML eresolve_tac: "thm list -> int -> tactic"} \\
+  @{index_ML dresolve_tac: "thm list -> int -> tactic"} \\
+  @{index_ML forward_tac: "thm list -> int -> tactic"} \\[1ex]
+  @{index_ML assume_tac: "int -> tactic"} \\
+  @{index_ML eq_assume_tac: "int -> tactic"} \\[1ex]
+  @{index_ML match_tac: "thm list -> int -> tactic"} \\
+  @{index_ML ematch_tac: "thm list -> int -> tactic"} \\
+  @{index_ML dmatch_tac: "thm list -> int -> tactic"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML resolve_tac}~@{text "thms i"} refines the goal state
+  using the given theorems, which should normally be introduction
+  rules.  The tactic resolves a rule's conclusion with subgoal @{text
+  i}, replacing it by the corresponding versions of the rule's
+  premises.
+
+  \item @{ML eresolve_tac}~@{text "thms i"} performs elim-resolution
+  with the given theorems, which should normally be elimination rules.
+
+  \item @{ML dresolve_tac}~@{text "thms i"} performs
+  destruct-resolution with the given theorems, which should normally
+  be destruction rules.  This replaces an assumption by the result of
+  applying one of the rules.
+
+  \item @{ML forward_tac} is like @{ML dresolve_tac} except that the
+  selected assumption is not deleted.  It applies a rule to an
+  assumption, adding the result as a new assumption.
+
+  \item @{ML assume_tac}~@{text i} attempts to solve subgoal @{text i}
+  by assumption (modulo higher-order unification).
+
+  \item @{ML eq_assume_tac} is similar to @{ML assume_tac}, but checks
+  only for immediate @{text "\<alpha>"}-convertibility instead of using
+  unification.  It succeeds (with a unique next state) if one of the
+  assumptions is equal to the subgoal's conclusion.  Since it does not
+  instantiate variables, it cannot make other subgoals unprovable.
+
+  \item @{ML match_tac}, @{ML ematch_tac}, and @{ML dmatch_tac} are
+  similar to @{ML resolve_tac}, @{ML eresolve_tac}, and @{ML
+  dresolve_tac}, respectively, but do not instantiate schematic
+  variables in the goal state.
+
+  Flexible subgoals are not updated at will, but are left alone.
+  Strictly speaking, matching means treating the unknowns in the goal
+  state as constants; these tactics merely discard unifiers that would
+  update the goal state.
+
+  \end{description}
+*}
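+
+text {* A typical composition of these tactics, given here as a
+  sketch (the function name is made up; @{text rules} stands for
+  arbitrary introduction rules): first try to refine the subgoal with
+  one of the given rules, and only if that fails, close it by
+  assumption.  The @{text ORELSE} tactical (cf.\
+  \secref{sec:tacticals}) tries its second argument only if the first
+  one returned an empty result: *}
+
+ML {*
+  (*sketch: refine subgoal i with one of the given rules, or else
+    solve it by assumption; an empty result from resolve_tac makes
+    ORELSE fall through to assume_tac on the same subgoal*)
+  fun intro_or_assume_tac rules i =
+    resolve_tac rules i ORELSE assume_tac i;
+*}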
+
+
+subsection {* Explicit instantiation within a subgoal context *}
+
+text {* The main resolution tactics (\secref{sec:resolve-assume-tac})
+  use higher-order unification, which works well in many practical
+  situations despite its daunting theoretical properties.
+  Nonetheless, there are important problem classes where unguided
+  higher-order unification is not so useful.  This typically involves
+  rules like universal elimination, existential introduction, or
+  equational substitution.  Here the unification problem involves
+  fully flexible @{text "?P ?x"} schemes, which are hard to manage
+  without further hints.
+
+  By providing a (small) rigid term for @{text "?x"} explicitly, the
+  remaining unification problem is to assign a (large) term to @{text
+  "?P"}, according to the shape of the given subgoal.  This is
+  sufficiently well-behaved in most practical situations.
+
+  \medskip Isabelle provides separate versions of the standard @{text
+  "r/e/d/f"} resolution tactics that allow one to provide explicit
+  instantiations for unknowns of the given rule, wrt.\ terms that
+  refer to the implicit context of the selected subgoal.
+
+  An instantiation consists of a list of pairs of the form @{text
+  "(?x, t)"}, where @{text ?x} is a schematic variable occurring in
+  the given rule, and @{text t} is a term from the current proof
+  context, augmented by the local goal parameters of the selected
+  subgoal; cf.\ the @{text "focus"} operation described in
+  \secref{sec:variables}.
+
+  Entering the syntactic context of a subgoal is a brittle operation,
+  because its exact form is somewhat accidental, and the choice of
+  bound variable names depends on the presence of other local and
+  global names.  Explicit renaming of subgoal parameters prior to
+  explicit instantiation might help to achieve a bit more robustness.
+
+  Type instantiations may be given as well, via pairs like @{text
+  "(?'a, \<tau>)"}.  Type instantiations are distinguished from term
+  instantiations by the syntactic form of the schematic variable.
+  Types are instantiated before terms are.  Since term instantiation
+  already performs type-inference as expected, explicit type
+  instantiations are seldom necessary.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML res_inst_tac: "Proof.context -> (indexname * string) list -> thm -> int -> tactic"} \\
+  @{index_ML eres_inst_tac: "Proof.context -> (indexname * string) list -> thm -> int -> tactic"} \\
+  @{index_ML dres_inst_tac: "Proof.context -> (indexname * string) list -> thm -> int -> tactic"} \\
+  @{index_ML forw_inst_tac: "Proof.context -> (indexname * string) list -> thm -> int -> tactic"} \\[1ex]
+  @{index_ML rename_tac: "string list -> int -> tactic"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML res_inst_tac}~@{text "ctxt insts thm i"} instantiates the
+  rule @{text thm} with the instantiations @{text insts}, as described
+  above, and then performs resolution on subgoal @{text i}.
+
+  \item @{ML eres_inst_tac} is like @{ML res_inst_tac}, but performs
+  elim-resolution.
+
+  \item @{ML dres_inst_tac} is like @{ML res_inst_tac}, but performs
+  destruct-resolution.
+
+  \item @{ML forw_inst_tac} is like @{ML dres_inst_tac} except that
+  the selected assumption is not deleted.
+
+  \item @{ML rename_tac}~@{text "names i"} renames the innermost
+  parameters of subgoal @{text i} according to the provided @{text
+  names} (which need to be distinct identifiers).
+
+  \end{description}
+*}
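+
+text {* A hypothetical sketch of explicit instantiation (the function
+  name is made up, and the given @{text rule} is assumed to contain a
+  schematic term variable @{text "?x"}): renaming the subgoal
+  parameter first makes the textual instantiation less sensitive to
+  accidental choices of bound variable names, as recommended above: *}
+
+ML {*
+  (*sketch: rename the innermost parameter of subgoal i to "x", then
+    instantiate ?x of the rule by the string "x", which is parsed in
+    the context of the subgoal and thus refers to that parameter*)
+  fun inst_with_param_tac ctxt rule i =
+    rename_tac ["x"] i
+    THEN res_inst_tac ctxt [(("x", 0), "x")] rule i;
+*}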
+
+
+section {* Tacticals \label{sec:tacticals} *}
+
+text {*
+  A \emph{tactical} is a functional combinator for building up complex
+  tactics from simpler ones.  Typical tacticals perform sequential
+  composition, disjunction (choice), iteration, or goal addressing.
+  Various search strategies may be expressed via tacticals.
+
+  \medskip FIXME
+*}
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/document/ML.tex
--- a/doc-src/IsarImplementation/Thy/document/ML.tex	Wed Mar 04 17:12:23 2009 -0800
+++ b/doc-src/IsarImplementation/Thy/document/ML.tex	Thu Mar 05 02:32:46 2009 +0100
@@ -785,7 +785,6 @@
 \isadelimtheory
 %
 \endisadelimtheory
-\isanewline
 \end{isabellebody}%
 %%% Local Variables:
 %%% mode: latex
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarRef/Thy/document/Generic.tex
--- a/doc-src/IsarRef/Thy/document/Generic.tex	Wed Mar 04 17:12:23 2009 -0800
+++ b/doc-src/IsarRef/Thy/document/Generic.tex	Thu Mar 05 02:32:46 2009 +0100
@@ -503,7 +503,7 @@
 \item \hyperlink{command.simproc-setup}{\mbox{\isa{\isacommand{simproc{\isacharunderscore}setup}}}} defines
   a named simplification procedure that is invoked by the Simplifier
   whenever any of the given term patterns match the current redex.
   The implementation,
-  which is provided as ML source text, needs to be of type \verb|"morphism -> simpset -> cterm -> thm option"|, where the \verb|cterm| represents the current redex \isa{r} and the result is
+  which is provided as ML source text, needs to be of type \verb|morphism -> simpset -> cterm -> thm option|, where the \verb|cterm| represents the current redex \isa{r} and the result is
   supposed to be some proven rewrite rule \isa{{\isachardoublequote}r\ {\isasymequiv}\ r{\isacharprime}{\isachardoublequote}} (or a generalized version), or \verb|NONE| to indicate failure.  The \verb|simpset| argument holds the full context of the current