# HG changeset patch
# User wenzelm
# Date 1236216766 -3600
# Node ID 44832d5036598dc35d4caa5f893daade32956cc5
# Parent ecd6f0ca62ea58dfb517e89fa9c8aed6f820f30a
# Parent 2d612824e642bff09a1b07231a2668e2a21ef1d4
merged

diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Base.thy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Base.thy Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,6 @@
+theory Base
+imports Pure
+uses "../../antiquote_setup.ML"
+begin
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Integration.thy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Integration.thy Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,425 @@
+theory Integration
+imports Base
+begin
+
+chapter {* System integration *}
+
+section {* Isar toplevel \label{sec:isar-toplevel} *}
+
+text {* The Isar toplevel may be considered the central hub of the Isabelle/Isar system, where all key components and sub-systems are integrated into a single read-eval-print loop of Isar commands.  We shall even incorporate the existing {\ML} toplevel of the compiler and run-time system (cf.\ \secref{sec:ML-toplevel}).
+
+  Isabelle/Isar departs from the original ``LCF system architecture'' where {\ML} was really The Meta Language for defining theories and conducting proofs.  Instead, {\ML} now only serves as the implementation language for the system (and user extensions), while the specific Isar toplevel supports the concepts of theory and proof development natively.  This includes the graph structure of theories and the block structure of proofs, support for unlimited undo, facilities for tracing, debugging, timing, profiling etc.
+
+  \medskip The toplevel maintains an implicit state, which is transformed by a sequence of transitions -- either interactively or in batch-mode.  In interactive mode, Isar state transitions are encapsulated as safe transactions, such that both failure and undo are handled conveniently without destroying the underlying draft theory (cf.~\secref{sec:context-theory}).  In batch mode, transitions operate in a linear (destructive) fashion, such that error conditions abort the present attempt to construct a theory or proof altogether.
+
+  The toplevel state is a disjoint sum of empty @{text toplevel}, or @{text theory}, or @{text proof}.  On entering the main Isar loop we start with an empty toplevel.  A theory is commenced by giving a @{text \} header; within a theory we may issue theory commands such as @{text \}, or state a @{text \} to be proven.  Now we are within a proof state, with a rich collection of Isar proof commands for structured proof composition, or unstructured proof scripts.  When the proof is concluded we get back to the theory, which is then updated by storing the resulting fact.  Further theory declarations or theorem statements with proofs may follow, until we eventually conclude the theory development by issuing @{text \}.  The resulting theory is then stored within the theory database and we are back to the empty toplevel.
+
+  In addition to these proper state transformations, there are also some diagnostic commands for peeking at the toplevel state without modifying it (e.g.\ \isakeyword{thm}, \isakeyword{term}, \isakeyword{print-cases}).
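+
+  \medskip As a minimal illustration (a hypothetical interactive session sketch, using only the interfaces documented in this chapter), the toplevel state may be inspected from {\ML} after dropping out of the Isar loop (cf.\ \secref{sec:ML-toplevel}):
+
+\begin{verbatim}
+  val st = Isar.state ();            (*current toplevel state*)
+  Toplevel.is_toplevel st;           (*true at the empty toplevel*)
+  val thy = Toplevel.theory_of st;   (*raises Toplevel.UNDEF if no theory*)
+\end{verbatim}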
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_type Toplevel.state} \\
+  @{index_ML Toplevel.UNDEF: "exn"} \\
+  @{index_ML Toplevel.is_toplevel: "Toplevel.state -> bool"} \\
+  @{index_ML Toplevel.theory_of: "Toplevel.state -> theory"} \\
+  @{index_ML Toplevel.proof_of: "Toplevel.state -> Proof.state"} \\
+  @{index_ML Toplevel.debug: "bool ref"} \\
+  @{index_ML Toplevel.timing: "bool ref"} \\
+  @{index_ML Toplevel.profiling: "int ref"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_type Toplevel.state} represents Isar toplevel states, which are normally manipulated through the concept of toplevel transitions only (\secref{sec:toplevel-transition}).  Also note that a raw toplevel state is subject to the same linearity restrictions as a theory context (cf.~\secref{sec:context-theory}).
+
+  \item @{ML Toplevel.UNDEF} is raised for undefined toplevel operations.  Many operations work only partially for certain cases, since @{ML_type Toplevel.state} is a sum type.
+
+  \item @{ML Toplevel.is_toplevel}~@{text "state"} checks for an empty toplevel state.
+
+  \item @{ML Toplevel.theory_of}~@{text "state"} selects the theory of a theory or proof (!), otherwise raises @{ML Toplevel.UNDEF}.
+
+  \item @{ML Toplevel.proof_of}~@{text "state"} selects the Isar proof state if available, otherwise raises @{ML Toplevel.UNDEF}.
+
+  \item @{ML "set Toplevel.debug"} makes the toplevel print further details about internal error conditions, exceptions being raised etc.
+
+  \item @{ML "set Toplevel.timing"} makes the toplevel print timing information for each Isar command being executed.
+
+  \item @{ML Toplevel.profiling}~@{verbatim ":="}~@{text "n"} controls low-level profiling of the underlying {\ML} runtime system.  For Poly/ML, @{text "n = 1"} means time and @{text "n = 2"} space profiling.
+
+  \end{description}
+*}
+
+
+subsection {* Toplevel transitions \label{sec:toplevel-transition} *}
+
+text {*
+  An Isar toplevel transition consists of a partial function on the toplevel state, with additional information for diagnostics and error reporting: there are fields for command name, source position, optional source text, as well as flags for interactive-only commands (which issue a warning in batch-mode), printing of result state, etc.
+
+  The operational part is represented as the sequential union of a list of partial functions, which are tried in turn until the first one succeeds.  This acts like an outer case-expression for various alternative state transitions.  For example, \isakeyword{qed} acts differently for a local proof vs.\ the global ending of the main proof.
+
+  Toplevel transitions are composed via transition transformers. Internally, Isar commands are put together from an empty transition extended by name and source position (and optional source text).  It is then left to the individual command parser to turn the given concrete syntax into a suitable transition transformer that adjoins actual operations on a theory or proof state etc.
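+
+  \medskip As a minimal sketch (hypothetical examples, built from the combinators documented below; @{ML "Context.theory_name"} and @{ML "Sign.add_path"} are assumed from the surrounding library), transition transformers for a diagnostic and a proper theory command might look as follows:
+
+\begin{verbatim}
+  (*diagnostic: print the name of the current theory*)
+  val report_thy = Toplevel.keep (fn state =>
+    writeln (Context.theory_name (Toplevel.theory_of state)));
+
+  (*proper update of the theory under construction*)
+  val add_path_foo = Toplevel.theory (Sign.add_path "foo");
+\end{verbatim}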
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML Toplevel.print: "Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.no_timing: "Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.keep: "(Toplevel.state -> unit) -> Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.theory: "(theory -> theory) -> Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.theory_to_proof: "(theory -> Proof.state) -> Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.proof: "(Proof.state -> Proof.state) -> Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.proofs: "(Proof.state -> Proof.state Seq.seq) -> Toplevel.transition -> Toplevel.transition"} \\
+  @{index_ML Toplevel.end_proof: "(bool -> Proof.state -> Proof.context) -> Toplevel.transition -> Toplevel.transition"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML Toplevel.print}~@{text "tr"} sets the print flag, which causes the toplevel loop to echo the result state (in interactive mode).
+
+  \item @{ML Toplevel.no_timing}~@{text "tr"} indicates that the transition should never show timing information, e.g.\ because it is a diagnostic command.
+
+  \item @{ML Toplevel.keep}~@{text "tr"} adjoins a diagnostic function.
+
+  \item @{ML Toplevel.theory}~@{text "tr"} adjoins a theory transformer.
+
+  \item @{ML Toplevel.theory_to_proof}~@{text "tr"} adjoins a global goal function, which turns a theory into a proof state.  The theory may be changed before entering the proof; the generic Isar goal setup includes an argument that specifies how to apply the proven result to the theory, when the proof is finished.
+
+  \item @{ML Toplevel.proof}~@{text "tr"} adjoins a deterministic proof command, with a singleton result.
+
+  \item @{ML Toplevel.proofs}~@{text "tr"} adjoins a general proof command, with zero or more result states (represented as a lazy list).
+
+  \item @{ML Toplevel.end_proof}~@{text "tr"} adjoins a concluding proof command that returns the resulting theory, after storing the resulting facts in the context etc.
+
+  \end{description}
+*}
+
+
+subsection {* Toplevel control *}
+
+text {*
+  There are a few special control commands that modify the behavior of the toplevel itself, and only make sense in interactive mode.  Under normal circumstances, the user encounters these only implicitly as part of the protocol between the Isabelle/Isar system and a user-interface such as ProofGeneral.
+
+  \begin{description}
+
+  \item \isacommand{undo} follows the three-level hierarchy of empty toplevel vs.\ theory vs.\ proof: undo within a proof reverts to the previous proof context, undo after a proof reverts to the theory before the initial goal statement, undo of a theory command reverts to the previous theory value, undo of a theory header discontinues the current theory development and removes it from the theory database (\secref{sec:theory-database}).
+
+  \item \isacommand{kill} aborts the current level of development: kill in a proof context reverts to the theory before the initial goal statement, kill in a theory context aborts the current theory development, removing it from the database.
+
+  \item \isacommand{exit} drops out of the Isar toplevel into the underlying {\ML} toplevel (\secref{sec:ML-toplevel}).  The Isar toplevel state is preserved and may be continued later.
+
+  \item \isacommand{quit} terminates the Isabelle/Isar process without saving.
+
+  \end{description}
+*}
+
+
+section {* ML toplevel \label{sec:ML-toplevel} *}
+
+text {*
+  The {\ML} toplevel provides a read-compile-eval-print loop for {\ML} values, types, structures, and functors.  {\ML} declarations operate on the global system state, which consists of the compiler environment plus the values of {\ML} reference variables.  There is no clean way to undo {\ML} declarations, except for reverting to a previously saved state of the whole Isabelle process.  {\ML} input is either read interactively from a TTY, or from a string (usually within a theory text), or from a source file (usually loaded from a theory).
+
+  Whenever the {\ML} toplevel is active, the current Isabelle theory context is passed as an internal reference variable.  Thus {\ML} code may access the theory context during compilation; it may even change the value of a theory under construction --- while observing the usual linearity restrictions (cf.~\secref{sec:context-theory}).
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML the_context: "unit -> theory"} \\
+  @{index_ML "Context.>> ": "(Context.generic -> Context.generic) -> unit"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML "the_context ()"} refers to the theory context of the {\ML} toplevel --- at compile time!  {\ML} code needs to take care to refer to @{ML "the_context ()"} correctly.  Recall that evaluation of a function body is delayed until actual runtime.  Moreover, persistent {\ML} toplevel bindings to an unfinished theory should be avoided: code should either project out the desired information immediately, or produce an explicit @{ML_type theory_ref} (cf.\ \secref{sec:context-theory}).
+
+  \item @{ML "Context.>>"}~@{text f} applies context transformation @{text f} to the implicit context of the {\ML} toplevel.
+
+  \end{description}
+
+  It is very important to note that the above functions are really restricted to compile time, even though the {\ML} compiler is invoked at runtime!  The majority of {\ML} code uses explicit functional arguments of a theory or proof context instead.  Thus it may be invoked for an arbitrary context later on, without having to worry about any operational details.
+
+  \bigskip
+
+  \begin{mldecls}
+  @{index_ML Isar.main: "unit -> unit"} \\
+  @{index_ML Isar.loop: "unit -> unit"} \\
+  @{index_ML Isar.state: "unit -> Toplevel.state"} \\
+  @{index_ML Isar.exn: "unit -> (exn * string) option"} \\
+  @{index_ML Isar.context: "unit -> Proof.context"} \\
+  @{index_ML Isar.goal: "unit -> thm"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML "Isar.main ()"} invokes the Isar toplevel from {\ML}, initializing an empty toplevel state.
+
+  \item @{ML "Isar.loop ()"} continues the Isar toplevel with the current state, after having dropped out of the Isar toplevel loop.
+
+  \item @{ML "Isar.state ()"} and @{ML "Isar.exn ()"} get current toplevel state and error condition, respectively.  This only works after having dropped out of the Isar toplevel loop.
+
+  \item @{ML "Isar.context ()"} produces the proof context from @{ML "Isar.state ()"}, analogous to @{ML Context.proof_of} (\secref{sec:generic-context}).
+
+  \item @{ML "Isar.goal ()"} picks the tactical goal from @{ML "Isar.state ()"}, represented as a theorem according to \secref{sec:tactical-goals}.
+
+  \end{description}
+*}
+
+
+section {* Theory database \label{sec:theory-database} *}
+
+text {*
+  The theory database maintains a collection of theories, together with some administrative information about their original sources, which are held in an external store (i.e.\ some directory within the regular file system).
+
+  The theory database is organized as a directed acyclic graph; entries are referenced by theory name.  Although some additional interfaces allow a directory specification to be included as well, this is only a hint to the underlying theory loader.  The internal theory name space is flat!
+
+  Theory @{text A} is associated with the main theory file @{text A}\verb,.thy,, which needs to be accessible through the theory loader path.  Any number of additional {\ML} source files may be associated with each theory, by declaring these dependencies in the theory header as @{text \}, and loading them consecutively within the theory context.  The system keeps track of incoming {\ML} sources and associates them with the current theory.  The file @{text A}\verb,.ML, is loaded after a theory has been concluded, in order to support legacy {\ML} proof scripts.
+
+  The basic internal actions of the theory database are @{text "update"}, @{text "outdate"}, and @{text "remove"}:
+
+  \begin{itemize}
+
+  \item @{text "update A"} introduces a link of @{text "A"} with a @{text "theory"} value of the same name; it asserts that the theory sources are now consistent with that value;
+
+  \item @{text "outdate A"} invalidates the link of a theory database entry to its sources, but retains the present theory value;
+
+  \item @{text "remove A"} deletes entry @{text "A"} from the theory database.
+
+  \end{itemize}
+
+  These actions are propagated to sub- or super-graphs of a theory entry as expected, in order to preserve global consistency of the state of all loaded theories with the sources of the external store.  This implies certain causalities between actions: @{text "update"} or @{text "outdate"} of an entry will @{text "outdate"} all descendants; @{text "remove"} will @{text "remove"} all descendants.
+
+  \medskip There are separate user-level interfaces to operate on the theory database directly or indirectly.  The primitive actions then just happen automatically while working with the system.  In particular, processing a theory header @{text "\ A \ B\<^sub>1 \ B\<^sub>n \"} ensures that the sub-graph of the collective imports @{text "B\<^sub>1 \ B\<^sub>n"} is up-to-date, too.  Earlier theories are reloaded as required, with @{text update} actions proceeding in topological order according to theory dependencies.  There may also be a wave of implied @{text outdate} actions for derived theory nodes until a stable situation is achieved eventually.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML theory: "string -> theory"} \\
+  @{index_ML use_thy: "string -> unit"} \\
+  @{index_ML use_thys: "string list -> unit"} \\
+  @{index_ML ThyInfo.touch_thy: "string -> unit"} \\
+  @{index_ML ThyInfo.remove_thy: "string -> unit"} \\[1ex]
+  @{index_ML ThyInfo.begin_theory}@{verbatim ": ... -> bool -> theory"} \\
+  @{index_ML ThyInfo.end_theory: "theory -> unit"} \\
+  @{index_ML ThyInfo.register_theory: "theory -> unit"} \\[1ex]
+  @{verbatim "datatype action = Update | Outdate | Remove"} \\
+  @{index_ML ThyInfo.add_hook: "(ThyInfo.action -> string -> unit) -> unit"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML theory}~@{text A} retrieves the theory value presently associated with name @{text A}.  Note that the result might be outdated.
+
+  \item @{ML use_thy}~@{text A} ensures that theory @{text A} is fully up-to-date wrt.\ the external file store, reloading outdated ancestors as required.
+
+  \item @{ML use_thys} is similar to @{ML use_thy}, but handles several theories simultaneously.  Thus it acts like processing the import header of a theory, but without performing the merge of the result.
+
+  \item @{ML ThyInfo.touch_thy}~@{text A} performs an @{text outdate} action on theory @{text A} and all descendants.
+
+  \item @{ML ThyInfo.remove_thy}~@{text A} deletes theory @{text A} and all descendants from the theory database.
+
+  \item @{ML ThyInfo.begin_theory} is the basic operation behind a @{text \} header declaration.  This {\ML} function is normally not invoked directly.
+
+  \item @{ML ThyInfo.end_theory} concludes the loading of a theory proper and stores the result in the theory database.
+
+  \item @{ML ThyInfo.register_theory}~@{text "text thy"} registers an existing theory value with the theory loader database.  There is no management of associated sources.
+
+  \item @{ML "ThyInfo.add_hook"}~@{text f} registers function @{text f} as a hook for theory database actions.  The function will be invoked with the action and theory name being involved; thus derived actions may be performed in associated system components, e.g.\ maintaining the state of an editor for the theory sources.
+
+  The kind and order of actions occurring in practice depend both on user interactions and the internal process of resolving theory imports.  Hooks should not rely on a particular policy here!  Any exceptions raised by the hook are ignored.
+
+  \end{description}
+*}
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Isar.thy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Isar.thy Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,37 @@
+theory Isar
+imports Base
+begin
+
+chapter {* Isar language elements *}
+
+text {*
+  The primary Isar language consists of three main categories of language elements:
+
+  \begin{enumerate}
+
+  \item Proof commands
+
+  \item Proof methods
+
+  \item Attributes
+
+  \end{enumerate}
+*}
+
+
+section {* Proof commands *}
+
+text FIXME
+
+
+section {* Proof methods *}
+
+text FIXME
+
+
+section {* Attributes *}
+
+text FIXME
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Local_Theory.thy
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Logic.thy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Logic.thy Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,909 @@
+theory Logic
+imports Base
+begin
+
+chapter {* Primitive logic \label{ch:logic} *}
+
+text {*
+  The logical foundations of Isabelle/Isar are those of the Pure logic, which has been introduced as a Natural Deduction framework in \cite{paulson700}.
This is essentially the same logic as ``@{text + "\HOL"}'' in the more abstract setting of Pure Type Systems (PTS) + \cite{Barendregt-Geuvers:2001}, although there are some key + differences in the specific treatment of simple types in + Isabelle/Pure. + + Following type-theoretic parlance, the Pure logic consists of three + levels of @{text "\"}-calculus with corresponding arrows, @{text + "\"} for syntactic function space (terms depending on terms), @{text + "\"} for universal quantification (proofs depending on terms), and + @{text "\"} for implication (proofs depending on proofs). + + Derivations are relative to a logical theory, which declares type + constructors, constants, and axioms. Theory declarations support + schematic polymorphism, which is strictly speaking outside the + logic.\footnote{This is the deeper logical reason, why the theory + context @{text "\"} is separate from the proof context @{text "\"} + of the core calculus.} +*} + + +section {* Types \label{sec:types} *} + +text {* + The language of types is an uninterpreted order-sorted first-order + algebra; types are qualified by ordered type classes. + + \medskip A \emph{type class} is an abstract syntactic entity + declared in the theory context. The \emph{subclass relation} @{text + "c\<^isub>1 \ c\<^isub>2"} is specified by stating an acyclic + generating relation; the transitive closure is maintained + internally. The resulting relation is an ordering: reflexive, + transitive, and antisymmetric. + + A \emph{sort} is a list of type classes written as @{text "s = + {c\<^isub>1, \, c\<^isub>m}"}, which represents symbolic + intersection. Notationally, the curly braces are omitted for + singleton intersections, i.e.\ any class @{text "c"} may be read as + a sort @{text "{c}"}. The ordering on type classes is extended to + sorts according to the meaning of intersections: @{text + "{c\<^isub>1, \ c\<^isub>m} \ {d\<^isub>1, \, d\<^isub>n}"} iff + @{text "\j. \i. c\<^isub>i \ d\<^isub>j"}. The empty intersection + @{text "{}"} refers to the universal sort, which is the largest + element wrt.\ the sort order. The intersections of all (finitely + many) classes declared in the current theory are the minimal + elements wrt.\ the sort order. + + \medskip A \emph{fixed type variable} is a pair of a basic name + (starting with a @{text "'"} character) and a sort constraint, e.g.\ + @{text "('a, s)"} which is usually printed as @{text "\\<^isub>s"}. + A \emph{schematic type variable} is a pair of an indexname and a + sort constraint, e.g.\ @{text "(('a, 0), s)"} which is usually + printed as @{text "?\\<^isub>s"}. + + Note that \emph{all} syntactic components contribute to the identity + of type variables, including the sort constraint. The core logic + handles type variables with the same name but different sorts as + different, although some outer layers of the system make it hard to + produce anything like this. + + A \emph{type constructor} @{text "\"} is a @{text "k"}-ary operator + on types declared in the theory. Type constructor application is + written postfix as @{text "(\\<^isub>1, \, \\<^isub>k)\"}. For + @{text "k = 0"} the argument tuple is omitted, e.g.\ @{text "prop"} + instead of @{text "()prop"}. For @{text "k = 1"} the parentheses + are omitted, e.g.\ @{text "\ list"} instead of @{text "(\)list"}. + Further notation is provided for specific constructors, notably the + right-associative infix @{text "\ \ \"} instead of @{text "(\, + \)fun"}. 
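+
+  For illustration, here is a minimal {\ML} sketch (in terms of the raw @{ML_type typ} datatype described below) that builds the type @{text "'a \<Rightarrow> prop"} directly from the constructors; @{verbatim "fun"} and @{verbatim "prop"} are the internal names mentioned above, and the empty sort @{verbatim "[]"} stands for the universal sort @{text "{}"}:
+
+\begin{verbatim}
+  (*the type "'a => prop", i.e. ('a, prop)fun in postfix notation*)
+  val T = Type ("fun", [TFree ("'a", []), Type ("prop", [])]);
+\end{verbatim}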
+ + A \emph{type} is defined inductively over type variables and type + constructors as follows: @{text "\ = \\<^isub>s | ?\\<^isub>s | + (\\<^sub>1, \, \\<^sub>k)\"}. + + A \emph{type abbreviation} is a syntactic definition @{text + "(\<^vec>\)\ = \"} of an arbitrary type expression @{text "\"} over + variables @{text "\<^vec>\"}. Type abbreviations appear as type + constructors in the syntax, but are expanded before entering the + logical core. + + A \emph{type arity} declares the image behavior of a type + constructor wrt.\ the algebra of sorts: @{text "\ :: (s\<^isub>1, \, + s\<^isub>k)s"} means that @{text "(\\<^isub>1, \, \\<^isub>k)\"} is + of sort @{text "s"} if every argument type @{text "\\<^isub>i"} is + of sort @{text "s\<^isub>i"}. Arity declarations are implicitly + completed, i.e.\ @{text "\ :: (\<^vec>s)c"} entails @{text "\ :: + (\<^vec>s)c'"} for any @{text "c' \ c"}. + + \medskip The sort algebra is always maintained as \emph{coregular}, + which means that type arities are consistent with the subclass + relation: for any type constructor @{text "\"}, and classes @{text + "c\<^isub>1 \ c\<^isub>2"}, and arities @{text "\ :: + (\<^vec>s\<^isub>1)c\<^isub>1"} and @{text "\ :: + (\<^vec>s\<^isub>2)c\<^isub>2"} holds @{text "\<^vec>s\<^isub>1 \ + \<^vec>s\<^isub>2"} component-wise. + + The key property of a coregular order-sorted algebra is that sort + constraints can be solved in a most general fashion: for each type + constructor @{text "\"} and sort @{text "s"} there is a most general + vector of argument sorts @{text "(s\<^isub>1, \, s\<^isub>k)"} such + that a type scheme @{text "(\\<^bsub>s\<^isub>1\<^esub>, \, + \\<^bsub>s\<^isub>k\<^esub>)\"} is of sort @{text "s"}. + Consequently, type unification has most general solutions (modulo + equivalence of sorts), so type-inference produces primary types as + expected \cite{nipkow-prehofer}. +*} + +text %mlref {* + \begin{mldecls} + @{index_ML_type class} \\ + @{index_ML_type sort} \\ + @{index_ML_type arity} \\ + @{index_ML_type typ} \\ + @{index_ML map_atyps: "(typ -> typ) -> typ -> typ"} \\ + @{index_ML fold_atyps: "(typ -> 'a -> 'a) -> typ -> 'a -> 'a"} \\ + \end{mldecls} + \begin{mldecls} + @{index_ML Sign.subsort: "theory -> sort * sort -> bool"} \\ + @{index_ML Sign.of_sort: "theory -> typ * sort -> bool"} \\ + @{index_ML Sign.add_types: "(string * int * mixfix) list -> theory -> theory"} \\ + @{index_ML Sign.add_tyabbrs_i: " + (string * string list * typ * mixfix) list -> theory -> theory"} \\ + @{index_ML Sign.primitive_class: "string * class list -> theory -> theory"} \\ + @{index_ML Sign.primitive_classrel: "class * class -> theory -> theory"} \\ + @{index_ML Sign.primitive_arity: "arity -> theory -> theory"} \\ + \end{mldecls} + + \begin{description} + + \item @{ML_type class} represents type classes; this is an alias for + @{ML_type string}. + + \item @{ML_type sort} represents sorts; this is an alias for + @{ML_type "class list"}. + + \item @{ML_type arity} represents type arities; this is an alias for + triples of the form @{text "(\, \<^vec>s, s)"} for @{text "\ :: + (\<^vec>s)s"} described above. + + \item @{ML_type typ} represents types; this is a datatype with + constructors @{ML TFree}, @{ML TVar}, @{ML Type}. + + \item @{ML map_atyps}~@{text "f \"} applies the mapping @{text "f"} + to all atomic types (@{ML TFree}, @{ML TVar}) occurring in @{text + "\"}. 
+
+  \item @{ML fold_atyps}~@{text "f \"} iterates the operation @{text "f"} over all occurrences of atomic types (@{ML TFree}, @{ML TVar}) in @{text "\"}; the type structure is traversed from left to right.
+
+  \item @{ML Sign.subsort}~@{text "thy (s\<^isub>1, s\<^isub>2)"} tests the subsort relation @{text "s\<^isub>1 \ s\<^isub>2"}.
+
+  \item @{ML Sign.of_sort}~@{text "thy (\, s)"} tests whether type @{text "\"} is of sort @{text "s"}.
+
+  \item @{ML Sign.add_types}~@{text "[(\, k, mx), \]"} declares new type constructors @{text "\"} with @{text "k"} arguments and optional mixfix syntax.
+
+  \item @{ML Sign.add_tyabbrs_i}~@{text "[(\, \<^vec>\, \, mx), \]"} defines a new type abbreviation @{text "(\<^vec>\)\ = \"} with optional mixfix syntax.
+
+  \item @{ML Sign.primitive_class}~@{text "(c, [c\<^isub>1, \, c\<^isub>n])"} declares a new class @{text "c"}, together with class relations @{text "c \ c\<^isub>i"}, for @{text "i = 1, \, n"}.
+
+  \item @{ML Sign.primitive_classrel}~@{text "(c\<^isub>1, c\<^isub>2)"} declares the class relation @{text "c\<^isub>1 \ c\<^isub>2"}.
+
+  \item @{ML Sign.primitive_arity}~@{text "(\, \<^vec>s, s)"} declares the arity @{text "\ :: (\<^vec>s)s"}.
+
+  \end{description}
+*}
+
+
+section {* Terms \label{sec:terms} *}
+
+text {*
+  The language of terms is that of simply-typed @{text "\"}-calculus with de-Bruijn indices for bound variables (cf.\ \cite{debruijn72} or \cite{paulson-ml2}), with the types being determined by the corresponding binders.  In contrast, free variables and constants have an explicit name and type in each occurrence.
+
+  \medskip A \emph{bound variable} is a natural number @{text "b"}, which accounts for the number of intermediate binders between the variable occurrence in the body and its binding position.  For example, the de-Bruijn term @{text "\\<^bsub>nat\<^esub>. \\<^bsub>nat\<^esub>. 1 + 0"} would correspond to @{text "\x\<^bsub>nat\<^esub>. \y\<^bsub>nat\<^esub>. x + y"} in a named representation.  Note that a bound variable may be represented by different de-Bruijn indices at different occurrences, depending on the nesting of abstractions.
+
+  A \emph{loose variable} is a bound variable that is outside the scope of local binders.  The types (and names) for loose variables can be managed as a separate context that is maintained as a stack of hypothetical binders.  The core logic operates on closed terms, without any loose variables.
+
+  A \emph{fixed variable} is a pair of a basic name and a type, e.g.\ @{text "(x, \)"} which is usually printed as @{text "x\<^isub>\"}.  A \emph{schematic variable} is a pair of an indexname and a type, e.g.\ @{text "((x, 0), \)"} which is usually printed as @{text "?x\<^isub>\"}.
+
+  \medskip A \emph{constant} is a pair of a basic name and a type, e.g.\ @{text "(c, \)"} which is usually printed as @{text "c\<^isub>\"}.  Constants are declared in the context as polymorphic families @{text "c :: \"}, meaning that all substitution instances @{text "c\<^isub>\"} for @{text "\ = \\"} are valid.
+
+  The vector of \emph{type arguments} of constant @{text "c\<^isub>\"} wrt.\ the declaration @{text "c :: \"} is defined as the codomain of the matcher @{text "\ = {?\\<^isub>1 \ \\<^isub>1, \, ?\\<^isub>n \ \\<^isub>n}"} presented in canonical order @{text "(\\<^isub>1, \, \\<^isub>n)"}.  Within a given theory context, there is a one-to-one correspondence between any constant @{text "c\<^isub>\"} and the application @{text "c(\\<^isub>1, \, \\<^isub>n)"} of its type arguments.  For example, with @{text "plus :: \ \ \ \ \"}, the instance @{text "plus\<^bsub>nat \ nat \ nat\<^esub>"} corresponds to @{text "plus(nat)"}.
+
+  Constant declarations @{text "c :: \"} may contain sort constraints for type variables in @{text "\"}.  These are observed by type-inference as expected, but \emph{ignored} by the core logic.  This means the primitive logic is able to reason with instances of polymorphic constants that the user-level type-checker would reject due to violation of type class restrictions.
+
+  \medskip An \emph{atomic} term is either a variable or constant.  A \emph{term} is defined inductively over atomic terms, with abstraction and application as follows: @{text "t = b | x\<^isub>\ | ?x\<^isub>\ | c\<^isub>\ | \\<^isub>\. t | t\<^isub>1 t\<^isub>2"}.  Parsing and printing takes care of converting between the internal de-Bruijn representation and an external representation with named bound variables.  Subsequently, we shall use the latter notation instead of the internal one.
+
+  The inductive relation @{text "t :: \"} assigns a (unique) type to a term according to the structure of atomic terms, abstractions, and applications:
+  \[
+  \infer{@{text "a\<^isub>\ :: \"}}{}
+  \qquad
+  \infer{@{text "(\x\<^sub>\. t) :: \ \ \"}}{@{text "t :: \"}}
+  \qquad
+  \infer{@{text "t u :: \"}}{@{text "t :: \ \ \"} & @{text "u :: \"}}
+  \]
+  A \emph{well-typed term} is a term that can be typed according to these rules.
+
+  Typing information can be omitted: type-inference is able to reconstruct the most general type of a raw term, while assigning most general types to all of its variables and constants.  Type-inference depends on a context of type constraints for fixed variables, and declarations for polymorphic constants.
+
+  The identity of atomic terms consists of both the name and the type component.  This means that different variables @{text "x\<^bsub>\\<^isub>1\<^esub>"} and @{text "x\<^bsub>\\<^isub>2\<^esub>"} may become the same after type instantiation.  Some outer layers of the system make it hard to produce variables of the same name, but different types.  In contrast, mixed instances of polymorphic constants occur frequently.
+
+  \medskip The \emph{hidden polymorphism} of a term @{text "t :: \"} is the set of type variables occurring in @{text "t"}, but not in @{text "\"}.  This means that the term implicitly depends on type arguments that are not accounted for in the result type, i.e.\ there are different type instances @{text "t\ :: \"} and @{text "t\' :: \"} with the same type.  This slightly pathological situation notoriously demands additional care.
+
+  \medskip A \emph{term abbreviation} is a syntactic definition @{text "c\<^isub>\ \ t"} of a closed term @{text "t"} of type @{text "\"}, without any hidden polymorphism.  A term abbreviation looks like a constant in the syntax, but is expanded before entering the logical core.  Abbreviations are usually reverted when printing terms, using @{text "t \ c\<^isub>\"} as rules for higher-order rewriting.
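+
+  As a concrete illustration (a minimal {\ML} sketch, using the raw @{ML_type term} constructors and the @{ML lambda} combinator described below), the identity abstraction @{text "\<lambda>x :: prop. x"} may be built in two equivalent ways:
+
+\begin{verbatim}
+  val x  = Free ("x", Type ("prop", []));
+  val t1 = Abs ("x", Type ("prop", []), Bound 0);
+  val t2 = lambda x x;   (*same term: the Free becomes Bound 0*)
+\end{verbatim}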
+ + \medskip Canonical operations on @{text "\"}-terms include @{text + "\\\"}-conversion: @{text "\"}-conversion refers to capture-free + renaming of bound variables; @{text "\"}-conversion contracts an + abstraction applied to an argument term, substituting the argument + in the body: @{text "(\x. b)a"} becomes @{text "b[a/x]"}; @{text + "\"}-conversion contracts vacuous application-abstraction: @{text + "\x. f x"} becomes @{text "f"}, provided that the bound variable + does not occur in @{text "f"}. + + Terms are normally treated modulo @{text "\"}-conversion, which is + implicit in the de-Bruijn representation. Names for bound variables + in abstractions are maintained separately as (meaningless) comments, + mostly for parsing and printing. Full @{text "\\\"}-conversion is + commonplace in various standard operations (\secref{sec:obj-rules}) + that are based on higher-order unification and matching. +*} + +text %mlref {* + \begin{mldecls} + @{index_ML_type term} \\ + @{index_ML "op aconv": "term * term -> bool"} \\ + @{index_ML map_types: "(typ -> typ) -> term -> term"} \\ + @{index_ML fold_types: "(typ -> 'a -> 'a) -> term -> 'a -> 'a"} \\ + @{index_ML map_aterms: "(term -> term) -> term -> term"} \\ + @{index_ML fold_aterms: "(term -> 'a -> 'a) -> term -> 'a -> 'a"} \\ + \end{mldecls} + \begin{mldecls} + @{index_ML fastype_of: "term -> typ"} \\ + @{index_ML lambda: "term -> term -> term"} \\ + @{index_ML betapply: "term * term -> term"} \\ + @{index_ML Sign.declare_const: "Properties.T -> (binding * typ) * mixfix -> + theory -> term * theory"} \\ + @{index_ML Sign.add_abbrev: "string -> Properties.T -> binding * term -> + theory -> (term * term) * theory"} \\ + @{index_ML Sign.const_typargs: "theory -> string * typ -> typ list"} \\ + @{index_ML Sign.const_instance: "theory -> string * typ list -> typ"} \\ + \end{mldecls} + + \begin{description} + + \item @{ML_type term} represents de-Bruijn terms, with comments in + abstractions, and explicitly named free variables and constants; + this is a datatype with constructors @{ML Bound}, @{ML Free}, @{ML + Var}, @{ML Const}, @{ML Abs}, @{ML "op $"}. + + \item @{text "t"}~@{ML aconv}~@{text "u"} checks @{text + "\"}-equivalence of two terms. This is the basic equality relation + on type @{ML_type term}; raw datatype equality should only be used + for operations related to parsing or printing! + + \item @{ML map_types}~@{text "f t"} applies the mapping @{text + "f"} to all types occurring in @{text "t"}. + + \item @{ML fold_types}~@{text "f t"} iterates the operation @{text + "f"} over all occurrences of types in @{text "t"}; the term + structure is traversed from left to right. + + \item @{ML map_aterms}~@{text "f t"} applies the mapping @{text "f"} + to all atomic terms (@{ML Bound}, @{ML Free}, @{ML Var}, @{ML + Const}) occurring in @{text "t"}. + + \item @{ML fold_aterms}~@{text "f t"} iterates the operation @{text + "f"} over all occurrences of atomic terms (@{ML Bound}, @{ML Free}, + @{ML Var}, @{ML Const}) in @{text "t"}; the term structure is + traversed from left to right. + + \item @{ML fastype_of}~@{text "t"} determines the type of a + well-typed term. This operation is relatively slow, despite the + omission of any sanity checks. + + \item @{ML lambda}~@{text "a b"} produces an abstraction @{text + "\a. b"}, where occurrences of the atomic term @{text "a"} in the + body @{text "b"} are replaced by bound variables. 
+ + \item @{ML betapply}~@{text "(t, u)"} produces an application @{text + "t u"}, with topmost @{text "\"}-conversion if @{text "t"} is an + abstraction. + + \item @{ML Sign.declare_const}~@{text "properties ((c, \), mx)"} + declares a new constant @{text "c :: \"} with optional mixfix + syntax. + + \item @{ML Sign.add_abbrev}~@{text "print_mode properties (c, t)"} + introduces a new term abbreviation @{text "c \ t"}. + + \item @{ML Sign.const_typargs}~@{text "thy (c, \)"} and @{ML + Sign.const_instance}~@{text "thy (c, [\\<^isub>1, \, \\<^isub>n])"} + convert between two representations of polymorphic constants: full + type instance vs.\ compact type arguments form. + + \end{description} +*} + + +section {* Theorems \label{sec:thms} *} + +text {* + A \emph{proposition} is a well-typed term of type @{text "prop"}, a + \emph{theorem} is a proven proposition (depending on a context of + hypotheses and the background theory). Primitive inferences include + plain Natural Deduction rules for the primary connectives @{text + "\"} and @{text "\"} of the framework. There is also a builtin + notion of equality/equivalence @{text "\"}. +*} + + +subsection {* Primitive connectives and rules \label{sec:prim-rules} *} + +text {* + The theory @{text "Pure"} contains constant declarations for the + primitive connectives @{text "\"}, @{text "\"}, and @{text "\"} of + the logical framework, see \figref{fig:pure-connectives}. The + derivability judgment @{text "A\<^isub>1, \, A\<^isub>n \ B"} is + defined inductively by the primitive inferences given in + \figref{fig:prim-rules}, with the global restriction that the + hypotheses must \emph{not} contain any schematic variables. The + builtin equality is conceptually axiomatized as shown in + \figref{fig:pure-equality}, although the implementation works + directly with derived inferences. + + \begin{figure}[htb] + \begin{center} + \begin{tabular}{ll} + @{text "all :: (\ \ prop) \ prop"} & universal quantification (binder @{text "\"}) \\ + @{text "\ :: prop \ prop \ prop"} & implication (right associative infix) \\ + @{text "\ :: \ \ \ \ prop"} & equality relation (infix) \\ + \end{tabular} + \caption{Primitive connectives of Pure}\label{fig:pure-connectives} + \end{center} + \end{figure} + + \begin{figure}[htb] + \begin{center} + \[ + \infer[@{text "(axiom)"}]{@{text "\ A"}}{@{text "A \ \"}} + \qquad + \infer[@{text "(assume)"}]{@{text "A \ A"}}{} + \] + \[ + \infer[@{text "(\_intro)"}]{@{text "\ \ \x. b[x]"}}{@{text "\ \ b[x]"} & @{text "x \ \"}} + \qquad + \infer[@{text "(\_elim)"}]{@{text "\ \ b[a]"}}{@{text "\ \ \x. b[x]"}} + \] + \[ + \infer[@{text "(\_intro)"}]{@{text "\ - A \ A \ B"}}{@{text "\ \ B"}} + \qquad + \infer[@{text "(\_elim)"}]{@{text "\\<^sub>1 \ \\<^sub>2 \ B"}}{@{text "\\<^sub>1 \ A \ B"} & @{text "\\<^sub>2 \ A"}} + \] + \caption{Primitive inferences of Pure}\label{fig:prim-rules} + \end{center} + \end{figure} + + \begin{figure}[htb] + \begin{center} + \begin{tabular}{ll} + @{text "\ (\x. b[x]) a \ b[a]"} & @{text "\"}-conversion \\ + @{text "\ x \ x"} & reflexivity \\ + @{text "\ x \ y \ P x \ P y"} & substitution \\ + @{text "\ (\x. 
f x \ g x) \ f \ g"} & extensionality \\ + @{text "\ (A \ B) \ (B \ A) \ A \ B"} & logical equivalence \\ + \end{tabular} + \caption{Conceptual axiomatization of Pure equality}\label{fig:pure-equality} + \end{center} + \end{figure} + + The introduction and elimination rules for @{text "\"} and @{text + "\"} are analogous to formation of dependently typed @{text + "\"}-terms representing the underlying proof objects. Proof terms + are irrelevant in the Pure logic, though; they cannot occur within + propositions. The system provides a runtime option to record + explicit proof terms for primitive inferences. Thus all three + levels of @{text "\"}-calculus become explicit: @{text "\"} for + terms, and @{text "\/\"} for proofs (cf.\ + \cite{Berghofer-Nipkow:2000:TPHOL}). + + Observe that locally fixed parameters (as in @{text "\_intro"}) need + not be recorded in the hypotheses, because the simple syntactic + types of Pure are always inhabitable. ``Assumptions'' @{text "x :: + \"} for type-membership are only present as long as some @{text + "x\<^isub>\"} occurs in the statement body.\footnote{This is the key + difference to ``@{text "\HOL"}'' in the PTS framework + \cite{Barendregt-Geuvers:2001}, where hypotheses @{text "x : A"} are + treated uniformly for propositions and types.} + + \medskip The axiomatization of a theory is implicitly closed by + forming all instances of type and term variables: @{text "\ + A\"} holds for any substitution instance of an axiom + @{text "\ A"}. By pushing substitutions through derivations + inductively, we also get admissible @{text "generalize"} and @{text + "instance"} rules as shown in \figref{fig:subst-rules}. + + \begin{figure}[htb] + \begin{center} + \[ + \infer{@{text "\ \ B[?\]"}}{@{text "\ \ B[\]"} & @{text "\ \ \"}} + \quad + \infer[\quad@{text "(generalize)"}]{@{text "\ \ B[?x]"}}{@{text "\ \ B[x]"} & @{text "x \ \"}} + \] + \[ + \infer{@{text "\ \ B[\]"}}{@{text "\ \ B[?\]"}} + \quad + \infer[\quad@{text "(instantiate)"}]{@{text "\ \ B[t]"}}{@{text "\ \ B[?x]"}} + \] + \caption{Admissible substitution rules}\label{fig:subst-rules} + \end{center} + \end{figure} + + Note that @{text "instantiate"} does not require an explicit + side-condition, because @{text "\"} may never contain schematic + variables. + + In principle, variables could be substituted in hypotheses as well, + but this would disrupt the monotonicity of reasoning: deriving + @{text "\\ \ B\"} from @{text "\ \ B"} is + correct, but @{text "\\ \ \"} does not necessarily hold: + the result belongs to a different proof context. + + \medskip An \emph{oracle} is a function that produces axioms on the + fly. Logically, this is an instance of the @{text "axiom"} rule + (\figref{fig:prim-rules}), but there is an operational difference. + The system always records oracle invocations within derivations of + theorems by a unique tag. + + Axiomatizations should be limited to the bare minimum, typically as + part of the initial logical basis of an object-logic formalization. + Later on, theories are usually developed in a strictly definitional + fashion, by stating only certain equalities over new constants. + + A \emph{simple definition} consists of a constant declaration @{text + "c :: \"} together with an axiom @{text "\ c \ t"}, where @{text "t + :: \"} is a closed term without any hidden polymorphism. The RHS + may depend on further defined constants, but not @{text "c"} itself. 
+  Definitions of functions may be presented as @{text "c \<^vec>x \ t"} instead of the puristic @{text "c \ \\<^vec>x. t"}.
+
+  An \emph{overloaded definition} consists of a collection of axioms for the same constant, with zero or one equations @{text "c((\<^vec>\)\) \ t"} for each type constructor @{text "\"} (for distinct variables @{text "\<^vec>\"}).  The RHS may mention previously defined constants as above, or arbitrary constants @{text "d(\\<^isub>i)"} for some @{text "\\<^isub>i"} projected from @{text "\<^vec>\"}.  Thus overloaded definitions essentially work by primitive recursion over the syntactic structure of a single type argument.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_type ctyp} \\
+  @{index_ML_type cterm} \\
+  @{index_ML Thm.ctyp_of: "theory -> typ -> ctyp"} \\
+  @{index_ML Thm.cterm_of: "theory -> term -> cterm"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML_type thm} \\
+  @{index_ML proofs: "int ref"} \\
+  @{index_ML Thm.assume: "cterm -> thm"} \\
+  @{index_ML Thm.forall_intr: "cterm -> thm -> thm"} \\
+  @{index_ML Thm.forall_elim: "cterm -> thm -> thm"} \\
+  @{index_ML Thm.implies_intr: "cterm -> thm -> thm"} \\
+  @{index_ML Thm.implies_elim: "thm -> thm -> thm"} \\
+  @{index_ML Thm.generalize: "string list * string list -> int -> thm -> thm"} \\
+  @{index_ML Thm.instantiate: "(ctyp * ctyp) list * (cterm * cterm) list -> thm -> thm"} \\
+  @{index_ML Thm.axiom: "theory -> string -> thm"} \\
+  @{index_ML Thm.add_oracle: "bstring * ('a -> cterm) -> theory -> (string * ('a -> thm)) * theory"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML Theory.add_axioms_i: "(binding * term) list -> theory -> theory"} \\
+  @{index_ML Theory.add_deps: "string -> string * typ -> (string * typ) list -> theory -> theory"} \\
+  @{index_ML Theory.add_defs_i: "bool -> bool -> (binding * term) list -> theory -> theory"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_type ctyp} and @{ML_type cterm} represent certified types and terms, respectively.  These are abstract datatypes that guarantee that their values have passed the full well-formedness (and well-typedness) checks, relative to the declarations of type constructors, constants etc. in the theory.
+
+  \item @{ML Thm.ctyp_of}~@{text "thy \"} and @{ML Thm.cterm_of}~@{text "thy t"} explicitly check types and terms, respectively.  This also involves some basic normalizations, such as expansion of type and term abbreviations from the theory context.
+
+  Re-certification is relatively slow and should be avoided in tight reasoning loops.  There are separate operations to decompose certified entities (including actual theorems).
+
+  \item @{ML_type thm} represents proven propositions.  This is an abstract datatype that guarantees that its values have been constructed by basic principles of the @{ML_struct Thm} module.  Every @{ML thm} value contains a sliding back-reference to the enclosing theory, cf.\ \secref{sec:context-theory}.
+
+  \item @{ML proofs} determines the detail of proof recording within @{ML_type thm} values: @{ML 0} records only the names of oracles, @{ML 1} records oracle names and propositions, @{ML 2} additionally records full proof terms.  Officially named theorems that contribute to a result are always recorded.
+
+  \item @{ML Thm.assume}, @{ML Thm.forall_intr}, @{ML Thm.forall_elim}, @{ML Thm.implies_intr}, and @{ML Thm.implies_elim} correspond to the primitive inferences of \figref{fig:prim-rules}.
+ + \item @{ML Thm.generalize}~@{text "(\<^vec>\, \<^vec>x)"} + corresponds to the @{text "generalize"} rules of + \figref{fig:subst-rules}. Here collections of type and term + variables are generalized simultaneously, specified by the given + basic names. + + \item @{ML Thm.instantiate}~@{text "(\<^vec>\\<^isub>s, + \<^vec>x\<^isub>\)"} corresponds to the @{text "instantiate"} rules + of \figref{fig:subst-rules}. Type variables are substituted before + term variables. Note that the types in @{text "\<^vec>x\<^isub>\"} + refer to the instantiated versions. + + \item @{ML Thm.axiom}~@{text "thy name"} retrieves a named + axiom, cf.\ @{text "axiom"} in \figref{fig:prim-rules}. + + \item @{ML Thm.add_oracle}~@{text "(name, oracle)"} produces a named + oracle rule, essentially generating arbitrary axioms on the fly, + cf.\ @{text "axiom"} in \figref{fig:prim-rules}. + + \item @{ML Theory.add_axioms_i}~@{text "[(name, A), \]"} declares + arbitrary propositions as axioms. + + \item @{ML Theory.add_deps}~@{text "name c\<^isub>\ + \<^vec>d\<^isub>\"} declares dependencies of a named specification + for constant @{text "c\<^isub>\"}, relative to existing + specifications for constants @{text "\<^vec>d\<^isub>\"}. + + \item @{ML Theory.add_defs_i}~@{text "unchecked overloaded [(name, c + \<^vec>x \ t), \]"} states a definitional axiom for an existing + constant @{text "c"}. Dependencies are recorded (cf.\ @{ML + Theory.add_deps}), unless the @{text "unchecked"} option is set. + + \end{description} +*} + + +subsection {* Auxiliary definitions *} + +text {* + Theory @{text "Pure"} provides a few auxiliary definitions, see + \figref{fig:pure-aux}. These special constants are normally not + exposed to the user, but appear in internal encodings. + + \begin{figure}[htb] + \begin{center} + \begin{tabular}{ll} + @{text "conjunction :: prop \ prop \ prop"} & (infix @{text "&"}) \\ + @{text "\ A & B \ (\C. (A \ B \ C) \ C)"} \\[1ex] + @{text "prop :: prop \ prop"} & (prefix @{text "#"}, suppressed) \\ + @{text "#A \ A"} \\[1ex] + @{text "term :: \ \ prop"} & (prefix @{text "TERM"}) \\ + @{text "term x \ (\A. A \ A)"} \\[1ex] + @{text "TYPE :: \ itself"} & (prefix @{text "TYPE"}) \\ + @{text "(unspecified)"} \\ + \end{tabular} + \caption{Definitions of auxiliary connectives}\label{fig:pure-aux} + \end{center} + \end{figure} + + Derived conjunction rules include introduction @{text "A \ B \ A & + B"}, and destructions @{text "A & B \ A"} and @{text "A & B \ B"}. + Conjunction allows to treat simultaneous assumptions and conclusions + uniformly. For example, multiple claims are intermediately + represented as explicit conjunction, but this is refined into + separate sub-goals before the user continues the proof; the final + result is projected into a list of theorems (cf.\ + \secref{sec:tactical-goals}). + + The @{text "prop"} marker (@{text "#"}) makes arbitrarily complex + propositions appear as atomic, without changing the meaning: @{text + "\ \ A"} and @{text "\ \ #A"} are interchangeable. See + \secref{sec:tactical-goals} for specific operations. + + The @{text "term"} marker turns any well-typed term into a derivable + proposition: @{text "\ TERM t"} holds unconditionally. Although + this is logically vacuous, it allows to treat terms and proofs + uniformly, similar to a type-theoretic framework. + + The @{text "TYPE"} constructor is the canonical representative of + the unspecified type @{text "\ itself"}; it essentially injects the + language of types into that of terms. 
There is specific notation @{text "TYPE(\)"} for @{text "TYPE\<^bsub>\ itself\<^esub>"}.  Although devoid of any particular meaning, @{text "TYPE(\)"} accounts for the type @{text "\"} within the term language.  In particular, @{text "TYPE(\)"} may be used as formal argument in primitive definitions, in order to circumvent hidden polymorphism (cf.\ \secref{sec:terms}).  For example, @{text "c TYPE(\) \ A[\]"} defines @{text "c :: \ itself \ prop"} in terms of a proposition @{text "A"} that depends on an additional type argument, which is essentially a predicate on types.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML Conjunction.intr: "thm -> thm -> thm"} \\
+  @{index_ML Conjunction.elim: "thm -> thm * thm"} \\
+  @{index_ML Drule.mk_term: "cterm -> thm"} \\
+  @{index_ML Drule.dest_term: "thm -> cterm"} \\
+  @{index_ML Logic.mk_type: "typ -> term"} \\
+  @{index_ML Logic.dest_type: "term -> typ"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML Conjunction.intr} derives @{text "A & B"} from @{text "A"} and @{text "B"}.
+
+  \item @{ML Conjunction.elim} derives @{text "A"} and @{text "B"} from @{text "A & B"}.
+
+  \item @{ML Drule.mk_term} derives @{text "TERM t"}.
+
+  \item @{ML Drule.dest_term} recovers term @{text "t"} from @{text "TERM t"}.
+
+  \item @{ML Logic.mk_type}~@{text "\"} produces the term @{text "TYPE(\)"}.
+
+  \item @{ML Logic.dest_type}~@{text "TYPE(\)"} recovers the type @{text "\"}.
+
+  \end{description}
+*}
+
+
+section {* Object-level rules \label{sec:obj-rules} *}
+
+text {*
+  The primitive inferences covered so far mostly serve foundational purposes.  User-level reasoning usually works via object-level rules that are represented as theorems of Pure.  Composition of rules involves \emph{backchaining}, \emph{higher-order unification} modulo @{text "\\\"}-conversion of @{text "\"}-terms, and so-called \emph{lifting} of rules into a context of @{text "\"} and @{text "\"} connectives.  Thus the full power of higher-order Natural Deduction in Isabelle/Pure becomes readily available.
+*}
+
+
+subsection {* Hereditary Harrop Formulae *}
+
+text {*
+  The idea of object-level rules is to model Natural Deduction inferences in the style of Gentzen \cite{Gentzen:1935}, but we allow arbitrary nesting similar to \cite{extensions91}.  The most basic rule format is that of a \emph{Horn Clause}:
+  \[
+  \infer{@{text "A"}}{@{text "A\<^sub>1"} & @{text "\"} & @{text "A\<^sub>n"}}
+  \]
+  where @{text "A, A\<^sub>1, \, A\<^sub>n"} are atomic propositions of the framework, usually of the form @{text "Trueprop B"}, where @{text "B"} is a (compound) object-level statement.  This object-level inference corresponds to an iterated implication in Pure like this:
+  \[
+  @{text "A\<^sub>1 \ \ A\<^sub>n \ A"}
+  \]
+  As an example consider conjunction introduction: @{text "A \ B \ A \ B"}.  Any parameters occurring in such rule statements are conceptually treated as arbitrary:
+  \[
+  @{text "\x\<^sub>1 \ x\<^sub>m. A\<^sub>1 x\<^sub>1 \ x\<^sub>m \ \ A\<^sub>n x\<^sub>1 \ x\<^sub>m \ A x\<^sub>1 \ x\<^sub>m"}
+  \]
+
+  Nesting of rules means that the positions of @{text "A\<^sub>i"} may again hold compound rules, not just atomic propositions.  Propositions of this format are called \emph{Hereditary Harrop Formulae} in the literature \cite{Miller:1991}.
Here we give an + inductive characterization as follows: + + \medskip + \begin{tabular}{ll} + @{text "\<^bold>x"} & set of variables \\ + @{text "\<^bold>A"} & set of atomic propositions \\ + @{text "\<^bold>H = \\<^bold>x\<^sup>*. \<^bold>H\<^sup>* \ \<^bold>A"} & set of Hereditary Harrop Formulas \\ + \end{tabular} + \medskip + + \noindent Thus we essentially impose nesting levels on propositions + formed from @{text "\"} and @{text "\"}. At each level there is a + prefix of parameters and compound premises, concluding an atomic + proposition. Typical examples are @{text "\"}-introduction @{text + "(A \ B) \ A \ B"} or mathematical induction @{text "P 0 \ (\n. P n + \ P (Suc n)) \ P n"}. Even deeper nesting occurs in well-founded + induction @{text "(\x. (\y. y \ x \ P y) \ P x) \ P x"}, but this + already marks the limit of rule complexity seen in practice. + + \medskip Regular user-level inferences in Isabelle/Pure always + maintain the following canonical form of results: + + \begin{itemize} + + \item Normalization by @{text "(A \ (\x. B x)) \ (\x. A \ B x)"}, + which is a theorem of Pure, means that quantifiers are pushed in + front of implication at each level of nesting. The normal form is a + Hereditary Harrop Formula. + + \item The outermost prefix of parameters is represented via + schematic variables: instead of @{text "\\<^vec>x. \<^vec>H \<^vec>x + \ A \<^vec>x"} we have @{text "\<^vec>H ?\<^vec>x \ A ?\<^vec>x"}. + Note that this representation looses information about the order of + parameters, and vacuous quantifiers vanish automatically. + + \end{itemize} +*} + +text %mlref {* + \begin{mldecls} + @{index_ML MetaSimplifier.norm_hhf: "thm -> thm"} \\ + \end{mldecls} + + \begin{description} + + \item @{ML MetaSimplifier.norm_hhf}~@{text thm} normalizes the given + theorem according to the canonical form specified above. This is + occasionally helpful to repair some low-level tools that do not + handle Hereditary Harrop Formulae properly. + + \end{description} +*} + + +subsection {* Rule composition *} + +text {* + The rule calculus of Isabelle/Pure provides two main inferences: + @{inference resolution} (i.e.\ back-chaining of rules) and + @{inference assumption} (i.e.\ closing a branch), both modulo + higher-order unification. There are also combined variants, notably + @{inference elim_resolution} and @{inference dest_resolution}. + + To understand the all-important @{inference resolution} principle, + we first consider raw @{inference_def composition} (modulo + higher-order unification with substitution @{text "\"}): + \[ + \infer[(@{inference_def composition})]{@{text "\<^vec>A\ \ C\"}} + {@{text "\<^vec>A \ B"} & @{text "B' \ C"} & @{text "B\ = B'\"}} + \] + Here the conclusion of the first rule is unified with the premise of + the second; the resulting rule instance inherits the premises of the + first and conclusion of the second. Note that @{text "C"} can again + consist of iterated implications. We can also permute the premises + of the second rule back-and-forth in order to compose with @{text + "B'"} in any position (subsequently we shall always refer to + position 1 w.l.o.g.). + + In @{inference composition} the internal structure of the common + part @{text "B"} and @{text "B'"} is not taken into account. For + proper @{inference resolution} we require @{text "B"} to be atomic, + and explicitly observe the structure @{text "\\<^vec>x. \<^vec>H + \<^vec>x \ B' \<^vec>x"} of the premise of the second rule. 
+  The idea is to adapt the first rule by ``lifting'' it into this
+  context, by means of iterated application of the following
+  inferences:
+  \[
+  \infer[(@{inference_def imp_lift})]{@{text "(\<^vec>H \ \<^vec>A) \ (\<^vec>H \ B)"}}{@{text "\<^vec>A \ B"}}
+  \]
+  \[
+  \infer[(@{inference_def all_lift})]{@{text "(\\<^vec>x. \<^vec>A (?\<^vec>a \<^vec>x)) \ (\\<^vec>x. B (?\<^vec>a \<^vec>x))"}}{@{text "\<^vec>A ?\<^vec>a \ B ?\<^vec>a"}}
+  \]
+  By combining raw composition with lifting, we get full @{inference
+  resolution} as follows:
+  \[
+  \infer[(@{inference_def resolution})]
+  {@{text "(\\<^vec>x. \<^vec>H \<^vec>x \ \<^vec>A (?\<^vec>a \<^vec>x))\ \ C\"}}
+  {\begin{tabular}{l}
+    @{text "\<^vec>A ?\<^vec>a \ B ?\<^vec>a"} \\
+    @{text "(\\<^vec>x. \<^vec>H \<^vec>x \ B' \<^vec>x) \ C"} \\
+    @{text "(\\<^vec>x. B (?\<^vec>a \<^vec>x))\ = B'\"} \\
+   \end{tabular}}
+  \]
+
+  Continued resolution of rules allows us to back-chain a problem
+  towards more and more sub-problems.  Branches are closed either by
+  resolving with a rule of 0 premises, or by producing a
+  ``short-circuit'' within a solved situation (again modulo
+  unification):
+  \[
+  \infer[(@{inference_def assumption})]{@{text "C\"}}
+  {@{text "(\\<^vec>x. \<^vec>H \<^vec>x \ A \<^vec>x) \ C"} & @{text "A\ = H\<^sub>i\"}~~\text{(for some~@{text i})}}
+  \]
+
+  FIXME @{inference_def elim_resolution}, @{inference_def dest_resolution}
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML "op RS": "thm * thm -> thm"} \\
+  @{index_ML "op OF": "thm * thm list -> thm"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{text "rule\<^sub>1 RS rule\<^sub>2"} resolves @{text
+  "rule\<^sub>1"} with @{text "rule\<^sub>2"} according to the
+  @{inference resolution} principle explained above.  Note that the
+  corresponding attribute in the Isar language is called @{attribute
+  THEN}.
+
+  \item @{text "rule OF rules"} resolves a list of rules with the
+  first rule, addressing its premises @{text "1, \, length rules"}
+  (operating from last to first).  This means the newly emerging
+  premises are all concatenated, without interfering.  Also note that
+  compared to @{text "RS"}, the rule argument order is swapped: @{text
+  "rule\<^sub>1 RS rule\<^sub>2 = rule\<^sub>2 OF [rule\<^sub>1]"}.
+
+  \end{description}
+*}
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/ML.thy
--- a/doc-src/IsarImplementation/Thy/ML.thy	Wed Mar 04 17:12:23 2009 -0800
+++ b/doc-src/IsarImplementation/Thy/ML.thy	Thu Mar 05 02:32:46 2009 +0100
@@ -631,4 +631,4 @@
   Most table functions correspond to those of association lists.
 *}
 
-end
+end
\ No newline at end of file
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Prelim.thy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Prelim.thy	Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,764 @@
+theory Prelim
+imports Base
+begin
+
+chapter {* Preliminaries *}
+
+section {* Contexts \label{sec:context} *}
+
+text {*
+  A logical context represents the background that is required for
+  formulating statements and composing proofs.  It acts as a medium to
+  produce formal content, depending on earlier material (declarations,
+  results etc.).
+
+  For example, derivations within the Isabelle/Pure logic can be
+  described as a judgment @{text "\ \\<^sub>\ \"}, which means that a
+  proposition @{text "\"} is derivable from hypotheses @{text "\"}
+  within the theory @{text "\"}.
There are logical reasons for + keeping @{text "\"} and @{text "\"} separate: theories can be + liberal about supporting type constructors and schematic + polymorphism of constants and axioms, while the inner calculus of + @{text "\ \ \"} is strictly limited to Simple Type Theory (with + fixed type variables in the assumptions). + + \medskip Contexts and derivations are linked by the following key + principles: + + \begin{itemize} + + \item Transfer: monotonicity of derivations admits results to be + transferred into a \emph{larger} context, i.e.\ @{text "\ \\<^sub>\ + \"} implies @{text "\' \\<^sub>\\<^sub>' \"} for contexts @{text "\' + \ \"} and @{text "\' \ \"}. + + \item Export: discharge of hypotheses admits results to be exported + into a \emph{smaller} context, i.e.\ @{text "\' \\<^sub>\ \"} + implies @{text "\ \\<^sub>\ \ \ \"} where @{text "\' \ \"} and + @{text "\ = \' - \"}. Note that @{text "\"} remains unchanged here, + only the @{text "\"} part is affected. + + \end{itemize} + + \medskip By modeling the main characteristics of the primitive + @{text "\"} and @{text "\"} above, and abstracting over any + particular logical content, we arrive at the fundamental notions of + \emph{theory context} and \emph{proof context} in Isabelle/Isar. + These implement a certain policy to manage arbitrary \emph{context + data}. There is a strongly-typed mechanism to declare new kinds of + data at compile time. + + The internal bootstrap process of Isabelle/Pure eventually reaches a + stage where certain data slots provide the logical content of @{text + "\"} and @{text "\"} sketched above, but this does not stop there! + Various additional data slots support all kinds of mechanisms that + are not necessarily part of the core logic. + + For example, there would be data for canonical introduction and + elimination rules for arbitrary operators (depending on the + object-logic and application), which enables users to perform + standard proof steps implicitly (cf.\ the @{text "rule"} method + \cite{isabelle-isar-ref}). + + \medskip Thus Isabelle/Isar is able to bring forth more and more + concepts successively. In particular, an object-logic like + Isabelle/HOL continues the Isabelle/Pure setup by adding specific + components for automated reasoning (classical reasoner, tableau + prover, structured induction etc.) and derived specification + mechanisms (inductive predicates, recursive functions etc.). All of + this is ultimately based on the generic data management by theory + and proof contexts introduced here. +*} + + +subsection {* Theory context \label{sec:context-theory} *} + +text {* + A \emph{theory} is a data container with explicit name and unique + identifier. Theories are related by a (nominal) sub-theory + relation, which corresponds to the dependency graph of the original + construction; each theory is derived from a certain sub-graph of + ancestor theories. + + The @{text "merge"} operation produces the least upper bound of two + theories, which actually degenerates into absorption of one theory + into the other (due to the nominal sub-theory relation). + + The @{text "begin"} operation starts a new theory by importing + several parent theories and entering a special @{text "draft"} mode, + which is sustained until the final @{text "end"} operation. A draft + theory acts like a linear type, where updates invalidate earlier + versions. An invalidated draft is called ``stale''. 
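+
+  For example, here is a hypothetical {\ML} sketch of the nominal
+  relations described in this subsection (using the interface
+  summarized below; the @{verbatim "@{theory}"} antiquotation refers
+  to the enclosing theory):
+
+  \begin{verbatim}
+  val thy = @{theory};
+  Theory.subthy (thy, Theory.merge (thy, thy));
+  (* true: merging a theory with itself degenerates into absorption *)
+  \end{verbatim}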
+ + The @{text "checkpoint"} operation produces an intermediate stepping + stone that will survive the next update: both the original and the + changed theory remain valid and are related by the sub-theory + relation. Checkpointing essentially recovers purely functional + theory values, at the expense of some extra internal bookkeeping. + + The @{text "copy"} operation produces an auxiliary version that has + the same data content, but is unrelated to the original: updates of + the copy do not affect the original, neither does the sub-theory + relation hold. + + \medskip The example in \figref{fig:ex-theory} below shows a theory + graph derived from @{text "Pure"}, with theory @{text "Length"} + importing @{text "Nat"} and @{text "List"}. The body of @{text + "Length"} consists of a sequence of updates, working mostly on + drafts. Intermediate checkpoints may occur as well, due to the + history mechanism provided by the Isar top-level, cf.\ + \secref{sec:isar-toplevel}. + + \begin{figure}[htb] + \begin{center} + \begin{tabular}{rcccl} + & & @{text "Pure"} \\ + & & @{text "\"} \\ + & & @{text "FOL"} \\ + & $\swarrow$ & & $\searrow$ & \\ + @{text "Nat"} & & & & @{text "List"} \\ + & $\searrow$ & & $\swarrow$ \\ + & & @{text "Length"} \\ + & & \multicolumn{3}{l}{~~@{keyword "imports"}} \\ + & & \multicolumn{3}{l}{~~@{keyword "begin"}} \\ + & & $\vdots$~~ \\ + & & @{text "\"}~~ \\ + & & $\vdots$~~ \\ + & & @{text "\"}~~ \\ + & & $\vdots$~~ \\ + & & \multicolumn{3}{l}{~~@{command "end"}} \\ + \end{tabular} + \caption{A theory definition depending on ancestors}\label{fig:ex-theory} + \end{center} + \end{figure} + + \medskip There is a separate notion of \emph{theory reference} for + maintaining a live link to an evolving theory context: updates on + drafts are propagated automatically. Dynamic updating stops after + an explicit @{text "end"} only. + + Derived entities may store a theory reference in order to indicate + the context they belong to. This implicitly assumes monotonic + reasoning, because the referenced context may become larger without + further notice. +*} + +text %mlref {* + \begin{mldecls} + @{index_ML_type theory} \\ + @{index_ML Theory.subthy: "theory * theory -> bool"} \\ + @{index_ML Theory.merge: "theory * theory -> theory"} \\ + @{index_ML Theory.checkpoint: "theory -> theory"} \\ + @{index_ML Theory.copy: "theory -> theory"} \\ + \end{mldecls} + \begin{mldecls} + @{index_ML_type theory_ref} \\ + @{index_ML Theory.deref: "theory_ref -> theory"} \\ + @{index_ML Theory.check_thy: "theory -> theory_ref"} \\ + \end{mldecls} + + \begin{description} + + \item @{ML_type theory} represents theory contexts. This is + essentially a linear type! Most operations destroy the original + version, which then becomes ``stale''. + + \item @{ML "Theory.subthy"}~@{text "(thy\<^sub>1, thy\<^sub>2)"} + compares theories according to the inherent graph structure of the + construction. This sub-theory relation is a nominal approximation + of inclusion (@{text "\"}) of the corresponding content. + + \item @{ML "Theory.merge"}~@{text "(thy\<^sub>1, thy\<^sub>2)"} + absorbs one theory into the other. This fails for unrelated + theories! + + \item @{ML "Theory.checkpoint"}~@{text "thy"} produces a safe + stepping stone in the linear development of @{text "thy"}. The next + update will result in two related, valid theories. + + \item @{ML "Theory.copy"}~@{text "thy"} produces a variant of @{text + "thy"} that holds a copy of the same data. 
The result is not + related to the original; the original is unchanged. + + \item @{ML_type theory_ref} represents a sliding reference to an + always valid theory; updates on the original are propagated + automatically. + + \item @{ML "Theory.deref"}~@{text "thy_ref"} turns a @{ML_type + "theory_ref"} into an @{ML_type "theory"} value. As the referenced + theory evolves monotonically over time, later invocations of @{ML + "Theory.deref"} may refer to a larger context. + + \item @{ML "Theory.check_thy"}~@{text "thy"} produces a @{ML_type + "theory_ref"} from a valid @{ML_type "theory"} value. + + \end{description} +*} + + +subsection {* Proof context \label{sec:context-proof} *} + +text {* + A proof context is a container for pure data with a back-reference + to the theory it belongs to. The @{text "init"} operation creates a + proof context from a given theory. Modifications to draft theories + are propagated to the proof context as usual, but there is also an + explicit @{text "transfer"} operation to force resynchronization + with more substantial updates to the underlying theory. The actual + context data does not require any special bookkeeping, thanks to the + lack of destructive features. + + Entities derived in a proof context need to record inherent logical + requirements explicitly, since there is no separate context + identification as for theories. For example, hypotheses used in + primitive derivations (cf.\ \secref{sec:thms}) are recorded + separately within the sequent @{text "\ \ \"}, just to make double + sure. Results could still leak into an alien proof context due to + programming errors, but Isabelle/Isar includes some extra validity + checks in critical positions, notably at the end of a sub-proof. + + Proof contexts may be manipulated arbitrarily, although the common + discipline is to follow block structure as a mental model: a given + context is extended consecutively, and results are exported back + into the original context. Note that the Isar proof states model + block-structured reasoning explicitly, using a stack of proof + contexts internally. +*} + +text %mlref {* + \begin{mldecls} + @{index_ML_type Proof.context} \\ + @{index_ML ProofContext.init: "theory -> Proof.context"} \\ + @{index_ML ProofContext.theory_of: "Proof.context -> theory"} \\ + @{index_ML ProofContext.transfer: "theory -> Proof.context -> Proof.context"} \\ + \end{mldecls} + + \begin{description} + + \item @{ML_type Proof.context} represents proof contexts. Elements + of this type are essentially pure values, with a sliding reference + to the background theory. + + \item @{ML ProofContext.init}~@{text "thy"} produces a proof context + derived from @{text "thy"}, initializing all data. + + \item @{ML ProofContext.theory_of}~@{text "ctxt"} selects the + background theory from @{text "ctxt"}, dereferencing its internal + @{ML_type theory_ref}. + + \item @{ML ProofContext.transfer}~@{text "thy ctxt"} promotes the + background theory of @{text "ctxt"} to the super theory @{text + "thy"}. + + \end{description} +*} + + +subsection {* Generic contexts \label{sec:generic-context} *} + +text {* + A generic context is the disjoint sum of either a theory or proof + context. Occasionally, this enables uniform treatment of generic + context data, typically extra-logical information. Operations on + generic contexts include the usual injections, partial selections, + and combinators for lifting operations on either component of the + disjoint sum. 
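+
+  For example, a hypothetical {\ML} sketch of the injections and
+  selections (using the interface described below; @{verbatim
+  "@{theory}"} is an antiquotation for the enclosing theory):
+
+  \begin{verbatim}
+  val generic = Context.Theory @{theory};  (* inject into the sum *)
+  val thy  = Context.theory_of generic;    (* trivial selection *)
+  val ctxt = Context.proof_of generic;     (* ad-hoc init *)
+  \end{verbatim}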
+
+  Moreover, there are total operations @{text "theory_of"} and @{text
+  "proof_of"} to convert a generic context into either kind: a theory
+  can always be selected from the sum, while a proof context might
+  have to be constructed by an ad-hoc @{text "init"} operation.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_type Context.generic} \\
+  @{index_ML Context.theory_of: "Context.generic -> theory"} \\
+  @{index_ML Context.proof_of: "Context.generic -> Proof.context"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_type Context.generic} is the direct sum of @{ML_type
+  "theory"} and @{ML_type "Proof.context"}, with the datatype
+  constructors @{ML "Context.Theory"} and @{ML "Context.Proof"}.
+
+  \item @{ML Context.theory_of}~@{text "context"} always produces a
+  theory from the generic @{text "context"}, using @{ML
+  "ProofContext.theory_of"} as required.
+
+  \item @{ML Context.proof_of}~@{text "context"} always produces a
+  proof context from the generic @{text "context"}, using @{ML
+  "ProofContext.init"} as required (note that this re-initializes the
+  context data with each invocation).
+
+  \end{description}
+*}
+
+
+subsection {* Context data \label{sec:context-data} *}
+
+text {*
+  The main purpose of theory and proof contexts is to manage arbitrary
+  data.  New data types can be declared incrementally at compile time.
+  There are separate declaration mechanisms for each of the three
+  kinds of contexts: theory, proof, generic.
+
+  \paragraph{Theory data} may refer to destructive entities, which are
+  maintained in direct correspondence to the linear evolution of
+  theory values, including explicit copies.\footnote{Most existing
+  instances of destructive theory data are merely historical relics
+  (e.g.\ the destructive theorem storage, and destructive hints for
+  the Simplifier and Classical rules).}  A theory data declaration
+  needs to implement the following SML signature:
+
+  \medskip
+  \begin{tabular}{ll}
+  @{text "\ T"} & representing type \\
+  @{text "\ empty: T"} & empty default value \\
+  @{text "\ copy: T \ T"} & refresh impure data \\
+  @{text "\ extend: T \ T"} & re-initialize on import \\
+  @{text "\ merge: T \ T \ T"} & join on import \\
+  \end{tabular}
+  \medskip
+
+  \noindent The @{text "empty"} value acts as initial default for
+  \emph{any} theory that does not declare actual data content; @{text
+  "copy"} maintains persistent integrity for impure data; it is just
+  the identity for pure values; @{text "extend"} acts like a unitary
+  version of @{text "merge"}; both operations should also include the
+  functionality of @{text "copy"} for impure data.
+
+  \paragraph{Proof context data} is purely functional.  A declaration
+  needs to implement the following SML signature:
+
+  \medskip
+  \begin{tabular}{ll}
+  @{text "\ T"} & representing type \\
+  @{text "\ init: theory \ T"} & produce initial value \\
+  \end{tabular}
+  \medskip
+
+  \noindent The @{text "init"} operation is supposed to produce a pure
+  value from the given background theory.
+
+  \paragraph{Generic data} provides a hybrid interface for both theory
+  and proof data.  The declaration is essentially the same as for
+  (pure) theory data, without @{text "copy"}.  The @{text "init"}
+  operation for proof contexts merely selects the current data value
+  from the background theory.
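+
+  For example, here is a hypothetical declaration of theory data
+  holding a list of strings (the structure name and content are made
+  up for illustration; the resulting interface is summarized next):
+
+  \begin{verbatim}
+  structure Data = TheoryDataFun
+  (
+    type T = string list;                (* representing type *)
+    val empty = [];                      (* default value *)
+    val copy = I;                        (* pure data: identity *)
+    val extend = I;                      (* pure data: identity *)
+    fun merge _ = Library.merge (op =);  (* union on import *)
+  );
+  \end{verbatim}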
+
+  \bigskip A data declaration of type @{text "T"} results in the
+  following interface:
+
+  \medskip
+  \begin{tabular}{ll}
+  @{text "init: theory \ T"} \\
+  @{text "get: context \ T"} \\
+  @{text "put: T \ context \ context"} \\
+  @{text "map: (T \ T) \ context \ context"} \\
+  \end{tabular}
+  \medskip
+
+  \noindent Here @{text "init"} is only applicable to impure theory
+  data to install a fresh copy persistently (destructive update on
+  uninitialized data has no permanent effect).  The other operations
+  provide access for the particular kind of context (theory, proof, or
+  generic context).  Note that this is a safe interface: there is no
+  other way to access the corresponding data slot of a context.  By
+  keeping these operations private, a component may maintain abstract
+  values authentically, without other components interfering.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_functor TheoryDataFun} \\
+  @{index_ML_functor ProofDataFun} \\
+  @{index_ML_functor GenericDataFun} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_functor TheoryDataFun}@{text "(spec)"} declares data for
+  type @{ML_type theory} according to the specification provided as
+  argument structure.  The resulting structure provides data init and
+  access operations as described above.
+
+  \item @{ML_functor ProofDataFun}@{text "(spec)"} is analogous to
+  @{ML_functor TheoryDataFun} for type @{ML_type Proof.context}.
+
+  \item @{ML_functor GenericDataFun}@{text "(spec)"} is analogous to
+  @{ML_functor TheoryDataFun} for type @{ML_type Context.generic}.
+
+  \end{description}
+*}
+
+
+section {* Names \label{sec:names} *}
+
+text {*
+  In principle, a name is just a string, but there are various
+  conventions for encoding additional structure.  For example,
+  ``@{text "Foo.bar.baz"}'' is considered a qualified name consisting
+  of three basic name components.  The individual constituents of a
+  name may have further substructure, e.g.\ the string
+  ``\verb,\,\verb,,'' encodes as a single symbol.
+*}
+
+
+subsection {* Strings of symbols *}
+
+text {*
+  A \emph{symbol} constitutes the smallest textual unit in Isabelle
+  --- raw characters are normally not encountered at all.  Isabelle
+  strings consist of a sequence of symbols, represented as a packed
+  string or a list of strings.  Each symbol is in itself a small
+  string, which has one of the following forms:
+
+  \begin{enumerate}
+
+  \item a single ASCII character ``@{text "c"}'', for example
+  ``\verb,a,'',
+
+  \item a regular symbol ``\verb,\,\verb,<,@{text "ident"}\verb,>,'',
+  for example ``\verb,\,\verb,,'',
+
+  \item a control symbol ``\verb,\,\verb,<^,@{text "ident"}\verb,>,'',
+  for example ``\verb,\,\verb,<^bold>,'',
+
+  \item a raw symbol ``\verb,\,\verb,<^raw:,@{text text}\verb,>,''
+  where @{text text} consists of printable characters excluding
+  ``\verb,.,'' and ``\verb,>,'', for example
+  ``\verb,\,\verb,<^raw:$\sum_{i = 1}^n$>,'',
+
+  \item a numbered raw control symbol ``\verb,\,\verb,<^raw,@{text
+  n}\verb,>,'' where @{text n} consists of digits, for example
+  ``\verb,\,\verb,<^raw42>,''.
+
+  \end{enumerate}
+
+  \noindent The @{text "ident"} syntax for symbol names is @{text
+  "letter (letter | digit)\<^sup>*"}, where @{text "letter =
+  A..Za..z"} and @{text "digit = 0..9"}.  There are infinitely many
+  regular symbols and control symbols, but a fixed collection of
+  standard symbols is treated specifically.  For example,
+  ``\verb,\,\verb,,'' is classified as a letter, which means it
+  may occur within regular Isabelle identifiers.
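+
+  For example, a hypothetical {\ML} sketch using @{ML
+  "Symbol.explode"} (described below; the expected result is given in
+  the comment):
+
+  \begin{verbatim}
+  Symbol.explode "a\<alpha>\<^bold>b";
+  (* ["a", "\<alpha>", "\<^bold>", "b"] *)
+  \end{verbatim}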
+
+  Since the character set underlying Isabelle symbols is 7-bit ASCII
+  and 8-bit characters are passed through transparently, Isabelle may
+  also process Unicode/UCS data in UTF-8 encoding.  Unicode provides
+  its own collection of mathematical symbols, but there is no built-in
+  link to the standard collection of Isabelle.
+
+  \medskip Output of Isabelle symbols depends on the print mode
+  (\secref{print-mode}).  For example, the standard {\LaTeX} setup of
+  the Isabelle document preparation system would present
+  ``\verb,\,\verb,,'' as @{text "\"}, and
+  ``\verb,\,\verb,<^bold>,\verb,\,\verb,,'' as @{text
+  "\<^bold>\"}.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_type "Symbol.symbol"} \\
+  @{index_ML Symbol.explode: "string -> Symbol.symbol list"} \\
+  @{index_ML Symbol.is_letter: "Symbol.symbol -> bool"} \\
+  @{index_ML Symbol.is_digit: "Symbol.symbol -> bool"} \\
+  @{index_ML Symbol.is_quasi: "Symbol.symbol -> bool"} \\
+  @{index_ML Symbol.is_blank: "Symbol.symbol -> bool"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML_type "Symbol.sym"} \\
+  @{index_ML Symbol.decode: "Symbol.symbol -> Symbol.sym"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_type "Symbol.symbol"} represents individual Isabelle
+  symbols; this is an alias for @{ML_type "string"}.
+
+  \item @{ML "Symbol.explode"}~@{text "str"} produces a symbol list
+  from the packed form.  This function supersedes @{ML
+  "String.explode"} for virtually all purposes of manipulating text in
+  Isabelle!
+
+  \item @{ML "Symbol.is_letter"}, @{ML "Symbol.is_digit"}, @{ML
+  "Symbol.is_quasi"}, @{ML "Symbol.is_blank"} classify standard
+  symbols according to fixed syntactic conventions of Isabelle, cf.\
+  \cite{isabelle-isar-ref}.
+
+  \item @{ML_type "Symbol.sym"} is a concrete datatype that represents
+  the different kinds of symbols explicitly, with constructors @{ML
+  "Symbol.Char"}, @{ML "Symbol.Sym"}, @{ML "Symbol.Ctrl"}, @{ML
+  "Symbol.Raw"}.
+
+  \item @{ML "Symbol.decode"} converts the string representation of a
+  symbol into the datatype version.
+
+  \end{description}
+*}
+
+
+subsection {* Basic names \label{sec:basic-names} *}
+
+text {*
+  A \emph{basic name} essentially consists of a single Isabelle
+  identifier.  There are conventions to mark separate classes of basic
+  names, by attaching a suffix of underscores: one underscore means
+  \emph{internal name}, two underscores mean \emph{Skolem name},
+  three underscores mean \emph{internal Skolem name}.
+
+  For example, the basic name @{text "foo"} has the internal version
+  @{text "foo_"}, with Skolem versions @{text "foo__"} and @{text
+  "foo___"}, respectively.
+
+  These special versions provide copies of the basic name space, apart
+  from anything that normally appears in the user text.  For example,
+  system-generated variables in Isar proof contexts are usually marked
+  as internal, which prevents mysterious name references like @{text
+  "xaa"} from appearing in the text.
+
+  \medskip Manipulating binding scopes often requires on-the-fly
+  renamings.  A \emph{name context} contains a collection of already
+  used names.  The @{text "declare"} operation adds names to the
+  context.
+
+  The @{text "invents"} operation derives a number of fresh names from
+  a given starting point.  For example, the first three names derived
+  from @{text "a"} are @{text "a"}, @{text "b"}, @{text "c"}.
+
+  The @{text "variants"} operation produces fresh names by
+  incrementing tentative names as base-26 numbers (with digits @{text
+  "a..z"}) until all clashes are resolved.
For example, name @{text + "foo"} results in variants @{text "fooa"}, @{text "foob"}, @{text + "fooc"}, \dots, @{text "fooaa"}, @{text "fooab"} etc.; each renaming + step picks the next unused variant from this sequence. +*} + +text %mlref {* + \begin{mldecls} + @{index_ML Name.internal: "string -> string"} \\ + @{index_ML Name.skolem: "string -> string"} \\ + \end{mldecls} + \begin{mldecls} + @{index_ML_type Name.context} \\ + @{index_ML Name.context: Name.context} \\ + @{index_ML Name.declare: "string -> Name.context -> Name.context"} \\ + @{index_ML Name.invents: "Name.context -> string -> int -> string list"} \\ + @{index_ML Name.variants: "string list -> Name.context -> string list * Name.context"} \\ + \end{mldecls} + + \begin{description} + + \item @{ML Name.internal}~@{text "name"} produces an internal name + by adding one underscore. + + \item @{ML Name.skolem}~@{text "name"} produces a Skolem name by + adding two underscores. + + \item @{ML_type Name.context} represents the context of already used + names; the initial value is @{ML "Name.context"}. + + \item @{ML Name.declare}~@{text "name"} enters a used name into the + context. + + \item @{ML Name.invents}~@{text "context name n"} produces @{text + "n"} fresh names derived from @{text "name"}. + + \item @{ML Name.variants}~@{text "names context"} produces fresh + variants of @{text "names"}; the result is entered into the context. + + \end{description} +*} + + +subsection {* Indexed names *} + +text {* + An \emph{indexed name} (or @{text "indexname"}) is a pair of a basic + name and a natural number. This representation allows efficient + renaming by incrementing the second component only. The canonical + way to rename two collections of indexnames apart from each other is + this: determine the maximum index @{text "maxidx"} of the first + collection, then increment all indexes of the second collection by + @{text "maxidx + 1"}; the maximum index of an empty collection is + @{text "-1"}. + + Occasionally, basic names and indexed names are injected into the + same pair type: the (improper) indexname @{text "(x, -1)"} is used + to encode basic names. + + \medskip Isabelle syntax observes the following rules for + representing an indexname @{text "(x, i)"} as a packed string: + + \begin{itemize} + + \item @{text "?x"} if @{text "x"} does not end with a digit and @{text "i = 0"}, + + \item @{text "?xi"} if @{text "x"} does not end with a digit, + + \item @{text "?x.i"} otherwise. + + \end{itemize} + + Indexnames may acquire large index numbers over time. Results are + normalized towards @{text "0"} at certain checkpoints, notably at + the end of a proof. This works by producing variants of the + corresponding basic name components. For example, the collection + @{text "?x1, ?x7, ?x42"} becomes @{text "?x, ?xa, ?xb"}. +*} + +text %mlref {* + \begin{mldecls} + @{index_ML_type indexname} \\ + \end{mldecls} + + \begin{description} + + \item @{ML_type indexname} represents indexed names. This is an + abbreviation for @{ML_type "string * int"}. The second component is + usually non-negative, except for situations where @{text "(x, -1)"} + is used to embed basic names into this type. + + \end{description} +*} + + +subsection {* Qualified names and name spaces *} + +text {* + A \emph{qualified name} consists of a non-empty sequence of basic + name components. The packed representation uses a dot as separator, + as in ``@{text "A.b.c"}''. 
+  The last component is called the \emph{base} name; the remaining
+  prefix is the \emph{qualifier} (which may be empty).  The idea of
+  qualified names is to encode nested structures by recording the
+  access paths as qualifiers.  For example, an item named ``@{text
+  "A.b.c"}'' may be understood as a local entity @{text "c"}, within a
+  local structure @{text "b"}, within a global structure @{text "A"}.
+  Typically, name space hierarchies consist of 1--2 levels of
+  qualification, but this need not always be so.
+
+  The empty name is commonly used as an indication of unnamed
+  entities, whenever this makes any sense.  The basic operations on
+  qualified names are smart enough to pass through such improper names
+  unchanged.
+
+  \medskip A @{text "naming"} policy tells how to turn a name
+  specification into a fully qualified internal name (by the @{text
+  "full"} operation), and how fully qualified names may be accessed
+  externally.  For example, the default naming policy is to prefix an
+  implicit path: @{text "full x"} produces @{text "path.x"}, and the
+  standard accesses for @{text "path.x"} include both @{text "x"} and
+  @{text "path.x"}.  Normally, the naming is implicit in the theory or
+  proof context; there are separate versions of the corresponding
+  operations.
+
+  \medskip A @{text "name space"} manages a collection of fully
+  internalized names, together with a mapping between external names
+  and internal names (in both directions).  The corresponding @{text
+  "intern"} and @{text "extern"} operations are mostly used for
+  parsing and printing only!  The @{text "declare"} operation augments
+  a name space according to the accesses determined by the naming
+  policy.
+
+  \medskip As a general principle, there is a separate name space for
+  each kind of formal entity, e.g.\ logical constant, type
+  constructor, type class, theorem.  It is usually clear from the
+  occurrence in concrete syntax (or from the scope) which kind of
+  entity a name refers to.  For example, the very same name @{text
+  "c"} may be used uniformly for a constant, type constructor, and
+  type class.
+
+  There are common schemes to name theorems systematically, according
+  to the name of the main logical entity involved, e.g.\ @{text
+  "c.intro"} for a canonical theorem related to constant @{text "c"}.
+  This technique of mapping names from one space into another requires
+  some care in order to avoid conflicts.  In particular, theorem names
+  derived from a type constructor or type class are better suffixed in
+  addition to the usual qualification, e.g.\ @{text "c_type.intro"}
+  and @{text "c_class.intro"} for theorems related to type @{text "c"}
+  and class @{text "c"}, respectively.
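+
+  \medskip To illustrate the basic operations on qualified names, here
+  is a hypothetical {\ML} sketch (expected results in comments; see
+  the interface below):
+
+  \begin{verbatim}
+  NameSpace.explode "A.b.c";     (* ["A", "b", "c"] *)
+  NameSpace.base "A.b.c";        (* "c" *)
+  NameSpace.qualifier "A.b.c";   (* "A.b" *)
+  NameSpace.append "A.b" "c";    (* "A.b.c" *)
+  \end{verbatim}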
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML NameSpace.base: "string -> string"} \\
+  @{index_ML NameSpace.qualifier: "string -> string"} \\
+  @{index_ML NameSpace.append: "string -> string -> string"} \\
+  @{index_ML NameSpace.implode: "string list -> string"} \\
+  @{index_ML NameSpace.explode: "string -> string list"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML_type NameSpace.naming} \\
+  @{index_ML NameSpace.default_naming: NameSpace.naming} \\
+  @{index_ML NameSpace.add_path: "string -> NameSpace.naming -> NameSpace.naming"} \\
+  @{index_ML NameSpace.full_name: "NameSpace.naming -> binding -> string"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML_type NameSpace.T} \\
+  @{index_ML NameSpace.empty: NameSpace.T} \\
+  @{index_ML NameSpace.merge: "NameSpace.T * NameSpace.T -> NameSpace.T"} \\
+  @{index_ML NameSpace.declare: "NameSpace.naming -> binding -> NameSpace.T -> string * NameSpace.T"} \\
+  @{index_ML NameSpace.intern: "NameSpace.T -> string -> string"} \\
+  @{index_ML NameSpace.extern: "NameSpace.T -> string -> string"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML NameSpace.base}~@{text "name"} returns the base name of a
+  qualified name.
+
+  \item @{ML NameSpace.qualifier}~@{text "name"} returns the qualifier
+  of a qualified name.
+
+  \item @{ML NameSpace.append}~@{text "name\<^isub>1 name\<^isub>2"}
+  appends two qualified names.
+
+  \item @{ML NameSpace.implode}~@{text "names"} and @{ML
+  NameSpace.explode}~@{text "name"} convert between the packed string
+  representation and the explicit list form of qualified names.
+
+  \item @{ML_type NameSpace.naming} represents the abstract concept of
+  a naming policy.
+
+  \item @{ML NameSpace.default_naming} is the default naming policy.
+  In a theory context, this is usually augmented by a path prefix
+  consisting of the theory name.
+
+  \item @{ML NameSpace.add_path}~@{text "path naming"} augments the
+  naming policy by extending its path component.
+
+  \item @{ML NameSpace.full_name}~@{text "naming binding"} turns a
+  name binding (usually a basic name) into the fully qualified
+  internal name, according to the given naming policy.
+
+  \item @{ML_type NameSpace.T} represents name spaces.
+
+  \item @{ML NameSpace.empty} and @{ML NameSpace.merge}~@{text
+  "(space\<^isub>1, space\<^isub>2)"} are the canonical operations for
+  maintaining name spaces according to theory data management
+  (\secref{sec:context-data}).
+
+  \item @{ML NameSpace.declare}~@{text "naming binding space"} enters
+  a name binding as a fully qualified internal name into the name
+  space, with external accesses determined by the naming policy.
+
+  \item @{ML NameSpace.intern}~@{text "space name"} internalizes a
+  (partially qualified) external name.
+
+  This operation is mostly for parsing!  Note that fully qualified
+  names stemming from declarations are produced via @{ML
+  "NameSpace.full_name"} and @{ML "NameSpace.declare"}
+  (or their derivatives for @{ML_type theory} and
+  @{ML_type Proof.context}).
+
+  \item @{ML NameSpace.extern}~@{text "space name"} externalizes a
+  (fully qualified) internal name.
+
+  This operation is mostly for printing!  Note that unqualified names
+  are produced via @{ML NameSpace.base}.
+
+  \end{description}
+*}
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Proof.thy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Proof.thy	Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,330 @@
+theory Proof
+imports Base
+begin
+
+chapter {* Structured proofs *}
+
+section {* Variables \label{sec:variables} *}
+
+text {*
+  Any variable that is not explicitly bound by @{text "\"}-abstraction
+  is considered ``free''.  Logically, free variables act like
+  outermost universal quantification at the sequent level: @{text
+  "A\<^isub>1(x), \, A\<^isub>n(x) \ B(x)"} means that the result
+  holds \emph{for all} values of @{text "x"}.  Free variables for
+  terms (not types) can be fully internalized into the logic: @{text
+  "\ B(x)"} and @{text "\ \x. B(x)"} are interchangeable, provided
+  that @{text "x"} does not occur elsewhere in the context.
+  Inspecting @{text "\ \x. B(x)"} more closely, we see that inside the
+  quantifier, @{text "x"} is essentially ``arbitrary, but fixed'',
+  while from outside it appears as a place-holder for instantiation
+  (thanks to @{text "\"} elimination).
+
+  The Pure logic represents the idea of variables being either inside
+  or outside the current scope by providing separate syntactic
+  categories for \emph{fixed variables} (e.g.\ @{text "x"}) vs.\
+  \emph{schematic variables} (e.g.\ @{text "?x"}).  Incidentally, a
+  universal result @{text "\ \x. B(x)"} has the HHF normal form @{text
+  "\ B(?x)"}, which represents its generality nicely without requiring
+  an explicit quantifier.  The same principle works for type
+  variables: @{text "\ B(?\)"} represents the idea of ``@{text "\
+  \\. B(\)"}'' without demanding a truly polymorphic framework.
+
+  \medskip Additional care is required to treat type variables in a
+  way that facilitates type-inference.  In principle, term variables
+  depend on type variables, which means that type variables would have
+  to be declared first.  For example, a raw type-theoretic framework
+  would demand the context to be constructed in stages as follows:
+  @{text "\ = \: type, x: \, a: A(x\<^isub>\)"}.
+
+  We allow a slightly less formalistic mode of operation: term
+  variables @{text "x"} are fixed without specifying a type yet
+  (essentially \emph{all} potential occurrences of some instance
+  @{text "x\<^isub>\"} are fixed); the first occurrence of @{text "x"}
+  within a specific term assigns its most general type, which is then
+  maintained consistently in the context.  The above example becomes
+  @{text "\ = x: term, \: type, A(x\<^isub>\)"}, where type @{text
+  "\"} is fixed \emph{after} term @{text "x"}, and the constraint
+  @{text "x :: \"} is an implicit consequence of the occurrence of
+  @{text "x\<^isub>\"} in the subsequent proposition.
+
+  This twist of dependencies is also accommodated by the reverse
+  operation of exporting results from a context: a type variable
+  @{text "\"} is considered fixed as long as it occurs in some fixed
+  term variable of the context.  For example, exporting @{text "x:
+  term, \: type \ x\<^isub>\ = x\<^isub>\"} produces in the first step
+  @{text "x: term \ x\<^isub>\ = x\<^isub>\"} for fixed @{text "\"},
+  and only in the second step @{text "\ ?x\<^isub>?\<^isub>\ =
+  ?x\<^isub>?\<^isub>\"} for schematic @{text "?x"} and @{text "?\"}.
+
+  \medskip The Isabelle/Isar proof context manages the gory details of
+  term vs.\ type variables, with high-level principles for moving the
+  frontier between fixed and schematic variables.
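+
+  To give a first impression, here is a hypothetical {\ML} sketch of
+  fixing and exporting (using the operations described below; @{ML
+  "Thm.cterm_of"} and the antiquotations @{verbatim "@{context}"} and
+  @{verbatim "@{typ 'a}"} are assumed from the underlying {\ML}
+  environment):
+
+  \begin{verbatim}
+  val ctxt0 = @{context};
+  val ([x], ctxt1) = Variable.add_fixes ["x"] ctxt0;
+  (* a trivial fact about the fixed x *)
+  val th = Thm.reflexive
+    (Thm.cterm_of (ProofContext.theory_of ctxt1) (Free (x, @{typ 'a})));
+  (* export generalizes the fixed x to schematic ?x *)
+  val [th'] = Variable.export ctxt1 ctxt0 [th];
+  \end{verbatim}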
+
+  The @{text "add_fixes"} operation explicitly declares fixed
+  variables; the @{text "declare_term"} operation absorbs a term into
+  a context by fixing new type variables and adding syntactic
+  constraints.
+
+  The @{text "export"} operation is able to perform the main work of
+  generalizing term and type variables as sketched above, assuming
+  that fixed variables and terms have been declared properly.
+
+  The @{text "import"} operation makes a generalized fact a genuine
+  part of the context, by inventing fixed variables for the schematic
+  ones.  The effect can be reversed by using @{text "export"} later,
+  potentially with an extended context; the result is equivalent to
+  the original modulo renaming of schematic variables.
+
+  The @{text "focus"} operation provides a variant of @{text "import"}
+  for nested propositions (with explicit quantification): @{text
+  "\x\<^isub>1 \ x\<^isub>n. B(x\<^isub>1, \, x\<^isub>n)"} is
+  decomposed by inventing fixed variables @{text "x\<^isub>1, \,
+  x\<^isub>n"} for the body.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML Variable.add_fixes: "
+  string list -> Proof.context -> string list * Proof.context"} \\
+  @{index_ML Variable.variant_fixes: "
+  string list -> Proof.context -> string list * Proof.context"} \\
+  @{index_ML Variable.declare_term: "term -> Proof.context -> Proof.context"} \\
+  @{index_ML Variable.declare_constraints: "term -> Proof.context -> Proof.context"} \\
+  @{index_ML Variable.export: "Proof.context -> Proof.context -> thm list -> thm list"} \\
+  @{index_ML Variable.polymorphic: "Proof.context -> term list -> term list"} \\
+  @{index_ML Variable.import_thms: "bool -> thm list -> Proof.context ->
+  ((ctyp list * cterm list) * thm list) * Proof.context"} \\
+  @{index_ML Variable.focus: "cterm -> Proof.context -> (cterm list * cterm) * Proof.context"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML Variable.add_fixes}~@{text "xs ctxt"} fixes term
+  variables @{text "xs"}, returning the resulting internal names.  By
+  default, the internal representation coincides with the external
+  one, which also means that the given variables must not be fixed
+  already.  There is a different policy within a local proof body: the
+  given names are just hints for newly invented Skolem variables.
+
+  \item @{ML Variable.variant_fixes} is similar to @{ML
+  Variable.add_fixes}, but always produces fresh variants of the given
+  names.
+
+  \item @{ML Variable.declare_term}~@{text "t ctxt"} declares term
+  @{text "t"} to belong to the context.  This automatically fixes new
+  type variables, but not term variables.  Syntactic constraints for
+  type and term variables are declared uniformly, though.
+
+  \item @{ML Variable.declare_constraints}~@{text "t ctxt"} declares
+  syntactic constraints from term @{text "t"}, without making it part
+  of the context yet.
+
+  \item @{ML Variable.export}~@{text "inner outer thms"} generalizes
+  fixed type and term variables in @{text "thms"} according to the
+  difference of the @{text "inner"} and @{text "outer"} context,
+  following the principles sketched above.
+
+  \item @{ML Variable.polymorphic}~@{text "ctxt ts"} generalizes type
+  variables in @{text "ts"} as far as possible, even those occurring
+  in fixed term variables.  The default policy of type-inference is to
+  fix newly introduced type variables, which is essentially reversed
+  with @{ML Variable.polymorphic}: here the given terms are detached
+  from the context as far as possible.
+
+  \item @{ML Variable.import_thms}~@{text "open thms ctxt"} invents fixed
+  type and term variables for the schematic ones occurring in @{text
+  "thms"}.  The @{text "open"} flag indicates whether the fixed names
+  should be accessible to the user, otherwise newly introduced names
+  are marked as ``internal'' (\secref{sec:names}).
+
+  \item @{ML Variable.focus}~@{text B} decomposes the outermost @{text
+  "\"} prefix of proposition @{text "B"}.
+
+  \end{description}
+*}
+
+
+section {* Assumptions \label{sec:assumptions} *}
+
+text {*
+  An \emph{assumption} is a proposition that is postulated in the
+  current context.  Local conclusions may use assumptions as
+  additional facts, but this imposes implicit hypotheses that weaken
+  the overall statement.
+
+  Assumptions are restricted to fixed non-schematic statements, i.e.\
+  all generality needs to be expressed by explicit quantifiers.
+  Nevertheless, the result will be in HHF normal form with outermost
+  quantifiers stripped.  For example, by assuming @{text "\x :: \. P
+  x"} we get @{text "\x :: \. P x \ P ?x"} for schematic @{text "?x"}
+  of fixed type @{text "\"}.  Local derivations accumulate more and
+  more explicit references to hypotheses: @{text "A\<^isub>1, \,
+  A\<^isub>n \ B"} where @{text "A\<^isub>1, \, A\<^isub>n"} need to
+  be covered by the assumptions of the current context.
+
+  \medskip The @{text "add_assms"} operation augments the context by
+  local assumptions, which are parameterized by an arbitrary @{text
+  "export"} rule (see below).
+
+  The @{text "export"} operation moves facts from a (larger) inner
+  context into a (smaller) outer context, by discharging the
+  difference of the assumptions as specified by the associated export
+  rules.  Note that the discharged portion is determined by the
+  difference of the contexts, not the facts being exported!  There is
+  a separate flag to indicate a goal context, where the result is
+  meant to refine an enclosing sub-goal of a structured proof state.
+
+  \medskip The most basic export rule discharges assumptions directly
+  by means of the @{text "\"} introduction rule:
+  \[
+  \infer[(@{text "\_intro"})]{@{text "\ \\ A \ A \ B"}}{@{text "\ \ B"}}
+  \]
+
+  The variant for goal refinements marks the newly introduced
+  premises, which causes the canonical Isar goal refinement scheme to
+  enforce unification with local premises within the goal:
+  \[
+  \infer[(@{text "#\_intro"})]{@{text "\ \\ A \ #A \ B"}}{@{text "\ \ B"}}
+  \]
+
+  \medskip Alternative versions of assumptions may perform arbitrary
+  transformations on export, as long as the corresponding portion of
+  hypotheses is removed from the given facts.  For example, a local
+  definition works by fixing @{text "x"} and assuming @{text "x \ t"},
+  with the following export rule to reverse the effect:
+  \[
+  \infer[(@{text "\-expand"})]{@{text "\ \\ x \ t \ B t"}}{@{text "\ \ B x"}}
+  \]
+  This works because the assumption @{text "x \ t"} was introduced in
+  a context with @{text "x"} being fresh, so @{text "x"} does not
+  occur in @{text "\"} here.
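+
+  \medskip For example, here is a hypothetical {\ML} sketch of
+  assuming and discharging (using the interface below; @{verbatim
+  "@{cprop ...}"} is an antiquotation producing a certified
+  proposition):
+
+  \begin{verbatim}
+  val ctxt0 = @{context};
+  val ([th], ctxt1) =
+    Assumption.add_assumes [@{cprop "PROP A"}] ctxt0;
+  (* th is the hypothetical theorem "PROP A |- PROP A" *)
+  val th' = Assumption.export false ctxt1 ctxt0 th;
+  (* th' is "PROP A ==> PROP A", the hypothesis discharged *)
+  \end{verbatim}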
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_type Assumption.export} \\
+  @{index_ML Assumption.assume: "cterm -> thm"} \\
+  @{index_ML Assumption.add_assms:
+  "Assumption.export ->
+  cterm list -> Proof.context -> thm list * Proof.context"} \\
+  @{index_ML Assumption.add_assumes: "
+  cterm list -> Proof.context -> thm list * Proof.context"} \\
+  @{index_ML Assumption.export: "bool -> Proof.context -> Proof.context -> thm -> thm"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_type Assumption.export} represents arbitrary export
+  rules: any function of type @{ML_type "bool -> cterm list -> thm -> thm"},
+  where the @{ML_type "bool"} indicates goal mode, and the @{ML_type
+  "cterm list"} the collection of assumptions to be discharged
+  simultaneously.
+
+  \item @{ML Assumption.assume}~@{text "A"} turns proposition @{text
+  "A"} into a raw assumption @{text "A \ A'"}, where the conclusion
+  @{text "A'"} is in HHF normal form.
+
+  \item @{ML Assumption.add_assms}~@{text "r As"} augments the context
+  by assumptions @{text "As"} with export rule @{text "r"}.  The
+  resulting facts are hypothetical theorems as produced by the raw
+  @{ML Assumption.assume}.
+
+  \item @{ML Assumption.add_assumes}~@{text "As"} is a special case of
+  @{ML Assumption.add_assms} where the export rule performs @{text
+  "\_intro"} or @{text "#\_intro"}, depending on goal mode.
+
+  \item @{ML Assumption.export}~@{text "is_goal inner outer thm"}
+  exports result @{text "thm"} from the @{text "inner"} context
+  back into the @{text "outer"} one; @{text "is_goal = true"} means
+  this is a goal context.  The result is in HHF normal form.  Note
+  that @{ML "ProofContext.export"} combines @{ML "Variable.export"}
+  and @{ML "Assumption.export"} in the canonical way.
+
+  \end{description}
+*}
+
+
+section {* Results \label{sec:results} *}
+
+text {*
+  Local results are established by monotonic reasoning from facts
+  within a context.  This allows common combinations of theorems,
+  e.g.\ via @{text "\/\"} elimination, resolution rules, or equational
+  reasoning; see \secref{sec:thms}.  Unaccounted context manipulations
+  should be avoided, notably raw @{text "\/\"} introduction or ad-hoc
+  references to free variables or assumptions not present in the proof
+  context.
+
+  \medskip The @{text "SUBPROOF"} combinator makes it possible to
+  structure a tactical proof recursively by decomposing a selected
+  sub-goal: @{text "(\x. A(x) \ B(x)) \ \"} is turned into @{text
+  "B(x) \ \"} after fixing @{text "x"} and assuming @{text "A(x)"}.
+  This means the tactic needs to solve the conclusion, but may use the
+  premise as a local fact, for locally fixed variables.
+
+  The @{text "prove"} operation provides an interface for structured
+  backwards reasoning under program control, with some explicit sanity
+  checks of the result.  The goal context can be augmented by
+  additional fixed variables (cf.\ \secref{sec:variables}) and
+  assumptions (cf.\ \secref{sec:assumptions}), which will be available
+  as local facts during the proof and discharged into implications in
+  the result.  Type and term variables are generalized as usual,
+  according to the context.
+
+  The @{text "obtain"} operation produces results by eliminating
+  existing facts by means of a given tactic.  This acts like a dual
+  conclusion: the proof demonstrates that the context may be augmented
+  by certain fixed variables and assumptions.  See also
+  \cite{isabelle-isar-ref} for the user-level @{text "\"} and
+  @{text "\"} elements.
+  Final results, which may not refer to the parameters in the
+  conclusion, need to be exported explicitly into the original
+  context.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML SUBPROOF:
+  "({context: Proof.context, schematics: ctyp list * cterm list,
+    params: cterm list, asms: cterm list, concl: cterm,
+    prems: thm list} -> tactic) -> Proof.context -> int -> tactic"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML Goal.prove: "Proof.context -> string list -> term list -> term ->
+  ({prems: thm list, context: Proof.context} -> tactic) -> thm"} \\
+  @{index_ML Goal.prove_multi: "Proof.context -> string list -> term list -> term list ->
+  ({prems: thm list, context: Proof.context} -> tactic) -> thm list"} \\
+  \end{mldecls}
+  \begin{mldecls}
+  @{index_ML Obtain.result: "(Proof.context -> tactic) ->
+  thm list -> Proof.context -> (cterm list * thm list) * Proof.context"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML SUBPROOF}~@{text "tac ctxt i"} decomposes the structure
+  of the specified sub-goal, producing an extended context and a
+  reduced goal, which needs to be solved by the given tactic.  All
+  schematic parameters of the goal are imported into the context as
+  fixed ones, which may not be instantiated in the sub-proof.
+
+  \item @{ML Goal.prove}~@{text "ctxt xs As C tac"} states goal @{text
+  "C"} in the context augmented by fixed variables @{text "xs"} and
+  assumptions @{text "As"}, and applies tactic @{text "tac"} to solve
+  it.  The latter may depend on the local assumptions being presented
+  as facts.  The result is in HHF normal form.
+
+  \item @{ML Goal.prove_multi} is similar to @{ML Goal.prove}, but
+  states several conclusions simultaneously.  The goal is encoded by
+  means of Pure conjunction; @{ML Goal.conjunction_tac} will turn this
+  into a collection of individual subgoals.
+
+  \item @{ML Obtain.result}~@{text "tac thms ctxt"} eliminates the
+  given facts using a tactic, which results in additional fixed
+  variables and assumptions in the context.  Final results need to be
+  exported explicitly.
+
+  \end{description}
+*}
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Syntax.thy
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/Tactic.thy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/IsarImplementation/Thy/Tactic.thy	Thu Mar 05 02:32:46 2009 +0100
@@ -0,0 +1,405 @@
+theory Tactic
+imports Base
+begin
+
+chapter {* Tactical reasoning *}
+
+text {*
+  Tactical reasoning works by refining the initial claim in a
+  backwards fashion, until a solved form is reached.  A @{text "goal"}
+  consists of several subgoals that need to be solved in order to
+  achieve the main statement; zero subgoals means that the proof may
+  be finished.  A @{text "tactic"} is a refinement operation that maps
+  a goal to a lazy sequence of potential successors.  A @{text
+  "tactical"} is a combinator for composing tactics.
+*}
+
+
+section {* Goals \label{sec:tactical-goals} *}
+
+text {*
+  Isabelle/Pure represents a goal as a theorem stating that the
+  subgoals imply the main goal: @{text "A\<^sub>1 \ \ \ A\<^sub>n \
+  C"}.  The outermost goal structure is that of a Horn Clause, i.e.\
+  an iterated implication without any quantifiers\footnote{Recall that
+  outermost @{text "\x. \[x]"} is always represented via schematic
+  variables in the body: @{text "\[?x]"}.  These variables may get
+  instantiated during the course of reasoning.}.  For @{text "n = 0"}
+  a goal is called ``solved''.
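+
+  For example, here is a hypothetical {\ML} sketch of the initial
+  goal state for a claim @{text "C"} (using the operations described
+  below; the protective marker @{text "#"} is explained shortly):
+
+  \begin{verbatim}
+  val goal = Goal.init @{cprop "PROP C"};
+  (* "PROP C ==> #(PROP C)": one subgoal, protected conclusion *)
+  \end{verbatim}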
+
+  The structure of each subgoal @{text "A\<^sub>i"} is that of a
+  general Hereditary Harrop Formula @{text "\x\<^sub>1 \
+  \x\<^sub>k. H\<^sub>1 \ \ \ H\<^sub>m \ B"}.  Here @{text
+  "x\<^sub>1, \, x\<^sub>k"} are goal parameters, i.e.\
+  arbitrary-but-fixed entities of certain types, and @{text
+  "H\<^sub>1, \, H\<^sub>m"} are goal hypotheses, i.e.\ facts that may
+  be assumed locally.  Together, this forms the goal context of the
+  conclusion @{text B} to be established.  The goal hypotheses may be
+  again arbitrary Hereditary Harrop Formulas, although the level of
+  nesting rarely exceeds 1--2 in practice.
+
+  The main conclusion @{text C} is internally marked as a protected
+  proposition, which is represented explicitly by the notation @{text
+  "#C"}.  This ensures that the decomposition into subgoals and main
+  conclusion is well-defined for arbitrarily structured claims.
+
+  \medskip Basic goal management is performed via the following
+  Isabelle/Pure rules:
+
+  \[
+  \infer[@{text "(init)"}]{@{text "C \ #C"}}{} \qquad
+  \infer[@{text "(finish)"}]{@{text "C"}}{@{text "#C"}}
+  \]
+
+  \medskip The following low-level variants admit general reasoning
+  with protected propositions:
+
+  \[
+  \infer[@{text "(protect)"}]{@{text "#C"}}{@{text "C"}} \qquad
+  \infer[@{text "(conclude)"}]{@{text "A\<^sub>1 \ \ \ A\<^sub>n \ C"}}{@{text "A\<^sub>1 \ \ \ A\<^sub>n \ #C"}}
+  \]
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML Goal.init: "cterm -> thm"} \\
+  @{index_ML Goal.finish: "thm -> thm"} \\
+  @{index_ML Goal.protect: "thm -> thm"} \\
+  @{index_ML Goal.conclude: "thm -> thm"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML "Goal.init"}~@{text C} initializes a tactical goal from
+  the well-formed proposition @{text C}.
+
+  \item @{ML "Goal.finish"}~@{text "thm"} checks whether theorem
+  @{text "thm"} is a solved goal (no subgoals), and concludes the
+  result by removing the goal protection.
+
+  \item @{ML "Goal.protect"}~@{text "thm"} protects the full statement
+  of theorem @{text "thm"}.
+
+  \item @{ML "Goal.conclude"}~@{text "thm"} removes the goal
+  protection, even if there are pending subgoals.
+
+  \end{description}
+*}
+
+
+section {* Tactics *}
+
+text {* A @{text "tactic"} is a function @{text "goal \ goal\<^sup>*\<^sup>*"} that
+  maps a given goal state (represented as a theorem, cf.\
+  \secref{sec:tactical-goals}) to a lazy sequence of potential
+  successor states.  The underlying sequence implementation is lazy
+  both in head and tail, and is purely functional in \emph{not}
+  supporting memoing.\footnote{The lack of memoing and the strict
+  nature of SML require some care when working with low-level
+  sequence operations, to avoid duplicate or premature evaluation of
+  results.}
+
+  An \emph{empty result sequence} means that the tactic has failed: in
+  a compound tactic expression, other tactics might be tried instead,
+  or the whole refinement step might fail outright, producing a
+  toplevel error message.  When implementing tactics from scratch, one
+  should take care to observe the basic protocol of mapping regular
+  error conditions to an empty result; only serious faults should
+  emerge as exceptions.
+
+  By enumerating \emph{multiple results}, a tactic can easily express
+  the potential outcome of an internal search process.  There are also
+  combinators for building proof tools that involve search
+  systematically; see also \secref{sec:tacticals}.
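+
+  For example, the two extreme cases can be written directly as
+  sequence operations (a hypothetical sketch; the canonical versions
+  @{ML all_tac} and @{ML no_tac} are described below):
+
+  \begin{verbatim}
+  val succeed : tactic = fn st => Seq.single st;  (* one result *)
+  val fail    : tactic = fn st => Seq.empty;      (* no results *)
+  \end{verbatim}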
+
+  \medskip As explained in \secref{sec:tactical-goals}, a goal state
+  essentially consists of a list of subgoals that imply the main goal
+  (conclusion).  Tactics may operate on all subgoals or on a specific
+  subgoal, but must not change the main conclusion (apart from
+  instantiating schematic goal variables).
+
+  Tactics with explicit \emph{subgoal addressing} are of the form
+  @{text "int \ tactic"} and may be applied to a particular subgoal
+  (counting from 1).  If the subgoal number is out of range, the
+  tactic should fail with an empty result sequence, but must not raise
+  an exception!
+
+  Operating on a particular subgoal means replacing it by an interval
+  of zero or more subgoals in the same place; other subgoals must not
+  be affected, apart from instantiating schematic variables ranging
+  over the whole goal state.
+
+  A common pattern of composing tactics with subgoal addressing is to
+  try the first one, and then the second one only if the subgoal has
+  not been solved yet.  Special care is required here to avoid bumping
+  into unrelated subgoals that happen to come after the original
+  subgoal.  Assuming that there is only a single initial subgoal is a
+  very common error when implementing tactics!
+
+  Tactics with internal subgoal addressing should expose the subgoal
+  index as @{text "int"} argument in full generality; a hardwired
+  subgoal 1 is inappropriate.
+
+  \medskip The main well-formedness conditions for proper tactics are
+  summarized as follows.
+
+  \begin{itemize}
+
+  \item General tactic failure is indicated by an empty result; only
+  serious faults may produce an exception.
+
+  \item The main conclusion must not be changed, apart from
+  instantiating schematic variables.
+
+  \item A tactic operates either uniformly on all subgoals, or
+  specifically on a selected subgoal (without bumping into unrelated
+  subgoals).
+
+  \item Range errors in subgoal addressing produce an empty result.
+
+  \end{itemize}
+
+  Some of these conditions are checked by higher-level goal
+  infrastructure (\secref{sec:results}); others are not checked
+  explicitly, and violating them merely results in ill-behaved tactics
+  experienced by the user (e.g.\ tactics that insist on being
+  applicable only to singleton goals, or disallow composition with
+  basic tacticals).
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML_type tactic: "thm -> thm Seq.seq"} \\
+  @{index_ML no_tac: tactic} \\
+  @{index_ML all_tac: tactic} \\
+  @{index_ML print_tac: "string -> tactic"} \\[1ex]
+  @{index_ML PRIMITIVE: "(thm -> thm) -> tactic"} \\[1ex]
+  @{index_ML SUBGOAL: "(term * int -> tactic) -> int -> tactic"} \\
+  @{index_ML CSUBGOAL: "(cterm * int -> tactic) -> int -> tactic"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML_type tactic} represents tactics.  The well-formedness
+  conditions described above need to be observed.  See also @{"file"
+  "~~/src/Pure/General/seq.ML"} for the underlying implementation of
+  lazy sequences.
+
+  \item @{ML_type "int -> tactic"} represents tactics with explicit
+  subgoal addressing, with well-formedness conditions as described
+  above.
+
+  \item @{ML no_tac} is a tactic that always fails, returning the
+  empty sequence.
+
+  \item @{ML all_tac} is a tactic that always succeeds, returning a
+  singleton sequence with unchanged goal state.
+
+  \item @{ML print_tac}~@{text "message"} is like @{ML all_tac}, but
+  prints a message together with the goal state on the tracing
+  channel.
+
+
+subsection {* Resolution and assumption tactics \label{sec:resolve-assume-tac} *}
+
+text {* \emph{Resolution} is the most basic mechanism for refining a
+  subgoal using a theorem as an object-level rule.
+  \emph{Elim-resolution} is particularly suited for elimination rules:
+  it resolves with a rule, proves its first premise by assumption, and
+  finally deletes that assumption from any new subgoals.
+  \emph{Destruct-resolution} is like elim-resolution, but the given
+  destruction rules are first turned into canonical elimination
+  format.  \emph{Forward-resolution} is like destruct-resolution, but
+  without deleting the selected assumption.  The @{text "r/e/d/f"}
+  naming convention is maintained for several different kinds of
+  resolution rules and tactics.
+
+  Assumption tactics close a subgoal by unifying some of its premises
+  against its conclusion.
+
+  \medskip All the tactics in this section operate on a subgoal
+  designated by a positive integer.  Other subgoals might be affected
+  indirectly, due to instantiation of schematic variables.
+
+  There are various sources of non-determinism; the tactic result
+  sequence enumerates all possibilities of the following choices (if
+  applicable):
+
+  \begin{enumerate}
+
+  \item selecting one of the rules given as argument to the tactic;
+
+  \item selecting a subgoal premise to eliminate, unifying it against
+  the first premise of the rule;
+
+  \item unifying the conclusion of the subgoal against the conclusion
+  of the rule.
+
+  \end{enumerate}
+
+  Recall that higher-order unification may produce multiple results
+  that are enumerated here.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML resolve_tac: "thm list -> int -> tactic"} \\
+  @{index_ML eresolve_tac: "thm list -> int -> tactic"} \\
+  @{index_ML dresolve_tac: "thm list -> int -> tactic"} \\
+  @{index_ML forward_tac: "thm list -> int -> tactic"} \\[1ex]
+  @{index_ML assume_tac: "int -> tactic"} \\
+  @{index_ML eq_assume_tac: "int -> tactic"} \\[1ex]
+  @{index_ML match_tac: "thm list -> int -> tactic"} \\
+  @{index_ML ematch_tac: "thm list -> int -> tactic"} \\
+  @{index_ML dmatch_tac: "thm list -> int -> tactic"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML resolve_tac}~@{text "thms i"} refines the goal state
+  using the given theorems, which should normally be introduction
+  rules.  The tactic resolves a rule's conclusion with subgoal @{text
+  i}, replacing it by the corresponding versions of the rule's
+  premises.
+
+  \item @{ML eresolve_tac}~@{text "thms i"} performs elim-resolution
+  with the given theorems, which should normally be elimination rules.
+
+  \item @{ML dresolve_tac}~@{text "thms i"} performs
+  destruct-resolution with the given theorems, which should normally
+  be destruction rules.  This replaces an assumption by the result of
+  applying one of the rules.
+
+  \item @{ML forward_tac} is like @{ML dresolve_tac} except that the
+  selected assumption is not deleted.  It applies a rule to an
+  assumption, adding the result as a new assumption.
+
+  \item @{ML assume_tac}~@{text i} attempts to solve subgoal @{text i}
+  by assumption (modulo higher-order unification).
+
+  \item @{ML eq_assume_tac} is similar to @{ML assume_tac}, but checks
+  only for immediate @{text "\<alpha>"}-convertibility instead of using
+  unification.  It succeeds (with a unique next state) if one of the
+  assumptions is equal to the subgoal's conclusion.  Since it does not
+  instantiate variables, it cannot make other subgoals unprovable.
+
+  \item @{ML match_tac}, @{ML ematch_tac}, and @{ML dmatch_tac} are
+  similar to @{ML resolve_tac}, @{ML eresolve_tac}, and @{ML
+  dresolve_tac}, respectively, but do not instantiate schematic
+  variables in the goal state.
+
+  Flexible subgoals are not updated at will, but are left alone.
+  Strictly speaking, matching means treating the unknowns in the goal
+  state as constants; these tactics merely discard unifiers that would
+  update the goal state.
+
+  \end{description}
+*}
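+
+text {* A typical composition of these tactics, given here as a
+  sketch (the function name is made up; @{text rules} stands for
+  arbitrary introduction rules): first try to refine the subgoal with
+  one of the given rules, and only if that fails, close it by
+  assumption.  The @{text ORELSE} tactical (cf.\
+  \secref{sec:tacticals}) tries its second argument only if the first
+  one returned an empty result: *}
+
+ML {*
+  (*sketch: refine subgoal i with one of the given rules, or else
+    solve it by assumption; an empty result from resolve_tac makes
+    ORELSE fall through to assume_tac on the same subgoal*)
+  fun intro_or_assume_tac rules i =
+    resolve_tac rules i ORELSE assume_tac i;
+*}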
+
+
+subsection {* Explicit instantiation within a subgoal context *}
+
+text {* The main resolution tactics (\secref{sec:resolve-assume-tac})
+  use higher-order unification, which works well in many practical
+  situations despite its daunting theoretical properties.
+  Nonetheless, there are important problem classes where unguided
+  higher-order unification is not so useful.  This typically involves
+  rules like universal elimination, existential introduction, or
+  equational substitution.  Here the unification problem involves
+  fully flexible @{text "?P ?x"} schemes, which are hard to manage
+  without further hints.
+
+  By providing a (small) rigid term for @{text "?x"} explicitly, the
+  remaining unification problem is to assign a (large) term to @{text
+  "?P"}, according to the shape of the given subgoal.  This is
+  sufficiently well-behaved in most practical situations.
+
+  \medskip Isabelle provides separate versions of the standard @{text
+  "r/e/d/f"} resolution tactics that allow one to provide explicit
+  instantiations for unknowns of the given rule, wrt.\ terms that
+  refer to the implicit context of the selected subgoal.
+
+  An instantiation consists of a list of pairs of the form @{text
+  "(?x, t)"}, where @{text ?x} is a schematic variable occurring in
+  the given rule, and @{text t} is a term from the current proof
+  context, augmented by the local goal parameters of the selected
+  subgoal; cf.\ the @{text "focus"} operation described in
+  \secref{sec:variables}.
+
+  Entering the syntactic context of a subgoal is a brittle operation,
+  because its exact form is somewhat accidental, and the choice of
+  bound variable names depends on the presence of other local and
+  global names.  Explicit renaming of subgoal parameters prior to
+  explicit instantiation might help to achieve a bit more robustness.
+
+  Type instantiations may be given as well, via pairs like @{text
+  "(?'a, \<tau>)"}.  Type instantiations are distinguished from term
+  instantiations by the syntactic form of the schematic variable.
+  Types are instantiated before terms are.  Since term instantiation
+  already performs type-inference as expected, explicit type
+  instantiations are seldom necessary.
+*}
+
+text %mlref {*
+  \begin{mldecls}
+  @{index_ML res_inst_tac: "Proof.context -> (indexname * string) list -> thm -> int -> tactic"} \\
+  @{index_ML eres_inst_tac: "Proof.context -> (indexname * string) list -> thm -> int -> tactic"} \\
+  @{index_ML dres_inst_tac: "Proof.context -> (indexname * string) list -> thm -> int -> tactic"} \\
+  @{index_ML forw_inst_tac: "Proof.context -> (indexname * string) list -> thm -> int -> tactic"} \\[1ex]
+  @{index_ML rename_tac: "string list -> int -> tactic"} \\
+  \end{mldecls}
+
+  \begin{description}
+
+  \item @{ML res_inst_tac}~@{text "ctxt insts thm i"} instantiates the
+  rule @{text thm} with the instantiations @{text insts}, as described
+  above, and then performs resolution on subgoal @{text i}.
+
+  \item @{ML eres_inst_tac} is like @{ML res_inst_tac}, but performs
+  elim-resolution.
+
+  \item @{ML dres_inst_tac} is like @{ML res_inst_tac}, but performs
+  destruct-resolution.
+
+  \item @{ML forw_inst_tac} is like @{ML dres_inst_tac} except that
+  the selected assumption is not deleted.
+
+  \item @{ML rename_tac}~@{text "names i"} renames the innermost
+  parameters of subgoal @{text i} according to the provided @{text
+  names} (which need to be distinct identifiers).
+
+  \end{description}
+*}
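+
+text {* A hypothetical sketch of explicit instantiation (the function
+  name is made up, and the given @{text rule} is assumed to contain a
+  schematic term variable @{text "?x"}): renaming the subgoal
+  parameter first makes the textual instantiation less sensitive to
+  accidental choices of bound variable names, as recommended above: *}
+
+ML {*
+  (*sketch: rename the innermost parameter of subgoal i to "x", then
+    instantiate ?x of the rule by the string "x", which is parsed in
+    the context of the subgoal and thus refers to that parameter*)
+  fun inst_with_param_tac ctxt rule i =
+    rename_tac ["x"] i
+    THEN res_inst_tac ctxt [(("x", 0), "x")] rule i;
+*}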
+
+
+section {* Tacticals \label{sec:tacticals} *}
+
+text {*
+  A \emph{tactical} is a functional combinator for building up complex
+  tactics from simpler ones.  Typical tacticals perform sequential
+  composition, disjunction (choice), iteration, or goal addressing.
+  Various search strategies may be expressed via tacticals.
+
+  \medskip FIXME
+*}
+
+end
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarImplementation/Thy/document/ML.tex
--- a/doc-src/IsarImplementation/Thy/document/ML.tex	Wed Mar 04 17:12:23 2009 -0800
+++ b/doc-src/IsarImplementation/Thy/document/ML.tex	Thu Mar 05 02:32:46 2009 +0100
@@ -785,7 +785,6 @@
 \isadelimtheory
 %
 \endisadelimtheory
-\isanewline
 \end{isabellebody}%
 %%% Local Variables:
 %%% mode: latex
diff -r ecd6f0ca62ea -r 44832d503659 doc-src/IsarRef/Thy/document/Generic.tex
--- a/doc-src/IsarRef/Thy/document/Generic.tex	Wed Mar 04 17:12:23 2009 -0800
+++ b/doc-src/IsarRef/Thy/document/Generic.tex	Thu Mar 05 02:32:46 2009 +0100
@@ -503,7 +503,7 @@
 \item \hyperlink{command.simproc-setup}{\mbox{\isa{\isacommand{simproc{\isacharunderscore}setup}}}} defines
   a named simplification procedure that is invoked by the Simplifier
   whenever any of the given term patterns match the current redex.
   The implementation,
-  which is provided as ML source text, needs to be of type \verb|"morphism -> simpset -> cterm -> thm option"|, where the \verb|cterm| represents the current redex \isa{r} and the result is
+  which is provided as ML source text, needs to be of type \verb|morphism -> simpset -> cterm -> thm option|, where the \verb|cterm| represents the current redex \isa{r} and the result is
   supposed to be some proven rewrite rule \isa{{\isachardoublequote}r\ {\isasymequiv}\ r{\isacharprime}{\isachardoublequote}} (or a generalized version), or \verb|NONE| to indicate failure.  The \verb|simpset| argument holds the full context of the current