# HG changeset patch # User haftmann # Date 1281703215 -7200 # Node ID 7935b334893e7bb55d1fb286daa11b17597a3e8b # Parent a461dd80f83c1754541ec3435caaade5fdcf26a9 sketch of new outline diff -r a461dd80f83c -r 7935b334893e doc-src/Codegen/Thy/Foundations.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc-src/Codegen/Thy/Foundations.thy Fri Aug 13 14:40:15 2010 +0200 @@ -0,0 +1,432 @@ +theory Foundations +imports Introduction +begin + +section {* Code generation foundations \label{sec:program} *} + +subsection {* The @{text "Isabelle/HOL"} default setup *} + +text {* + We have already seen how by default equations stemming from + @{command definition}, @{command primrec} and @{command fun} + statements are used for code generation. This default behaviour + can be changed, e.g.\ by providing different code equations. + The customisations shown in this section are \emph{safe} + as regards correctness: all programs that can be generated are partially + correct. +*} + +subsection {* Selecting code equations *} + +text {* + Coming back to our introductory example, we + could provide an alternative code equations for @{const dequeue} + explicitly: +*} + +lemma %quote [code]: + "dequeue (AQueue xs []) = + (if xs = [] then (None, AQueue [] []) + else dequeue (AQueue [] (rev xs)))" + "dequeue (AQueue xs (y # ys)) = + (Some y, AQueue xs ys)" + by (cases xs, simp_all) (cases "rev xs", simp_all) + +text {* + \noindent The annotation @{text "[code]"} is an @{text Isar} + @{text attribute} which states that the given theorems should be + considered as code equations for a @{text fun} statement -- + the corresponding constant is determined syntactically. The resulting code: +*} + +text %quote {*@{code_stmts dequeue (consts) dequeue (Haskell)}*} + +text {* + \noindent You may note that the equality test @{term "xs = []"} has been + replaced by the predicate @{term "null xs"}. This is due to the default + setup in the \qn{preprocessor} to be discussed further below (\secref{sec:preproc}). + + Changing the default constructor set of datatypes is also + possible. See \secref{sec:datatypes} for an example. + + As told in \secref{sec:concept}, code generation is based + on a structured collection of code theorems. + This collection + may be inspected using the @{command code_thms} command: +*} + +code_thms %quote dequeue + +text {* + \noindent prints a table with \emph{all} code equations + for @{const dequeue}, including + \emph{all} code equations those equations depend + on recursively. + + Similarly, the @{command code_deps} command shows a graph + visualising dependencies between code equations. +*} + +subsection {* @{text class} and @{text instantiation} *} + +text {* + Concerning type classes and code generation, let us examine an example + from abstract algebra: +*} + +class %quote semigroup = + fixes mult :: "'a \ 'a \ 'a" (infixl "\" 70) + assumes assoc: "(x \ y) \ z = x \ (y \ z)" + +class %quote monoid = semigroup + + fixes neutral :: 'a ("\") + assumes neutl: "\ \ x = x" + and neutr: "x \ \ = x" + +instantiation %quote nat :: monoid +begin + +primrec %quote mult_nat where + "0 \ n = (0\nat)" + | "Suc m \ n = n + m \ n" + +definition %quote neutral_nat where + "\ = Suc 0" + +lemma %quote add_mult_distrib: + fixes n m q :: nat + shows "(n + m) \ q = n \ q + m \ q" + by (induct n) simp_all + +instance %quote proof + fix m n q :: nat + show "m \ n \ q = m \ (n \ q)" + by (induct m) (simp_all add: add_mult_distrib) + show "\ \ n = n" + by (simp add: neutral_nat_def) + show "m \ \ = m" + by (induct m) (simp_all add: neutral_nat_def) +qed + +end %quote + +text {* + \noindent We define the natural operation of the natural numbers + on monoids: +*} + +primrec %quote (in monoid) pow :: "nat \ 'a \ 'a" where + "pow 0 a = \" + | "pow (Suc n) a = a \ pow n a" + +text {* + \noindent This we use to define the discrete exponentiation function: +*} + +definition %quote bexp :: "nat \ nat" where + "bexp n = pow n (Suc (Suc 0))" + +text {* + \noindent The corresponding code in Haskell uses that language's native classes: +*} + +text %quote {*@{code_stmts bexp (Haskell)}*} + +text {* + \noindent This is a convenient place to show how explicit dictionary construction + manifests in generated code (here, the same example in @{text SML}) + \cite{Haftmann-Nipkow:2010:code}: +*} + +text %quote {*@{code_stmts bexp (SML)}*} + +text {* + \noindent Note the parameters with trailing underscore (@{verbatim "A_"}), + which are the dictionary parameters. +*} + +subsection {* The preprocessor \label{sec:preproc} *} + +text {* + Before selected function theorems are turned into abstract + code, a chain of definitional transformation steps is carried + out: \emph{preprocessing}. In essence, the preprocessor + consists of two components: a \emph{simpset} and \emph{function transformers}. + + The \emph{simpset} can apply the full generality of the + Isabelle simplifier. Due to the interpretation of theorems as code + equations, rewrites are applied to the right hand side and the + arguments of the left hand side of an equation, but never to the + constant heading the left hand side. An important special case are + \emph{unfold theorems}, which may be declared and removed using + the @{attribute code_unfold} or \emph{@{attribute code_unfold} del} + attribute, respectively. + + Some common applications: +*} + +text_raw {* + \begin{itemize} +*} + +text {* + \item replacing non-executable constructs by executable ones: +*} + +lemma %quote [code_unfold]: + "x \ set xs \ List.member xs x" by (fact in_set_member) + +text {* + \item eliminating superfluous constants: +*} + +lemma %quote [code_unfold]: + "1 = Suc 0" by (fact One_nat_def) + +text {* + \item replacing executable but inconvenient constructs: +*} + +lemma %quote [code_unfold]: + "xs = [] \ List.null xs" by (fact eq_Nil_null) + +text_raw {* + \end{itemize} +*} + +text {* + \noindent \emph{Function transformers} provide a very general interface, + transforming a list of function theorems to another + list of function theorems, provided that neither the heading + constant nor its type change. The @{term "0\nat"} / @{const Suc} + pattern elimination implemented in + theory @{text Efficient_Nat} (see \secref{eff_nat}) uses this + interface. + + \noindent The current setup of the preprocessor may be inspected using + the @{command print_codeproc} command. + @{command code_thms} provides a convenient + mechanism to inspect the impact of a preprocessor setup + on code equations. + + \begin{warn} + + Attribute @{attribute code_unfold} also applies to the + preprocessor of the ancient @{text "SML code generator"}; in case + this is not what you intend, use @{attribute code_inline} instead. + \end{warn} +*} + +subsection {* Datatypes \label{sec:datatypes} *} + +text {* + Conceptually, any datatype is spanned by a set of + \emph{constructors} of type @{text "\ = \ \ \ \\<^isub>1 \ \\<^isub>n"} where @{text + "{\\<^isub>1, \, \\<^isub>n}"} is exactly the set of \emph{all} type variables in + @{text "\"}. The HOL datatype package by default registers any new + datatype in the table of datatypes, which may be inspected using the + @{command print_codesetup} command. + + In some cases, it is appropriate to alter or extend this table. As + an example, we will develop an alternative representation of the + queue example given in \secref{sec:intro}. The amortised + representation is convenient for generating code but exposes its + \qt{implementation} details, which may be cumbersome when proving + theorems about it. Therefore, here is a simple, straightforward + representation of queues: +*} + +datatype %quote 'a queue = Queue "'a list" + +definition %quote empty :: "'a queue" where + "empty = Queue []" + +primrec %quote enqueue :: "'a \ 'a queue \ 'a queue" where + "enqueue x (Queue xs) = Queue (xs @ [x])" + +fun %quote dequeue :: "'a queue \ 'a option \ 'a queue" where + "dequeue (Queue []) = (None, Queue [])" + | "dequeue (Queue (x # xs)) = (Some x, Queue xs)" + +text {* + \noindent This we can use directly for proving; for executing, + we provide an alternative characterisation: +*} + +definition %quote AQueue :: "'a list \ 'a list \ 'a queue" where + "AQueue xs ys = Queue (ys @ rev xs)" + +code_datatype %quote AQueue + +text {* + \noindent Here we define a \qt{constructor} @{const "AQueue"} which + is defined in terms of @{text "Queue"} and interprets its arguments + according to what the \emph{content} of an amortised queue is supposed + to be. Equipped with this, we are able to prove the following equations + for our primitive queue operations which \qt{implement} the simple + queues in an amortised fashion: +*} + +lemma %quote empty_AQueue [code]: + "empty = AQueue [] []" + unfolding AQueue_def empty_def by simp + +lemma %quote enqueue_AQueue [code]: + "enqueue x (AQueue xs ys) = AQueue (x # xs) ys" + unfolding AQueue_def by simp + +lemma %quote dequeue_AQueue [code]: + "dequeue (AQueue xs []) = + (if xs = [] then (None, AQueue [] []) + else dequeue (AQueue [] (rev xs)))" + "dequeue (AQueue xs (y # ys)) = (Some y, AQueue xs ys)" + unfolding AQueue_def by simp_all + +text {* + \noindent For completeness, we provide a substitute for the + @{text case} combinator on queues: +*} + +lemma %quote queue_case_AQueue [code]: + "queue_case f (AQueue xs ys) = f (ys @ rev xs)" + unfolding AQueue_def by simp + +text {* + \noindent The resulting code looks as expected: +*} + +text %quote {*@{code_stmts empty enqueue dequeue (SML)}*} + +text {* + \noindent From this example, it can be glimpsed that using own + constructor sets is a little delicate since it changes the set of + valid patterns for values of that type. Without going into much + detail, here some practical hints: + + \begin{itemize} + + \item When changing the constructor set for datatypes, take care + to provide alternative equations for the @{text case} combinator. + + \item Values in the target language need not to be normalised -- + different values in the target language may represent the same + value in the logic. + + \item Usually, a good methodology to deal with the subtleties of + pattern matching is to see the type as an abstract type: provide + a set of operations which operate on the concrete representation + of the type, and derive further operations by combinations of + these primitive ones, without relying on a particular + representation. + + \end{itemize} +*} + + +subsection {* Equality *} + +text {* + Surely you have already noticed how equality is treated + by the code generator: +*} + +primrec %quote collect_duplicates :: "'a list \ 'a list \ 'a list \ 'a list" where + "collect_duplicates xs ys [] = xs" + | "collect_duplicates xs ys (z#zs) = (if z \ set xs + then if z \ set ys + then collect_duplicates xs ys zs + else collect_duplicates xs (z#ys) zs + else collect_duplicates (z#xs) (z#ys) zs)" + +text {* + \noindent During preprocessing, the membership test is rewritten, + resulting in @{const List.member}, which itself + performs an explicit equality check. +*} + +text %quote {*@{code_stmts collect_duplicates (SML)}*} + +text {* + \noindent Obviously, polymorphic equality is implemented the Haskell + way using a type class. How is this achieved? HOL introduces + an explicit class @{class eq} with a corresponding operation + @{const eq_class.eq} such that @{thm eq [no_vars]}. + The preprocessing framework does the rest by propagating the + @{class eq} constraints through all dependent code equations. + For datatypes, instances of @{class eq} are implicitly derived + when possible. For other types, you may instantiate @{text eq} + manually like any other type class. +*} + + +subsection {* Explicit partiality *} + +text {* + Partiality usually enters the game by partial patterns, as + in the following example, again for amortised queues: +*} + +definition %quote strict_dequeue :: "'a queue \ 'a \ 'a queue" where + "strict_dequeue q = (case dequeue q + of (Some x, q') \ (x, q'))" + +lemma %quote strict_dequeue_AQueue [code]: + "strict_dequeue (AQueue xs (y # ys)) = (y, AQueue xs ys)" + "strict_dequeue (AQueue xs []) = + (case rev xs of y # ys \ (y, AQueue [] ys))" + by (simp_all add: strict_dequeue_def dequeue_AQueue split: list.splits) + +text {* + \noindent In the corresponding code, there is no equation + for the pattern @{term "AQueue [] []"}: +*} + +text %quote {*@{code_stmts strict_dequeue (consts) strict_dequeue (Haskell)}*} + +text {* + \noindent In some cases it is desirable to have this + pseudo-\qt{partiality} more explicitly, e.g.~as follows: +*} + +axiomatization %quote empty_queue :: 'a + +definition %quote strict_dequeue' :: "'a queue \ 'a \ 'a queue" where + "strict_dequeue' q = (case dequeue q of (Some x, q') \ (x, q') | _ \ empty_queue)" + +lemma %quote strict_dequeue'_AQueue [code]: + "strict_dequeue' (AQueue xs []) = (if xs = [] then empty_queue + else strict_dequeue' (AQueue [] (rev xs)))" + "strict_dequeue' (AQueue xs (y # ys)) = + (y, AQueue xs ys)" + by (simp_all add: strict_dequeue'_def dequeue_AQueue split: list.splits) + +text {* + Observe that on the right hand side of the definition of @{const + "strict_dequeue'"}, the unspecified constant @{const empty_queue} occurs. + + Normally, if constants without any code equations occur in a + program, the code generator complains (since in most cases this is + indeed an error). But such constants can also be thought + of as function definitions which always fail, + since there is never a successful pattern match on the left hand + side. In order to categorise a constant into that category + explicitly, use @{command "code_abort"}: +*} + +code_abort %quote empty_queue + +text {* + \noindent Then the code generator will just insert an error or + exception at the appropriate position: +*} + +text %quote {*@{code_stmts strict_dequeue' (consts) empty_queue strict_dequeue' (Haskell)}*} + +text {* + \noindent This feature however is rarely needed in practice. + Note also that the @{text HOL} default setup already declares + @{const undefined} as @{command "code_abort"}, which is most + likely to be used in such situations. +*} + +end diff -r a461dd80f83c -r 7935b334893e doc-src/Codegen/Thy/Further.thy --- a/doc-src/Codegen/Thy/Further.thy Fri Aug 13 13:43:55 2010 +0200 +++ b/doc-src/Codegen/Thy/Further.thy Fri Aug 13 14:40:15 2010 +0200 @@ -191,4 +191,140 @@ the framework described there is available in theory @{theory Imperative_HOL}. *} + +subsection {* ML system interfaces \label{sec:ml} *} + +text {* + Since the code generator framework not only aims to provide + a nice Isar interface but also to form a base for + code-generation-based applications, here a short + description of the most important ML interfaces. +*} + +subsubsection {* Managing executable content *} + +text %mlref {* + \begin{mldecls} + @{index_ML Code.add_eqn: "thm -> theory -> theory"} \\ + @{index_ML Code.del_eqn: "thm -> theory -> theory"} \\ + @{index_ML Code_Preproc.map_pre: "(simpset -> simpset) -> theory -> theory"} \\ + @{index_ML Code_Preproc.map_post: "(simpset -> simpset) -> theory -> theory"} \\ + @{index_ML Code_Preproc.add_functrans: "string * (theory -> (thm * bool) list -> (thm * bool) list option) + -> theory -> theory"} \\ + @{index_ML Code_Preproc.del_functrans: "string -> theory -> theory"} \\ + @{index_ML Code.add_datatype: "(string * typ) list -> theory -> theory"} \\ + @{index_ML Code.get_type: "theory -> string + -> (string * sort) list * ((string * string list) * typ list) list"} \\ + @{index_ML Code.get_type_of_constr_or_abstr: "theory -> string -> (string * bool) option"} + \end{mldecls} + + \begin{description} + + \item @{ML Code.add_eqn}~@{text "thm"}~@{text "thy"} adds function + theorem @{text "thm"} to executable content. + + \item @{ML Code.del_eqn}~@{text "thm"}~@{text "thy"} removes function + theorem @{text "thm"} from executable content, if present. + + \item @{ML Code_Preproc.map_pre}~@{text "f"}~@{text "thy"} changes + the preprocessor simpset. + + \item @{ML Code_Preproc.add_functrans}~@{text "(name, f)"}~@{text "thy"} adds + function transformer @{text f} (named @{text name}) to executable content; + @{text f} is a transformer of the code equations belonging + to a certain function definition, depending on the + current theory context. Returning @{text NONE} indicates that no + transformation took place; otherwise, the whole process will be iterated + with the new code equations. + + \item @{ML Code_Preproc.del_functrans}~@{text "name"}~@{text "thy"} removes + function transformer named @{text name} from executable content. + + \item @{ML Code.add_datatype}~@{text cs}~@{text thy} adds + a datatype to executable content, with generation + set @{text cs}. + + \item @{ML Code.get_type_of_constr_or_abstr}~@{text "thy"}~@{text "const"} + returns type constructor corresponding to + constructor @{text const}; returns @{text NONE} + if @{text const} is no constructor. + + \end{description} +*} + +subsubsection {* Auxiliary *} + +text %mlref {* + \begin{mldecls} + @{index_ML Code.read_const: "theory -> string -> string"} + \end{mldecls} + + \begin{description} + + \item @{ML Code.read_const}~@{text thy}~@{text s} + reads a constant as a concrete term expression @{text s}. + + \end{description} + +*} + +subsubsection {* Data depending on the theory's executable content *} + +text {* + Implementing code generator applications on top + of the framework set out so far usually not only + involves using those primitive interfaces + but also storing code-dependent data and various + other things. + + Due to incrementality of code generation, changes in the + theory's executable content have to be propagated in a + certain fashion. Additionally, such changes may occur + not only during theory extension but also during theory + merge, which is a little bit nasty from an implementation + point of view. The framework provides a solution + to this technical challenge by providing a functorial + data slot @{ML_functor Code_Data}; on instantiation + of this functor, the following types and operations + are required: + + \medskip + \begin{tabular}{l} + @{text "type T"} \\ + @{text "val empty: T"} \\ + \end{tabular} + + \begin{description} + + \item @{text T} the type of data to store. + + \item @{text empty} initial (empty) data. + + \end{description} + + \noindent An instance of @{ML_functor Code_Data} provides the following + interface: + + \medskip + \begin{tabular}{l} + @{text "change: theory \ (T \ T) \ T"} \\ + @{text "change_yield: theory \ (T \ 'a * T) \ 'a * T"} + \end{tabular} + + \begin{description} + + \item @{text change} update of current data (cached!) + by giving a continuation. + + \item @{text change_yield} update with side result. + + \end{description} +*} + +text {* + \bigskip + + \emph{Happy proving, happy hacking!} +*} + end diff -r a461dd80f83c -r 7935b334893e doc-src/Codegen/Thy/Inductive_Predicate.thy --- a/doc-src/Codegen/Thy/Inductive_Predicate.thy Fri Aug 13 13:43:55 2010 +0200 +++ b/doc-src/Codegen/Thy/Inductive_Predicate.thy Fri Aug 13 14:40:15 2010 +0200 @@ -2,7 +2,7 @@ imports Setup begin -subsection {* Inductive Predicates *} +section {* Inductive Predicates *} (*<*) hide_const append diff -r a461dd80f83c -r 7935b334893e doc-src/Codegen/Thy/Introduction.thy --- a/doc-src/Codegen/Thy/Introduction.thy Fri Aug 13 13:43:55 2010 +0200 +++ b/doc-src/Codegen/Thy/Introduction.thy Fri Aug 13 14:40:15 2010 +0200 @@ -2,7 +2,17 @@ imports Setup begin -section {* Introduction and Overview *} +section {* Introduction *} + +subsection {* Code generation fundamental: shallow embedding *} + +subsection {* A quick start with the @{text "Isabelle/HOL"} toolbox *} + +subsection {* Type classes *} + +subsection {* How to continue from here *} + +subsection {* If something goes utterly wrong *} text {* This tutorial introduces a generic code generator for the diff -r a461dd80f83c -r 7935b334893e doc-src/Codegen/Thy/ML.thy --- a/doc-src/Codegen/Thy/ML.thy Fri Aug 13 13:43:55 2010 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,162 +0,0 @@ -theory "ML" -imports Setup -begin - -section {* ML system interfaces \label{sec:ml} *} - -text {* - Since the code generator framework not only aims to provide - a nice Isar interface but also to form a base for - code-generation-based applications, here a short - description of the most important ML interfaces. -*} - -subsection {* Executable theory content: @{text Code} *} - -text {* - This Pure module implements the core notions of - executable content of a theory. -*} - -subsubsection {* Managing executable content *} - -text %mlref {* - \begin{mldecls} - @{index_ML Code.add_eqn: "thm -> theory -> theory"} \\ - @{index_ML Code.del_eqn: "thm -> theory -> theory"} \\ - @{index_ML Code_Preproc.map_pre: "(simpset -> simpset) -> theory -> theory"} \\ - @{index_ML Code_Preproc.map_post: "(simpset -> simpset) -> theory -> theory"} \\ - @{index_ML Code_Preproc.add_functrans: "string * (theory -> (thm * bool) list -> (thm * bool) list option) - -> theory -> theory"} \\ - @{index_ML Code_Preproc.del_functrans: "string -> theory -> theory"} \\ - @{index_ML Code.add_datatype: "(string * typ) list -> theory -> theory"} \\ - @{index_ML Code.get_type: "theory -> string - -> (string * sort) list * ((string * string list) * typ list) list"} \\ - @{index_ML Code.get_type_of_constr_or_abstr: "theory -> string -> (string * bool) option"} - \end{mldecls} - - \begin{description} - - \item @{ML Code.add_eqn}~@{text "thm"}~@{text "thy"} adds function - theorem @{text "thm"} to executable content. - - \item @{ML Code.del_eqn}~@{text "thm"}~@{text "thy"} removes function - theorem @{text "thm"} from executable content, if present. - - \item @{ML Code_Preproc.map_pre}~@{text "f"}~@{text "thy"} changes - the preprocessor simpset. - - \item @{ML Code_Preproc.add_functrans}~@{text "(name, f)"}~@{text "thy"} adds - function transformer @{text f} (named @{text name}) to executable content; - @{text f} is a transformer of the code equations belonging - to a certain function definition, depending on the - current theory context. Returning @{text NONE} indicates that no - transformation took place; otherwise, the whole process will be iterated - with the new code equations. - - \item @{ML Code_Preproc.del_functrans}~@{text "name"}~@{text "thy"} removes - function transformer named @{text name} from executable content. - - \item @{ML Code.add_datatype}~@{text cs}~@{text thy} adds - a datatype to executable content, with generation - set @{text cs}. - - \item @{ML Code.get_type_of_constr_or_abstr}~@{text "thy"}~@{text "const"} - returns type constructor corresponding to - constructor @{text const}; returns @{text NONE} - if @{text const} is no constructor. - - \end{description} -*} - -subsection {* Auxiliary *} - -text %mlref {* - \begin{mldecls} - @{index_ML Code.read_const: "theory -> string -> string"} - \end{mldecls} - - \begin{description} - - \item @{ML Code.read_const}~@{text thy}~@{text s} - reads a constant as a concrete term expression @{text s}. - - \end{description} - -*} - -subsection {* Implementing code generator applications *} - -text {* - Implementing code generator applications on top - of the framework set out so far usually not only - involves using those primitive interfaces - but also storing code-dependent data and various - other things. -*} - -subsubsection {* Data depending on the theory's executable content *} - -text {* - Due to incrementality of code generation, changes in the - theory's executable content have to be propagated in a - certain fashion. Additionally, such changes may occur - not only during theory extension but also during theory - merge, which is a little bit nasty from an implementation - point of view. The framework provides a solution - to this technical challenge by providing a functorial - data slot @{ML_functor Code_Data}; on instantiation - of this functor, the following types and operations - are required: - - \medskip - \begin{tabular}{l} - @{text "type T"} \\ - @{text "val empty: T"} \\ - @{text "val purge: theory \ string list option \ T \ T"} - \end{tabular} - - \begin{description} - - \item @{text T} the type of data to store. - - \item @{text empty} initial (empty) data. - - \item @{text purge}~@{text thy}~@{text consts} propagates changes in executable content; - @{text consts} indicates the kind - of change: @{ML NONE} stands for a fundamental change - which invalidates any existing code, @{text "SOME consts"} - hints that executable content for constants @{text consts} - has changed. - - \end{description} - - \noindent An instance of @{ML_functor Code_Data} provides the following - interface: - - \medskip - \begin{tabular}{l} - @{text "get: theory \ T"} \\ - @{text "change: theory \ (T \ T) \ T"} \\ - @{text "change_yield: theory \ (T \ 'a * T) \ 'a * T"} - \end{tabular} - - \begin{description} - - \item @{text get} retrieval of the current data. - - \item @{text change} update of current data (cached!) - by giving a continuation. - - \item @{text change_yield} update with side result. - - \end{description} -*} - -text {* - \bigskip - - \emph{Happy proving, happy hacking!} -*} - -end diff -r a461dd80f83c -r 7935b334893e doc-src/Codegen/Thy/Program.thy --- a/doc-src/Codegen/Thy/Program.thy Fri Aug 13 13:43:55 2010 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,432 +0,0 @@ -theory Program -imports Introduction -begin - -section {* Turning Theories into Programs \label{sec:program} *} - -subsection {* The @{text "Isabelle/HOL"} default setup *} - -text {* - We have already seen how by default equations stemming from - @{command definition}, @{command primrec} and @{command fun} - statements are used for code generation. This default behaviour - can be changed, e.g.\ by providing different code equations. - The customisations shown in this section are \emph{safe} - as regards correctness: all programs that can be generated are partially - correct. -*} - -subsection {* Selecting code equations *} - -text {* - Coming back to our introductory example, we - could provide an alternative code equations for @{const dequeue} - explicitly: -*} - -lemma %quote [code]: - "dequeue (AQueue xs []) = - (if xs = [] then (None, AQueue [] []) - else dequeue (AQueue [] (rev xs)))" - "dequeue (AQueue xs (y # ys)) = - (Some y, AQueue xs ys)" - by (cases xs, simp_all) (cases "rev xs", simp_all) - -text {* - \noindent The annotation @{text "[code]"} is an @{text Isar} - @{text attribute} which states that the given theorems should be - considered as code equations for a @{text fun} statement -- - the corresponding constant is determined syntactically. The resulting code: -*} - -text %quote {*@{code_stmts dequeue (consts) dequeue (Haskell)}*} - -text {* - \noindent You may note that the equality test @{term "xs = []"} has been - replaced by the predicate @{term "null xs"}. This is due to the default - setup in the \qn{preprocessor} to be discussed further below (\secref{sec:preproc}). - - Changing the default constructor set of datatypes is also - possible. See \secref{sec:datatypes} for an example. - - As told in \secref{sec:concept}, code generation is based - on a structured collection of code theorems. - This collection - may be inspected using the @{command code_thms} command: -*} - -code_thms %quote dequeue - -text {* - \noindent prints a table with \emph{all} code equations - for @{const dequeue}, including - \emph{all} code equations those equations depend - on recursively. - - Similarly, the @{command code_deps} command shows a graph - visualising dependencies between code equations. -*} - -subsection {* @{text class} and @{text instantiation} *} - -text {* - Concerning type classes and code generation, let us examine an example - from abstract algebra: -*} - -class %quote semigroup = - fixes mult :: "'a \ 'a \ 'a" (infixl "\" 70) - assumes assoc: "(x \ y) \ z = x \ (y \ z)" - -class %quote monoid = semigroup + - fixes neutral :: 'a ("\") - assumes neutl: "\ \ x = x" - and neutr: "x \ \ = x" - -instantiation %quote nat :: monoid -begin - -primrec %quote mult_nat where - "0 \ n = (0\nat)" - | "Suc m \ n = n + m \ n" - -definition %quote neutral_nat where - "\ = Suc 0" - -lemma %quote add_mult_distrib: - fixes n m q :: nat - shows "(n + m) \ q = n \ q + m \ q" - by (induct n) simp_all - -instance %quote proof - fix m n q :: nat - show "m \ n \ q = m \ (n \ q)" - by (induct m) (simp_all add: add_mult_distrib) - show "\ \ n = n" - by (simp add: neutral_nat_def) - show "m \ \ = m" - by (induct m) (simp_all add: neutral_nat_def) -qed - -end %quote - -text {* - \noindent We define the natural operation of the natural numbers - on monoids: -*} - -primrec %quote (in monoid) pow :: "nat \ 'a \ 'a" where - "pow 0 a = \" - | "pow (Suc n) a = a \ pow n a" - -text {* - \noindent This we use to define the discrete exponentiation function: -*} - -definition %quote bexp :: "nat \ nat" where - "bexp n = pow n (Suc (Suc 0))" - -text {* - \noindent The corresponding code in Haskell uses that language's native classes: -*} - -text %quote {*@{code_stmts bexp (Haskell)}*} - -text {* - \noindent This is a convenient place to show how explicit dictionary construction - manifests in generated code (here, the same example in @{text SML}) - \cite{Haftmann-Nipkow:2010:code}: -*} - -text %quote {*@{code_stmts bexp (SML)}*} - -text {* - \noindent Note the parameters with trailing underscore (@{verbatim "A_"}), - which are the dictionary parameters. -*} - -subsection {* The preprocessor \label{sec:preproc} *} - -text {* - Before selected function theorems are turned into abstract - code, a chain of definitional transformation steps is carried - out: \emph{preprocessing}. In essence, the preprocessor - consists of two components: a \emph{simpset} and \emph{function transformers}. - - The \emph{simpset} can apply the full generality of the - Isabelle simplifier. Due to the interpretation of theorems as code - equations, rewrites are applied to the right hand side and the - arguments of the left hand side of an equation, but never to the - constant heading the left hand side. An important special case are - \emph{unfold theorems}, which may be declared and removed using - the @{attribute code_unfold} or \emph{@{attribute code_unfold} del} - attribute, respectively. - - Some common applications: -*} - -text_raw {* - \begin{itemize} -*} - -text {* - \item replacing non-executable constructs by executable ones: -*} - -lemma %quote [code_unfold]: - "x \ set xs \ List.member xs x" by (fact in_set_member) - -text {* - \item eliminating superfluous constants: -*} - -lemma %quote [code_unfold]: - "1 = Suc 0" by (fact One_nat_def) - -text {* - \item replacing executable but inconvenient constructs: -*} - -lemma %quote [code_unfold]: - "xs = [] \ List.null xs" by (fact eq_Nil_null) - -text_raw {* - \end{itemize} -*} - -text {* - \noindent \emph{Function transformers} provide a very general interface, - transforming a list of function theorems to another - list of function theorems, provided that neither the heading - constant nor its type change. The @{term "0\nat"} / @{const Suc} - pattern elimination implemented in - theory @{text Efficient_Nat} (see \secref{eff_nat}) uses this - interface. - - \noindent The current setup of the preprocessor may be inspected using - the @{command print_codeproc} command. - @{command code_thms} provides a convenient - mechanism to inspect the impact of a preprocessor setup - on code equations. - - \begin{warn} - - Attribute @{attribute code_unfold} also applies to the - preprocessor of the ancient @{text "SML code generator"}; in case - this is not what you intend, use @{attribute code_inline} instead. - \end{warn} -*} - -subsection {* Datatypes \label{sec:datatypes} *} - -text {* - Conceptually, any datatype is spanned by a set of - \emph{constructors} of type @{text "\ = \ \ \ \\<^isub>1 \ \\<^isub>n"} where @{text - "{\\<^isub>1, \, \\<^isub>n}"} is exactly the set of \emph{all} type variables in - @{text "\"}. The HOL datatype package by default registers any new - datatype in the table of datatypes, which may be inspected using the - @{command print_codesetup} command. - - In some cases, it is appropriate to alter or extend this table. As - an example, we will develop an alternative representation of the - queue example given in \secref{sec:intro}. The amortised - representation is convenient for generating code but exposes its - \qt{implementation} details, which may be cumbersome when proving - theorems about it. Therefore, here is a simple, straightforward - representation of queues: -*} - -datatype %quote 'a queue = Queue "'a list" - -definition %quote empty :: "'a queue" where - "empty = Queue []" - -primrec %quote enqueue :: "'a \ 'a queue \ 'a queue" where - "enqueue x (Queue xs) = Queue (xs @ [x])" - -fun %quote dequeue :: "'a queue \ 'a option \ 'a queue" where - "dequeue (Queue []) = (None, Queue [])" - | "dequeue (Queue (x # xs)) = (Some x, Queue xs)" - -text {* - \noindent This we can use directly for proving; for executing, - we provide an alternative characterisation: -*} - -definition %quote AQueue :: "'a list \ 'a list \ 'a queue" where - "AQueue xs ys = Queue (ys @ rev xs)" - -code_datatype %quote AQueue - -text {* - \noindent Here we define a \qt{constructor} @{const "AQueue"} which - is defined in terms of @{text "Queue"} and interprets its arguments - according to what the \emph{content} of an amortised queue is supposed - to be. Equipped with this, we are able to prove the following equations - for our primitive queue operations which \qt{implement} the simple - queues in an amortised fashion: -*} - -lemma %quote empty_AQueue [code]: - "empty = AQueue [] []" - unfolding AQueue_def empty_def by simp - -lemma %quote enqueue_AQueue [code]: - "enqueue x (AQueue xs ys) = AQueue (x # xs) ys" - unfolding AQueue_def by simp - -lemma %quote dequeue_AQueue [code]: - "dequeue (AQueue xs []) = - (if xs = [] then (None, AQueue [] []) - else dequeue (AQueue [] (rev xs)))" - "dequeue (AQueue xs (y # ys)) = (Some y, AQueue xs ys)" - unfolding AQueue_def by simp_all - -text {* - \noindent For completeness, we provide a substitute for the - @{text case} combinator on queues: -*} - -lemma %quote queue_case_AQueue [code]: - "queue_case f (AQueue xs ys) = f (ys @ rev xs)" - unfolding AQueue_def by simp - -text {* - \noindent The resulting code looks as expected: -*} - -text %quote {*@{code_stmts empty enqueue dequeue (SML)}*} - -text {* - \noindent From this example, it can be glimpsed that using own - constructor sets is a little delicate since it changes the set of - valid patterns for values of that type. Without going into much - detail, here some practical hints: - - \begin{itemize} - - \item When changing the constructor set for datatypes, take care - to provide alternative equations for the @{text case} combinator. - - \item Values in the target language need not to be normalised -- - different values in the target language may represent the same - value in the logic. - - \item Usually, a good methodology to deal with the subtleties of - pattern matching is to see the type as an abstract type: provide - a set of operations which operate on the concrete representation - of the type, and derive further operations by combinations of - these primitive ones, without relying on a particular - representation. - - \end{itemize} -*} - - -subsection {* Equality *} - -text {* - Surely you have already noticed how equality is treated - by the code generator: -*} - -primrec %quote collect_duplicates :: "'a list \ 'a list \ 'a list \ 'a list" where - "collect_duplicates xs ys [] = xs" - | "collect_duplicates xs ys (z#zs) = (if z \ set xs - then if z \ set ys - then collect_duplicates xs ys zs - else collect_duplicates xs (z#ys) zs - else collect_duplicates (z#xs) (z#ys) zs)" - -text {* - \noindent During preprocessing, the membership test is rewritten, - resulting in @{const List.member}, which itself - performs an explicit equality check. -*} - -text %quote {*@{code_stmts collect_duplicates (SML)}*} - -text {* - \noindent Obviously, polymorphic equality is implemented the Haskell - way using a type class. How is this achieved? HOL introduces - an explicit class @{class eq} with a corresponding operation - @{const eq_class.eq} such that @{thm eq [no_vars]}. - The preprocessing framework does the rest by propagating the - @{class eq} constraints through all dependent code equations. - For datatypes, instances of @{class eq} are implicitly derived - when possible. For other types, you may instantiate @{text eq} - manually like any other type class. -*} - - -subsection {* Explicit partiality *} - -text {* - Partiality usually enters the game by partial patterns, as - in the following example, again for amortised queues: -*} - -definition %quote strict_dequeue :: "'a queue \ 'a \ 'a queue" where - "strict_dequeue q = (case dequeue q - of (Some x, q') \ (x, q'))" - -lemma %quote strict_dequeue_AQueue [code]: - "strict_dequeue (AQueue xs (y # ys)) = (y, AQueue xs ys)" - "strict_dequeue (AQueue xs []) = - (case rev xs of y # ys \ (y, AQueue [] ys))" - by (simp_all add: strict_dequeue_def dequeue_AQueue split: list.splits) - -text {* - \noindent In the corresponding code, there is no equation - for the pattern @{term "AQueue [] []"}: -*} - -text %quote {*@{code_stmts strict_dequeue (consts) strict_dequeue (Haskell)}*} - -text {* - \noindent In some cases it is desirable to have this - pseudo-\qt{partiality} more explicitly, e.g.~as follows: -*} - -axiomatization %quote empty_queue :: 'a - -definition %quote strict_dequeue' :: "'a queue \ 'a \ 'a queue" where - "strict_dequeue' q = (case dequeue q of (Some x, q') \ (x, q') | _ \ empty_queue)" - -lemma %quote strict_dequeue'_AQueue [code]: - "strict_dequeue' (AQueue xs []) = (if xs = [] then empty_queue - else strict_dequeue' (AQueue [] (rev xs)))" - "strict_dequeue' (AQueue xs (y # ys)) = - (y, AQueue xs ys)" - by (simp_all add: strict_dequeue'_def dequeue_AQueue split: list.splits) - -text {* - Observe that on the right hand side of the definition of @{const - "strict_dequeue'"}, the unspecified constant @{const empty_queue} occurs. - - Normally, if constants without any code equations occur in a - program, the code generator complains (since in most cases this is - indeed an error). But such constants can also be thought - of as function definitions which always fail, - since there is never a successful pattern match on the left hand - side. In order to categorise a constant into that category - explicitly, use @{command "code_abort"}: -*} - -code_abort %quote empty_queue - -text {* - \noindent Then the code generator will just insert an error or - exception at the appropriate position: -*} - -text %quote {*@{code_stmts strict_dequeue' (consts) empty_queue strict_dequeue' (Haskell)}*} - -text {* - \noindent This feature however is rarely needed in practice. - Note also that the @{text HOL} default setup already declares - @{const undefined} as @{command "code_abort"}, which is most - likely to be used in such situations. -*} - -end diff -r a461dd80f83c -r 7935b334893e doc-src/Codegen/Thy/ROOT.ML --- a/doc-src/Codegen/Thy/ROOT.ML Fri Aug 13 13:43:55 2010 +0200 +++ b/doc-src/Codegen/Thy/ROOT.ML Fri Aug 13 14:40:15 2010 +0200 @@ -1,10 +1,9 @@ no_document use_thy "Setup"; -no_document use_thys ["Efficient_Nat"]; use_thy "Introduction"; -use_thy "Program"; +use_thy "Foundations"; +use_thy "Refinement"; use_thy "Inductive_Predicate"; use_thy "Adaptation"; use_thy "Further"; -use_thy "ML"; diff -r a461dd80f83c -r 7935b334893e doc-src/Codegen/Thy/Refinement.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc-src/Codegen/Thy/Refinement.thy Fri Aug 13 14:40:15 2010 +0200 @@ -0,0 +1,7 @@ +theory Refinement +imports Setup +begin + +section {* Program and datatype refinement \label{sec:refinement} *} + +end diff -r a461dd80f83c -r 7935b334893e doc-src/Codegen/Thy/document/Further.tex --- a/doc-src/Codegen/Thy/document/Further.tex Fri Aug 13 13:43:55 2010 +0200 +++ b/doc-src/Codegen/Thy/document/Further.tex Fri Aug 13 14:40:15 2010 +0200 @@ -389,6 +389,179 @@ \end{isamarkuptext}% \isamarkuptrue% % +\isamarkupsubsection{ML system interfaces \label{sec:ml}% +} +\isamarkuptrue% +% +\begin{isamarkuptext}% +Since the code generator framework not only aims to provide + a nice Isar interface but also to form a base for + code-generation-based applications, here a short + description of the most important ML interfaces.% +\end{isamarkuptext}% +\isamarkuptrue% +% +\isamarkupsubsubsection{Managing executable content% +} +\isamarkuptrue% +% +\isadelimmlref +% +\endisadelimmlref +% +\isatagmlref +% +\begin{isamarkuptext}% +\begin{mldecls} + \indexdef{}{ML}{Code.add\_eqn}\verb|Code.add_eqn: thm -> theory -> theory| \\ + \indexdef{}{ML}{Code.del\_eqn}\verb|Code.del_eqn: thm -> theory -> theory| \\ + \indexdef{}{ML}{Code\_Preproc.map\_pre}\verb|Code_Preproc.map_pre: (simpset -> simpset) -> theory -> theory| \\ + \indexdef{}{ML}{Code\_Preproc.map\_post}\verb|Code_Preproc.map_post: (simpset -> simpset) -> theory -> theory| \\ + \indexdef{}{ML}{Code\_Preproc.add\_functrans}\verb|Code_Preproc.add_functrans: string * (theory -> (thm * bool) list -> (thm * bool) list option)|\isasep\isanewline% +\verb| -> theory -> theory| \\ + \indexdef{}{ML}{Code\_Preproc.del\_functrans}\verb|Code_Preproc.del_functrans: string -> theory -> theory| \\ + \indexdef{}{ML}{Code.add\_datatype}\verb|Code.add_datatype: (string * typ) list -> theory -> theory| \\ + \indexdef{}{ML}{Code.get\_type}\verb|Code.get_type: theory -> string|\isasep\isanewline% +\verb| -> (string * sort) list * ((string * string list) * typ list) list| \\ + \indexdef{}{ML}{Code.get\_type\_of\_constr\_or\_abstr}\verb|Code.get_type_of_constr_or_abstr: theory -> string -> (string * bool) option| + \end{mldecls} + + \begin{description} + + \item \verb|Code.add_eqn|~\isa{thm}~\isa{thy} adds function + theorem \isa{thm} to executable content. + + \item \verb|Code.del_eqn|~\isa{thm}~\isa{thy} removes function + theorem \isa{thm} from executable content, if present. + + \item \verb|Code_Preproc.map_pre|~\isa{f}~\isa{thy} changes + the preprocessor simpset. + + \item \verb|Code_Preproc.add_functrans|~\isa{{\isacharparenleft}name{\isacharcomma}\ f{\isacharparenright}}~\isa{thy} adds + function transformer \isa{f} (named \isa{name}) to executable content; + \isa{f} is a transformer of the code equations belonging + to a certain function definition, depending on the + current theory context. Returning \isa{NONE} indicates that no + transformation took place; otherwise, the whole process will be iterated + with the new code equations. + + \item \verb|Code_Preproc.del_functrans|~\isa{name}~\isa{thy} removes + function transformer named \isa{name} from executable content. + + \item \verb|Code.add_datatype|~\isa{cs}~\isa{thy} adds + a datatype to executable content, with generation + set \isa{cs}. + + \item \verb|Code.get_type_of_constr_or_abstr|~\isa{thy}~\isa{const} + returns type constructor corresponding to + constructor \isa{const}; returns \isa{NONE} + if \isa{const} is no constructor. + + \end{description}% +\end{isamarkuptext}% +\isamarkuptrue% +% +\endisatagmlref +{\isafoldmlref}% +% +\isadelimmlref +% +\endisadelimmlref +% +\isamarkupsubsubsection{Auxiliary% +} +\isamarkuptrue% +% +\isadelimmlref +% +\endisadelimmlref +% +\isatagmlref +% +\begin{isamarkuptext}% +\begin{mldecls} + \indexdef{}{ML}{Code.read\_const}\verb|Code.read_const: theory -> string -> string| + \end{mldecls} + + \begin{description} + + \item \verb|Code.read_const|~\isa{thy}~\isa{s} + reads a constant as a concrete term expression \isa{s}. + + \end{description}% +\end{isamarkuptext}% +\isamarkuptrue% +% +\endisatagmlref +{\isafoldmlref}% +% +\isadelimmlref +% +\endisadelimmlref +% +\isamarkupsubsubsection{Data depending on the theory's executable content% +} +\isamarkuptrue% +% +\begin{isamarkuptext}% +Implementing code generator applications on top + of the framework set out so far usually not only + involves using those primitive interfaces + but also storing code-dependent data and various + other things. + + Due to incrementality of code generation, changes in the + theory's executable content have to be propagated in a + certain fashion. Additionally, such changes may occur + not only during theory extension but also during theory + merge, which is a little bit nasty from an implementation + point of view. The framework provides a solution + to this technical challenge by providing a functorial + data slot \verb|Code_Data|; on instantiation + of this functor, the following types and operations + are required: + + \medskip + \begin{tabular}{l} + \isa{type\ T} \\ + \isa{val\ empty{\isacharcolon}\ T} \\ + \end{tabular} + + \begin{description} + + \item \isa{T} the type of data to store. + + \item \isa{empty} initial (empty) data. + + \end{description} + + \noindent An instance of \verb|Code_Data| provides the following + interface: + + \medskip + \begin{tabular}{l} + \isa{change{\isacharcolon}\ theory\ {\isasymrightarrow}\ {\isacharparenleft}T\ {\isasymrightarrow}\ T{\isacharparenright}\ {\isasymrightarrow}\ T} \\ + \isa{change{\isacharunderscore}yield{\isacharcolon}\ theory\ {\isasymrightarrow}\ {\isacharparenleft}T\ {\isasymrightarrow}\ {\isacharprime}a\ {\isacharasterisk}\ T{\isacharparenright}\ {\isasymrightarrow}\ {\isacharprime}a\ {\isacharasterisk}\ T} + \end{tabular} + + \begin{description} + + \item \isa{change} update of current data (cached!) + by giving a continuation. + + \item \isa{change{\isacharunderscore}yield} update with side result. + + \end{description}% +\end{isamarkuptext}% +\isamarkuptrue% +% +\begin{isamarkuptext}% +\bigskip + + \emph{Happy proving, happy hacking!}% +\end{isamarkuptext}% +\isamarkuptrue% +% \isadelimtheory % \endisadelimtheory diff -r a461dd80f83c -r 7935b334893e doc-src/Codegen/Thy/document/Inductive_Predicate.tex --- a/doc-src/Codegen/Thy/document/Inductive_Predicate.tex Fri Aug 13 13:43:55 2010 +0200 +++ b/doc-src/Codegen/Thy/document/Inductive_Predicate.tex Fri Aug 13 14:40:15 2010 +0200 @@ -18,7 +18,7 @@ % \endisadelimtheory % -\isamarkupsubsection{Inductive Predicates% +\isamarkupsection{Inductive Predicates% } \isamarkuptrue% % diff -r a461dd80f83c -r 7935b334893e doc-src/Codegen/Thy/document/Introduction.tex --- a/doc-src/Codegen/Thy/document/Introduction.tex Fri Aug 13 13:43:55 2010 +0200 +++ b/doc-src/Codegen/Thy/document/Introduction.tex Fri Aug 13 14:40:15 2010 +0200 @@ -18,7 +18,27 @@ % \endisadelimtheory % -\isamarkupsection{Introduction and Overview% +\isamarkupsection{Introduction% +} +\isamarkuptrue% +% +\isamarkupsubsection{Code generation fundamental: shallow embedding% +} +\isamarkuptrue% +% +\isamarkupsubsection{A quick start with the \isa{Isabelle{\isacharslash}HOL} toolbox% +} +\isamarkuptrue% +% +\isamarkupsubsection{Type classes% +} +\isamarkuptrue% +% +\isamarkupsubsection{How to continue from here% +} +\isamarkuptrue% +% +\isamarkupsubsection{If something goes utterly wrong% } \isamarkuptrue% % diff -r a461dd80f83c -r 7935b334893e doc-src/Codegen/codegen.tex --- a/doc-src/Codegen/codegen.tex Fri Aug 13 13:43:55 2010 +0200 +++ b/doc-src/Codegen/codegen.tex Fri Aug 13 14:40:15 2010 +0200 @@ -31,11 +31,11 @@ \clearfirst \input{Thy/document/Introduction.tex} -\input{Thy/document/Program.tex} +\input{Thy/document/Foundations.tex} +\input{Thy/document/Refinement.tex} \input{Thy/document/Inductive_Predicate.tex} \input{Thy/document/Adaptation.tex} \input{Thy/document/Further.tex} -\input{Thy/document/ML.tex} \begingroup \bibliographystyle{plain} \small\raggedright\frenchspacing