# HG changeset patch # User haftmann # Date 1222766387 -7200 # Node ID f65e8b318581dc671c40ef77ffc9251706fa97d2 # Parent 4ffb62675ade72004ba554f10a02d143ccbaf98c re-canibalised manual diff -r 4ffb62675ade -r f65e8b318581 doc-src/IsarAdvanced/Codegen/Thy/Adaption.thy --- a/doc-src/IsarAdvanced/Codegen/Thy/Adaption.thy Tue Sep 30 04:06:55 2008 +0200 +++ b/doc-src/IsarAdvanced/Codegen/Thy/Adaption.thy Tue Sep 30 11:19:47 2008 +0200 @@ -2,8 +2,213 @@ imports Setup begin -section {* Adaption to target languages *} +section {* Adaption to target languages \label{sec:adaption} *} + +subsection {* Common adaption cases *} + +text {* + The @{text HOL} @{text Main} theory already provides a code + generator setup + which should be suitable for most applications. Common extensions + and modifications are available by certain theories of the @{text HOL} + library; beside being useful in applications, they may serve + as a tutorial for customising the code generator setup (see below + \secref{sec:adaption_mechanisms}). + + \begin{description} + + \item[@{theory "Code_Integer"}] represents @{text HOL} integers by big + integer literals in target languages. + \item[@{theory "Code_Char"}] represents @{text HOL} characters by + character literals in target languages. + \item[@{theory "Code_Char_chr"}] like @{text "Code_Char"}, + but also offers treatment of character codes; includes + @{theory "Code_Char_chr"}. + \item[@{theory "Efficient_Nat"}] \label{eff_nat} implements natural numbers by integers, + which in general will result in higher efficiency; pattern + matching with @{term "0\nat"} / @{const "Suc"} + is eliminated; includes @{theory "Code_Integer"}. + \item[@{theory "Code_Index"}] provides an additional datatype + @{typ index} which is mapped to target-language built-in integers. + Useful for code setups which involve e.g. indexing of + target-language arrays. + \item[@{theory "Code_Message"}] provides an additional datatype + @{typ message_string} which is isomorphic to strings; + @{typ message_string}s are mapped to target-language strings. + Useful for code setups which involve e.g. printing (error) messages. + + \end{description} + + \begin{warn} + When importing any of these theories, they should form the last + items in an import list. Since these theories adapt the + code generator setup in a non-conservative fashion, + strange effects may occur otherwise. + \end{warn} +*} + + +subsection {* Adaption mechanisms \label{sec:adaption_mechanisms} *} + +text {* + \begin{warn} + The mechanisms shown here are especially for the curious; the user + rarely needs to do anything on his own beyond the defaults in @{text HOL}. + Adaption is a delicated task which requires a lot of dilligence since + it happend \emph{completely} outside the logic. + \end{warn} +*} + +text {* + Consider the following function and its corresponding + SML code: +*} + +primrec %quoteme in_interval :: "nat \ nat \ nat \ bool" where + "in_interval (k, l) n \ k \ n \ n \ l" + +code_type %invisible bool + (SML) +code_const %invisible True and False and "op \" and Not + (SML and and and) + +text %quoteme {*@{code_stmts in_interval (SML)}*} + +text {* + \noindent Though this is correct code, it is a little bit unsatisfactory: + boolean values and operators are materialised as distinguished + entities with have nothing to do with the SML-built-in notion + of \qt{bool}. This results in less readable code; + additionally, eager evaluation may cause programs to + loop or break which would perfectly terminate when + the existing SML @{verbatim "bool"} would be used. To map + the HOL @{typ bool} on SML @{verbatim "bool"}, we may use + \qn{custom serialisations}: +*} + +code_type %tt bool + (SML "bool") +code_const %tt True and False and "op \" + (SML "true" and "false" and "_ andalso _") -subsection {* \ldots *} +text {* + The @{command code_type} command takes a type constructor + as arguments together with a list of custom serialisations. + Each custom serialisation starts with a target language + identifier followed by an expression, which during + code serialisation is inserted whenever the type constructor + would occur. For constants, @{command code_const} implements + the corresponding mechanism. Each ``@{verbatim "_"}'' in + a serialisation expression is treated as a placeholder + for the type constructor's (the constant's) arguments. +*} + +text %quoteme {*@{code_stmts in_interval (SML)}*} + +text {* + \lstsml{Thy/examples/bool_mlbool.ML} + + \noindent This still is not perfect: the parentheses + around the \qt{andalso} expression are superfluous. + Though the serializer + by no means attempts to imitate the rich Isabelle syntax + framework, it provides some common idioms, notably + associative infixes with precedences which may be used here: +*} + +code_const %tt "op \" + (SML infixl 1 "andalso") + +text %quoteme {*@{code_stmts in_interval (SML)}*} + +text {* + Next, we try to map HOL pairs to SML pairs, using the + infix ``@{verbatim "*"}'' type constructor and parentheses: +*} + +code_type %invisible * + (SML) +code_const %invisible Pair + (SML) + +code_type %tt * + (SML infix 2 "*") +code_const %tt Pair + (SML "!((_),/ (_))") + +text {* + The initial bang ``@{verbatim "!"}'' tells the serializer to never put + parentheses around the whole expression (they are already present), + while the parentheses around argument place holders + tell not to put parentheses around the arguments. + The slash ``@{verbatim "/"}'' (followed by arbitrary white space) + inserts a space which may be used as a break if necessary + during pretty printing. + + These examples give a glimpse what mechanisms + custom serialisations provide; however their usage + requires careful thinking in order not to introduce + inconsistencies -- or, in other words: + custom serialisations are completely axiomatic. + + A further noteworthy details is that any special + character in a custom serialisation may be quoted + using ``@{verbatim "'"}''; thus, in + ``@{verbatim "fn '_ => _"}'' the first + ``@{verbatim "_"}'' is a proper underscore while the + second ``@{verbatim "_"}'' is a placeholder. + + The HOL theories provide further + examples for custom serialisations. +*} + + +subsection {* @{text Haskell} serialisation *} + +text {* + For convenience, the default + @{text HOL} setup for @{text Haskell} maps the @{class eq} class to + its counterpart in @{text Haskell}, giving custom serialisations + for the class @{class eq} (by command @{command code_class}) and its operation + @{const HOL.eq} +*} + +code_class %tt eq + (Haskell "Eq" where "HOL.eq" \ "(==)") + +code_const %tt "op =" + (Haskell infixl 4 "==") + +text {* + A problem now occurs whenever a type which + is an instance of @{class eq} in @{text HOL} is mapped + on a @{text Haskell}-built-in type which is also an instance + of @{text Haskell} @{text Eq}: +*} + +typedecl %quoteme bar + +instantiation %quoteme bar :: eq +begin + +definition %quoteme "eq_class.eq (x\bar) y \ x = y" + +instance %quoteme by default (simp add: eq_bar_def) end + +code_type %tt bar + (Haskell "Integer") + +text {* + The code generator would produce + an additional instance, which of course is rejectedby the @{text Haskell} + compiler. + To suppress this additional instance, use + @{text "code_instance"}: +*} + +code_instance %tt bar :: eq + (Haskell -) + +end diff -r 4ffb62675ade -r f65e8b318581 doc-src/IsarAdvanced/Codegen/Thy/Codegen.thy --- a/doc-src/IsarAdvanced/Codegen/Thy/Codegen.thy Tue Sep 30 04:06:55 2008 +0200 +++ b/doc-src/IsarAdvanced/Codegen/Thy/Codegen.thy Tue Sep 30 11:19:47 2008 +0200 @@ -1,1352 +1,2 @@ - -(* $Id$ *) - -(*<*) -theory Codegen -imports Main -uses "../../../antiquote_setup.ML" -begin - -ML {* Code_Target.code_width := 74 *} -(*>*) - -chapter {* Code generation from Isabelle theories *} - -section {* Introduction *} - -subsection {* Motivation *} - -text {* - Executing formal specifications as programs is a well-established - topic in the theorem proving community. With increasing - application of theorem proving systems in the area of - software development and verification, its relevance manifests - for running test cases and rapid prototyping. In logical - calculi like constructive type theory, - a notion of executability is implicit due to the nature - of the calculus. In contrast, specifications in Isabelle - can be highly non-executable. In order to bridge - the gap between logic and executable specifications, - an explicit non-trivial transformation has to be applied: - code generation. - - This tutorial introduces a generic code generator for the - Isabelle system \cite{isa-tutorial}. - Generic in the sense that the - \qn{target language} for which code shall ultimately be - generated is not fixed but may be an arbitrary state-of-the-art - functional programming language (currently, the implementation - supports SML \cite{SML}, OCaml \cite{OCaml} and Haskell - \cite{haskell-revised-report}). - We aim to provide a - versatile environment - suitable for software development and verification, - structuring the process - of code generation into a small set of orthogonal principles - while achieving a big coverage of application areas - with maximum flexibility. - - Conceptually the code generator framework is part - of Isabelle's @{text Pure} meta logic; the object logic - @{text HOL} which is an extension of @{text Pure} - already comes with a reasonable framework setup and thus provides - a good working horse for raising code-generation-driven - applications. So, we assume some familiarity and experience - with the ingredients of the @{text HOL} \emph{Main} theory - (see also \cite{isa-tutorial}). -*} - - -subsection {* Overview *} - -text {* - The code generator aims to be usable with no further ado - in most cases while allowing for detailed customization. - This manifests in the structure of this tutorial: - we start with a generic example \secref{sec:example} - and introduce code generation concepts \secref{sec:concept}. - Section - \secref{sec:basics} explains how to use the framework naively, - presuming a reasonable default setup. Then, section - \secref{sec:advanced} deals with advanced topics, - introducing further aspects of the code generator framework - in a motivation-driven manner. Last, section \secref{sec:ml} - introduces the framework's internal programming interfaces. - - \begin{warn} - Ultimately, the code generator which this tutorial deals with - is supposed to replace the already established code generator - by Stefan Berghofer \cite{Berghofer-Nipkow:2002}. - So, for the moment, there are two distinct code generators - in Isabelle. - Also note that while the framework itself is - object-logic independent, only @{text HOL} provides a reasonable - framework setup. - \end{warn} -*} - - -section {* An example: a simple theory of search trees \label{sec:example} *} - -text {* - When writing executable specifications using @{text HOL}, - it is convenient to use - three existing packages: the datatype package for defining - datatypes, the function package for (recursive) functions, - and the class package for overloaded definitions. - - We develope a small theory of search trees; trees are represented - as a datatype with key type @{typ "'a"} and value type @{typ "'b"}: -*} - -datatype ('a, 'b) searchtree = Leaf "'a\linorder" 'b - | Branch "('a, 'b) searchtree" "'a" "('a, 'b) searchtree" - -text {* - \noindent Note that we have constrained the type of keys - to the class of total orders, @{text linorder}. - - We define @{text find} and @{text update} functions: -*} - -primrec - find :: "('a\linorder, 'b) searchtree \ 'a \ 'b option" where - "find (Leaf key val) it = (if it = key then Some val else None)" - | "find (Branch t1 key t2) it = (if it \ key then find t1 it else find t2 it)" - -fun - update :: "'a\linorder \ 'b \ ('a, 'b) searchtree \ ('a, 'b) searchtree" where - "update (it, entry) (Leaf key val) = ( - if it = key then Leaf key entry - else if it \ key - then Branch (Leaf it entry) it (Leaf key val) - else Branch (Leaf key val) it (Leaf it entry) - )" - | "update (it, entry) (Branch t1 key t2) = ( - if it \ key - then (Branch (update (it, entry) t1) key t2) - else (Branch t1 key (update (it, entry) t2)) - )" - -text {* - \noindent For testing purpose, we define a small example - using natural numbers @{typ nat} (which are a @{text linorder}) - as keys and list of nats as values: -*} - -definition - example :: "(nat, nat list) searchtree" -where - "example = update (Suc (Suc (Suc (Suc 0))), [Suc (Suc 0), Suc (Suc 0)]) (update (Suc (Suc (Suc 0)), [Suc (Suc (Suc 0))]) - (update (Suc (Suc 0), [Suc (Suc 0)]) (Leaf (Suc 0) [])))" - -text {* - \noindent Then we generate code -*} - -export_code example (*<*)in SML (*>*)in SML file "examples/tree.ML" - -text {* - \noindent which looks like: - \lstsml{Thy/examples/tree.ML} -*} - - -section {* Code generation concepts and process \label{sec:concept} *} - -text {* - \begin{figure}[h] - \centering - \includegraphics[width=0.7\textwidth]{codegen_process} - \caption{code generator -- processing overview} - \label{fig:process} - \end{figure} - - The code generator employs a notion of executability - for three foundational executable ingredients known - from functional programming: - \emph{defining equations}, \emph{datatypes}, and - \emph{type classes}. A defining equation as a first approximation - is a theorem of the form @{text "f t\<^isub>1 t\<^isub>2 \ t\<^isub>n \ t"} - (an equation headed by a constant @{text f} with arguments - @{text "t\<^isub>1 t\<^isub>2 \ t\<^isub>n"} and right hand side @{text t}). - Code generation aims to turn defining equations - into a functional program by running through - a process (see figure \ref{fig:process}): - - \begin{itemize} - - \item Out of the vast collection of theorems proven in a - \qn{theory}, a reasonable subset modeling - defining equations is \qn{selected}. - - \item On those selected theorems, certain - transformations are carried out - (\qn{preprocessing}). Their purpose is to turn theorems - representing non- or badly executable - specifications into equivalent but executable counterparts. - The result is a structured collection of \qn{code theorems}. - - \item These \qn{code theorems} then are \qn{translated} - into an Haskell-like intermediate - language. - - \item Finally, out of the intermediate language the final - code in the desired \qn{target language} is \qn{serialized}. - - \end{itemize} - - From these steps, only the two last are carried out - outside the logic; by keeping this layer as - thin as possible, the amount of code to trust is - kept to a minimum. -*} - - - -section {* Basics \label{sec:basics} *} - -subsection {* Invoking the code generator *} - -text {* - Thanks to a reasonable setup of the @{text HOL} theories, in - most cases code generation proceeds without further ado: -*} - -primrec - fac :: "nat \ nat" where - "fac 0 = 1" - | "fac (Suc n) = Suc n * fac n" - -text {* - \noindent This executable specification is now turned to SML code: -*} - -export_code fac (*<*)in SML(*>*)in SML file "examples/fac.ML" - -text {* - \noindent The @{text "\"} command takes a space-separated list of - constants together with \qn{serialization directives} - These start with a \qn{target language} - identifier, followed by a file specification - where to write the generated code to. - - Internally, the defining equations for all selected - constants are taken, including any transitively required - constants, datatypes and classes, resulting in the following - code: - - \lstsml{Thy/examples/fac.ML} - - The code generator will complain when a required - ingredient does not provide a executable counterpart, - e.g.~generating code - for constants not yielding - a defining equation (e.g.~the Hilbert choice - operation @{text "SOME"}): -*} -(*<*) -setup {* Sign.add_path "foo" *} -(*>*) -definition - pick_some :: "'a list \ 'a" where - "pick_some xs = (SOME x. x \ set xs)" -(*<*) -hide const pick_some - -setup {* Sign.parent_path *} - -definition - pick_some :: "'a list \ 'a" where - "pick_some = hd" -(*>*) - -export_code pick_some in SML file "examples/fail_const.ML" - -text {* \noindent will fail. *} - -subsection {* Theorem selection *} - -text {* - The list of all defining equations in a theory may be inspected - using the @{text "\"} command: -*} - -print_codesetup - -text {* - \noindent which displays a table of constant with corresponding - defining equations (the additional stuff displayed - shall not bother us for the moment). - - The typical @{text HOL} tools are already set up in a way that - function definitions introduced by @{text "\"}, - @{text "\"}, @{text "\"}, - @{text "\"}, @{text "\"}, - @{text "\"} are implicitly propagated - to this defining equation table. Specific theorems may be - selected using an attribute: \emph{code func}. As example, - a weight selector function: -*} - -primrec - pick :: "(nat \ 'a) list \ nat \ 'a" where - "pick (x#xs) n = (let (k, v) = x in - if n < k then v else pick xs (n - k))" - -text {* - \noindent We want to eliminate the explicit destruction - of @{term x} to @{term "(k, v)"}: -*} - -lemma [code func]: - "pick ((k, v)#xs) n = (if n < k then v else pick xs (n - k))" - by simp - -export_code pick (*<*)in SML(*>*) in SML file "examples/pick1.ML" - -text {* - \noindent This theorem now is used for generating code: - - \lstsml{Thy/examples/pick1.ML} - - \noindent The policy is that \emph{default equations} stemming from - @{text "\"}, - @{text "\"}, @{text "\"}, - @{text "\"}, @{text "\"}, - @{text "\"} statements are discarded as soon as an - equation is explicitly selected by means of \emph{code func}. - Further applications of \emph{code func} add theorems incrementally, - but syntactic redundancies are implicitly dropped. For example, - using a modified version of the @{const fac} function - as defining equation, the then redundant (since - syntactically subsumed) original defining equations - are dropped. - - \begin{warn} - The attributes \emph{code} and \emph{code del} - associated with the existing code generator also apply to - the new one: \emph{code} implies \emph{code func}, - and \emph{code del} implies \emph{code func del}. - \end{warn} -*} - -subsection {* Type classes *} - -text {* - Type classes enter the game via the Isar class package. - For a short introduction how to use it, see \cite{isabelle-classes}; - here we just illustrate its impact on code generation. - - In a target language, type classes may be represented - natively (as in the case of Haskell). For languages - like SML, they are implemented using \emph{dictionaries}. - Our following example specifies a class \qt{null}, - assigning to each of its inhabitants a \qt{null} value: -*} - -class null = type + - fixes null :: 'a - -primrec - head :: "'a\null list \ 'a" where - "head [] = null" - | "head (x#xs) = x" - -text {* - \noindent We provide some instances for our @{text null}: -*} - -instantiation option and list :: (type) null -begin - -definition - "null = None" - -definition - "null = []" - -instance .. - -end - -text {* - \noindent Constructing a dummy example: -*} - -definition - "dummy = head [Some (Suc 0), None]" - -text {* - Type classes offer a suitable occasion to introduce - the Haskell serializer. Its usage is almost the same - as SML, but, in accordance with conventions - some Haskell systems enforce, each module ends - up in a single file. The module hierarchy is reflected in - the file system, with root directory given as file specification. -*} - -export_code dummy in Haskell file "examples/" - (* NOTE: you may use Haskell only once in this document, otherwise - you have to work in distinct subdirectories *) - -text {* - \lsthaskell{Thy/examples/Codegen.hs} - \noindent (we have left out all other modules). - - \medskip - - The whole code in SML with explicit dictionary passing: -*} - -export_code dummy (*<*)in SML(*>*)in SML file "examples/class.ML" - -text {* - \lstsml{Thy/examples/class.ML} - - \medskip - - \noindent or in OCaml: -*} - -export_code dummy in OCaml file "examples/class.ocaml" - -text {* - \lstsml{Thy/examples/class.ocaml} - - \medskip The explicit association of constants - to classes can be inspected using the @{text "\"} - command. -*} - - -section {* Recipes and advanced topics \label{sec:advanced} *} - -text {* - In this tutorial, we do not attempt to give an exhaustive - description of the code generator framework; instead, - we cast a light on advanced topics by introducing - them together with practically motivated examples. Concerning - further reading, see - - \begin{itemize} - - \item the Isabelle/Isar Reference Manual \cite{isabelle-isar-ref} - for exhaustive syntax diagrams. - \item or \cite{Haftmann-Nipkow:2007:codegen} which deals with foundational issues - of the code generator framework. - - \end{itemize} -*} - -subsection {* Library theories \label{sec:library} *} - -text {* - The @{text HOL} @{text Main} theory already provides a code - generator setup - which should be suitable for most applications. Common extensions - and modifications are available by certain theories of the @{text HOL} - library; beside being useful in applications, they may serve - as a tutorial for customizing the code generator setup. - - \begin{description} - - \item[@{text "Code_Integer"}] represents @{text HOL} integers by big - integer literals in target languages. - \item[@{text "Code_Char"}] represents @{text HOL} characters by - character literals in target languages. - \item[@{text "Code_Char_chr"}] like @{text "Code_Char"}, - but also offers treatment of character codes; includes - @{text "Code_Integer"}. - \item[@{text "Efficient_Nat"}] \label{eff_nat} implements natural numbers by integers, - which in general will result in higher efficency; pattern - matching with @{term "0\nat"} / @{const "Suc"} - is eliminated; includes @{text "Code_Integer"}. - \item[@{text "Code_Index"}] provides an additional datatype - @{text index} which is mapped to target-language built-in integers. - Useful for code setups which involve e.g. indexing of - target-language arrays. - \item[@{text "Code_Message"}] provides an additional datatype - @{text message_string} which is isomorphic to strings; - @{text message_string}s are mapped to target-language strings. - Useful for code setups which involve e.g. printing (error) messages. - - \end{description} - - \begin{warn} - When importing any of these theories, they should form the last - items in an import list. Since these theories adapt the - code generator setup in a non-conservative fashion, - strange effects may occur otherwise. - \end{warn} -*} - -subsection {* Preprocessing *} - -text {* - Before selected function theorems are turned into abstract - code, a chain of definitional transformation steps is carried - out: \emph{preprocessing}. In essence, the preprocessor - consists of two components: a \emph{simpset} and \emph{function transformers}. - - The \emph{simpset} allows to employ the full generality of the Isabelle - simplifier. Due to the interpretation of theorems - as defining equations, rewrites are applied to the right - hand side and the arguments of the left hand side of an - equation, but never to the constant heading the left hand side. - An important special case are \emph{inline theorems} which may be - declared an undeclared using the - \emph{code inline} or \emph{code inline del} attribute respectively. - Some common applications: -*} - -text_raw {* - \begin{itemize} -*} - -text {* - \item replacing non-executable constructs by executable ones: -*} - - lemma [code inline]: - "x \ set xs \ x mem xs" by (induct xs) simp_all - -text {* - \item eliminating superfluous constants: -*} - - lemma [code inline]: - "1 = Suc 0" by simp - -text {* - \item replacing executable but inconvenient constructs: -*} - - lemma [code inline]: - "xs = [] \ List.null xs" by (induct xs) simp_all - -text_raw {* - \end{itemize} -*} - -text {* - - \emph{Function transformers} provide a very general interface, - transforming a list of function theorems to another - list of function theorems, provided that neither the heading - constant nor its type change. The @{term "0\nat"} / @{const Suc} - pattern elimination implemented in - theory @{text "Efficient_Nat"} (see \secref{eff_nat}) uses this - interface. - - \noindent The current setup of the preprocessor may be inspected using - the @{text "\"} command. - - \begin{warn} - The attribute \emph{code unfold} - associated with the existing code generator also applies to - the new one: \emph{code unfold} implies \emph{code inline}. - \end{warn} -*} - - -subsection {* Concerning operational equality *} - -text {* - Surely you have already noticed how equality is treated - by the code generator: -*} - -primrec - collect_duplicates :: "'a list \ 'a list \ 'a list \ 'a list" where - "collect_duplicates xs ys [] = xs" - | "collect_duplicates xs ys (z#zs) = (if z \ set xs - then if z \ set ys - then collect_duplicates xs ys zs - else collect_duplicates xs (z#ys) zs - else collect_duplicates (z#xs) (z#ys) zs)" - -text {* - The membership test during preprocessing is rewritten, - resulting in @{const List.member}, which itself - performs an explicit equality check. -*} - -export_code collect_duplicates (*<*)in SML(*>*)in SML file "examples/collect_duplicates.ML" - -text {* - \lstsml{Thy/examples/collect_duplicates.ML} -*} - -text {* - Obviously, polymorphic equality is implemented the Haskell - way using a type class. How is this achieved? HOL introduces - an explicit class @{text eq} with a corresponding operation - @{const eq_class.eq} such that @{thm eq [no_vars]}. - The preprocessing framework does the rest. - For datatypes, instances of @{text eq} are implicitly derived - when possible. For other types, you may instantiate @{text eq} - manually like any other type class. - - Though this @{text eq} class is designed to get rarely in - the way, a subtlety - enters the stage when definitions of overloaded constants - are dependent on operational equality. For example, let - us define a lexicographic ordering on tuples: -*} - -instantiation * :: (ord, ord) ord -begin - -definition - [code func del]: "p1 < p2 \ (let (x1, y1) = p1; (x2, y2) = p2 in - x1 < x2 \ (x1 = x2 \ y1 < y2))" - -definition - [code func del]: "p1 \ p2 \ (let (x1, y1) = p1; (x2, y2) = p2 in - x1 < x2 \ (x1 = x2 \ y1 \ y2))" - -instance .. end - -lemma ord_prod [code func(*<*), code func del(*>*)]: - "(x1 \ 'a\ord, y1 \ 'b\ord) < (x2, y2) \ x1 < x2 \ (x1 = x2 \ y1 < y2)" - "(x1 \ 'a\ord, y1 \ 'b\ord) \ (x2, y2) \ x1 < x2 \ (x1 = x2 \ y1 \ y2)" - unfolding less_prod_def less_eq_prod_def by simp_all - -text {* - Then code generation will fail. Why? The definition - of @{term "op \"} depends on equality on both arguments, - which are polymorphic and impose an additional @{text eq} - class constraint, thus violating the type discipline - for class operations. - - The solution is to add @{text eq} explicitly to the first sort arguments in the - code theorems: -*} - -lemma ord_prod_code [code func]: - "(x1 \ 'a\{ord, eq}, y1 \ 'b\ord) < (x2, y2) \ - x1 < x2 \ (x1 = x2 \ y1 < y2)" - "(x1 \ 'a\{ord, eq}, y1 \ 'b\ord) \ (x2, y2) \ - x1 < x2 \ (x1 = x2 \ y1 \ y2)" - unfolding ord_prod by rule+ - -text {* - \noindent Then code generation succeeds: -*} - -export_code "op \ \ 'a\{eq, ord} \ 'b\ord \ 'a \ 'b \ bool" - (*<*)in SML(*>*)in SML file "examples/lexicographic.ML" - -text {* - \lstsml{Thy/examples/lexicographic.ML} -*} - -text {* - In general, code theorems for overloaded constants may have more - restrictive sort constraints than the underlying instance relation - between class and type constructor as long as the whole system of - constraints is coregular; code theorems violating coregularity - are rejected immediately. Consequently, it might be necessary - to delete disturbing theorems in the code theorem table, - as we have done here with the original definitions @{text less_prod_def} - and @{text less_eq_prod_def}. - - In some cases, the automatically derived defining equations - for equality on a particular type may not be appropriate. - As example, watch the following datatype representing - monomorphic parametric types (where type constructors - are referred to by natural numbers): -*} - -datatype monotype = Mono nat "monotype list" -(*<*) -lemma monotype_eq: - "Mono tyco1 typargs1 = Mono tyco2 typargs2 \ - tyco1 = tyco2 \ typargs1 = typargs2" by simp -(*>*) - -text {* - Then code generation for SML would fail with a message - that the generated code conains illegal mutual dependencies: - the theorem @{thm monotype_eq [no_vars]} already requires the - instance @{text "monotype \ eq"}, which itself requires - @{thm monotype_eq [no_vars]}; Haskell has no problem with mutually - recursive @{text instance} and @{text function} definitions, - but the SML serializer does not support this. - - In such cases, you have to provide you own equality equations - involving auxiliary constants. In our case, - @{const [show_types] list_all2} can do the job: -*} - -lemma monotype_eq_list_all2 [code func]: - "Mono tyco1 typargs1 = Mono tyco2 typargs2 \ - tyco1 = tyco2 \ list_all2 (op =) typargs1 typargs2" - by (simp add: list_all2_eq [symmetric]) - -text {* - does not depend on instance @{text "monotype \ eq"}: -*} - -export_code "op = :: monotype \ monotype \ bool" - (*<*)in SML(*>*)in SML file "examples/monotype.ML" - -text {* - \lstsml{Thy/examples/monotype.ML} -*} - -subsection {* Programs as sets of theorems *} - -text {* - As told in \secref{sec:concept}, code generation is based - on a structured collection of code theorems. - For explorative purpose, this collection - may be inspected using the @{text "\"} command: -*} - -code_thms "op mod :: nat \ nat \ nat" - -text {* - \noindent prints a table with \emph{all} defining equations - for @{term "op mod :: nat \ nat \ nat"}, including - \emph{all} defining equations those equations depend - on recursivly. @{text "\"} provides a convenient - mechanism to inspect the impact of a preprocessor setup - on defining equations. - - Similarly, the @{text "\"} command shows a graph - visualizing dependencies between defining equations. -*} - - -subsection {* Constructor sets for datatypes *} - -text {* - Conceptually, any datatype is spanned by a set of - \emph{constructors} of type @{text "\ = \ \ \ \\<^isub>1 \ \\<^isub>n"} - where @{text "{\\<^isub>1, \, \\<^isub>n}"} is excactly the set of \emph{all} - type variables in @{text "\"}. The HOL datatype package - by default registers any new datatype in the table - of datatypes, which may be inspected using - the @{text "\"} command. - - In some cases, it may be convenient to alter or - extend this table; as an example, we will develope an alternative - representation of natural numbers as binary digits, whose - size does increase logarithmically with its value, not linear - \footnote{Indeed, the @{text "Efficient_Nat"} theory (see \ref{eff_nat}) - does something similar}. First, the digit representation: -*} - -definition Dig0 :: "nat \ nat" where - "Dig0 n = 2 * n" - -definition Dig1 :: "nat \ nat" where - "Dig1 n = Suc (2 * n)" - -text {* - \noindent We will use these two ">digits"< to represent natural numbers - in binary digits, e.g.: -*} - -lemma 42: "42 = Dig0 (Dig1 (Dig0 (Dig1 (Dig0 1))))" - by (simp add: Dig0_def Dig1_def) - -text {* - \noindent Of course we also have to provide proper code equations for - the operations, e.g. @{term "op + \ nat \ nat \ nat"}: -*} - -lemma plus_Dig [code func]: - "0 + n = n" - "m + 0 = m" - "1 + Dig0 n = Dig1 n" - "Dig0 m + 1 = Dig1 m" - "1 + Dig1 n = Dig0 (n + 1)" - "Dig1 m + 1 = Dig0 (m + 1)" - "Dig0 m + Dig0 n = Dig0 (m + n)" - "Dig0 m + Dig1 n = Dig1 (m + n)" - "Dig1 m + Dig0 n = Dig1 (m + n)" - "Dig1 m + Dig1 n = Dig0 (m + n + 1)" - by (simp_all add: Dig0_def Dig1_def) - -text {* - \noindent We then instruct the code generator to view @{term "0\nat"}, - @{term "1\nat"}, @{term Dig0} and @{term Dig1} as - datatype constructors: -*} - -code_datatype "0\nat" "1\nat" Dig0 Dig1 - -text {* - \noindent For the former constructor @{term Suc}, we provide a code - equation and remove some parts of the default code generator setup - which are an obstacle here: -*} - -lemma Suc_Dig [code func]: - "Suc n = n + 1" - by simp - -declare One_nat_def [code inline del] -declare add_Suc_shift [code func del] - -text {* - \noindent This yields the following code: -*} - -export_code "op + \ nat \ nat \ nat" (*<*)in SML(*>*) in SML file "examples/nat_binary.ML" - -text {* \lstsml{Thy/examples/nat_binary.ML} *} - -text {* - \medskip - - From this example, it can be easily glimpsed that using own constructor sets - is a little delicate since it changes the set of valid patterns for values - of that type. Without going into much detail, here some practical hints: - - \begin{itemize} - \item When changing the constuctor set for datatypes, take care to - provide an alternative for the @{text case} combinator (e.g. by replacing - it using the preprocessor). - \item Values in the target language need not to be normalized -- different - values in the target language may represent the same value in the - logic (e.g. @{text "Dig1 0 = 1"}). - \item Usually, a good methodology to deal with the subleties of pattern - matching is to see the type as an abstract type: provide a set - of operations which operate on the concrete representation of the type, - and derive further operations by combinations of these primitive ones, - without relying on a particular representation. - \end{itemize} -*} - -code_datatype %invisible "0::nat" Suc -declare %invisible plus_Dig [code func del] -declare %invisible One_nat_def [code inline] -declare %invisible add_Suc_shift [code func] -lemma %invisible [code func]: "0 + n = (n \ nat)" by simp - - -subsection {* Customizing serialization *} - -subsubsection {* Basics *} - -text {* - Consider the following function and its corresponding - SML code: -*} - -primrec - in_interval :: "nat \ nat \ nat \ bool" where - "in_interval (k, l) n \ k \ n \ n \ l" -(*<*) -code_type %tt bool - (SML) -code_const %tt True and False and "op \" and Not - (SML and and and) -(*>*) -export_code in_interval (*<*)in SML(*>*)in SML file "examples/bool_literal.ML" - -text {* - \lstsml{Thy/examples/bool_literal.ML} - - \noindent Though this is correct code, it is a little bit unsatisfactory: - boolean values and operators are materialized as distinguished - entities with have nothing to do with the SML-builtin notion - of \qt{bool}. This results in less readable code; - additionally, eager evaluation may cause programs to - loop or break which would perfectly terminate when - the existing SML \qt{bool} would be used. To map - the HOL \qt{bool} on SML \qt{bool}, we may use - \qn{custom serializations}: -*} - -code_type %tt bool - (SML "bool") -code_const %tt True and False and "op \" - (SML "true" and "false" and "_ andalso _") - -text {* - The @{text "\"} commad takes a type constructor - as arguments together with a list of custom serializations. - Each custom serialization starts with a target language - identifier followed by an expression, which during - code serialization is inserted whenever the type constructor - would occur. For constants, @{text "\"} implements - the corresponding mechanism. Each ``@{verbatim "_"}'' in - a serialization expression is treated as a placeholder - for the type constructor's (the constant's) arguments. -*} - -export_code in_interval (*<*)in SML(*>*) in SML file "examples/bool_mlbool.ML" - -text {* - \lstsml{Thy/examples/bool_mlbool.ML} - - \noindent This still is not perfect: the parentheses - around the \qt{andalso} expression are superfluous. - Though the serializer - by no means attempts to imitate the rich Isabelle syntax - framework, it provides some common idioms, notably - associative infixes with precedences which may be used here: -*} - -code_const %tt "op \" - (SML infixl 1 "andalso") - -export_code in_interval (*<*)in SML(*>*) in SML file "examples/bool_infix.ML" - -text {* - \lstsml{Thy/examples/bool_infix.ML} - - \medskip - - Next, we try to map HOL pairs to SML pairs, using the - infix ``@{verbatim "*"}'' type constructor and parentheses: -*} -(*<*) -code_type * - (SML) -code_const Pair - (SML) -(*>*) -code_type %tt * - (SML infix 2 "*") -code_const %tt Pair - (SML "!((_),/ (_))") - -text {* - The initial bang ``@{verbatim "!"}'' tells the serializer to never put - parentheses around the whole expression (they are already present), - while the parentheses around argument place holders - tell not to put parentheses around the arguments. - The slash ``@{verbatim "/"}'' (followed by arbitrary white space) - inserts a space which may be used as a break if necessary - during pretty printing. - - These examples give a glimpse what mechanisms - custom serializations provide; however their usage - requires careful thinking in order not to introduce - inconsistencies -- or, in other words: - custom serializations are completely axiomatic. - - A further noteworthy details is that any special - character in a custom serialization may be quoted - using ``@{verbatim "'"}''; thus, in - ``@{verbatim "fn '_ => _"}'' the first - ``@{verbatim "_"}'' is a proper underscore while the - second ``@{verbatim "_"}'' is a placeholder. - - The HOL theories provide further - examples for custom serializations. -*} - - -subsubsection {* Haskell serialization *} - -text {* - For convenience, the default - HOL setup for Haskell maps the @{text eq} class to - its counterpart in Haskell, giving custom serializations - for the class (@{text "\"}) and its operation: -*} - -code_class %tt eq - (Haskell "Eq" where "op =" \ "(==)") - -code_const %tt "op =" - (Haskell infixl 4 "==") - -text {* - A problem now occurs whenever a type which - is an instance of @{text eq} in HOL is mapped - on a Haskell-builtin type which is also an instance - of Haskell @{text Eq}: -*} - -typedecl bar - -instantiation bar :: eq -begin - -definition "eq_class.eq (x\bar) y \ x = y" - -instance by default (simp add: eq_bar_def) - -end - -code_type %tt bar - (Haskell "Integer") - -text {* - The code generator would produce - an additional instance, which of course is rejected. - To suppress this additional instance, use - @{text "\"}: -*} - -code_instance %tt bar :: eq - (Haskell -) - - -subsubsection {* Pretty printing *} - -text {* - The serializer provides ML interfaces to set up - pretty serializations for expressions like lists, numerals - and characters; these are - monolithic stubs and should only be used with the - theories introduced in \secref{sec:library}. -*} - - -subsection {* Cyclic module dependencies *} - -text {* - Sometimes the awkward situation occurs that dependencies - between definitions introduce cyclic dependencies - between modules, which in the Haskell world leaves - you to the mercy of the Haskell implementation you are using, - while for SML code generation is not possible. - - A solution is to declare module names explicitly. - Let use assume the three cyclically dependent - modules are named \emph{A}, \emph{B} and \emph{C}. - Then, by stating -*} - -code_modulename SML - A ABC - B ABC - C ABC - -text {* - we explicitly map all those modules on \emph{ABC}, - resulting in an ad-hoc merge of this three modules - at serialization time. -*} - -subsection {* Incremental code generation *} - -text {* - Code generation is \emph{incremental}: theorems - and abstract intermediate code are cached and extended on demand. - The cache may be partially or fully dropped if the underlying - executable content of the theory changes. - Implementation of caching is supposed to transparently - hid away the details from the user. Anyway, caching - reaches the surface by using a slightly more general form - of the @{text "\"}, @{text "\"} - and @{text "\"} commands: the list of constants - may be omitted. Then, all constants with code theorems - in the current cache are referred to. -*} - -(*subsection {* Code generation and proof extraction *} - -text {* - \fixme -*}*) - - -section {* ML interfaces \label{sec:ml} *} - -text {* - Since the code generator framework not only aims to provide - a nice Isar interface but also to form a base for - code-generation-based applications, here a short - description of the most important ML interfaces. -*} - -subsection {* Executable theory content: @{text Code} *} - -text {* - This Pure module implements the core notions of - executable content of a theory. -*} - -subsubsection {* Managing executable content *} - -text %mlref {* - \begin{mldecls} - @{index_ML Code.add_func: "thm -> theory -> theory"} \\ - @{index_ML Code.del_func: "thm -> theory -> theory"} \\ - @{index_ML Code.add_funcl: "string * thm list Susp.T -> theory -> theory"} \\ - @{index_ML Code.map_pre: "(MetaSimplifier.simpset -> MetaSimplifier.simpset) -> theory -> theory"} \\ - @{index_ML Code.map_post: "(MetaSimplifier.simpset -> MetaSimplifier.simpset) -> theory -> theory"} \\ - @{index_ML Code.add_functrans: "string * (theory -> thm list -> thm list option) - -> theory -> theory"} \\ - @{index_ML Code.del_functrans: "string -> theory -> theory"} \\ - @{index_ML Code.add_datatype: "(string * typ) list -> theory -> theory"} \\ - @{index_ML Code.get_datatype: "theory -> string - -> (string * sort) list * (string * typ list) list"} \\ - @{index_ML Code.get_datatype_of_constr: "theory -> string -> string option"} - \end{mldecls} - - \begin{description} - - \item @{ML Code.add_func}~@{text "thm"}~@{text "thy"} adds function - theorem @{text "thm"} to executable content. - - \item @{ML Code.del_func}~@{text "thm"}~@{text "thy"} removes function - theorem @{text "thm"} from executable content, if present. - - \item @{ML Code.add_funcl}~@{text "(const, lthms)"}~@{text "thy"} adds - suspended defining equations @{text lthms} for constant - @{text const} to executable content. - - \item @{ML Code.map_pre}~@{text "f"}~@{text "thy"} changes - the preprocessor simpset. - - \item @{ML Code.add_functrans}~@{text "(name, f)"}~@{text "thy"} adds - function transformer @{text f} (named @{text name}) to executable content; - @{text f} is a transformer of the defining equations belonging - to a certain function definition, depending on the - current theory context. Returning @{text NONE} indicates that no - transformation took place; otherwise, the whole process will be iterated - with the new defining equations. - - \item @{ML Code.del_functrans}~@{text "name"}~@{text "thy"} removes - function transformer named @{text name} from executable content. - - \item @{ML Code.add_datatype}~@{text cs}~@{text thy} adds - a datatype to executable content, with generation - set @{text cs}. - - \item @{ML Code.get_datatype_of_constr}~@{text "thy"}~@{text "const"} - returns type constructor corresponding to - constructor @{text const}; returns @{text NONE} - if @{text const} is no constructor. - - \end{description} -*} - -subsection {* Auxiliary *} - -text %mlref {* - \begin{mldecls} - @{index_ML Code_Unit.read_const: "theory -> string -> string"} \\ - @{index_ML Code_Unit.head_func: "thm -> string * ((string * sort) list * typ)"} \\ - @{index_ML Code_Unit.rewrite_func: "MetaSimplifier.simpset -> thm -> thm"} \\ - \end{mldecls} - - \begin{description} - - \item @{ML Code_Unit.read_const}~@{text thy}~@{text s} - reads a constant as a concrete term expression @{text s}. - - \item @{ML Code_Unit.head_func}~@{text thm} - extracts the constant and its type from a defining equation @{text thm}. - - \item @{ML Code_Unit.rewrite_func}~@{text ss}~@{text thm} - rewrites a defining equation @{text thm} with a simpset @{text ss}; - only arguments and right hand side are rewritten, - not the head of the defining equation. - - \end{description} - -*} - -subsection {* Implementing code generator applications *} - -text {* - Implementing code generator applications on top - of the framework set out so far usually not only - involves using those primitive interfaces - but also storing code-dependent data and various - other things. - - \begin{warn} - Some interfaces discussed here have not reached - a final state yet. - Changes likely to occur in future. - \end{warn} -*} - -subsubsection {* Data depending on the theory's executable content *} - -text {* - Due to incrementality of code generation, changes in the - theory's executable content have to be propagated in a - certain fashion. Additionally, such changes may occur - not only during theory extension but also during theory - merge, which is a little bit nasty from an implementation - point of view. The framework provides a solution - to this technical challenge by providing a functorial - data slot @{ML_functor CodeDataFun}; on instantiation - of this functor, the following types and operations - are required: - - \medskip - \begin{tabular}{l} - @{text "type T"} \\ - @{text "val empty: T"} \\ - @{text "val merge: Pretty.pp \ T * T \ T"} \\ - @{text "val purge: theory option \ CodeUnit.const list option \ T \ T"} - \end{tabular} - - \begin{description} - - \item @{text T} the type of data to store. - - \item @{text empty} initial (empty) data. - - \item @{text merge} merging two data slots. - - \item @{text purge}~@{text thy}~@{text consts} propagates changes in executable content; - if possible, the current theory context is handed over - as argument @{text thy} (if there is no current theory context (e.g.~during - theory merge, @{ML NONE}); @{text consts} indicates the kind - of change: @{ML NONE} stands for a fundamental change - which invalidates any existing code, @{text "SOME consts"} - hints that executable content for constants @{text consts} - has changed. - - \end{description} - - An instance of @{ML_functor CodeDataFun} provides the following - interface: - - \medskip - \begin{tabular}{l} - @{text "get: theory \ T"} \\ - @{text "change: theory \ (T \ T) \ T"} \\ - @{text "change_yield: theory \ (T \ 'a * T) \ 'a * T"} - \end{tabular} - - \begin{description} - - \item @{text get} retrieval of the current data. - - \item @{text change} update of current data (cached!) - by giving a continuation. - - \item @{text change_yield} update with side result. - - \end{description} -*} - -(*subsubsection {* Datatype hooks *} - -text {* - Isabelle/HOL's datatype package provides a mechanism to - extend theories depending on datatype declarations: - \emph{datatype hooks}. For example, when declaring a new - datatype, a hook proves defining equations for equality on - that datatype (if possible). -*} - -text %mlref {* - \begin{mldecls} - @{index_ML_type DatatypeHooks.hook: "string list -> theory -> theory"} \\ - @{index_ML DatatypeHooks.add: "DatatypeHooks.hook -> theory -> theory"} - \end{mldecls} - - \begin{description} - - \item @{ML_type DatatypeHooks.hook} specifies the interface - of \emph{datatype hooks}: a theory update - depending on the list of newly introduced - datatype names. - - \item @{ML DatatypeHooks.add} adds a hook to the - chain of all hooks. - - \end{description} -*} - -subsubsection {* Trivial typedefs -- type copies *} - -text {* - Sometimes packages will introduce new types - as \emph{marked type copies} similar to Haskell's - @{text newtype} declaration (e.g. the HOL record package) - \emph{without} tinkering with the overhead of datatypes. - Technically, these type copies are trivial forms of typedefs. - Since these type copies in code generation view are nothing - else than datatypes, they have been given a own package - in order to faciliate code generation: -*} - -text %mlref {* - \begin{mldecls} - @{index_ML_type TypecopyPackage.info} \\ - @{index_ML TypecopyPackage.add_typecopy: " - bstring * string list -> typ -> (bstring * bstring) option - -> theory -> (string * TypecopyPackage.info) * theory"} \\ - @{index_ML TypecopyPackage.get_typecopy_info: "theory - -> string -> TypecopyPackage.info option"} \\ - @{index_ML TypecopyPackage.get_spec: "theory -> string - -> (string * sort) list * (string * typ list) list"} \\ - @{index_ML_type TypecopyPackage.hook: "string * TypecopyPackage.info -> theory -> theory"} \\ - @{index_ML TypecopyPackage.add_hook: - "TypecopyPackage.hook -> theory -> theory"} \\ - \end{mldecls} - - \begin{description} - - \item @{ML_type TypecopyPackage.info} a record containing - the specification and further data of a type copy. - - \item @{ML TypecopyPackage.add_typecopy} defines a new - type copy. - - \item @{ML TypecopyPackage.get_typecopy_info} retrieves - data of an existing type copy. - - \item @{ML TypecopyPackage.get_spec} retrieves datatype-like - specification of a type copy. - - \item @{ML_type TypecopyPackage.hook},~@{ML TypecopyPackage.add_hook} - provide a hook mechanism corresponding to the hook mechanism - on datatypes. - - \end{description} -*} - -subsubsection {* Unifying type copies and datatypes *} - -text {* - Since datatypes and type copies are mapped to the same concept (datatypes) - by code generation, the view on both is unified \qt{code types}: -*} - -text %mlref {* - \begin{mldecls} - @{index_ML_type DatatypeCodegen.hook: "(string * (bool * ((string * sort) list - * (string * typ list) list))) list - -> theory -> theory"} \\ - @{index_ML DatatypeCodegen.add_codetypes_hook_bootstrap: " - DatatypeCodegen.hook -> theory -> theory"} - \end{mldecls} -*} - -text {* - \begin{description} - - \item @{ML_type DatatypeCodegen.hook} specifies the code type hook - interface: a theory transformation depending on a list of - mutual recursive code types; each entry in the list - has the structure @{text "(name, (is_data, (vars, cons)))"} - where @{text name} is the name of the code type, @{text is_data} - is true iff @{text name} is a datatype rather then a type copy, - and @{text "(vars, cons)"} is the specification of the code type. - - \item @{ML DatatypeCodegen.add_codetypes_hook_bootstrap} adds a code - type hook; the hook is immediately processed for all already - existing datatypes, in blocks of mutual recursive datatypes, - where all datatypes a block depends on are processed before - the block. - - \end{description} -*}*) - -text {* - \emph{Happy proving, happy hacking!} -*} - -end diff -r 4ffb62675ade -r f65e8b318581 doc-src/IsarAdvanced/Codegen/Thy/Further.thy --- a/doc-src/IsarAdvanced/Codegen/Thy/Further.thy Tue Sep 30 04:06:55 2008 +0200 +++ b/doc-src/IsarAdvanced/Codegen/Thy/Further.thy Tue Sep 30 11:19:47 2008 +0200 @@ -2,9 +2,45 @@ imports Setup begin -section {* Further topics *} +section {* Further topics \label{sec:further} *} + +subsection {* Further reading *} + +text {* + Do dive deeper into the issue of code generation, you should visit + the Isabelle/Isar Reference Manual \cite{isabelle-isar-ref} which + constains exhaustive syntax diagrams. +*} + +subsection {* Modules *} + +text {* + When invoking the @{command export_code} command it is possible to leave + out the @{keyword "module_name"} part; then code is distributed over + different modules, where the module name space roughly is induced + by the @{text Isabelle} theory namespace. -subsection {* Serializer options *} + Then sometimes the awkward situation occurs that dependencies between + definitions introduce cyclic dependencies between modules, which in the + @{text Haskell} world leaves you to the mercy of the @{text Haskell} implementation + you are using, while for @{text SML}/@{text OCaml} code generation is not possible. + + A solution is to declare module names explicitly. + Let use assume the three cyclically dependent + modules are named \emph{A}, \emph{B} and \emph{C}. + Then, by stating +*} + +code_modulename SML + A ABC + B ABC + C ABC + +text {* + we explicitly map all those modules on \emph{ABC}, + resulting in an ad-hoc merge of this three modules + at serialisation time. +*} subsection {* Evaluation oracle *} @@ -14,4 +50,6 @@ text {* extending targets, adding targets *} +subsection {* Imperative data structures *} + end diff -r 4ffb62675ade -r f65e8b318581 doc-src/IsarAdvanced/Codegen/Thy/Introduction.thy --- a/doc-src/IsarAdvanced/Codegen/Thy/Introduction.thy Tue Sep 30 04:06:55 2008 +0200 +++ b/doc-src/IsarAdvanced/Codegen/Thy/Introduction.thy Tue Sep 30 11:19:47 2008 +0200 @@ -8,15 +8,173 @@ text {* This tutorial introduces a generic code generator for the - Isabelle system \cite{isa-tutorial}. + @{text Isabelle} system. + Generic in the sense that the + \qn{target language} for which code shall ultimately be + generated is not fixed but may be an arbitrary state-of-the-art + functional programming language (currently, the implementation + supports @{text SML} \cite{SML}, @{text OCaml} \cite{OCaml} and @{text Haskell} + \cite{haskell-revised-report}). + + Conceptually the code generator framework is part + of Isabelle's @{text Pure} meta logic framework; the logic + @{text HOL} which is an extension of @{text Pure} + already comes with a reasonable framework setup and thus provides + a good working horse for raising code-generation-driven + applications. So, we assume some familiarity and experience + with the ingredients of the @{text HOL} distribution theories. + (see also \cite{isa-tutorial}). + + The code generator aims to be usable with no further ado + in most cases while allowing for detailed customisation. + This manifests in the structure of this tutorial: after a short + conceptual introduction with an example \secref{sec:intro}, + we discuss the generic customisation facilities \secref{sec:program}. + A further section \secref{sec:adaption} is dedicated to the matter of + \qn{adaption} to specific target language environments. After some + further issues \secref{sec:further} we conclude with an overview + of some ML programming interfaces \secref{sec:ml}. + + \begin{warn} + Ultimately, the code generator which this tutorial deals with + is supposed to replace the already established code generator + by Stefan Berghofer \cite{Berghofer-Nipkow:2002}. + So, for the moment, there are two distinct code generators + in Isabelle. + Also note that while the framework itself is + object-logic independent, only @{text HOL} provides a reasonable + framework setup. + \end{warn} + *} -subsection {* Code generation via shallow embedding *} +subsection {* Code generation via shallow embedding \label{sec:intro} *} + +text {* + The key concept for understanding @{text Isabelle}'s code generation is + \emph{shallow embedding}, i.e.~logical entities like constants, types and + classes are identified with corresponding concepts in the target language. + + Inside @{text HOL}, the @{command datatype} and + @{command definition}/@{command primrec}/@{command fun} declarations form + the core of a functional programming language. The default code generator setup + allows to turn those into functional programs immediately. + + This means that \qt{naive} code generation can proceed without further ado. + For example, here a simple \qt{implementation} of amortised queues: +*} + +datatype %quoteme 'a queue = Queue "'a list" "'a list" + +definition %quoteme empty :: "'a queue" where + "empty = Queue [] []" + +primrec %quoteme enqueue :: "'a \ 'a queue \ 'a queue" where + "enqueue x (Queue xs ys) = Queue (x # xs) ys" + +fun %quoteme dequeue :: "'a queue \ 'a option \ 'a queue" where + "dequeue (Queue [] []) = (None, Queue [] [])" + | "dequeue (Queue xs (y # ys)) = (Some y, Queue xs ys)" + | "dequeue (Queue xs []) = (case rev xs of y # ys \ (Some y, Queue [] ys))" + +text {* \noindent Then we can generate code e.g.~for @{text SML} as follows: *} -text {* example *} +export_code %quoteme empty dequeue enqueue in SML module_name Example file "examples/example.ML" + +text {* \noindent resulting in the following code: *} + +text %quoteme {*@{code_stmts empty enqueue dequeue (SML)}*} + +text {* + \noindent The @{command export_code} command takes a space-separated list of + constants for which code shall be generated; anything else needed for those + is added implicitly. Then follows by a target language identifier + (@{text SML}, @{text OCaml} or @{text Haskell}) and a freely chosen module name. + A file name denotes the destination to store the generated code. Note that + the semantics of the destination depends on the target language: for + @{text SML} and @{text OCaml} it denotes a \emph{file}, for @{text Haskell} + it denotes a \emph{directory} where a file named as the module name + (with extension @{text ".hs"}) is written: +*} + +export_code %quoteme empty dequeue enqueue in SML module_name Example file "examples/Example.ML" + +text {* + \noindent This is how the corresponding code in @{text Haskell} looks like: +*} + +text %quoteme {*@{code_stmts empty enqueue dequeue (Haskell)}*} + +text {* + \noindent This demonstrates the basic usage of the @{command export_code} command; + for more details see \secref{sec:serializer}. +*} subsection {* Code generator architecture *} -text {* foundation, forward references for yet unexplained things *} +text {* + What you have seen so far should be already enough in a lot of cases. If you + are content with this, you can quit reading here. Anyway, in order to customise + and adapt the code generator, it is inevitable to gain some understanding + how it works. + + \begin{figure}[h] + \centering + \includegraphics[width=0.7\textwidth]{codegen_process} + \caption{Code generator architecture} + \label{fig:arch} + \end{figure} + + The code generator itself consists of three major components + which operate sequentially, i.e. the result of one is the input + of the next in the chain, see diagram \ref{fig:arch}: + + The code generator employs a notion of executability + for three foundational executable ingredients known + from functional programming: + \emph{defining equations}, \emph{datatypes}, and + \emph{type classes}. A defining equation as a first approximation + is a theorem of the form @{text "f t\<^isub>1 t\<^isub>2 \ t\<^isub>n \ t"} + (an equation headed by a constant @{text f} with arguments + @{text "t\<^isub>1 t\<^isub>2 \ t\<^isub>n"} and right hand side @{text t}). + Code generation aims to turn defining equations + into a functional program by running through the following + process: + + \begin{itemize} + + \item Out of the vast collection of theorems proven in a + \qn{theory}, a reasonable subset modelling + defining equations is \qn{selected}. + + \item On those selected theorems, certain + transformations are carried out + (\qn{preprocessing}). Their purpose is to turn theorems + representing non- or badly executable + specifications into equivalent but executable counterparts. + The result is a structured collection of \qn{code theorems}. + + \item Before the selected defining equations are continued with, + they can be \qn{preprocessed}, i.e. subjected to theorem + transformations. This \qn{preprocessor} is an interface which + allows to apply + the full expressiveness of ML-based theorem transformations + to code generation; motivating examples are shown below, see + \secref{sec:preproc}. + The result of the preprocessing step is a structured collection + of defining equations. + + \item These defining equations are \qn{translated} to a program + in an abstract intermediate language. + + \item Finally, the abstract program is \qn{serialised} into concrete + source code of a target language. + + \end{itemize} + + \noindent From these steps, only the two last are carried out outside the logic; by + keeping this layer as thin as possible, the amount of code to trust is + kept to a minimum. +*} end diff -r 4ffb62675ade -r f65e8b318581 doc-src/IsarAdvanced/Codegen/Thy/ML.thy --- a/doc-src/IsarAdvanced/Codegen/Thy/ML.thy Tue Sep 30 04:06:55 2008 +0200 +++ b/doc-src/IsarAdvanced/Codegen/Thy/ML.thy Tue Sep 30 11:19:47 2008 +0200 @@ -2,6 +2,180 @@ imports Setup begin -section {* ML system interfaces *} +section {* ML system interfaces \label{sec:ml} *} + +text {* + Since the code generator framework not only aims to provide + a nice Isar interface but also to form a base for + code-generation-based applications, here a short + description of the most important ML interfaces. +*} + +subsection {* Executable theory content: @{text Code} *} + +text {* + This Pure module implements the core notions of + executable content of a theory. +*} + +subsubsection {* Managing executable content *} + +text %mlref {* + \begin{mldecls} + @{index_ML Code.add_eqn: "thm -> theory -> theory"} \\ + @{index_ML Code.del_eqn: "thm -> theory -> theory"} \\ + @{index_ML Code.add_eqnl: "string * (thm * bool) list Susp.T -> theory -> theory"} \\ + @{index_ML Code.map_pre: "(MetaSimplifier.simpset -> MetaSimplifier.simpset) -> theory -> theory"} \\ + @{index_ML Code.map_post: "(MetaSimplifier.simpset -> MetaSimplifier.simpset) -> theory -> theory"} \\ + @{index_ML Code.add_functrans: "string * (theory -> (thm * bool) list -> (thm * bool) list option) + -> theory -> theory"} \\ + @{index_ML Code.del_functrans: "string -> theory -> theory"} \\ + @{index_ML Code.add_datatype: "(string * typ) list -> theory -> theory"} \\ + @{index_ML Code.get_datatype: "theory -> string + -> (string * sort) list * (string * typ list) list"} \\ + @{index_ML Code.get_datatype_of_constr: "theory -> string -> string option"} + \end{mldecls} + + \begin{description} + + \item @{ML Code.add_eqn}~@{text "thm"}~@{text "thy"} adds function + theorem @{text "thm"} to executable content. + + \item @{ML Code.del_eqn}~@{text "thm"}~@{text "thy"} removes function + theorem @{text "thm"} from executable content, if present. + + \item @{ML Code.add_eqnl}~@{text "(const, lthms)"}~@{text "thy"} adds + suspended defining equations @{text lthms} for constant + @{text const} to executable content. + + \item @{ML Code.map_pre}~@{text "f"}~@{text "thy"} changes + the preprocessor simpset. + + \item @{ML Code.add_functrans}~@{text "(name, f)"}~@{text "thy"} adds + function transformer @{text f} (named @{text name}) to executable content; + @{text f} is a transformer of the defining equations belonging + to a certain function definition, depending on the + current theory context. Returning @{text NONE} indicates that no + transformation took place; otherwise, the whole process will be iterated + with the new defining equations. + + \item @{ML Code.del_functrans}~@{text "name"}~@{text "thy"} removes + function transformer named @{text name} from executable content. + + \item @{ML Code.add_datatype}~@{text cs}~@{text thy} adds + a datatype to executable content, with generation + set @{text cs}. + + \item @{ML Code.get_datatype_of_constr}~@{text "thy"}~@{text "const"} + returns type constructor corresponding to + constructor @{text const}; returns @{text NONE} + if @{text const} is no constructor. + + \end{description} +*} + +subsection {* Auxiliary *} + +text %mlref {* + \begin{mldecls} + @{index_ML Code_Unit.read_const: "theory -> string -> string"} \\ + @{index_ML Code_Unit.head_eqn: "theory -> thm -> string * ((string * sort) list * typ)"} \\ + @{index_ML Code_Unit.rewrite_eqn: "MetaSimplifier.simpset -> thm -> thm"} \\ + \end{mldecls} + + \begin{description} + + \item @{ML Code_Unit.read_const}~@{text thy}~@{text s} + reads a constant as a concrete term expression @{text s}. + + \item @{ML Code_Unit.head_eqn}~@{text thy}~@{text thm} + extracts the constant and its type from a defining equation @{text thm}. + + \item @{ML Code_Unit.rewrite_eqn}~@{text ss}~@{text thm} + rewrites a defining equation @{text thm} with a simpset @{text ss}; + only arguments and right hand side are rewritten, + not the head of the defining equation. + + \end{description} + +*} + +subsection {* Implementing code generator applications *} + +text {* + Implementing code generator applications on top + of the framework set out so far usually not only + involves using those primitive interfaces + but also storing code-dependent data and various + other things. + + \begin{warn} + Some interfaces discussed here have not reached + a final state yet. + Changes likely to occur in future. + \end{warn} +*} + +subsubsection {* Data depending on the theory's executable content *} + +text {* + Due to incrementality of code generation, changes in the + theory's executable content have to be propagated in a + certain fashion. Additionally, such changes may occur + not only during theory extension but also during theory + merge, which is a little bit nasty from an implementation + point of view. The framework provides a solution + to this technical challenge by providing a functorial + data slot @{ML_functor CodeDataFun}; on instantiation + of this functor, the following types and operations + are required: + + \medskip + \begin{tabular}{l} + @{text "type T"} \\ + @{text "val empty: T"} \\ + @{text "val purge: theory \ CodeUnit.const list option \ T \ T"} + \end{tabular} + + \begin{description} + + \item @{text T} the type of data to store. + + \item @{text empty} initial (empty) data. + + \item @{text purge}~@{text thy}~@{text consts} propagates changes in executable content; + @{text consts} indicates the kind + of change: @{ML NONE} stands for a fundamental change + which invalidates any existing code, @{text "SOME consts"} + hints that executable content for constants @{text consts} + has changed. + + \end{description} + + An instance of @{ML_functor CodeDataFun} provides the following + interface: + + \medskip + \begin{tabular}{l} + @{text "get: theory \ T"} \\ + @{text "change: theory \ (T \ T) \ T"} \\ + @{text "change_yield: theory \ (T \ 'a * T) \ 'a * T"} + \end{tabular} + + \begin{description} + + \item @{text get} retrieval of the current data. + + \item @{text change} update of current data (cached!) + by giving a continuation. + + \item @{text change_yield} update with side result. + + \end{description} +*} + +text {* + \emph{Happy proving, happy hacking!} +*} end diff -r 4ffb62675ade -r f65e8b318581 doc-src/IsarAdvanced/Codegen/Thy/Program.thy --- a/doc-src/IsarAdvanced/Codegen/Thy/Program.thy Tue Sep 30 04:06:55 2008 +0200 +++ b/doc-src/IsarAdvanced/Codegen/Thy/Program.thy Tue Sep 30 11:19:47 2008 +0200 @@ -1,27 +1,469 @@ theory Program -imports Setup +imports Introduction begin -section {* Turning Theories into Programs *} +section {* Turning Theories into Programs \label{sec:program} *} subsection {* The @{text "Isabelle/HOL"} default setup *} -text {* Invoking the code generator *} - subsection {* Selecting code equations *} -text {* inspection by @{text "code_thms"} *} +text {* + We have already seen how by default equations stemming from + @{command definition}/@{command primrec}/@{command fun} + statements are used for code generation. Deviating from this + \emph{default} behaviour is always possible -- e.g.~we + could provide an alternative @{text fun} for @{const dequeue} proving + the following equations explicitly: +*} + +lemma %quoteme [code func]: + "dequeue (Queue xs []) = (if xs = [] then (None, Queue [] []) else dequeue (Queue [] (rev xs)))" + "dequeue (Queue xs (y # ys)) = (Some y, Queue xs ys)" + by (cases xs, simp_all) (cases "rev xs", simp_all) + +text {* + \noindent The annotation @{text "[code func]"} is an @{text Isar} + @{text attribute} which states that the given theorems should be + considered as defining equations for a @{text fun} statement -- + the corresponding constant is determined syntactically. The resulting code: +*} + +text %quoteme {*@{code_stmts empty enqueue dequeue (Haskell)}*} + +text {* + \noindent You may note that the equality test @{term "xs = []"} has been + replaced by the predicate @{term "null xs"}. This is due to the default + setup in the \qn{preprocessor} to be discussed further below (\secref{sec:preproc}). + + Changing the default constructor set of datatypes is also + possible but rarely desired in practice. See \secref{sec:datatypes} for an example. + + As told in \secref{sec:concept}, code generation is based + on a structured collection of code theorems. + For explorative purpose, this collection + may be inspected using the @{command code_thms} command: +*} + +code_thms %quoteme dequeue + +text {* + \noindent prints a table with \emph{all} defining equations + for @{const dequeue}, including + \emph{all} defining equations those equations depend + on recursively. + + Similarly, the @{command code_deps} command shows a graph + visualising dependencies between defining equations. +*} + +subsection {* @{text class} and @{text instantiation} *} + +text {* + Concerning type classes and code generation, let us again examine an example + from abstract algebra: +*} + +class %quoteme semigroup = type + + fixes mult :: "'a \ 'a \ 'a" (infixl "\" 70) + assumes assoc: "(x \ y) \ z = x \ (y \ z)" + +class %quoteme monoid = semigroup + + fixes neutral :: 'a ("\") + assumes neutl: "\ \ x = x" + and neutr: "x \ \ = x" + +instantiation %quoteme nat :: monoid +begin + +primrec %quoteme mult_nat where + "0 \ n = (0\nat)" + | "Suc m \ n = n + m \ n" + +definition %quoteme neutral_nat where + "\ = Suc 0" -subsection {* The preprocessor *} +lemma %quoteme add_mult_distrib: + fixes n m q :: nat + shows "(n + m) \ q = n \ q + m \ q" + by (induct n) simp_all + +instance %quoteme proof + fix m n q :: nat + show "m \ n \ q = m \ (n \ q)" + by (induct m) (simp_all add: add_mult_distrib) + show "\ \ n = n" + by (simp add: neutral_nat_def) + show "m \ \ = m" + by (induct m) (simp_all add: neutral_nat_def) +qed + +end %quoteme + +text {* + \noindent We define the natural operation of the natural numbers + on monoids: +*} + +primrec %quoteme pow :: "nat \ 'a\monoid \ 'a\monoid" where + "pow 0 a = \" + | "pow (Suc n) a = a \ pow n a" + +text {* + \noindent This we use to define the discrete exponentiation function: +*} + +definition %quoteme bexp :: "nat \ nat" where + "bexp n = pow n (Suc (Suc 0))" + +text {* + \noindent The corresponding code: +*} + +text %quoteme {*@{code_stmts bexp (Haskell)}*} + +text {* + \noindent An inspection reveals that the equations stemming from the + @{text primrec} statement within instantiation of class + @{class semigroup} with type @{typ nat} are mapped to a separate + function declaration @{text mult_nat} which in turn is used + to provide the right hand side for the @{text "instance Semigroup Nat"} + \fixme[courier fuer code text, isastyle fuer class]. This perfectly + agrees with the restriction that @{text inst} statements may + only contain one single equation for each class class parameter + The @{text instantiation} mechanism manages that for each + overloaded constant @{text "f [\ \<^bvec>\\s\<^isub>k\<^evec>]"} + a \emph{shadow constant} @{text "f\<^isub>\ [\<^bvec>\\s\<^isub>k\<^evec>]"} is + declared satisfying @{text "f [\ \<^bvec>\\s\<^isub>k\<^evec>] \ f\<^isub>\ [\<^bvec>\\s\<^isub>k\<^evec>]"}. + this equation is indeed the one used for the statement; + using it as a rewrite rule, defining equations for + @{text "f [\ \<^bvec>\\s\<^isub>k\<^evec>]"} can be turned into + defining equations for @{text "f\<^isub>\ [\<^bvec>\\s\<^isub>k\<^evec>]"}. This + happens transparently, providing the illusion that class parameters + can be instantiated with more than one equation. + + This is a convenient place to show how explicit dictionary construction + manifests in generated code (here, the same example in @{text SML}): +*} + +text %quoteme {*@{code_stmts bexp (SML)}*} + + +subsection {* The preprocessor \label{sec:preproc} *} + +text {* + Before selected function theorems are turned into abstract + code, a chain of definitional transformation steps is carried + out: \emph{preprocessing}. In essence, the preprocessor + consists of two components: a \emph{simpset} and \emph{function transformers}. + + The \emph{simpset} allows to employ the full generality of the Isabelle + simplifier. Due to the interpretation of theorems + as defining equations, rewrites are applied to the right + hand side and the arguments of the left hand side of an + equation, but never to the constant heading the left hand side. + An important special case are \emph{inline theorems} which may be + declared and undeclared using the + \emph{code inline} or \emph{code inline del} attribute respectively. -subsection {* Datatypes *} + Some common applications: +*} + +text_raw {* + \begin{itemize} +*} + +text {* + \item replacing non-executable constructs by executable ones: +*} + +lemma %quoteme [code inline]: + "x \ set xs \ x mem xs" by (induct xs) simp_all + +text {* + \item eliminating superfluous constants: +*} + +lemma %quoteme [code inline]: + "1 = Suc 0" by simp + +text {* + \item replacing executable but inconvenient constructs: +*} + +lemma %quoteme [code inline]: + "xs = [] \ List.null xs" by (induct xs) simp_all + +text_raw {* + \end{itemize} +*} + +text {* + \emph{Function transformers} provide a very general interface, + transforming a list of function theorems to another + list of function theorems, provided that neither the heading + constant nor its type change. The @{term "0\nat"} / @{const Suc} + pattern elimination implemented in + theory @{text Efficient_Nat} (see \secref{eff_nat}) uses this + interface. + + \noindent The current setup of the preprocessor may be inspected using + the @{command print_codesetup} command. + @{command code_thms} provides a convenient + mechanism to inspect the impact of a preprocessor setup + on defining equations. + + \begin{warn} + The attribute \emph{code unfold} + associated with the existing code generator also applies to + the new one: \emph{code unfold} implies \emph{code inline}. + \end{warn} +*} + +subsection {* Datatypes \label{sec:datatypes} *} + +text {* + Conceptually, any datatype is spanned by a set of + \emph{constructors} of type @{text "\ = \ \ \ \\<^isub>1 \ \\<^isub>n"} + where @{text "{\\<^isub>1, \, \\<^isub>n}"} is excactly the set of \emph{all} + type variables in @{text "\"}. The HOL datatype package + by default registers any new datatype in the table + of datatypes, which may be inspected using + the @{command print_codesetup} command. + + In some cases, it may be convenient to alter or + extend this table; as an example, we will develop an alternative + representation of natural numbers as binary digits, whose + size does increase logarithmically with its value, not linear + \footnote{Indeed, the @{theory Efficient_Nat} theory (see \ref{eff_nat}) + does something similar}. First, the digit representation: +*} + +definition %quoteme Dig0 :: "nat \ nat" where + "Dig0 n = 2 * n" + +definition %quoteme Dig1 :: "nat \ nat" where + "Dig1 n = Suc (2 * n)" -text {* example: @{text "rat"}, cases *} +text {* + \noindent We will use these two ">digits"< to represent natural numbers + in binary digits, e.g.: +*} + +lemma %quoteme 42: "42 = Dig0 (Dig1 (Dig0 (Dig1 (Dig0 1))))" + by (simp add: Dig0_def Dig1_def) + +text {* + \noindent Of course we also have to provide proper code equations for + the operations, e.g. @{term "op + \ nat \ nat \ nat"}: +*} + +lemma %quoteme plus_Dig [code func]: + "0 + n = n" + "m + 0 = m" + "1 + Dig0 n = Dig1 n" + "Dig0 m + 1 = Dig1 m" + "1 + Dig1 n = Dig0 (n + 1)" + "Dig1 m + 1 = Dig0 (m + 1)" + "Dig0 m + Dig0 n = Dig0 (m + n)" + "Dig0 m + Dig1 n = Dig1 (m + n)" + "Dig1 m + Dig0 n = Dig1 (m + n)" + "Dig1 m + Dig1 n = Dig0 (m + n + 1)" + by (simp_all add: Dig0_def Dig1_def) + +text {* + \noindent We then instruct the code generator to view @{term "0\nat"}, + @{term "1\nat"}, @{term Dig0} and @{term Dig1} as + datatype constructors: +*} + +code_datatype %quoteme "0\nat" "1\nat" Dig0 Dig1 + +text {* + \noindent For the former constructor @{term Suc}, we provide a code + equation and remove some parts of the default code generator setup + which are an obstacle here: +*} + +lemma %quoteme Suc_Dig [code func]: + "Suc n = n + 1" + by simp + +declare %quoteme One_nat_def [code inline del] +declare %quoteme add_Suc_shift [code func del] + +text {* + \noindent This yields the following code: +*} + +text %quoteme {*@{code_stmts "op + \ nat \ nat \ nat" (SML)}*} + +text {* + \medskip + + From this example, it can be easily glimpsed that using own constructor sets + is a little delicate since it changes the set of valid patterns for values + of that type. Without going into much detail, here some practical hints: + + \begin{itemize} + \item When changing the constructor set for datatypes, take care to + provide an alternative for the @{text case} combinator (e.g.~by replacing + it using the preprocessor). + \item Values in the target language need not to be normalised -- different + values in the target language may represent the same value in the + logic (e.g. @{term "Dig1 0 = 1"}). + \item Usually, a good methodology to deal with the subtleties of pattern + matching is to see the type as an abstract type: provide a set + of operations which operate on the concrete representation of the type, + and derive further operations by combinations of these primitive ones, + without relying on a particular representation. + \end{itemize} +*} + +code_datatype %invisible "0::nat" Suc +declare %invisible plus_Dig [code func del] +declare %invisible One_nat_def [code inline] +declare %invisible add_Suc_shift [code func] +lemma %invisible [code func]: "0 + n = (n \ nat)" by simp + subsection {* Equality and wellsortedness *} +text {* + Surely you have already noticed how equality is treated + by the code generator: +*} + +primrec %quoteme + collect_duplicates :: "'a list \ 'a list \ 'a list \ 'a list" where + "collect_duplicates xs ys [] = xs" + | "collect_duplicates xs ys (z#zs) = (if z \ set xs + then if z \ set ys + then collect_duplicates xs ys zs + else collect_duplicates xs (z#ys) zs + else collect_duplicates (z#xs) (z#ys) zs)" + +text {* + The membership test during preprocessing is rewritten, + resulting in @{const List.member}, which itself + performs an explicit equality check. +*} + +text %quoteme {*@{code_stmts collect_duplicates (SML)}*} + +text {* + \noindent Obviously, polymorphic equality is implemented the Haskell + way using a type class. How is this achieved? HOL introduces + an explicit class @{class eq} with a corresponding operation + @{const eq_class.eq} such that @{thm eq [no_vars]}. + The preprocessing framework does the rest. + For datatypes, instances of @{class eq} are implicitly derived + when possible. For other types, you may instantiate @{text eq} + manually like any other type class. + + Though this @{text eq} class is designed to get rarely in + the way, a subtlety + enters the stage when definitions of overloaded constants + are dependent on operational equality. For example, let + us define a lexicographic ordering on tuples: +*} + +instantiation * :: (ord, ord) ord +begin + +definition + [code func del]: "p1 < p2 \ (let (x1, y1) = p1; (x2, y2) = p2 in + x1 < x2 \ (x1 = x2 \ y1 < y2))" + +definition + [code func del]: "p1 \ p2 \ (let (x1, y1) = p1; (x2, y2) = p2 in + x1 < x2 \ (x1 = x2 \ y1 \ y2))" + +instance .. + +end + +lemma ord_prod [code func(*<*), code func del(*>*)]: + "(x1 \ 'a\ord, y1 \ 'b\ord) < (x2, y2) \ x1 < x2 \ (x1 = x2 \ y1 < y2)" + "(x1 \ 'a\ord, y1 \ 'b\ord) \ (x2, y2) \ x1 < x2 \ (x1 = x2 \ y1 \ y2)" + unfolding less_prod_def less_eq_prod_def by simp_all + +text {* + Then code generation will fail. Why? The definition + of @{term "op \"} depends on equality on both arguments, + which are polymorphic and impose an additional @{class eq} + class constraint, thus violating the type discipline + for class operations. + + The solution is to add @{class eq} explicitly to the first sort arguments in the + code theorems: +*} + +lemma ord_prod_code [code func]: + "(x1 \ 'a\{ord, eq}, y1 \ 'b\ord) < (x2, y2) \ + x1 < x2 \ (x1 = x2 \ y1 < y2)" + "(x1 \ 'a\{ord, eq}, y1 \ 'b\ord) \ (x2, y2) \ + x1 < x2 \ (x1 = x2 \ y1 \ y2)" + unfolding ord_prod by rule+ + +text {* + \noindent Then code generation succeeds: +*} + +text %quoteme {*@{code_stmts "op \ \ 'a\{eq, ord} \ 'b\ord \ 'a \ 'b \ bool" (SML)}*} + +text {* + In general, code theorems for overloaded constants may have more + restrictive sort constraints than the underlying instance relation + between class and type constructor as long as the whole system of + constraints is coregular; code theorems violating coregularity + are rejected immediately. Consequently, it might be necessary + to delete disturbing theorems in the code theorem table, + as we have done here with the original definitions @{fact less_prod_def} + and @{fact less_eq_prod_def}. + + In some cases, the automatically derived defining equations + for equality on a particular type may not be appropriate. + As example, watch the following datatype representing + monomorphic parametric types (where type constructors + are referred to by natural numbers): +*} + +datatype %quoteme monotype = Mono nat "monotype list" +(*<*) +lemma monotype_eq: + "Mono tyco1 typargs1 = Mono tyco2 typargs2 \ + tyco1 = tyco2 \ typargs1 = typargs2" by simp +(*>*) + +text {* + Then code generation for SML would fail with a message + that the generated code contains illegal mutual dependencies: + the theorem @{thm monotype_eq [no_vars]} already requires the + instance @{text "monotype \ eq"}, which itself requires + @{thm monotype_eq [no_vars]}; Haskell has no problem with mutually + recursive @{text instance} and @{text function} definitions, + but the SML serializer does not support this. + + In such cases, you have to provide you own equality equations + involving auxiliary constants. In our case, + @{const [show_types] list_all2} can do the job: +*} + +lemma %quoteme monotype_eq_list_all2 [code func]: + "eq_class.eq (Mono tyco1 typargs1) (Mono tyco2 typargs2) \ + tyco1 = tyco2 \ list_all2 eq_class.eq typargs1 typargs2" + by (simp add: eq list_all2_eq [symmetric]) + +text {* + \noindent does not depend on instance @{text "monotype \ eq"}: +*} + +text %quoteme {*@{code_stmts "eq_class.eq :: monotype \ monotype \ bool" (SML)}*} + + subsection {* Partiality *} -text {* @{text "code_abort"}, examples: maps *} +text {* @{command "code_abort"}, examples: maps *} end diff -r 4ffb62675ade -r f65e8b318581 doc-src/IsarAdvanced/Codegen/Thy/ROOT.ML --- a/doc-src/IsarAdvanced/Codegen/Thy/ROOT.ML Tue Sep 30 04:06:55 2008 +0200 +++ b/doc-src/IsarAdvanced/Codegen/Thy/ROOT.ML Tue Sep 30 11:19:47 2008 +0200 @@ -1,12 +1,11 @@ (* $Id$ *) -use_thy "Codegen"; - -(*no_document use_thy "Setup"; +no_document use_thy "Setup"; +no_document use_thys ["Efficient_Nat", "Code_Integer"]; use_thy "Introduction"; use_thy "Program"; use_thy "Adaption"; use_thy "Further"; -use_thy "ML";*) +use_thy "ML"; diff -r 4ffb62675ade -r f65e8b318581 doc-src/IsarAdvanced/Codegen/Thy/Setup.thy --- a/doc-src/IsarAdvanced/Codegen/Thy/Setup.thy Tue Sep 30 04:06:55 2008 +0200 +++ b/doc-src/IsarAdvanced/Codegen/Thy/Setup.thy Tue Sep 30 11:19:47 2008 +0200 @@ -1,8 +1,61 @@ theory Setup -imports Main +imports Complex_Main uses "../../../antiquote_setup.ML" begin +ML {* no_document use_thys ["Efficient_Nat", "Code_Char_chr"] *} + ML_val {* Code_Target.code_width := 74 *} +ML {* +fun pr_class ctxt = Sign.extern_class (ProofContext.theory_of ctxt) + o Sign.read_class (ProofContext.theory_of ctxt); +*} + +ML {* +val _ = ThyOutput.add_commands + [("class", ThyOutput.args (Scan.lift Args.name) (K pr_class))] +*} + +ML {* +val verbatim_line = space_implode "\\verb,|," o map (enclose "\\verb|" "|") o space_explode "|"; +val verbatim_lines = rev + #> dropwhile (fn s => s = "") + #> rev + #> map verbatim_line #> space_implode "\\newline%\n" + #> prefix "\\isaverbatim%\n\\noindent%\n" +*} + +ML {* +local + + val parse_const_terms = Scan.repeat1 Args.term + >> (fn ts => fn thy => map (Code_Unit.check_const thy) ts); + val parse_consts = Scan.lift (Args.parens (Args.$$$ "consts")) |-- parse_const_terms + >> (fn mk_cs => fn thy => map (Code_Name.const thy) (mk_cs thy)); + val parse_types = Scan.lift (Args.parens (Args.$$$ "types") |-- Scan.repeat1 Args.name) + >> (fn tycos => fn thy => map (Code_Name.tyco thy o Sign.intern_type thy) tycos); + val parse_classes = Scan.lift (Args.parens (Args.$$$ "classes") |-- Scan.repeat1 Args.name) + >> (fn classes => fn thy => map (Code_Name.class thy o Sign.intern_class thy) classes); + val parse_instances = Scan.lift (Args.parens (Args.$$$ "instances") |-- Scan.repeat1 (Args.name --| Args.$$$ "::" -- Args.name)) + >> (fn insts => fn thy => map (Code_Name.instance thy o apsnd (Sign.intern_type thy) o apfst (Sign.intern_class thy) o swap) insts); + val parse_names = parse_consts || parse_types || parse_classes || parse_instances; + + fun code_stmts src ctxt ((mk_cs, mk_stmtss), target) = + Code_Target.code_of (ProofContext.theory_of ctxt) + target "Example" (mk_cs (ProofContext.theory_of ctxt)) + (maps (fn f => f (ProofContext.theory_of ctxt)) mk_stmtss) + |> split_lines + |> verbatim_lines; + +in + +val _ = ThyOutput.add_commands + [("code_stmts", ThyOutput.args + (parse_const_terms -- Scan.repeat parse_names -- Scan.lift (Args.parens Args.name)) + code_stmts)] + end +*} + +end diff -r 4ffb62675ade -r f65e8b318581 doc-src/IsarAdvanced/Codegen/codegen.tex --- a/doc-src/IsarAdvanced/Codegen/codegen.tex Tue Sep 30 04:06:55 2008 +0200 +++ b/doc-src/IsarAdvanced/Codegen/codegen.tex Tue Sep 30 11:19:47 2008 +0200 @@ -10,47 +10,15 @@ \usepackage{style} \usepackage{../../pdfsetup} -\newcommand{\cmd}[1]{\isacommand{#1}} +\makeatletter -\newcommand{\isasymINFIX}{\cmd{infix}} -\newcommand{\isasymLOCALE}{\cmd{locale}} -\newcommand{\isasymINCLUDES}{\cmd{includes}} -\newcommand{\isasymDATATYPE}{\cmd{datatype}} -\newcommand{\isasymAXCLASS}{\cmd{axclass}} -\newcommand{\isasymFIXES}{\cmd{fixes}} -\newcommand{\isasymASSUMES}{\cmd{assumes}} -\newcommand{\isasymDEFINES}{\cmd{defines}} -\newcommand{\isasymNOTES}{\cmd{notes}} -\newcommand{\isasymSHOWS}{\cmd{shows}} -\newcommand{\isasymCLASS}{\cmd{class}} -\newcommand{\isasymINSTANCE}{\cmd{instance}} -\newcommand{\isasymLEMMA}{\cmd{lemma}} -\newcommand{\isasymPROOF}{\cmd{proof}} -\newcommand{\isasymQED}{\cmd{qed}} -\newcommand{\isasymFIX}{\cmd{fix}} -\newcommand{\isasymASSUME}{\cmd{assume}} -\newcommand{\isasymSHOW}{\cmd{show}} -\newcommand{\isasymNOTE}{\cmd{note}} -\newcommand{\isasymEXPORTCODE}{\cmd{export\_code}} -\newcommand{\isasymCODEDATATYPE}{\cmd{code\_datatype}} -\newcommand{\isasymCODECONST}{\cmd{code\_const}} -\newcommand{\isasymCODETYPE}{\cmd{code\_type}} -\newcommand{\isasymCODECLASS}{\cmd{code\_class}} -\newcommand{\isasymCODEINSTANCE}{\cmd{code\_instance}} -\newcommand{\isasymCODERESERVED}{\cmd{code\_reserved}} -\newcommand{\isasymCODEMODULENAME}{\cmd{code\_modulename}} -\newcommand{\isasymCODEMODULEPROLOG}{\cmd{code\_moduleprolog}} -\newcommand{\isasymCODEAXIOMS}{\cmd{code\_axioms}} -\newcommand{\isasymCODEABSTYPE}{\cmd{code\_abstype}} -\newcommand{\isasymPRINTCODESETUP}{\cmd{print\_codesetup}} -\newcommand{\isasymPRINTCLASSES}{\cmd{print\_classes}} -\newcommand{\isasymCODETHMS}{\cmd{code\_thms}} -\newcommand{\isasymCODEDEPS}{\cmd{code\_deps}} -\newcommand{\isasymFUN}{\cmd{fun}} -\newcommand{\isasymFUNCTION}{\cmd{function}} -\newcommand{\isasymPRIMREC}{\cmd{primrec}} -\newcommand{\isasymCONSTDEFS}{\cmd{constdefs}} -\newcommand{\isasymRECDEF}{\cmd{recdef}} +\newcommand{\isaverbatim}{\renewcommand{\baselinestretch}{1}\setlength{\baselineskip}{8pt}\fontsize{8pt}{0pt}} +\isakeeptag{quoteme} +\newenvironment{quoteme}{\begin{quote}\isa@parindent\parindent\parindent0pt\isa@parskip\parskip\parskip0pt}{\end{quote}} +\renewcommand{\isatagquoteme}{\begin{quoteme}} +\renewcommand{\endisatagquoteme}{\end{quoteme}} + +\makeatother \isakeeptag{tt} \renewcommand{\isatagtt}{\isabellestyle{tt}\isastyle} @@ -62,10 +30,6 @@ \newcommand{\strong}[1]{{\bfseries #1}} \newcommand{\fixme}[1][!]{\strong{FIXME: #1}} -\lstset{basicstyle=\scriptsize\ttfamily,keywordstyle=\itshape,commentstyle=\itshape\sffamily,frame=single} -\newcommand{\lstsml}[1]{\lstset{language=ML}\lstinputlisting{#1}} -\newcommand{\lsthaskell}[1]{\lstset{language=Haskell}\lstinputlisting{#1}} - \hyphenation{Isabelle} \hyphenation{Isar} @@ -90,12 +54,11 @@ \pagenumbering{roman} \clearfirst -\input{Thy/document/Codegen.tex} -% \input{Thy/document/Introduction.tex} -% \input{Thy/document/Program.tex} -% \input{Thy/document/Adaption.tex} -% \input{Thy/document/Further.tex} -% \input{Thy/document/ML.tex} +\input{Thy/document/Introduction.tex} +\input{Thy/document/Program.tex} +\input{Thy/document/Adaption.tex} +\input{Thy/document/Further.tex} +\input{Thy/document/ML.tex} \begingroup %\tocentry{\bibname}