re-canibalised manual
authorhaftmann
Tue Sep 30 11:19:47 2008 +0200 (2008-09-30)
changeset 28419f65e8b318581
parent 28418 4ffb62675ade
child 28420 293b166c45c5
re-canibalised manual
doc-src/IsarAdvanced/Codegen/Thy/Adaption.thy
doc-src/IsarAdvanced/Codegen/Thy/Codegen.thy
doc-src/IsarAdvanced/Codegen/Thy/Further.thy
doc-src/IsarAdvanced/Codegen/Thy/Introduction.thy
doc-src/IsarAdvanced/Codegen/Thy/ML.thy
doc-src/IsarAdvanced/Codegen/Thy/Program.thy
doc-src/IsarAdvanced/Codegen/Thy/ROOT.ML
doc-src/IsarAdvanced/Codegen/Thy/Setup.thy
doc-src/IsarAdvanced/Codegen/codegen.tex
     1.1 --- a/doc-src/IsarAdvanced/Codegen/Thy/Adaption.thy	Tue Sep 30 04:06:55 2008 +0200
     1.2 +++ b/doc-src/IsarAdvanced/Codegen/Thy/Adaption.thy	Tue Sep 30 11:19:47 2008 +0200
     1.3 @@ -2,8 +2,213 @@
     1.4  imports Setup
     1.5  begin
     1.6  
     1.7 -section {* Adaption to target languages *}
     1.8 +section {* Adaption to target languages \label{sec:adaption} *}
     1.9 +
    1.10 +subsection {* Common adaption cases *}
    1.11 +
    1.12 +text {*
    1.13 +  The @{text HOL} @{text Main} theory already provides a code
    1.14 +  generator setup
    1.15 +  which should be suitable for most applications. Common extensions
    1.16 +  and modifications are available by certain theories of the @{text HOL}
    1.17 +  library; beside being useful in applications, they may serve
    1.18 +  as a tutorial for customising the code generator setup (see below
    1.19 +  \secref{sec:adaption_mechanisms}).
    1.20 +
    1.21 +  \begin{description}
    1.22 +
    1.23 +    \item[@{theory "Code_Integer"}] represents @{text HOL} integers by big
    1.24 +       integer literals in target languages.
    1.25 +    \item[@{theory "Code_Char"}] represents @{text HOL} characters by 
    1.26 +       character literals in target languages.
     1.27 +    \item[@{theory "Code_Char_chr"}] like @{text "Code_Char"},
     1.28 +       but also offers treatment of character codes; includes
     1.29 +       @{theory "Code_Char"}.
    1.30 +    \item[@{theory "Efficient_Nat"}] \label{eff_nat} implements natural numbers by integers,
    1.31 +       which in general will result in higher efficiency; pattern
    1.32 +       matching with @{term "0\<Colon>nat"} / @{const "Suc"}
    1.33 +       is eliminated;  includes @{theory "Code_Integer"}.
    1.34 +    \item[@{theory "Code_Index"}] provides an additional datatype
    1.35 +       @{typ index} which is mapped to target-language built-in integers.
    1.36 +       Useful for code setups which involve e.g. indexing of
    1.37 +       target-language arrays.
    1.38 +    \item[@{theory "Code_Message"}] provides an additional datatype
    1.39 +       @{typ message_string} which is isomorphic to strings;
    1.40 +       @{typ message_string}s are mapped to target-language strings.
    1.41 +       Useful for code setups which involve e.g. printing (error) messages.
    1.42 +
    1.43 +  \end{description}
    1.44 +
    1.45 +  \begin{warn}
    1.46 +    When importing any of these theories, they should form the last
    1.47 +    items in an import list.  Since these theories adapt the
    1.48 +    code generator setup in a non-conservative fashion,
    1.49 +    strange effects may occur otherwise.
    1.50 +  \end{warn}
    1.51 +*}
    1.52 +
    1.53 +
    1.54 +subsection {* Adaption mechanisms \label{sec:adaption_mechanisms} *}
    1.55 +
    1.56 +text {*
    1.57 +  \begin{warn}
    1.58 +    The mechanisms shown here are especially for the curious;  the user
    1.59 +    rarely needs to do anything on his own beyond the defaults in @{text HOL}.
     1.60 +    Adaption is a delicate task which requires a lot of diligence since
     1.61 +    it happens \emph{completely} outside the logic.
    1.62 +  \end{warn}
    1.63 +*}
    1.64 +
    1.65 +text {*
    1.66 +  Consider the following function and its corresponding
    1.67 +  SML code:
    1.68 +*}
    1.69 +
    1.70 +primrec %quoteme in_interval :: "nat \<times> nat \<Rightarrow> nat \<Rightarrow> bool" where
    1.71 +  "in_interval (k, l) n \<longleftrightarrow> k \<le> n \<and> n \<le> l"
    1.72 +
    1.73 +code_type %invisible bool
    1.74 +  (SML)
    1.75 +code_const %invisible True and False and "op \<and>" and Not
    1.76 +  (SML and and and)
    1.77 +
    1.78 +text %quoteme {*@{code_stmts in_interval (SML)}*}
    1.79 +
    1.80 +text {*
    1.81 +  \noindent Though this is correct code, it is a little bit unsatisfactory:
    1.82 +  boolean values and operators are materialised as distinguished
     1.83 +  entities which have nothing to do with the SML-built-in notion
    1.84 +  of \qt{bool}.  This results in less readable code;
     1.85 +  additionally, eager evaluation may cause programs to
     1.86 +  loop or break which would perfectly terminate if
     1.87 +  the existing SML @{verbatim "bool"} were used.  To map
    1.88 +  the HOL @{typ bool} on SML @{verbatim "bool"}, we may use
    1.89 +  \qn{custom serialisations}:
    1.90 +*}
    1.91 +
    1.92 +code_type %tt bool
    1.93 +  (SML "bool")
    1.94 +code_const %tt True and False and "op \<and>"
    1.95 +  (SML "true" and "false" and "_ andalso _")
    1.96  
    1.97 -subsection {* \ldots *}
    1.98 +text {*
     1.99 +  The @{command code_type} command takes a type constructor
    1.100 +  as argument together with a list of custom serialisations.
   1.101 +  Each custom serialisation starts with a target language
   1.102 +  identifier followed by an expression, which during
   1.103 +  code serialisation is inserted whenever the type constructor
   1.104 +  would occur.  For constants, @{command code_const} implements
   1.105 +  the corresponding mechanism.  Each ``@{verbatim "_"}'' in
   1.106 +  a serialisation expression is treated as a placeholder
   1.107 +  for the type constructor's (the constant's) arguments.
   1.108 +*}
   1.109 +
   1.110 +text %quoteme {*@{code_stmts in_interval (SML)}*}
   1.111 +
   1.112 +text {*
   1.113 +  \lstsml{Thy/examples/bool_mlbool.ML}
   1.114 +
   1.115 +  \noindent This still is not perfect: the parentheses
   1.116 +  around the \qt{andalso} expression are superfluous.
   1.117 +  Though the serializer
   1.118 +  by no means attempts to imitate the rich Isabelle syntax
   1.119 +  framework, it provides some common idioms, notably
   1.120 +  associative infixes with precedences which may be used here:
   1.121 +*}
   1.122 +
   1.123 +code_const %tt "op \<and>"
   1.124 +  (SML infixl 1 "andalso")
   1.125 +
   1.126 +text %quoteme {*@{code_stmts in_interval (SML)}*}
   1.127 +
   1.128 +text {*
   1.129 +  Next, we try to map HOL pairs to SML pairs, using the
   1.130 +  infix ``@{verbatim "*"}'' type constructor and parentheses:
   1.131 +*}
   1.132 +
   1.133 +code_type %invisible *
   1.134 +  (SML)
   1.135 +code_const %invisible Pair
   1.136 +  (SML)
   1.137 +
   1.138 +code_type %tt *
   1.139 +  (SML infix 2 "*")
   1.140 +code_const %tt Pair
   1.141 +  (SML "!((_),/ (_))")
   1.142 +
   1.143 +text {*
   1.144 +  The initial bang ``@{verbatim "!"}'' tells the serializer to never put
   1.145 +  parentheses around the whole expression (they are already present),
   1.146 +  while the parentheses around argument place holders
   1.147 +  tell not to put parentheses around the arguments.
   1.148 +  The slash ``@{verbatim "/"}'' (followed by arbitrary white space)
   1.149 +  inserts a space which may be used as a break if necessary
   1.150 +  during pretty printing.
   1.151 +
   1.152 +  These examples give a glimpse what mechanisms
   1.153 +  custom serialisations provide; however their usage
   1.154 +  requires careful thinking in order not to introduce
   1.155 +  inconsistencies -- or, in other words:
   1.156 +  custom serialisations are completely axiomatic.
   1.157 +
    1.158 +  A further noteworthy detail is that any special
   1.159 +  character in a custom serialisation may be quoted
   1.160 +  using ``@{verbatim "'"}''; thus, in
   1.161 +  ``@{verbatim "fn '_ => _"}'' the first
   1.162 +  ``@{verbatim "_"}'' is a proper underscore while the
   1.163 +  second ``@{verbatim "_"}'' is a placeholder.
   1.164 +
   1.165 +  The HOL theories provide further
   1.166 +  examples for custom serialisations.
   1.167 +*}
   1.168 +
   1.169 +
   1.170 +subsection {* @{text Haskell} serialisation *}
   1.171 +
   1.172 +text {*
   1.173 +  For convenience, the default
   1.174 +  @{text HOL} setup for @{text Haskell} maps the @{class eq} class to
   1.175 +  its counterpart in @{text Haskell}, giving custom serialisations
   1.176 +  for the class @{class eq} (by command @{command code_class}) and its operation
   1.177 +  @{const HOL.eq}
   1.178 +*}
   1.179 +
   1.180 +code_class %tt eq
   1.181 +  (Haskell "Eq" where "HOL.eq" \<equiv> "(==)")
   1.182 +
   1.183 +code_const %tt "op ="
   1.184 +  (Haskell infixl 4 "==")
   1.185 +
   1.186 +text {*
   1.187 +  A problem now occurs whenever a type which
   1.188 +  is an instance of @{class eq} in @{text HOL} is mapped
   1.189 +  on a @{text Haskell}-built-in type which is also an instance
   1.190 +  of @{text Haskell} @{text Eq}:
   1.191 +*}
   1.192 +
   1.193 +typedecl %quoteme bar
   1.194 +
   1.195 +instantiation %quoteme bar :: eq
   1.196 +begin
   1.197 +
   1.198 +definition %quoteme "eq_class.eq (x\<Colon>bar) y \<longleftrightarrow> x = y"
   1.199 +
   1.200 +instance %quoteme by default (simp add: eq_bar_def)
   1.201  
   1.202  end
   1.203 +
   1.204 +code_type %tt bar
   1.205 +  (Haskell "Integer")
   1.206 +
   1.207 +text {*
   1.208 +  The code generator would produce
    1.209 +  an additional instance, which of course is rejected by the @{text Haskell}
   1.210 +  compiler.
   1.211 +  To suppress this additional instance, use
   1.212 +  @{text "code_instance"}:
   1.213 +*}
   1.214 +
   1.215 +code_instance %tt bar :: eq
   1.216 +  (Haskell -)
   1.217 +
   1.218 +end
     2.1 --- a/doc-src/IsarAdvanced/Codegen/Thy/Codegen.thy	Tue Sep 30 04:06:55 2008 +0200
     2.2 +++ b/doc-src/IsarAdvanced/Codegen/Thy/Codegen.thy	Tue Sep 30 11:19:47 2008 +0200
     2.3 @@ -1,1352 +1,2 @@
     2.4 -
     2.5 -(* $Id$ *)
     2.6 -
     2.7 -(*<*)
     2.8 -theory Codegen
     2.9 -imports Main
    2.10 -uses "../../../antiquote_setup.ML"
    2.11 -begin
    2.12 -
    2.13 -ML {* Code_Target.code_width := 74 *}
    2.14 -(*>*)
    2.15 -
    2.16 -chapter {* Code generation from Isabelle theories *}
    2.17 -
    2.18 -section {* Introduction *}
    2.19 -
    2.20 -subsection {* Motivation *}
    2.21 -
    2.22 -text {*
    2.23 -  Executing formal specifications as programs is a well-established
    2.24 -  topic in the theorem proving community.  With increasing
    2.25 -  application of theorem proving systems in the area of
    2.26 -  software development and verification, its relevance manifests
    2.27 -  for running test cases and rapid prototyping.  In logical
    2.28 -  calculi like constructive type theory,
    2.29 -  a notion of executability is implicit due to the nature
    2.30 -  of the calculus.  In contrast, specifications in Isabelle
    2.31 -  can be highly non-executable.  In order to bridge
    2.32 -  the gap between logic and executable specifications,
    2.33 -  an explicit non-trivial transformation has to be applied:
    2.34 -  code generation.
    2.35 -
    2.36 -  This tutorial introduces a generic code generator for the
    2.37 -  Isabelle system \cite{isa-tutorial}.
    2.38 -  Generic in the sense that the
    2.39 -  \qn{target language} for which code shall ultimately be
    2.40 -  generated is not fixed but may be an arbitrary state-of-the-art
    2.41 -  functional programming language (currently, the implementation
    2.42 -  supports SML \cite{SML}, OCaml \cite{OCaml} and Haskell
    2.43 -  \cite{haskell-revised-report}).
    2.44 -  We aim to provide a
    2.45 -  versatile environment
    2.46 -  suitable for software development and verification,
    2.47 -  structuring the process
    2.48 -  of code generation into a small set of orthogonal principles
    2.49 -  while achieving a big coverage of application areas
    2.50 -  with maximum flexibility.
    2.51 -
    2.52 -  Conceptually the code generator framework is part
    2.53 -  of Isabelle's @{text Pure} meta logic; the object logic
    2.54 -  @{text HOL} which is an extension of @{text Pure}
    2.55 -  already comes with a reasonable framework setup and thus provides
    2.56 -  a good working horse for raising code-generation-driven
    2.57 -  applications.  So, we assume some familiarity and experience
    2.58 -  with the ingredients of the @{text HOL} \emph{Main} theory
    2.59 -  (see also \cite{isa-tutorial}).
    2.60 -*}
    2.61 -
    2.62 -
    2.63 -subsection {* Overview *}
    2.64 -
    2.65 -text {*
    2.66 -  The code generator aims to be usable with no further ado
    2.67 -  in most cases while allowing for detailed customization.
    2.68 -  This manifests in the structure of this tutorial:
    2.69 -  we start with a generic example \secref{sec:example}
    2.70 -  and introduce code generation concepts \secref{sec:concept}.
    2.71 -  Section
    2.72 -  \secref{sec:basics} explains how to use the framework naively,
    2.73 -  presuming a reasonable default setup.  Then, section
    2.74 -  \secref{sec:advanced} deals with advanced topics,
    2.75 -  introducing further aspects of the code generator framework
    2.76 -  in a motivation-driven manner.  Last, section \secref{sec:ml}
    2.77 -  introduces the framework's internal programming interfaces.
    2.78 -
    2.79 -  \begin{warn}
    2.80 -    Ultimately, the code generator which this tutorial deals with
    2.81 -    is supposed to replace the already established code generator
    2.82 -    by Stefan Berghofer \cite{Berghofer-Nipkow:2002}.
    2.83 -    So, for the moment, there are two distinct code generators
    2.84 -    in Isabelle.
    2.85 -    Also note that while the framework itself is
    2.86 -    object-logic independent, only @{text HOL} provides a reasonable
    2.87 -    framework setup.    
    2.88 -  \end{warn}
    2.89 -*}
    2.90 -
    2.91 -
    2.92 -section {* An example: a simple theory of search trees \label{sec:example} *}
    2.93 -
    2.94 -text {*
    2.95 -  When writing executable specifications using @{text HOL},
    2.96 -  it is convenient to use
    2.97 -  three existing packages: the datatype package for defining
    2.98 -  datatypes, the function package for (recursive) functions,
    2.99 -  and the class package for overloaded definitions.
   2.100 -
   2.101 -  We develope a small theory of search trees; trees are represented
   2.102 -  as a datatype with key type @{typ "'a"} and value type @{typ "'b"}:
   2.103 -*}
   2.104 -
   2.105 -datatype ('a, 'b) searchtree = Leaf "'a\<Colon>linorder" 'b
   2.106 -  | Branch "('a, 'b) searchtree" "'a" "('a, 'b) searchtree"
   2.107 -
   2.108 -text {*
   2.109 -  \noindent Note that we have constrained the type of keys
   2.110 -  to the class of total orders, @{text linorder}.
   2.111 -
   2.112 -  We define @{text find} and @{text update} functions:
   2.113 -*}
   2.114 -
   2.115 -primrec
   2.116 -  find :: "('a\<Colon>linorder, 'b) searchtree \<Rightarrow> 'a \<Rightarrow> 'b option" where
   2.117 -  "find (Leaf key val) it = (if it = key then Some val else None)"
   2.118 -  | "find (Branch t1 key t2) it = (if it \<le> key then find t1 it else find t2 it)"
   2.119 -
   2.120 -fun
   2.121 -  update :: "'a\<Colon>linorder \<times> 'b \<Rightarrow> ('a, 'b) searchtree \<Rightarrow> ('a, 'b) searchtree" where
   2.122 -  "update (it, entry) (Leaf key val) = (
   2.123 -    if it = key then Leaf key entry
   2.124 -      else if it \<le> key
   2.125 -      then Branch (Leaf it entry) it (Leaf key val)
   2.126 -      else Branch (Leaf key val) it (Leaf it entry)
   2.127 -   )"
   2.128 -  | "update (it, entry) (Branch t1 key t2) = (
   2.129 -    if it \<le> key
   2.130 -      then (Branch (update (it, entry) t1) key t2)
   2.131 -      else (Branch t1 key (update (it, entry) t2))
   2.132 -   )"
   2.133 -
   2.134 -text {*
   2.135 -  \noindent For testing purpose, we define a small example
   2.136 -  using natural numbers @{typ nat} (which are a @{text linorder})
   2.137 -  as keys and list of nats as values:
   2.138 -*}
   2.139 -
   2.140 -definition
   2.141 -  example :: "(nat, nat list) searchtree"
   2.142 -where
   2.143 -  "example = update (Suc (Suc (Suc (Suc 0))), [Suc (Suc 0), Suc (Suc 0)]) (update (Suc (Suc (Suc 0)), [Suc (Suc (Suc 0))])
   2.144 -    (update (Suc (Suc 0), [Suc (Suc 0)]) (Leaf (Suc 0) [])))"
   2.145 -
   2.146 -text {*
   2.147 -  \noindent Then we generate code
   2.148 -*}
   2.149 -
   2.150 -export_code example (*<*)in SML (*>*)in SML file "examples/tree.ML"
   2.151 -
   2.152 -text {*
   2.153 -  \noindent which looks like:
   2.154 -  \lstsml{Thy/examples/tree.ML}
   2.155 -*}
   2.156 -
   2.157 -
   2.158 -section {* Code generation concepts and process \label{sec:concept} *}
   2.159 -
   2.160 -text {*
   2.161 -  \begin{figure}[h]
   2.162 -  \centering
   2.163 -  \includegraphics[width=0.7\textwidth]{codegen_process}
   2.164 -  \caption{code generator -- processing overview}
   2.165 -  \label{fig:process}
   2.166 -  \end{figure}
   2.167 -
   2.168 -  The code generator employs a notion of executability
   2.169 -  for three foundational executable ingredients known
   2.170 -  from functional programming:
   2.171 -  \emph{defining equations}, \emph{datatypes}, and
   2.172 -  \emph{type classes}. A defining equation as a first approximation
   2.173 -  is a theorem of the form @{text "f t\<^isub>1 t\<^isub>2 \<dots> t\<^isub>n \<equiv> t"}
   2.174 -  (an equation headed by a constant @{text f} with arguments
   2.175 -  @{text "t\<^isub>1 t\<^isub>2 \<dots> t\<^isub>n"} and right hand side @{text t}).
   2.176 -  Code generation aims to turn defining equations
   2.177 -  into a functional program by running through
   2.178 -  a process (see figure \ref{fig:process}):
   2.179 -
   2.180 -  \begin{itemize}
   2.181 -
   2.182 -    \item Out of the vast collection of theorems proven in a
   2.183 -      \qn{theory}, a reasonable subset modeling
   2.184 -      defining equations is \qn{selected}.
   2.185 -
   2.186 -    \item On those selected theorems, certain
   2.187 -      transformations are carried out
   2.188 -      (\qn{preprocessing}).  Their purpose is to turn theorems
   2.189 -      representing non- or badly executable
   2.190 -      specifications into equivalent but executable counterparts.
   2.191 -      The result is a structured collection of \qn{code theorems}.
   2.192 -
   2.193 -    \item These \qn{code theorems} then are \qn{translated}
   2.194 -      into an Haskell-like intermediate
   2.195 -      language.
   2.196 -
   2.197 -    \item Finally, out of the intermediate language the final
   2.198 -      code in the desired \qn{target language} is \qn{serialized}.
   2.199 -
   2.200 -  \end{itemize}
   2.201 -
   2.202 -  From these steps, only the two last are carried out
   2.203 -  outside the logic; by keeping this layer as
   2.204 -  thin as possible, the amount of code to trust is
   2.205 -  kept to a minimum.
   2.206 -*}
   2.207 -
   2.208 -
   2.209 -
   2.210 -section {* Basics \label{sec:basics} *}
   2.211 -
   2.212 -subsection {* Invoking the code generator *}
   2.213 -
   2.214 -text {*
   2.215 -  Thanks to a reasonable setup of the @{text HOL} theories, in
   2.216 -  most cases code generation proceeds without further ado:
   2.217 -*}
   2.218 -
   2.219 -primrec
   2.220 -  fac :: "nat \<Rightarrow> nat" where
   2.221 -    "fac 0 = 1"
   2.222 -  | "fac (Suc n) = Suc n * fac n"
   2.223 -
   2.224 -text {*
   2.225 -  \noindent This executable specification is now turned to SML code:
   2.226 -*}
   2.227 -
   2.228 -export_code fac (*<*)in SML(*>*)in SML file "examples/fac.ML"
   2.229 -
   2.230 -text {*
   2.231 -  \noindent  The @{text "\<EXPORTCODE>"} command takes a space-separated list of
   2.232 -  constants together with \qn{serialization directives}
   2.233 -  These start with a \qn{target language}
   2.234 -  identifier, followed by a file specification
   2.235 -  where to write the generated code to.
   2.236 -
   2.237 -  Internally, the defining equations for all selected
   2.238 -  constants are taken, including any transitively required
   2.239 -  constants, datatypes and classes, resulting in the following
   2.240 -  code:
   2.241 -
   2.242 -  \lstsml{Thy/examples/fac.ML}
   2.243 -
   2.244 -  The code generator will complain when a required
   2.245 -  ingredient does not provide a executable counterpart,
   2.246 -  e.g.~generating code
   2.247 -  for constants not yielding
   2.248 -  a defining equation (e.g.~the Hilbert choice
   2.249 -  operation @{text "SOME"}):
   2.250 -*}
   2.251 -(*<*)
   2.252 -setup {* Sign.add_path "foo" *}
   2.253 -(*>*)
   2.254 -definition
   2.255 -  pick_some :: "'a list \<Rightarrow> 'a" where
   2.256 -  "pick_some xs = (SOME x. x \<in> set xs)"
   2.257 -(*<*)
   2.258 -hide const pick_some
   2.259 -
   2.260 -setup {* Sign.parent_path *}
   2.261 -
   2.262 -definition
   2.263 -  pick_some :: "'a list \<Rightarrow> 'a" where
   2.264 -  "pick_some = hd"
   2.265 -(*>*)
   2.266 -
   2.267 -export_code pick_some in SML file "examples/fail_const.ML"
   2.268 -
   2.269 -text {* \noindent will fail. *}
   2.270 -
   2.271 -subsection {* Theorem selection *}
   2.272 -
   2.273 -text {*
   2.274 -  The list of all defining equations in a theory may be inspected
   2.275 -  using the @{text "\<PRINTCODESETUP>"} command:
   2.276 -*}
   2.277 -
   2.278 -print_codesetup
   2.279 -
   2.280 -text {*
   2.281 -  \noindent which displays a table of constant with corresponding
   2.282 -  defining equations (the additional stuff displayed
   2.283 -  shall not bother us for the moment).
   2.284 -
   2.285 -  The typical @{text HOL} tools are already set up in a way that
   2.286 -  function definitions introduced by @{text "\<DEFINITION>"},
   2.287 -  @{text "\<PRIMREC>"}, @{text "\<FUN>"},
   2.288 -  @{text "\<FUNCTION>"}, @{text "\<CONSTDEFS>"},
   2.289 -  @{text "\<RECDEF>"} are implicitly propagated
   2.290 -  to this defining equation table. Specific theorems may be
   2.291 -  selected using an attribute: \emph{code func}. As example,
   2.292 -  a weight selector function:
   2.293 -*}
   2.294 -
   2.295 -primrec
   2.296 -  pick :: "(nat \<times> 'a) list \<Rightarrow> nat \<Rightarrow> 'a" where
   2.297 -  "pick (x#xs) n = (let (k, v) = x in
   2.298 -    if n < k then v else pick xs (n - k))"
   2.299 -
   2.300 -text {*
   2.301 -  \noindent We want to eliminate the explicit destruction
   2.302 -  of @{term x} to @{term "(k, v)"}:
   2.303 -*}
   2.304 -
   2.305 -lemma [code func]:
   2.306 -  "pick ((k, v)#xs) n = (if n < k then v else pick xs (n - k))"
   2.307 -  by simp
   2.308 -
   2.309 -export_code pick (*<*)in SML(*>*) in SML file "examples/pick1.ML"
   2.310 -
   2.311 -text {*
   2.312 -  \noindent This theorem now is used for generating code:
   2.313 -
   2.314 -  \lstsml{Thy/examples/pick1.ML}
   2.315 -
   2.316 -  \noindent The policy is that \emph{default equations} stemming from
   2.317 -  @{text "\<DEFINITION>"},
   2.318 -  @{text "\<PRIMREC>"}, @{text "\<FUN>"},
   2.319 -  @{text "\<FUNCTION>"}, @{text "\<CONSTDEFS>"},
   2.320 -  @{text "\<RECDEF>"} statements are discarded as soon as an
   2.321 -  equation is explicitly selected by means of \emph{code func}.
   2.322 -  Further applications of \emph{code func} add theorems incrementally,
   2.323 -  but syntactic redundancies are implicitly dropped.  For example,
   2.324 -  using a modified version of the @{const fac} function
   2.325 -  as defining equation, the then redundant (since
   2.326 -  syntactically subsumed) original defining equations
   2.327 -  are dropped.
   2.328 -
   2.329 -  \begin{warn}
   2.330 -    The attributes \emph{code} and \emph{code del}
   2.331 -    associated with the existing code generator also apply to
   2.332 -    the new one: \emph{code} implies \emph{code func},
   2.333 -    and \emph{code del} implies \emph{code func del}.
   2.334 -  \end{warn}
   2.335 -*}
   2.336 -
   2.337 -subsection {* Type classes *}
   2.338 -
   2.339 -text {*
   2.340 -  Type classes enter the game via the Isar class package.
   2.341 -  For a short introduction how to use it, see \cite{isabelle-classes};
   2.342 -  here we just illustrate its impact on code generation.
   2.343 -
   2.344 -  In a target language, type classes may be represented
   2.345 -  natively (as in the case of Haskell).  For languages
   2.346 -  like SML, they are implemented using \emph{dictionaries}.
   2.347 -  Our following example specifies a class \qt{null},
   2.348 -  assigning to each of its inhabitants a \qt{null} value:
   2.349 -*}
   2.350 -
   2.351 -class null = type +
   2.352 -  fixes null :: 'a
   2.353 -
   2.354 -primrec
   2.355 -  head :: "'a\<Colon>null list \<Rightarrow> 'a" where
   2.356 -  "head [] = null"
   2.357 -  | "head (x#xs) = x"
   2.358 -
   2.359 -text {*
   2.360 - \noindent  We provide some instances for our @{text null}:
   2.361 -*}
   2.362 -
   2.363 -instantiation option and list :: (type) null
   2.364 -begin
   2.365 -
   2.366 -definition
   2.367 -  "null = None"
   2.368 -
   2.369 -definition
   2.370 -  "null = []"
   2.371 -
   2.372 -instance ..
   2.373 -
   2.374 -end
   2.375 -
   2.376 -text {*
   2.377 -  \noindent Constructing a dummy example:
   2.378 -*}
   2.379 -
   2.380 -definition
   2.381 -  "dummy = head [Some (Suc 0), None]"
   2.382 -
   2.383 -text {*
   2.384 -  Type classes offer a suitable occasion to introduce
   2.385 -  the Haskell serializer.  Its usage is almost the same
   2.386 -  as SML, but, in accordance with conventions
   2.387 -  some Haskell systems enforce, each module ends
   2.388 -  up in a single file. The module hierarchy is reflected in
   2.389 -  the file system, with root directory given as file specification.
   2.390 -*}
   2.391 -
   2.392 -export_code dummy in Haskell file "examples/"
   2.393 -  (* NOTE: you may use Haskell only once in this document, otherwise
   2.394 -  you have to work in distinct subdirectories *)
   2.395 -
   2.396 -text {*
   2.397 -  \lsthaskell{Thy/examples/Codegen.hs}
   2.398 -  \noindent (we have left out all other modules).
   2.399 -
   2.400 -  \medskip
   2.401 -
   2.402 -  The whole code in SML with explicit dictionary passing:
   2.403 -*}
   2.404 -
   2.405 -export_code dummy (*<*)in SML(*>*)in SML file "examples/class.ML"
   2.406 -
   2.407 -text {*
   2.408 -  \lstsml{Thy/examples/class.ML}
   2.409 -
   2.410 -  \medskip
   2.411 -
   2.412 -  \noindent or in OCaml:
   2.413 -*}
   2.414 -
   2.415 -export_code dummy in OCaml file "examples/class.ocaml"
   2.416 -
   2.417 -text {*
   2.418 -  \lstsml{Thy/examples/class.ocaml}
   2.419 -
   2.420 -  \medskip The explicit association of constants
   2.421 -  to classes can be inspected using the @{text "\<PRINTCLASSES>"}
   2.422 -  command.
   2.423 -*}
   2.424 -
   2.425 -
   2.426 -section {* Recipes and advanced topics \label{sec:advanced} *}
   2.427 -
   2.428 -text {*
   2.429 -  In this tutorial, we do not attempt to give an exhaustive
   2.430 -  description of the code generator framework; instead,
   2.431 -  we cast a light on advanced topics by introducing
   2.432 -  them together with practically motivated examples.  Concerning
   2.433 -  further reading, see
   2.434 -
   2.435 -  \begin{itemize}
   2.436 -
   2.437 -  \item the Isabelle/Isar Reference Manual \cite{isabelle-isar-ref}
   2.438 -    for exhaustive syntax diagrams.
   2.439 -  \item or \cite{Haftmann-Nipkow:2007:codegen} which deals with foundational issues
   2.440 -    of the code generator framework.
   2.441 -
   2.442 -  \end{itemize}
   2.443 -*}
   2.444 -
   2.445 -subsection {* Library theories \label{sec:library} *}
   2.446 -
   2.447 -text {*
   2.448 -  The @{text HOL} @{text Main} theory already provides a code
   2.449 -  generator setup
   2.450 -  which should be suitable for most applications. Common extensions
   2.451 -  and modifications are available by certain theories of the @{text HOL}
   2.452 -  library; beside being useful in applications, they may serve
   2.453 -  as a tutorial for customizing the code generator setup.
   2.454 -
   2.455 -  \begin{description}
   2.456 -
   2.457 -    \item[@{text "Code_Integer"}] represents @{text HOL} integers by big
   2.458 -       integer literals in target languages.
   2.459 -    \item[@{text "Code_Char"}] represents @{text HOL} characters by 
   2.460 -       character literals in target languages.
   2.461 -    \item[@{text "Code_Char_chr"}] like @{text "Code_Char"},
   2.462 -       but also offers treatment of character codes; includes
   2.463 -       @{text "Code_Integer"}.
   2.464 -    \item[@{text "Efficient_Nat"}] \label{eff_nat} implements natural numbers by integers,
   2.465 -       which in general will result in higher efficency; pattern
   2.466 -       matching with @{term "0\<Colon>nat"} / @{const "Suc"}
   2.467 -       is eliminated;  includes @{text "Code_Integer"}.
   2.468 -    \item[@{text "Code_Index"}] provides an additional datatype
   2.469 -       @{text index} which is mapped to target-language built-in integers.
   2.470 -       Useful for code setups which involve e.g. indexing of
   2.471 -       target-language arrays.
   2.472 -    \item[@{text "Code_Message"}] provides an additional datatype
   2.473 -       @{text message_string} which is isomorphic to strings;
   2.474 -       @{text message_string}s are mapped to target-language strings.
   2.475 -       Useful for code setups which involve e.g. printing (error) messages.
   2.476 -
   2.477 -  \end{description}
   2.478 -
   2.479 -  \begin{warn}
   2.480 -    When importing any of these theories, they should form the last
   2.481 -    items in an import list.  Since these theories adapt the
   2.482 -    code generator setup in a non-conservative fashion,
   2.483 -    strange effects may occur otherwise.
   2.484 -  \end{warn}
   2.485 -*}
   2.486 -
   2.487 -subsection {* Preprocessing *}
   2.488 -
   2.489 -text {*
   2.490 -  Before selected function theorems are turned into abstract
   2.491 -  code, a chain of definitional transformation steps is carried
   2.492 -  out: \emph{preprocessing}.  In essence, the preprocessor
   2.493 -  consists of two components: a \emph{simpset} and \emph{function transformers}.
   2.494 -
   2.495 -  The \emph{simpset} allows to employ the full generality of the Isabelle
   2.496 -  simplifier.  Due to the interpretation of theorems
   2.497 -  as defining equations, rewrites are applied to the right
   2.498 -  hand side and the arguments of the left hand side of an
   2.499 -  equation, but never to the constant heading the left hand side.
   2.500 -  An important special case are \emph{inline theorems} which may be
   2.501 -  declared an undeclared using the
   2.502 -  \emph{code inline} or \emph{code inline del} attribute respectively.
   2.503 -  Some common applications:
   2.504 -*}
   2.505 -
   2.506 -text_raw {*
   2.507 -  \begin{itemize}
   2.508 -*}
   2.509 -
   2.510 -text {*
   2.511 -     \item replacing non-executable constructs by executable ones:
   2.512 -*}     
   2.513 -
   2.514 -  lemma [code inline]:
   2.515 -    "x \<in> set xs \<longleftrightarrow> x mem xs" by (induct xs) simp_all
   2.516 -
   2.517 -text {*
   2.518 -     \item eliminating superfluous constants:
   2.519 -*}
   2.520 -
   2.521 -  lemma [code inline]:
   2.522 -    "1 = Suc 0" by simp
   2.523 -
   2.524 -text {*
   2.525 -     \item replacing executable but inconvenient constructs:
   2.526 -*}
   2.527 -
   2.528 -  lemma [code inline]:
   2.529 -    "xs = [] \<longleftrightarrow> List.null xs" by (induct xs) simp_all
   2.530 -
   2.531 -text_raw {*
   2.532 -  \end{itemize}
   2.533 -*}
   2.534 -
   2.535 -text {*
   2.536 -
   2.537 -  \emph{Function transformers} provide a very general interface,
   2.538 -  transforming a list of function theorems to another
   2.539 -  list of function theorems, provided that neither the heading
   2.540 -  constant nor its type change.  The @{term "0\<Colon>nat"} / @{const Suc}
   2.541 -  pattern elimination implemented in
   2.542 -  theory @{text "Efficient_Nat"} (see \secref{eff_nat}) uses this
   2.543 -  interface.
   2.544 -
   2.545 -  \noindent The current setup of the preprocessor may be inspected using
   2.546 -  the @{text "\<PRINTCODESETUP>"} command.
   2.547 -
   2.548 -  \begin{warn}
   2.549 -    The attribute \emph{code unfold}
   2.550 -    associated with the existing code generator also applies to
   2.551 -    the new one: \emph{code unfold} implies \emph{code inline}.
   2.552 -  \end{warn}
   2.553 -*}
   2.554 -
   2.555 -
   2.556 -subsection {* Concerning operational equality *}
   2.557 -
   2.558 -text {*
   2.559 -  Surely you have already noticed how equality is treated
   2.560 -  by the code generator:
   2.561 -*}
   2.562 -
   2.563 -primrec
   2.564 -  collect_duplicates :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a list \<Rightarrow> 'a list" where
   2.565 -    "collect_duplicates xs ys [] = xs"
   2.566 -  | "collect_duplicates xs ys (z#zs) = (if z \<in> set xs
   2.567 -      then if z \<in> set ys
   2.568 -        then collect_duplicates xs ys zs
   2.569 -        else collect_duplicates xs (z#ys) zs
   2.570 -      else collect_duplicates (z#xs) (z#ys) zs)"
   2.571 -
   2.572 -text {*
   2.573 -  The membership test during preprocessing is rewritten,
   2.574 -  resulting in @{const List.member}, which itself
   2.575 -  performs an explicit equality check.
   2.576 -*}
   2.577 -
   2.578 -export_code collect_duplicates (*<*)in SML(*>*)in SML file "examples/collect_duplicates.ML"
   2.579 -
   2.580 -text {*
   2.581 -  \lstsml{Thy/examples/collect_duplicates.ML}
   2.582 -*}
   2.583 -
   2.584 -text {*
   2.585 -  Obviously, polymorphic equality is implemented the Haskell
   2.586 -  way using a type class.  How is this achieved?  HOL introduces
   2.587 -  an explicit class @{text eq} with a corresponding operation
   2.588 -  @{const eq_class.eq} such that @{thm eq [no_vars]}.
   2.589 -  The preprocessing framework does the rest.
   2.590 -  For datatypes, instances of @{text eq} are implicitly derived
   2.591 -  when possible.  For other types, you may instantiate @{text eq}
   2.592 -  manually like any other type class.
   2.593 -
   2.594 -  Though this @{text eq} class is designed to get rarely in
   2.595 -  the way, a subtlety
   2.596 -  enters the stage when definitions of overloaded constants
   2.597 -  are dependent on operational equality.  For example, let
   2.598 -  us define a lexicographic ordering on tuples:
   2.599 -*}
   2.600 -
   2.601 -instantiation * :: (ord, ord) ord
   2.602 -begin
   2.603 -
   2.604 -definition
   2.605 -  [code func del]: "p1 < p2 \<longleftrightarrow> (let (x1, y1) = p1; (x2, y2) = p2 in
   2.606 -    x1 < x2 \<or> (x1 = x2 \<and> y1 < y2))"
   2.607 -
   2.608 -definition
   2.609 -  [code func del]: "p1 \<le> p2 \<longleftrightarrow> (let (x1, y1) = p1; (x2, y2) = p2 in
   2.610 -    x1 < x2 \<or> (x1 = x2 \<and> y1 \<le> y2))"
   2.611 -
   2.612 -instance ..
   2.613  
   2.614  end
   2.615 -
   2.616 -lemma ord_prod [code func(*<*), code func del(*>*)]:
   2.617 -  "(x1 \<Colon> 'a\<Colon>ord, y1 \<Colon> 'b\<Colon>ord) < (x2, y2) \<longleftrightarrow> x1 < x2 \<or> (x1 = x2 \<and> y1 < y2)"
   2.618 -  "(x1 \<Colon> 'a\<Colon>ord, y1 \<Colon> 'b\<Colon>ord) \<le> (x2, y2) \<longleftrightarrow> x1 < x2 \<or> (x1 = x2 \<and> y1 \<le> y2)"
   2.619 -  unfolding less_prod_def less_eq_prod_def by simp_all
   2.620 -
   2.621 -text {*
   2.622 -  Then code generation will fail.  Why?  The definition
   2.623 -  of @{term "op \<le>"} depends on equality on both arguments,
   2.624 -  which are polymorphic and impose an additional @{text eq}
   2.625 -  class constraint, thus violating the type discipline
   2.626 -  for class operations.
   2.627 -
   2.628 -  The solution is to add @{text eq} explicitly to the first sort arguments in the
   2.629 -  code theorems:
   2.630 -*}
   2.631 -
   2.632 -lemma ord_prod_code [code func]:
   2.633 -  "(x1 \<Colon> 'a\<Colon>{ord, eq}, y1 \<Colon> 'b\<Colon>ord) < (x2, y2) \<longleftrightarrow>
   2.634 -    x1 < x2 \<or> (x1 = x2 \<and> y1 < y2)"
   2.635 -  "(x1 \<Colon> 'a\<Colon>{ord, eq}, y1 \<Colon> 'b\<Colon>ord) \<le> (x2, y2) \<longleftrightarrow>
   2.636 -    x1 < x2 \<or> (x1 = x2 \<and> y1 \<le> y2)"
   2.637 -  unfolding ord_prod by rule+
   2.638 -
   2.639 -text {*
   2.640 -  \noindent Then code generation succeeds:
   2.641 -*}
   2.642 -
   2.643 -export_code "op \<le> \<Colon> 'a\<Colon>{eq, ord} \<times> 'b\<Colon>ord \<Rightarrow> 'a \<times> 'b \<Rightarrow> bool"
   2.644 -  (*<*)in SML(*>*)in SML file "examples/lexicographic.ML"
   2.645 -
   2.646 -text {*
   2.647 -  \lstsml{Thy/examples/lexicographic.ML}
   2.648 -*}
   2.649 -
   2.650 -text {*
   2.651 -  In general, code theorems for overloaded constants may have more
   2.652 -  restrictive sort constraints than the underlying instance relation
   2.653 -  between class and type constructor as long as the whole system of
   2.654 -  constraints is coregular; code theorems violating coregularity
   2.655 -  are rejected immediately.  Consequently, it might be necessary
   2.656 -  to delete disturbing theorems in the code theorem table,
   2.657 -  as we have done here with the original definitions @{text less_prod_def}
   2.658 -  and @{text less_eq_prod_def}.
   2.659 -
   2.660 -  In some cases, the automatically derived defining equations
   2.661 -  for equality on a particular type may not be appropriate.
   2.662 -  As example, watch the following datatype representing
   2.663 -  monomorphic parametric types (where type constructors
   2.664 -  are referred to by natural numbers):
   2.665 -*}
   2.666 -
   2.667 -datatype monotype = Mono nat "monotype list"
   2.668 -(*<*)
   2.669 -lemma monotype_eq:
   2.670 -  "Mono tyco1 typargs1 = Mono tyco2 typargs2 \<equiv> 
   2.671 -     tyco1 = tyco2 \<and> typargs1 = typargs2" by simp
   2.672 -(*>*)
   2.673 -
   2.674 -text {*
   2.675 -  Then code generation for SML would fail with a message
   2.676 -  that the generated code contains illegal mutual dependencies:
   2.677 -  the theorem @{thm monotype_eq [no_vars]} already requires the
   2.678 -  instance @{text "monotype \<Colon> eq"}, which itself requires
   2.679 -  @{thm monotype_eq [no_vars]};  Haskell has no problem with mutually
   2.680 -  recursive @{text instance} and @{text function} definitions,
   2.681 -  but the SML serializer does not support this.
   2.682 -
   2.683 -  In such cases, you have to provide your own equality equations
   2.684 -  involving auxiliary constants.  In our case,
   2.685 -  @{const [show_types] list_all2} can do the job:
   2.686 -*}
   2.687 -
   2.688 -lemma monotype_eq_list_all2 [code func]:
   2.689 -  "Mono tyco1 typargs1 = Mono tyco2 typargs2 \<longleftrightarrow>
   2.690 -     tyco1 = tyco2 \<and> list_all2 (op =) typargs1 typargs2"
   2.691 -  by (simp add: list_all2_eq [symmetric])
   2.692 -
   2.693 -text {*
   2.694 -  does not depend on instance @{text "monotype \<Colon> eq"}:
   2.695 -*}
   2.696 -
   2.697 -export_code "op = :: monotype \<Rightarrow> monotype \<Rightarrow> bool"
   2.698 -  (*<*)in SML(*>*)in SML file "examples/monotype.ML"
   2.699 -
   2.700 -text {*
   2.701 -  \lstsml{Thy/examples/monotype.ML}
   2.702 -*}
   2.703 -
   2.704 -subsection {* Programs as sets of theorems *}
   2.705 -
   2.706 -text {*
   2.707 -  As told in \secref{sec:concept}, code generation is based
   2.708 -  on a structured collection of code theorems.
   2.709 -  For explorative purpose, this collection
   2.710 -  may be inspected using the @{text "\<CODETHMS>"} command:
   2.711 -*}
   2.712 -
   2.713 -code_thms "op mod :: nat \<Rightarrow> nat \<Rightarrow> nat"
   2.714 -
   2.715 -text {*
   2.716 -  \noindent prints a table with \emph{all} defining equations
   2.717 -  for @{term "op mod :: nat \<Rightarrow> nat \<Rightarrow> nat"}, including
   2.718 -  \emph{all} defining equations those equations depend
   2.719 -  on recursively.  @{text "\<CODETHMS>"} provides a convenient
   2.720 -  mechanism to inspect the impact of a preprocessor setup
   2.721 -  on defining equations.
   2.722 -  
   2.723 -  Similarly, the @{text "\<CODEDEPS>"} command shows a graph
   2.724 -  visualizing dependencies between defining equations.
   2.725 -*}
   2.726 -
   2.727 -
   2.728 -subsection {* Constructor sets for datatypes *}
   2.729 -
   2.730 -text {*
   2.731 -  Conceptually, any datatype is spanned by a set of
   2.732 -  \emph{constructors} of type @{text "\<tau> = \<dots> \<Rightarrow> \<kappa> \<alpha>\<^isub>1 \<dots> \<alpha>\<^isub>n"}
   2.733 -  where @{text "{\<alpha>\<^isub>1, \<dots>, \<alpha>\<^isub>n}"} is exactly the set of \emph{all}
   2.734 -  type variables in @{text "\<tau>"}.  The HOL datatype package
   2.735 -  by default registers any new datatype in the table
   2.736 -  of datatypes, which may be inspected using
   2.737 -  the @{text "\<PRINTCODESETUP>"} command.
   2.738 -
   2.739 -  In some cases, it may be convenient to alter or
   2.740 -  extend this table;  as an example, we will develop an alternative
   2.741 -  representation of natural numbers as binary digits, whose
   2.742 -  size does increase logarithmically with its value, not linearly
   2.743 -  \footnote{Indeed, the @{text "Efficient_Nat"} theory (see \ref{eff_nat})
   2.744 -    does something similar}.  First, the digit representation:
   2.745 -*}
   2.746 -
   2.747 -definition Dig0 :: "nat \<Rightarrow> nat" where
   2.748 -  "Dig0 n = 2 * n"
   2.749 -
   2.750 -definition Dig1 :: "nat \<Rightarrow> nat" where
   2.751 -  "Dig1 n = Suc (2 * n)"
   2.752 -
   2.753 -text {*
   2.754 -  \noindent We will use these two \qt{digits} to represent natural numbers
   2.755 -  in binary digits, e.g.:
   2.756 -*}
   2.757 -
   2.758 -lemma 42: "42 = Dig0 (Dig1 (Dig0 (Dig1 (Dig0 1))))"
   2.759 -  by (simp add: Dig0_def Dig1_def)
   2.760 -
   2.761 -text {*
   2.762 -  \noindent Of course we also have to provide proper code equations for
   2.763 -  the operations, e.g. @{term "op + \<Colon> nat \<Rightarrow> nat \<Rightarrow> nat"}:
   2.764 -*}
   2.765 -
   2.766 -lemma plus_Dig [code func]:
   2.767 -  "0 + n = n"
   2.768 -  "m + 0 = m"
   2.769 -  "1 + Dig0 n = Dig1 n"
   2.770 -  "Dig0 m + 1 = Dig1 m"
   2.771 -  "1 + Dig1 n = Dig0 (n + 1)"
   2.772 -  "Dig1 m + 1 = Dig0 (m + 1)"
   2.773 -  "Dig0 m + Dig0 n = Dig0 (m + n)"
   2.774 -  "Dig0 m + Dig1 n = Dig1 (m + n)"
   2.775 -  "Dig1 m + Dig0 n = Dig1 (m + n)"
   2.776 -  "Dig1 m + Dig1 n = Dig0 (m + n + 1)"
   2.777 -  by (simp_all add: Dig0_def Dig1_def)
   2.778 -
   2.779 -text {*
   2.780 -  \noindent We then instruct the code generator to view @{term "0\<Colon>nat"},
   2.781 -  @{term "1\<Colon>nat"}, @{term Dig0} and @{term Dig1} as
   2.782 -  datatype constructors:
   2.783 -*}
   2.784 -
   2.785 -code_datatype "0\<Colon>nat" "1\<Colon>nat" Dig0 Dig1
   2.786 -
   2.787 -text {*
   2.788 -  \noindent For the former constructor @{term Suc}, we provide a code
   2.789 -  equation and remove some parts of the default code generator setup
   2.790 -  which are an obstacle here:
   2.791 -*}
   2.792 -
   2.793 -lemma Suc_Dig [code func]:
   2.794 -  "Suc n = n + 1"
   2.795 -  by simp
   2.796 -
   2.797 -declare One_nat_def [code inline del]
   2.798 -declare add_Suc_shift [code func del] 
   2.799 -
   2.800 -text {*
   2.801 -  \noindent This yields the following code:
   2.802 -*}
   2.803 -
   2.804 -export_code "op + \<Colon> nat \<Rightarrow> nat \<Rightarrow> nat" (*<*)in SML(*>*) in SML file "examples/nat_binary.ML"
   2.805 -
   2.806 -text {* \lstsml{Thy/examples/nat_binary.ML} *}
   2.807 -
   2.808 -text {*
   2.809 -  \medskip
   2.810 -
   2.811 -  From this example, it can be easily glimpsed that using own constructor sets
   2.812 -  is a little delicate since it changes the set of valid patterns for values
   2.813 -  of that type.  Without going into much detail, here some practical hints:
   2.814 -
   2.815 -  \begin{itemize}
   2.816 -    \item When changing the constructor set for datatypes, take care to
   2.817 -      provide an alternative for the @{text case} combinator (e.g. by replacing
   2.818 -      it using the preprocessor).
   2.819 -    \item Values in the target language need not to be normalized -- different
   2.820 -      values in the target language may represent the same value in the
   2.821 -      logic (e.g. @{text "Dig1 0 = 1"}).
   2.822 -    \item Usually, a good methodology to deal with the subtleties of pattern
   2.823 -      matching is to see the type as an abstract type: provide a set
   2.824 -      of operations which operate on the concrete representation of the type,
   2.825 -      and derive further operations by combinations of these primitive ones,
   2.826 -      without relying on a particular representation.
   2.827 -  \end{itemize}
   2.828 -*}
   2.829 -
   2.830 -code_datatype %invisible "0::nat" Suc
   2.831 -declare %invisible plus_Dig [code func del]
   2.832 -declare %invisible One_nat_def [code inline]
   2.833 -declare %invisible add_Suc_shift [code func] 
   2.834 -lemma %invisible [code func]: "0 + n = (n \<Colon> nat)" by simp
   2.835 -
   2.836 -
   2.837 -subsection {* Customizing serialization  *}
   2.838 -
   2.839 -subsubsection {* Basics *}
   2.840 -
   2.841 -text {*
   2.842 -  Consider the following function and its corresponding
   2.843 -  SML code:
   2.844 -*}
   2.845 -
   2.846 -primrec
   2.847 -  in_interval :: "nat \<times> nat \<Rightarrow> nat \<Rightarrow> bool" where
   2.848 -  "in_interval (k, l) n \<longleftrightarrow> k \<le> n \<and> n \<le> l"
   2.849 -(*<*)
   2.850 -code_type %tt bool
   2.851 -  (SML)
   2.852 -code_const %tt True and False and "op \<and>" and Not
   2.853 -  (SML and and and)
   2.854 -(*>*)
   2.855 -export_code in_interval (*<*)in SML(*>*)in SML file "examples/bool_literal.ML"
   2.856 -
   2.857 -text {*
   2.858 -  \lstsml{Thy/examples/bool_literal.ML}
   2.859 -
   2.860 -  \noindent Though this is correct code, it is a little bit unsatisfactory:
   2.861 -  boolean values and operators are materialized as distinguished
   2.862 -  entities which have nothing to do with the SML-builtin notion
   2.863 -  of \qt{bool}.  This results in less readable code;
   2.864 -  additionally, eager evaluation may cause programs to
   2.865 -  loop or break which would perfectly terminate when
   2.866 -  the existing SML \qt{bool} would be used.  To map
   2.867 -  the HOL \qt{bool} on SML \qt{bool}, we may use
   2.868 -  \qn{custom serializations}:
   2.869 -*}
   2.870 -
   2.871 -code_type %tt bool
   2.872 -  (SML "bool")
   2.873 -code_const %tt True and False and "op \<and>"
   2.874 -  (SML "true" and "false" and "_ andalso _")
   2.875 -
   2.876 -text {*
   2.877 -  The @{text "\<CODETYPE>"} command takes a type constructor
   2.878 -  as arguments together with a list of custom serializations.
   2.879 -  Each custom serialization starts with a target language
   2.880 -  identifier followed by an expression, which during
   2.881 -  code serialization is inserted whenever the type constructor
   2.882 -  would occur.  For constants, @{text "\<CODECONST>"} implements
   2.883 -  the corresponding mechanism.  Each ``@{verbatim "_"}'' in
   2.884 -  a serialization expression is treated as a placeholder
   2.885 -  for the type constructor's (the constant's) arguments.
   2.886 -*}
   2.887 -
   2.888 -export_code in_interval (*<*)in SML(*>*) in SML file "examples/bool_mlbool.ML"
   2.889 -
   2.890 -text {*
   2.891 -  \lstsml{Thy/examples/bool_mlbool.ML}
   2.892 -
   2.893 -  \noindent This still is not perfect: the parentheses
   2.894 -  around the \qt{andalso} expression are superfluous.
   2.895 -  Though the serializer
   2.896 -  by no means attempts to imitate the rich Isabelle syntax
   2.897 -  framework, it provides some common idioms, notably
   2.898 -  associative infixes with precedences which may be used here:
   2.899 -*}
   2.900 -
   2.901 -code_const %tt "op \<and>"
   2.902 -  (SML infixl 1 "andalso")
   2.903 -
   2.904 -export_code in_interval (*<*)in SML(*>*) in SML file "examples/bool_infix.ML"
   2.905 -
   2.906 -text {*
   2.907 -  \lstsml{Thy/examples/bool_infix.ML}
   2.908 -
   2.909 -  \medskip
   2.910 -
   2.911 -  Next, we try to map HOL pairs to SML pairs, using the
   2.912 -  infix ``@{verbatim "*"}'' type constructor and parentheses:
   2.913 -*}
   2.914 -(*<*)
   2.915 -code_type *
   2.916 -  (SML)
   2.917 -code_const Pair
   2.918 -  (SML)
   2.919 -(*>*)
   2.920 -code_type %tt *
   2.921 -  (SML infix 2 "*")
   2.922 -code_const %tt Pair
   2.923 -  (SML "!((_),/ (_))")
   2.924 -
   2.925 -text {*
   2.926 -  The initial bang ``@{verbatim "!"}'' tells the serializer to never put
   2.927 -  parentheses around the whole expression (they are already present),
   2.928 -  while the parentheses around argument place holders
   2.929 -  tell not to put parentheses around the arguments.
   2.930 -  The slash ``@{verbatim "/"}'' (followed by arbitrary white space)
   2.931 -  inserts a space which may be used as a break if necessary
   2.932 -  during pretty printing.
   2.933 -
   2.934 -  These examples give a glimpse what mechanisms
   2.935 -  custom serializations provide; however their usage
   2.936 -  requires careful thinking in order not to introduce
   2.937 -  inconsistencies -- or, in other words:
   2.938 -  custom serializations are completely axiomatic.
   2.939 -
   2.940 -  A further noteworthy detail is that any special
   2.941 -  character in a custom serialization may be quoted
   2.942 -  using ``@{verbatim "'"}''; thus, in
   2.943 -  ``@{verbatim "fn '_ => _"}'' the first
   2.944 -  ``@{verbatim "_"}'' is a proper underscore while the
   2.945 -  second ``@{verbatim "_"}'' is a placeholder.
   2.946 -
   2.947 -  The HOL theories provide further
   2.948 -  examples for custom serializations.
   2.949 -*}
   2.950 -
   2.951 -
   2.952 -subsubsection {* Haskell serialization *}
   2.953 -
   2.954 -text {*
   2.955 -  For convenience, the default
   2.956 -  HOL setup for Haskell maps the @{text eq} class to
   2.957 -  its counterpart in Haskell, giving custom serializations
   2.958 -  for the class (@{text "\<CODECLASS>"}) and its operation:
   2.959 -*}
   2.960 -
   2.961 -code_class %tt eq
   2.962 -  (Haskell "Eq" where "op =" \<equiv> "(==)")
   2.963 -
   2.964 -code_const %tt "op ="
   2.965 -  (Haskell infixl 4 "==")
   2.966 -
   2.967 -text {*
   2.968 -  A problem now occurs whenever a type which
   2.969 -  is an instance of @{text eq} in HOL is mapped
   2.970 -  on a Haskell-builtin type which is also an instance
   2.971 -  of Haskell @{text Eq}:
   2.972 -*}
   2.973 -
   2.974 -typedecl bar
   2.975 -
   2.976 -instantiation bar :: eq
   2.977 -begin
   2.978 -
   2.979 -definition "eq_class.eq (x\<Colon>bar) y \<longleftrightarrow> x = y"
   2.980 -
   2.981 -instance by default (simp add: eq_bar_def)
   2.982 -
   2.983 -end
   2.984 -
   2.985 -code_type %tt bar
   2.986 -  (Haskell "Integer")
   2.987 -
   2.988 -text {*
   2.989 -  The code generator would produce
   2.990 -  an additional instance, which of course is rejected.
   2.991 -  To suppress this additional instance, use
   2.992 -  @{text "\<CODEINSTANCE>"}:
   2.993 -*}
   2.994 -
   2.995 -code_instance %tt bar :: eq
   2.996 -  (Haskell -)
   2.997 -
   2.998 -
   2.999 -subsubsection {* Pretty printing *}
  2.1000 -
  2.1001 -text {*
  2.1002 -  The serializer provides ML interfaces to set up
  2.1003 -  pretty serializations for expressions like lists, numerals
  2.1004 -  and characters;  these are
  2.1005 -  monolithic stubs and should only be used with the
  2.1006 -  theories introduced in \secref{sec:library}.
  2.1007 -*}
  2.1008 -
  2.1009 -
  2.1010 -subsection {* Cyclic module dependencies *}
  2.1011 -
  2.1012 -text {*
  2.1013 -  Sometimes the awkward situation occurs that dependencies
  2.1014 -  between definitions introduce cyclic dependencies
  2.1015 -  between modules, which in the Haskell world leaves
  2.1016 -  you to the mercy of the Haskell implementation you are using,
  2.1017 -  while for SML code generation is not possible.
  2.1018 -
  2.1019 -  A solution is to declare module names explicitly.
  2.1020 -  Let us assume the three cyclically dependent
  2.1021 -  modules are named \emph{A}, \emph{B} and \emph{C}.
  2.1022 -  Then, by stating
  2.1023 -*}
  2.1024 -
  2.1025 -code_modulename SML
  2.1026 -  A ABC
  2.1027 -  B ABC
  2.1028 -  C ABC
  2.1029 -
  2.1030 -text {*
  2.1031 -  we explicitly map all those modules on \emph{ABC},
  2.1032 -  resulting in an ad-hoc merge of these three modules
  2.1033 -  at serialization time.
  2.1034 -*}
  2.1035 -
  2.1036 -subsection {* Incremental code generation *}
  2.1037 -
  2.1038 -text {*
  2.1039 -  Code generation is \emph{incremental}: theorems
  2.1040 -  and abstract intermediate code are cached and extended on demand.
  2.1041 -  The cache may be partially or fully dropped if the underlying
  2.1042 -  executable content of the theory changes.
  2.1043 -  Implementation of caching is supposed to transparently
  2.1044 -  hide away the details from the user.  Anyway, caching
  2.1045 -  reaches the surface by using a slightly more general form
  2.1046 -  of the @{text "\<CODETHMS>"}, @{text "\<CODEDEPS>"}
  2.1047 -  and @{text "\<EXPORTCODE>"} commands: the list of constants
  2.1048 -  may be omitted.  Then, all constants with code theorems
  2.1049 -  in the current cache are referred to.
  2.1050 -*}
  2.1051 -
  2.1052 -(*subsection {* Code generation and proof extraction *}
  2.1053 -
  2.1054 -text {*
  2.1055 -  \fixme
  2.1056 -*}*)
  2.1057 -
  2.1058 -
  2.1059 -section {* ML interfaces \label{sec:ml} *}
  2.1060 -
  2.1061 -text {*
  2.1062 -  Since the code generator framework not only aims to provide
  2.1063 -  a nice Isar interface but also to form a base for
  2.1064 -  code-generation-based applications, here a short
  2.1065 -  description of the most important ML interfaces.
  2.1066 -*}
  2.1067 -
  2.1068 -subsection {* Executable theory content: @{text Code} *}
  2.1069 -
  2.1070 -text {*
  2.1071 -  This Pure module implements the core notions of
  2.1072 -  executable content of a theory.
  2.1073 -*}
  2.1074 -
  2.1075 -subsubsection {* Managing executable content *}
  2.1076 -
  2.1077 -text %mlref {*
  2.1078 -  \begin{mldecls}
  2.1079 -  @{index_ML Code.add_func: "thm -> theory -> theory"} \\
  2.1080 -  @{index_ML Code.del_func: "thm -> theory -> theory"} \\
  2.1081 -  @{index_ML Code.add_funcl: "string * thm list Susp.T -> theory -> theory"} \\
  2.1082 -  @{index_ML Code.map_pre: "(MetaSimplifier.simpset -> MetaSimplifier.simpset) -> theory -> theory"} \\
  2.1083 -  @{index_ML Code.map_post: "(MetaSimplifier.simpset -> MetaSimplifier.simpset) -> theory -> theory"} \\
  2.1084 -  @{index_ML Code.add_functrans: "string * (theory -> thm list -> thm list option)
  2.1085 -    -> theory -> theory"} \\
  2.1086 -  @{index_ML Code.del_functrans: "string -> theory -> theory"} \\
  2.1087 -  @{index_ML Code.add_datatype: "(string * typ) list -> theory -> theory"} \\
  2.1088 -  @{index_ML Code.get_datatype: "theory -> string
  2.1089 -    -> (string * sort) list * (string * typ list) list"} \\
  2.1090 -  @{index_ML Code.get_datatype_of_constr: "theory -> string -> string option"}
  2.1091 -  \end{mldecls}
  2.1092 -
  2.1093 -  \begin{description}
  2.1094 -
  2.1095 -  \item @{ML Code.add_func}~@{text "thm"}~@{text "thy"} adds function
  2.1096 -     theorem @{text "thm"} to executable content.
  2.1097 -
  2.1098 -  \item @{ML Code.del_func}~@{text "thm"}~@{text "thy"} removes function
  2.1099 -     theorem @{text "thm"} from executable content, if present.
  2.1100 -
  2.1101 -  \item @{ML Code.add_funcl}~@{text "(const, lthms)"}~@{text "thy"} adds
  2.1102 -     suspended defining equations @{text lthms} for constant
  2.1103 -     @{text const} to executable content.
  2.1104 -
  2.1105 -  \item @{ML Code.map_pre}~@{text "f"}~@{text "thy"} changes
  2.1106 -     the preprocessor simpset.
  2.1107 -
  2.1108 -  \item @{ML Code.add_functrans}~@{text "(name, f)"}~@{text "thy"} adds
  2.1109 -     function transformer @{text f} (named @{text name}) to executable content;
  2.1110 -     @{text f} is a transformer of the defining equations belonging
  2.1111 -     to a certain function definition, depending on the
  2.1112 -     current theory context.  Returning @{text NONE} indicates that no
  2.1113 -     transformation took place;  otherwise, the whole process will be iterated
  2.1114 -     with the new defining equations.
  2.1115 -
  2.1116 -  \item @{ML Code.del_functrans}~@{text "name"}~@{text "thy"} removes
  2.1117 -     function transformer named @{text name} from executable content.
  2.1118 -
  2.1119 -  \item @{ML Code.add_datatype}~@{text cs}~@{text thy} adds
  2.1120 -     a datatype to executable content, with generation
  2.1121 -     set @{text cs}.
  2.1122 -
  2.1123 -  \item @{ML Code.get_datatype_of_constr}~@{text "thy"}~@{text "const"}
  2.1124 -     returns type constructor corresponding to
  2.1125 -     constructor @{text const}; returns @{text NONE}
  2.1126 -     if @{text const} is no constructor.
  2.1127 -
  2.1128 -  \end{description}
  2.1129 -*}
  2.1130 -
  2.1131 -subsection {* Auxiliary *}
  2.1132 -
  2.1133 -text %mlref {*
  2.1134 -  \begin{mldecls}
  2.1135 -  @{index_ML Code_Unit.read_const: "theory -> string -> string"} \\
  2.1136 -  @{index_ML Code_Unit.head_func: "thm -> string * ((string * sort) list * typ)"} \\
  2.1137 -  @{index_ML Code_Unit.rewrite_func: "MetaSimplifier.simpset -> thm -> thm"} \\
  2.1138 -  \end{mldecls}
  2.1139 -
  2.1140 -  \begin{description}
  2.1141 -
  2.1142 -  \item @{ML Code_Unit.read_const}~@{text thy}~@{text s}
  2.1143 -     reads a constant as a concrete term expression @{text s}.
  2.1144 -
  2.1145 -  \item @{ML Code_Unit.head_func}~@{text thm}
  2.1146 -     extracts the constant and its type from a defining equation @{text thm}.
  2.1147 -
  2.1148 -  \item @{ML Code_Unit.rewrite_func}~@{text ss}~@{text thm}
  2.1149 -     rewrites a defining equation @{text thm} with a simpset @{text ss};
  2.1150 -     only arguments and right hand side are rewritten,
  2.1151 -     not the head of the defining equation.
  2.1152 -
  2.1153 -  \end{description}
  2.1154 -
  2.1155 -*}
  2.1156 -
  2.1157 -subsection {* Implementing code generator applications *}
  2.1158 -
  2.1159 -text {*
  2.1160 -  Implementing code generator applications on top
  2.1161 -  of the framework set out so far usually not only
  2.1162 -  involves using those primitive interfaces
  2.1163 -  but also storing code-dependent data and various
  2.1164 -  other things.
  2.1165 -
  2.1166 -  \begin{warn}
  2.1167 -    Some interfaces discussed here have not reached
  2.1168 -    a final state yet.
  2.1169 -    Changes likely to occur in future.
  2.1170 -  \end{warn}
  2.1171 -*}
  2.1172 -
  2.1173 -subsubsection {* Data depending on the theory's executable content *}
  2.1174 -
  2.1175 -text {*
  2.1176 -  Due to incrementality of code generation, changes in the
  2.1177 -  theory's executable content have to be propagated in a
  2.1178 -  certain fashion.  Additionally, such changes may occur
  2.1179 -  not only during theory extension but also during theory
  2.1180 -  merge, which is a little bit nasty from an implementation
  2.1181 -  point of view.  The framework provides a solution
  2.1182 -  to this technical challenge by providing a functorial
  2.1183 -  data slot @{ML_functor CodeDataFun}; on instantiation
  2.1184 -  of this functor, the following types and operations
  2.1185 -  are required:
  2.1186 -
  2.1187 -  \medskip
  2.1188 -  \begin{tabular}{l}
  2.1189 -  @{text "type T"} \\
  2.1190 -  @{text "val empty: T"} \\
  2.1191 -  @{text "val merge: Pretty.pp \<rightarrow> T * T \<rightarrow> T"} \\
  2.1192 -  @{text "val purge: theory option \<rightarrow> CodeUnit.const list option \<rightarrow> T \<rightarrow> T"}
  2.1193 -  \end{tabular}
  2.1194 -
  2.1195 -  \begin{description}
  2.1196 -
  2.1197 -  \item @{text T} the type of data to store.
  2.1198 -
  2.1199 -  \item @{text empty} initial (empty) data.
  2.1200 -
  2.1201 -  \item @{text merge} merging two data slots.
  2.1202 -
  2.1203 -  \item @{text purge}~@{text thy}~@{text consts} propagates changes in executable content;
  2.1204 -    if possible, the current theory context is handed over
  2.1205 -    as argument @{text thy} (if there is no current theory context (e.g.~during
  2.1206 -    theory merge, @{ML NONE}); @{text consts} indicates the kind
  2.1207 -    of change: @{ML NONE} stands for a fundamental change
  2.1208 -    which invalidates any existing code, @{text "SOME consts"}
  2.1209 -    hints that executable content for constants @{text consts}
  2.1210 -    has changed.
  2.1211 -
  2.1212 -  \end{description}
  2.1213 -
  2.1214 -  An instance of @{ML_functor CodeDataFun} provides the following
  2.1215 -  interface:
  2.1216 -
  2.1217 -  \medskip
  2.1218 -  \begin{tabular}{l}
  2.1219 -  @{text "get: theory \<rightarrow> T"} \\
  2.1220 -  @{text "change: theory \<rightarrow> (T \<rightarrow> T) \<rightarrow> T"} \\
  2.1221 -  @{text "change_yield: theory \<rightarrow> (T \<rightarrow> 'a * T) \<rightarrow> 'a * T"}
  2.1222 -  \end{tabular}
  2.1223 -
  2.1224 -  \begin{description}
  2.1225 -
  2.1226 -  \item @{text get} retrieval of the current data.
  2.1227 -
  2.1228 -  \item @{text change} update of current data (cached!)
  2.1229 -    by giving a continuation.
  2.1230 -
  2.1231 -  \item @{text change_yield} update with side result.
  2.1232 -
  2.1233 -  \end{description}
  2.1234 -*}
  2.1235 -
  2.1236 -(*subsubsection {* Datatype hooks *}
  2.1237 -
  2.1238 -text {*
  2.1239 -  Isabelle/HOL's datatype package provides a mechanism to
  2.1240 -  extend theories depending on datatype declarations:
  2.1241 -  \emph{datatype hooks}.  For example, when declaring a new
  2.1242 -  datatype, a hook proves defining equations for equality on
  2.1243 -  that datatype (if possible).
  2.1244 -*}
  2.1245 -
  2.1246 -text %mlref {*
  2.1247 -  \begin{mldecls}
  2.1248 -  @{index_ML_type DatatypeHooks.hook: "string list -> theory -> theory"} \\
  2.1249 -  @{index_ML DatatypeHooks.add: "DatatypeHooks.hook -> theory -> theory"}
  2.1250 -  \end{mldecls}
  2.1251 -
  2.1252 -  \begin{description}
  2.1253 -
  2.1254 -  \item @{ML_type DatatypeHooks.hook} specifies the interface
  2.1255 -     of \emph{datatype hooks}: a theory update
  2.1256 -     depending on the list of newly introduced
  2.1257 -     datatype names.
  2.1258 -
  2.1259 -  \item @{ML DatatypeHooks.add} adds a hook to the
  2.1260 -     chain of all hooks.
  2.1261 -
  2.1262 -  \end{description}
  2.1263 -*}
  2.1264 -
  2.1265 -subsubsection {* Trivial typedefs -- type copies *}
  2.1266 -
  2.1267 -text {*
  2.1268 -  Sometimes packages will introduce new types
  2.1269 -  as \emph{marked type copies} similar to Haskell's
  2.1270 -  @{text newtype} declaration (e.g. the HOL record package)
  2.1271 -  \emph{without} tinkering with the overhead of datatypes.
  2.1272 -  Technically, these type copies are trivial forms of typedefs.
  2.1273 -  Since these type copies in code generation view are nothing
  2.1274 -  else than datatypes, they have been given a own package
  2.1275 -  in order to faciliate code generation:
  2.1276 -*}
  2.1277 -
  2.1278 -text %mlref {*
  2.1279 -  \begin{mldecls}
  2.1280 -  @{index_ML_type TypecopyPackage.info} \\
  2.1281 -  @{index_ML TypecopyPackage.add_typecopy: "
  2.1282 -    bstring * string list -> typ -> (bstring * bstring) option
  2.1283 -    -> theory -> (string * TypecopyPackage.info) * theory"} \\
  2.1284 -  @{index_ML TypecopyPackage.get_typecopy_info: "theory
  2.1285 -    -> string -> TypecopyPackage.info option"} \\
  2.1286 -  @{index_ML TypecopyPackage.get_spec: "theory -> string
  2.1287 -    -> (string * sort) list * (string * typ list) list"} \\
  2.1288 -  @{index_ML_type TypecopyPackage.hook: "string * TypecopyPackage.info -> theory -> theory"} \\
  2.1289 -  @{index_ML TypecopyPackage.add_hook:
  2.1290 -     "TypecopyPackage.hook -> theory -> theory"} \\
  2.1291 -  \end{mldecls}
  2.1292 -
  2.1293 -  \begin{description}
  2.1294 -
  2.1295 -  \item @{ML_type TypecopyPackage.info} a record containing
  2.1296 -     the specification and further data of a type copy.
  2.1297 -
  2.1298 -  \item @{ML TypecopyPackage.add_typecopy} defines a new
  2.1299 -     type copy.
  2.1300 -
  2.1301 -  \item @{ML TypecopyPackage.get_typecopy_info} retrieves
  2.1302 -     data of an existing type copy.
  2.1303 -
  2.1304 -  \item @{ML TypecopyPackage.get_spec} retrieves datatype-like
  2.1305 -     specification of a type copy.
  2.1306 -
  2.1307 -  \item @{ML_type TypecopyPackage.hook},~@{ML TypecopyPackage.add_hook}
  2.1308 -     provide a hook mechanism corresponding to the hook mechanism
  2.1309 -     on datatypes.
  2.1310 -
  2.1311 -  \end{description}
  2.1312 -*}
  2.1313 -
  2.1314 -subsubsection {* Unifying type copies and datatypes *}
  2.1315 -
  2.1316 -text {*
  2.1317 -  Since datatypes and type copies are mapped to the same concept (datatypes)
  2.1318 -  by code generation, the view on both is unified \qt{code types}:
  2.1319 -*}
  2.1320 -
  2.1321 -text %mlref {*
  2.1322 -  \begin{mldecls}
  2.1323 -  @{index_ML_type DatatypeCodegen.hook: "(string * (bool * ((string * sort) list
  2.1324 -    * (string * typ list) list))) list
  2.1325 -    -> theory -> theory"} \\
  2.1326 -  @{index_ML DatatypeCodegen.add_codetypes_hook_bootstrap: "
  2.1327 -      DatatypeCodegen.hook -> theory -> theory"}
  2.1328 -  \end{mldecls}
  2.1329 -*}
  2.1330 -
  2.1331 -text {*
  2.1332 -  \begin{description}
  2.1333 -
  2.1334 -  \item @{ML_type DatatypeCodegen.hook} specifies the code type hook
  2.1335 -     interface: a theory transformation depending on a list of
  2.1336 -     mutual recursive code types; each entry in the list
  2.1337 -     has the structure @{text "(name, (is_data, (vars, cons)))"}
  2.1338 -     where @{text name} is the name of the code type, @{text is_data}
  2.1339 -     is true iff @{text name} is a datatype rather then a type copy,
  2.1340 -     and @{text "(vars, cons)"} is the specification of the code type.
  2.1341 -
  2.1342 -  \item @{ML DatatypeCodegen.add_codetypes_hook_bootstrap} adds a code
  2.1343 -     type hook;  the hook is immediately processed for all already
  2.1344 -     existing datatypes, in blocks of mutual recursive datatypes,
  2.1345 -     where all datatypes a block depends on are processed before
  2.1346 -     the block.
  2.1347 -
  2.1348 -  \end{description}
  2.1349 -*}*)
  2.1350 -
  2.1351 -text {*
  2.1352 -  \emph{Happy proving, happy hacking!}
  2.1353 -*}
  2.1354 -
  2.1355 -end
     3.1 --- a/doc-src/IsarAdvanced/Codegen/Thy/Further.thy	Tue Sep 30 04:06:55 2008 +0200
     3.2 +++ b/doc-src/IsarAdvanced/Codegen/Thy/Further.thy	Tue Sep 30 11:19:47 2008 +0200
     3.3 @@ -2,9 +2,45 @@
     3.4  imports Setup
     3.5  begin
     3.6  
     3.7 -section {* Further topics *}
     3.8 +section {* Further topics \label{sec:further} *}
     3.9 +
    3.10 +subsection {* Further reading *}
    3.11 +
    3.12 +text {*
    3.13 +  To dive deeper into the issue of code generation, you should visit
    3.14 +  the Isabelle/Isar Reference Manual \cite{isabelle-isar-ref} which
    3.15 +  contains exhaustive syntax diagrams.
    3.16 +*}
    3.17 +
    3.18 +subsection {* Modules *}
    3.19 +
    3.20 +text {*
    3.21 +  When invoking the @{command export_code} command it is possible to leave
    3.22 +  out the @{keyword "module_name"} part;  then code is distributed over
    3.23 +  different modules, where the module name space roughly is induced
    3.24 +  by the @{text Isabelle} theory namespace.
    3.25  
    3.26 -subsection {* Serializer options *}
    3.27 +  Then sometimes the awkward situation occurs that dependencies between
    3.28 +  definitions introduce cyclic dependencies between modules, which in the
    3.29 +  @{text Haskell} world leaves you at the mercy of the @{text Haskell} implementation
    3.30 +  you are using,  while for @{text SML}/@{text OCaml} code generation is not possible.
    3.31 +
    3.32 +  A solution is to declare module names explicitly.
    3.33 +  Let us assume the three cyclically dependent
    3.34 +  modules are named \emph{A}, \emph{B} and \emph{C}.
    3.35 +  Then, by stating
    3.36 +*}
    3.37 +
    3.38 +code_modulename SML
    3.39 +  A ABC
    3.40 +  B ABC
    3.41 +  C ABC
    3.42 +
    3.43 +text {*
    3.44 +  we explicitly map all those modules onto \emph{ABC},
    3.45 +  resulting in an ad-hoc merge of these three modules
    3.46 +  at serialisation time.
    3.47 +*}
    3.48  
    3.49  subsection {* Evaluation oracle *}
    3.50  
    3.51 @@ -14,4 +50,6 @@
    3.52  
    3.53  text {* extending targets, adding targets *}
    3.54  
    3.55 +subsection {* Imperative data structures *}
    3.56 +
    3.57  end
     4.1 --- a/doc-src/IsarAdvanced/Codegen/Thy/Introduction.thy	Tue Sep 30 04:06:55 2008 +0200
     4.2 +++ b/doc-src/IsarAdvanced/Codegen/Thy/Introduction.thy	Tue Sep 30 11:19:47 2008 +0200
     4.3 @@ -8,15 +8,173 @@
     4.4  
     4.5  text {*
     4.6    This tutorial introduces a generic code generator for the
     4.7 -  Isabelle system \cite{isa-tutorial}.
     4.8 +  @{text Isabelle} system.
     4.9 +  Generic in the sense that the
    4.10 +  \qn{target language} for which code shall ultimately be
    4.11 +  generated is not fixed but may be an arbitrary state-of-the-art
    4.12 +  functional programming language (currently, the implementation
    4.13 +  supports @{text SML} \cite{SML}, @{text OCaml} \cite{OCaml} and @{text Haskell}
    4.14 +  \cite{haskell-revised-report}).
    4.15 +
    4.16 +  Conceptually the code generator framework is part
    4.17 +  of Isabelle's @{text Pure} meta logic framework; the logic
    4.18 +  @{text HOL} which is an extension of @{text Pure}
    4.19 +  already comes with a reasonable framework setup and thus provides
    4.20 +  a good working horse for raising code-generation-driven
    4.21 +  applications.  So, we assume some familiarity and experience
    4.22 +  with the ingredients of the @{text HOL} distribution theories.
    4.23 +  (see also \cite{isa-tutorial}).
    4.24 +
    4.25 +  The code generator aims to be usable with no further ado
    4.26 +  in most cases while allowing for detailed customisation.
    4.27 +  This manifests in the structure of this tutorial: after a short
    4.28 +  conceptual introduction with an example \secref{sec:intro},
    4.29 +  we discuss the generic customisation facilities \secref{sec:program}.
    4.30 +  A further section \secref{sec:adaption} is dedicated to the matter of
    4.31 +  \qn{adaption} to specific target language environments.  After some
    4.32 +  further issues \secref{sec:further} we conclude with an overview
    4.33 +  of some ML programming interfaces \secref{sec:ml}.
    4.34 +
    4.35 +  \begin{warn}
    4.36 +    Ultimately, the code generator which this tutorial deals with
    4.37 +    is supposed to replace the already established code generator
    4.38 +    by Stefan Berghofer \cite{Berghofer-Nipkow:2002}.
    4.39 +    So, for the moment, there are two distinct code generators
    4.40 +    in Isabelle.
    4.41 +    Also note that while the framework itself is
    4.42 +    object-logic independent, only @{text HOL} provides a reasonable
    4.43 +    framework setup.    
    4.44 +  \end{warn}
    4.45 +
    4.46  *}
    4.47  
    4.48 -subsection {* Code generation via shallow embedding *}
    4.49 +subsection {* Code generation via shallow embedding \label{sec:intro} *}
    4.50 +
    4.51 +text {*
    4.52 +  The key concept for understanding @{text Isabelle}'s code generation is
    4.53 +  \emph{shallow embedding}, i.e.~logical entities like constants, types and
    4.54 +  classes are identified with corresponding concepts in the target language.
    4.55 +
    4.56 +  Inside @{text HOL}, the @{command datatype} and
    4.57 +  @{command definition}/@{command primrec}/@{command fun} declarations form
    4.58 +  the core of a functional programming language.  The default code generator setup
    4.59 +  allows to turn those into functional programs immediately.
    4.60 +
    4.61 +  This means that \qt{naive} code generation can proceed without further ado.
    4.62 +  For example, here a simple \qt{implementation} of amortised queues:
    4.63 +*}
    4.64 +
    4.65 +datatype %quoteme 'a queue = Queue "'a list" "'a list"
    4.66 +
    4.67 +definition %quoteme empty :: "'a queue" where
    4.68 +  "empty = Queue [] []"
    4.69 +
    4.70 +primrec %quoteme enqueue :: "'a \<Rightarrow> 'a queue \<Rightarrow> 'a queue" where
    4.71 +  "enqueue x (Queue xs ys) = Queue (x # xs) ys"
    4.72 +
    4.73 +fun %quoteme dequeue :: "'a queue \<Rightarrow> 'a option \<times> 'a queue" where
    4.74 +    "dequeue (Queue [] []) = (None, Queue [] [])"
    4.75 +  | "dequeue (Queue xs (y # ys)) = (Some y, Queue xs ys)"
    4.76 +  | "dequeue (Queue xs []) = (case rev xs of y # ys \<Rightarrow> (Some y, Queue [] ys))"
    4.77 +
    4.78 +text {* \noindent Then we can generate code e.g.~for @{text SML} as follows: *}
    4.79  
    4.80 -text {* example *}
    4.81 +export_code %quoteme empty dequeue enqueue in SML module_name Example file "examples/example.ML"
    4.82 +
    4.83 +text {* \noindent resulting in the following code: *}
    4.84 +
    4.85 +text %quoteme {*@{code_stmts empty enqueue dequeue (SML)}*}
    4.86 +
    4.87 +text {*
    4.88 +  \noindent The @{command export_code} command takes a space-separated list of
    4.89 +  constants for which code shall be generated;  anything else needed for those
    4.90 +  is added implicitly.  Then follows a target language identifier
    4.91 +  (@{text SML}, @{text OCaml} or @{text Haskell}) and a freely chosen module name.
    4.92 +  A file name denotes the destination to store the generated code.  Note that
    4.93 +  the semantics of the destination depends on the target language:  for
    4.94 +  @{text SML} and @{text OCaml} it denotes a \emph{file}, for @{text Haskell}
    4.95 +  it denotes a \emph{directory} where a file named as the module name
    4.96 +  (with extension @{text ".hs"}) is written:
    4.97 +*}
    4.98 +
    4.99 +export_code %quoteme empty dequeue enqueue in SML module_name Example file "examples/Example.ML"
   4.100 +
   4.101 +text {*
   4.102 +  \noindent This is how the corresponding code in @{text Haskell} looks like:
   4.103 +*}
   4.104 +
   4.105 +text %quoteme {*@{code_stmts empty enqueue dequeue (Haskell)}*}
   4.106 +
   4.107 +text {*
   4.108 +  \noindent This demonstrates the basic usage of the @{command export_code} command;
   4.109 +  for more details see \secref{sec:serializer}.
   4.110 +*}
   4.111  
   4.112  subsection {* Code generator architecture *}
   4.113  
   4.114 -text {* foundation, forward references for yet unexplained things  *}
   4.115 +text {*
   4.116 +  What you have seen so far should be already enough in a lot of cases.  If you
   4.117 +  are content with this, you can quit reading here.  Anyway, in order to customise
   4.118 +  and adapt the code generator, it is inevitable to gain some understanding
   4.119 +  of how it works.
   4.120 +
   4.121 +  \begin{figure}[h]
   4.122 +    \centering
   4.123 +    \includegraphics[width=0.7\textwidth]{codegen_process}
   4.124 +    \caption{Code generator architecture}
   4.125 +    \label{fig:arch}
   4.126 +  \end{figure}
   4.127 +
   4.128 +  The code generator itself consists of three major components
   4.129 +  which operate sequentially, i.e. the result of one is the input
   4.130 +  of the next in the chain,  see diagram \ref{fig:arch}:
   4.131 +
   4.132 +  The code generator employs a notion of executability
   4.133 +  for three foundational executable ingredients known
   4.134 +  from functional programming:
   4.135 +  \emph{defining equations}, \emph{datatypes}, and
   4.136 +  \emph{type classes}.  A defining equation as a first approximation
   4.137 +  is a theorem of the form @{text "f t\<^isub>1 t\<^isub>2 \<dots> t\<^isub>n \<equiv> t"}
   4.138 +  (an equation headed by a constant @{text f} with arguments
   4.139 +  @{text "t\<^isub>1 t\<^isub>2 \<dots> t\<^isub>n"} and right hand side @{text t}).
   4.140 +  Code generation aims to turn defining equations
   4.141 +  into a functional program by running through the following
   4.142 +  process:
   4.143 +
   4.144 +  \begin{itemize}
   4.145 +
   4.146 +    \item Out of the vast collection of theorems proven in a
   4.147 +      \qn{theory}, a reasonable subset modelling
   4.148 +      defining equations is \qn{selected}.
   4.149 +
   4.150 +    \item On those selected theorems, certain
   4.151 +      transformations are carried out
   4.152 +      (\qn{preprocessing}).  Their purpose is to turn theorems
   4.153 +      representing non- or badly executable
   4.154 +      specifications into equivalent but executable counterparts.
   4.155 +      The result is a structured collection of \qn{code theorems}.
   4.156 +
   4.157 +    \item Before the selected defining equations are continued with,
   4.158 +      they can be \qn{preprocessed}, i.e. subjected to theorem
   4.159 +      transformations.  This \qn{preprocessor} is an interface which
   4.160 +      allows to apply
   4.161 +      the full expressiveness of ML-based theorem transformations
   4.162 +      to code generation;  motivating examples are shown below, see
   4.163 +      \secref{sec:preproc}.
   4.164 +      The result of the preprocessing step is a structured collection
   4.165 +      of defining equations.
   4.166 +
   4.167 +    \item These defining equations are \qn{translated} to a program
   4.168 +      in an abstract intermediate language.
   4.169 +
   4.170 +    \item Finally, the abstract program is \qn{serialised} into concrete
   4.171 +      source code of a target language.
   4.172 +
   4.173 +  \end{itemize}
   4.174 +
   4.175 +  \noindent From these steps, only the two last are carried out outside the logic;  by
   4.176 +  keeping this layer as thin as possible, the amount of code to trust is
   4.177 +  kept to a minimum.
   4.178 +*}
   4.179  
   4.180  end
     5.1 --- a/doc-src/IsarAdvanced/Codegen/Thy/ML.thy	Tue Sep 30 04:06:55 2008 +0200
     5.2 +++ b/doc-src/IsarAdvanced/Codegen/Thy/ML.thy	Tue Sep 30 11:19:47 2008 +0200
     5.3 @@ -2,6 +2,180 @@
     5.4  imports Setup
     5.5  begin
     5.6  
     5.7 -section {* ML system interfaces *}
     5.8 +section {* ML system interfaces \label{sec:ml} *}
     5.9 +
    5.10 +text {*
    5.11 +  Since the code generator framework not only aims to provide
    5.12 +  a nice Isar interface but also to form a base for
    5.13 +  code-generation-based applications, here is a short
    5.14 +  description of the most important ML interfaces.
    5.15 +*}
    5.16 +
    5.17 +subsection {* Executable theory content: @{text Code} *}
    5.18 +
    5.19 +text {*
    5.20 +  This Pure module implements the core notions of
    5.21 +  executable content of a theory.
    5.22 +*}
    5.23 +
    5.24 +subsubsection {* Managing executable content *}
    5.25 +
    5.26 +text %mlref {*
    5.27 +  \begin{mldecls}
    5.28 +  @{index_ML Code.add_eqn: "thm -> theory -> theory"} \\
    5.29 +  @{index_ML Code.del_eqn: "thm -> theory -> theory"} \\
    5.30 +  @{index_ML Code.add_eqnl: "string * (thm * bool) list Susp.T -> theory -> theory"} \\
    5.31 +  @{index_ML Code.map_pre: "(MetaSimplifier.simpset -> MetaSimplifier.simpset) -> theory -> theory"} \\
    5.32 +  @{index_ML Code.map_post: "(MetaSimplifier.simpset -> MetaSimplifier.simpset) -> theory -> theory"} \\
    5.33 +  @{index_ML Code.add_functrans: "string * (theory -> (thm * bool) list -> (thm * bool) list option)
    5.34 +    -> theory -> theory"} \\
    5.35 +  @{index_ML Code.del_functrans: "string -> theory -> theory"} \\
    5.36 +  @{index_ML Code.add_datatype: "(string * typ) list -> theory -> theory"} \\
    5.37 +  @{index_ML Code.get_datatype: "theory -> string
    5.38 +    -> (string * sort) list * (string * typ list) list"} \\
    5.39 +  @{index_ML Code.get_datatype_of_constr: "theory -> string -> string option"}
    5.40 +  \end{mldecls}
    5.41 +
    5.42 +  \begin{description}
    5.43 +
    5.44 +  \item @{ML Code.add_eqn}~@{text "thm"}~@{text "thy"} adds function
    5.45 +     theorem @{text "thm"} to executable content.
    5.46 +
    5.47 +  \item @{ML Code.del_eqn}~@{text "thm"}~@{text "thy"} removes function
    5.48 +     theorem @{text "thm"} from executable content, if present.
    5.49 +
    5.50 +  \item @{ML Code.add_eqnl}~@{text "(const, lthms)"}~@{text "thy"} adds
    5.51 +     suspended defining equations @{text lthms} for constant
    5.52 +     @{text const} to executable content.
    5.53 +
    5.54 +  \item @{ML Code.map_pre}~@{text "f"}~@{text "thy"} changes
    5.55 +     the preprocessor simpset.
    5.56 +
    5.57 +  \item @{ML Code.add_functrans}~@{text "(name, f)"}~@{text "thy"} adds
    5.58 +     function transformer @{text f} (named @{text name}) to executable content;
    5.59 +     @{text f} is a transformer of the defining equations belonging
    5.60 +     to a certain function definition, depending on the
    5.61 +     current theory context.  Returning @{text NONE} indicates that no
    5.62 +     transformation took place;  otherwise, the whole process will be iterated
    5.63 +     with the new defining equations.
    5.64 +
    5.65 +  \item @{ML Code.del_functrans}~@{text "name"}~@{text "thy"} removes
    5.66 +     function transformer named @{text name} from executable content.
    5.67 +
    5.68 +  \item @{ML Code.add_datatype}~@{text cs}~@{text thy} adds
    5.69 +     a datatype to executable content, with generation
    5.70 +     set @{text cs}.
    5.71 +
    5.72 +  \item @{ML Code.get_datatype_of_constr}~@{text "thy"}~@{text "const"}
    5.73 +     returns type constructor corresponding to
    5.74 +     constructor @{text const}; returns @{text NONE}
    5.75 +     if @{text const} is no constructor.
    5.76 +
    5.77 +  \end{description}
    5.78 +*}
    5.79 +
    5.80 +subsection {* Auxiliary *}
    5.81 +
    5.82 +text %mlref {*
    5.83 +  \begin{mldecls}
    5.84 +  @{index_ML Code_Unit.read_const: "theory -> string -> string"} \\
    5.85 +  @{index_ML Code_Unit.head_eqn: "theory -> thm -> string * ((string * sort) list * typ)"} \\
    5.86 +  @{index_ML Code_Unit.rewrite_eqn: "MetaSimplifier.simpset -> thm -> thm"} \\
    5.87 +  \end{mldecls}
    5.88 +
    5.89 +  \begin{description}
    5.90 +
    5.91 +  \item @{ML Code_Unit.read_const}~@{text thy}~@{text s}
    5.92 +     reads a constant as a concrete term expression @{text s}.
    5.93 +
    5.94 +  \item @{ML Code_Unit.head_eqn}~@{text thy}~@{text thm}
    5.95 +     extracts the constant and its type from a defining equation @{text thm}.
    5.96 +
    5.97 +  \item @{ML Code_Unit.rewrite_eqn}~@{text ss}~@{text thm}
    5.98 +     rewrites a defining equation @{text thm} with a simpset @{text ss};
    5.99 +     only arguments and right hand side are rewritten,
   5.100 +     not the head of the defining equation.
   5.101 +
   5.102 +  \end{description}
   5.103 +
   5.104 +*}
   5.105 +
   5.106 +subsection {* Implementing code generator applications *}
   5.107 +
   5.108 +text {*
   5.109 +  Implementing code generator applications on top
   5.110 +  of the framework set out so far usually not only
   5.111 +  involves using those primitive interfaces
   5.112 +  but also storing code-dependent data and various
   5.113 +  other things.
   5.114 +
   5.115 +  \begin{warn}
   5.116 +    Some interfaces discussed here have not reached
   5.117 +    a final state yet.
   5.118 +    Changes likely to occur in future.
   5.119 +  \end{warn}
   5.120 +*}
   5.121 +
   5.122 +subsubsection {* Data depending on the theory's executable content *}
   5.123 +
   5.124 +text {*
   5.125 +  Due to incrementality of code generation, changes in the
   5.126 +  theory's executable content have to be propagated in a
   5.127 +  certain fashion.  Additionally, such changes may occur
   5.128 +  not only during theory extension but also during theory
   5.129 +  merge, which is a little bit nasty from an implementation
   5.130 +  point of view.  The framework provides a solution
   5.131 +  to this technical challenge by providing a functorial
   5.132 +  data slot @{ML_functor CodeDataFun}; on instantiation
   5.133 +  of this functor, the following types and operations
   5.134 +  are required:
   5.135 +
   5.136 +  \medskip
   5.137 +  \begin{tabular}{l}
   5.138 +  @{text "type T"} \\
   5.139 +  @{text "val empty: T"} \\
   5.140 +  @{text "val purge: theory \<rightarrow> CodeUnit.const list option \<rightarrow> T \<rightarrow> T"}
   5.141 +  \end{tabular}
   5.142 +
   5.143 +  \begin{description}
   5.144 +
   5.145 +  \item @{text T} the type of data to store.
   5.146 +
   5.147 +  \item @{text empty} initial (empty) data.
   5.148 +
   5.149 +  \item @{text purge}~@{text thy}~@{text consts} propagates changes in executable content;
   5.150 +    @{text consts} indicates the kind
   5.151 +    of change: @{ML NONE} stands for a fundamental change
   5.152 +    which invalidates any existing code, @{text "SOME consts"}
   5.153 +    hints that executable content for constants @{text consts}
   5.154 +    has changed.
   5.155 +
   5.156 +  \end{description}
   5.157 +
   5.158 +  An instance of @{ML_functor CodeDataFun} provides the following
   5.159 +  interface:
   5.160 +
   5.161 +  \medskip
   5.162 +  \begin{tabular}{l}
   5.163 +  @{text "get: theory \<rightarrow> T"} \\
   5.164 +  @{text "change: theory \<rightarrow> (T \<rightarrow> T) \<rightarrow> T"} \\
   5.165 +  @{text "change_yield: theory \<rightarrow> (T \<rightarrow> 'a * T) \<rightarrow> 'a * T"}
   5.166 +  \end{tabular}
   5.167 +
   5.168 +  \begin{description}
   5.169 +
   5.170 +  \item @{text get} retrieval of the current data.
   5.171 +
   5.172 +  \item @{text change} update of current data (cached!)
   5.173 +    by giving a continuation.
   5.174 +
   5.175 +  \item @{text change_yield} update with side result.
   5.176 +
   5.177 +  \end{description}
   5.178 +*}
   5.179 +
   5.180 +text {*
   5.181 +  \emph{Happy proving, happy hacking!}
   5.182 +*}
   5.183  
   5.184  end
     6.1 --- a/doc-src/IsarAdvanced/Codegen/Thy/Program.thy	Tue Sep 30 04:06:55 2008 +0200
     6.2 +++ b/doc-src/IsarAdvanced/Codegen/Thy/Program.thy	Tue Sep 30 11:19:47 2008 +0200
     6.3 @@ -1,27 +1,469 @@
     6.4  theory Program
     6.5 -imports Setup
     6.6 +imports Introduction
     6.7  begin
     6.8  
     6.9 -section {* Turning Theories into Programs *}
    6.10 +section {* Turning Theories into Programs \label{sec:program} *}
    6.11  
    6.12  subsection {* The @{text "Isabelle/HOL"} default setup *}
    6.13  
    6.14 -text {* Invoking the code generator *}
    6.15 -
    6.16  subsection {* Selecting code equations *}
    6.17  
    6.18 -text {* inspection by @{text "code_thms"} *}
    6.19 +text {*
    6.20 +  We have already seen how by default equations stemming from
    6.21 +  @{command definition}/@{command primrec}/@{command fun}
    6.22 +  statements are used for code generation.  Deviating from this
    6.23 +  \emph{default} behaviour is always possible -- e.g.~we
    6.24 +  could provide an alternative @{text fun} for @{const dequeue} proving
    6.25 +  the following equations explicitly:
    6.26 +*}
    6.27 +
    6.28 +lemma %quoteme [code func]:
    6.29 +  "dequeue (Queue xs []) = (if xs = [] then (None, Queue [] []) else dequeue (Queue [] (rev xs)))"
    6.30 +  "dequeue (Queue xs (y # ys)) = (Some y, Queue xs ys)"
    6.31 +  by (cases xs, simp_all) (cases "rev xs", simp_all)
    6.32 +
    6.33 +text {*
    6.34 +  \noindent The annotation @{text "[code func]"} is an @{text Isar}
    6.35 +  @{text attribute} which states that the given theorems should be
    6.36 +  considered as defining equations for a @{text fun} statement --
    6.37 +  the corresponding constant is determined syntactically.  The resulting code:
    6.38 +*}
    6.39 +
    6.40 +text %quoteme {*@{code_stmts empty enqueue dequeue (Haskell)}*}
    6.41 +
    6.42 +text {*
    6.43 +  \noindent You may note that the equality test @{term "xs = []"} has been
    6.44 +  replaced by the predicate @{term "null xs"}.  This is due to the default
    6.45 +  setup in the \qn{preprocessor} to be discussed further below (\secref{sec:preproc}).
    6.46 +
    6.47 +  Changing the default constructor set of datatypes is also
    6.48 +  possible but rarely desired in practice.  See \secref{sec:datatypes} for an example.
    6.49 +
    6.50 +  As told in \secref{sec:concept}, code generation is based
    6.51 +  on a structured collection of code theorems.
    6.52 +  For explorative purpose, this collection
    6.53 +  may be inspected using the @{command code_thms} command:
    6.54 +*}
    6.55 +
    6.56 +code_thms %quoteme dequeue
    6.57 +
    6.58 +text {*
    6.59 +  \noindent prints a table with \emph{all} defining equations
    6.60 +  for @{const dequeue}, including
    6.61 +  \emph{all} defining equations on which those equations
    6.62 +  depend recursively.
    6.63 +  
    6.64 +  Similarly, the @{command code_deps} command shows a graph
    6.65 +  visualising dependencies between defining equations.
    6.66 +*}
    6.67 +
    6.68 +subsection {* @{text class} and @{text instantiation} *}
    6.69 +
    6.70 +text {*
    6.71 +  Concerning type classes and code generation, let us again examine an example
    6.72 +  from abstract algebra:
    6.73 +*}
    6.74 +
    6.75 +class %quoteme semigroup = type +
    6.76 +  fixes mult :: "'a \<Rightarrow> 'a \<Rightarrow> 'a" (infixl "\<otimes>" 70)
    6.77 +  assumes assoc: "(x \<otimes> y) \<otimes> z = x \<otimes> (y \<otimes> z)"
    6.78 +
    6.79 +class %quoteme monoid = semigroup +
    6.80 +  fixes neutral :: 'a ("\<one>")
    6.81 +  assumes neutl: "\<one> \<otimes> x = x"
    6.82 +    and neutr: "x \<otimes> \<one> = x"
    6.83 +
    6.84 +instantiation %quoteme nat :: monoid
    6.85 +begin
    6.86 +
    6.87 +primrec %quoteme mult_nat where
    6.88 +    "0 \<otimes> n = (0\<Colon>nat)"
    6.89 +  | "Suc m \<otimes> n = n + m \<otimes> n"
    6.90 +
    6.91 +definition %quoteme neutral_nat where
    6.92 +  "\<one> = Suc 0"
    6.93  
    6.94 -subsection {* The preprocessor *}
    6.95 +lemma %quoteme add_mult_distrib:
    6.96 +  fixes n m q :: nat
    6.97 +  shows "(n + m) \<otimes> q = n \<otimes> q + m \<otimes> q"
    6.98 +  by (induct n) simp_all
    6.99 +
   6.100 +instance %quoteme proof
   6.101 +  fix m n q :: nat
   6.102 +  show "m \<otimes> n \<otimes> q = m \<otimes> (n \<otimes> q)"
   6.103 +    by (induct m) (simp_all add: add_mult_distrib)
   6.104 +  show "\<one> \<otimes> n = n"
   6.105 +    by (simp add: neutral_nat_def)
   6.106 +  show "m \<otimes> \<one> = m"
   6.107 +    by (induct m) (simp_all add: neutral_nat_def)
   6.108 +qed
   6.109 +
   6.110 +end %quoteme
   6.111 +
   6.112 +text {*
   6.113 +  \noindent We define the natural operation of the natural numbers
   6.114 +  on monoids:
   6.115 +*}
   6.116 +
   6.117 +primrec %quoteme pow :: "nat \<Rightarrow> 'a\<Colon>monoid \<Rightarrow> 'a\<Colon>monoid" where
   6.118 +    "pow 0 a = \<one>"
   6.119 +  | "pow (Suc n) a = a \<otimes> pow n a"
   6.120 +
   6.121 +text {*
   6.122 +  \noindent This we use to define the discrete exponentiation function:
   6.123 +*}
   6.124 +
   6.125 +definition %quoteme bexp :: "nat \<Rightarrow> nat" where
   6.126 +  "bexp n = pow n (Suc (Suc 0))"
   6.127 +
   6.128 +text {*
   6.129 +  \noindent The corresponding code:
   6.130 +*}
   6.131 +
   6.132 +text %quoteme {*@{code_stmts bexp (Haskell)}*}
   6.133 +
   6.134 +text {*
   6.135 +  \noindent An inspection reveals that the equations stemming from the
   6.136 +  @{text primrec} statement within instantiation of class
   6.137 +  @{class semigroup} with type @{typ nat} are mapped to a separate
   6.138 +  function declaration @{text mult_nat} which in turn is used
   6.139 +  to provide the right hand side for the @{text "instance Semigroup Nat"}
   6.140 +  \fixme[courier fuer code text, isastyle fuer class].  This perfectly
   6.141 +  agrees with the restriction that @{text inst} statements may
   6.142 +  only contain one single equation for each class parameter.
   6.143 +  The @{text instantiation} mechanism manages that for each
   6.144 +  overloaded constant @{text "f [\<kappa> \<^bvec>\<alpha>\<Colon>s\<^isub>k\<^evec>]"}
   6.145 +  a \emph{shadow constant} @{text "f\<^isub>\<kappa> [\<^bvec>\<alpha>\<Colon>s\<^isub>k\<^evec>]"} is
   6.146 +  declared satisfying @{text "f [\<kappa> \<^bvec>\<alpha>\<Colon>s\<^isub>k\<^evec>] \<equiv> f\<^isub>\<kappa> [\<^bvec>\<alpha>\<Colon>s\<^isub>k\<^evec>]"}.
   6.147 +  This equation is indeed the one used for the statement;
   6.148 +  using it as a rewrite rule, defining equations for 
   6.149 +  @{text "f [\<kappa> \<^bvec>\<alpha>\<Colon>s\<^isub>k\<^evec>]"} can be turned into
   6.150 +  defining equations for @{text "f\<^isub>\<kappa> [\<^bvec>\<alpha>\<Colon>s\<^isub>k\<^evec>]"}.  This
   6.151 +  happens transparently, providing the illusion that class parameters
   6.152 +  can be instantiated with more than one equation.
   6.153 +
   6.154 +  This is a convenient place to show how explicit dictionary construction
   6.155 +  manifests in generated code (here, the same example in @{text SML}):
   6.156 +*}
   6.157 +
   6.158 +text %quoteme {*@{code_stmts bexp (SML)}*}
   6.159 +
   6.160 +
   6.161 +subsection {* The preprocessor \label{sec:preproc} *}
   6.162 +
   6.163 +text {*
   6.164 +  Before selected function theorems are turned into abstract
   6.165 +  code, a chain of definitional transformation steps is carried
   6.166 +  out: \emph{preprocessing}.  In essence, the preprocessor
   6.167 +  consists of two components: a \emph{simpset} and \emph{function transformers}.
   6.168 +
   6.169 +  The \emph{simpset} allows to employ the full generality of the Isabelle
   6.170 +  simplifier.  Due to the interpretation of theorems
   6.171 +  as defining equations, rewrites are applied to the right
   6.172 +  hand side and the arguments of the left hand side of an
   6.173 +  equation, but never to the constant heading the left hand side.
   6.174 +  An important special case are \emph{inline theorems} which may be
   6.175 +  declared and undeclared using the
   6.176 +  \emph{code inline} or \emph{code inline del} attribute respectively.
   6.177  
   6.178 -subsection {* Datatypes *}
   6.179 +  Some common applications:
   6.180 +*}
   6.181 +
   6.182 +text_raw {*
   6.183 +  \begin{itemize}
   6.184 +*}
   6.185 +
   6.186 +text {*
   6.187 +     \item replacing non-executable constructs by executable ones:
   6.188 +*}     
   6.189 +
   6.190 +lemma %quoteme [code inline]:
   6.191 +  "x \<in> set xs \<longleftrightarrow> x mem xs" by (induct xs) simp_all
   6.192 +
   6.193 +text {*
   6.194 +     \item eliminating superfluous constants:
   6.195 +*}
   6.196 +
   6.197 +lemma %quoteme [code inline]:
   6.198 +  "1 = Suc 0" by simp
   6.199 +
   6.200 +text {*
   6.201 +     \item replacing executable but inconvenient constructs:
   6.202 +*}
   6.203 +
   6.204 +lemma %quoteme [code inline]:
   6.205 +  "xs = [] \<longleftrightarrow> List.null xs" by (induct xs) simp_all
   6.206 +
   6.207 +text_raw {*
   6.208 +  \end{itemize}
   6.209 +*}
   6.210 +
   6.211 +text {*
   6.212 +  \emph{Function transformers} provide a very general interface,
   6.213 +  transforming a list of function theorems to another
   6.214 +  list of function theorems, provided that neither the heading
   6.215 +  constant nor its type change.  The @{term "0\<Colon>nat"} / @{const Suc}
   6.216 +  pattern elimination implemented in
   6.217 +  theory @{text Efficient_Nat} (see \secref{eff_nat}) uses this
   6.218 +  interface.
   6.219 +
   6.220 +  \noindent The current setup of the preprocessor may be inspected using
   6.221 +  the @{command print_codesetup} command.
   6.222 +  @{command code_thms} provides a convenient
   6.223 +  mechanism to inspect the impact of a preprocessor setup
   6.224 +  on defining equations.
   6.225 +
   6.226 +  \begin{warn}
   6.227 +    The attribute \emph{code unfold}
   6.228 +    associated with the existing code generator also applies to
   6.229 +    the new one: \emph{code unfold} implies \emph{code inline}.
   6.230 +  \end{warn}
   6.231 +*}
   6.232 +
   6.233 +subsection {* Datatypes \label{sec:datatypes} *}
   6.234 +
   6.235 +text {*
   6.236 +  Conceptually, any datatype is spanned by a set of
   6.237 +  \emph{constructors} of type @{text "\<tau> = \<dots> \<Rightarrow> \<kappa> \<alpha>\<^isub>1 \<dots> \<alpha>\<^isub>n"}
   6.238 +  where @{text "{\<alpha>\<^isub>1, \<dots>, \<alpha>\<^isub>n}"} is exactly the set of \emph{all}
   6.239 +  type variables in @{text "\<tau>"}.  The HOL datatype package
   6.240 +  by default registers any new datatype in the table
   6.241 +  of datatypes, which may be inspected using
   6.242 +  the @{command print_codesetup} command.
   6.243 +
   6.244 +  In some cases, it may be convenient to alter or
   6.245 +  extend this table;  as an example, we will develop an alternative
   6.246 +  representation of natural numbers as binary digits, whose
   6.247 +  size increases logarithmically with its value, not linearly
   6.248 +  \footnote{Indeed, the @{theory Efficient_Nat} theory (see \ref{eff_nat})
   6.249 +    does something similar}.  First, the digit representation:
   6.250 +*}
   6.251 +
   6.252 +definition %quoteme Dig0 :: "nat \<Rightarrow> nat" where
   6.253 +  "Dig0 n = 2 * n"
   6.254 +
   6.255 +definition %quoteme  Dig1 :: "nat \<Rightarrow> nat" where
   6.256 +  "Dig1 n = Suc (2 * n)"
   6.257  
   6.258 -text {* example: @{text "rat"}, cases *}
   6.259 +text {*
   6.260 +  \noindent We will use these two "digits" to represent natural numbers
   6.261 +  in binary digits, e.g.:
   6.262 +*}
   6.263 +
   6.264 +lemma %quoteme 42: "42 = Dig0 (Dig1 (Dig0 (Dig1 (Dig0 1))))"
   6.265 +  by (simp add: Dig0_def Dig1_def)
   6.266 +
   6.267 +text {*
   6.268 +  \noindent Of course we also have to provide proper code equations for
   6.269 +  the operations, e.g. @{term "op + \<Colon> nat \<Rightarrow> nat \<Rightarrow> nat"}:
   6.270 +*}
   6.271 +
   6.272 +lemma %quoteme plus_Dig [code func]:
   6.273 +  "0 + n = n"
   6.274 +  "m + 0 = m"
   6.275 +  "1 + Dig0 n = Dig1 n"
   6.276 +  "Dig0 m + 1 = Dig1 m"
   6.277 +  "1 + Dig1 n = Dig0 (n + 1)"
   6.278 +  "Dig1 m + 1 = Dig0 (m + 1)"
   6.279 +  "Dig0 m + Dig0 n = Dig0 (m + n)"
   6.280 +  "Dig0 m + Dig1 n = Dig1 (m + n)"
   6.281 +  "Dig1 m + Dig0 n = Dig1 (m + n)"
   6.282 +  "Dig1 m + Dig1 n = Dig0 (m + n + 1)"
   6.283 +  by (simp_all add: Dig0_def Dig1_def)
   6.284 +
   6.285 +text {*
   6.286 +  \noindent We then instruct the code generator to view @{term "0\<Colon>nat"},
   6.287 +  @{term "1\<Colon>nat"}, @{term Dig0} and @{term Dig1} as
   6.288 +  datatype constructors:
   6.289 +*}
   6.290 +
   6.291 +code_datatype %quoteme "0\<Colon>nat" "1\<Colon>nat" Dig0 Dig1
   6.292 +
   6.293 +text {*
   6.294 +  \noindent For the former constructor @{term Suc}, we provide a code
   6.295 +  equation and remove some parts of the default code generator setup
   6.296 +  which are an obstacle here:
   6.297 +*}
   6.298 +
   6.299 +lemma %quoteme Suc_Dig [code func]:
   6.300 +  "Suc n = n + 1"
   6.301 +  by simp
   6.302 +
   6.303 +declare %quoteme One_nat_def [code inline del]
   6.304 +declare %quoteme add_Suc_shift [code func del] 
   6.305 +
   6.306 +text {*
   6.307 +  \noindent This yields the following code:
   6.308 +*}
   6.309 +
   6.310 +text %quoteme {*@{code_stmts "op + \<Colon> nat \<Rightarrow> nat \<Rightarrow> nat" (SML)}*}
   6.311 +
   6.312 +text {*
   6.313 +  \medskip
   6.314 +
   6.315 +  From this example, it can be easily glimpsed that using own constructor sets
   6.316 +  is a little delicate since it changes the set of valid patterns for values
   6.317 +  of that type.  Without going into much detail, here some practical hints:
   6.318 +
   6.319 +  \begin{itemize}
   6.320 +    \item When changing the constructor set for datatypes, take care to
   6.321 +      provide an alternative for the @{text case} combinator (e.g.~by replacing
   6.322 +      it using the preprocessor).
   6.323 +    \item Values in the target language need not be normalised -- different
   6.324 +      values in the target language may represent the same value in the
   6.325 +      logic (e.g. @{term "Dig1 0 = 1"}).
   6.326 +    \item Usually, a good methodology to deal with the subtleties of pattern
   6.327 +      matching is to see the type as an abstract type: provide a set
   6.328 +      of operations which operate on the concrete representation of the type,
   6.329 +      and derive further operations by combinations of these primitive ones,
   6.330 +      without relying on a particular representation.
   6.331 +  \end{itemize}
   6.332 +*}
   6.333 +
   6.334 +code_datatype %invisible "0::nat" Suc
   6.335 +declare %invisible plus_Dig [code func del]
   6.336 +declare %invisible One_nat_def [code inline]
   6.337 +declare %invisible add_Suc_shift [code func] 
   6.338 +lemma %invisible [code func]: "0 + n = (n \<Colon> nat)" by simp
   6.339 +
   6.340  
   6.341  subsection {* Equality and wellsortedness *}
   6.342  
   6.343 +text {*
   6.344 +  Surely you have already noticed how equality is treated
   6.345 +  by the code generator:
   6.346 +*}
   6.347 +
   6.348 +primrec %quoteme
   6.349 +  collect_duplicates :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a list \<Rightarrow> 'a list" where
   6.350 +    "collect_duplicates xs ys [] = xs"
   6.351 +  | "collect_duplicates xs ys (z#zs) = (if z \<in> set xs
   6.352 +      then if z \<in> set ys
   6.353 +        then collect_duplicates xs ys zs
   6.354 +        else collect_duplicates xs (z#ys) zs
   6.355 +      else collect_duplicates (z#xs) (z#ys) zs)"
   6.356 +
   6.357 +text {*
   6.358 +  The membership test during preprocessing is rewritten,
   6.359 +  resulting in @{const List.member}, which itself
   6.360 +  performs an explicit equality check.
   6.361 +*}
   6.362 +
   6.363 +text %quoteme {*@{code_stmts collect_duplicates (SML)}*}
   6.364 +
   6.365 +text {*
   6.366 +  \noindent Obviously, polymorphic equality is implemented the Haskell
   6.367 +  way using a type class.  How is this achieved?  HOL introduces
   6.368 +  an explicit class @{class eq} with a corresponding operation
   6.369 +  @{const eq_class.eq} such that @{thm eq [no_vars]}.
   6.370 +  The preprocessing framework does the rest.
   6.371 +  For datatypes, instances of @{class eq} are implicitly derived
   6.372 +  when possible.  For other types, you may instantiate @{text eq}
   6.373 +  manually like any other type class.
   6.374 +
   6.375 +  Though this @{text eq} class is designed to rarely get in
   6.376 +  the way, a subtlety
   6.377 +  enters the stage when definitions of overloaded constants
   6.378 +  are dependent on operational equality.  For example, let
   6.379 +  us define a lexicographic ordering on tuples:
   6.380 +*}
   6.381 +
   6.382 +instantiation * :: (ord, ord) ord
   6.383 +begin
   6.384 +
   6.385 +definition
   6.386 +  [code func del]: "p1 < p2 \<longleftrightarrow> (let (x1, y1) = p1; (x2, y2) = p2 in
   6.387 +    x1 < x2 \<or> (x1 = x2 \<and> y1 < y2))"
   6.388 +
   6.389 +definition
   6.390 +  [code func del]: "p1 \<le> p2 \<longleftrightarrow> (let (x1, y1) = p1; (x2, y2) = p2 in
   6.391 +    x1 < x2 \<or> (x1 = x2 \<and> y1 \<le> y2))"
   6.392 +
   6.393 +instance ..
   6.394 +
   6.395 +end
   6.396 +
   6.397 +lemma ord_prod [code func(*<*), code func del(*>*)]:
   6.398 +  "(x1 \<Colon> 'a\<Colon>ord, y1 \<Colon> 'b\<Colon>ord) < (x2, y2) \<longleftrightarrow> x1 < x2 \<or> (x1 = x2 \<and> y1 < y2)"
   6.399 +  "(x1 \<Colon> 'a\<Colon>ord, y1 \<Colon> 'b\<Colon>ord) \<le> (x2, y2) \<longleftrightarrow> x1 < x2 \<or> (x1 = x2 \<and> y1 \<le> y2)"
   6.400 +  unfolding less_prod_def less_eq_prod_def by simp_all
   6.401 +
   6.402 +text {*
   6.403 +  Then code generation will fail.  Why?  The definition
   6.404 +  of @{term "op \<le>"} depends on equality on both arguments,
   6.405 +  which are polymorphic and impose an additional @{class eq}
   6.406 +  class constraint, thus violating the type discipline
   6.407 +  for class operations.
   6.408 +
   6.409 +  The solution is to add @{class eq} explicitly to the first sort arguments in the
   6.410 +  code theorems:
   6.411 +*}
   6.412 +
   6.413 +lemma ord_prod_code [code func]:
   6.414 +  "(x1 \<Colon> 'a\<Colon>{ord, eq}, y1 \<Colon> 'b\<Colon>ord) < (x2, y2) \<longleftrightarrow>
   6.415 +    x1 < x2 \<or> (x1 = x2 \<and> y1 < y2)"
   6.416 +  "(x1 \<Colon> 'a\<Colon>{ord, eq}, y1 \<Colon> 'b\<Colon>ord) \<le> (x2, y2) \<longleftrightarrow>
   6.417 +    x1 < x2 \<or> (x1 = x2 \<and> y1 \<le> y2)"
   6.418 +  unfolding ord_prod by rule+
   6.419 +
   6.420 +text {*
   6.421 +  \noindent Then code generation succeeds:
   6.422 +*}
   6.423 +
   6.424 +text %quoteme {*@{code_stmts "op \<le> \<Colon> 'a\<Colon>{eq, ord} \<times> 'b\<Colon>ord \<Rightarrow> 'a \<times> 'b \<Rightarrow> bool" (SML)}*}
   6.425 +
   6.426 +text {*
   6.427 +  In general, code theorems for overloaded constants may have more
   6.428 +  restrictive sort constraints than the underlying instance relation
   6.429 +  between class and type constructor as long as the whole system of
   6.430 +  constraints is coregular; code theorems violating coregularity
   6.431 +  are rejected immediately.  Consequently, it might be necessary
   6.432 +  to delete disturbing theorems in the code theorem table,
   6.433 +  as we have done here with the original definitions @{fact less_prod_def}
   6.434 +  and @{fact less_eq_prod_def}.
   6.435 +
   6.436 +  In some cases, the automatically derived defining equations
   6.437 +  for equality on a particular type may not be appropriate.
   6.438 +  As an example, consider the following datatype representing
   6.439 +  monomorphic parametric types (where type constructors
   6.440 +  are referred to by natural numbers):
   6.441 +*}
   6.442 +
   6.443 +datatype %quoteme monotype = Mono nat "monotype list"
   6.444 +(*<*)
   6.445 +lemma monotype_eq:
   6.446 +  "Mono tyco1 typargs1 = Mono tyco2 typargs2 \<equiv> 
   6.447 +     tyco1 = tyco2 \<and> typargs1 = typargs2" by simp
   6.448 +(*>*)
   6.449 +
   6.450 +text {*
   6.451 +  Then code generation for SML would fail with a message
   6.452 +  that the generated code contains illegal mutual dependencies:
   6.453 +  the theorem @{thm monotype_eq [no_vars]} already requires the
   6.454 +  instance @{text "monotype \<Colon> eq"}, which itself requires
   6.455 +  @{thm monotype_eq [no_vars]};  Haskell has no problem with mutually
   6.456 +  recursive @{text instance} and @{text function} definitions,
   6.457 +  but the SML serializer does not support this.
   6.458 +
   6.459 +  In such cases, you have to provide your own equality equations
   6.460 +  involving auxiliary constants.  In our case,
   6.461 +  @{const [show_types] list_all2} can do the job:
   6.462 +*}
   6.463 +
   6.464 +lemma %quoteme monotype_eq_list_all2 [code func]:
   6.465 +  "eq_class.eq (Mono tyco1 typargs1) (Mono tyco2 typargs2) \<longleftrightarrow>
   6.466 +     tyco1 = tyco2 \<and> list_all2 eq_class.eq typargs1 typargs2"
   6.467 +  by (simp add: eq list_all2_eq [symmetric])
   6.468 +
   6.469 +text {*
   6.470 +  \noindent does not depend on instance @{text "monotype \<Colon> eq"}:
   6.471 +*}
   6.472 +
   6.473 +text %quoteme {*@{code_stmts "eq_class.eq :: monotype \<Rightarrow> monotype \<Rightarrow> bool" (SML)}*}
   6.474 +
   6.475 +
   6.476  subsection {* Partiality *}
   6.477  
   6.478 -text {* @{text "code_abort"}, examples: maps *}
   6.479 +text {* @{command "code_abort"}, examples: maps *}
   6.480  
   6.481  end
     7.1 --- a/doc-src/IsarAdvanced/Codegen/Thy/ROOT.ML	Tue Sep 30 04:06:55 2008 +0200
     7.2 +++ b/doc-src/IsarAdvanced/Codegen/Thy/ROOT.ML	Tue Sep 30 11:19:47 2008 +0200
     7.3 @@ -1,12 +1,11 @@
     7.4  
     7.5  (* $Id$ *)
     7.6  
     7.7 -use_thy "Codegen";
     7.8 -
     7.9 -(*no_document use_thy "Setup";
    7.10 +no_document use_thy "Setup";
    7.11 +no_document use_thys ["Efficient_Nat", "Code_Integer"];
    7.12  
    7.13  use_thy "Introduction";
    7.14  use_thy "Program";
    7.15  use_thy "Adaption";
    7.16  use_thy "Further";
    7.17 -use_thy "ML";*)
    7.18 +use_thy "ML";
     8.1 --- a/doc-src/IsarAdvanced/Codegen/Thy/Setup.thy	Tue Sep 30 04:06:55 2008 +0200
     8.2 +++ b/doc-src/IsarAdvanced/Codegen/Thy/Setup.thy	Tue Sep 30 11:19:47 2008 +0200
     8.3 @@ -1,8 +1,61 @@
     8.4  theory Setup
     8.5 -imports Main
     8.6 +imports Complex_Main
     8.7  uses "../../../antiquote_setup.ML"
     8.8  begin
     8.9  
    8.10 +ML {* no_document use_thys ["Efficient_Nat", "Code_Char_chr"] *}
    8.11 +
    8.12  ML_val {* Code_Target.code_width := 74 *}
    8.13  
    8.14 +ML {*
    8.15 +fun pr_class ctxt = Sign.extern_class (ProofContext.theory_of ctxt)
    8.16 +  o Sign.read_class (ProofContext.theory_of ctxt);
    8.17 +*}
    8.18 +
    8.19 +ML {*
    8.20 +val _ = ThyOutput.add_commands
    8.21 +  [("class", ThyOutput.args (Scan.lift Args.name) (K pr_class))]
    8.22 +*}
    8.23 +
    8.24 +ML {*
    8.25 +val verbatim_line = space_implode "\\verb,|," o map (enclose "\\verb|" "|") o space_explode "|";
    8.26 +val verbatim_lines = rev
    8.27 +  #> dropwhile (fn s => s = "")
    8.28 +  #> rev
    8.29 +  #> map verbatim_line #> space_implode "\\newline%\n"
    8.30 +  #> prefix "\\isaverbatim%\n\\noindent%\n"
    8.31 +*}
    8.32 +
    8.33 +ML {*
    8.34 +local
    8.35 +
    8.36 +  val parse_const_terms = Scan.repeat1 Args.term
    8.37 +    >> (fn ts => fn thy => map (Code_Unit.check_const thy) ts);
    8.38 +  val parse_consts = Scan.lift (Args.parens (Args.$$$ "consts")) |-- parse_const_terms
    8.39 +    >> (fn mk_cs => fn thy => map (Code_Name.const thy) (mk_cs thy));
    8.40 +  val parse_types = Scan.lift (Args.parens (Args.$$$ "types") |-- Scan.repeat1 Args.name)
    8.41 +    >> (fn tycos => fn thy => map (Code_Name.tyco thy o Sign.intern_type thy) tycos);
    8.42 +  val parse_classes = Scan.lift (Args.parens (Args.$$$ "classes") |-- Scan.repeat1 Args.name)
    8.43 +    >> (fn classes => fn thy => map (Code_Name.class thy o Sign.intern_class thy) classes);
    8.44 +  val parse_instances = Scan.lift (Args.parens (Args.$$$ "instances") |-- Scan.repeat1 (Args.name --| Args.$$$ "::" -- Args.name))
    8.45 +    >> (fn insts => fn thy => map (Code_Name.instance thy o apsnd (Sign.intern_type thy) o apfst (Sign.intern_class thy) o swap) insts);
    8.46 +  val parse_names = parse_consts || parse_types || parse_classes || parse_instances; 
    8.47 +
    8.48 +  fun code_stmts src ctxt ((mk_cs, mk_stmtss), target) =
    8.49 +    Code_Target.code_of (ProofContext.theory_of ctxt)
    8.50 +      target "Example" (mk_cs (ProofContext.theory_of ctxt))
    8.51 +        (maps (fn f => f (ProofContext.theory_of ctxt)) mk_stmtss)
    8.52 +    |> split_lines
    8.53 +    |> verbatim_lines;
    8.54 +
    8.55 +in
    8.56 +
    8.57 +val _ = ThyOutput.add_commands
    8.58 +  [("code_stmts", ThyOutput.args
    8.59 +      (parse_const_terms -- Scan.repeat parse_names -- Scan.lift (Args.parens Args.name))
    8.60 +        code_stmts)]
    8.61 +
    8.62  end
    8.63 +*}
    8.64 +
    8.65 +end
     9.1 --- a/doc-src/IsarAdvanced/Codegen/codegen.tex	Tue Sep 30 04:06:55 2008 +0200
     9.2 +++ b/doc-src/IsarAdvanced/Codegen/codegen.tex	Tue Sep 30 11:19:47 2008 +0200
     9.3 @@ -10,47 +10,15 @@
     9.4  \usepackage{style}
     9.5  \usepackage{../../pdfsetup}
     9.6  
     9.7 -\newcommand{\cmd}[1]{\isacommand{#1}}
     9.8 +\makeatletter
     9.9  
    9.10 -\newcommand{\isasymINFIX}{\cmd{infix}}
    9.11 -\newcommand{\isasymLOCALE}{\cmd{locale}}
    9.12 -\newcommand{\isasymINCLUDES}{\cmd{includes}}
    9.13 -\newcommand{\isasymDATATYPE}{\cmd{datatype}}
    9.14 -\newcommand{\isasymAXCLASS}{\cmd{axclass}}
    9.15 -\newcommand{\isasymFIXES}{\cmd{fixes}}
    9.16 -\newcommand{\isasymASSUMES}{\cmd{assumes}}
    9.17 -\newcommand{\isasymDEFINES}{\cmd{defines}}
    9.18 -\newcommand{\isasymNOTES}{\cmd{notes}}
    9.19 -\newcommand{\isasymSHOWS}{\cmd{shows}}
    9.20 -\newcommand{\isasymCLASS}{\cmd{class}}
    9.21 -\newcommand{\isasymINSTANCE}{\cmd{instance}}
    9.22 -\newcommand{\isasymLEMMA}{\cmd{lemma}}
    9.23 -\newcommand{\isasymPROOF}{\cmd{proof}}
    9.24 -\newcommand{\isasymQED}{\cmd{qed}}
    9.25 -\newcommand{\isasymFIX}{\cmd{fix}}
    9.26 -\newcommand{\isasymASSUME}{\cmd{assume}}
    9.27 -\newcommand{\isasymSHOW}{\cmd{show}}
    9.28 -\newcommand{\isasymNOTE}{\cmd{note}}
    9.29 -\newcommand{\isasymEXPORTCODE}{\cmd{export\_code}}
    9.30 -\newcommand{\isasymCODEDATATYPE}{\cmd{code\_datatype}}
    9.31 -\newcommand{\isasymCODECONST}{\cmd{code\_const}}
    9.32 -\newcommand{\isasymCODETYPE}{\cmd{code\_type}}
    9.33 -\newcommand{\isasymCODECLASS}{\cmd{code\_class}}
    9.34 -\newcommand{\isasymCODEINSTANCE}{\cmd{code\_instance}}
    9.35 -\newcommand{\isasymCODERESERVED}{\cmd{code\_reserved}}
    9.36 -\newcommand{\isasymCODEMODULENAME}{\cmd{code\_modulename}}
    9.37 -\newcommand{\isasymCODEMODULEPROLOG}{\cmd{code\_moduleprolog}}
    9.38 -\newcommand{\isasymCODEAXIOMS}{\cmd{code\_axioms}}
    9.39 -\newcommand{\isasymCODEABSTYPE}{\cmd{code\_abstype}}
    9.40 -\newcommand{\isasymPRINTCODESETUP}{\cmd{print\_codesetup}}
    9.41 -\newcommand{\isasymPRINTCLASSES}{\cmd{print\_classes}}
    9.42 -\newcommand{\isasymCODETHMS}{\cmd{code\_thms}}
    9.43 -\newcommand{\isasymCODEDEPS}{\cmd{code\_deps}}
    9.44 -\newcommand{\isasymFUN}{\cmd{fun}}
    9.45 -\newcommand{\isasymFUNCTION}{\cmd{function}}
    9.46 -\newcommand{\isasymPRIMREC}{\cmd{primrec}}
    9.47 -\newcommand{\isasymCONSTDEFS}{\cmd{constdefs}}
    9.48 -\newcommand{\isasymRECDEF}{\cmd{recdef}}
    9.49 +\newcommand{\isaverbatim}{\renewcommand{\baselinestretch}{1}\setlength{\baselineskip}{8pt}\fontsize{8pt}{0pt}}
    9.50 +\isakeeptag{quoteme}
    9.51 +\newenvironment{quoteme}{\begin{quote}\isa@parindent\parindent\parindent0pt\isa@parskip\parskip\parskip0pt}{\end{quote}}
    9.52 +\renewcommand{\isatagquoteme}{\begin{quoteme}}
    9.53 +\renewcommand{\endisatagquoteme}{\end{quoteme}}
    9.54 +
    9.55 +\makeatother
    9.56  
    9.57  \isakeeptag{tt}
    9.58  \renewcommand{\isatagtt}{\isabellestyle{tt}\isastyle}
    9.59 @@ -62,10 +30,6 @@
    9.60  \newcommand{\strong}[1]{{\bfseries #1}}
    9.61  \newcommand{\fixme}[1][!]{\strong{FIXME: #1}}
    9.62  
    9.63 -\lstset{basicstyle=\scriptsize\ttfamily,keywordstyle=\itshape,commentstyle=\itshape\sffamily,frame=single}
    9.64 -\newcommand{\lstsml}[1]{\lstset{language=ML}\lstinputlisting{#1}}
    9.65 -\newcommand{\lsthaskell}[1]{\lstset{language=Haskell}\lstinputlisting{#1}}
    9.66 -
    9.67  \hyphenation{Isabelle}
    9.68  \hyphenation{Isar}
    9.69  
    9.70 @@ -90,12 +54,11 @@
    9.71  \pagenumbering{roman}
    9.72  \clearfirst
    9.73  
    9.74 -\input{Thy/document/Codegen.tex}
    9.75 -% \input{Thy/document/Introduction.tex}
    9.76 -% \input{Thy/document/Program.tex}
    9.77 -% \input{Thy/document/Adaption.tex}
    9.78 -% \input{Thy/document/Further.tex}
    9.79 -% \input{Thy/document/ML.tex}
    9.80 +\input{Thy/document/Introduction.tex}
    9.81 +\input{Thy/document/Program.tex}
    9.82 +\input{Thy/document/Adaption.tex}
    9.83 +\input{Thy/document/Further.tex}
    9.84 +\input{Thy/document/ML.tex}
    9.85  
    9.86  \begingroup
    9.87  %\tocentry{\bibname}