diff -r 9965099f51ad -r b9238cbcdd41 doc-src/Codegen/Adaptation.thy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc-src/Codegen/Adaptation.thy	Mon Aug 27 23:00:38 2012 +0200
@@ -0,0 +1,347 @@
+theory Adaptation
+imports Setup
+begin
+
+setup %invisible {* Code_Target.extend_target ("\<SML>", ("SML", K I))
+  #> Code_Target.extend_target ("\<SMLdummy>", ("Haskell", K I)) *}
+
+section {* Adaptation to target languages \label{sec:adaptation} *}
+
+subsection {* Adapting code generation *}
+
+text {*
+  The aspects of code generation introduced so far have two aspects
+  in common:
+
+  \begin{itemize}
+
+    \item They act uniformly, without reference to a specific target
+       language.
+
+    \item They are \emph{safe} in the sense that as long as you trust
+       the code generator meta theory and implementation, you cannot
+       produce programs that yield results which are not derivable in
+       the logic.
+
+  \end{itemize}
+
+  \noindent In this section we will introduce means to \emph{adapt}
+  the serialiser to a specific target language, i.e.~to print program
+  fragments in a way which accommodates \qt{already existing}
+  ingredients of a target language environment, for three reasons:
+
+  \begin{itemize}
+    \item improving readability and aesthetics of generated code
+    \item gaining efficiency
+    \item interface with language parts which have no direct counterpart
+      in @{text "HOL"} (say, imperative data structures)
+  \end{itemize}
+
+  \noindent Generally, you should avoid using those features yourself
+  \emph{at any cost}:
+
+  \begin{itemize}
+
+    \item The safe configuration methods act uniformly on every target
+      language, whereas for adaptation you have to treat each target
+      language separately.
+
+    \item Application is extremely tedious since there is no
+      abstraction which would allow for a static check, making it easy
+      to produce garbage.
+
+    \item Subtle errors can be introduced unconsciously.
+
+  \end{itemize}
+
+  \noindent However, even if you ought refrain from setting up
+  adaptation yourself, already the @{text "HOL"} comes with some
+  reasonable default adaptations (say, using target language list
+  syntax).  There also some common adaptation cases which you can
+  setup by importing particular library theories.  In order to
+  understand these, we provide some clues here; these however are not
+  supposed to replace a careful study of the sources.
+*}
+
+
+subsection {* The adaptation principle *}
+
+text {*
+  Figure \ref{fig:adaptation} illustrates what \qt{adaptation} is
+  conceptually supposed to be:
+
+  \begin{figure}[here]
+    \includegraphics{adaptation}
+    \caption{The adaptation principle}
+    \label{fig:adaptation}
+  \end{figure}
+
+  \noindent In the tame view, code generation acts as broker between
+  @{text logic}, @{text "intermediate language"} and @{text "target
+  language"} by means of @{text translation} and @{text
+  serialisation}; for the latter, the serialiser has to observe the
+  structure of the @{text language} itself plus some @{text reserved}
+  keywords which have to be avoided for generated code.  However, if
+  you consider @{text adaptation} mechanisms, the code generated by
+  the serializer is just the tip of the iceberg:
+
+  \begin{itemize}
+
+    \item @{text serialisation} can be \emph{parametrised} such that
+      logical entities are mapped to target-specific ones
+      (e.g. target-specific list syntax, see also
+      \secref{sec:adaptation_mechanisms})
+
+    \item Such parametrisations can involve references to a
+      target-specific standard @{text library} (e.g. using the @{text
+      Haskell} @{verbatim Maybe} type instead of the @{text HOL}
+      @{type "option"} type); if such are used, the corresponding
+      identifiers (in our example, @{verbatim Maybe}, @{verbatim
+      Nothing} and @{verbatim Just}) also have to be considered @{text
+      reserved}.
+
+    \item Even more, the user can enrich the library of the
+      target-language by providing code snippets (\qt{@{text
+      "includes"}}) which are prepended to any generated code (see
+      \secref{sec:include}); this typically also involves further
+      @{text reserved} identifiers.
+
+  \end{itemize}
+
+  \noindent As figure \ref{fig:adaptation} illustrates, all these
+  adaptation mechanisms have to act consistently; it is at the
+  discretion of the user to take care for this.
+*}
+
+subsection {* Common adaptation patterns *}
+
+text {*
+  The @{theory HOL} @{theory Main} theory already provides a code
+  generator setup which should be suitable for most applications.
+  Common extensions and modifications are available by certain
+  theories of the @{text HOL} library; beside being useful in
+  applications, they may serve as a tutorial for customising the code
+  generator setup (see below \secref{sec:adaptation_mechanisms}).
+
+  \begin{description}
+
+    \item[@{text "Code_Integer"}] represents @{text HOL} integers by
+       big integer literals in target languages.
+
+    \item[@{text "Code_Char"}] represents @{text HOL} characters by
+       character literals in target languages.
+
+    \item[@{text "Code_Char_chr"}] like @{text "Code_Char"}, but
+       also offers treatment of character codes; includes @{text
+       "Code_Char"}.
+
+    \item[@{text "Efficient_Nat"}] \label{eff_nat} implements
+       natural numbers by integers, which in general will result in
+       higher efficiency; pattern matching with @{term "0\<Colon>nat"} /
+       @{const "Suc"} is eliminated; includes @{text "Code_Integer"}
+       and @{text "Code_Numeral"}.
+
+    \item[@{theory "Code_Numeral"}] provides an additional datatype
+       @{typ index} which is mapped to target-language built-in
+       integers.  Useful for code setups which involve e.g.~indexing
+       of target-language arrays.  Part of @{text "HOL-Main"}.
+
+    \item[@{theory "String"}] provides an additional datatype @{typ
+       String.literal} which is isomorphic to strings; @{typ
+       String.literal}s are mapped to target-language strings.  Useful
+       for code setups which involve e.g.~printing (error) messages.
+       Part of @{text "HOL-Main"}.
+
+  \end{description}
+
+  \begin{warn}
+    When importing any of those theories which are not part of
+    @{text "HOL-Main"}, they should form the last
+    items in an import list.  Since these theories adapt the code
+    generator setup in a non-conservative fashion, strange effects may
+    occur otherwise.
+  \end{warn}
+*}
+
+
+subsection {* Parametrising serialisation \label{sec:adaptation_mechanisms} *}
+
+text {*
+  Consider the following function and its corresponding SML code:
+*}
+
+primrec %quote in_interval :: "nat \<times> nat \<Rightarrow> nat \<Rightarrow> bool" where
+  "in_interval (k, l) n \<longleftrightarrow> k \<le> n \<and> n \<le> l"
+(*<*)
+code_type %invisible bool
+  (SML)
+code_const %invisible True and False and "op \<and>" and Not
+  (SML and and and)
+(*>*)
+text %quotetypewriter {*
+  @{code_stmts in_interval (SML)}
+*}
+
+text {*
+  \noindent Though this is correct code, it is a little bit
+  unsatisfactory: boolean values and operators are materialised as
+  distinguished entities with have nothing to do with the SML-built-in
+  notion of \qt{bool}.  This results in less readable code;
+  additionally, eager evaluation may cause programs to loop or break
+  which would perfectly terminate when the existing SML @{verbatim
+  "bool"} would be used.  To map the HOL @{typ bool} on SML @{verbatim
+  "bool"}, we may use \qn{custom serialisations}:
+*}
+
+code_type %quotett bool
+  (SML "bool")
+code_const %quotett True and False and "op \<and>"
+  (SML "true" and "false" and "_ andalso _")
+
+text {*
+  \noindent The @{command_def code_type} command takes a type constructor
+  as arguments together with a list of custom serialisations.  Each
+  custom serialisation starts with a target language identifier
+  followed by an expression, which during code serialisation is
+  inserted whenever the type constructor would occur.  For constants,
+  @{command_def code_const} implements the corresponding mechanism.  Each
+  ``@{verbatim "_"}'' in a serialisation expression is treated as a
+  placeholder for the type constructor's (the constant's) arguments.
+*}
+
+text %quotetypewriter {*
+  @{code_stmts in_interval (SML)}
+*}
+
+text {*
+  \noindent This still is not perfect: the parentheses around the
+  \qt{andalso} expression are superfluous.  Though the serialiser by
+  no means attempts to imitate the rich Isabelle syntax framework, it
+  provides some common idioms, notably associative infixes with
+  precedences which may be used here:
+*}
+
+code_const %quotett "op \<and>"
+  (SML infixl 1 "andalso")
+
+text %quotetypewriter {*
+  @{code_stmts in_interval (SML)}
+*}
+
+text {*
+  \noindent The attentive reader may ask how we assert that no
+  generated code will accidentally overwrite.  For this reason the
+  serialiser has an internal table of identifiers which have to be
+  avoided to be used for new declarations.  Initially, this table
+  typically contains the keywords of the target language.  It can be
+  extended manually, thus avoiding accidental overwrites, using the
+  @{command_def "code_reserved"} command:
+*}
+
+code_reserved %quote "\<SMLdummy>" bool true false andalso
+
+text {*
+  \noindent Next, we try to map HOL pairs to SML pairs, using the
+  infix ``@{verbatim "*"}'' type constructor and parentheses:
+*}
+(*<*)
+code_type %invisible prod
+  (SML)
+code_const %invisible Pair
+  (SML)
+(*>*)
+code_type %quotett prod
+  (SML infix 2 "*")
+code_const %quotett Pair
+  (SML "!((_),/ (_))")
+
+text {*
+  \noindent The initial bang ``@{verbatim "!"}'' tells the serialiser
+  never to put parentheses around the whole expression (they are
+  already present), while the parentheses around argument place
+  holders tell not to put parentheses around the arguments.  The slash
+  ``@{verbatim "/"}'' (followed by arbitrary white space) inserts a
+  space which may be used as a break if necessary during pretty
+  printing.
+
+  These examples give a glimpse what mechanisms custom serialisations
+  provide; however their usage requires careful thinking in order not
+  to introduce inconsistencies -- or, in other words: custom
+  serialisations are completely axiomatic.
+
+  A further noteworthy detail is that any special character in a
+  custom serialisation may be quoted using ``@{verbatim "'"}''; thus,
+  in ``@{verbatim "fn '_ => _"}'' the first ``@{verbatim "_"}'' is a
+  proper underscore while the second ``@{verbatim "_"}'' is a
+  placeholder.
+*}
+
+
+subsection {* @{text Haskell} serialisation *}
+
+text {*
+  For convenience, the default @{text HOL} setup for @{text Haskell}
+  maps the @{class equal} class to its counterpart in @{text Haskell},
+  giving custom serialisations for the class @{class equal} (by command
+  @{command_def code_class}) and its operation @{const [source] HOL.equal}
+*}
+
+code_class %quotett equal
+  (Haskell "Eq")
+
+code_const %quotett "HOL.equal"
+  (Haskell infixl 4 "==")
+
+text {*
+  \noindent A problem now occurs whenever a type which is an instance
+  of @{class equal} in @{text HOL} is mapped on a @{text
+  Haskell}-built-in type which is also an instance of @{text Haskell}
+  @{text Eq}:
+*}
+
+typedecl %quote bar
+
+instantiation %quote bar :: equal
+begin
+
+definition %quote "HOL.equal (x\<Colon>bar) y \<longleftrightarrow> x = y"
+
+instance %quote by default (simp add: equal_bar_def)
+
+end %quote (*<*)
+
+(*>*) code_type %quotett bar
+  (Haskell "Integer")
+
+text {*
+  \noindent The code generator would produce an additional instance,
+  which of course is rejected by the @{text Haskell} compiler.  To
+  suppress this additional instance, use @{command_def "code_instance"}:
+*}
+
+code_instance %quotett bar :: equal
+  (Haskell -)
+
+
+subsection {* Enhancing the target language context \label{sec:include} *}
+
+text {*
+  In rare cases it is necessary to \emph{enrich} the context of a
+  target language; this is accomplished using the @{command_def
+  "code_include"} command:
+*}
+
+code_include %quotett Haskell "Errno"
+{*errno i = error ("Error number: " ++ show i)*}
+
+code_reserved %quotett Haskell Errno
+
+text {*
+  \noindent Such named @{text include}s are then prepended to every
+  generated code.  Inspect such code in order to find out how
+  @{command "code_include"} behaves with respect to a particular
+  target language.
+*}
+
+end
+