doc-src/Codegen/Thy/Adaptation.thy
author haftmann
Thu, 02 Sep 2010 16:41:41 +0200
changeset 39063 5f9692dd621f
parent 38506 03d767575713
child 39643 29cc021398fc
permissions -rw-r--r--
adapted to change eq -> equal

theory Adaptation
imports Setup
begin

setup %invisible {* Code_Target.extend_target ("\<SML>", ("SML", K I)) *}

section {* Adaptation to target languages \label{sec:adaptation} *}

subsection {* Adapting code generation *}

text {*
  The aspects of code generation introduced so far have two aspects
  in common:

  \begin{itemize}

    \item They act uniformly, without reference to a specific target
       language.

    \item They are \emph{safe} in the sense that as long as you trust
       the code generator meta theory and implementation, you cannot
       produce programs that yield results which are not derivable in
       the logic.

  \end{itemize}

  \noindent In this section we will introduce means to \emph{adapt}
  the serialiser to a specific target language, i.e.~to print program
  fragments in a way which accommodates \qt{already existing}
  ingredients of a target language environment, for three reasons:

  \begin{itemize}
    \item improving readability and aesthetics of generated code
    \item gaining efficiency
    \item interface with language parts which have no direct counterpart
      in @{text "HOL"} (say, imperative data structures)
  \end{itemize}

  \noindent Generally, you should avoid using those features yourself
  \emph{at any cost}:

  \begin{itemize}

    \item The safe configuration methods act uniformly on every target
      language, whereas for adaptation you have to treat each target
      language separately.

    \item Application is extremely tedious since there is no
      abstraction which would allow for a static check, making it easy
      to produce garbage.

    \item Subtle errors can be introduced unconsciously.

  \end{itemize}

  \noindent However, even if you ought refrain from setting up
  adaptation yourself, already the @{text "HOL"} comes with some
  reasonable default adaptations (say, using target language list
  syntax).  There also some common adaptation cases which you can
  setup by importing particular library theories.  In order to
  understand these, we provide some clues here; these however are not
  supposed to replace a careful study of the sources.
*}


subsection {* The adaptation principle *}

text {*
  Figure \ref{fig:adaptation} illustrates what \qt{adaptation} is
  conceptually supposed to be:

  \begin{figure}[here]
    \includegraphics{adaptation}
    \caption{The adaptation principle}
    \label{fig:adaptation}
  \end{figure}

  \noindent In the tame view, code generation acts as broker between
  @{text logic}, @{text "intermediate language"} and @{text "target
  language"} by means of @{text translation} and @{text
  serialisation}; for the latter, the serialiser has to observe the
  structure of the @{text language} itself plus some @{text reserved}
  keywords which have to be avoided for generated code.  However, if
  you consider @{text adaptation} mechanisms, the code generated by
  the serializer is just the tip of the iceberg:

  \begin{itemize}

    \item @{text serialisation} can be \emph{parametrised} such that
      logical entities are mapped to target-specific ones
      (e.g. target-specific list syntax, see also
      \secref{sec:adaptation_mechanisms})

    \item Such parametrisations can involve references to a
      target-specific standard @{text library} (e.g. using the @{text
      Haskell} @{verbatim Maybe} type instead of the @{text HOL}
      @{type "option"} type); if such are used, the corresponding
      identifiers (in our example, @{verbatim Maybe}, @{verbatim
      Nothing} and @{verbatim Just}) also have to be considered @{text
      reserved}.

    \item Even more, the user can enrich the library of the
      target-language by providing code snippets (\qt{@{text
      "includes"}}) which are prepended to any generated code (see
      \secref{sec:include}); this typically also involves further
      @{text reserved} identifiers.

  \end{itemize}

  \noindent As figure \ref{fig:adaptation} illustrates, all these
  adaptation mechanisms have to act consistently; it is at the
  discretion of the user to take care for this.
*}

subsection {* Common adaptation patterns *}

text {*
  The @{theory HOL} @{theory Main} theory already provides a code
  generator setup which should be suitable for most applications.
  Common extensions and modifications are available by certain
  theories of the @{text HOL} library; beside being useful in
  applications, they may serve as a tutorial for customising the code
  generator setup (see below \secref{sec:adaptation_mechanisms}).

  \begin{description}

    \item[@{theory "Code_Integer"}] represents @{text HOL} integers by
       big integer literals in target languages.

    \item[@{theory "Code_Char"}] represents @{text HOL} characters by
       character literals in target languages.

    \item[@{theory "Code_Char_chr"}] like @{text "Code_Char"}, but
       also offers treatment of character codes; includes @{theory
       "Code_Char"}.

    \item[@{theory "Efficient_Nat"}] \label{eff_nat} implements
       natural numbers by integers, which in general will result in
       higher efficiency; pattern matching with @{term "0\<Colon>nat"} /
       @{const "Suc"} is eliminated; includes @{theory "Code_Integer"}
       and @{theory "Code_Numeral"}.

    \item[@{theory "Code_Numeral"}] provides an additional datatype
       @{typ index} which is mapped to target-language built-in
       integers.  Useful for code setups which involve e.g.~indexing
       of target-language arrays.

    \item[@{theory "String"}] provides an additional datatype @{typ
       String.literal} which is isomorphic to strings; @{typ
       String.literal}s are mapped to target-language strings.  Useful
       for code setups which involve e.g.~printing (error) messages.

  \end{description}

  \begin{warn}
    When importing any of these theories, they should form the last
    items in an import list.  Since these theories adapt the code
    generator setup in a non-conservative fashion, strange effects may
    occur otherwise.
  \end{warn}
*}


subsection {* Parametrising serialisation \label{sec:adaptation_mechanisms} *}

text {*
  Consider the following function and its corresponding SML code:
*}

primrec %quote in_interval :: "nat \<times> nat \<Rightarrow> nat \<Rightarrow> bool" where
  "in_interval (k, l) n \<longleftrightarrow> k \<le> n \<and> n \<le> l"
(*<*)
code_type %invisible bool
  (SML)
code_const %invisible True and False and "op \<and>" and Not
  (SML and and and)
(*>*)
text %quote {*@{code_stmts in_interval (SML)}*}

text {*
  \noindent Though this is correct code, it is a little bit
  unsatisfactory: boolean values and operators are materialised as
  distinguished entities with have nothing to do with the SML-built-in
  notion of \qt{bool}.  This results in less readable code;
  additionally, eager evaluation may cause programs to loop or break
  which would perfectly terminate when the existing SML @{verbatim
  "bool"} would be used.  To map the HOL @{typ bool} on SML @{verbatim
  "bool"}, we may use \qn{custom serialisations}:
*}

code_type %quotett bool
  (SML "bool")
code_const %quotett True and False and "op \<and>"
  (SML "true" and "false" and "_ andalso _")

text {*
  \noindent The @{command_def code_type} command takes a type constructor
  as arguments together with a list of custom serialisations.  Each
  custom serialisation starts with a target language identifier
  followed by an expression, which during code serialisation is
  inserted whenever the type constructor would occur.  For constants,
  @{command_def code_const} implements the corresponding mechanism.  Each
  ``@{verbatim "_"}'' in a serialisation expression is treated as a
  placeholder for the type constructor's (the constant's) arguments.
*}

text %quote {*@{code_stmts in_interval (SML)}*}

text {*
  \noindent This still is not perfect: the parentheses around the
  \qt{andalso} expression are superfluous.  Though the serialiser by
  no means attempts to imitate the rich Isabelle syntax framework, it
  provides some common idioms, notably associative infixes with
  precedences which may be used here:
*}

code_const %quotett "op \<and>"
  (SML infixl 1 "andalso")

text %quote {*@{code_stmts in_interval (SML)}*}

text {*
  \noindent The attentive reader may ask how we assert that no
  generated code will accidentally overwrite.  For this reason the
  serialiser has an internal table of identifiers which have to be
  avoided to be used for new declarations.  Initially, this table
  typically contains the keywords of the target language.  It can be
  extended manually, thus avoiding accidental overwrites, using the
  @{command_def "code_reserved"} command:
*}

code_reserved %quote "\<SML>" bool true false andalso

text {*
  \noindent Next, we try to map HOL pairs to SML pairs, using the
  infix ``@{verbatim "*"}'' type constructor and parentheses:
*}
(*<*)
code_type %invisible prod
  (SML)
code_const %invisible Pair
  (SML)
(*>*)
code_type %quotett prod
  (SML infix 2 "*")
code_const %quotett Pair
  (SML "!((_),/ (_))")

text {*
  \noindent The initial bang ``@{verbatim "!"}'' tells the serialiser
  never to put parentheses around the whole expression (they are
  already present), while the parentheses around argument place
  holders tell not to put parentheses around the arguments.  The slash
  ``@{verbatim "/"}'' (followed by arbitrary white space) inserts a
  space which may be used as a break if necessary during pretty
  printing.

  These examples give a glimpse what mechanisms custom serialisations
  provide; however their usage requires careful thinking in order not
  to introduce inconsistencies -- or, in other words: custom
  serialisations are completely axiomatic.

  A further noteworthy details is that any special character in a
  custom serialisation may be quoted using ``@{verbatim "'"}''; thus,
  in ``@{verbatim "fn '_ => _"}'' the first ``@{verbatim "_"}'' is a
  proper underscore while the second ``@{verbatim "_"}'' is a
  placeholder.
*}


subsection {* @{text Haskell} serialisation *}

text {*
  For convenience, the default @{text HOL} setup for @{text Haskell}
  maps the @{class equal} class to its counterpart in @{text Haskell},
  giving custom serialisations for the class @{class equal} (by command
  @{command_def code_class}) and its operation @{const HOL.equal}
*}

code_class %quotett equal
  (Haskell "Eq")

code_const %quotett "op ="
  (Haskell infixl 4 "==")

text {*
  \noindent A problem now occurs whenever a type which is an instance
  of @{class equal} in @{text HOL} is mapped on a @{text
  Haskell}-built-in type which is also an instance of @{text Haskell}
  @{text Eq}:
*}

typedecl %quote bar

instantiation %quote bar :: equal
begin

definition %quote "HOL.equal (x\<Colon>bar) y \<longleftrightarrow> x = y"

instance %quote by default (simp add: equal_bar_def)

end %quote (*<*)

(*>*) code_type %quotett bar
  (Haskell "Integer")

text {*
  \noindent The code generator would produce an additional instance,
  which of course is rejected by the @{text Haskell} compiler.  To
  suppress this additional instance, use @{command_def "code_instance"}:
*}

code_instance %quotett bar :: equal
  (Haskell -)


subsection {* Enhancing the target language context \label{sec:include} *}

text {*
  In rare cases it is necessary to \emph{enrich} the context of a
  target language; this is accomplished using the @{command_def
  "code_include"} command:
*}

code_include %quotett Haskell "Errno"
{*errno i = error ("Error number: " ++ show i)*}

code_reserved %quotett Haskell Errno

text {*
  \noindent Such named @{text include}s are then prepended to every
  generated code.  Inspect such code in order to find out how
  @{command "code_include"} behaves with respect to a particular
  target language.
*}

end