doc-src/IsarAdvanced/Codegen/Thy/Adaption.thy
author haftmann
Fri Oct 10 15:23:33 2008 +0200 (2008-10-10)
changeset 28564 1358b1ddd915
parent 28561 056255ade52a
child 28593 f087237af65d
permissions -rw-r--r--
tuned
     1 theory Adaption
     2 imports Setup
     3 begin
     4 
     5 setup %invisible {* Code_Target.extend_target ("SML ", ("SML", I)) *}
     6 
     7 section {* Adaption to target languages \label{sec:adaption} *}
     8 
     9 subsection {* Adapting code generation *}
    10 
    11 text {*
    12   The aspects of code generation introduced so far have two aspects
    13   in common:
    14 
    15   \begin{itemize}
    16     \item They act uniformly, without reference to a specific
    17        target language.
    18     \item They are \emph{safe} in the sense that as long as you trust
    19        the code generator meta theory and implementation, you cannot
    20        produce programs that yield results which are not derivable
    21        in the logic.
    22   \end{itemize}
    23 
    24   \noindent In this section we will introduce means to \emph{adapt} the serialiser
    25   to a specific target language, i.e.~to print program fragments
    26   in a way which accomodates \qt{already existing} ingredients of
    27   a target language environment, for three reasons:
    28 
    29   \begin{itemize}
    30     \item improving readability and aethetics of generated code
    31     \item gaining efficiency
    32     \item interface with language parts which have no direct counterpart
    33       in @{text "HOL"} (say, imperative data structures)
    34   \end{itemize}
    35 
    36   \noindent Generally, you should avoid using those features yourself
    37   \emph{at any cost}:
    38 
    39   \begin{itemize}
    40     \item The safe configuration methods act uniformly on every target language,
    41       whereas for adaption you have to treat each target language separate.
    42     \item Application is extremely tedious since there is no abstraction
    43       which would allow for a static check, makeing it easy to produce garbage.
    44     \item More or less subtle erros can be introduced unconsciously.
    45   \end{itemize}
    46 
    47   \noindent However, even if you ought refrain from setting up adaption
    48   yourself, already the @{text "HOL"} comes with some reasonable default
    49   adaptions (say, using target language list syntax).  There also some
    50   common adaption cases which you can setup by importing particular
    51   library theories.  In order to understand these, we provide some clues here;
    52   these however are not supposed to replace a careful study of the sources.
    53 *}
    54 
    55 subsection {* The adaption principle *}
    56 
    57 text {*
    58   \begin{tikzpicture}
    59     \draw (0, 0) circle (1cm);
    60     \draw (0.5, 0) circle (0.5cm);
    61     \draw (0, 0.5) circle (0.5cm);
    62     \draw (-0.5, 0) circle (0.5cm);
    63     \draw (0, -0.5) circle (0.5cm);
    64   \end{tikzpicture}
    65 *}
    66 
    67 subsection {* Common adaption cases *}
    68 
    69 text {*
    70   The @{theory HOL} @{theory Main} theory already provides a code
    71   generator setup
    72   which should be suitable for most applications. Common extensions
    73   and modifications are available by certain theories of the @{text HOL}
    74   library; beside being useful in applications, they may serve
    75   as a tutorial for customising the code generator setup (see below
    76   \secref{sec:adaption_mechanisms}).
    77 
    78   \begin{description}
    79 
    80     \item[@{theory "Code_Integer"}] represents @{text HOL} integers by big
    81        integer literals in target languages.
    82     \item[@{theory "Code_Char"}] represents @{text HOL} characters by 
    83        character literals in target languages.
    84     \item[@{theory "Code_Char_chr"}] like @{text "Code_Char"},
    85        but also offers treatment of character codes; includes
    86        @{theory "Code_Char"}.
    87     \item[@{theory "Efficient_Nat"}] \label{eff_nat} implements natural numbers by integers,
    88        which in general will result in higher efficiency; pattern
    89        matching with @{term "0\<Colon>nat"} / @{const "Suc"}
    90        is eliminated;  includes @{theory "Code_Integer"}
    91        and @{theory "Code_Index"}.
    92     \item[@{theory "Code_Index"}] provides an additional datatype
    93        @{typ index} which is mapped to target-language built-in integers.
    94        Useful for code setups which involve e.g. indexing of
    95        target-language arrays.
    96     \item[@{theory "Code_Message"}] provides an additional datatype
    97        @{typ message_string} which is isomorphic to strings;
    98        @{typ message_string}s are mapped to target-language strings.
    99        Useful for code setups which involve e.g. printing (error) messages.
   100 
   101   \end{description}
   102 
   103   \begin{warn}
   104     When importing any of these theories, they should form the last
   105     items in an import list.  Since these theories adapt the
   106     code generator setup in a non-conservative fashion,
   107     strange effects may occur otherwise.
   108   \end{warn}
   109 *}
   110 
   111 
   112 subsection {* Adaption mechanisms \label{sec:adaption_mechanisms} *}
   113 
   114 text {*
   115   Consider the following function and its corresponding
   116   SML code:
   117 *}
   118 
   119 primrec %quote in_interval :: "nat \<times> nat \<Rightarrow> nat \<Rightarrow> bool" where
   120   "in_interval (k, l) n \<longleftrightarrow> k \<le> n \<and> n \<le> l"
   121 (*<*)
   122 code_type %invisible bool
   123   (SML)
   124 code_const %invisible True and False and "op \<and>" and Not
   125   (SML and and and)
   126 (*>*)
   127 text %quote {*@{code_stmts in_interval (SML)}*}
   128 
   129 text {*
   130   \noindent Though this is correct code, it is a little bit unsatisfactory:
   131   boolean values and operators are materialised as distinguished
   132   entities with have nothing to do with the SML-built-in notion
   133   of \qt{bool}.  This results in less readable code;
   134   additionally, eager evaluation may cause programs to
   135   loop or break which would perfectly terminate when
   136   the existing SML @{verbatim "bool"} would be used.  To map
   137   the HOL @{typ bool} on SML @{verbatim "bool"}, we may use
   138   \qn{custom serialisations}:
   139 *}
   140 
   141 code_type %quotett bool
   142   (SML "bool")
   143 code_const %quotett True and False and "op \<and>"
   144   (SML "true" and "false" and "_ andalso _")
   145 
   146 text {*
   147   \noindent The @{command code_type} command takes a type constructor
   148   as arguments together with a list of custom serialisations.
   149   Each custom serialisation starts with a target language
   150   identifier followed by an expression, which during
   151   code serialisation is inserted whenever the type constructor
   152   would occur.  For constants, @{command code_const} implements
   153   the corresponding mechanism.  Each ``@{verbatim "_"}'' in
   154   a serialisation expression is treated as a placeholder
   155   for the type constructor's (the constant's) arguments.
   156 *}
   157 
   158 text %quote {*@{code_stmts in_interval (SML)}*}
   159 
   160 text {*
   161   \noindent This still is not perfect: the parentheses
   162   around the \qt{andalso} expression are superfluous.
   163   Though the serializer
   164   by no means attempts to imitate the rich Isabelle syntax
   165   framework, it provides some common idioms, notably
   166   associative infixes with precedences which may be used here:
   167 *}
   168 
   169 code_const %quotett "op \<and>"
   170   (SML infixl 1 "andalso")
   171 
   172 text %quote {*@{code_stmts in_interval (SML)}*}
   173 
   174 text {*
   175   \noindent The attentive reader may ask how we assert that no generated
   176   code will accidentally overwrite.  For this reason the serialiser has
   177   an internal table of identifiers which have to be avoided to be used
   178   for new declarations.  Initially, this table typically contains the
   179   keywords of the target language.  It can be extended manually, thus avoiding
   180   accidental overwrites, using the @{command "code_reserved"} command:
   181 *}
   182 
   183 code_reserved %quote "SML " bool true false andalso
   184 
   185 text {*
   186   \noindent Next, we try to map HOL pairs to SML pairs, using the
   187   infix ``@{verbatim "*"}'' type constructor and parentheses:
   188 *}
   189 (*<*)
   190 code_type %invisible *
   191   (SML)
   192 code_const %invisible Pair
   193   (SML)
   194 (*>*)
   195 code_type %quotett *
   196   (SML infix 2 "*")
   197 code_const %quotett Pair
   198   (SML "!((_),/ (_))")
   199 
   200 text {*
   201   \noindent The initial bang ``@{verbatim "!"}'' tells the serializer
   202   never to put
   203   parentheses around the whole expression (they are already present),
   204   while the parentheses around argument place holders
   205   tell not to put parentheses around the arguments.
   206   The slash ``@{verbatim "/"}'' (followed by arbitrary white space)
   207   inserts a space which may be used as a break if necessary
   208   during pretty printing.
   209 
   210   These examples give a glimpse what mechanisms
   211   custom serialisations provide; however their usage
   212   requires careful thinking in order not to introduce
   213   inconsistencies -- or, in other words:
   214   custom serialisations are completely axiomatic.
   215 
   216   A further noteworthy details is that any special
   217   character in a custom serialisation may be quoted
   218   using ``@{verbatim "'"}''; thus, in
   219   ``@{verbatim "fn '_ => _"}'' the first
   220   ``@{verbatim "_"}'' is a proper underscore while the
   221   second ``@{verbatim "_"}'' is a placeholder.
   222 *}
   223 
   224 
   225 subsection {* @{text Haskell} serialisation *}
   226 
   227 text {*
   228   For convenience, the default
   229   @{text HOL} setup for @{text Haskell} maps the @{class eq} class to
   230   its counterpart in @{text Haskell}, giving custom serialisations
   231   for the class @{class eq} (by command @{command code_class}) and its operation
   232   @{const HOL.eq}
   233 *}
   234 
   235 code_class %quotett eq
   236   (Haskell "Eq" where "HOL.eq" \<equiv> "(==)")
   237 
   238 code_const %quotett "op ="
   239   (Haskell infixl 4 "==")
   240 
   241 text {*
   242   \noindent A problem now occurs whenever a type which
   243   is an instance of @{class eq} in @{text HOL} is mapped
   244   on a @{text Haskell}-built-in type which is also an instance
   245   of @{text Haskell} @{text Eq}:
   246 *}
   247 
   248 typedecl %quote bar
   249 
   250 instantiation %quote bar :: eq
   251 begin
   252 
   253 definition %quote "eq_class.eq (x\<Colon>bar) y \<longleftrightarrow> x = y"
   254 
   255 instance %quote by default (simp add: eq_bar_def)
   256 
   257 end %quote
   258 
   259 code_type %quotett bar
   260   (Haskell "Integer")
   261 
   262 text {*
   263   \noindent The code generator would produce
   264   an additional instance, which of course is rejectedby the @{text Haskell}
   265   compiler.
   266   To suppress this additional instance, use
   267   @{text "code_instance"}:
   268 *}
   269 
   270 code_instance %quotett bar :: eq
   271   (Haskell -)
   272 
   273 
   274 subsection {* Enhancing the target language context *}
   275 
   276 text {*
   277   In rare cases it is neccessary to \emph{enrich} the context of a
   278   target language;  this is accomplished using the @{command "code_include"}
   279   command:
   280 *}
   281 
   282 code_include %quotett Haskell "Errno"
   283 {*errno i = error ("Error number: " ++ show i)*}
   284 
   285 code_reserved %quotett Haskell Errno
   286 
   287 text {*
   288   \noindent Such named @{text include}s are then prepended to every generated code.
   289   Inspect such code in order to find out how @{command "code_include"} behaves
   290   with respect to a particular target language.
   291 *}
   292 
   293 end