doc-src/IsarAdvanced/Codegen/Thy/Adaption.thy
author haftmann
Fri Oct 24 10:41:13 2008 +0200 (2008-10-24)
changeset 28679 d7384e8e99b3
parent 28635 cc53d2ab0170
child 28714 1992553cccfe
permissions -rw-r--r--
explicit namings for generated code
haftmann@28213
     1
theory Adaption
haftmann@28213
     2
imports Setup
haftmann@28213
     3
begin
haftmann@28213
     4
haftmann@28679
     5
setup %invisible {* Code_Target.extend_target ("\<SML>", ("SML", K I)) *}
haftmann@28561
     6
haftmann@28419
     7
section {* Adaption to target languages \label{sec:adaption} *}
haftmann@28419
     8
haftmann@28561
     9
subsection {* Adapting code generation *}
haftmann@28561
    10
haftmann@28561
    11
text {*
haftmann@28561
    12
  The aspects of code generation introduced so far have two properties
haftmann@28561
    13
  in common:
haftmann@28561
    14
haftmann@28561
    15
  \begin{itemize}
haftmann@28561
    16
    \item They act uniformly, without reference to a specific
haftmann@28561
    17
       target language.
haftmann@28561
    18
    \item They are \emph{safe} in the sense that as long as you trust
haftmann@28561
    19
       the code generator meta theory and implementation, you cannot
haftmann@28561
    20
       produce programs that yield results which are not derivable
haftmann@28561
    21
       in the logic.
haftmann@28561
    22
  \end{itemize}
haftmann@28561
    23
haftmann@28561
    24
  \noindent In this section we will introduce means to \emph{adapt} the serialiser
haftmann@28561
    25
  to a specific target language, i.e.~to print program fragments
haftmann@28593
    26
  in a way which accommodates \qt{already existing} ingredients of
haftmann@28561
    27
  a target language environment, for three reasons:
haftmann@28561
    28
haftmann@28561
    29
  \begin{itemize}
haftmann@28593
    30
    \item improving readability and aesthetics of generated code
haftmann@28561
    31
    \item gaining efficiency
haftmann@28561
    32
    \item interface with language parts which have no direct counterpart
haftmann@28561
    33
      in @{text "HOL"} (say, imperative data structures)
haftmann@28561
    34
  \end{itemize}
haftmann@28561
    35
haftmann@28561
    36
  \noindent Generally, you should avoid using those features yourself
haftmann@28561
    37
  \emph{at any cost}:
haftmann@28561
    38
haftmann@28561
    39
  \begin{itemize}
haftmann@28561
    40
    \item The safe configuration methods act uniformly on every target language,
haftmann@28561
    41
      whereas for adaption you have to treat each target language separately.
haftmann@28561
    42
    \item Application is extremely tedious since there is no abstraction
haftmann@28593
    43
      which would allow for a static check, making it easy to produce garbage.
haftmann@28593
    44
    \item More or less subtle errors can be introduced unconsciously.
haftmann@28561
    45
  \end{itemize}
haftmann@28561
    46
haftmann@28561
    47
  \noindent However, even if you ought to refrain from setting up adaption
haftmann@28561
    48
  yourself, @{text "HOL"} already comes with some reasonable default
haftmann@28561
    49
  adaptions (say, using target language list syntax).  There are also some
haftmann@28561
    50
  common adaption cases which you can set up by importing particular
haftmann@28561
    51
  library theories.  In order to understand these, we provide some clues here;
haftmann@28561
    52
  these however are not supposed to replace a careful study of the sources.
haftmann@28561
    53
*}
haftmann@28561
    54
haftmann@28561
    55
subsection {* The adaption principle *}
haftmann@28561
    56
haftmann@28561
    57
text {*
haftmann@28601
    58
  The following figure illustrates what \qt{adaption} is conceptually
haftmann@28601
    59
  supposed to be:
haftmann@28601
    60
haftmann@28601
    61
  \begin{figure}[here]
haftmann@28601
    62
    \begin{tikzpicture}[scale = 0.5]
haftmann@28601
    63
      \tikzstyle water=[color = blue, thick]
haftmann@28601
    64
      \tikzstyle ice=[color = black, very thick, cap = round, join = round, fill = white]
haftmann@28601
    65
      \tikzstyle process=[color = green, semithick, ->]
haftmann@28601
    66
      \tikzstyle adaption=[color = red, semithick, ->]
haftmann@28601
    67
      \tikzstyle target=[color = black]
haftmann@28601
    68
      \foreach \x in {0, ..., 24}
haftmann@28601
    69
        \draw[style=water] (\x, 0.25) sin + (0.25, 0.25) cos + (0.25, -0.25) sin
haftmann@28601
    70
          + (0.25, -0.25) cos + (0.25, 0.25);
haftmann@28601
    71
      \draw[style=ice] (1, 0) --
haftmann@28601
    72
        (3, 6) node[above, fill=white] {logic} -- (5, 0) -- cycle;
haftmann@28601
    73
      \draw[style=ice] (9, 0) --
haftmann@28601
    74
        (11, 6) node[above, fill=white] {intermediate language} -- (13, 0) -- cycle;
haftmann@28601
    75
      \draw[style=ice] (15, -6) --
haftmann@28601
    76
        (19, 6) node[above, fill=white] {target language} -- (23, -6) -- cycle;
haftmann@28601
    77
      \draw[style=process]
haftmann@28601
    78
        (3.5, 3) .. controls (7, 5) .. node[fill=white] {translation} (10.5, 3);
haftmann@28601
    79
      \draw[style=process]
haftmann@28601
    80
        (11.5, 3) .. controls (15, 5) .. node[fill=white] (serialisation) {serialisation} (18.5, 3);
haftmann@28601
    81
      \node (adaption) at (11, -2) [style=adaption] {adaption};
haftmann@28601
    82
      \node at (19, 3) [rotate=90] {generated};
haftmann@28601
    83
      \node at (19.5, -5) {language};
haftmann@28601
    84
      \node at (19.5, -3) {library};
haftmann@28601
    85
      \node (includes) at (19.5, -1) {includes};
haftmann@28609
    86
      \node (reserved) at (16.5, -3) [rotate=72] {reserved}; % proper 71.57
haftmann@28601
    87
      \draw[style=process]
haftmann@28601
    88
        (includes) -- (serialisation);
haftmann@28601
    89
      \draw[style=process]
haftmann@28601
    90
        (reserved) -- (serialisation);
haftmann@28601
    91
      \draw[style=adaption]
haftmann@28601
    92
        (adaption) -- (serialisation);
haftmann@28601
    93
      \draw[style=adaption]
haftmann@28601
    94
        (adaption) -- (includes);
haftmann@28601
    95
      \draw[style=adaption]
haftmann@28601
    96
        (adaption) -- (reserved);
haftmann@28601
    97
    \end{tikzpicture}
haftmann@28601
    98
    \caption{The adaption principle}
haftmann@28601
    99
    \label{fig:adaption}
haftmann@28601
   100
  \end{figure}
haftmann@28601
   101
haftmann@28601
   102
  \noindent In the tame view, code generation acts as broker between
haftmann@28601
   103
  @{text logic}, @{text "intermediate language"} and
haftmann@28601
   104
  @{text "target language"} by means of @{text translation} and
haftmann@28601
   105
  @{text serialisation};  for the latter, the serialiser has to observe
haftmann@28601
   106
  the structure of the @{text language} itself plus some @{text reserved}
haftmann@28601
   107
  keywords which have to be avoided for generated code.
haftmann@28601
   108
  However, if you consider @{text adaption} mechanisms, the code generated
haftmann@28601
   109
  by the serialiser is just the tip of the iceberg:
haftmann@28601
   110
haftmann@28601
   111
  \begin{itemize}
haftmann@28635
   112
    \item @{text serialisation} can be \emph{parametrised} such that
haftmann@28635
   113
      logical entities are mapped to target-specific ones
haftmann@28635
   114
      (e.g. target-specific list syntax,
haftmann@28635
   115
        see also \secref{sec:adaption_mechanisms})
haftmann@28635
   116
    \item Such parametrisations can involve references to a
haftmann@28635
   117
      target-specific standard @{text library} (e.g. using
haftmann@28635
   118
      the @{text Haskell} @{verbatim Maybe} type instead
haftmann@28635
   119
      of the @{text HOL} @{type "option"} type);
haftmann@28635
   120
      if such are used, the corresponding identifiers
haftmann@28635
   121
      (in our example, @{verbatim Maybe}, @{verbatim Nothing}
haftmann@28635
   122
      and @{verbatim Just}) also have to be considered @{text reserved}.
haftmann@28635
   123
    \item Even more, the user can enrich the library of the
haftmann@28635
   124
      target-language by providing code snippets
haftmann@28635
   125
      (\qt{@{text "includes"}}) which are prepended to
haftmann@28635
   126
      any generated code (see \secref{sec:include});  this typically
haftmann@28635
   127
      also involves further @{text reserved} identifiers.
haftmann@28601
   128
  \end{itemize}
haftmann@28635
   129
haftmann@28635
   130
  \noindent As figure~\ref{fig:adaption} illustrates, all these adaption mechanisms
haftmann@28635
   131
  have to act consistently;  it is at the discretion of the user
haftmann@28635
   132
  to take care of this.
haftmann@28561
   133
*}
haftmann@28561
   134
haftmann@28635
   135
subsection {* Common adaption patterns *}
haftmann@28419
   136
haftmann@28419
   137
text {*
haftmann@28428
   138
  The @{theory HOL} @{theory Main} theory already provides a code
haftmann@28419
   139
  generator setup
haftmann@28593
   140
  which should be suitable for most applications.  Common extensions
haftmann@28419
   141
  and modifications are available by certain theories of the @{text HOL}
haftmann@28419
   142
  library; besides being useful in applications, they may serve
haftmann@28419
   143
  as a tutorial for customising the code generator setup (see below
haftmann@28419
   144
  \secref{sec:adaption_mechanisms}).
haftmann@28419
   145
haftmann@28419
   146
  \begin{description}
haftmann@28419
   147
haftmann@28419
   148
    \item[@{theory "Code_Integer"}] represents @{text HOL} integers by big
haftmann@28419
   149
       integer literals in target languages.
haftmann@28419
   150
    \item[@{theory "Code_Char"}] represents @{text HOL} characters by 
haftmann@28419
   151
       character literals in target languages.
haftmann@28419
   152
    \item[@{theory "Code_Char_chr"}] like @{text "Code_Char"},
haftmann@28419
   153
       but also offers treatment of character codes; includes
haftmann@28561
   154
       @{theory "Code_Char"}.
haftmann@28419
   155
    \item[@{theory "Efficient_Nat"}] \label{eff_nat} implements natural numbers by integers,
haftmann@28419
   156
       which in general will result in higher efficiency; pattern
haftmann@28419
   157
       matching with @{term "0\<Colon>nat"} / @{const "Suc"}
haftmann@28561
   158
       is eliminated;  includes @{theory "Code_Integer"}
haftmann@28561
   159
       and @{theory "Code_Index"}.
haftmann@28419
   160
    \item[@{theory "Code_Index"}] provides an additional datatype
haftmann@28419
   161
       @{typ index} which is mapped to target-language built-in integers.
haftmann@28419
   162
       Useful for code setups which involve e.g. indexing of
haftmann@28419
   163
       target-language arrays.
haftmann@28419
   164
    \item[@{theory "Code_Message"}] provides an additional datatype
haftmann@28419
   165
       @{typ message_string} which is isomorphic to strings;
haftmann@28419
   166
       @{typ message_string}s are mapped to target-language strings.
haftmann@28419
   167
       Useful for code setups which involve e.g. printing (error) messages.
haftmann@28419
   168
haftmann@28419
   169
  \end{description}
haftmann@28419
   170
haftmann@28419
   171
  \begin{warn}
haftmann@28419
   172
    When importing any of these theories, they should form the last
haftmann@28419
   173
    items in an import list.  Since these theories adapt the
haftmann@28419
   174
    code generator setup in a non-conservative fashion,
haftmann@28419
   175
    strange effects may occur otherwise.
haftmann@28419
   176
  \end{warn}
haftmann@28419
   177
*}
haftmann@28419
   178
haftmann@28419
   179
haftmann@28635
   180
subsection {* Parametrising serialisation \label{sec:adaption_mechanisms} *}
haftmann@28419
   181
haftmann@28419
   182
text {*
haftmann@28561
   183
  Consider the following function and its corresponding
haftmann@28419
   184
  SML code:
haftmann@28419
   185
*}
haftmann@28419
   186
haftmann@28564
   187
primrec %quote in_interval :: "nat \<times> nat \<Rightarrow> nat \<Rightarrow> bool" where
haftmann@28419
   188
  "in_interval (k, l) n \<longleftrightarrow> k \<le> n \<and> n \<le> l"
haftmann@28447
   189
(*<*)
haftmann@28419
   190
code_type %invisible bool
haftmann@28419
   191
  (SML)
haftmann@28419
   192
code_const %invisible True and False and "op \<and>" and Not
haftmann@28419
   193
  (SML and and and)
haftmann@28447
   194
(*>*)
haftmann@28564
   195
text %quote {*@{code_stmts in_interval (SML)}*}
haftmann@28419
   196
haftmann@28419
   197
text {*
haftmann@28419
   198
  \noindent Though this is correct code, it is a little bit unsatisfactory:
haftmann@28419
   199
  boolean values and operators are materialised as distinguished
haftmann@28419
   200
  entities which have nothing to do with the SML-built-in notion
haftmann@28419
   201
  of \qt{bool}.  This results in less readable code;
haftmann@28419
   202
  additionally, eager evaluation may cause programs to
haftmann@28419
   203
  loop or break which would perfectly terminate when
haftmann@28419
   204
  the existing SML @{verbatim "bool"} would be used.  To map
haftmann@28419
   205
  the HOL @{typ bool} on SML @{verbatim "bool"}, we may use
haftmann@28419
   206
  \qn{custom serialisations}:
haftmann@28419
   207
*}
haftmann@28419
   208
haftmann@28564
   209
code_type %quotett bool
haftmann@28419
   210
  (SML "bool")
haftmann@28564
   211
code_const %quotett True and False and "op \<and>"
haftmann@28419
   212
  (SML "true" and "false" and "_ andalso _")
haftmann@28213
   213
haftmann@28419
   214
text {*
haftmann@28447
   215
  \noindent The @{command code_type} command takes a type constructor
haftmann@28419
   216
  as argument together with a list of custom serialisations.
haftmann@28419
   217
  Each custom serialisation starts with a target language
haftmann@28419
   218
  identifier followed by an expression, which during
haftmann@28419
   219
  code serialisation is inserted whenever the type constructor
haftmann@28419
   220
  would occur.  For constants, @{command code_const} implements
haftmann@28419
   221
  the corresponding mechanism.  Each ``@{verbatim "_"}'' in
haftmann@28419
   222
  a serialisation expression is treated as a placeholder
haftmann@28419
   223
  for the type constructor's (the constant's) arguments.
haftmann@28419
   224
*}
haftmann@28419
   225
haftmann@28564
   226
text %quote {*@{code_stmts in_interval (SML)}*}
haftmann@28419
   227
haftmann@28419
   228
text {*
haftmann@28419
   229
  \noindent This still is not perfect: the parentheses
haftmann@28419
   230
  around the \qt{andalso} expression are superfluous.
haftmann@28593
   231
  Though the serialiser
haftmann@28419
   232
  by no means attempts to imitate the rich Isabelle syntax
haftmann@28419
   233
  framework, it provides some common idioms, notably
haftmann@28419
   234
  associative infixes with precedences which may be used here:
haftmann@28419
   235
*}
haftmann@28419
   236
haftmann@28564
   237
code_const %quotett "op \<and>"
haftmann@28419
   238
  (SML infixl 1 "andalso")
haftmann@28419
   239
haftmann@28564
   240
text %quote {*@{code_stmts in_interval (SML)}*}
haftmann@28419
   241
haftmann@28419
   242
text {*
haftmann@28561
   243
  \noindent The attentive reader may ask how we assert that no generated
haftmann@28561
   244
  code will accidentally overwrite existing identifiers.  For this reason the serialiser has
haftmann@28561
   245
  an internal table of identifiers which must not be used
haftmann@28561
   246
  for new declarations.  Initially, this table typically contains the
haftmann@28561
   247
  keywords of the target language.  It can be extended manually, thus avoiding
haftmann@28561
   248
  accidental overwrites, using the @{command "code_reserved"} command:
haftmann@28561
   249
*}
haftmann@28561
   250
haftmann@28601
   251
code_reserved %quote "\<SML>" bool true false andalso
haftmann@28561
   252
haftmann@28561
   253
text {*
haftmann@28447
   254
  \noindent Next, we try to map HOL pairs to SML pairs, using the
haftmann@28419
   255
  infix ``@{verbatim "*"}'' type constructor and parentheses:
haftmann@28419
   256
*}
haftmann@28447
   257
(*<*)
haftmann@28419
   258
code_type %invisible *
haftmann@28419
   259
  (SML)
haftmann@28419
   260
code_const %invisible Pair
haftmann@28419
   261
  (SML)
haftmann@28447
   262
(*>*)
haftmann@28564
   263
code_type %quotett *
haftmann@28419
   264
  (SML infix 2 "*")
haftmann@28564
   265
code_const %quotett Pair
haftmann@28419
   266
  (SML "!((_),/ (_))")
haftmann@28419
   267
haftmann@28419
   268
text {*
haftmann@28593
   269
  \noindent The initial bang ``@{verbatim "!"}'' tells the serialiser
haftmann@28561
   270
  never to put
haftmann@28419
   271
  parentheses around the whole expression (they are already present),
haftmann@28419
   272
  while the parentheses around argument place holders
haftmann@28419
   273
  tell it not to put parentheses around the arguments.
haftmann@28419
   274
  The slash ``@{verbatim "/"}'' (followed by arbitrary white space)
haftmann@28419
   275
  inserts a space which may be used as a break if necessary
haftmann@28419
   276
  during pretty printing.
haftmann@28419
   277
haftmann@28419
   278
  These examples give a glimpse of what mechanisms
haftmann@28419
   279
  custom serialisations provide; however their usage
haftmann@28419
   280
  requires careful thinking in order not to introduce
haftmann@28419
   281
  inconsistencies -- or, in other words:
haftmann@28419
   282
  custom serialisations are completely axiomatic.
haftmann@28419
   283
haftmann@28419
   284
  A further noteworthy detail is that any special
haftmann@28419
   285
  character in a custom serialisation may be quoted
haftmann@28419
   286
  using ``@{verbatim "'"}''; thus, in
haftmann@28419
   287
  ``@{verbatim "fn '_ => _"}'' the first
haftmann@28419
   288
  ``@{verbatim "_"}'' is a proper underscore while the
haftmann@28419
   289
  second ``@{verbatim "_"}'' is a placeholder.
haftmann@28419
   290
*}
haftmann@28419
   291
haftmann@28419
   292
haftmann@28419
   293
subsection {* @{text Haskell} serialisation *}
haftmann@28419
   294
haftmann@28419
   295
text {*
haftmann@28419
   296
  For convenience, the default
haftmann@28419
   297
  @{text HOL} setup for @{text Haskell} maps the @{class eq} class to
haftmann@28419
   298
  its counterpart in @{text Haskell}, giving custom serialisations
haftmann@28419
   299
  for the class @{class eq} (by command @{command code_class}) and its operation
haftmann@28419
   300
  @{const HOL.eq}
haftmann@28419
   301
*}
haftmann@28419
   302
haftmann@28564
   303
code_class %quotett eq
haftmann@28419
   304
  (Haskell "Eq" where "HOL.eq" \<equiv> "(==)")
haftmann@28419
   305
haftmann@28564
   306
code_const %quotett "op ="
haftmann@28419
   307
  (Haskell infixl 4 "==")
haftmann@28419
   308
haftmann@28419
   309
text {*
haftmann@28447
   310
  \noindent A problem now occurs whenever a type which
haftmann@28419
   311
  is an instance of @{class eq} in @{text HOL} is mapped
haftmann@28419
   312
  on a @{text Haskell}-built-in type which is also an instance
haftmann@28419
   313
  of @{text Haskell} @{text Eq}:
haftmann@28419
   314
*}
haftmann@28419
   315
haftmann@28564
   316
typedecl %quote bar
haftmann@28419
   317
haftmann@28564
   318
instantiation %quote bar :: eq
haftmann@28419
   319
begin
haftmann@28419
   320
haftmann@28564
   321
definition %quote "eq_class.eq (x\<Colon>bar) y \<longleftrightarrow> x = y"
haftmann@28419
   322
haftmann@28564
   323
instance %quote by default (simp add: eq_bar_def)
haftmann@28213
   324
haftmann@28564
   325
end %quote
haftmann@28419
   326
haftmann@28564
   327
code_type %quotett bar
haftmann@28419
   328
  (Haskell "Integer")
haftmann@28419
   329
haftmann@28419
   330
text {*
haftmann@28447
   331
  \noindent The code generator would produce
haftmann@28593
   332
  an additional instance, which of course is rejected by the @{text Haskell}
haftmann@28419
   333
  compiler.
haftmann@28419
   334
  To suppress this additional instance, use
haftmann@28419
   335
  @{text "code_instance"}:
haftmann@28419
   336
*}
haftmann@28419
   337
haftmann@28564
   338
code_instance %quotett bar :: eq
haftmann@28419
   339
  (Haskell -)
haftmann@28419
   340
haftmann@28561
   341
haftmann@28635
   342
subsection {* Enhancing the target language context \label{sec:include} *}
haftmann@28561
   343
haftmann@28561
   344
text {*
haftmann@28593
   345
  In rare cases it is necessary to \emph{enrich} the context of a
haftmann@28561
   346
  target language;  this is accomplished using the @{command "code_include"}
haftmann@28561
   347
  command:
haftmann@28561
   348
*}
haftmann@28561
   349
haftmann@28564
   350
code_include %quotett Haskell "Errno"
haftmann@28561
   351
{*errno i = error ("Error number: " ++ show i)*}
haftmann@28561
   352
haftmann@28564
   353
code_reserved %quotett Haskell Errno
haftmann@28561
   354
haftmann@28561
   355
text {*
haftmann@28561
   356
  \noindent Such named @{text include}s are then prepended to all generated code.
haftmann@28561
   357
  Inspect such code in order to find out how @{command "code_include"} behaves
haftmann@28561
   358
  with respect to a particular target language.
haftmann@28561
   359
*}
haftmann@28561
   360
haftmann@28419
   361
end