src/Doc/Codegen/Adaptation.thy
author haftmann
Mon Feb 06 20:56:34 2017 +0100 (2017-02-06)
changeset 64990 c6a7de505796
parent 63680 6e1e8b5abbfa
child 65041 2525e680f94f
permissions -rw-r--r--
more explicit errors in pathological cases
haftmann@31050
     1
theory Adaptation
haftmann@28213
     2
imports Setup
haftmann@28213
     3
begin
haftmann@28213
     4
haftmann@59377
     5
setup %invisible \<open>Code_Target.add_derived_target ("\<SML>", [("SML", I)])
haftmann@59377
     6
  #> Code_Target.add_derived_target ("\<SMLdummy>", [("Haskell", I)])\<close>
haftmann@28561
     7
haftmann@59377
     8
section \<open>Adaptation to target languages \label{sec:adaptation}\<close>
haftmann@28419
     9
haftmann@59377
    10
subsection \<open>Adapting code generation\<close>
haftmann@28561
    11
haftmann@59377
    12
text \<open>
haftmann@28561
    13
  The aspects of code generation introduced so far have two properties
haftmann@28561
    14
  in common:
haftmann@28561
    15
haftmann@28561
    16
  \begin{itemize}
haftmann@38450
    17
haftmann@38450
    18
    \item They act uniformly, without reference to a specific target
haftmann@38450
    19
       language.
haftmann@38450
    20
haftmann@28561
    21
    \item They are \emph{safe} in the sense that as long as you trust
haftmann@28561
    22
       the code generator meta theory and implementation, you cannot
haftmann@38450
    23
       produce programs that yield results which are not derivable in
haftmann@38450
    24
       the logic.
haftmann@38450
    25
haftmann@28561
    26
  \end{itemize}
haftmann@28561
    27
haftmann@38450
    28
  \noindent In this section we will introduce means to \emph{adapt}
haftmann@38450
    29
  the serialiser to a specific target language, i.e.~to print program
haftmann@38450
    30
  fragments in a way which accommodates \qt{already existing}
haftmann@38450
    31
  ingredients of a target language environment, for three reasons:
haftmann@28561
    32
haftmann@28561
    33
  \begin{itemize}
haftmann@28593
    34
    \item improving readability and aesthetics of generated code
haftmann@28561
    35
    \item gaining efficiency
haftmann@28561
    36
    \item interface with language parts which have no direct counterpart
haftmann@28561
    37
      in @{text "HOL"} (say, imperative data structures)
haftmann@28561
    38
  \end{itemize}
haftmann@28561
    39
haftmann@28561
    40
  \noindent Generally, you should avoid using those features yourself
haftmann@28561
    41
  \emph{at any cost}:
haftmann@28561
    42
haftmann@28561
    43
  \begin{itemize}
haftmann@38450
    44
haftmann@38450
    45
    \item The safe configuration methods act uniformly on every target
haftmann@38450
    46
      language, whereas for adaptation you have to treat each target
haftmann@38450
    47
      language separately.
haftmann@38450
    48
haftmann@38450
    49
    \item Application is extremely tedious since there is no
haftmann@38450
    50
      abstraction which would allow for a static check, making it easy
haftmann@38450
    51
      to produce garbage.
haftmann@38450
    52
paulson@34155
    53
    \item Subtle errors can be introduced unconsciously.
haftmann@38450
    54
haftmann@28561
    55
  \end{itemize}
haftmann@28561
    56
haftmann@38450
    57
  \noindent However, even if you ought to refrain from setting up
haftmann@51162
    58
  adaptation yourself, already @{text "HOL"} comes with some
haftmann@38450
    59
  reasonable default adaptations (say, using target language list
haftmann@38450
    60
  syntax).  There are also some common adaptation cases which you can
haftmann@38450
    61
  set up by importing particular library theories.  In order to
haftmann@38450
    62
  understand these, we provide some clues here; these however are not
haftmann@38450
    63
  supposed to replace a careful study of the sources.
haftmann@59377
    64
\<close>
haftmann@28561
    65
haftmann@38450
    66
haftmann@59377
    67
subsection \<open>The adaptation principle\<close>
haftmann@28561
    68
haftmann@59377
    69
text \<open>
haftmann@38450
    70
  Figure~\ref{fig:adaptation} illustrates what \qt{adaptation} is
haftmann@38450
    71
  conceptually supposed to be:
haftmann@28601
    72
wenzelm@60768
    73
  \begin{figure}[h]
wenzelm@52742
    74
    \begin{tikzpicture}[scale = 0.5]
wenzelm@52742
    75
      \tikzstyle water=[color = blue, thick]
wenzelm@52742
    76
      \tikzstyle ice=[color = black, very thick, cap = round, join = round, fill = white]
wenzelm@52742
    77
      \tikzstyle process=[color = green, semithick, ->]
wenzelm@52742
    78
      \tikzstyle adaptation=[color = red, semithick, ->]
wenzelm@52742
    79
      \tikzstyle target=[color = black]
wenzelm@52742
    80
      \foreach \x in {0, ..., 24}
wenzelm@52742
    81
        \draw[style=water] (\x, 0.25) sin + (0.25, 0.25) cos + (0.25, -0.25) sin
wenzelm@52742
    82
          + (0.25, -0.25) cos + (0.25, 0.25);
wenzelm@52742
    83
      \draw[style=ice] (1, 0) --
wenzelm@52742
    84
        (3, 6) node[above, fill=white] {logic} -- (5, 0) -- cycle;
wenzelm@52742
    85
      \draw[style=ice] (9, 0) --
wenzelm@52742
    86
        (11, 6) node[above, fill=white] {intermediate language} -- (13, 0) -- cycle;
wenzelm@52742
    87
      \draw[style=ice] (15, -6) --
wenzelm@52742
    88
        (19, 6) node[above, fill=white] {target language} -- (23, -6) -- cycle;
wenzelm@52742
    89
      \draw[style=process]
wenzelm@52742
    90
        (3.5, 3) .. controls (7, 5) .. node[fill=white] {translation} (10.5, 3);
wenzelm@52742
    91
      \draw[style=process]
wenzelm@52742
    92
        (11.5, 3) .. controls (15, 5) .. node[fill=white] (serialisation) {serialisation} (18.5, 3);
wenzelm@52742
    93
      \node (adaptation) at (11, -2) [style=adaptation] {adaptation};
wenzelm@52742
    94
      \node at (19, 3) [rotate=90] {generated};
wenzelm@52742
    95
      \node at (19.5, -5) {language};
wenzelm@52742
    96
      \node at (19.5, -3) {library};
wenzelm@52742
    97
      \node (includes) at (19.5, -1) {includes};
wenzelm@52742
    98
      \node (reserved) at (16.5, -3) [rotate=72] {reserved}; % proper 71.57
wenzelm@52742
    99
      \draw[style=process]
wenzelm@52742
   100
        (includes) -- (serialisation);
wenzelm@52742
   101
      \draw[style=process]
wenzelm@52742
   102
        (reserved) -- (serialisation);
wenzelm@52742
   103
      \draw[style=adaptation]
wenzelm@52742
   104
        (adaptation) -- (serialisation);
wenzelm@52742
   105
      \draw[style=adaptation]
wenzelm@52742
   106
        (adaptation) -- (includes);
wenzelm@52742
   107
      \draw[style=adaptation]
wenzelm@52742
   108
        (adaptation) -- (reserved);
wenzelm@52742
   109
    \end{tikzpicture}
haftmann@31050
   110
    \caption{The adaptation principle}
haftmann@31050
   111
    \label{fig:adaptation}
haftmann@28601
   112
  \end{figure}
haftmann@28601
   113
haftmann@28601
   114
  \noindent In the tame view, code generation acts as a broker between
haftmann@38450
   115
  @{text logic}, @{text "intermediate language"} and @{text "target
haftmann@38450
   116
  language"} by means of @{text translation} and @{text
haftmann@38450
   117
  serialisation}; for the latter, the serialiser has to observe the
haftmann@38450
   118
  structure of the @{text language} itself plus some @{text reserved}
haftmann@38450
   119
  keywords which have to be avoided for generated code.  However, if
haftmann@38450
   120
  you consider @{text adaptation} mechanisms, the code generated by
haftmann@38450
   121
  the serialiser is just the tip of the iceberg:
haftmann@28601
   122
haftmann@28601
   123
  \begin{itemize}
haftmann@38450
   124
haftmann@28635
   125
    \item @{text serialisation} can be \emph{parametrised} such that
haftmann@28635
   126
      logical entities are mapped to target-specific ones
haftmann@38450
   127
      (e.g.~target-specific list syntax, see also
haftmann@38450
   128
      \secref{sec:adaptation_mechanisms})
haftmann@38450
   129
haftmann@28635
   130
    \item Such parametrisations can involve references to a
haftmann@38450
   131
      target-specific standard @{text library} (e.g.~using the @{text
haftmann@38450
   132
      Haskell} @{verbatim Maybe} type instead of the @{text HOL}
haftmann@38450
   133
      @{type "option"} type); if such are used, the corresponding
haftmann@38450
   134
      identifiers (in our example, @{verbatim Maybe}, @{verbatim
haftmann@38450
   135
      Nothing} and @{verbatim Just}) also have to be considered @{text
haftmann@38450
   136
      reserved}.
haftmann@38450
   137
haftmann@28635
   138
    \item Even more, the user can enrich the library of the
haftmann@38450
   139
      target-language by providing code snippets (\qt{@{text
haftmann@38450
   140
      "includes"}}) which are prepended to any generated code (see
haftmann@38450
   141
      \secref{sec:include}); this typically also involves further
haftmann@38450
   142
      @{text reserved} identifiers.
haftmann@38450
   143
haftmann@28601
   144
  \end{itemize}
haftmann@28635
   145
haftmann@38450
   146
  \noindent As Figure~\ref{fig:adaptation} illustrates, all these
haftmann@38450
   147
  adaptation mechanisms have to act consistently; it is at the
haftmann@38450
   148
  discretion of the user to take care of this.
haftmann@59377
   149
\<close>
haftmann@28561
   150
haftmann@59482
   151
subsection \<open>Common adaptation applications\<close>
haftmann@28419
   152
haftmann@59377
   153
text \<open>
haftmann@28428
   154
  The @{theory HOL} @{theory Main} theory already provides a code
haftmann@38450
   155
  generator setup which should be suitable for most applications.
haftmann@38450
   156
  Common extensions and modifications are available by certain
wenzelm@63680
   157
  theories in \<^dir>\<open>~~/src/HOL/Library\<close>; besides being useful in
haftmann@38450
   158
  applications, they may serve as a tutorial for customising the code
haftmann@38450
   159
  generator setup (see below \secref{sec:adaptation_mechanisms}).
haftmann@28419
   160
haftmann@28419
   161
  \begin{description}
haftmann@28419
   162
haftmann@51143
   163
    \item[@{theory "Code_Numeral"}] provides additional numeric
haftmann@51143
   164
       types @{typ integer} and @{typ natural} isomorphic to types
haftmann@51143
   165
       @{typ int} and @{typ nat} respectively.  Type @{typ integer}
haftmann@51143
   166
       is mapped to target-language built-in integers; @{typ natural}
haftmann@51143
   167
       is implemented as an abstract type over @{typ integer}.
haftmann@51143
   168
       Useful for code setups which involve e.g.~indexing
haftmann@51143
   169
       of target-language arrays.  Part of @{text "HOL-Main"}.
haftmann@51143
   170
haftmann@51143
   171
    \item[@{text "Code_Target_Int"}] implements type @{typ int}
haftmann@51143
   172
       by @{typ integer} and thus by target-language built-in integers.
haftmann@38450
   173
haftmann@51171
   174
    \item[@{text "Code_Binary_Nat"}] implements type
haftmann@51143
   175
       @{typ nat} using a binary rather than a linear representation,
haftmann@51143
   176
       which yields a considerable speedup for computations.
wenzelm@61076
   177
       Pattern matching with @{term "0::nat"} / @{const "Suc"} is eliminated
haftmann@51171
   178
       by a preprocessor.\label{abstract_nat}
haftmann@51143
   179
haftmann@51171
   180
    \item[@{text "Code_Target_Nat"}] implements type @{typ nat}
haftmann@51171
   181
       by @{typ integer} and thus by target-language built-in integers.
wenzelm@61076
   182
       Pattern matching with @{term "0::nat"} / @{const "Suc"} is eliminated
haftmann@51171
   183
       by a preprocessor.
haftmann@51143
   184
haftmann@51162
   185
    \item[@{text "Code_Target_Numeral"}] is a convenience theory
haftmann@51143
   186
       containing both @{text "Code_Target_Nat"} and
haftmann@51143
   187
       @{text "Code_Target_Int"}.
haftmann@38450
   188
haftmann@38450
   189
    \item[@{theory "String"}] provides an additional datatype @{typ
haftmann@38450
   190
       String.literal} which is isomorphic to strings; @{typ
haftmann@38450
   191
       String.literal}s are mapped to target-language strings.  Useful
haftmann@38450
   192
       for code setups which involve e.g.~printing (error) messages.
haftmann@46519
   193
       Part of @{text "HOL-Main"}.
haftmann@28419
   194
haftmann@59482
   195
    \item[@{text "Code_Char"}] represents @{text HOL} characters by
haftmann@59482
   196
       character literals in target languages.  \emph{Warning:} This
haftmann@59482
   197
       modifies adaptation in a non-conservative manner and thus
haftmann@59482
   198
       should always be imported \emph{last} in a theory header.
haftmann@59482
   199
haftmann@51162
   200
    \item[@{theory "IArray"}] provides a type @{typ "'a iarray"}
haftmann@51162
   201
       isomorphic to lists but implemented by (effectively immutable)
haftmann@51162
   202
       arrays \emph{in SML only}.
haftmann@28419
   203
haftmann@51162
   204
  \end{description}
haftmann@59377
   205
\<close>
haftmann@28419
   206
haftmann@28419
   207
haftmann@59377
   208
subsection \<open>Parametrising serialisation \label{sec:adaptation_mechanisms}\<close>
haftmann@28419
   209
haftmann@59377
   210
text \<open>
haftmann@38450
   211
  Consider the following function and its corresponding SML code:
haftmann@59377
   212
\<close>
haftmann@28419
   213
haftmann@28564
   214
primrec %quote in_interval :: "nat \<times> nat \<Rightarrow> nat \<Rightarrow> bool" where
haftmann@28419
   215
  "in_interval (k, l) n \<longleftrightarrow> k \<le> n \<and> n \<le> l"
haftmann@28447
   216
(*<*)
haftmann@52378
   217
code_printing %invisible
haftmann@52378
   218
  type_constructor bool \<rightharpoonup> (SML)
haftmann@52378
   219
| constant True \<rightharpoonup> (SML)
haftmann@52378
   220
| constant False \<rightharpoonup> (SML)
haftmann@52378
   221
| constant HOL.conj \<rightharpoonup> (SML)
haftmann@52378
   222
| constant Not \<rightharpoonup> (SML)
haftmann@28447
   223
(*>*)
haftmann@59377
   224
text %quotetypewriter \<open>
haftmann@39683
   225
  @{code_stmts in_interval (SML)}
haftmann@59377
   226
\<close>
haftmann@28419
   227
haftmann@59377
   228
text \<open>
haftmann@38450
   229
  \noindent Though this is correct code, it is a little bit
haftmann@38450
   230
  unsatisfactory: boolean values and operators are materialised as
haftmann@38450
   231
  distinguished entities which have nothing to do with the SML-built-in
haftmann@38450
   232
  notion of \qt{bool}.  This results in less readable code;
haftmann@38450
   233
  additionally, eager evaluation may cause programs to loop or break
haftmann@38450
   234
  which would perfectly terminate when the existing SML @{verbatim
haftmann@38450
   235
  "bool"} would be used.  To map the HOL @{typ bool} on SML @{verbatim
haftmann@38450
   236
  "bool"}, we may use \qn{custom serialisations}:
haftmann@59377
   237
\<close>
haftmann@28419
   238
haftmann@52378
   239
code_printing %quotett
haftmann@52378
   240
  type_constructor bool \<rightharpoonup> (SML) "bool"
haftmann@52378
   241
| constant True \<rightharpoonup> (SML) "true"
haftmann@52378
   242
| constant False \<rightharpoonup> (SML) "false"
haftmann@52378
   243
| constant HOL.conj \<rightharpoonup> (SML) "_ andalso _"
haftmann@28213
   244
haftmann@59377
   245
text \<open>
haftmann@52378
   246
  \noindent The @{command_def code_printing} command takes a series
haftmann@52378
   247
  of symbols (constants, type constructors, \ldots)
haftmann@52378
   248
  together with target-specific custom serialisations.  Each
haftmann@38450
   249
  custom serialisation starts with a target language identifier
haftmann@38450
   250
  followed by an expression, which during code serialisation is
haftmann@52378
   251
  inserted whenever the corresponding symbol would occur.  Each
haftmann@38450
   252
  ``@{verbatim "_"}'' in a serialisation expression is treated as a
haftmann@52378
   253
  placeholder for the constant's or the type constructor's arguments.
haftmann@59377
   254
\<close>
haftmann@28419
   255
haftmann@59377
   256
text %quotetypewriter \<open>
haftmann@39683
   257
  @{code_stmts in_interval (SML)}
haftmann@59377
   258
\<close>
haftmann@28419
   259
haftmann@59377
   260
text \<open>
haftmann@38450
   261
  \noindent This still is not perfect: the parentheses around the
haftmann@38450
   262
  \qt{andalso} expression are superfluous.  Though the serialiser by
haftmann@38450
   263
  no means attempts to imitate the rich Isabelle syntax framework, it
haftmann@38450
   264
  provides some common idioms, notably associative infixes with
haftmann@38450
   265
  precedences which may be used here:
haftmann@59377
   266
\<close>
haftmann@28419
   267
haftmann@52378
   268
code_printing %quotett
haftmann@52378
   269
  constant HOL.conj \<rightharpoonup> (SML) infixl 1 "andalso"
haftmann@28419
   270
haftmann@59377
   271
text %quotetypewriter \<open>
haftmann@39683
   272
  @{code_stmts in_interval (SML)}
haftmann@59377
   273
\<close>
haftmann@28419
   274
haftmann@59377
   275
text \<open>
haftmann@38450
   276
  \noindent The attentive reader may ask how we assert that no
haftmann@38450
   277
  generated code will accidentally overwrite these built-in names.  For this reason the
haftmann@38450
   278
  serialiser has an internal table of identifiers which have to be
haftmann@38450
   279
  avoided to be used for new declarations.  Initially, this table
haftmann@38450
   280
  typically contains the keywords of the target language.  It can be
haftmann@38450
   281
  extended manually, thus avoiding accidental overwrites, using the
haftmann@38505
   282
  @{command_def "code_reserved"} command:
haftmann@59377
   283
\<close>
haftmann@28561
   284
haftmann@40351
   285
code_reserved %quote "\<SMLdummy>" bool true false andalso
haftmann@28561
   286
haftmann@59377
   287
text \<open>
haftmann@28447
   288
  \noindent Next, we try to map HOL pairs to SML pairs, using the
haftmann@28419
   289
  infix ``@{verbatim "*"}'' type constructor and parentheses:
haftmann@59377
   290
\<close>
haftmann@28447
   291
(*<*)
haftmann@52378
   292
code_printing %invisible
haftmann@52378
   293
  type_constructor prod \<rightharpoonup> (SML)
haftmann@52378
   294
| constant Pair \<rightharpoonup> (SML)
haftmann@28447
   295
(*>*)
haftmann@52378
   296
code_printing %quotett
haftmann@52378
   297
  type_constructor prod \<rightharpoonup> (SML) infix 2 "*"
haftmann@52378
   298
| constant Pair \<rightharpoonup> (SML) "!((_),/ (_))"
haftmann@28419
   299
haftmann@59377
   300
text \<open>
haftmann@28593
   301
  \noindent The initial bang ``@{verbatim "!"}'' tells the serialiser
haftmann@38450
   302
  never to put parentheses around the whole expression (they are
haftmann@38450
   303
  already present), while the parentheses around argument place
haftmann@38450
   304
  holders tell not to put parentheses around the arguments.  The slash
haftmann@38450
   305
  ``@{verbatim "/"}'' (followed by arbitrary white space) inserts a
haftmann@38450
   306
  space which may be used as a break if necessary during pretty
haftmann@38450
   307
  printing.
haftmann@28419
   308
haftmann@38450
   309
  These examples give a glimpse of what mechanisms custom serialisations
haftmann@38450
   310
  provide; however their usage requires careful thinking in order not
haftmann@38450
   311
  to introduce inconsistencies -- or, in other words: custom
haftmann@38450
   312
  serialisations are completely axiomatic.
haftmann@28419
   313
haftmann@39643
   314
  A further noteworthy detail is that any special character in a
haftmann@38450
   315
  custom serialisation may be quoted using ``@{verbatim "'"}''; thus,
haftmann@38450
   316
  in ``@{verbatim "fn '_ => _"}'' the first ``@{verbatim "_"}'' is a
haftmann@38450
   317
  proper underscore while the second ``@{verbatim "_"}'' is a
haftmann@38450
   318
  placeholder.
haftmann@59377
   319
\<close>
haftmann@28419
   320
haftmann@28419
   321
haftmann@59377
   322
subsection \<open>@{text Haskell} serialisation\<close>
haftmann@28419
   323
haftmann@59377
   324
text \<open>
haftmann@38450
   325
  For convenience, the default @{text HOL} setup for @{text Haskell}
haftmann@39063
   326
  maps the @{class equal} class to its counterpart in @{text Haskell},
haftmann@52378
   327
  giving custom serialisations for the class @{class equal}
haftmann@52378
   328
  and its operation @{const [source] HOL.equal}.
haftmann@59377
   329
\<close>
haftmann@28419
   330
haftmann@52378
   331
code_printing %quotett
haftmann@52378
   332
  type_class equal \<rightharpoonup> (Haskell) "Eq"
haftmann@52378
   333
| constant HOL.equal \<rightharpoonup> (Haskell) infixl 4 "=="
haftmann@28419
   334
haftmann@59377
   335
text \<open>
haftmann@38450
   336
  \noindent A problem now occurs whenever a type which is an instance
haftmann@39063
   337
  of @{class equal} in @{text HOL} is mapped on a @{text
haftmann@38450
   338
  Haskell}-built-in type which is also an instance of @{text Haskell}
haftmann@38450
   339
  @{text Eq}:
haftmann@59377
   340
\<close>
haftmann@28419
   341
haftmann@28564
   342
typedecl %quote bar
haftmann@28419
   343
haftmann@39063
   344
instantiation %quote bar :: equal
haftmann@28419
   345
begin
haftmann@28419
   346
wenzelm@61076
   347
definition %quote "HOL.equal (x::bar) y \<longleftrightarrow> x = y"
haftmann@28419
   348
wenzelm@61169
   349
instance %quote by standard (simp add: equal_bar_def)
haftmann@28213
   350
haftmann@30880
   351
end %quote (*<*)
haftmann@30880
   352
haftmann@52378
   353
(*>*) code_printing %quotett
haftmann@52378
   354
  type_constructor bar \<rightharpoonup> (Haskell) "Integer"
haftmann@28419
   355
haftmann@59377
   356
text \<open>
haftmann@38450
   357
  \noindent The code generator would produce an additional instance,
haftmann@38450
   358
  which of course is rejected by the @{text Haskell} compiler.  To
haftmann@52378
   359
  suppress this additional instance:
haftmann@59377
   360
\<close>
haftmann@28419
   361
haftmann@52378
   362
code_printing %quotett
haftmann@52378
   363
  class_instance bar :: "HOL.equal" \<rightharpoonup> (Haskell) -
haftmann@28419
   364
haftmann@28561
   365
haftmann@59377
   366
subsection \<open>Enhancing the target language context \label{sec:include}\<close>
haftmann@28561
   367
haftmann@59377
   368
text \<open>
haftmann@28593
   369
  In rare cases it is necessary to \emph{enrich} the context of a
haftmann@52378
   370
  target language; this can also be accomplished using the @{command
haftmann@52378
   371
  "code_printing"} command:
haftmann@59377
   372
\<close>
haftmann@28561
   373
haftmann@52378
   374
code_printing %quotett
haftmann@59379
   375
  code_module "Errno" \<rightharpoonup> (Haskell)
haftmann@59379
   376
    \<open>errno i = error ("Error number: " ++ show i)\<close>
haftmann@28561
   377
haftmann@39745
   378
code_reserved %quotett Haskell Errno
haftmann@28561
   379
haftmann@59377
   380
text \<open>
haftmann@52378
   381
  \noindent Such named modules are then prepended to every
haftmann@38450
   382
  generated code.  Inspect such code in order to find out how
haftmann@52378
   383
  this behaves with respect to a particular
haftmann@38450
   384
  target language.
haftmann@59377
   385
\<close>
haftmann@28561
   386
haftmann@28419
   387
end
haftmann@46519
   388