src/Doc/Codegen/Adaptation.thy
author haftmann
Thu Feb 05 13:01:12 2015 +0100 (2015-02-05)
changeset 59482 9b590e32646a
parent 59379 c7f6f01ede15
child 60768 f47bd91fdc75
permissions -rw-r--r--
more explicit hint on import order
     1 theory Adaptation
     2 imports Setup
     3 begin
     4 
     5 setup %invisible \<open>Code_Target.add_derived_target ("\<SML>", [("SML", I)])
     6   #> Code_Target.add_derived_target ("\<SMLdummy>", [("Haskell", I)])\<close>
     7 
     8 section \<open>Adaptation to target languages \label{sec:adaptation}\<close>
     9 
    10 subsection \<open>Adapting code generation\<close>
    11 
    12 text \<open>
    13   The aspects of code generation introduced so far have two aspects
    14   in common:
    15 
    16   \begin{itemize}
    17 
    18     \item They act uniformly, without reference to a specific target
    19        language.
    20 
    21     \item They are \emph{safe} in the sense that as long as you trust
    22        the code generator meta theory and implementation, you cannot
    23        produce programs that yield results which are not derivable in
    24        the logic.
    25 
    26   \end{itemize}
    27 
    28   \noindent In this section we will introduce means to \emph{adapt}
    29   the serialiser to a specific target language, i.e.~to print program
    30   fragments in a way which accommodates \qt{already existing}
    31   ingredients of a target language environment, for three reasons:
    32 
    33   \begin{itemize}
    34     \item improving readability and aesthetics of generated code
    35     \item gaining efficiency
    36     \item interface with language parts which have no direct counterpart
    37       in @{text "HOL"} (say, imperative data structures)
    38   \end{itemize}
    39 
    40   \noindent Generally, you should avoid using those features yourself
    41   \emph{at any cost}:
    42 
    43   \begin{itemize}
    44 
    45     \item The safe configuration methods act uniformly on every target
    46       language, whereas for adaptation you have to treat each target
    47       language separately.
    48 
    49     \item Application is extremely tedious since there is no
    50       abstraction which would allow for a static check, making it easy
    51       to produce garbage.
    52 
    53     \item Subtle errors can be introduced unconsciously.
    54 
    55   \end{itemize}
    56 
    57   \noindent However, even if you ought refrain from setting up
    58   adaptation yourself, already @{text "HOL"} comes with some
    59   reasonable default adaptations (say, using target language list
    60   syntax).  There also some common adaptation cases which you can
    61   setup by importing particular library theories.  In order to
    62   understand these, we provide some clues here; these however are not
    63   supposed to replace a careful study of the sources.
    64 \<close>
    65 
    66 
    67 subsection \<open>The adaptation principle\<close>
    68 
    69 text \<open>
    70   Figure \ref{fig:adaptation} illustrates what \qt{adaptation} is
    71   conceptually supposed to be:
    72 
    73   \begin{figure}[here]
    74     \begin{tikzpicture}[scale = 0.5]
    75       \tikzstyle water=[color = blue, thick]
    76       \tikzstyle ice=[color = black, very thick, cap = round, join = round, fill = white]
    77       \tikzstyle process=[color = green, semithick, ->]
    78       \tikzstyle adaptation=[color = red, semithick, ->]
    79       \tikzstyle target=[color = black]
    80       \foreach \x in {0, ..., 24}
    81         \draw[style=water] (\x, 0.25) sin + (0.25, 0.25) cos + (0.25, -0.25) sin
    82           + (0.25, -0.25) cos + (0.25, 0.25);
    83       \draw[style=ice] (1, 0) --
    84         (3, 6) node[above, fill=white] {logic} -- (5, 0) -- cycle;
    85       \draw[style=ice] (9, 0) --
    86         (11, 6) node[above, fill=white] {intermediate language} -- (13, 0) -- cycle;
    87       \draw[style=ice] (15, -6) --
    88         (19, 6) node[above, fill=white] {target language} -- (23, -6) -- cycle;
    89       \draw[style=process]
    90         (3.5, 3) .. controls (7, 5) .. node[fill=white] {translation} (10.5, 3);
    91       \draw[style=process]
    92         (11.5, 3) .. controls (15, 5) .. node[fill=white] (serialisation) {serialisation} (18.5, 3);
    93       \node (adaptation) at (11, -2) [style=adaptation] {adaptation};
    94       \node at (19, 3) [rotate=90] {generated};
    95       \node at (19.5, -5) {language};
    96       \node at (19.5, -3) {library};
    97       \node (includes) at (19.5, -1) {includes};
    98       \node (reserved) at (16.5, -3) [rotate=72] {reserved}; % proper 71.57
    99       \draw[style=process]
   100         (includes) -- (serialisation);
   101       \draw[style=process]
   102         (reserved) -- (serialisation);
   103       \draw[style=adaptation]
   104         (adaptation) -- (serialisation);
   105       \draw[style=adaptation]
   106         (adaptation) -- (includes);
   107       \draw[style=adaptation]
   108         (adaptation) -- (reserved);
   109     \end{tikzpicture}
   110     \caption{The adaptation principle}
   111     \label{fig:adaptation}
   112   \end{figure}
   113 
   114   \noindent In the tame view, code generation acts as broker between
   115   @{text logic}, @{text "intermediate language"} and @{text "target
   116   language"} by means of @{text translation} and @{text
   117   serialisation}; for the latter, the serialiser has to observe the
   118   structure of the @{text language} itself plus some @{text reserved}
   119   keywords which have to be avoided for generated code.  However, if
   120   you consider @{text adaptation} mechanisms, the code generated by
   121   the serializer is just the tip of the iceberg:
   122 
   123   \begin{itemize}
   124 
   125     \item @{text serialisation} can be \emph{parametrised} such that
   126       logical entities are mapped to target-specific ones
   127       (e.g. target-specific list syntax, see also
   128       \secref{sec:adaptation_mechanisms})
   129 
   130     \item Such parametrisations can involve references to a
   131       target-specific standard @{text library} (e.g. using the @{text
   132       Haskell} @{verbatim Maybe} type instead of the @{text HOL}
   133       @{type "option"} type); if such are used, the corresponding
   134       identifiers (in our example, @{verbatim Maybe}, @{verbatim
   135       Nothing} and @{verbatim Just}) also have to be considered @{text
   136       reserved}.
   137 
   138     \item Even more, the user can enrich the library of the
   139       target-language by providing code snippets (\qt{@{text
   140       "includes"}}) which are prepended to any generated code (see
   141       \secref{sec:include}); this typically also involves further
   142       @{text reserved} identifiers.
   143 
   144   \end{itemize}
   145 
   146   \noindent As figure \ref{fig:adaptation} illustrates, all these
   147   adaptation mechanisms have to act consistently; it is at the
   148   discretion of the user to take care for this.
   149 \<close>
   150 
   151 subsection \<open>Common adaptation applications\<close>
   152 
   153 text \<open>
   154   The @{theory HOL} @{theory Main} theory already provides a code
   155   generator setup which should be suitable for most applications.
   156   Common extensions and modifications are available by certain
   157   theories in @{file "~~/src/HOL/Library"}; beside being useful in
   158   applications, they may serve as a tutorial for customising the code
   159   generator setup (see below \secref{sec:adaptation_mechanisms}).
   160 
   161   \begin{description}
   162 
   163     \item[@{theory "Code_Numeral"}] provides additional numeric
   164        types @{typ integer} and @{typ natural} isomorphic to types
   165        @{typ int} and @{typ nat} respectively.  Type @{typ integer}
   166        is mapped to target-language built-in integers; @{typ natural}
   167        is implemented as abstract type over @{typ integer}.
   168        Useful for code setups which involve e.g.~indexing
   169        of target-language arrays.  Part of @{text "HOL-Main"}.
   170 
   171     \item[@{text "Code_Target_Int"}] implements type @{typ int}
   172        by @{typ integer} and thus by target-language built-in integers.
   173 
   174     \item[@{text "Code_Binary_Nat"}] implements type
   175        @{typ nat} using a binary rather than a linear representation,
   176        which yields a considerable speedup for computations.
   177        Pattern matching with @{term "0\<Colon>nat"} / @{const "Suc"} is eliminated
   178        by a preprocessor.\label{abstract_nat}
   179 
   180     \item[@{text "Code_Target_Nat"}] implements type @{typ nat}
   181        by @{typ integer} and thus by target-language built-in integers.
   182        Pattern matching with @{term "0\<Colon>nat"} / @{const "Suc"} is eliminated
   183        by a preprocessor.
   184 
   185     \item[@{text "Code_Target_Numeral"}] is a convenience theory
   186        containing both @{text "Code_Target_Nat"} and
   187        @{text "Code_Target_Int"}.
   188 
   189     \item[@{theory "String"}] provides an additional datatype @{typ
   190        String.literal} which is isomorphic to strings; @{typ
   191        String.literal}s are mapped to target-language strings.  Useful
   192        for code setups which involve e.g.~printing (error) messages.
   193        Part of @{text "HOL-Main"}.
   194 
   195     \item[@{text "Code_Char"}] represents @{text HOL} characters by
   196        character literals in target languages.  \emph{Warning:} This
   197        modifies adaptation in a non-conservative manner and thus
   198        should always be imported \emph{last} in a theory header.
   199 
   200     \item[@{theory "IArray"}] provides a type @{typ "'a iarray"}
   201        isomorphic to lists but implemented by (effectively immutable)
   202        arrays \emph{in SML only}.
   203 
   204   \end{description}
   205 \<close>
   206 
   207 
   208 subsection \<open>Parametrising serialisation \label{sec:adaptation_mechanisms}\<close>
   209 
   210 text \<open>
   211   Consider the following function and its corresponding SML code:
   212 \<close>
   213 
   214 primrec %quote in_interval :: "nat \<times> nat \<Rightarrow> nat \<Rightarrow> bool" where
   215   "in_interval (k, l) n \<longleftrightarrow> k \<le> n \<and> n \<le> l"
   216 (*<*)
   217 code_printing %invisible
   218   type_constructor bool \<rightharpoonup> (SML)
   219 | constant True \<rightharpoonup> (SML)
   220 | constant False \<rightharpoonup> (SML)
   221 | constant HOL.conj \<rightharpoonup> (SML)
   222 | constant Not \<rightharpoonup> (SML)
   223 (*>*)
   224 text %quotetypewriter \<open>
   225   @{code_stmts in_interval (SML)}
   226 \<close>
   227 
   228 text \<open>
   229   \noindent Though this is correct code, it is a little bit
   230   unsatisfactory: boolean values and operators are materialised as
   231   distinguished entities with have nothing to do with the SML-built-in
   232   notion of \qt{bool}.  This results in less readable code;
   233   additionally, eager evaluation may cause programs to loop or break
   234   which would perfectly terminate when the existing SML @{verbatim
   235   "bool"} would be used.  To map the HOL @{typ bool} on SML @{verbatim
   236   "bool"}, we may use \qn{custom serialisations}:
   237 \<close>
   238 
   239 code_printing %quotett
   240   type_constructor bool \<rightharpoonup> (SML) "bool"
   241 | constant True \<rightharpoonup> (SML) "true"
   242 | constant False \<rightharpoonup> (SML) "false"
   243 | constant HOL.conj \<rightharpoonup> (SML) "_ andalso _"
   244 
   245 text \<open>
   246   \noindent The @{command_def code_printing} command takes a series
   247   of symbols (contants, type constructor, \ldots)
   248   together with target-specific custom serialisations.  Each
   249   custom serialisation starts with a target language identifier
   250   followed by an expression, which during code serialisation is
   251   inserted whenever the type constructor would occur.  Each
   252   ``@{verbatim "_"}'' in a serialisation expression is treated as a
   253   placeholder for the constant's or the type constructor's arguments.
   254 \<close>
   255 
   256 text %quotetypewriter \<open>
   257   @{code_stmts in_interval (SML)}
   258 \<close>
   259 
   260 text \<open>
   261   \noindent This still is not perfect: the parentheses around the
   262   \qt{andalso} expression are superfluous.  Though the serialiser by
   263   no means attempts to imitate the rich Isabelle syntax framework, it
   264   provides some common idioms, notably associative infixes with
   265   precedences which may be used here:
   266 \<close>
   267 
   268 code_printing %quotett
   269   constant HOL.conj \<rightharpoonup> (SML) infixl 1 "andalso"
   270 
   271 text %quotetypewriter \<open>
   272   @{code_stmts in_interval (SML)}
   273 \<close>
   274 
   275 text \<open>
   276   \noindent The attentive reader may ask how we assert that no
   277   generated code will accidentally overwrite.  For this reason the
   278   serialiser has an internal table of identifiers which have to be
   279   avoided to be used for new declarations.  Initially, this table
   280   typically contains the keywords of the target language.  It can be
   281   extended manually, thus avoiding accidental overwrites, using the
   282   @{command_def "code_reserved"} command:
   283 \<close>
   284 
   285 code_reserved %quote "\<SMLdummy>" bool true false andalso
   286 
   287 text \<open>
   288   \noindent Next, we try to map HOL pairs to SML pairs, using the
   289   infix ``@{verbatim "*"}'' type constructor and parentheses:
   290 \<close>
   291 (*<*)
   292 code_printing %invisible
   293   type_constructor prod \<rightharpoonup> (SML)
   294 | constant Pair \<rightharpoonup> (SML)
   295 (*>*)
   296 code_printing %quotett
   297   type_constructor prod \<rightharpoonup> (SML) infix 2 "*"
   298 | constant Pair \<rightharpoonup> (SML) "!((_),/ (_))"
   299 
   300 text \<open>
   301   \noindent The initial bang ``@{verbatim "!"}'' tells the serialiser
   302   never to put parentheses around the whole expression (they are
   303   already present), while the parentheses around argument place
   304   holders tell not to put parentheses around the arguments.  The slash
   305   ``@{verbatim "/"}'' (followed by arbitrary white space) inserts a
   306   space which may be used as a break if necessary during pretty
   307   printing.
   308 
   309   These examples give a glimpse what mechanisms custom serialisations
   310   provide; however their usage requires careful thinking in order not
   311   to introduce inconsistencies -- or, in other words: custom
   312   serialisations are completely axiomatic.
   313 
   314   A further noteworthy detail is that any special character in a
   315   custom serialisation may be quoted using ``@{verbatim "'"}''; thus,
   316   in ``@{verbatim "fn '_ => _"}'' the first ``@{verbatim "_"}'' is a
   317   proper underscore while the second ``@{verbatim "_"}'' is a
   318   placeholder.
   319 \<close>
   320 
   321 
   322 subsection \<open>@{text Haskell} serialisation\<close>
   323 
   324 text \<open>
   325   For convenience, the default @{text HOL} setup for @{text Haskell}
   326   maps the @{class equal} class to its counterpart in @{text Haskell},
   327   giving custom serialisations for the class @{class equal}
   328   and its operation @{const [source] HOL.equal}.
   329 \<close>
   330 
   331 code_printing %quotett
   332   type_class equal \<rightharpoonup> (Haskell) "Eq"
   333 | constant HOL.equal \<rightharpoonup> (Haskell) infixl 4 "=="
   334 
   335 text \<open>
   336   \noindent A problem now occurs whenever a type which is an instance
   337   of @{class equal} in @{text HOL} is mapped on a @{text
   338   Haskell}-built-in type which is also an instance of @{text Haskell}
   339   @{text Eq}:
   340 \<close>
   341 
   342 typedecl %quote bar
   343 
   344 instantiation %quote bar :: equal
   345 begin
   346 
   347 definition %quote "HOL.equal (x\<Colon>bar) y \<longleftrightarrow> x = y"
   348 
   349 instance %quote by default (simp add: equal_bar_def)
   350 
   351 end %quote (*<*)
   352 
   353 (*>*) code_printing %quotett
   354   type_constructor bar \<rightharpoonup> (Haskell) "Integer"
   355 
   356 text \<open>
   357   \noindent The code generator would produce an additional instance,
   358   which of course is rejected by the @{text Haskell} compiler.  To
   359   suppress this additional instance:
   360 \<close>
   361 
   362 code_printing %quotett
   363   class_instance bar :: "HOL.equal" \<rightharpoonup> (Haskell) -
   364 
   365 
   366 subsection \<open>Enhancing the target language context \label{sec:include}\<close>
   367 
   368 text \<open>
   369   In rare cases it is necessary to \emph{enrich} the context of a
   370   target language; this can also be accomplished using the @{command
   371   "code_printing"} command:
   372 \<close>
   373 
   374 code_printing %quotett
   375   code_module "Errno" \<rightharpoonup> (Haskell)
   376     \<open>errno i = error ("Error number: " ++ show i)\<close>
   377 
   378 code_reserved %quotett Haskell Errno
   379 
   380 text \<open>
   381   \noindent Such named modules are then prepended to every
   382   generated code.  Inspect such code in order to find out how
   383   this behaves with respect to a particular
   384   target language.
   385 \<close>
   386 
   387 end
   388