doc-src/IsarAdvanced/Codegen/Thy/Adaption.thy
changeset 30228 2aaf339fb7c1
parent 30224 79136ce06bdb
parent 30227 853abb4853cc
child 30229 9861257b18e6
child 30243 09d5944e224e
equal deleted inserted replaced
30224:79136ce06bdb 30228:2aaf339fb7c1
     1 theory Adaption
       
     2 imports Setup
       
     3 begin
       
     4 
       
     5 setup %invisible {* Code_Target.extend_target ("\<SML>", ("SML", K I)) *}
       
     6 
       
     7 section {* Adaption to target languages \label{sec:adaption} *}
       
     8 
       
     9 subsection {* Adapting code generation *}
       
    10 
       
    11 text {*
       
    12   The aspects of code generation introduced so far have two aspects
       
    13   in common:
       
    14 
       
    15   \begin{itemize}
       
    16     \item They act uniformly, without reference to a specific
       
    17        target language.
       
    18     \item They are \emph{safe} in the sense that as long as you trust
       
    19        the code generator meta theory and implementation, you cannot
       
    20        produce programs that yield results which are not derivable
       
    21        in the logic.
       
    22   \end{itemize}
       
    23 
       
    24   \noindent In this section we will introduce means to \emph{adapt} the serialiser
       
    25   to a specific target language, i.e.~to print program fragments
       
    26   in a way which accommodates \qt{already existing} ingredients of
       
    27   a target language environment, for three reasons:
       
    28 
       
    29   \begin{itemize}
       
    30     \item improving readability and aesthetics of generated code
       
    31     \item gaining efficiency
       
    32     \item interface with language parts which have no direct counterpart
       
    33       in @{text "HOL"} (say, imperative data structures)
       
    34   \end{itemize}
       
    35 
       
    36   \noindent Generally, you should avoid using those features yourself
       
    37   \emph{at any cost}:
       
    38 
       
    39   \begin{itemize}
       
    40     \item The safe configuration methods act uniformly on every target language,
       
    41       whereas for adaption you have to treat each target language separate.
       
    42     \item Application is extremely tedious since there is no abstraction
       
    43       which would allow for a static check, making it easy to produce garbage.
       
    44     \item More or less subtle errors can be introduced unconsciously.
       
    45   \end{itemize}
       
    46 
       
    47   \noindent However, even if you ought refrain from setting up adaption
       
    48   yourself, already the @{text "HOL"} comes with some reasonable default
       
    49   adaptions (say, using target language list syntax).  There also some
       
    50   common adaption cases which you can setup by importing particular
       
    51   library theories.  In order to understand these, we provide some clues here;
       
    52   these however are not supposed to replace a careful study of the sources.
       
    53 *}
       
    54 
       
    55 subsection {* The adaption principle *}
       
    56 
       
    57 text {*
       
    58   The following figure illustrates what \qt{adaption} is conceptually
       
    59   supposed to be:
       
    60 
       
    61   \begin{figure}[here]
       
    62     \begin{tikzpicture}[scale = 0.5]
       
    63       \tikzstyle water=[color = blue, thick]
       
    64       \tikzstyle ice=[color = black, very thick, cap = round, join = round, fill = white]
       
    65       \tikzstyle process=[color = green, semithick, ->]
       
    66       \tikzstyle adaption=[color = red, semithick, ->]
       
    67       \tikzstyle target=[color = black]
       
    68       \foreach \x in {0, ..., 24}
       
    69         \draw[style=water] (\x, 0.25) sin + (0.25, 0.25) cos + (0.25, -0.25) sin
       
    70           + (0.25, -0.25) cos + (0.25, 0.25);
       
    71       \draw[style=ice] (1, 0) --
       
    72         (3, 6) node[above, fill=white] {logic} -- (5, 0) -- cycle;
       
    73       \draw[style=ice] (9, 0) --
       
    74         (11, 6) node[above, fill=white] {intermediate language} -- (13, 0) -- cycle;
       
    75       \draw[style=ice] (15, -6) --
       
    76         (19, 6) node[above, fill=white] {target language} -- (23, -6) -- cycle;
       
    77       \draw[style=process]
       
    78         (3.5, 3) .. controls (7, 5) .. node[fill=white] {translation} (10.5, 3);
       
    79       \draw[style=process]
       
    80         (11.5, 3) .. controls (15, 5) .. node[fill=white] (serialisation) {serialisation} (18.5, 3);
       
    81       \node (adaption) at (11, -2) [style=adaption] {adaption};
       
    82       \node at (19, 3) [rotate=90] {generated};
       
    83       \node at (19.5, -5) {language};
       
    84       \node at (19.5, -3) {library};
       
    85       \node (includes) at (19.5, -1) {includes};
       
    86       \node (reserved) at (16.5, -3) [rotate=72] {reserved}; % proper 71.57
       
    87       \draw[style=process]
       
    88         (includes) -- (serialisation);
       
    89       \draw[style=process]
       
    90         (reserved) -- (serialisation);
       
    91       \draw[style=adaption]
       
    92         (adaption) -- (serialisation);
       
    93       \draw[style=adaption]
       
    94         (adaption) -- (includes);
       
    95       \draw[style=adaption]
       
    96         (adaption) -- (reserved);
       
    97     \end{tikzpicture}
       
    98     \caption{The adaption principle}
       
    99     \label{fig:adaption}
       
   100   \end{figure}
       
   101 
       
   102   \noindent In the tame view, code generation acts as broker between
       
   103   @{text logic}, @{text "intermediate language"} and
       
   104   @{text "target language"} by means of @{text translation} and
       
   105   @{text serialisation};  for the latter, the serialiser has to observe
       
   106   the structure of the @{text language} itself plus some @{text reserved}
       
   107   keywords which have to be avoided for generated code.
       
   108   However, if you consider @{text adaption} mechanisms, the code generated
       
   109   by the serializer is just the tip of the iceberg:
       
   110 
       
   111   \begin{itemize}
       
   112     \item @{text serialisation} can be \emph{parametrised} such that
       
   113       logical entities are mapped to target-specific ones
       
   114       (e.g. target-specific list syntax,
       
   115         see also \secref{sec:adaption_mechanisms})
       
   116     \item Such parametrisations can involve references to a
       
   117       target-specific standard @{text library} (e.g. using
       
   118       the @{text Haskell} @{verbatim Maybe} type instead
       
   119       of the @{text HOL} @{type "option"} type);
       
   120       if such are used, the corresponding identifiers
       
   121       (in our example, @{verbatim Maybe}, @{verbatim Nothing}
       
   122       and @{verbatim Just}) also have to be considered @{text reserved}.
       
   123     \item Even more, the user can enrich the library of the
       
   124       target-language by providing code snippets
       
   125       (\qt{@{text "includes"}}) which are prepended to
       
   126       any generated code (see \secref{sec:include});  this typically
       
   127       also involves further @{text reserved} identifiers.
       
   128   \end{itemize}
       
   129 
       
   130   \noindent As figure \ref{fig:adaption} illustrates, all these adaption mechanisms
       
   131   have to act consistently;  it is at the discretion of the user
       
   132   to take care for this.
       
   133 *}
       
   134 
       
   135 subsection {* Common adaption patterns *}
       
   136 
       
   137 text {*
       
   138   The @{theory HOL} @{theory Main} theory already provides a code
       
   139   generator setup
       
   140   which should be suitable for most applications.  Common extensions
       
   141   and modifications are available by certain theories of the @{text HOL}
       
   142   library; beside being useful in applications, they may serve
       
   143   as a tutorial for customising the code generator setup (see below
       
   144   \secref{sec:adaption_mechanisms}).
       
   145 
       
   146   \begin{description}
       
   147 
       
   148     \item[@{theory "Code_Integer"}] represents @{text HOL} integers by big
       
   149        integer literals in target languages.
       
   150     \item[@{theory "Code_Char"}] represents @{text HOL} characters by 
       
   151        character literals in target languages.
       
   152     \item[@{theory "Code_Char_chr"}] like @{text "Code_Char"},
       
   153        but also offers treatment of character codes; includes
       
   154        @{theory "Code_Char"}.
       
   155     \item[@{theory "Efficient_Nat"}] \label{eff_nat} implements natural numbers by integers,
       
   156        which in general will result in higher efficiency; pattern
       
   157        matching with @{term "0\<Colon>nat"} / @{const "Suc"}
       
   158        is eliminated;  includes @{theory "Code_Integer"}
       
   159        and @{theory "Code_Index"}.
       
   160     \item[@{theory "Code_Index"}] provides an additional datatype
       
   161        @{typ index} which is mapped to target-language built-in integers.
       
   162        Useful for code setups which involve e.g. indexing of
       
   163        target-language arrays.
       
   164     \item[@{theory "Code_Message"}] provides an additional datatype
       
   165        @{typ message_string} which is isomorphic to strings;
       
   166        @{typ message_string}s are mapped to target-language strings.
       
   167        Useful for code setups which involve e.g. printing (error) messages.
       
   168 
       
   169   \end{description}
       
   170 
       
   171   \begin{warn}
       
   172     When importing any of these theories, they should form the last
       
   173     items in an import list.  Since these theories adapt the
       
   174     code generator setup in a non-conservative fashion,
       
   175     strange effects may occur otherwise.
       
   176   \end{warn}
       
   177 *}
       
   178 
       
   179 
       
   180 subsection {* Parametrising serialisation \label{sec:adaption_mechanisms} *}
       
   181 
       
   182 text {*
       
   183   Consider the following function and its corresponding
       
   184   SML code:
       
   185 *}
       
   186 
       
   187 primrec %quote in_interval :: "nat \<times> nat \<Rightarrow> nat \<Rightarrow> bool" where
       
   188   "in_interval (k, l) n \<longleftrightarrow> k \<le> n \<and> n \<le> l"
       
   189 (*<*)
       
   190 code_type %invisible bool
       
   191   (SML)
       
   192 code_const %invisible True and False and "op \<and>" and Not
       
   193   (SML and and and)
       
   194 (*>*)
       
   195 text %quote {*@{code_stmts in_interval (SML)}*}
       
   196 
       
   197 text {*
       
   198   \noindent Though this is correct code, it is a little bit unsatisfactory:
       
   199   boolean values and operators are materialised as distinguished
       
   200   entities with have nothing to do with the SML-built-in notion
       
   201   of \qt{bool}.  This results in less readable code;
       
   202   additionally, eager evaluation may cause programs to
       
   203   loop or break which would perfectly terminate when
       
   204   the existing SML @{verbatim "bool"} would be used.  To map
       
   205   the HOL @{typ bool} on SML @{verbatim "bool"}, we may use
       
   206   \qn{custom serialisations}:
       
   207 *}
       
   208 
       
   209 code_type %quotett bool
       
   210   (SML "bool")
       
   211 code_const %quotett True and False and "op \<and>"
       
   212   (SML "true" and "false" and "_ andalso _")
       
   213 
       
   214 text {*
       
   215   \noindent The @{command code_type} command takes a type constructor
       
   216   as arguments together with a list of custom serialisations.
       
   217   Each custom serialisation starts with a target language
       
   218   identifier followed by an expression, which during
       
   219   code serialisation is inserted whenever the type constructor
       
   220   would occur.  For constants, @{command code_const} implements
       
   221   the corresponding mechanism.  Each ``@{verbatim "_"}'' in
       
   222   a serialisation expression is treated as a placeholder
       
   223   for the type constructor's (the constant's) arguments.
       
   224 *}
       
   225 
       
   226 text %quote {*@{code_stmts in_interval (SML)}*}
       
   227 
       
   228 text {*
       
   229   \noindent This still is not perfect: the parentheses
       
   230   around the \qt{andalso} expression are superfluous.
       
   231   Though the serialiser
       
   232   by no means attempts to imitate the rich Isabelle syntax
       
   233   framework, it provides some common idioms, notably
       
   234   associative infixes with precedences which may be used here:
       
   235 *}
       
   236 
       
   237 code_const %quotett "op \<and>"
       
   238   (SML infixl 1 "andalso")
       
   239 
       
   240 text %quote {*@{code_stmts in_interval (SML)}*}
       
   241 
       
   242 text {*
       
   243   \noindent The attentive reader may ask how we assert that no generated
       
   244   code will accidentally overwrite.  For this reason the serialiser has
       
   245   an internal table of identifiers which have to be avoided to be used
       
   246   for new declarations.  Initially, this table typically contains the
       
   247   keywords of the target language.  It can be extended manually, thus avoiding
       
   248   accidental overwrites, using the @{command "code_reserved"} command:
       
   249 *}
       
   250 
       
   251 code_reserved %quote "\<SML>" bool true false andalso
       
   252 
       
   253 text {*
       
   254   \noindent Next, we try to map HOL pairs to SML pairs, using the
       
   255   infix ``@{verbatim "*"}'' type constructor and parentheses:
       
   256 *}
       
   257 (*<*)
       
   258 code_type %invisible *
       
   259   (SML)
       
   260 code_const %invisible Pair
       
   261   (SML)
       
   262 (*>*)
       
   263 code_type %quotett *
       
   264   (SML infix 2 "*")
       
   265 code_const %quotett Pair
       
   266   (SML "!((_),/ (_))")
       
   267 
       
   268 text {*
       
   269   \noindent The initial bang ``@{verbatim "!"}'' tells the serialiser
       
   270   never to put
       
   271   parentheses around the whole expression (they are already present),
       
   272   while the parentheses around argument place holders
       
   273   tell not to put parentheses around the arguments.
       
   274   The slash ``@{verbatim "/"}'' (followed by arbitrary white space)
       
   275   inserts a space which may be used as a break if necessary
       
   276   during pretty printing.
       
   277 
       
   278   These examples give a glimpse what mechanisms
       
   279   custom serialisations provide; however their usage
       
   280   requires careful thinking in order not to introduce
       
   281   inconsistencies -- or, in other words:
       
   282   custom serialisations are completely axiomatic.
       
   283 
       
   284   A further noteworthy details is that any special
       
   285   character in a custom serialisation may be quoted
       
   286   using ``@{verbatim "'"}''; thus, in
       
   287   ``@{verbatim "fn '_ => _"}'' the first
       
   288   ``@{verbatim "_"}'' is a proper underscore while the
       
   289   second ``@{verbatim "_"}'' is a placeholder.
       
   290 *}
       
   291 
       
   292 
       
   293 subsection {* @{text Haskell} serialisation *}
       
   294 
       
   295 text {*
       
   296   For convenience, the default
       
   297   @{text HOL} setup for @{text Haskell} maps the @{class eq} class to
       
   298   its counterpart in @{text Haskell}, giving custom serialisations
       
   299   for the class @{class eq} (by command @{command code_class}) and its operation
       
   300   @{const HOL.eq}
       
   301 *}
       
   302 
       
   303 code_class %quotett eq
       
   304   (Haskell "Eq")
       
   305 
       
   306 code_const %quotett "op ="
       
   307   (Haskell infixl 4 "==")
       
   308 
       
   309 text {*
       
   310   \noindent A problem now occurs whenever a type which
       
   311   is an instance of @{class eq} in @{text HOL} is mapped
       
   312   on a @{text Haskell}-built-in type which is also an instance
       
   313   of @{text Haskell} @{text Eq}:
       
   314 *}
       
   315 
       
   316 typedecl %quote bar
       
   317 
       
   318 instantiation %quote bar :: eq
       
   319 begin
       
   320 
       
   321 definition %quote "eq_class.eq (x\<Colon>bar) y \<longleftrightarrow> x = y"
       
   322 
       
   323 instance %quote by default (simp add: eq_bar_def)
       
   324 
       
   325 end %quote
       
   326 
       
   327 code_type %quotett bar
       
   328   (Haskell "Integer")
       
   329 
       
   330 text {*
       
   331   \noindent The code generator would produce
       
   332   an additional instance, which of course is rejected by the @{text Haskell}
       
   333   compiler.
       
   334   To suppress this additional instance, use
       
   335   @{text "code_instance"}:
       
   336 *}
       
   337 
       
   338 code_instance %quotett bar :: eq
       
   339   (Haskell -)
       
   340 
       
   341 
       
   342 subsection {* Enhancing the target language context \label{sec:include} *}
       
   343 
       
   344 text {*
       
   345   In rare cases it is necessary to \emph{enrich} the context of a
       
   346   target language;  this is accomplished using the @{command "code_include"}
       
   347   command:
       
   348 *}
       
   349 
       
   350 code_include %quotett Haskell "Errno"
       
   351 {*errno i = error ("Error number: " ++ show i)*}
       
   352 
       
   353 code_reserved %quotett Haskell Errno
       
   354 
       
   355 text {*
       
   356   \noindent Such named @{text include}s are then prepended to every generated code.
       
   357   Inspect such code in order to find out how @{command "code_include"} behaves
       
   358   with respect to a particular target language.
       
   359 *}
       
   360 
       
   361 end