author haftmann Mon Nov 03 14:15:25 2008 +0100 (2008-11-03) changeset 28714 1992553cccfe parent 28679 d7384e8e99b3 permissions -rw-r--r--
improved verbatim mechanism
     1 theory Adaption

     2 imports Setup

     3 begin

     4

     5 setup %invisible {* Code_Target.extend_target ("\<SML>", ("SML", K I)) *}

     6

     7 section {* Adaption to target languages \label{sec:adaption} *}

     8

     9 subsection {* Adapting code generation *}

    10

    11 text {*

    12   The aspects of code generation introduced so far have two aspects

    13   in common:

    14

    15   \begin{itemize}

    16     \item They act uniformly, without reference to a specific

    17        target language.

    18     \item They are \emph{safe} in the sense that as long as you trust

    19        the code generator meta theory and implementation, you cannot

    20        produce programs that yield results which are not derivable

    21        in the logic.

    22   \end{itemize}

    23

    24   \noindent In this section we will introduce means to \emph{adapt} the serialiser

    25   to a specific target language, i.e.~to print program fragments

    26   in a way which accommodates \qt{already existing} ingredients of

    27   a target language environment, for three reasons:

    28

    29   \begin{itemize}

    30     \item improving readability and aesthetics of generated code

    31     \item gaining efficiency

    32     \item interface with language parts which have no direct counterpart

    33       in @{text "HOL"} (say, imperative data structures)

    34   \end{itemize}

    35

    36   \noindent Generally, you should avoid using those features yourself

    37   \emph{at any cost}:

    38

    39   \begin{itemize}

    40     \item The safe configuration methods act uniformly on every target language,

    41       whereas for adaption you have to treat each target language separate.

    42     \item Application is extremely tedious since there is no abstraction

    43       which would allow for a static check, making it easy to produce garbage.

    44     \item More or less subtle errors can be introduced unconsciously.

    45   \end{itemize}

    46

    47   \noindent However, even if you ought refrain from setting up adaption

    48   yourself, already the @{text "HOL"} comes with some reasonable default

    49   adaptions (say, using target language list syntax).  There also some

    50   common adaption cases which you can setup by importing particular

    51   library theories.  In order to understand these, we provide some clues here;

    52   these however are not supposed to replace a careful study of the sources.

    53 *}

    54

    55 subsection {* The adaption principle *}

    56

    57 text {*

    58   The following figure illustrates what \qt{adaption} is conceptually

    59   supposed to be:

    60

    61   \begin{figure}[here]

    62     \begin{tikzpicture}[scale = 0.5]

    63       \tikzstyle water=[color = blue, thick]

    64       \tikzstyle ice=[color = black, very thick, cap = round, join = round, fill = white]

    65       \tikzstyle process=[color = green, semithick, ->]

    66       \tikzstyle adaption=[color = red, semithick, ->]

    67       \tikzstyle target=[color = black]

    68       \foreach \x in {0, ..., 24}

    69         \draw[style=water] (\x, 0.25) sin + (0.25, 0.25) cos + (0.25, -0.25) sin

    70           + (0.25, -0.25) cos + (0.25, 0.25);

    71       \draw[style=ice] (1, 0) --

    72         (3, 6) node[above, fill=white] {logic} -- (5, 0) -- cycle;

    73       \draw[style=ice] (9, 0) --

    74         (11, 6) node[above, fill=white] {intermediate language} -- (13, 0) -- cycle;

    75       \draw[style=ice] (15, -6) --

    76         (19, 6) node[above, fill=white] {target language} -- (23, -6) -- cycle;

    77       \draw[style=process]

    78         (3.5, 3) .. controls (7, 5) .. node[fill=white] {translation} (10.5, 3);

    79       \draw[style=process]

    80         (11.5, 3) .. controls (15, 5) .. node[fill=white] (serialisation) {serialisation} (18.5, 3);

    81       \node (adaption) at (11, -2) [style=adaption] {adaption};

    82       \node at (19, 3) [rotate=90] {generated};

    83       \node at (19.5, -5) {language};

    84       \node at (19.5, -3) {library};

    85       \node (includes) at (19.5, -1) {includes};

    86       \node (reserved) at (16.5, -3) [rotate=72] {reserved}; % proper 71.57

    87       \draw[style=process]

    88         (includes) -- (serialisation);

    89       \draw[style=process]

    90         (reserved) -- (serialisation);

    91       \draw[style=adaption]

    92         (adaption) -- (serialisation);

    93       \draw[style=adaption]

    94         (adaption) -- (includes);

    95       \draw[style=adaption]

    96         (adaption) -- (reserved);

    97     \end{tikzpicture}

    98     \caption{The adaption principle}

    99     \label{fig:adaption}

   100   \end{figure}

   101

   102   \noindent In the tame view, code generation acts as broker between

   103   @{text logic}, @{text "intermediate language"} and

   104   @{text "target language"} by means of @{text translation} and

   105   @{text serialisation};  for the latter, the serialiser has to observe

   106   the structure of the @{text language} itself plus some @{text reserved}

   107   keywords which have to be avoided for generated code.

   108   However, if you consider @{text adaption} mechanisms, the code generated

   109   by the serializer is just the tip of the iceberg:

   110

   111   \begin{itemize}

   112     \item @{text serialisation} can be \emph{parametrised} such that

   113       logical entities are mapped to target-specific ones

   114       (e.g. target-specific list syntax,

   115         see also \secref{sec:adaption_mechanisms})

   116     \item Such parametrisations can involve references to a

   117       target-specific standard @{text library} (e.g. using

   118       the @{text Haskell} @{verbatim Maybe} type instead

   119       of the @{text HOL} @{type "option"} type);

   120       if such are used, the corresponding identifiers

   121       (in our example, @{verbatim Maybe}, @{verbatim Nothing}

   122       and @{verbatim Just}) also have to be considered @{text reserved}.

   123     \item Even more, the user can enrich the library of the

   124       target-language by providing code snippets

   125       (\qt{@{text "includes"}}) which are prepended to

   126       any generated code (see \secref{sec:include});  this typically

   127       also involves further @{text reserved} identifiers.

   128   \end{itemize}

   129

   130   \noindent As figure \ref{fig:adaption} illustrates, all these adaption mechanisms

   131   have to act consistently;  it is at the discretion of the user

   132   to take care for this.

   133 *}

   134

   135 subsection {* Common adaption patterns *}

   136

   137 text {*

   138   The @{theory HOL} @{theory Main} theory already provides a code

   139   generator setup

   140   which should be suitable for most applications.  Common extensions

   141   and modifications are available by certain theories of the @{text HOL}

   142   library; beside being useful in applications, they may serve

   143   as a tutorial for customising the code generator setup (see below

   144   \secref{sec:adaption_mechanisms}).

   145

   146   \begin{description}

   147

   148     \item[@{theory "Code_Integer"}] represents @{text HOL} integers by big

   149        integer literals in target languages.

   150     \item[@{theory "Code_Char"}] represents @{text HOL} characters by

   151        character literals in target languages.

   152     \item[@{theory "Code_Char_chr"}] like @{text "Code_Char"},

   153        but also offers treatment of character codes; includes

   154        @{theory "Code_Char"}.

   155     \item[@{theory "Efficient_Nat"}] \label{eff_nat} implements natural numbers by integers,

   156        which in general will result in higher efficiency; pattern

   157        matching with @{term "0\<Colon>nat"} / @{const "Suc"}

   158        is eliminated;  includes @{theory "Code_Integer"}

   159        and @{theory "Code_Index"}.

   160     \item[@{theory "Code_Index"}] provides an additional datatype

   161        @{typ index} which is mapped to target-language built-in integers.

   162        Useful for code setups which involve e.g. indexing of

   163        target-language arrays.

   164     \item[@{theory "Code_Message"}] provides an additional datatype

   165        @{typ message_string} which is isomorphic to strings;

   166        @{typ message_string}s are mapped to target-language strings.

   167        Useful for code setups which involve e.g. printing (error) messages.

   168

   169   \end{description}

   170

   171   \begin{warn}

   172     When importing any of these theories, they should form the last

   173     items in an import list.  Since these theories adapt the

   174     code generator setup in a non-conservative fashion,

   175     strange effects may occur otherwise.

   176   \end{warn}

   177 *}

   178

   179

   180 subsection {* Parametrising serialisation \label{sec:adaption_mechanisms} *}

   181

   182 text {*

   183   Consider the following function and its corresponding

   184   SML code:

   185 *}

   186

   187 primrec %quote in_interval :: "nat \<times> nat \<Rightarrow> nat \<Rightarrow> bool" where

   188   "in_interval (k, l) n \<longleftrightarrow> k \<le> n \<and> n \<le> l"

   189 (*<*)

   190 code_type %invisible bool

   191   (SML)

   192 code_const %invisible True and False and "op \<and>" and Not

   193   (SML and and and)

   194 (*>*)

   195 text %quote {*@{code_stmts in_interval (SML)}*}

   196

   197 text {*

   198   \noindent Though this is correct code, it is a little bit unsatisfactory:

   199   boolean values and operators are materialised as distinguished

   200   entities with have nothing to do with the SML-built-in notion

   201   of \qt{bool}.  This results in less readable code;

   202   additionally, eager evaluation may cause programs to

   203   loop or break which would perfectly terminate when

   204   the existing SML @{verbatim "bool"} would be used.  To map

   205   the HOL @{typ bool} on SML @{verbatim "bool"}, we may use

   206   \qn{custom serialisations}:

   207 *}

   208

   209 code_type %quotett bool

   210   (SML "bool")

   211 code_const %quotett True and False and "op \<and>"

   212   (SML "true" and "false" and "_ andalso _")

   213

   214 text {*

   215   \noindent The @{command code_type} command takes a type constructor

   216   as arguments together with a list of custom serialisations.

   217   Each custom serialisation starts with a target language

   218   identifier followed by an expression, which during

   219   code serialisation is inserted whenever the type constructor

   220   would occur.  For constants, @{command code_const} implements

   221   the corresponding mechanism.  Each @{verbatim "_"}'' in

   222   a serialisation expression is treated as a placeholder

   223   for the type constructor's (the constant's) arguments.

   224 *}

   225

   226 text %quote {*@{code_stmts in_interval (SML)}*}

   227

   228 text {*

   229   \noindent This still is not perfect: the parentheses

   230   around the \qt{andalso} expression are superfluous.

   231   Though the serialiser

   232   by no means attempts to imitate the rich Isabelle syntax

   233   framework, it provides some common idioms, notably

   234   associative infixes with precedences which may be used here:

   235 *}

   236

   237 code_const %quotett "op \<and>"

   238   (SML infixl 1 "andalso")

   239

   240 text %quote {*@{code_stmts in_interval (SML)}*}

   241

   242 text {*

   243   \noindent The attentive reader may ask how we assert that no generated

   244   code will accidentally overwrite.  For this reason the serialiser has

   245   an internal table of identifiers which have to be avoided to be used

   246   for new declarations.  Initially, this table typically contains the

   247   keywords of the target language.  It can be extended manually, thus avoiding

   248   accidental overwrites, using the @{command "code_reserved"} command:

   249 *}

   250

   251 code_reserved %quote "\<SML>" bool true false andalso

   252

   253 text {*

   254   \noindent Next, we try to map HOL pairs to SML pairs, using the

   255   infix @{verbatim "*"}'' type constructor and parentheses:

   256 *}

   257 (*<*)

   258 code_type %invisible *

   259   (SML)

   260 code_const %invisible Pair

   261   (SML)

   262 (*>*)

   263 code_type %quotett *

   264   (SML infix 2 "*")

   265 code_const %quotett Pair

   266   (SML "!((_),/ (_))")

   267

   268 text {*

   269   \noindent The initial bang @{verbatim "!"}'' tells the serialiser

   270   never to put

   271   parentheses around the whole expression (they are already present),

   272   while the parentheses around argument place holders

   273   tell not to put parentheses around the arguments.

   274   The slash @{verbatim "/"}'' (followed by arbitrary white space)

   275   inserts a space which may be used as a break if necessary

   276   during pretty printing.

   277

   278   These examples give a glimpse what mechanisms

   279   custom serialisations provide; however their usage

   280   requires careful thinking in order not to introduce

   281   inconsistencies -- or, in other words:

   282   custom serialisations are completely axiomatic.

   283

   284   A further noteworthy details is that any special

   285   character in a custom serialisation may be quoted

   286   using @{verbatim "'"}''; thus, in

   287   @{verbatim "fn '_ => _"}'' the first

   288   @{verbatim "_"}'' is a proper underscore while the

   289   second @{verbatim "_"}'' is a placeholder.

   290 *}

   291

   292

   293 subsection {* @{text Haskell} serialisation *}

   294

   295 text {*

   296   For convenience, the default

   297   @{text HOL} setup for @{text Haskell} maps the @{class eq} class to

   298   its counterpart in @{text Haskell}, giving custom serialisations

   299   for the class @{class eq} (by command @{command code_class}) and its operation

   300   @{const HOL.eq}

   301 *}

   302

   303 code_class %quotett eq

   304   (Haskell "Eq")

   305

   306 code_const %quotett "op ="

   307   (Haskell infixl 4 "==")

   308

   309 text {*

   310   \noindent A problem now occurs whenever a type which

   311   is an instance of @{class eq} in @{text HOL} is mapped

   312   on a @{text Haskell}-built-in type which is also an instance

   313   of @{text Haskell} @{text Eq}:

   314 *}

   315

   316 typedecl %quote bar

   317

   318 instantiation %quote bar :: eq

   319 begin

   320

   321 definition %quote "eq_class.eq (x\<Colon>bar) y \<longleftrightarrow> x = y"

   322

   323 instance %quote by default (simp add: eq_bar_def)

   324

   325 end %quote

   326

   327 code_type %quotett bar

   328   (Haskell "Integer")

   329

   330 text {*

   331   \noindent The code generator would produce

   332   an additional instance, which of course is rejected by the @{text Haskell}

   333   compiler.

   334   To suppress this additional instance, use

   335   @{text "code_instance"}:

   336 *}

   337

   338 code_instance %quotett bar :: eq

   339   (Haskell -)

   340

   341

   342 subsection {* Enhancing the target language context \label{sec:include} *}

   343

   344 text {*

   345   In rare cases it is necessary to \emph{enrich} the context of a

   346   target language;  this is accomplished using the @{command "code_include"}

   347   command:

   348 *}

   349

   350 code_include %quotett Haskell "Errno"

   351 {*errno i = error ("Error number: " ++ show i)*}

   352

   353 code_reserved %quotett Haskell Errno

   354

   355 text {*

   356   \noindent Such named @{text include}s are then prepended to every generated code.

   357   Inspect such code in order to find out how @{command "code_include"} behaves

   358   with respect to a particular target language.

   359 *}

   360

   361 end