author wenzelm Sun Sep 13 22:56:52 2015 +0200 (2015-09-13) changeset 61169 4de9ff3ea29a parent 61076 bdc1e2f0a86a child 63669 256fc20716f2 permissions -rw-r--r--
tuned proofs -- less legacy;
     1 theory Adaptation

     2 imports Setup

     3 begin

     4

     5 setup %invisible \<open>Code_Target.add_derived_target ("\<SML>", [("SML", I)])

     6   #> Code_Target.add_derived_target ("\<SMLdummy>", [("Haskell", I)])\<close>

     7

     8 section \<open>Adaptation to target languages \label{sec:adaptation}\<close>

     9

    10 subsection \<open>Adapting code generation\<close>

    11

    12 text \<open>

    13   The aspects of code generation introduced so far have two aspects

    14   in common:

    15

    16   \begin{itemize}

    17

    18     \item They act uniformly, without reference to a specific target

    19        language.

    20

    21     \item They are \emph{safe} in the sense that as long as you trust

    22        the code generator meta theory and implementation, you cannot

    23        produce programs that yield results which are not derivable in

    24        the logic.

    25

    26   \end{itemize}

    27

    28   \noindent In this section we will introduce means to \emph{adapt}

    29   the serialiser to a specific target language, i.e.~to print program

    30   fragments in a way which accommodates \qt{already existing}

    31   ingredients of a target language environment, for three reasons:

    32

    33   \begin{itemize}

    34     \item improving readability and aesthetics of generated code

    35     \item gaining efficiency

    36     \item interface with language parts which have no direct counterpart

    37       in @{text "HOL"} (say, imperative data structures)

    38   \end{itemize}

    39

    40   \noindent Generally, you should avoid using those features yourself

    41   \emph{at any cost}:

    42

    43   \begin{itemize}

    44

    45     \item The safe configuration methods act uniformly on every target

    46       language, whereas for adaptation you have to treat each target

    47       language separately.

    48

    49     \item Application is extremely tedious since there is no

    50       abstraction which would allow for a static check, making it easy

    51       to produce garbage.

    52

    53     \item Subtle errors can be introduced unconsciously.

    54

    55   \end{itemize}

    56

    57   \noindent However, even if you ought refrain from setting up

    58   adaptation yourself, already @{text "HOL"} comes with some

    59   reasonable default adaptations (say, using target language list

    60   syntax).  There also some common adaptation cases which you can

    61   setup by importing particular library theories.  In order to

    62   understand these, we provide some clues here; these however are not

    63   supposed to replace a careful study of the sources.

    64 \<close>

    65

    66

    67 subsection \<open>The adaptation principle\<close>

    68

    69 text \<open>

    70   Figure \ref{fig:adaptation} illustrates what \qt{adaptation} is

    71   conceptually supposed to be:

    72

    73   \begin{figure}[h]

    74     \begin{tikzpicture}[scale = 0.5]

    75       \tikzstyle water=[color = blue, thick]

    76       \tikzstyle ice=[color = black, very thick, cap = round, join = round, fill = white]

    77       \tikzstyle process=[color = green, semithick, ->]

    78       \tikzstyle adaptation=[color = red, semithick, ->]

    79       \tikzstyle target=[color = black]

    80       \foreach \x in {0, ..., 24}

    81         \draw[style=water] (\x, 0.25) sin + (0.25, 0.25) cos + (0.25, -0.25) sin

    82           + (0.25, -0.25) cos + (0.25, 0.25);

    83       \draw[style=ice] (1, 0) --

    84         (3, 6) node[above, fill=white] {logic} -- (5, 0) -- cycle;

    85       \draw[style=ice] (9, 0) --

    86         (11, 6) node[above, fill=white] {intermediate language} -- (13, 0) -- cycle;

    87       \draw[style=ice] (15, -6) --

    88         (19, 6) node[above, fill=white] {target language} -- (23, -6) -- cycle;

    89       \draw[style=process]

    90         (3.5, 3) .. controls (7, 5) .. node[fill=white] {translation} (10.5, 3);

    91       \draw[style=process]

    92         (11.5, 3) .. controls (15, 5) .. node[fill=white] (serialisation) {serialisation} (18.5, 3);

    93       \node (adaptation) at (11, -2) [style=adaptation] {adaptation};

    94       \node at (19, 3) [rotate=90] {generated};

    95       \node at (19.5, -5) {language};

    96       \node at (19.5, -3) {library};

    97       \node (includes) at (19.5, -1) {includes};

    98       \node (reserved) at (16.5, -3) [rotate=72] {reserved}; % proper 71.57

    99       \draw[style=process]

   100         (includes) -- (serialisation);

   101       \draw[style=process]

   102         (reserved) -- (serialisation);

   103       \draw[style=adaptation]

   104         (adaptation) -- (serialisation);

   105       \draw[style=adaptation]

   106         (adaptation) -- (includes);

   107       \draw[style=adaptation]

   108         (adaptation) -- (reserved);

   109     \end{tikzpicture}

   110     \caption{The adaptation principle}

   111     \label{fig:adaptation}

   112   \end{figure}

   113

   114   \noindent In the tame view, code generation acts as broker between

   115   @{text logic}, @{text "intermediate language"} and @{text "target

   116   language"} by means of @{text translation} and @{text

   117   serialisation}; for the latter, the serialiser has to observe the

   118   structure of the @{text language} itself plus some @{text reserved}

   119   keywords which have to be avoided for generated code.  However, if

   120   you consider @{text adaptation} mechanisms, the code generated by

   121   the serializer is just the tip of the iceberg:

   122

   123   \begin{itemize}

   124

   125     \item @{text serialisation} can be \emph{parametrised} such that

   126       logical entities are mapped to target-specific ones

   127       (e.g. target-specific list syntax, see also

   128       \secref{sec:adaptation_mechanisms})

   129

   130     \item Such parametrisations can involve references to a

   131       target-specific standard @{text library} (e.g. using the @{text

   132       Haskell} @{verbatim Maybe} type instead of the @{text HOL}

   133       @{type "option"} type); if such are used, the corresponding

   134       identifiers (in our example, @{verbatim Maybe}, @{verbatim

   135       Nothing} and @{verbatim Just}) also have to be considered @{text

   136       reserved}.

   137

   138     \item Even more, the user can enrich the library of the

   139       target-language by providing code snippets (\qt{@{text

   140       "includes"}}) which are prepended to any generated code (see

   141       \secref{sec:include}); this typically also involves further

   142       @{text reserved} identifiers.

   143

   144   \end{itemize}

   145

   146   \noindent As figure \ref{fig:adaptation} illustrates, all these

   147   adaptation mechanisms have to act consistently; it is at the

   148   discretion of the user to take care for this.

   149 \<close>

   150

   151 subsection \<open>Common adaptation applications\<close>

   152

   153 text \<open>

   154   The @{theory HOL} @{theory Main} theory already provides a code

   155   generator setup which should be suitable for most applications.

   156   Common extensions and modifications are available by certain

   157   theories in @{file "~~/src/HOL/Library"}; beside being useful in

   158   applications, they may serve as a tutorial for customising the code

   159   generator setup (see below \secref{sec:adaptation_mechanisms}).

   160

   161   \begin{description}

   162

   163     \item[@{theory "Code_Numeral"}] provides additional numeric

   164        types @{typ integer} and @{typ natural} isomorphic to types

   165        @{typ int} and @{typ nat} respectively.  Type @{typ integer}

   166        is mapped to target-language built-in integers; @{typ natural}

   167        is implemented as abstract type over @{typ integer}.

   168        Useful for code setups which involve e.g.~indexing

   169        of target-language arrays.  Part of @{text "HOL-Main"}.

   170

   171     \item[@{text "Code_Target_Int"}] implements type @{typ int}

   172        by @{typ integer} and thus by target-language built-in integers.

   173

   174     \item[@{text "Code_Binary_Nat"}] implements type

   175        @{typ nat} using a binary rather than a linear representation,

   176        which yields a considerable speedup for computations.

   177        Pattern matching with @{term "0::nat"} / @{const "Suc"} is eliminated

   178        by a preprocessor.\label{abstract_nat}

   179

   180     \item[@{text "Code_Target_Nat"}] implements type @{typ nat}

   181        by @{typ integer} and thus by target-language built-in integers.

   182        Pattern matching with @{term "0::nat"} / @{const "Suc"} is eliminated

   183        by a preprocessor.

   184

   185     \item[@{text "Code_Target_Numeral"}] is a convenience theory

   186        containing both @{text "Code_Target_Nat"} and

   187        @{text "Code_Target_Int"}.

   188

   189     \item[@{theory "String"}] provides an additional datatype @{typ

   190        String.literal} which is isomorphic to strings; @{typ

   191        String.literal}s are mapped to target-language strings.  Useful

   192        for code setups which involve e.g.~printing (error) messages.

   193        Part of @{text "HOL-Main"}.

   194

   195     \item[@{text "Code_Char"}] represents @{text HOL} characters by

   196        character literals in target languages.  \emph{Warning:} This

   197        modifies adaptation in a non-conservative manner and thus

   198        should always be imported \emph{last} in a theory header.

   199

   200     \item[@{theory "IArray"}] provides a type @{typ "'a iarray"}

   201        isomorphic to lists but implemented by (effectively immutable)

   202        arrays \emph{in SML only}.

   203

   204   \end{description}

   205 \<close>

   206

   207

   208 subsection \<open>Parametrising serialisation \label{sec:adaptation_mechanisms}\<close>

   209

   210 text \<open>

   211   Consider the following function and its corresponding SML code:

   212 \<close>

   213

   214 primrec %quote in_interval :: "nat \<times> nat \<Rightarrow> nat \<Rightarrow> bool" where

   215   "in_interval (k, l) n \<longleftrightarrow> k \<le> n \<and> n \<le> l"

   216 (*<*)

   217 code_printing %invisible

   218   type_constructor bool \<rightharpoonup> (SML)

   219 | constant True \<rightharpoonup> (SML)

   220 | constant False \<rightharpoonup> (SML)

   221 | constant HOL.conj \<rightharpoonup> (SML)

   222 | constant Not \<rightharpoonup> (SML)

   223 (*>*)

   224 text %quotetypewriter \<open>

   225   @{code_stmts in_interval (SML)}

   226 \<close>

   227

   228 text \<open>

   229   \noindent Though this is correct code, it is a little bit

   230   unsatisfactory: boolean values and operators are materialised as

   231   distinguished entities with have nothing to do with the SML-built-in

   232   notion of \qt{bool}.  This results in less readable code;

   233   additionally, eager evaluation may cause programs to loop or break

   234   which would perfectly terminate when the existing SML @{verbatim

   235   "bool"} would be used.  To map the HOL @{typ bool} on SML @{verbatim

   236   "bool"}, we may use \qn{custom serialisations}:

   237 \<close>

   238

   239 code_printing %quotett

   240   type_constructor bool \<rightharpoonup> (SML) "bool"

   241 | constant True \<rightharpoonup> (SML) "true"

   242 | constant False \<rightharpoonup> (SML) "false"

   243 | constant HOL.conj \<rightharpoonup> (SML) "_ andalso _"

   244

   245 text \<open>

   246   \noindent The @{command_def code_printing} command takes a series

   247   of symbols (contants, type constructor, \ldots)

   248   together with target-specific custom serialisations.  Each

   249   custom serialisation starts with a target language identifier

   250   followed by an expression, which during code serialisation is

   251   inserted whenever the type constructor would occur.  Each

   252   @{verbatim "_"}'' in a serialisation expression is treated as a

   253   placeholder for the constant's or the type constructor's arguments.

   254 \<close>

   255

   256 text %quotetypewriter \<open>

   257   @{code_stmts in_interval (SML)}

   258 \<close>

   259

   260 text \<open>

   261   \noindent This still is not perfect: the parentheses around the

   262   \qt{andalso} expression are superfluous.  Though the serialiser by

   263   no means attempts to imitate the rich Isabelle syntax framework, it

   264   provides some common idioms, notably associative infixes with

   265   precedences which may be used here:

   266 \<close>

   267

   268 code_printing %quotett

   269   constant HOL.conj \<rightharpoonup> (SML) infixl 1 "andalso"

   270

   271 text %quotetypewriter \<open>

   272   @{code_stmts in_interval (SML)}

   273 \<close>

   274

   275 text \<open>

   276   \noindent The attentive reader may ask how we assert that no

   277   generated code will accidentally overwrite.  For this reason the

   278   serialiser has an internal table of identifiers which have to be

   279   avoided to be used for new declarations.  Initially, this table

   280   typically contains the keywords of the target language.  It can be

   281   extended manually, thus avoiding accidental overwrites, using the

   282   @{command_def "code_reserved"} command:

   283 \<close>

   284

   285 code_reserved %quote "\<SMLdummy>" bool true false andalso

   286

   287 text \<open>

   288   \noindent Next, we try to map HOL pairs to SML pairs, using the

   289   infix @{verbatim "*"}'' type constructor and parentheses:

   290 \<close>

   291 (*<*)

   292 code_printing %invisible

   293   type_constructor prod \<rightharpoonup> (SML)

   294 | constant Pair \<rightharpoonup> (SML)

   295 (*>*)

   296 code_printing %quotett

   297   type_constructor prod \<rightharpoonup> (SML) infix 2 "*"

   298 | constant Pair \<rightharpoonup> (SML) "!((_),/ (_))"

   299

   300 text \<open>

   301   \noindent The initial bang @{verbatim "!"}'' tells the serialiser

   302   never to put parentheses around the whole expression (they are

   303   already present), while the parentheses around argument place

   304   holders tell not to put parentheses around the arguments.  The slash

   305   @{verbatim "/"}'' (followed by arbitrary white space) inserts a

   306   space which may be used as a break if necessary during pretty

   307   printing.

   308

   309   These examples give a glimpse what mechanisms custom serialisations

   310   provide; however their usage requires careful thinking in order not

   311   to introduce inconsistencies -- or, in other words: custom

   312   serialisations are completely axiomatic.

   313

   314   A further noteworthy detail is that any special character in a

   315   custom serialisation may be quoted using @{verbatim "'"}''; thus,

   316   in @{verbatim "fn '_ => _"}'' the first @{verbatim "_"}'' is a

   317   proper underscore while the second @{verbatim "_"}'' is a

   318   placeholder.

   319 \<close>

   320

   321

   322 subsection \<open>@{text Haskell} serialisation\<close>

   323

   324 text \<open>

   325   For convenience, the default @{text HOL} setup for @{text Haskell}

   326   maps the @{class equal} class to its counterpart in @{text Haskell},

   327   giving custom serialisations for the class @{class equal}

   328   and its operation @{const [source] HOL.equal}.

   329 \<close>

   330

   331 code_printing %quotett

   332   type_class equal \<rightharpoonup> (Haskell) "Eq"

   333 | constant HOL.equal \<rightharpoonup> (Haskell) infixl 4 "=="

   334

   335 text \<open>

   336   \noindent A problem now occurs whenever a type which is an instance

   337   of @{class equal} in @{text HOL} is mapped on a @{text

   338   Haskell}-built-in type which is also an instance of @{text Haskell}

   339   @{text Eq}:

   340 \<close>

   341

   342 typedecl %quote bar

   343

   344 instantiation %quote bar :: equal

   345 begin

   346

   347 definition %quote "HOL.equal (x::bar) y \<longleftrightarrow> x = y"

   348

   349 instance %quote by standard (simp add: equal_bar_def)

   350

   351 end %quote (*<*)

   352

   353 (*>*) code_printing %quotett

   354   type_constructor bar \<rightharpoonup> (Haskell) "Integer"

   355

   356 text \<open>

   357   \noindent The code generator would produce an additional instance,

   358   which of course is rejected by the @{text Haskell} compiler.  To

   359   suppress this additional instance:

   360 \<close>

   361

   362 code_printing %quotett

   363   class_instance bar :: "HOL.equal" \<rightharpoonup> (Haskell) -

   364

   365

   366 subsection \<open>Enhancing the target language context \label{sec:include}\<close>

   367

   368 text \<open>

   369   In rare cases it is necessary to \emph{enrich} the context of a

   370   target language; this can also be accomplished using the @{command

   371   "code_printing"} command:

   372 \<close>

   373

   374 code_printing %quotett

   375   code_module "Errno" \<rightharpoonup> (Haskell)

   376     \<open>errno i = error ("Error number: " ++ show i)\<close>

   377

   378 code_reserved %quotett Haskell Errno

   379

   380 text \<open>

   381   \noindent Such named modules are then prepended to every

   382   generated code.  Inspect such code in order to find out how

   383   this behaves with respect to a particular

   384   target language.

   385 \<close>

   386

   387 end

   388