author haftmann Wed Oct 01 13:33:54 2008 +0200 (2008-10-01) changeset 28447 df77ed974a78 parent 28428 fd007794561f child 28456 7a558c872104 permissions -rw-r--r--
fixed
     1 theory Program

     2 imports Introduction

     3 begin

     4

     5 section {* Turning Theories into Programs \label{sec:program} *}

     6

     7 subsection {* The @{text "Isabelle/HOL"} default setup *}

     8

     9 text {*

    10   We have already seen how by default equations stemming from

    11   @{command definition}/@{command primrec}/@{command fun}

    12   statements are used for code generation.  This default behaviour

    13   can be changed, e.g. by providing different defining equations.

    14   All kinds of customization shown in this section is \emph{safe}

    15   in the sense that the user does not have to worry about

    16   correctness -- all programs generatable that way are partially

    17   correct.

    18 *}

    19

    20 subsection {* Selecting code equations *}

    21

    22 text {*

    23   Coming back to our introductory example, we

    24   could provide an alternative defining equations for @{const dequeue}

    25   explicitly:

    26 *}

    27

    28 lemma %quoteme [code func]:

    29   "dequeue (Queue xs []) =

    30      (if xs = [] then (None, Queue [] []) else dequeue (Queue [] (rev xs)))"

    31   "dequeue (Queue xs (y # ys)) =

    32      (Some y, Queue xs ys)"

    33   by (cases xs, simp_all) (cases "rev xs", simp_all)

    34

    35 text {*

    36   \noindent The annotation @{text "[code func]"} is an @{text Isar}

    37   @{text attribute} which states that the given theorems should be

    38   considered as defining equations for a @{text fun} statement --

    39   the corresponding constant is determined syntactically.  The resulting code:

    40 *}

    41

    42 text %quoteme {*@{code_stmts empty enqueue dequeue (Haskell)}*}

    43

    44 text {*

    45   \noindent You may note that the equality test @{term "xs = []"} has been

    46   replaced by the predicate @{term "null xs"}.  This is due to the default

    47   setup in the \qn{preprocessor} to be discussed further below (\secref{sec:preproc}).

    48

    49   Changing the default constructor set of datatypes is also

    50   possible but rarely desired in practice.  See \secref{sec:datatypes} for an example.

    51

    52   As told in \secref{sec:concept}, code generation is based

    53   on a structured collection of code theorems.

    54   For explorative purpose, this collection

    55   may be inspected using the @{command code_thms} command:

    56 *}

    57

    58 code_thms %quoteme dequeue

    59

    60 text {*

    61   \noindent prints a table with \emph{all} defining equations

    62   for @{const dequeue}, including

    63   \emph{all} defining equations those equations depend

    64   on recursively.

    65

    66   Similarly, the @{command code_deps} command shows a graph

    67   visualising dependencies between defining equations.

    68 *}

    69

    70 subsection {* @{text class} and @{text instantiation} *}

    71

    72 text {*

    73   Concerning type classes and code generation, let us examine an example

    74   from abstract algebra:

    75 *}

    76

    77 class %quoteme semigroup = type +

    78   fixes mult :: "'a \<Rightarrow> 'a \<Rightarrow> 'a" (infixl "\<otimes>" 70)

    79   assumes assoc: "(x \<otimes> y) \<otimes> z = x \<otimes> (y \<otimes> z)"

    80

    81 class %quoteme monoid = semigroup +

    82   fixes neutral :: 'a ("\<one>")

    83   assumes neutl: "\<one> \<otimes> x = x"

    84     and neutr: "x \<otimes> \<one> = x"

    85

    86 instantiation %quoteme nat :: monoid

    87 begin

    88

    89 primrec %quoteme mult_nat where

    90     "0 \<otimes> n = (0\<Colon>nat)"

    91   | "Suc m \<otimes> n = n + m \<otimes> n"

    92

    93 definition %quoteme neutral_nat where

    94   "\<one> = Suc 0"

    95

    96 lemma %quoteme add_mult_distrib:

    97   fixes n m q :: nat

    98   shows "(n + m) \<otimes> q = n \<otimes> q + m \<otimes> q"

    99   by (induct n) simp_all

   100

   101 instance %quoteme proof

   102   fix m n q :: nat

   103   show "m \<otimes> n \<otimes> q = m \<otimes> (n \<otimes> q)"

   104     by (induct m) (simp_all add: add_mult_distrib)

   105   show "\<one> \<otimes> n = n"

   106     by (simp add: neutral_nat_def)

   107   show "m \<otimes> \<one> = m"

   108     by (induct m) (simp_all add: neutral_nat_def)

   109 qed

   110

   111 end %quoteme

   112

   113 text {*

   114   \noindent We define the natural operation of the natural numbers

   115   on monoids:

   116 *}

   117

   118 primrec %quoteme (in monoid) pow :: "nat \<Rightarrow> 'a \<Rightarrow> 'a" where

   119     "pow 0 a = \<one>"

   120   | "pow (Suc n) a = a \<otimes> pow n a"

   121

   122 text {*

   123   \noindent This we use to define the discrete exponentiation function:

   124 *}

   125

   126 definition %quoteme bexp :: "nat \<Rightarrow> nat" where

   127   "bexp n = pow n (Suc (Suc 0))"

   128

   129 text {*

   130   \noindent The corresponding code:

   131 *}

   132

   133 text %quoteme {*@{code_stmts bexp (Haskell)}*}

   134

   135 text {*

   136   \noindent This is a convenient place to show how explicit dictionary construction

   137   manifests in generated code (here, the same example in @{text SML}):

   138 *}

   139

   140 text %quoteme {*@{code_stmts bexp (SML)}*}

   141

   142 text {*

   143   \noindent Note the parameters with trailing underscore (@{verbatim "A_"})

   144     which are the dictionary parameters.

   145 *}

   146

   147 subsection {* The preprocessor \label{sec:preproc} *}

   148

   149 text {*

   150   Before selected function theorems are turned into abstract

   151   code, a chain of definitional transformation steps is carried

   152   out: \emph{preprocessing}.  In essence, the preprocessor

   153   consists of two components: a \emph{simpset} and \emph{function transformers}.

   154

   155   The \emph{simpset} allows to employ the full generality of the Isabelle

   156   simplifier.  Due to the interpretation of theorems

   157   as defining equations, rewrites are applied to the right

   158   hand side and the arguments of the left hand side of an

   159   equation, but never to the constant heading the left hand side.

   160   An important special case are \emph{inline theorems} which may be

   161   declared and undeclared using the

   162   \emph{code inline} or \emph{code inline del} attribute respectively.

   163

   164   Some common applications:

   165 *}

   166

   167 text_raw {*

   168   \begin{itemize}

   169 *}

   170

   171 text {*

   172      \item replacing non-executable constructs by executable ones:

   173 *}

   174

   175 lemma %quoteme [code inline]:

   176   "x \<in> set xs \<longleftrightarrow> x mem xs" by (induct xs) simp_all

   177

   178 text {*

   179      \item eliminating superfluous constants:

   180 *}

   181

   182 lemma %quoteme [code inline]:

   183   "1 = Suc 0" by simp

   184

   185 text {*

   186      \item replacing executable but inconvenient constructs:

   187 *}

   188

   189 lemma %quoteme [code inline]:

   190   "xs = [] \<longleftrightarrow> List.null xs" by (induct xs) simp_all

   191

   192 text_raw {*

   193   \end{itemize}

   194 *}

   195

   196 text {*

   197   \noindent \emph{Function transformers} provide a very general interface,

   198   transforming a list of function theorems to another

   199   list of function theorems, provided that neither the heading

   200   constant nor its type change.  The @{term "0\<Colon>nat"} / @{const Suc}

   201   pattern elimination implemented in

   202   theory @{text Efficient_Nat} (see \secref{eff_nat}) uses this

   203   interface.

   204

   205   \noindent The current setup of the preprocessor may be inspected using

   206   the @{command print_codesetup} command.

   207   @{command code_thms} provides a convenient

   208   mechanism to inspect the impact of a preprocessor setup

   209   on defining equations.

   210

   211   \begin{warn}

   212     The attribute \emph{code unfold}

   213     associated with the @{text "SML code generator"} also applies to

   214     the @{text "generic code generator"}:

   215     \emph{code unfold} implies \emph{code inline}.

   216   \end{warn}

   217 *}

   218

   219 subsection {* Datatypes \label{sec:datatypes} *}

   220

   221 text {*

   222   Conceptually, any datatype is spanned by a set of

   223   \emph{constructors} of type @{text "\<tau> = \<dots> \<Rightarrow> \<kappa> \<alpha>\<^isub>1 \<dots> \<alpha>\<^isub>n"}

   224   where @{text "{\<alpha>\<^isub>1, \<dots>, \<alpha>\<^isub>n}"} is excactly the set of \emph{all}

   225   type variables in @{text "\<tau>"}.  The HOL datatype package

   226   by default registers any new datatype in the table

   227   of datatypes, which may be inspected using

   228   the @{command print_codesetup} command.

   229

   230   In some cases, it may be convenient to alter or

   231   extend this table;  as an example, we will develop an alternative

   232   representation of natural numbers as binary digits, whose

   233   size does increase logarithmically with its value, not linear

   234   \footnote{Indeed, the @{theory Efficient_Nat} theory (see \ref{eff_nat})

   235     does something similar}.  First, the digit representation:

   236 *}

   237

   238 definition %quoteme Dig0 :: "nat \<Rightarrow> nat" where

   239   "Dig0 n = 2 * n"

   240

   241 definition %quoteme  Dig1 :: "nat \<Rightarrow> nat" where

   242   "Dig1 n = Suc (2 * n)"

   243

   244 text {*

   245   \noindent We will use these two \qt{digits} to represent natural numbers

   246   in binary digits, e.g.:

   247 *}

   248

   249 lemma %quoteme 42: "42 = Dig0 (Dig1 (Dig0 (Dig1 (Dig0 1))))"

   250   by (simp add: Dig0_def Dig1_def)

   251

   252 text {*

   253   \noindent Of course we also have to provide proper code equations for

   254   the operations, e.g. @{term "op + \<Colon> nat \<Rightarrow> nat \<Rightarrow> nat"}:

   255 *}

   256

   257 lemma %quoteme plus_Dig [code func]:

   258   "0 + n = n"

   259   "m + 0 = m"

   260   "1 + Dig0 n = Dig1 n"

   261   "Dig0 m + 1 = Dig1 m"

   262   "1 + Dig1 n = Dig0 (n + 1)"

   263   "Dig1 m + 1 = Dig0 (m + 1)"

   264   "Dig0 m + Dig0 n = Dig0 (m + n)"

   265   "Dig0 m + Dig1 n = Dig1 (m + n)"

   266   "Dig1 m + Dig0 n = Dig1 (m + n)"

   267   "Dig1 m + Dig1 n = Dig0 (m + n + 1)"

   268   by (simp_all add: Dig0_def Dig1_def)

   269

   270 text {*

   271   \noindent We then instruct the code generator to view @{term "0\<Colon>nat"},

   272   @{term "1\<Colon>nat"}, @{term Dig0} and @{term Dig1} as

   273   datatype constructors:

   274 *}

   275

   276 code_datatype %quoteme "0\<Colon>nat" "1\<Colon>nat" Dig0 Dig1

   277

   278 text {*

   279   \noindent For the former constructor @{term Suc}, we provide a code

   280   equation and remove some parts of the default code generator setup

   281   which are an obstacle here:

   282 *}

   283

   284 lemma %quoteme Suc_Dig [code func]:

   285   "Suc n = n + 1"

   286   by simp

   287

   288 declare %quoteme One_nat_def [code inline del]

   289 declare %quoteme add_Suc_shift [code func del]

   290

   291 text {*

   292   \noindent This yields the following code:

   293 *}

   294

   295 text %quoteme {*@{code_stmts "op + \<Colon> nat \<Rightarrow> nat \<Rightarrow> nat" (SML)}*}

   296

   297 text {*

   298   \noindent From this example, it can be easily glimpsed that using own constructor sets

   299   is a little delicate since it changes the set of valid patterns for values

   300   of that type.  Without going into much detail, here some practical hints:

   301

   302   \begin{itemize}

   303     \item When changing the constructor set for datatypes, take care to

   304       provide an alternative for the @{text case} combinator (e.g.~by replacing

   305       it using the preprocessor).

   306     \item Values in the target language need not to be normalised -- different

   307       values in the target language may represent the same value in the

   308       logic (e.g. @{term "Dig1 0 = 1"}).

   309     \item Usually, a good methodology to deal with the subtleties of pattern

   310       matching is to see the type as an abstract type: provide a set

   311       of operations which operate on the concrete representation of the type,

   312       and derive further operations by combinations of these primitive ones,

   313       without relying on a particular representation.

   314   \end{itemize}

   315 *}

   316

   317 code_datatype %invisible "0::nat" Suc

   318 declare %invisible plus_Dig [code func del]

   319 declare %invisible One_nat_def [code inline]

   320 declare %invisible add_Suc_shift [code func]

   321 lemma %invisible [code func]: "0 + n = (n \<Colon> nat)" by simp

   322

   323

   324 subsection {* Equality and wellsortedness *}

   325

   326 text {*

   327   Surely you have already noticed how equality is treated

   328   by the code generator:

   329 *}

   330

   331 primrec %quoteme collect_duplicates :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a list \<Rightarrow> 'a list" where

   332   "collect_duplicates xs ys [] = xs"

   333   | "collect_duplicates xs ys (z#zs) = (if z \<in> set xs

   334       then if z \<in> set ys

   335         then collect_duplicates xs ys zs

   336         else collect_duplicates xs (z#ys) zs

   337       else collect_duplicates (z#xs) (z#ys) zs)"

   338

   339 text {*

   340   \noindent The membership test during preprocessing is rewritten,

   341   resulting in @{const List.member}, which itself

   342   performs an explicit equality check.

   343 *}

   344

   345 text %quoteme {*@{code_stmts collect_duplicates (SML)}*}

   346

   347 text {*

   348   \noindent Obviously, polymorphic equality is implemented the Haskell

   349   way using a type class.  How is this achieved?  HOL introduces

   350   an explicit class @{class eq} with a corresponding operation

   351   @{const eq_class.eq} such that @{thm eq [no_vars]}.

   352   The preprocessing framework does the rest by propagating the

   353   @{class eq} constraints through all dependent defining equations.

   354   For datatypes, instances of @{class eq} are implicitly derived

   355   when possible.  For other types, you may instantiate @{text eq}

   356   manually like any other type class.

   357

   358   Though this @{text eq} class is designed to get rarely in

   359   the way, a subtlety

   360   enters the stage when definitions of overloaded constants

   361   are dependent on operational equality.  For example, let

   362   us define a lexicographic ordering on tuples

   363   (also see theory @{theory Product_ord}):

   364 *}

   365

   366 instantiation %quoteme "*" :: (order, order) order

   367 begin

   368

   369 definition %quoteme [code func del]:

   370   "x \<le> y \<longleftrightarrow> fst x < fst y \<or> fst x = fst y \<and> snd x \<le> snd y"

   371

   372 definition %quoteme [code func del]:

   373   "x < y \<longleftrightarrow> fst x < fst y \<or> fst x = fst y \<and> snd x < snd y"

   374

   375 instance %quoteme proof

   376 qed (auto simp: less_eq_prod_def less_prod_def intro: order_less_trans)

   377

   378 end %quoteme

   379

   380 lemma %quoteme order_prod [code func]:

   381   "(x1 \<Colon> 'a\<Colon>order, y1 \<Colon> 'b\<Colon>order) < (x2, y2) \<longleftrightarrow>

   382      x1 < x2 \<or> x1 = x2 \<and> y1 < y2"

   383   "(x1 \<Colon> 'a\<Colon>order, y1 \<Colon> 'b\<Colon>order) \<le> (x2, y2) \<longleftrightarrow>

   384      x1 < x2 \<or> x1 = x2 \<and> y1 \<le> y2"

   385   by (simp_all add: less_prod_def less_eq_prod_def)

   386

   387 text {*

   388   \noindent Then code generation will fail.  Why?  The definition

   389   of @{term "op \<le>"} depends on equality on both arguments,

   390   which are polymorphic and impose an additional @{class eq}

   391   class constraint, which the preprocessor does not propagate

   392   (for technical reasons).

   393

   394   The solution is to add @{class eq} explicitly to the first sort arguments in the

   395   code theorems:

   396 *}

   397

   398 lemma %quoteme order_prod_code [code func]:

   399   "(x1 \<Colon> 'a\<Colon>{order, eq}, y1 \<Colon> 'b\<Colon>order) < (x2, y2) \<longleftrightarrow>

   400      x1 < x2 \<or> x1 = x2 \<and> y1 < y2"

   401   "(x1 \<Colon> 'a\<Colon>{order, eq}, y1 \<Colon> 'b\<Colon>order) \<le> (x2, y2) \<longleftrightarrow>

   402      x1 < x2 \<or> x1 = x2 \<and> y1 \<le> y2"

   403   by (simp_all add: less_prod_def less_eq_prod_def)

   404

   405 text {*

   406   \noindent Then code generation succeeds:

   407 *}

   408

   409 text %quoteme {*@{code_stmts "op \<le> \<Colon> _ \<times> _ \<Rightarrow> _ \<times> _ \<Rightarrow> bool" (SML)}*}

   410

   411 text {*

   412   In some cases, the automatically derived defining equations

   413   for equality on a particular type may not be appropriate.

   414   As example, watch the following datatype representing

   415   monomorphic parametric types (where type constructors

   416   are referred to by natural numbers):

   417 *}

   418

   419 datatype %quoteme monotype = Mono nat "monotype list"

   420 (*<*)

   421 lemma monotype_eq:

   422   "Mono tyco1 typargs1 = Mono tyco2 typargs2 \<equiv>

   423      tyco1 = tyco2 \<and> typargs1 = typargs2" by simp

   424 (*>*)

   425

   426 text {*

   427   Then code generation for SML would fail with a message

   428   that the generated code contains illegal mutual dependencies:

   429   the theorem @{thm monotype_eq [no_vars]} already requires the

   430   instance @{text "monotype \<Colon> eq"}, which itself requires

   431   @{thm monotype_eq [no_vars]};  Haskell has no problem with mutually

   432   recursive @{text inst} and @{text fun} definitions,

   433   but the SML serializer does not support this.

   434

   435   In such cases, you have to provide your own equality equations

   436   involving auxiliary constants.  In our case,

   437   @{const [show_types] list_all2} can do the job:

   438 *}

   439

   440 lemma %quoteme monotype_eq_list_all2 [code func]:

   441   "eq_class.eq (Mono tyco1 typargs1) (Mono tyco2 typargs2) \<longleftrightarrow>

   442      tyco1 = tyco2 \<and> list_all2 eq_class.eq typargs1 typargs2"

   443   by (simp add: eq list_all2_eq [symmetric])

   444

   445 text {*

   446   \noindent does not depend on instance @{text "monotype \<Colon> eq"}:

   447 *}

   448

   449 text %quoteme {*@{code_stmts "eq_class.eq :: monotype \<Rightarrow> monotype \<Rightarrow> bool" (SML)}*}

   450

   451

   452 subsection {* Partiality *}

   453

   454 text {* @{command "code_abort"}, examples: maps *}

   455

   456 end