doc-src/IsarAdvanced/Codegen/Thy/Program.thy
author haftmann
Wed Oct 01 13:33:54 2008 +0200 (2008-10-01)
changeset 28447 df77ed974a78
parent 28428 fd007794561f
child 28456 7a558c872104
permissions -rw-r--r--
fixed
     1 theory Program
     2 imports Introduction
     3 begin
     4 
     5 section {* Turning Theories into Programs \label{sec:program} *}
     6 
     7 subsection {* The @{text "Isabelle/HOL"} default setup *}
     8 
     9 text {*
    10   We have already seen how by default equations stemming from
    11   @{command definition}/@{command primrec}/@{command fun}
    12   statements are used for code generation.  This default behaviour
    13   can be changed, e.g. by providing different defining equations.
    14   All kinds of customization shown in this section is \emph{safe}
    15   in the sense that the user does not have to worry about
    16   correctness -- all programs generatable that way are partially
    17   correct.
    18 *}
    19 
    20 subsection {* Selecting code equations *}
    21 
    22 text {*
    23   Coming back to our introductory example, we
    24   could provide an alternative defining equations for @{const dequeue}
    25   explicitly:
    26 *}
    27 
    28 lemma %quoteme [code func]:
    29   "dequeue (Queue xs []) =
    30      (if xs = [] then (None, Queue [] []) else dequeue (Queue [] (rev xs)))"
    31   "dequeue (Queue xs (y # ys)) =
    32      (Some y, Queue xs ys)"
    33   by (cases xs, simp_all) (cases "rev xs", simp_all)
    34 
    35 text {*
    36   \noindent The annotation @{text "[code func]"} is an @{text Isar}
    37   @{text attribute} which states that the given theorems should be
    38   considered as defining equations for a @{text fun} statement --
    39   the corresponding constant is determined syntactically.  The resulting code:
    40 *}
    41 
    42 text %quoteme {*@{code_stmts empty enqueue dequeue (Haskell)}*}
    43 
    44 text {*
    45   \noindent You may note that the equality test @{term "xs = []"} has been
    46   replaced by the predicate @{term "null xs"}.  This is due to the default
    47   setup in the \qn{preprocessor} to be discussed further below (\secref{sec:preproc}).
    48 
    49   Changing the default constructor set of datatypes is also
    50   possible but rarely desired in practice.  See \secref{sec:datatypes} for an example.
    51 
    52   As told in \secref{sec:concept}, code generation is based
    53   on a structured collection of code theorems.
    54   For explorative purpose, this collection
    55   may be inspected using the @{command code_thms} command:
    56 *}
    57 
    58 code_thms %quoteme dequeue
    59 
    60 text {*
    61   \noindent prints a table with \emph{all} defining equations
    62   for @{const dequeue}, including
    63   \emph{all} defining equations those equations depend
    64   on recursively.
    65   
    66   Similarly, the @{command code_deps} command shows a graph
    67   visualising dependencies between defining equations.
    68 *}
    69 
    70 subsection {* @{text class} and @{text instantiation} *}
    71 
    72 text {*
    73   Concerning type classes and code generation, let us examine an example
    74   from abstract algebra:
    75 *}
    76 
    77 class %quoteme semigroup = type +
    78   fixes mult :: "'a \<Rightarrow> 'a \<Rightarrow> 'a" (infixl "\<otimes>" 70)
    79   assumes assoc: "(x \<otimes> y) \<otimes> z = x \<otimes> (y \<otimes> z)"
    80 
    81 class %quoteme monoid = semigroup +
    82   fixes neutral :: 'a ("\<one>")
    83   assumes neutl: "\<one> \<otimes> x = x"
    84     and neutr: "x \<otimes> \<one> = x"
    85 
    86 instantiation %quoteme nat :: monoid
    87 begin
    88 
    89 primrec %quoteme mult_nat where
    90     "0 \<otimes> n = (0\<Colon>nat)"
    91   | "Suc m \<otimes> n = n + m \<otimes> n"
    92 
    93 definition %quoteme neutral_nat where
    94   "\<one> = Suc 0"
    95 
    96 lemma %quoteme add_mult_distrib:
    97   fixes n m q :: nat
    98   shows "(n + m) \<otimes> q = n \<otimes> q + m \<otimes> q"
    99   by (induct n) simp_all
   100 
   101 instance %quoteme proof
   102   fix m n q :: nat
   103   show "m \<otimes> n \<otimes> q = m \<otimes> (n \<otimes> q)"
   104     by (induct m) (simp_all add: add_mult_distrib)
   105   show "\<one> \<otimes> n = n"
   106     by (simp add: neutral_nat_def)
   107   show "m \<otimes> \<one> = m"
   108     by (induct m) (simp_all add: neutral_nat_def)
   109 qed
   110 
   111 end %quoteme
   112 
   113 text {*
   114   \noindent We define the natural operation of the natural numbers
   115   on monoids:
   116 *}
   117 
   118 primrec %quoteme (in monoid) pow :: "nat \<Rightarrow> 'a \<Rightarrow> 'a" where
   119     "pow 0 a = \<one>"
   120   | "pow (Suc n) a = a \<otimes> pow n a"
   121 
   122 text {*
   123   \noindent This we use to define the discrete exponentiation function:
   124 *}
   125 
   126 definition %quoteme bexp :: "nat \<Rightarrow> nat" where
   127   "bexp n = pow n (Suc (Suc 0))"
   128 
   129 text {*
   130   \noindent The corresponding code:
   131 *}
   132 
   133 text %quoteme {*@{code_stmts bexp (Haskell)}*}
   134 
   135 text {*
   136   \noindent This is a convenient place to show how explicit dictionary construction
   137   manifests in generated code (here, the same example in @{text SML}):
   138 *}
   139 
   140 text %quoteme {*@{code_stmts bexp (SML)}*}
   141 
   142 text {*
   143   \noindent Note the parameters with trailing underscore (@{verbatim "A_"})
   144     which are the dictionary parameters.
   145 *}
   146 
   147 subsection {* The preprocessor \label{sec:preproc} *}
   148 
   149 text {*
   150   Before selected function theorems are turned into abstract
   151   code, a chain of definitional transformation steps is carried
   152   out: \emph{preprocessing}.  In essence, the preprocessor
   153   consists of two components: a \emph{simpset} and \emph{function transformers}.
   154 
   155   The \emph{simpset} allows to employ the full generality of the Isabelle
   156   simplifier.  Due to the interpretation of theorems
   157   as defining equations, rewrites are applied to the right
   158   hand side and the arguments of the left hand side of an
   159   equation, but never to the constant heading the left hand side.
   160   An important special case are \emph{inline theorems} which may be
   161   declared and undeclared using the
   162   \emph{code inline} or \emph{code inline del} attribute respectively.
   163 
   164   Some common applications:
   165 *}
   166 
   167 text_raw {*
   168   \begin{itemize}
   169 *}
   170 
   171 text {*
   172      \item replacing non-executable constructs by executable ones:
   173 *}     
   174 
   175 lemma %quoteme [code inline]:
   176   "x \<in> set xs \<longleftrightarrow> x mem xs" by (induct xs) simp_all
   177 
   178 text {*
   179      \item eliminating superfluous constants:
   180 *}
   181 
   182 lemma %quoteme [code inline]:
   183   "1 = Suc 0" by simp
   184 
   185 text {*
   186      \item replacing executable but inconvenient constructs:
   187 *}
   188 
   189 lemma %quoteme [code inline]:
   190   "xs = [] \<longleftrightarrow> List.null xs" by (induct xs) simp_all
   191 
   192 text_raw {*
   193   \end{itemize}
   194 *}
   195 
   196 text {*
   197   \noindent \emph{Function transformers} provide a very general interface,
   198   transforming a list of function theorems to another
   199   list of function theorems, provided that neither the heading
   200   constant nor its type change.  The @{term "0\<Colon>nat"} / @{const Suc}
   201   pattern elimination implemented in
   202   theory @{text Efficient_Nat} (see \secref{eff_nat}) uses this
   203   interface.
   204 
   205   \noindent The current setup of the preprocessor may be inspected using
   206   the @{command print_codesetup} command.
   207   @{command code_thms} provides a convenient
   208   mechanism to inspect the impact of a preprocessor setup
   209   on defining equations.
   210 
   211   \begin{warn}
   212     The attribute \emph{code unfold}
   213     associated with the @{text "SML code generator"} also applies to
   214     the @{text "generic code generator"}:
   215     \emph{code unfold} implies \emph{code inline}.
   216   \end{warn}
   217 *}
   218 
   219 subsection {* Datatypes \label{sec:datatypes} *}
   220 
   221 text {*
   222   Conceptually, any datatype is spanned by a set of
   223   \emph{constructors} of type @{text "\<tau> = \<dots> \<Rightarrow> \<kappa> \<alpha>\<^isub>1 \<dots> \<alpha>\<^isub>n"}
   224   where @{text "{\<alpha>\<^isub>1, \<dots>, \<alpha>\<^isub>n}"} is excactly the set of \emph{all}
   225   type variables in @{text "\<tau>"}.  The HOL datatype package
   226   by default registers any new datatype in the table
   227   of datatypes, which may be inspected using
   228   the @{command print_codesetup} command.
   229 
   230   In some cases, it may be convenient to alter or
   231   extend this table;  as an example, we will develop an alternative
   232   representation of natural numbers as binary digits, whose
   233   size does increase logarithmically with its value, not linear
   234   \footnote{Indeed, the @{theory Efficient_Nat} theory (see \ref{eff_nat})
   235     does something similar}.  First, the digit representation:
   236 *}
   237 
   238 definition %quoteme Dig0 :: "nat \<Rightarrow> nat" where
   239   "Dig0 n = 2 * n"
   240 
   241 definition %quoteme  Dig1 :: "nat \<Rightarrow> nat" where
   242   "Dig1 n = Suc (2 * n)"
   243 
   244 text {*
   245   \noindent We will use these two \qt{digits} to represent natural numbers
   246   in binary digits, e.g.:
   247 *}
   248 
   249 lemma %quoteme 42: "42 = Dig0 (Dig1 (Dig0 (Dig1 (Dig0 1))))"
   250   by (simp add: Dig0_def Dig1_def)
   251 
   252 text {*
   253   \noindent Of course we also have to provide proper code equations for
   254   the operations, e.g. @{term "op + \<Colon> nat \<Rightarrow> nat \<Rightarrow> nat"}:
   255 *}
   256 
   257 lemma %quoteme plus_Dig [code func]:
   258   "0 + n = n"
   259   "m + 0 = m"
   260   "1 + Dig0 n = Dig1 n"
   261   "Dig0 m + 1 = Dig1 m"
   262   "1 + Dig1 n = Dig0 (n + 1)"
   263   "Dig1 m + 1 = Dig0 (m + 1)"
   264   "Dig0 m + Dig0 n = Dig0 (m + n)"
   265   "Dig0 m + Dig1 n = Dig1 (m + n)"
   266   "Dig1 m + Dig0 n = Dig1 (m + n)"
   267   "Dig1 m + Dig1 n = Dig0 (m + n + 1)"
   268   by (simp_all add: Dig0_def Dig1_def)
   269 
   270 text {*
   271   \noindent We then instruct the code generator to view @{term "0\<Colon>nat"},
   272   @{term "1\<Colon>nat"}, @{term Dig0} and @{term Dig1} as
   273   datatype constructors:
   274 *}
   275 
   276 code_datatype %quoteme "0\<Colon>nat" "1\<Colon>nat" Dig0 Dig1
   277 
   278 text {*
   279   \noindent For the former constructor @{term Suc}, we provide a code
   280   equation and remove some parts of the default code generator setup
   281   which are an obstacle here:
   282 *}
   283 
   284 lemma %quoteme Suc_Dig [code func]:
   285   "Suc n = n + 1"
   286   by simp
   287 
   288 declare %quoteme One_nat_def [code inline del]
   289 declare %quoteme add_Suc_shift [code func del] 
   290 
   291 text {*
   292   \noindent This yields the following code:
   293 *}
   294 
   295 text %quoteme {*@{code_stmts "op + \<Colon> nat \<Rightarrow> nat \<Rightarrow> nat" (SML)}*}
   296 
   297 text {*
   298   \noindent From this example, it can be easily glimpsed that using own constructor sets
   299   is a little delicate since it changes the set of valid patterns for values
   300   of that type.  Without going into much detail, here some practical hints:
   301 
   302   \begin{itemize}
   303     \item When changing the constructor set for datatypes, take care to
   304       provide an alternative for the @{text case} combinator (e.g.~by replacing
   305       it using the preprocessor).
   306     \item Values in the target language need not to be normalised -- different
   307       values in the target language may represent the same value in the
   308       logic (e.g. @{term "Dig1 0 = 1"}).
   309     \item Usually, a good methodology to deal with the subtleties of pattern
   310       matching is to see the type as an abstract type: provide a set
   311       of operations which operate on the concrete representation of the type,
   312       and derive further operations by combinations of these primitive ones,
   313       without relying on a particular representation.
   314   \end{itemize}
   315 *}
   316 
   317 code_datatype %invisible "0::nat" Suc
   318 declare %invisible plus_Dig [code func del]
   319 declare %invisible One_nat_def [code inline]
   320 declare %invisible add_Suc_shift [code func] 
   321 lemma %invisible [code func]: "0 + n = (n \<Colon> nat)" by simp
   322 
   323 
   324 subsection {* Equality and wellsortedness *}
   325 
   326 text {*
   327   Surely you have already noticed how equality is treated
   328   by the code generator:
   329 *}
   330 
   331 primrec %quoteme collect_duplicates :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a list \<Rightarrow> 'a list" where
   332   "collect_duplicates xs ys [] = xs"
   333   | "collect_duplicates xs ys (z#zs) = (if z \<in> set xs
   334       then if z \<in> set ys
   335         then collect_duplicates xs ys zs
   336         else collect_duplicates xs (z#ys) zs
   337       else collect_duplicates (z#xs) (z#ys) zs)"
   338 
   339 text {*
   340   \noindent The membership test during preprocessing is rewritten,
   341   resulting in @{const List.member}, which itself
   342   performs an explicit equality check.
   343 *}
   344 
   345 text %quoteme {*@{code_stmts collect_duplicates (SML)}*}
   346 
   347 text {*
   348   \noindent Obviously, polymorphic equality is implemented the Haskell
   349   way using a type class.  How is this achieved?  HOL introduces
   350   an explicit class @{class eq} with a corresponding operation
   351   @{const eq_class.eq} such that @{thm eq [no_vars]}.
   352   The preprocessing framework does the rest by propagating the
   353   @{class eq} constraints through all dependent defining equations.
   354   For datatypes, instances of @{class eq} are implicitly derived
   355   when possible.  For other types, you may instantiate @{text eq}
   356   manually like any other type class.
   357 
   358   Though this @{text eq} class is designed to get rarely in
   359   the way, a subtlety
   360   enters the stage when definitions of overloaded constants
   361   are dependent on operational equality.  For example, let
   362   us define a lexicographic ordering on tuples
   363   (also see theory @{theory Product_ord}):
   364 *}
   365 
   366 instantiation %quoteme "*" :: (order, order) order
   367 begin
   368 
   369 definition %quoteme [code func del]:
   370   "x \<le> y \<longleftrightarrow> fst x < fst y \<or> fst x = fst y \<and> snd x \<le> snd y"
   371 
   372 definition %quoteme [code func del]:
   373   "x < y \<longleftrightarrow> fst x < fst y \<or> fst x = fst y \<and> snd x < snd y"
   374 
   375 instance %quoteme proof
   376 qed (auto simp: less_eq_prod_def less_prod_def intro: order_less_trans)
   377 
   378 end %quoteme
   379 
   380 lemma %quoteme order_prod [code func]:
   381   "(x1 \<Colon> 'a\<Colon>order, y1 \<Colon> 'b\<Colon>order) < (x2, y2) \<longleftrightarrow>
   382      x1 < x2 \<or> x1 = x2 \<and> y1 < y2"
   383   "(x1 \<Colon> 'a\<Colon>order, y1 \<Colon> 'b\<Colon>order) \<le> (x2, y2) \<longleftrightarrow>
   384      x1 < x2 \<or> x1 = x2 \<and> y1 \<le> y2"
   385   by (simp_all add: less_prod_def less_eq_prod_def)
   386 
   387 text {*
   388   \noindent Then code generation will fail.  Why?  The definition
   389   of @{term "op \<le>"} depends on equality on both arguments,
   390   which are polymorphic and impose an additional @{class eq}
   391   class constraint, which the preprocessor does not propagate
   392   (for technical reasons).
   393 
   394   The solution is to add @{class eq} explicitly to the first sort arguments in the
   395   code theorems:
   396 *}
   397 
   398 lemma %quoteme order_prod_code [code func]:
   399   "(x1 \<Colon> 'a\<Colon>{order, eq}, y1 \<Colon> 'b\<Colon>order) < (x2, y2) \<longleftrightarrow>
   400      x1 < x2 \<or> x1 = x2 \<and> y1 < y2"
   401   "(x1 \<Colon> 'a\<Colon>{order, eq}, y1 \<Colon> 'b\<Colon>order) \<le> (x2, y2) \<longleftrightarrow>
   402      x1 < x2 \<or> x1 = x2 \<and> y1 \<le> y2"
   403   by (simp_all add: less_prod_def less_eq_prod_def)
   404 
   405 text {*
   406   \noindent Then code generation succeeds:
   407 *}
   408 
   409 text %quoteme {*@{code_stmts "op \<le> \<Colon> _ \<times> _ \<Rightarrow> _ \<times> _ \<Rightarrow> bool" (SML)}*}
   410 
   411 text {*
   412   In some cases, the automatically derived defining equations
   413   for equality on a particular type may not be appropriate.
   414   As example, watch the following datatype representing
   415   monomorphic parametric types (where type constructors
   416   are referred to by natural numbers):
   417 *}
   418 
   419 datatype %quoteme monotype = Mono nat "monotype list"
   420 (*<*)
   421 lemma monotype_eq:
   422   "Mono tyco1 typargs1 = Mono tyco2 typargs2 \<equiv> 
   423      tyco1 = tyco2 \<and> typargs1 = typargs2" by simp
   424 (*>*)
   425 
   426 text {*
   427   Then code generation for SML would fail with a message
   428   that the generated code contains illegal mutual dependencies:
   429   the theorem @{thm monotype_eq [no_vars]} already requires the
   430   instance @{text "monotype \<Colon> eq"}, which itself requires
   431   @{thm monotype_eq [no_vars]};  Haskell has no problem with mutually
   432   recursive @{text inst} and @{text fun} definitions,
   433   but the SML serializer does not support this.
   434 
   435   In such cases, you have to provide your own equality equations
   436   involving auxiliary constants.  In our case,
   437   @{const [show_types] list_all2} can do the job:
   438 *}
   439 
   440 lemma %quoteme monotype_eq_list_all2 [code func]:
   441   "eq_class.eq (Mono tyco1 typargs1) (Mono tyco2 typargs2) \<longleftrightarrow>
   442      tyco1 = tyco2 \<and> list_all2 eq_class.eq typargs1 typargs2"
   443   by (simp add: eq list_all2_eq [symmetric])
   444 
   445 text {*
   446   \noindent does not depend on instance @{text "monotype \<Colon> eq"}:
   447 *}
   448 
   449 text %quoteme {*@{code_stmts "eq_class.eq :: monotype \<Rightarrow> monotype \<Rightarrow> bool" (SML)}*}
   450 
   451 
   452 subsection {* Partiality *}
   453 
   454 text {* @{command "code_abort"}, examples: maps *}
   455 
   456 end