doc-src/IsarAdvanced/Codegen/Thy/Program.thy
changeset 28419 f65e8b318581
parent 28213 b52f9205a02d
child 28428 fd007794561f
equal deleted inserted replaced
28418:4ffb62675ade 28419:f65e8b318581
     1 theory Program
     1 theory Program
     2 imports Setup
     2 imports Introduction
     3 begin
     3 begin
     4 
     4 
     5 section {* Turning Theories into Programs *}
     5 section {* Turning Theories into Programs \label{sec:program} *}
     6 
     6 
     7 subsection {* The @{text "Isabelle/HOL"} default setup *}
     7 subsection {* The @{text "Isabelle/HOL"} default setup *}
     8 
     8 
     9 text {* Invoking the code generator *}
       
    10 
       
    11 subsection {* Selecting code equations *}
     9 subsection {* Selecting code equations *}
    12 
    10 
    13 text {* inspection by @{text "code_thms"} *}
    11 text {*
    14 
    12   We have already seen how by default equations stemming from
    15 subsection {* The preprocessor *}
    13   @{command definition}/@{command primrec}/@{command fun}
    16 
    14   statements are used for code generation.  Deviating from this
    17 subsection {* Datatypes *}
    15   \emph{default} behaviour is always possible -- e.g.~we
    18 
    16   could provide an alternative @{text fun} for @{const dequeue} proving
    19 text {* example: @{text "rat"}, cases *}
    17   the following equations explicitly:
       
    18 *}
       
    19 
       
    20 lemma %quoteme [code func]:
       
    21   "dequeue (Queue xs []) = (if xs = [] then (None, Queue [] []) else dequeue (Queue [] (rev xs)))"
       
    22   "dequeue (Queue xs (y # ys)) = (Some y, Queue xs ys)"
       
    23   by (cases xs, simp_all) (cases "rev xs", simp_all)
       
    24 
       
    25 text {*
       
    26   \noindent The annotation @{text "[code func]"} is an @{text Isar}
       
    27   @{text attribute} which states that the given theorems should be
       
    28   considered as defining equations for a @{text fun} statement --
       
    29   the corresponding constant is determined syntactically.  The resulting code:
       
    30 *}
       
    31 
       
    32 text %quoteme {*@{code_stmts empty enqueue dequeue (Haskell)}*}
       
    33 
       
    34 text {*
       
    35   \noindent You may note that the equality test @{term "xs = []"} has been
       
    36   replaced by the predicate @{term "null xs"}.  This is due to the default
       
    37   setup in the \qn{preprocessor} to be discussed further below (\secref{sec:preproc}).
       
    38 
       
    39   Changing the default constructor set of datatypes is also
       
    40   possible but rarely desired in practice.  See \secref{sec:datatypes} for an example.
       
    41 
       
    42   As told in \secref{sec:concept}, code generation is based
       
    43   on a structured collection of code theorems.
       
    44   For explorative purpose, this collection
       
    45   may be inspected using the @{command code_thms} command:
       
    46 *}
       
    47 
       
    48 code_thms %quoteme dequeue
       
    49 
       
    50 text {*
       
    51   \noindent prints a table with \emph{all} defining equations
       
    52   for @{const dequeue}, including
       
    53   \emph{all} defining equations those equations depend
       
    54   on recursively.
       
    55   
       
    56   Similarly, the @{command code_deps} command shows a graph
       
    57   visualising dependencies between defining equations.
       
    58 *}
       
    59 
       
    60 subsection {* @{text class} and @{text instantiation} *}
       
    61 
       
    62 text {*
       
    63   Concerning type classes and code generation, let us again examine an example
       
    64   from abstract algebra:
       
    65 *}
       
    66 
       
    67 class %quoteme semigroup = type +
       
    68   fixes mult :: "'a \<Rightarrow> 'a \<Rightarrow> 'a" (infixl "\<otimes>" 70)
       
    69   assumes assoc: "(x \<otimes> y) \<otimes> z = x \<otimes> (y \<otimes> z)"
       
    70 
       
    71 class %quoteme monoid = semigroup +
       
    72   fixes neutral :: 'a ("\<one>")
       
    73   assumes neutl: "\<one> \<otimes> x = x"
       
    74     and neutr: "x \<otimes> \<one> = x"
       
    75 
       
    76 instantiation %quoteme nat :: monoid
       
    77 begin
       
    78 
       
    79 primrec %quoteme mult_nat where
       
    80     "0 \<otimes> n = (0\<Colon>nat)"
       
    81   | "Suc m \<otimes> n = n + m \<otimes> n"
       
    82 
       
    83 definition %quoteme neutral_nat where
       
    84   "\<one> = Suc 0"
       
    85 
       
    86 lemma %quoteme add_mult_distrib:
       
    87   fixes n m q :: nat
       
    88   shows "(n + m) \<otimes> q = n \<otimes> q + m \<otimes> q"
       
    89   by (induct n) simp_all
       
    90 
       
    91 instance %quoteme proof
       
    92   fix m n q :: nat
       
    93   show "m \<otimes> n \<otimes> q = m \<otimes> (n \<otimes> q)"
       
    94     by (induct m) (simp_all add: add_mult_distrib)
       
    95   show "\<one> \<otimes> n = n"
       
    96     by (simp add: neutral_nat_def)
       
    97   show "m \<otimes> \<one> = m"
       
    98     by (induct m) (simp_all add: neutral_nat_def)
       
    99 qed
       
   100 
       
   101 end %quoteme
       
   102 
       
   103 text {*
       
   104   \noindent We define the natural operation of the natural numbers
       
   105   on monoids:
       
   106 *}
       
   107 
       
   108 primrec %quoteme pow :: "nat \<Rightarrow> 'a\<Colon>monoid \<Rightarrow> 'a\<Colon>monoid" where
       
   109     "pow 0 a = \<one>"
       
   110   | "pow (Suc n) a = a \<otimes> pow n a"
       
   111 
       
   112 text {*
       
   113   \noindent This we use to define the discrete exponentiation function:
       
   114 *}
       
   115 
       
   116 definition %quoteme bexp :: "nat \<Rightarrow> nat" where
       
   117   "bexp n = pow n (Suc (Suc 0))"
       
   118 
       
   119 text {*
       
   120   \noindent The corresponding code:
       
   121 *}
       
   122 
       
   123 text %quoteme {*@{code_stmts bexp (Haskell)}*}
       
   124 
       
   125 text {*
       
   126   \noindent An inspection reveals that the equations stemming from the
       
   127   @{text primrec} statement within instantiation of class
       
   128   @{class semigroup} with type @{typ nat} are mapped to a separate
       
   129   function declaration @{text mult_nat} which in turn is used
       
   130   to provide the right hand side for the @{text "instance Semigroup Nat"}
       
   131   \fixme[courier fuer code text, isastyle fuer class].  This perfectly
       
   132   agrees with the restriction that @{text inst} statements may
       
   133   only contain one single equation for each class class parameter
       
   134   The @{text instantiation} mechanism manages that for each
       
   135   overloaded constant @{text "f [\<kappa> \<^bvec>\<alpha>\<Colon>s\<^isub>k\<^evec>]"}
       
   136   a \emph{shadow constant} @{text "f\<^isub>\<kappa> [\<^bvec>\<alpha>\<Colon>s\<^isub>k\<^evec>]"} is
       
   137   declared satisfying @{text "f [\<kappa> \<^bvec>\<alpha>\<Colon>s\<^isub>k\<^evec>] \<equiv> f\<^isub>\<kappa> [\<^bvec>\<alpha>\<Colon>s\<^isub>k\<^evec>]"}.
       
   138   this equation is indeed the one used for the statement;
       
   139   using it as a rewrite rule, defining equations for 
       
   140   @{text "f [\<kappa> \<^bvec>\<alpha>\<Colon>s\<^isub>k\<^evec>]"} can be turned into
       
   141   defining equations for @{text "f\<^isub>\<kappa> [\<^bvec>\<alpha>\<Colon>s\<^isub>k\<^evec>]"}.  This
       
   142   happens transparently, providing the illusion that class parameters
       
   143   can be instantiated with more than one equation.
       
   144 
       
   145   This is a convenient place to show how explicit dictionary construction
       
   146   manifests in generated code (here, the same example in @{text SML}):
       
   147 *}
       
   148 
       
   149 text %quoteme {*@{code_stmts bexp (SML)}*}
       
   150 
       
   151 
       
   152 subsection {* The preprocessor \label{sec:preproc} *}
       
   153 
       
   154 text {*
       
   155   Before selected function theorems are turned into abstract
       
   156   code, a chain of definitional transformation steps is carried
       
   157   out: \emph{preprocessing}.  In essence, the preprocessor
       
   158   consists of two components: a \emph{simpset} and \emph{function transformers}.
       
   159 
       
   160   The \emph{simpset} allows to employ the full generality of the Isabelle
       
   161   simplifier.  Due to the interpretation of theorems
       
   162   as defining equations, rewrites are applied to the right
       
   163   hand side and the arguments of the left hand side of an
       
   164   equation, but never to the constant heading the left hand side.
       
   165   An important special case are \emph{inline theorems} which may be
       
   166   declared and undeclared using the
       
   167   \emph{code inline} or \emph{code inline del} attribute respectively.
       
   168 
       
   169   Some common applications:
       
   170 *}
       
   171 
       
   172 text_raw {*
       
   173   \begin{itemize}
       
   174 *}
       
   175 
       
   176 text {*
       
   177      \item replacing non-executable constructs by executable ones:
       
   178 *}     
       
   179 
       
   180 lemma %quoteme [code inline]:
       
   181   "x \<in> set xs \<longleftrightarrow> x mem xs" by (induct xs) simp_all
       
   182 
       
   183 text {*
       
   184      \item eliminating superfluous constants:
       
   185 *}
       
   186 
       
   187 lemma %quoteme [code inline]:
       
   188   "1 = Suc 0" by simp
       
   189 
       
   190 text {*
       
   191      \item replacing executable but inconvenient constructs:
       
   192 *}
       
   193 
       
   194 lemma %quoteme [code inline]:
       
   195   "xs = [] \<longleftrightarrow> List.null xs" by (induct xs) simp_all
       
   196 
       
   197 text_raw {*
       
   198   \end{itemize}
       
   199 *}
       
   200 
       
   201 text {*
       
   202   \emph{Function transformers} provide a very general interface,
       
   203   transforming a list of function theorems to another
       
   204   list of function theorems, provided that neither the heading
       
   205   constant nor its type change.  The @{term "0\<Colon>nat"} / @{const Suc}
       
   206   pattern elimination implemented in
       
   207   theory @{text Efficient_Nat} (see \secref{eff_nat}) uses this
       
   208   interface.
       
   209 
       
   210   \noindent The current setup of the preprocessor may be inspected using
       
   211   the @{command print_codesetup} command.
       
   212   @{command code_thms} provides a convenient
       
   213   mechanism to inspect the impact of a preprocessor setup
       
   214   on defining equations.
       
   215 
       
   216   \begin{warn}
       
   217     The attribute \emph{code unfold}
       
   218     associated with the existing code generator also applies to
       
   219     the new one: \emph{code unfold} implies \emph{code inline}.
       
   220   \end{warn}
       
   221 *}
       
   222 
       
   223 subsection {* Datatypes \label{sec:datatypes} *}
       
   224 
       
   225 text {*
       
   226   Conceptually, any datatype is spanned by a set of
       
   227   \emph{constructors} of type @{text "\<tau> = \<dots> \<Rightarrow> \<kappa> \<alpha>\<^isub>1 \<dots> \<alpha>\<^isub>n"}
       
   228   where @{text "{\<alpha>\<^isub>1, \<dots>, \<alpha>\<^isub>n}"} is excactly the set of \emph{all}
       
   229   type variables in @{text "\<tau>"}.  The HOL datatype package
       
   230   by default registers any new datatype in the table
       
   231   of datatypes, which may be inspected using
       
   232   the @{command print_codesetup} command.
       
   233 
       
   234   In some cases, it may be convenient to alter or
       
   235   extend this table;  as an example, we will develop an alternative
       
   236   representation of natural numbers as binary digits, whose
       
   237   size does increase logarithmically with its value, not linear
       
   238   \footnote{Indeed, the @{theory Efficient_Nat} theory (see \ref{eff_nat})
       
   239     does something similar}.  First, the digit representation:
       
   240 *}
       
   241 
       
   242 definition %quoteme Dig0 :: "nat \<Rightarrow> nat" where
       
   243   "Dig0 n = 2 * n"
       
   244 
       
   245 definition %quoteme  Dig1 :: "nat \<Rightarrow> nat" where
       
   246   "Dig1 n = Suc (2 * n)"
       
   247 
       
   248 text {*
       
   249   \noindent We will use these two ">digits"< to represent natural numbers
       
   250   in binary digits, e.g.:
       
   251 *}
       
   252 
       
   253 lemma %quoteme 42: "42 = Dig0 (Dig1 (Dig0 (Dig1 (Dig0 1))))"
       
   254   by (simp add: Dig0_def Dig1_def)
       
   255 
       
   256 text {*
       
   257   \noindent Of course we also have to provide proper code equations for
       
   258   the operations, e.g. @{term "op + \<Colon> nat \<Rightarrow> nat \<Rightarrow> nat"}:
       
   259 *}
       
   260 
       
   261 lemma %quoteme plus_Dig [code func]:
       
   262   "0 + n = n"
       
   263   "m + 0 = m"
       
   264   "1 + Dig0 n = Dig1 n"
       
   265   "Dig0 m + 1 = Dig1 m"
       
   266   "1 + Dig1 n = Dig0 (n + 1)"
       
   267   "Dig1 m + 1 = Dig0 (m + 1)"
       
   268   "Dig0 m + Dig0 n = Dig0 (m + n)"
       
   269   "Dig0 m + Dig1 n = Dig1 (m + n)"
       
   270   "Dig1 m + Dig0 n = Dig1 (m + n)"
       
   271   "Dig1 m + Dig1 n = Dig0 (m + n + 1)"
       
   272   by (simp_all add: Dig0_def Dig1_def)
       
   273 
       
   274 text {*
       
   275   \noindent We then instruct the code generator to view @{term "0\<Colon>nat"},
       
   276   @{term "1\<Colon>nat"}, @{term Dig0} and @{term Dig1} as
       
   277   datatype constructors:
       
   278 *}
       
   279 
       
   280 code_datatype %quoteme "0\<Colon>nat" "1\<Colon>nat" Dig0 Dig1
       
   281 
       
   282 text {*
       
   283   \noindent For the former constructor @{term Suc}, we provide a code
       
   284   equation and remove some parts of the default code generator setup
       
   285   which are an obstacle here:
       
   286 *}
       
   287 
       
   288 lemma %quoteme Suc_Dig [code func]:
       
   289   "Suc n = n + 1"
       
   290   by simp
       
   291 
       
   292 declare %quoteme One_nat_def [code inline del]
       
   293 declare %quoteme add_Suc_shift [code func del] 
       
   294 
       
   295 text {*
       
   296   \noindent This yields the following code:
       
   297 *}
       
   298 
       
   299 text %quoteme {*@{code_stmts "op + \<Colon> nat \<Rightarrow> nat \<Rightarrow> nat" (SML)}*}
       
   300 
       
   301 text {*
       
   302   \medskip
       
   303 
       
   304   From this example, it can be easily glimpsed that using own constructor sets
       
   305   is a little delicate since it changes the set of valid patterns for values
       
   306   of that type.  Without going into much detail, here some practical hints:
       
   307 
       
   308   \begin{itemize}
       
   309     \item When changing the constructor set for datatypes, take care to
       
   310       provide an alternative for the @{text case} combinator (e.g.~by replacing
       
   311       it using the preprocessor).
       
   312     \item Values in the target language need not to be normalised -- different
       
   313       values in the target language may represent the same value in the
       
   314       logic (e.g. @{term "Dig1 0 = 1"}).
       
   315     \item Usually, a good methodology to deal with the subtleties of pattern
       
   316       matching is to see the type as an abstract type: provide a set
       
   317       of operations which operate on the concrete representation of the type,
       
   318       and derive further operations by combinations of these primitive ones,
       
   319       without relying on a particular representation.
       
   320   \end{itemize}
       
   321 *}
       
   322 
       
   323 code_datatype %invisible "0::nat" Suc
       
   324 declare %invisible plus_Dig [code func del]
       
   325 declare %invisible One_nat_def [code inline]
       
   326 declare %invisible add_Suc_shift [code func] 
       
   327 lemma %invisible [code func]: "0 + n = (n \<Colon> nat)" by simp
       
   328 
    20 
   329 
    21 subsection {* Equality and wellsortedness *}
   330 subsection {* Equality and wellsortedness *}
    22 
   331 
       
   332 text {*
       
   333   Surely you have already noticed how equality is treated
       
   334   by the code generator:
       
   335 *}
       
   336 
       
   337 primrec %quoteme
       
   338   collect_duplicates :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a list \<Rightarrow> 'a list" where
       
   339     "collect_duplicates xs ys [] = xs"
       
   340   | "collect_duplicates xs ys (z#zs) = (if z \<in> set xs
       
   341       then if z \<in> set ys
       
   342         then collect_duplicates xs ys zs
       
   343         else collect_duplicates xs (z#ys) zs
       
   344       else collect_duplicates (z#xs) (z#ys) zs)"
       
   345 
       
   346 text {*
       
   347   The membership test during preprocessing is rewritten,
       
   348   resulting in @{const List.member}, which itself
       
   349   performs an explicit equality check.
       
   350 *}
       
   351 
       
   352 text %quoteme {*@{code_stmts collect_duplicates (SML)}*}
       
   353 
       
   354 text {*
       
   355   \noindent Obviously, polymorphic equality is implemented the Haskell
       
   356   way using a type class.  How is this achieved?  HOL introduces
       
   357   an explicit class @{class eq} with a corresponding operation
       
   358   @{const eq_class.eq} such that @{thm eq [no_vars]}.
       
   359   The preprocessing framework does the rest.
       
   360   For datatypes, instances of @{class eq} are implicitly derived
       
   361   when possible.  For other types, you may instantiate @{text eq}
       
   362   manually like any other type class.
       
   363 
       
   364   Though this @{text eq} class is designed to get rarely in
       
   365   the way, a subtlety
       
   366   enters the stage when definitions of overloaded constants
       
   367   are dependent on operational equality.  For example, let
       
   368   us define a lexicographic ordering on tuples:
       
   369 *}
       
   370 
       
   371 instantiation * :: (ord, ord) ord
       
   372 begin
       
   373 
       
   374 definition
       
   375   [code func del]: "p1 < p2 \<longleftrightarrow> (let (x1, y1) = p1; (x2, y2) = p2 in
       
   376     x1 < x2 \<or> (x1 = x2 \<and> y1 < y2))"
       
   377 
       
   378 definition
       
   379   [code func del]: "p1 \<le> p2 \<longleftrightarrow> (let (x1, y1) = p1; (x2, y2) = p2 in
       
   380     x1 < x2 \<or> (x1 = x2 \<and> y1 \<le> y2))"
       
   381 
       
   382 instance ..
       
   383 
       
   384 end
       
   385 
       
   386 lemma ord_prod [code func(*<*), code func del(*>*)]:
       
   387   "(x1 \<Colon> 'a\<Colon>ord, y1 \<Colon> 'b\<Colon>ord) < (x2, y2) \<longleftrightarrow> x1 < x2 \<or> (x1 = x2 \<and> y1 < y2)"
       
   388   "(x1 \<Colon> 'a\<Colon>ord, y1 \<Colon> 'b\<Colon>ord) \<le> (x2, y2) \<longleftrightarrow> x1 < x2 \<or> (x1 = x2 \<and> y1 \<le> y2)"
       
   389   unfolding less_prod_def less_eq_prod_def by simp_all
       
   390 
       
   391 text {*
       
   392   Then code generation will fail.  Why?  The definition
       
   393   of @{term "op \<le>"} depends on equality on both arguments,
       
   394   which are polymorphic and impose an additional @{class eq}
       
   395   class constraint, thus violating the type discipline
       
   396   for class operations.
       
   397 
       
   398   The solution is to add @{class eq} explicitly to the first sort arguments in the
       
   399   code theorems:
       
   400 *}
       
   401 
       
   402 lemma ord_prod_code [code func]:
       
   403   "(x1 \<Colon> 'a\<Colon>{ord, eq}, y1 \<Colon> 'b\<Colon>ord) < (x2, y2) \<longleftrightarrow>
       
   404     x1 < x2 \<or> (x1 = x2 \<and> y1 < y2)"
       
   405   "(x1 \<Colon> 'a\<Colon>{ord, eq}, y1 \<Colon> 'b\<Colon>ord) \<le> (x2, y2) \<longleftrightarrow>
       
   406     x1 < x2 \<or> (x1 = x2 \<and> y1 \<le> y2)"
       
   407   unfolding ord_prod by rule+
       
   408 
       
   409 text {*
       
   410   \noindent Then code generation succeeds:
       
   411 *}
       
   412 
       
   413 text %quoteme {*@{code_stmts "op \<le> \<Colon> 'a\<Colon>{eq, ord} \<times> 'b\<Colon>ord \<Rightarrow> 'a \<times> 'b \<Rightarrow> bool" (SML)}*}
       
   414 
       
   415 text {*
       
   416   In general, code theorems for overloaded constants may have more
       
   417   restrictive sort constraints than the underlying instance relation
       
   418   between class and type constructor as long as the whole system of
       
   419   constraints is coregular; code theorems violating coregularity
       
   420   are rejected immediately.  Consequently, it might be necessary
       
   421   to delete disturbing theorems in the code theorem table,
       
   422   as we have done here with the original definitions @{fact less_prod_def}
       
   423   and @{fact less_eq_prod_def}.
       
   424 
       
   425   In some cases, the automatically derived defining equations
       
   426   for equality on a particular type may not be appropriate.
       
   427   As example, watch the following datatype representing
       
   428   monomorphic parametric types (where type constructors
       
   429   are referred to by natural numbers):
       
   430 *}
       
   431 
       
   432 datatype %quoteme monotype = Mono nat "monotype list"
       
   433 (*<*)
       
   434 lemma monotype_eq:
       
   435   "Mono tyco1 typargs1 = Mono tyco2 typargs2 \<equiv> 
       
   436      tyco1 = tyco2 \<and> typargs1 = typargs2" by simp
       
   437 (*>*)
       
   438 
       
   439 text {*
       
   440   Then code generation for SML would fail with a message
       
   441   that the generated code contains illegal mutual dependencies:
       
   442   the theorem @{thm monotype_eq [no_vars]} already requires the
       
   443   instance @{text "monotype \<Colon> eq"}, which itself requires
       
   444   @{thm monotype_eq [no_vars]};  Haskell has no problem with mutually
       
   445   recursive @{text instance} and @{text function} definitions,
       
   446   but the SML serializer does not support this.
       
   447 
       
   448   In such cases, you have to provide you own equality equations
       
   449   involving auxiliary constants.  In our case,
       
   450   @{const [show_types] list_all2} can do the job:
       
   451 *}
       
   452 
       
   453 lemma %quoteme monotype_eq_list_all2 [code func]:
       
   454   "eq_class.eq (Mono tyco1 typargs1) (Mono tyco2 typargs2) \<longleftrightarrow>
       
   455      tyco1 = tyco2 \<and> list_all2 eq_class.eq typargs1 typargs2"
       
   456   by (simp add: eq list_all2_eq [symmetric])
       
   457 
       
   458 text {*
       
   459   \noindent does not depend on instance @{text "monotype \<Colon> eq"}:
       
   460 *}
       
   461 
       
   462 text %quoteme {*@{code_stmts "eq_class.eq :: monotype \<Rightarrow> monotype \<Rightarrow> bool" (SML)}*}
       
   463 
       
   464 
    23 subsection {* Partiality *}
   465 subsection {* Partiality *}
    24 
   466 
    25 text {* @{text "code_abort"}, examples: maps *}
   467 text {* @{command "code_abort"}, examples: maps *}
    26 
   468 
    27 end
   469 end