doc-src/Codegen/Thy/Program.thy
changeset 38407 0dbbb511634d
parent 38401 c4de81b7fdec
parent 38406 bbb02b67caac
child 38408 721b4d6095b7
equal deleted inserted replaced
38401:c4de81b7fdec 38407:0dbbb511634d
     1 theory Program
       
     2 imports Introduction
       
     3 begin
       
     4 
       
     5 section {* Turning Theories into Programs \label{sec:program} *}
       
     6 
       
     7 subsection {* The @{text "Isabelle/HOL"} default setup *}
       
     8 
       
     9 text {*
       
    10   We have already seen how by default equations stemming from
       
    11   @{command definition}, @{command primrec} and @{command fun}
       
    12   statements are used for code generation.  This default behaviour
       
    13   can be changed, e.g.\ by providing different code equations.
       
    14   The customisations shown in this section are \emph{safe}
       
    15   as regards correctness: all programs that can be generated are partially
       
    16   correct.
       
    17 *}
       
    18 
       
    19 subsection {* Selecting code equations *}
       
    20 
       
    21 text {*
       
    22   Coming back to our introductory example, we
       
    23   could provide an alternative code equations for @{const dequeue}
       
    24   explicitly:
       
    25 *}
       
    26 
       
    27 lemma %quote [code]:
       
    28   "dequeue (AQueue xs []) =
       
    29      (if xs = [] then (None, AQueue [] [])
       
    30        else dequeue (AQueue [] (rev xs)))"
       
    31   "dequeue (AQueue xs (y # ys)) =
       
    32      (Some y, AQueue xs ys)"
       
    33   by (cases xs, simp_all) (cases "rev xs", simp_all)
       
    34 
       
    35 text {*
       
    36   \noindent The annotation @{text "[code]"} is an @{text Isar}
       
    37   @{text attribute} which states that the given theorems should be
       
    38   considered as code equations for a @{text fun} statement --
       
    39   the corresponding constant is determined syntactically.  The resulting code:
       
    40 *}
       
    41 
       
    42 text %quote {*@{code_stmts dequeue (consts) dequeue (Haskell)}*}
       
    43 
       
    44 text {*
       
    45   \noindent You may note that the equality test @{term "xs = []"} has been
       
    46   replaced by the predicate @{term "null xs"}.  This is due to the default
       
    47   setup in the \qn{preprocessor} to be discussed further below (\secref{sec:preproc}).
       
    48 
       
    49   Changing the default constructor set of datatypes is also
       
    50   possible.  See \secref{sec:datatypes} for an example.
       
    51 
       
    52   As told in \secref{sec:concept}, code generation is based
       
    53   on a structured collection of code theorems.
       
    54   This collection
       
    55   may be inspected using the @{command code_thms} command:
       
    56 *}
       
    57 
       
    58 code_thms %quote dequeue
       
    59 
       
    60 text {*
       
    61   \noindent prints a table with \emph{all} code equations
       
    62   for @{const dequeue}, including
       
    63   \emph{all} code equations those equations depend
       
    64   on recursively.
       
    65   
       
    66   Similarly, the @{command code_deps} command shows a graph
       
    67   visualising dependencies between code equations.
       
    68 *}
       
    69 
       
    70 subsection {* @{text class} and @{text instantiation} *}
       
    71 
       
    72 text {*
       
    73   Concerning type classes and code generation, let us examine an example
       
    74   from abstract algebra:
       
    75 *}
       
    76 
       
    77 class %quote semigroup =
       
    78   fixes mult :: "'a \<Rightarrow> 'a \<Rightarrow> 'a" (infixl "\<otimes>" 70)
       
    79   assumes assoc: "(x \<otimes> y) \<otimes> z = x \<otimes> (y \<otimes> z)"
       
    80 
       
    81 class %quote monoid = semigroup +
       
    82   fixes neutral :: 'a ("\<one>")
       
    83   assumes neutl: "\<one> \<otimes> x = x"
       
    84     and neutr: "x \<otimes> \<one> = x"
       
    85 
       
    86 instantiation %quote nat :: monoid
       
    87 begin
       
    88 
       
    89 primrec %quote mult_nat where
       
    90     "0 \<otimes> n = (0\<Colon>nat)"
       
    91   | "Suc m \<otimes> n = n + m \<otimes> n"
       
    92 
       
    93 definition %quote neutral_nat where
       
    94   "\<one> = Suc 0"
       
    95 
       
    96 lemma %quote add_mult_distrib:
       
    97   fixes n m q :: nat
       
    98   shows "(n + m) \<otimes> q = n \<otimes> q + m \<otimes> q"
       
    99   by (induct n) simp_all
       
   100 
       
   101 instance %quote proof
       
   102   fix m n q :: nat
       
   103   show "m \<otimes> n \<otimes> q = m \<otimes> (n \<otimes> q)"
       
   104     by (induct m) (simp_all add: add_mult_distrib)
       
   105   show "\<one> \<otimes> n = n"
       
   106     by (simp add: neutral_nat_def)
       
   107   show "m \<otimes> \<one> = m"
       
   108     by (induct m) (simp_all add: neutral_nat_def)
       
   109 qed
       
   110 
       
   111 end %quote
       
   112 
       
   113 text {*
       
   114   \noindent We define the natural operation of the natural numbers
       
   115   on monoids:
       
   116 *}
       
   117 
       
   118 primrec %quote (in monoid) pow :: "nat \<Rightarrow> 'a \<Rightarrow> 'a" where
       
   119     "pow 0 a = \<one>"
       
   120   | "pow (Suc n) a = a \<otimes> pow n a"
       
   121 
       
   122 text {*
       
   123   \noindent This we use to define the discrete exponentiation function:
       
   124 *}
       
   125 
       
   126 definition %quote bexp :: "nat \<Rightarrow> nat" where
       
   127   "bexp n = pow n (Suc (Suc 0))"
       
   128 
       
   129 text {*
       
   130   \noindent The corresponding code in Haskell uses that language's native classes:
       
   131 *}
       
   132 
       
   133 text %quote {*@{code_stmts bexp (Haskell)}*}
       
   134 
       
   135 text {*
       
   136   \noindent This is a convenient place to show how explicit dictionary construction
       
   137   manifests in generated code (here, the same example in @{text SML})
       
   138   \cite{Haftmann-Nipkow:2010:code}:
       
   139 *}
       
   140 
       
   141 text %quote {*@{code_stmts bexp (SML)}*}
       
   142 
       
   143 text {*
       
   144   \noindent Note the parameters with trailing underscore (@{verbatim "A_"}),
       
   145     which are the dictionary parameters.
       
   146 *}
       
   147 
       
   148 subsection {* The preprocessor \label{sec:preproc} *}
       
   149 
       
   150 text {*
       
   151   Before selected function theorems are turned into abstract
       
   152   code, a chain of definitional transformation steps is carried
       
   153   out: \emph{preprocessing}.  In essence, the preprocessor
       
   154   consists of two components: a \emph{simpset} and \emph{function transformers}.
       
   155 
       
   156   The \emph{simpset} can apply the full generality of the
       
   157   Isabelle simplifier.  Due to the interpretation of theorems as code
       
   158   equations, rewrites are applied to the right hand side and the
       
   159   arguments of the left hand side of an equation, but never to the
       
   160   constant heading the left hand side.  An important special case are
       
   161   \emph{unfold theorems},  which may be declared and removed using
       
   162   the @{attribute code_unfold} or \emph{@{attribute code_unfold} del}
       
   163   attribute, respectively.
       
   164 
       
   165   Some common applications:
       
   166 *}
       
   167 
       
   168 text_raw {*
       
   169   \begin{itemize}
       
   170 *}
       
   171 
       
   172 text {*
       
   173      \item replacing non-executable constructs by executable ones:
       
   174 *}     
       
   175 
       
   176 lemma %quote [code_unfold]:
       
   177   "x \<in> set xs \<longleftrightarrow> List.member xs x" by (fact in_set_member)
       
   178 
       
   179 text {*
       
   180      \item eliminating superfluous constants:
       
   181 *}
       
   182 
       
   183 lemma %quote [code_unfold]:
       
   184   "1 = Suc 0" by (fact One_nat_def)
       
   185 
       
   186 text {*
       
   187      \item replacing executable but inconvenient constructs:
       
   188 *}
       
   189 
       
   190 lemma %quote [code_unfold]:
       
   191   "xs = [] \<longleftrightarrow> List.null xs" by (fact eq_Nil_null)
       
   192 
       
   193 text_raw {*
       
   194   \end{itemize}
       
   195 *}
       
   196 
       
   197 text {*
       
   198   \noindent \emph{Function transformers} provide a very general interface,
       
   199   transforming a list of function theorems to another
       
   200   list of function theorems, provided that neither the heading
       
   201   constant nor its type change.  The @{term "0\<Colon>nat"} / @{const Suc}
       
   202   pattern elimination implemented in
       
   203   theory @{text Efficient_Nat} (see \secref{eff_nat}) uses this
       
   204   interface.
       
   205 
       
   206   \noindent The current setup of the preprocessor may be inspected using
       
   207   the @{command print_codeproc} command.
       
   208   @{command code_thms} provides a convenient
       
   209   mechanism to inspect the impact of a preprocessor setup
       
   210   on code equations.
       
   211 
       
   212   \begin{warn}
       
   213 
       
   214     Attribute @{attribute code_unfold} also applies to the
       
   215     preprocessor of the ancient @{text "SML code generator"}; in case
       
   216     this is not what you intend, use @{attribute code_inline} instead.
       
   217   \end{warn}
       
   218 *}
       
   219 
       
   220 subsection {* Datatypes \label{sec:datatypes} *}
       
   221 
       
   222 text {*
       
   223   Conceptually, any datatype is spanned by a set of
       
   224   \emph{constructors} of type @{text "\<tau> = \<dots> \<Rightarrow> \<kappa> \<alpha>\<^isub>1 \<dots> \<alpha>\<^isub>n"} where @{text
       
   225   "{\<alpha>\<^isub>1, \<dots>, \<alpha>\<^isub>n}"} is exactly the set of \emph{all} type variables in
       
   226   @{text "\<tau>"}.  The HOL datatype package by default registers any new
       
   227   datatype in the table of datatypes, which may be inspected using the
       
   228   @{command print_codesetup} command.
       
   229 
       
   230   In some cases, it is appropriate to alter or extend this table.  As
       
   231   an example, we will develop an alternative representation of the
       
   232   queue example given in \secref{sec:intro}.  The amortised
       
   233   representation is convenient for generating code but exposes its
       
   234   \qt{implementation} details, which may be cumbersome when proving
       
   235   theorems about it.  Therefore, here is a simple, straightforward
       
   236   representation of queues:
       
   237 *}
       
   238 
       
   239 datatype %quote 'a queue = Queue "'a list"
       
   240 
       
   241 definition %quote empty :: "'a queue" where
       
   242   "empty = Queue []"
       
   243 
       
   244 primrec %quote enqueue :: "'a \<Rightarrow> 'a queue \<Rightarrow> 'a queue" where
       
   245   "enqueue x (Queue xs) = Queue (xs @ [x])"
       
   246 
       
   247 fun %quote dequeue :: "'a queue \<Rightarrow> 'a option \<times> 'a queue" where
       
   248     "dequeue (Queue []) = (None, Queue [])"
       
   249   | "dequeue (Queue (x # xs)) = (Some x, Queue xs)"
       
   250 
       
   251 text {*
       
   252   \noindent This we can use directly for proving;  for executing,
       
   253   we provide an alternative characterisation:
       
   254 *}
       
   255 
       
   256 definition %quote AQueue :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a queue" where
       
   257   "AQueue xs ys = Queue (ys @ rev xs)"
       
   258 
       
   259 code_datatype %quote AQueue
       
   260 
       
   261 text {*
       
   262   \noindent Here we define a \qt{constructor} @{const "AQueue"} which
       
   263   is defined in terms of @{text "Queue"} and interprets its arguments
       
   264   according to what the \emph{content} of an amortised queue is supposed
       
   265   to be.  Equipped with this, we are able to prove the following equations
       
   266   for our primitive queue operations which \qt{implement} the simple
       
   267   queues in an amortised fashion:
       
   268 *}
       
   269 
       
   270 lemma %quote empty_AQueue [code]:
       
   271   "empty = AQueue [] []"
       
   272   unfolding AQueue_def empty_def by simp
       
   273 
       
   274 lemma %quote enqueue_AQueue [code]:
       
   275   "enqueue x (AQueue xs ys) = AQueue (x # xs) ys"
       
   276   unfolding AQueue_def by simp
       
   277 
       
   278 lemma %quote dequeue_AQueue [code]:
       
   279   "dequeue (AQueue xs []) =
       
   280     (if xs = [] then (None, AQueue [] [])
       
   281     else dequeue (AQueue [] (rev xs)))"
       
   282   "dequeue (AQueue xs (y # ys)) = (Some y, AQueue xs ys)"
       
   283   unfolding AQueue_def by simp_all
       
   284 
       
   285 text {*
       
   286   \noindent For completeness, we provide a substitute for the
       
   287   @{text case} combinator on queues:
       
   288 *}
       
   289 
       
   290 lemma %quote queue_case_AQueue [code]:
       
   291   "queue_case f (AQueue xs ys) = f (ys @ rev xs)"
       
   292   unfolding AQueue_def by simp
       
   293 
       
   294 text {*
       
   295   \noindent The resulting code looks as expected:
       
   296 *}
       
   297 
       
   298 text %quote {*@{code_stmts empty enqueue dequeue (SML)}*}
       
   299 
       
   300 text {*
       
   301   \noindent From this example, it can be glimpsed that using own
       
   302   constructor sets is a little delicate since it changes the set of
       
   303   valid patterns for values of that type.  Without going into much
       
   304   detail, here some practical hints:
       
   305 
       
   306   \begin{itemize}
       
   307 
       
   308     \item When changing the constructor set for datatypes, take care
       
   309       to provide alternative equations for the @{text case} combinator.
       
   310 
       
   311     \item Values in the target language need not to be normalised --
       
   312       different values in the target language may represent the same
       
   313       value in the logic.
       
   314 
       
   315     \item Usually, a good methodology to deal with the subtleties of
       
   316       pattern matching is to see the type as an abstract type: provide
       
   317       a set of operations which operate on the concrete representation
       
   318       of the type, and derive further operations by combinations of
       
   319       these primitive ones, without relying on a particular
       
   320       representation.
       
   321 
       
   322   \end{itemize}
       
   323 *}
       
   324 
       
   325 
       
   326 subsection {* Equality *}
       
   327 
       
   328 text {*
       
   329   Surely you have already noticed how equality is treated
       
   330   by the code generator:
       
   331 *}
       
   332 
       
   333 primrec %quote collect_duplicates :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a list \<Rightarrow> 'a list" where
       
   334   "collect_duplicates xs ys [] = xs"
       
   335   | "collect_duplicates xs ys (z#zs) = (if z \<in> set xs
       
   336       then if z \<in> set ys
       
   337         then collect_duplicates xs ys zs
       
   338         else collect_duplicates xs (z#ys) zs
       
   339       else collect_duplicates (z#xs) (z#ys) zs)"
       
   340 
       
   341 text {*
       
   342   \noindent During preprocessing, the membership test is rewritten,
       
   343   resulting in @{const List.member}, which itself
       
   344   performs an explicit equality check.
       
   345 *}
       
   346 
       
   347 text %quote {*@{code_stmts collect_duplicates (SML)}*}
       
   348 
       
   349 text {*
       
   350   \noindent Obviously, polymorphic equality is implemented the Haskell
       
   351   way using a type class.  How is this achieved?  HOL introduces
       
   352   an explicit class @{class eq} with a corresponding operation
       
   353   @{const eq_class.eq} such that @{thm eq [no_vars]}.
       
   354   The preprocessing framework does the rest by propagating the
       
   355   @{class eq} constraints through all dependent code equations.
       
   356   For datatypes, instances of @{class eq} are implicitly derived
       
   357   when possible.  For other types, you may instantiate @{text eq}
       
   358   manually like any other type class.
       
   359 *}
       
   360 
       
   361 
       
   362 subsection {* Explicit partiality *}
       
   363 
       
   364 text {*
       
   365   Partiality usually enters the game by partial patterns, as
       
   366   in the following example, again for amortised queues:
       
   367 *}
       
   368 
       
   369 definition %quote strict_dequeue :: "'a queue \<Rightarrow> 'a \<times> 'a queue" where
       
   370   "strict_dequeue q = (case dequeue q
       
   371     of (Some x, q') \<Rightarrow> (x, q'))"
       
   372 
       
   373 lemma %quote strict_dequeue_AQueue [code]:
       
   374   "strict_dequeue (AQueue xs (y # ys)) = (y, AQueue xs ys)"
       
   375   "strict_dequeue (AQueue xs []) =
       
   376     (case rev xs of y # ys \<Rightarrow> (y, AQueue [] ys))"
       
   377   by (simp_all add: strict_dequeue_def dequeue_AQueue split: list.splits)
       
   378 
       
   379 text {*
       
   380   \noindent In the corresponding code, there is no equation
       
   381   for the pattern @{term "AQueue [] []"}:
       
   382 *}
       
   383 
       
   384 text %quote {*@{code_stmts strict_dequeue (consts) strict_dequeue (Haskell)}*}
       
   385 
       
   386 text {*
       
   387   \noindent In some cases it is desirable to have this
       
   388   pseudo-\qt{partiality} more explicitly, e.g.~as follows:
       
   389 *}
       
   390 
       
   391 axiomatization %quote empty_queue :: 'a
       
   392 
       
   393 definition %quote strict_dequeue' :: "'a queue \<Rightarrow> 'a \<times> 'a queue" where
       
   394   "strict_dequeue' q = (case dequeue q of (Some x, q') \<Rightarrow> (x, q') | _ \<Rightarrow> empty_queue)"
       
   395 
       
   396 lemma %quote strict_dequeue'_AQueue [code]:
       
   397   "strict_dequeue' (AQueue xs []) = (if xs = [] then empty_queue
       
   398      else strict_dequeue' (AQueue [] (rev xs)))"
       
   399   "strict_dequeue' (AQueue xs (y # ys)) =
       
   400      (y, AQueue xs ys)"
       
   401   by (simp_all add: strict_dequeue'_def dequeue_AQueue split: list.splits)
       
   402 
       
   403 text {*
       
   404   Observe that on the right hand side of the definition of @{const
       
   405   "strict_dequeue'"}, the unspecified constant @{const empty_queue} occurs.
       
   406 
       
   407   Normally, if constants without any code equations occur in a
       
   408   program, the code generator complains (since in most cases this is
       
   409   indeed an error).  But such constants can also be thought
       
   410   of as function definitions which always fail,
       
   411   since there is never a successful pattern match on the left hand
       
   412   side.  In order to categorise a constant into that category
       
   413   explicitly, use @{command "code_abort"}:
       
   414 *}
       
   415 
       
   416 code_abort %quote empty_queue
       
   417 
       
   418 text {*
       
   419   \noindent Then the code generator will just insert an error or
       
   420   exception at the appropriate position:
       
   421 *}
       
   422 
       
   423 text %quote {*@{code_stmts strict_dequeue' (consts) empty_queue strict_dequeue' (Haskell)}*}
       
   424 
       
   425 text {*
       
   426   \noindent This feature however is rarely needed in practice.
       
   427   Note also that the @{text HOL} default setup already declares
       
   428   @{const undefined} as @{command "code_abort"}, which is most
       
   429   likely to be used in such situations.
       
   430 *}
       
   431 
       
   432 end