author | wenzelm |
Tue, 03 Aug 2010 22:28:43 +0200 | |
changeset 38142 | c202426474c3 |
parent 37612 | 48fed6598be9 |
permissions | -rw-r--r-- |
28213 | 1 |
theory Program |
28419 | 2 |
imports Introduction |
28213 | 3 |
begin |
4 |
||
28419 | 5 |
section {* Turning Theories into Programs \label{sec:program} *} |
28213 | 6 |
|
7 |
subsection {* The @{text "Isabelle/HOL"} default setup *} |
|
8 |
||
28419 | 9 |
text {* |
10 |
We have already seen how by default equations stemming from |
|
34155 | 11 |
@{command definition}, @{command primrec} and @{command fun} |
28447 | 12 |
statements are used for code generation. This default behaviour |
34155 | 13 |
can be changed, e.g.\ by providing different code equations. |
14 |
The customisations shown in this section are \emph{safe} |
|
15 |
as regards correctness: all programs that can be generated are partially |
|
28447 | 16 |
correct. |
17 |
*} |
|
18 |
||
19 |
subsection {* Selecting code equations *} |
|
20 |
||
21 |
text {* |
|
22 |
Coming back to our introductory example, we |
|
29560 | 23 |
could provide an alternative code equations for @{const dequeue} |
28447 | 24 |
explicitly: |
28419 | 25 |
*} |
26 |
||
28564 | 27 |
lemma %quote [code]: |
29798 | 28 |
"dequeue (AQueue xs []) = |
29 |
(if xs = [] then (None, AQueue [] []) |
|
30 |
else dequeue (AQueue [] (rev xs)))" |
|
31 |
"dequeue (AQueue xs (y # ys)) = |
|
32 |
(Some y, AQueue xs ys)" |
|
28419 | 33 |
by (cases xs, simp_all) (cases "rev xs", simp_all) |
34 |
||
35 |
text {* |
|
28562 | 36 |
\noindent The annotation @{text "[code]"} is an @{text Isar} |
28419 | 37 |
@{text attribute} which states that the given theorems should be |
29560 | 38 |
considered as code equations for a @{text fun} statement -- |
28419 | 39 |
the corresponding constant is determined syntactically. The resulting code: |
40 |
*} |
|
41 |
||
28564 | 42 |
text %quote {*@{code_stmts dequeue (consts) dequeue (Haskell)}*} |
28419 | 43 |
|
44 |
text {* |
|
45 |
\noindent You may note that the equality test @{term "xs = []"} has been |
|
46 |
replaced by the predicate @{term "null xs"}. This is due to the default |
|
47 |
setup in the \qn{preprocessor} to be discussed further below (\secref{sec:preproc}). |
|
48 |
||
49 |
Changing the default constructor set of datatypes is also |
|
29794 | 50 |
possible. See \secref{sec:datatypes} for an example. |
28419 | 51 |
|
52 |
As told in \secref{sec:concept}, code generation is based |
|
53 |
on a structured collection of code theorems. |
|
34155 | 54 |
This collection |
28419 | 55 |
may be inspected using the @{command code_thms} command: |
56 |
*} |
|
57 |
||
28564 | 58 |
code_thms %quote dequeue |
28419 | 59 |
|
60 |
text {* |
|
29560 | 61 |
\noindent prints a table with \emph{all} code equations |
28419 | 62 |
for @{const dequeue}, including |
29560 | 63 |
\emph{all} code equations those equations depend |
28419 | 64 |
on recursively. |
65 |
||
66 |
Similarly, the @{command code_deps} command shows a graph |
|
29560 | 67 |
visualising dependencies between code equations. |
28419 | 68 |
*} |
69 |
||
70 |
subsection {* @{text class} and @{text instantiation} *} |
|
71 |
||
72 |
text {* |
|
28447 | 73 |
Concerning type classes and code generation, let us examine an example |
28419 | 74 |
from abstract algebra: |
75 |
*} |
|
76 |
||
29794 | 77 |
class %quote semigroup = |
28419 | 78 |
fixes mult :: "'a \<Rightarrow> 'a \<Rightarrow> 'a" (infixl "\<otimes>" 70) |
79 |
assumes assoc: "(x \<otimes> y) \<otimes> z = x \<otimes> (y \<otimes> z)" |
|
80 |
||
28564 | 81 |
class %quote monoid = semigroup + |
28419 | 82 |
fixes neutral :: 'a ("\<one>") |
83 |
assumes neutl: "\<one> \<otimes> x = x" |
|
84 |
and neutr: "x \<otimes> \<one> = x" |
|
85 |
||
28564 | 86 |
instantiation %quote nat :: monoid |
28419 | 87 |
begin |
88 |
||
28564 | 89 |
primrec %quote mult_nat where |
28419 | 90 |
"0 \<otimes> n = (0\<Colon>nat)" |
91 |
| "Suc m \<otimes> n = n + m \<otimes> n" |
|
92 |
||
28564 | 93 |
definition %quote neutral_nat where |
28419 | 94 |
"\<one> = Suc 0" |
28213 | 95 |
|
28564 | 96 |
lemma %quote add_mult_distrib: |
28419 | 97 |
fixes n m q :: nat |
98 |
shows "(n + m) \<otimes> q = n \<otimes> q + m \<otimes> q" |
|
99 |
by (induct n) simp_all |
|
100 |
||
28564 | 101 |
instance %quote proof |
28419 | 102 |
fix m n q :: nat |
103 |
show "m \<otimes> n \<otimes> q = m \<otimes> (n \<otimes> q)" |
|
104 |
by (induct m) (simp_all add: add_mult_distrib) |
|
105 |
show "\<one> \<otimes> n = n" |
|
106 |
by (simp add: neutral_nat_def) |
|
107 |
show "m \<otimes> \<one> = m" |
|
108 |
by (induct m) (simp_all add: neutral_nat_def) |
|
109 |
qed |
|
110 |
||
28564 | 111 |
end %quote |
28419 | 112 |
|
113 |
text {* |
|
114 |
\noindent We define the natural operation of the natural numbers |
|
115 |
on monoids: |
|
116 |
*} |
|
117 |
||
28564 | 118 |
primrec %quote (in monoid) pow :: "nat \<Rightarrow> 'a \<Rightarrow> 'a" where |
28419 | 119 |
"pow 0 a = \<one>" |
120 |
| "pow (Suc n) a = a \<otimes> pow n a" |
|
121 |
||
122 |
text {* |
|
123 |
\noindent This we use to define the discrete exponentiation function: |
|
124 |
*} |
|
125 |
||
28564 | 126 |
definition %quote bexp :: "nat \<Rightarrow> nat" where |
28419 | 127 |
"bexp n = pow n (Suc (Suc 0))" |
128 |
||
129 |
text {* |
|
34155 | 130 |
\noindent The corresponding code in Haskell uses that language's native classes: |
28419 | 131 |
*} |
132 |
||
28564 | 133 |
text %quote {*@{code_stmts bexp (Haskell)}*} |
28419 | 134 |
|
135 |
text {* |
|
28447 | 136 |
\noindent This is a convenient place to show how explicit dictionary construction |
37427 | 137 |
manifests in generated code (here, the same example in @{text SML}) |
138 |
\cite{Haftmann-Nipkow:2010:code}: |
|
28419 | 139 |
*} |
140 |
||
28564 | 141 |
text %quote {*@{code_stmts bexp (SML)}*} |
28419 | 142 |
|
28447 | 143 |
text {* |
34155 | 144 |
\noindent Note the parameters with trailing underscore (@{verbatim "A_"}), |
28447 | 145 |
which are the dictionary parameters. |
146 |
*} |
|
28419 | 147 |
|
148 |
subsection {* The preprocessor \label{sec:preproc} *} |
|
149 |
||
150 |
text {* |
|
151 |
Before selected function theorems are turned into abstract |
|
152 |
code, a chain of definitional transformation steps is carried |
|
153 |
out: \emph{preprocessing}. In essence, the preprocessor |
|
154 |
consists of two components: a \emph{simpset} and \emph{function transformers}. |
|
155 |
||
34155 | 156 |
The \emph{simpset} can apply the full generality of the |
32000 | 157 |
Isabelle simplifier. Due to the interpretation of theorems as code |
158 |
equations, rewrites are applied to the right hand side and the |
|
159 |
arguments of the left hand side of an equation, but never to the |
|
160 |
constant heading the left hand side. An important special case are |
|
34155 | 161 |
\emph{unfold theorems}, which may be declared and removed using |
32000 | 162 |
the @{attribute code_unfold} or \emph{@{attribute code_unfold} del} |
34155 | 163 |
attribute, respectively. |
28213 | 164 |
|
28419 | 165 |
Some common applications: |
166 |
*} |
|
167 |
||
168 |
text_raw {* |
|
169 |
\begin{itemize} |
|
170 |
*} |
|
171 |
||
172 |
text {* |
|
173 |
\item replacing non-executable constructs by executable ones: |
|
174 |
*} |
|
175 |
||
37211 | 176 |
lemma %quote [code_unfold]: |
37612
48fed6598be9
adapted to reorganization of auxiliary list operations; split off predicate compiler into separate theory
haftmann
parents:
37427
diff
changeset
|
177 |
"x \<in> set xs \<longleftrightarrow> List.member xs x" by (fact in_set_member) |
28419 | 178 |
|
179 |
text {* |
|
180 |
\item eliminating superfluous constants: |
|
181 |
*} |
|
182 |
||
37211 | 183 |
lemma %quote [code_unfold]: |
184 |
"1 = Suc 0" by (fact One_nat_def) |
|
28419 | 185 |
|
186 |
text {* |
|
187 |
\item replacing executable but inconvenient constructs: |
|
188 |
*} |
|
189 |
||
37211 | 190 |
lemma %quote [code_unfold]: |
37612
48fed6598be9
adapted to reorganization of auxiliary list operations; split off predicate compiler into separate theory
haftmann
parents:
37427
diff
changeset
|
191 |
"xs = [] \<longleftrightarrow> List.null xs" by (fact eq_Nil_null) |
28419 | 192 |
|
193 |
text_raw {* |
|
194 |
\end{itemize} |
|
195 |
*} |
|
196 |
||
197 |
text {* |
|
28447 | 198 |
\noindent \emph{Function transformers} provide a very general interface, |
28419 | 199 |
transforming a list of function theorems to another |
200 |
list of function theorems, provided that neither the heading |
|
201 |
constant nor its type change. The @{term "0\<Colon>nat"} / @{const Suc} |
|
202 |
pattern elimination implemented in |
|
203 |
theory @{text Efficient_Nat} (see \secref{eff_nat}) uses this |
|
204 |
interface. |
|
205 |
||
206 |
\noindent The current setup of the preprocessor may be inspected using |
|
31254 | 207 |
the @{command print_codeproc} command. |
28419 | 208 |
@{command code_thms} provides a convenient |
209 |
mechanism to inspect the impact of a preprocessor setup |
|
29560 | 210 |
on code equations. |
28419 | 211 |
|
212 |
\begin{warn} |
|
32000 | 213 |
|
214 |
Attribute @{attribute code_unfold} also applies to the |
|
215 |
preprocessor of the ancient @{text "SML code generator"}; in case |
|
216 |
this is not what you intend, use @{attribute code_inline} instead. |
|
28419 | 217 |
\end{warn} |
218 |
*} |
|
219 |
||
220 |
subsection {* Datatypes \label{sec:datatypes} *} |
|
221 |
||
222 |
text {* |
|
223 |
Conceptually, any datatype is spanned by a set of |
|
29794 | 224 |
\emph{constructors} of type @{text "\<tau> = \<dots> \<Rightarrow> \<kappa> \<alpha>\<^isub>1 \<dots> \<alpha>\<^isub>n"} where @{text |
225 |
"{\<alpha>\<^isub>1, \<dots>, \<alpha>\<^isub>n}"} is exactly the set of \emph{all} type variables in |
|
226 |
@{text "\<tau>"}. The HOL datatype package by default registers any new |
|
227 |
datatype in the table of datatypes, which may be inspected using the |
|
228 |
@{command print_codesetup} command. |
|
28419 | 229 |
|
29794 | 230 |
In some cases, it is appropriate to alter or extend this table. As |
231 |
an example, we will develop an alternative representation of the |
|
232 |
queue example given in \secref{sec:intro}. The amortised |
|
233 |
representation is convenient for generating code but exposes its |
|
234 |
\qt{implementation} details, which may be cumbersome when proving |
|
34155 | 235 |
theorems about it. Therefore, here is a simple, straightforward |
29794 | 236 |
representation of queues: |
28419 | 237 |
*} |
238 |
||
29794 | 239 |
datatype %quote 'a queue = Queue "'a list" |
240 |
||
241 |
definition %quote empty :: "'a queue" where |
|
242 |
"empty = Queue []" |
|
28419 | 243 |
|
29794 | 244 |
primrec %quote enqueue :: "'a \<Rightarrow> 'a queue \<Rightarrow> 'a queue" where |
245 |
"enqueue x (Queue xs) = Queue (xs @ [x])" |
|
246 |
||
247 |
fun %quote dequeue :: "'a queue \<Rightarrow> 'a option \<times> 'a queue" where |
|
248 |
"dequeue (Queue []) = (None, Queue [])" |
|
249 |
| "dequeue (Queue (x # xs)) = (Some x, Queue xs)" |
|
28213 | 250 |
|
28419 | 251 |
text {* |
29794 | 252 |
\noindent This we can use directly for proving; for executing, |
253 |
we provide an alternative characterisation: |
|
28419 | 254 |
*} |
255 |
||
29794 | 256 |
definition %quote AQueue :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a queue" where |
257 |
"AQueue xs ys = Queue (ys @ rev xs)" |
|
258 |
||
259 |
code_datatype %quote AQueue |
|
260 |
||
29798 | 261 |
text {* |
262 |
\noindent Here we define a \qt{constructor} @{const "AQueue"} which |
|
263 |
is defined in terms of @{text "Queue"} and interprets its arguments |
|
264 |
according to what the \emph{content} of an amortised queue is supposed |
|
265 |
to be. Equipped with this, we are able to prove the following equations |
|
266 |
for our primitive queue operations which \qt{implement} the simple |
|
267 |
queues in an amortised fashion: |
|
268 |
*} |
|
29794 | 269 |
|
270 |
lemma %quote empty_AQueue [code]: |
|
271 |
"empty = AQueue [] []" |
|
272 |
unfolding AQueue_def empty_def by simp |
|
273 |
||
274 |
lemma %quote enqueue_AQueue [code]: |
|
275 |
"enqueue x (AQueue xs ys) = AQueue (x # xs) ys" |
|
276 |
unfolding AQueue_def by simp |
|
28419 | 277 |
|
29794 | 278 |
lemma %quote dequeue_AQueue [code]: |
279 |
"dequeue (AQueue xs []) = |
|
29798 | 280 |
(if xs = [] then (None, AQueue [] []) |
281 |
else dequeue (AQueue [] (rev xs)))" |
|
29794 | 282 |
"dequeue (AQueue xs (y # ys)) = (Some y, AQueue xs ys)" |
283 |
unfolding AQueue_def by simp_all |
|
284 |
||
29798 | 285 |
text {* |
286 |
\noindent For completeness, we provide a substitute for the |
|
287 |
@{text case} combinator on queues: |
|
288 |
*} |
|
29794 | 289 |
|
30227 | 290 |
lemma %quote queue_case_AQueue [code]: |
291 |
"queue_case f (AQueue xs ys) = f (ys @ rev xs)" |
|
292 |
unfolding AQueue_def by simp |
|
29794 | 293 |
|
29798 | 294 |
text {* |
295 |
\noindent The resulting code looks as expected: |
|
296 |
*} |
|
29794 | 297 |
|
298 |
text %quote {*@{code_stmts empty enqueue dequeue (SML)}*} |
|
28419 | 299 |
|
300 |
text {* |
|
29794 | 301 |
\noindent From this example, it can be glimpsed that using own |
302 |
constructor sets is a little delicate since it changes the set of |
|
303 |
valid patterns for values of that type. Without going into much |
|
304 |
detail, here some practical hints: |
|
28419 | 305 |
|
306 |
\begin{itemize} |
|
29794 | 307 |
|
308 |
\item When changing the constructor set for datatypes, take care |
|
30227 | 309 |
to provide alternative equations for the @{text case} combinator. |
29794 | 310 |
|
311 |
\item Values in the target language need not to be normalised -- |
|
312 |
different values in the target language may represent the same |
|
313 |
value in the logic. |
|
314 |
||
315 |
\item Usually, a good methodology to deal with the subtleties of |
|
316 |
pattern matching is to see the type as an abstract type: provide |
|
317 |
a set of operations which operate on the concrete representation |
|
318 |
of the type, and derive further operations by combinations of |
|
319 |
these primitive ones, without relying on a particular |
|
320 |
representation. |
|
321 |
||
28419 | 322 |
\end{itemize} |
323 |
*} |
|
324 |
||
28213 | 325 |
|
30938
c6c9359e474c
wellsortedness is no issue for a user manual any more
haftmann
parents:
30227
diff
changeset
|
326 |
subsection {* Equality *} |
28213 | 327 |
|
28419 | 328 |
text {* |
329 |
Surely you have already noticed how equality is treated |
|
330 |
by the code generator: |
|
331 |
*} |
|
332 |
||
28564 | 333 |
primrec %quote collect_duplicates :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a list \<Rightarrow> 'a list" where |
28447 | 334 |
"collect_duplicates xs ys [] = xs" |
28419 | 335 |
| "collect_duplicates xs ys (z#zs) = (if z \<in> set xs |
336 |
then if z \<in> set ys |
|
337 |
then collect_duplicates xs ys zs |
|
338 |
else collect_duplicates xs (z#ys) zs |
|
339 |
else collect_duplicates (z#xs) (z#ys) zs)" |
|
340 |
||
341 |
text {* |
|
37612
48fed6598be9
adapted to reorganization of auxiliary list operations; split off predicate compiler into separate theory
haftmann
parents:
37427
diff
changeset
|
342 |
\noindent During preprocessing, the membership test is rewritten, |
28419 | 343 |
resulting in @{const List.member}, which itself |
344 |
performs an explicit equality check. |
|
345 |
*} |
|
346 |
||
28564 | 347 |
text %quote {*@{code_stmts collect_duplicates (SML)}*} |
28419 | 348 |
|
349 |
text {* |
|
350 |
\noindent Obviously, polymorphic equality is implemented the Haskell |
|
351 |
way using a type class. How is this achieved? HOL introduces |
|
352 |
an explicit class @{class eq} with a corresponding operation |
|
353 |
@{const eq_class.eq} such that @{thm eq [no_vars]}. |
|
28447 | 354 |
The preprocessing framework does the rest by propagating the |
29560 | 355 |
@{class eq} constraints through all dependent code equations. |
28419 | 356 |
For datatypes, instances of @{class eq} are implicitly derived |
357 |
when possible. For other types, you may instantiate @{text eq} |
|
358 |
manually like any other type class. |
|
359 |
*} |
|
360 |
||
361 |
||
28462 | 362 |
subsection {* Explicit partiality *} |
363 |
||
364 |
text {* |
|
365 |
Partiality usually enters the game by partial patterns, as |
|
366 |
in the following example, again for amortised queues: |
|
367 |
*} |
|
368 |
||
29798 | 369 |
definition %quote strict_dequeue :: "'a queue \<Rightarrow> 'a \<times> 'a queue" where |
370 |
"strict_dequeue q = (case dequeue q |
|
371 |
of (Some x, q') \<Rightarrow> (x, q'))" |
|
372 |
||
373 |
lemma %quote strict_dequeue_AQueue [code]: |
|
374 |
"strict_dequeue (AQueue xs (y # ys)) = (y, AQueue xs ys)" |
|
375 |
"strict_dequeue (AQueue xs []) = |
|
376 |
(case rev xs of y # ys \<Rightarrow> (y, AQueue [] ys))" |
|
377 |
by (simp_all add: strict_dequeue_def dequeue_AQueue split: list.splits) |
|
28462 | 378 |
|
379 |
text {* |
|
380 |
\noindent In the corresponding code, there is no equation |
|
29798 | 381 |
for the pattern @{term "AQueue [] []"}: |
28462 | 382 |
*} |
383 |
||
28564 | 384 |
text %quote {*@{code_stmts strict_dequeue (consts) strict_dequeue (Haskell)}*} |
28462 | 385 |
|
386 |
text {* |
|
387 |
\noindent In some cases it is desirable to have this |
|
388 |
pseudo-\qt{partiality} more explicitly, e.g.~as follows: |
|
389 |
*} |
|
390 |
||
28564 | 391 |
axiomatization %quote empty_queue :: 'a |
28462 | 392 |
|
29798 | 393 |
definition %quote strict_dequeue' :: "'a queue \<Rightarrow> 'a \<times> 'a queue" where |
394 |
"strict_dequeue' q = (case dequeue q of (Some x, q') \<Rightarrow> (x, q') | _ \<Rightarrow> empty_queue)" |
|
28213 | 395 |
|
29798 | 396 |
lemma %quote strict_dequeue'_AQueue [code]: |
397 |
"strict_dequeue' (AQueue xs []) = (if xs = [] then empty_queue |
|
398 |
else strict_dequeue' (AQueue [] (rev xs)))" |
|
399 |
"strict_dequeue' (AQueue xs (y # ys)) = |
|
400 |
(y, AQueue xs ys)" |
|
401 |
by (simp_all add: strict_dequeue'_def dequeue_AQueue split: list.splits) |
|
28462 | 402 |
|
403 |
text {* |
|
29798 | 404 |
Observe that on the right hand side of the definition of @{const |
34155 | 405 |
"strict_dequeue'"}, the unspecified constant @{const empty_queue} occurs. |
28462 | 406 |
|
29798 | 407 |
Normally, if constants without any code equations occur in a |
408 |
program, the code generator complains (since in most cases this is |
|
34155 | 409 |
indeed an error). But such constants can also be thought |
410 |
of as function definitions which always fail, |
|
29798 | 411 |
since there is never a successful pattern match on the left hand |
412 |
side. In order to categorise a constant into that category |
|
413 |
explicitly, use @{command "code_abort"}: |
|
28462 | 414 |
*} |
415 |
||
28564 | 416 |
code_abort %quote empty_queue |
28462 | 417 |
|
418 |
text {* |
|
419 |
\noindent Then the code generator will just insert an error or |
|
420 |
exception at the appropriate position: |
|
421 |
*} |
|
422 |
||
28564 | 423 |
text %quote {*@{code_stmts strict_dequeue' (consts) empty_queue strict_dequeue' (Haskell)}*} |
28462 | 424 |
|
425 |
text {* |
|
426 |
\noindent This feature however is rarely needed in practice. |
|
427 |
Note also that the @{text HOL} default setup already declares |
|
428 |
@{const undefined} as @{command "code_abort"}, which is most |
|
429 |
likely to be used in such situations. |
|
430 |
*} |
|
28213 | 431 |
|
432 |
end |