| author | wenzelm | 
| Tue, 03 Apr 2012 21:09:09 +0200 | |
| changeset 47322 | e19a3759f303 | 
| parent 46516 | 92f981f4a61b | 
| permissions | -rw-r--r-- | 
| 38405 | 1  | 
theory Refinement  | 
2  | 
imports Setup  | 
|
3  | 
begin  | 
|
4  | 
||
5  | 
section {* Program and datatype refinement \label{sec:refinement} *}
 | 
|
6  | 
||
| 38451 | 7  | 
text {*
 | 
8  | 
  Code generation by shallow embedding (cf.~\secref{sec:principle})
 | 
|
9  | 
allows to choose code equations and datatype constructors freely,  | 
|
10  | 
given that some very basic syntactic properties are met; this  | 
|
11  | 
flexibility opens up mechanisms for refinement which allow to extend  | 
|
12  | 
the scope and quality of generated code dramatically.  | 
|
13  | 
*}  | 
|
14  | 
||
15  | 
||
16  | 
subsection {* Program refinement *}
 | 
|
17  | 
||
18  | 
text {*
 | 
|
19  | 
Program refinement works by choosing appropriate code equations  | 
|
| 40352 | 20  | 
  explicitly (cf.~\secref{sec:equations}); as example, we use Fibonacci
 | 
| 38451 | 21  | 
numbers:  | 
22  | 
*}  | 
|
23  | 
||
24  | 
fun %quote fib :: "nat \<Rightarrow> nat" where  | 
|
25  | 
"fib 0 = 0"  | 
|
26  | 
| "fib (Suc 0) = Suc 0"  | 
|
27  | 
| "fib (Suc (Suc n)) = fib n + fib (Suc n)"  | 
|
28  | 
||
29  | 
text {*
 | 
|
30  | 
\noindent The runtime of the corresponding code grows exponential due  | 
|
31  | 
to two recursive calls:  | 
|
32  | 
*}  | 
|
33  | 
||
| 39745 | 34  | 
text %quotetypewriter {*
 | 
| 39683 | 35  | 
  @{code_stmts fib (consts) fib (Haskell)}
 | 
| 
39664
 
0afaf89ab591
more canonical type setting of type writer code examples
 
haftmann 
parents: 
39599 
diff
changeset
 | 
36  | 
*}  | 
| 38451 | 37  | 
|
38  | 
text {*
 | 
|
39  | 
\noindent A more efficient implementation would use dynamic  | 
|
40  | 
programming, e.g.~sharing of common intermediate results between  | 
|
41  | 
recursive calls. This idea is expressed by an auxiliary operation  | 
|
42  | 
which computes a Fibonacci number and its successor simultaneously:  | 
|
43  | 
*}  | 
|
44  | 
||
45  | 
definition %quote fib_step :: "nat \<Rightarrow> nat \<times> nat" where  | 
|
46  | 
"fib_step n = (fib (Suc n), fib n)"  | 
|
47  | 
||
48  | 
text {*
 | 
|
49  | 
\noindent This operation can be implemented by recursion using  | 
|
50  | 
dynamic programming:  | 
|
51  | 
*}  | 
|
52  | 
||
53  | 
lemma %quote [code]:  | 
|
54  | 
"fib_step 0 = (Suc 0, 0)"  | 
|
55  | 
"fib_step (Suc n) = (let (m, q) = fib_step n in (m + q, m))"  | 
|
56  | 
by (simp_all add: fib_step_def)  | 
|
57  | 
||
58  | 
text {*
 | 
|
59  | 
  \noindent What remains is to implement @{const fib} by @{const
 | 
|
60  | 
fib_step} as follows:  | 
|
61  | 
*}  | 
|
62  | 
||
63  | 
lemma %quote [code]:  | 
|
64  | 
"fib 0 = 0"  | 
|
65  | 
"fib (Suc n) = fst (fib_step n)"  | 
|
66  | 
by (simp_all add: fib_step_def)  | 
|
67  | 
||
68  | 
text {*
 | 
|
69  | 
\noindent The resulting code shows only linear growth of runtime:  | 
|
70  | 
*}  | 
|
71  | 
||
| 39745 | 72  | 
text %quotetypewriter {*
 | 
| 39683 | 73  | 
  @{code_stmts fib (consts) fib fib_step (Haskell)}
 | 
| 
39664
 
0afaf89ab591
more canonical type setting of type writer code examples
 
haftmann 
parents: 
39599 
diff
changeset
 | 
74  | 
*}  | 
| 38451 | 75  | 
|
76  | 
||
| 38459 | 77  | 
subsection {* Datatype refinement *}
 | 
| 38437 | 78  | 
|
79  | 
text {*
 | 
|
| 38459 | 80  | 
  Selecting specific code equations \emph{and} datatype constructors
 | 
81  | 
leads to datatype refinement. As an example, we will develop an  | 
|
82  | 
alternative representation of the queue example given in  | 
|
83  | 
  \secref{sec:queue_example}.  The amortised representation is
 | 
|
84  | 
  convenient for generating code but exposes its \qt{implementation}
 | 
|
85  | 
details, which may be cumbersome when proving theorems about it.  | 
|
86  | 
Therefore, here is a simple, straightforward representation of  | 
|
87  | 
queues:  | 
|
| 38437 | 88  | 
*}  | 
89  | 
||
90  | 
datatype %quote 'a queue = Queue "'a list"  | 
|
91  | 
||
92  | 
definition %quote empty :: "'a queue" where  | 
|
93  | 
"empty = Queue []"  | 
|
94  | 
||
95  | 
primrec %quote enqueue :: "'a \<Rightarrow> 'a queue \<Rightarrow> 'a queue" where  | 
|
96  | 
"enqueue x (Queue xs) = Queue (xs @ [x])"  | 
|
97  | 
||
98  | 
fun %quote dequeue :: "'a queue \<Rightarrow> 'a option \<times> 'a queue" where  | 
|
99  | 
"dequeue (Queue []) = (None, Queue [])"  | 
|
100  | 
| "dequeue (Queue (x # xs)) = (Some x, Queue xs)"  | 
|
101  | 
||
102  | 
text {*
 | 
|
103  | 
\noindent This we can use directly for proving; for executing,  | 
|
104  | 
we provide an alternative characterisation:  | 
|
105  | 
*}  | 
|
106  | 
||
107  | 
definition %quote AQueue :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a queue" where  | 
|
108  | 
"AQueue xs ys = Queue (ys @ rev xs)"  | 
|
109  | 
||
110  | 
code_datatype %quote AQueue  | 
|
111  | 
||
112  | 
text {*
 | 
|
113  | 
  \noindent Here we define a \qt{constructor} @{const "AQueue"} which
 | 
|
114  | 
  is defined in terms of @{text "Queue"} and interprets its arguments
 | 
|
115  | 
  according to what the \emph{content} of an amortised queue is supposed
 | 
|
| 38459 | 116  | 
to be.  | 
117  | 
||
118  | 
The prerequisite for datatype constructors is only syntactical: a  | 
|
119  | 
  constructor must be of type @{text "\<tau> = \<dots> \<Rightarrow> \<kappa> \<alpha>\<^isub>1 \<dots> \<alpha>\<^isub>n"} where @{text
 | 
|
120  | 
  "{\<alpha>\<^isub>1, \<dots>, \<alpha>\<^isub>n}"} is exactly the set of \emph{all} type variables in
 | 
|
121  | 
  @{text "\<tau>"}; then @{text "\<kappa>"} is its corresponding datatype.  The
 | 
|
122  | 
HOL datatype package by default registers any new datatype with its  | 
|
| 38511 | 123  | 
  constructors, but this may be changed using @{command_def
 | 
| 38459 | 124  | 
code_datatype}; the currently chosen constructors can be inspected  | 
125  | 
  using the @{command print_codesetup} command.
 | 
|
126  | 
||
127  | 
Equipped with this, we are able to prove the following equations  | 
|
| 38437 | 128  | 
  for our primitive queue operations which \qt{implement} the simple
 | 
129  | 
queues in an amortised fashion:  | 
|
130  | 
*}  | 
|
131  | 
||
132  | 
lemma %quote empty_AQueue [code]:  | 
|
133  | 
"empty = AQueue [] []"  | 
|
| 40754 | 134  | 
by (simp add: AQueue_def empty_def)  | 
| 38437 | 135  | 
|
136  | 
lemma %quote enqueue_AQueue [code]:  | 
|
137  | 
"enqueue x (AQueue xs ys) = AQueue (x # xs) ys"  | 
|
| 40754 | 138  | 
by (simp add: AQueue_def)  | 
| 38437 | 139  | 
|
140  | 
lemma %quote dequeue_AQueue [code]:  | 
|
141  | 
"dequeue (AQueue xs []) =  | 
|
142  | 
(if xs = [] then (None, AQueue [] [])  | 
|
143  | 
else dequeue (AQueue [] (rev xs)))"  | 
|
144  | 
"dequeue (AQueue xs (y # ys)) = (Some y, AQueue xs ys)"  | 
|
| 40754 | 145  | 
by (simp_all add: AQueue_def)  | 
| 38437 | 146  | 
|
147  | 
text {*
 | 
|
| 40754 | 148  | 
\noindent It is good style, although no absolute requirement, to  | 
149  | 
provide code equations for the original artefacts of the implemented  | 
|
150  | 
type, if possible; in our case, these are the datatype constructor  | 
|
151  | 
  @{const Queue} and the case combinator @{const queue_case}:
 | 
|
| 38437 | 152  | 
*}  | 
153  | 
||
| 40754 | 154  | 
lemma %quote Queue_AQueue [code]:  | 
155  | 
"Queue = AQueue []"  | 
|
156  | 
by (simp add: AQueue_def fun_eq_iff)  | 
|
157  | 
||
| 38437 | 158  | 
lemma %quote queue_case_AQueue [code]:  | 
159  | 
"queue_case f (AQueue xs ys) = f (ys @ rev xs)"  | 
|
| 40754 | 160  | 
by (simp add: AQueue_def)  | 
| 38437 | 161  | 
|
162  | 
text {*
 | 
|
163  | 
\noindent The resulting code looks as expected:  | 
|
164  | 
*}  | 
|
165  | 
||
| 39745 | 166  | 
text %quotetypewriter {*
 | 
| 40754 | 167  | 
  @{code_stmts empty enqueue dequeue Queue queue_case (SML)}
 | 
| 
39664
 
0afaf89ab591
more canonical type setting of type writer code examples
 
haftmann 
parents: 
39599 
diff
changeset
 | 
168  | 
*}  | 
| 38437 | 169  | 
|
170  | 
text {*
 | 
|
| 38459 | 171  | 
The same techniques can also be applied to types which are not  | 
172  | 
  specified as datatypes, e.g.~type @{typ int} is originally specified
 | 
|
| 38511 | 173  | 
  as quotient type by means of @{command_def typedef}, but for code
 | 
| 38459 | 174  | 
generation constants allowing construction of binary numeral values  | 
175  | 
  are used as constructors for @{typ int}.
 | 
|
| 38437 | 176  | 
|
| 38459 | 177  | 
This approach however fails if the representation of a type demands  | 
178  | 
invariants; this issue is discussed in the next section.  | 
|
179  | 
*}  | 
|
180  | 
||
| 38437 | 181  | 
|
| 39599 | 182  | 
subsection {* Datatype refinement involving invariants \label{sec:invariant} *}
 | 
| 38437 | 183  | 
|
| 38459 | 184  | 
text {*
 | 
| 38502 | 185  | 
Datatype representation involving invariants require a dedicated  | 
186  | 
setup for the type and its primitive operations. As a running  | 
|
187  | 
  example, we implement a type @{text "'a dlist"} of list consisting
 | 
|
188  | 
of distinct elements.  | 
|
189  | 
||
190  | 
The first step is to decide on which representation the abstract  | 
|
191  | 
  type (in our example @{text "'a dlist"}) should be implemented.
 | 
|
192  | 
  Here we choose @{text "'a list"}.  Then a conversion from the concrete
 | 
|
193  | 
type to the abstract type must be specified, here:  | 
|
194  | 
*}  | 
|
195  | 
||
196  | 
text %quote {*
 | 
|
197  | 
  @{term_type Dlist}
 | 
|
198  | 
*}  | 
|
199  | 
||
200  | 
text {*
 | 
|
201  | 
  \noindent Next follows the specification of a suitable \emph{projection},
 | 
|
202  | 
i.e.~a conversion from abstract to concrete type:  | 
|
203  | 
*}  | 
|
204  | 
||
205  | 
text %quote {*
 | 
|
206  | 
  @{term_type list_of_dlist}
 | 
|
207  | 
*}  | 
|
208  | 
||
209  | 
text {*
 | 
|
210  | 
\noindent This projection must be specified such that the following  | 
|
211  | 
  \emph{abstract datatype certificate} can be proven:
 | 
|
212  | 
*}  | 
|
213  | 
||
214  | 
lemma %quote [code abstype]:  | 
|
215  | 
"Dlist (list_of_dlist dxs) = dxs"  | 
|
216  | 
by (fact Dlist_list_of_dlist)  | 
|
217  | 
||
218  | 
text {*
 | 
|
219  | 
\noindent Note that so far the invariant on representations  | 
|
220  | 
  (@{term_type distinct}) has never been mentioned explicitly:
 | 
|
221  | 
the invariant is only referred to implicitly: all values in  | 
|
222  | 
  set @{term "{xs. list_of_dlist (Dlist xs) = xs}"} are invariant,
 | 
|
223  | 
  and in our example this is exactly @{term "{xs. distinct xs}"}.
 | 
|
224  | 
||
225  | 
  The primitive operations on @{typ "'a dlist"} are specified
 | 
|
226  | 
  indirectly using the projection @{const list_of_dlist}.  For
 | 
|
227  | 
  the empty @{text "dlist"}, @{const Dlist.empty}, we finally want
 | 
|
228  | 
the code equation  | 
|
229  | 
*}  | 
|
230  | 
||
231  | 
text %quote {*
 | 
|
232  | 
  @{term "Dlist.empty = Dlist []"}
 | 
|
233  | 
*}  | 
|
234  | 
||
235  | 
text {*
 | 
|
236  | 
\noindent This we have to prove indirectly as follows:  | 
|
237  | 
*}  | 
|
238  | 
||
239  | 
lemma %quote [code abstract]:  | 
|
240  | 
"list_of_dlist Dlist.empty = []"  | 
|
241  | 
by (fact list_of_dlist_empty)  | 
|
242  | 
||
243  | 
text {*
 | 
|
244  | 
\noindent This equation logically encodes both the desired code  | 
|
245  | 
  equation and that the expression @{const Dlist} is applied to obeys
 | 
|
246  | 
the implicit invariant. Equations for insertion and removal are  | 
|
247  | 
similar:  | 
|
248  | 
*}  | 
|
249  | 
||
250  | 
lemma %quote [code abstract]:  | 
|
251  | 
"list_of_dlist (Dlist.insert x dxs) = List.insert x (list_of_dlist dxs)"  | 
|
252  | 
by (fact list_of_dlist_insert)  | 
|
253  | 
||
254  | 
lemma %quote [code abstract]:  | 
|
255  | 
"list_of_dlist (Dlist.remove x dxs) = remove1 x (list_of_dlist dxs)"  | 
|
256  | 
by (fact list_of_dlist_remove)  | 
|
257  | 
||
258  | 
text {*
 | 
|
259  | 
\noindent Then the corresponding code is as follows:  | 
|
260  | 
*}  | 
|
261  | 
||
| 39745 | 262  | 
text %quotetypewriter {*
 | 
| 39683 | 263  | 
  @{code_stmts Dlist.empty Dlist.insert Dlist.remove list_of_dlist (Haskell)}
 | 
| 38502 | 264  | 
*} (*(types) dlist (consts) dempty dinsert dremove list_of List.member insert remove *)  | 
265  | 
||
266  | 
text {*
 | 
|
267  | 
Typical data structures implemented by representations involving  | 
|
| 46516 | 268  | 
  invariants are available in the library, theory @{theory Mapping}
 | 
269  | 
  specifies key-value-mappings (type @{typ "('a, 'b) mapping"});
 | 
|
270  | 
  these can be implemented by red-black-trees (theory @{theory RBT}).
 | 
|
| 38437 | 271  | 
*}  | 
272  | 
||
| 38405 | 273  | 
end  | 
| 46516 | 274  |