author | haftmann |
Wed, 03 Nov 2010 14:14:06 +0100 | |
changeset 40352 | 8fd36f8a5cb7 |
parent 39745 | 3aa2bc9c5478 |
child 40754 | e3d4f2522a5f |
permissions | -rw-r--r-- |
38405 | 1 |
theory Refinement |
2 |
imports Setup |
|
3 |
begin |
|
4 |
||
5 |
section {* Program and datatype refinement \label{sec:refinement} *} |
|
6 |
||
38451 | 7 |
text {* |
8 |
Code generation by shallow embedding (cf.~\secref{sec:principle}) |
|
9 |
allows to choose code equations and datatype constructors freely, |
|
10 |
given that some very basic syntactic properties are met; this |
|
11 |
flexibility opens up mechanisms for refinement which allow to extend |
|
12 |
the scope and quality of generated code dramatically. |
|
13 |
*} |
|
14 |
||
15 |
||
16 |
subsection {* Program refinement *} |
|
17 |
||
18 |
text {* |
|
19 |
Program refinement works by choosing appropriate code equations |
|
40352 | 20 |
explicitly (cf.~\secref{sec:equations}); as example, we use Fibonacci |
38451 | 21 |
numbers: |
22 |
*} |
|
23 |
||
24 |
fun %quote fib :: "nat \<Rightarrow> nat" where |
|
25 |
"fib 0 = 0" |
|
26 |
| "fib (Suc 0) = Suc 0" |
|
27 |
| "fib (Suc (Suc n)) = fib n + fib (Suc n)" |
|
28 |
||
29 |
text {* |
|
30 |
\noindent The runtime of the corresponding code grows exponential due |
|
31 |
to two recursive calls: |
|
32 |
*} |
|
33 |
||
39745 | 34 |
text %quotetypewriter {* |
39683 | 35 |
@{code_stmts fib (consts) fib (Haskell)} |
39664
0afaf89ab591
more canonical type setting of type writer code examples
haftmann
parents:
39599
diff
changeset
|
36 |
*} |
38451 | 37 |
|
38 |
text {* |
|
39 |
\noindent A more efficient implementation would use dynamic |
|
40 |
programming, e.g.~sharing of common intermediate results between |
|
41 |
recursive calls. This idea is expressed by an auxiliary operation |
|
42 |
which computes a Fibonacci number and its successor simultaneously: |
|
43 |
*} |
|
44 |
||
45 |
definition %quote fib_step :: "nat \<Rightarrow> nat \<times> nat" where |
|
46 |
"fib_step n = (fib (Suc n), fib n)" |
|
47 |
||
48 |
text {* |
|
49 |
\noindent This operation can be implemented by recursion using |
|
50 |
dynamic programming: |
|
51 |
*} |
|
52 |
||
53 |
lemma %quote [code]: |
|
54 |
"fib_step 0 = (Suc 0, 0)" |
|
55 |
"fib_step (Suc n) = (let (m, q) = fib_step n in (m + q, m))" |
|
56 |
by (simp_all add: fib_step_def) |
|
57 |
||
58 |
text {* |
|
59 |
\noindent What remains is to implement @{const fib} by @{const |
|
60 |
fib_step} as follows: |
|
61 |
*} |
|
62 |
||
63 |
lemma %quote [code]: |
|
64 |
"fib 0 = 0" |
|
65 |
"fib (Suc n) = fst (fib_step n)" |
|
66 |
by (simp_all add: fib_step_def) |
|
67 |
||
68 |
text {* |
|
69 |
\noindent The resulting code shows only linear growth of runtime: |
|
70 |
*} |
|
71 |
||
39745 | 72 |
text %quotetypewriter {* |
39683 | 73 |
@{code_stmts fib (consts) fib fib_step (Haskell)} |
39664
0afaf89ab591
more canonical type setting of type writer code examples
haftmann
parents:
39599
diff
changeset
|
74 |
*} |
38451 | 75 |
|
76 |
||
38459 | 77 |
subsection {* Datatype refinement *} |
38437 | 78 |
|
79 |
text {* |
|
38459 | 80 |
Selecting specific code equations \emph{and} datatype constructors |
81 |
leads to datatype refinement. As an example, we will develop an |
|
82 |
alternative representation of the queue example given in |
|
83 |
\secref{sec:queue_example}. The amortised representation is |
|
84 |
convenient for generating code but exposes its \qt{implementation} |
|
85 |
details, which may be cumbersome when proving theorems about it. |
|
86 |
Therefore, here is a simple, straightforward representation of |
|
87 |
queues: |
|
38437 | 88 |
*} |
89 |
||
90 |
datatype %quote 'a queue = Queue "'a list" |
|
91 |
||
92 |
definition %quote empty :: "'a queue" where |
|
93 |
"empty = Queue []" |
|
94 |
||
95 |
primrec %quote enqueue :: "'a \<Rightarrow> 'a queue \<Rightarrow> 'a queue" where |
|
96 |
"enqueue x (Queue xs) = Queue (xs @ [x])" |
|
97 |
||
98 |
fun %quote dequeue :: "'a queue \<Rightarrow> 'a option \<times> 'a queue" where |
|
99 |
"dequeue (Queue []) = (None, Queue [])" |
|
100 |
| "dequeue (Queue (x # xs)) = (Some x, Queue xs)" |
|
101 |
||
102 |
text {* |
|
103 |
\noindent This we can use directly for proving; for executing, |
|
104 |
we provide an alternative characterisation: |
|
105 |
*} |
|
106 |
||
107 |
definition %quote AQueue :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a queue" where |
|
108 |
"AQueue xs ys = Queue (ys @ rev xs)" |
|
109 |
||
110 |
code_datatype %quote AQueue |
|
111 |
||
112 |
text {* |
|
113 |
\noindent Here we define a \qt{constructor} @{const "AQueue"} which |
|
114 |
is defined in terms of @{text "Queue"} and interprets its arguments |
|
115 |
according to what the \emph{content} of an amortised queue is supposed |
|
38459 | 116 |
to be. |
117 |
||
118 |
The prerequisite for datatype constructors is only syntactical: a |
|
119 |
constructor must be of type @{text "\<tau> = \<dots> \<Rightarrow> \<kappa> \<alpha>\<^isub>1 \<dots> \<alpha>\<^isub>n"} where @{text |
|
120 |
"{\<alpha>\<^isub>1, \<dots>, \<alpha>\<^isub>n}"} is exactly the set of \emph{all} type variables in |
|
121 |
@{text "\<tau>"}; then @{text "\<kappa>"} is its corresponding datatype. The |
|
122 |
HOL datatype package by default registers any new datatype with its |
|
38511 | 123 |
constructors, but this may be changed using @{command_def |
38459 | 124 |
code_datatype}; the currently chosen constructors can be inspected |
125 |
using the @{command print_codesetup} command. |
|
126 |
||
127 |
Equipped with this, we are able to prove the following equations |
|
38437 | 128 |
for our primitive queue operations which \qt{implement} the simple |
129 |
queues in an amortised fashion: |
|
130 |
*} |
|
131 |
||
132 |
lemma %quote empty_AQueue [code]: |
|
133 |
"empty = AQueue [] []" |
|
134 |
unfolding AQueue_def empty_def by simp |
|
135 |
||
136 |
lemma %quote enqueue_AQueue [code]: |
|
137 |
"enqueue x (AQueue xs ys) = AQueue (x # xs) ys" |
|
138 |
unfolding AQueue_def by simp |
|
139 |
||
140 |
lemma %quote dequeue_AQueue [code]: |
|
141 |
"dequeue (AQueue xs []) = |
|
142 |
(if xs = [] then (None, AQueue [] []) |
|
143 |
else dequeue (AQueue [] (rev xs)))" |
|
144 |
"dequeue (AQueue xs (y # ys)) = (Some y, AQueue xs ys)" |
|
145 |
unfolding AQueue_def by simp_all |
|
146 |
||
147 |
text {* |
|
148 |
\noindent For completeness, we provide a substitute for the |
|
149 |
@{text case} combinator on queues: |
|
150 |
*} |
|
151 |
||
152 |
lemma %quote queue_case_AQueue [code]: |
|
153 |
"queue_case f (AQueue xs ys) = f (ys @ rev xs)" |
|
154 |
unfolding AQueue_def by simp |
|
155 |
||
156 |
text {* |
|
157 |
\noindent The resulting code looks as expected: |
|
158 |
*} |
|
159 |
||
39745 | 160 |
text %quotetypewriter {* |
39683 | 161 |
@{code_stmts empty enqueue dequeue (SML)} |
39664
0afaf89ab591
more canonical type setting of type writer code examples
haftmann
parents:
39599
diff
changeset
|
162 |
*} |
38437 | 163 |
|
164 |
text {* |
|
38459 | 165 |
The same techniques can also be applied to types which are not |
166 |
specified as datatypes, e.g.~type @{typ int} is originally specified |
|
38511 | 167 |
as quotient type by means of @{command_def typedef}, but for code |
38459 | 168 |
generation constants allowing construction of binary numeral values |
169 |
are used as constructors for @{typ int}. |
|
38437 | 170 |
|
38459 | 171 |
This approach however fails if the representation of a type demands |
172 |
invariants; this issue is discussed in the next section. |
|
173 |
*} |
|
174 |
||
38437 | 175 |
|
39599 | 176 |
subsection {* Datatype refinement involving invariants \label{sec:invariant} *} |
38437 | 177 |
|
38459 | 178 |
text {* |
38502 | 179 |
Datatype representation involving invariants require a dedicated |
180 |
setup for the type and its primitive operations. As a running |
|
181 |
example, we implement a type @{text "'a dlist"} of list consisting |
|
182 |
of distinct elements. |
|
183 |
||
184 |
The first step is to decide on which representation the abstract |
|
185 |
type (in our example @{text "'a dlist"}) should be implemented. |
|
186 |
Here we choose @{text "'a list"}. Then a conversion from the concrete |
|
187 |
type to the abstract type must be specified, here: |
|
188 |
*} |
|
189 |
||
190 |
text %quote {* |
|
191 |
@{term_type Dlist} |
|
192 |
*} |
|
193 |
||
194 |
text {* |
|
195 |
\noindent Next follows the specification of a suitable \emph{projection}, |
|
196 |
i.e.~a conversion from abstract to concrete type: |
|
197 |
*} |
|
198 |
||
199 |
text %quote {* |
|
200 |
@{term_type list_of_dlist} |
|
201 |
*} |
|
202 |
||
203 |
text {* |
|
204 |
\noindent This projection must be specified such that the following |
|
205 |
\emph{abstract datatype certificate} can be proven: |
|
206 |
*} |
|
207 |
||
208 |
lemma %quote [code abstype]: |
|
209 |
"Dlist (list_of_dlist dxs) = dxs" |
|
210 |
by (fact Dlist_list_of_dlist) |
|
211 |
||
212 |
text {* |
|
213 |
\noindent Note that so far the invariant on representations |
|
214 |
(@{term_type distinct}) has never been mentioned explicitly: |
|
215 |
the invariant is only referred to implicitly: all values in |
|
216 |
set @{term "{xs. list_of_dlist (Dlist xs) = xs}"} are invariant, |
|
217 |
and in our example this is exactly @{term "{xs. distinct xs}"}. |
|
218 |
||
219 |
The primitive operations on @{typ "'a dlist"} are specified |
|
220 |
indirectly using the projection @{const list_of_dlist}. For |
|
221 |
the empty @{text "dlist"}, @{const Dlist.empty}, we finally want |
|
222 |
the code equation |
|
223 |
*} |
|
224 |
||
225 |
text %quote {* |
|
226 |
@{term "Dlist.empty = Dlist []"} |
|
227 |
*} |
|
228 |
||
229 |
text {* |
|
230 |
\noindent This we have to prove indirectly as follows: |
|
231 |
*} |
|
232 |
||
233 |
lemma %quote [code abstract]: |
|
234 |
"list_of_dlist Dlist.empty = []" |
|
235 |
by (fact list_of_dlist_empty) |
|
236 |
||
237 |
text {* |
|
238 |
\noindent This equation logically encodes both the desired code |
|
239 |
equation and that the expression @{const Dlist} is applied to obeys |
|
240 |
the implicit invariant. Equations for insertion and removal are |
|
241 |
similar: |
|
242 |
*} |
|
243 |
||
244 |
lemma %quote [code abstract]: |
|
245 |
"list_of_dlist (Dlist.insert x dxs) = List.insert x (list_of_dlist dxs)" |
|
246 |
by (fact list_of_dlist_insert) |
|
247 |
||
248 |
lemma %quote [code abstract]: |
|
249 |
"list_of_dlist (Dlist.remove x dxs) = remove1 x (list_of_dlist dxs)" |
|
250 |
by (fact list_of_dlist_remove) |
|
251 |
||
252 |
text {* |
|
253 |
\noindent Then the corresponding code is as follows: |
|
254 |
*} |
|
255 |
||
39745 | 256 |
text %quotetypewriter {* |
39683 | 257 |
@{code_stmts Dlist.empty Dlist.insert Dlist.remove list_of_dlist (Haskell)} |
38502 | 258 |
*} (*(types) dlist (consts) dempty dinsert dremove list_of List.member insert remove *) |
259 |
||
260 |
text {* |
|
261 |
Typical data structures implemented by representations involving |
|
262 |
invariants are available in the library, e.g.~theories @{theory |
|
263 |
Fset} and @{theory Mapping} specify sets (type @{typ "'a fset"}) and |
|
264 |
key-value-mappings (type @{typ "('a, 'b) mapping"}) respectively; |
|
265 |
these can be implemented by distinct lists as presented here as |
|
266 |
example (theory @{theory Dlist}) and red-black-trees respectively |
|
267 |
(theory @{theory RBT}). |
|
38437 | 268 |
*} |
269 |
||
38405 | 270 |
end |