author | wenzelm |
Fri, 22 Jun 2018 20:31:49 +0200 | |
changeset 68484 | 59793df7f853 |
parent 66453 | cc19f7ca2ed6 |
child 69422 | 472af2d7835d |
permissions | -rw-r--r-- |
38405 | 1 |
theory Refinement |
66453
cc19f7ca2ed6
session-qualified theory imports: isabelle imports -U -i -d '~~/src/Benchmarks' -a;
wenzelm
parents:
66405
diff
changeset
|
2 |
imports Codegen_Basics.Setup |
38405 | 3 |
begin |
4 |
||
59377 | 5 |
section \<open>Program and datatype refinement \label{sec:refinement}\<close> |
38405 | 6 |
|
59377 | 7 |
text \<open> |
38451 | 8 |
Code generation by shallow embedding (cf.~\secref{sec:principle}) |
9 |
allows to choose code equations and datatype constructors freely, |
|
10 |
given that some very basic syntactic properties are met; this |
|
11 |
flexibility opens up mechanisms for refinement which allow to extend |
|
12 |
the scope and quality of generated code dramatically. |
|
59377 | 13 |
\<close> |
38451 | 14 |
|
15 |
||
59377 | 16 |
subsection \<open>Program refinement\<close> |
38451 | 17 |
|
59377 | 18 |
text \<open> |
38451 | 19 |
Program refinement works by choosing appropriate code equations |
40352 | 20 |
explicitly (cf.~\secref{sec:equations}); as example, we use Fibonacci |
38451 | 21 |
numbers: |
59377 | 22 |
\<close> |
38451 | 23 |
|
24 |
fun %quote fib :: "nat \<Rightarrow> nat" where |
|
25 |
"fib 0 = 0" |
|
26 |
| "fib (Suc 0) = Suc 0" |
|
27 |
| "fib (Suc (Suc n)) = fib n + fib (Suc n)" |
|
28 |
||
59377 | 29 |
text \<open> |
38451 | 30 |
\noindent The runtime of the corresponding code grows exponential due |
31 |
to two recursive calls: |
|
59377 | 32 |
\<close> |
38451 | 33 |
|
59377 | 34 |
text %quotetypewriter \<open> |
39683 | 35 |
@{code_stmts fib (consts) fib (Haskell)} |
59377 | 36 |
\<close> |
38451 | 37 |
|
59377 | 38 |
text \<open> |
38451 | 39 |
\noindent A more efficient implementation would use dynamic |
40 |
programming, e.g.~sharing of common intermediate results between |
|
41 |
recursive calls. This idea is expressed by an auxiliary operation |
|
42 |
which computes a Fibonacci number and its successor simultaneously: |
|
59377 | 43 |
\<close> |
38451 | 44 |
|
45 |
definition %quote fib_step :: "nat \<Rightarrow> nat \<times> nat" where |
|
46 |
"fib_step n = (fib (Suc n), fib n)" |
|
47 |
||
59377 | 48 |
text \<open> |
38451 | 49 |
\noindent This operation can be implemented by recursion using |
50 |
dynamic programming: |
|
59377 | 51 |
\<close> |
38451 | 52 |
|
53 |
lemma %quote [code]: |
|
54 |
"fib_step 0 = (Suc 0, 0)" |
|
55 |
"fib_step (Suc n) = (let (m, q) = fib_step n in (m + q, m))" |
|
56 |
by (simp_all add: fib_step_def) |
|
57 |
||
59377 | 58 |
text \<open> |
38451 | 59 |
\noindent What remains is to implement @{const fib} by @{const |
60 |
fib_step} as follows: |
|
59377 | 61 |
\<close> |
38451 | 62 |
|
63 |
lemma %quote [code]: |
|
64 |
"fib 0 = 0" |
|
65 |
"fib (Suc n) = fst (fib_step n)" |
|
66 |
by (simp_all add: fib_step_def) |
|
67 |
||
59377 | 68 |
text \<open> |
38451 | 69 |
\noindent The resulting code shows only linear growth of runtime: |
59377 | 70 |
\<close> |
38451 | 71 |
|
59377 | 72 |
text %quotetypewriter \<open> |
39683 | 73 |
@{code_stmts fib (consts) fib fib_step (Haskell)} |
59377 | 74 |
\<close> |
38451 | 75 |
|
76 |
||
59377 | 77 |
subsection \<open>Datatype refinement\<close> |
38437 | 78 |
|
59377 | 79 |
text \<open> |
38459 | 80 |
Selecting specific code equations \emph{and} datatype constructors |
81 |
leads to datatype refinement. As an example, we will develop an |
|
82 |
alternative representation of the queue example given in |
|
83 |
\secref{sec:queue_example}. The amortised representation is |
|
84 |
convenient for generating code but exposes its \qt{implementation} |
|
85 |
details, which may be cumbersome when proving theorems about it. |
|
86 |
Therefore, here is a simple, straightforward representation of |
|
87 |
queues: |
|
59377 | 88 |
\<close> |
38437 | 89 |
|
58310 | 90 |
datatype %quote 'a queue = Queue "'a list" |
38437 | 91 |
|
92 |
definition %quote empty :: "'a queue" where |
|
93 |
"empty = Queue []" |
|
94 |
||
95 |
primrec %quote enqueue :: "'a \<Rightarrow> 'a queue \<Rightarrow> 'a queue" where |
|
96 |
"enqueue x (Queue xs) = Queue (xs @ [x])" |
|
97 |
||
98 |
fun %quote dequeue :: "'a queue \<Rightarrow> 'a option \<times> 'a queue" where |
|
99 |
"dequeue (Queue []) = (None, Queue [])" |
|
100 |
| "dequeue (Queue (x # xs)) = (Some x, Queue xs)" |
|
101 |
||
59377 | 102 |
text \<open> |
38437 | 103 |
\noindent This we can use directly for proving; for executing, |
104 |
we provide an alternative characterisation: |
|
59377 | 105 |
\<close> |
38437 | 106 |
|
107 |
definition %quote AQueue :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a queue" where |
|
108 |
"AQueue xs ys = Queue (ys @ rev xs)" |
|
109 |
||
110 |
code_datatype %quote AQueue |
|
111 |
||
59377 | 112 |
text \<open> |
38437 | 113 |
\noindent Here we define a \qt{constructor} @{const "AQueue"} which |
114 |
is defined in terms of @{text "Queue"} and interprets its arguments |
|
115 |
according to what the \emph{content} of an amortised queue is supposed |
|
38459 | 116 |
to be. |
117 |
||
118 |
The prerequisite for datatype constructors is only syntactical: a |
|
53015
a1119cf551e8
standardized symbols via "isabelle update_sub_sup", excluding src/Pure and src/Tools/WWW_Find;
wenzelm
parents:
52637
diff
changeset
|
119 |
constructor must be of type @{text "\<tau> = \<dots> \<Rightarrow> \<kappa> \<alpha>\<^sub>1 \<dots> \<alpha>\<^sub>n"} where @{text |
a1119cf551e8
standardized symbols via "isabelle update_sub_sup", excluding src/Pure and src/Tools/WWW_Find;
wenzelm
parents:
52637
diff
changeset
|
120 |
"{\<alpha>\<^sub>1, \<dots>, \<alpha>\<^sub>n}"} is exactly the set of \emph{all} type variables in |
38459 | 121 |
@{text "\<tau>"}; then @{text "\<kappa>"} is its corresponding datatype. The |
122 |
HOL datatype package by default registers any new datatype with its |
|
38511 | 123 |
constructors, but this may be changed using @{command_def |
38459 | 124 |
code_datatype}; the currently chosen constructors can be inspected |
125 |
using the @{command print_codesetup} command. |
|
126 |
||
127 |
Equipped with this, we are able to prove the following equations |
|
38437 | 128 |
for our primitive queue operations which \qt{implement} the simple |
129 |
queues in an amortised fashion: |
|
59377 | 130 |
\<close> |
38437 | 131 |
|
132 |
lemma %quote empty_AQueue [code]: |
|
133 |
"empty = AQueue [] []" |
|
40754 | 134 |
by (simp add: AQueue_def empty_def) |
38437 | 135 |
|
136 |
lemma %quote enqueue_AQueue [code]: |
|
137 |
"enqueue x (AQueue xs ys) = AQueue (x # xs) ys" |
|
40754 | 138 |
by (simp add: AQueue_def) |
38437 | 139 |
|
140 |
lemma %quote dequeue_AQueue [code]: |
|
141 |
"dequeue (AQueue xs []) = |
|
142 |
(if xs = [] then (None, AQueue [] []) |
|
143 |
else dequeue (AQueue [] (rev xs)))" |
|
144 |
"dequeue (AQueue xs (y # ys)) = (Some y, AQueue xs ys)" |
|
40754 | 145 |
by (simp_all add: AQueue_def) |
38437 | 146 |
|
59377 | 147 |
text \<open> |
40754 | 148 |
\noindent It is good style, although no absolute requirement, to |
149 |
provide code equations for the original artefacts of the implemented |
|
150 |
type, if possible; in our case, these are the datatype constructor |
|
55422 | 151 |
@{const Queue} and the case combinator @{const case_queue}: |
59377 | 152 |
\<close> |
38437 | 153 |
|
40754 | 154 |
lemma %quote Queue_AQueue [code]: |
155 |
"Queue = AQueue []" |
|
156 |
by (simp add: AQueue_def fun_eq_iff) |
|
157 |
||
55422 | 158 |
lemma %quote case_queue_AQueue [code]: |
159 |
"case_queue f (AQueue xs ys) = f (ys @ rev xs)" |
|
40754 | 160 |
by (simp add: AQueue_def) |
38437 | 161 |
|
59377 | 162 |
text \<open> |
38437 | 163 |
\noindent The resulting code looks as expected: |
59377 | 164 |
\<close> |
38437 | 165 |
|
59377 | 166 |
text %quotetypewriter \<open> |
55422 | 167 |
@{code_stmts empty enqueue dequeue Queue case_queue (SML)} |
59377 | 168 |
\<close> |
38437 | 169 |
|
59377 | 170 |
text \<open> |
38459 | 171 |
The same techniques can also be applied to types which are not |
172 |
specified as datatypes, e.g.~type @{typ int} is originally specified |
|
38511 | 173 |
as quotient type by means of @{command_def typedef}, but for code |
38459 | 174 |
generation constants allowing construction of binary numeral values |
175 |
are used as constructors for @{typ int}. |
|
38437 | 176 |
|
38459 | 177 |
This approach however fails if the representation of a type demands |
178 |
invariants; this issue is discussed in the next section. |
|
59377 | 179 |
\<close> |
38459 | 180 |
|
38437 | 181 |
|
59377 | 182 |
subsection \<open>Datatype refinement involving invariants \label{sec:invariant}\<close> |
38437 | 183 |
|
59377 | 184 |
text \<open> |
38502 | 185 |
Datatype representation involving invariants require a dedicated |
186 |
setup for the type and its primitive operations. As a running |
|
66405 | 187 |
example, we implement a type @{typ "'a dlist"} of lists consisting |
38502 | 188 |
of distinct elements. |
189 |
||
66405 | 190 |
The specification of @{typ "'a dlist"} itself can be found in theory |
68484 | 191 |
@{theory "HOL-Library.Dlist"}. |
66405 | 192 |
|
38502 | 193 |
The first step is to decide on which representation the abstract |
66405 | 194 |
type (in our example @{typ "'a dlist"}) should be implemented. |
195 |
Here we choose @{typ "'a list"}. Then a conversion from the concrete |
|
38502 | 196 |
type to the abstract type must be specified, here: |
59377 | 197 |
\<close> |
38502 | 198 |
|
59377 | 199 |
text %quote \<open> |
38502 | 200 |
@{term_type Dlist} |
59377 | 201 |
\<close> |
38502 | 202 |
|
59377 | 203 |
text \<open> |
38502 | 204 |
\noindent Next follows the specification of a suitable \emph{projection}, |
205 |
i.e.~a conversion from abstract to concrete type: |
|
59377 | 206 |
\<close> |
38502 | 207 |
|
59377 | 208 |
text %quote \<open> |
38502 | 209 |
@{term_type list_of_dlist} |
59377 | 210 |
\<close> |
38502 | 211 |
|
59377 | 212 |
text \<open> |
38502 | 213 |
\noindent This projection must be specified such that the following |
214 |
\emph{abstract datatype certificate} can be proven: |
|
59377 | 215 |
\<close> |
38502 | 216 |
|
217 |
lemma %quote [code abstype]: |
|
218 |
"Dlist (list_of_dlist dxs) = dxs" |
|
219 |
by (fact Dlist_list_of_dlist) |
|
220 |
||
59377 | 221 |
text \<open> |
38502 | 222 |
\noindent Note that so far the invariant on representations |
223 |
(@{term_type distinct}) has never been mentioned explicitly: |
|
224 |
the invariant is only referred to implicitly: all values in |
|
225 |
set @{term "{xs. list_of_dlist (Dlist xs) = xs}"} are invariant, |
|
226 |
and in our example this is exactly @{term "{xs. distinct xs}"}. |
|
227 |
||
228 |
The primitive operations on @{typ "'a dlist"} are specified |
|
229 |
indirectly using the projection @{const list_of_dlist}. For |
|
230 |
the empty @{text "dlist"}, @{const Dlist.empty}, we finally want |
|
231 |
the code equation |
|
59377 | 232 |
\<close> |
38502 | 233 |
|
59377 | 234 |
text %quote \<open> |
38502 | 235 |
@{term "Dlist.empty = Dlist []"} |
59377 | 236 |
\<close> |
38502 | 237 |
|
59377 | 238 |
text \<open> |
38502 | 239 |
\noindent This we have to prove indirectly as follows: |
59377 | 240 |
\<close> |
38502 | 241 |
|
52637
1501ebe39711
attribute "code" declares concrete and abstract code equations uniformly; added explicit "code equation" instead
haftmann
parents:
48985
diff
changeset
|
242 |
lemma %quote [code]: |
38502 | 243 |
"list_of_dlist Dlist.empty = []" |
244 |
by (fact list_of_dlist_empty) |
|
245 |
||
59377 | 246 |
text \<open> |
38502 | 247 |
\noindent This equation logically encodes both the desired code |
248 |
equation and that the expression @{const Dlist} is applied to obeys |
|
249 |
the implicit invariant. Equations for insertion and removal are |
|
250 |
similar: |
|
59377 | 251 |
\<close> |
38502 | 252 |
|
52637
1501ebe39711
attribute "code" declares concrete and abstract code equations uniformly; added explicit "code equation" instead
haftmann
parents:
48985
diff
changeset
|
253 |
lemma %quote [code]: |
38502 | 254 |
"list_of_dlist (Dlist.insert x dxs) = List.insert x (list_of_dlist dxs)" |
255 |
by (fact list_of_dlist_insert) |
|
256 |
||
52637
1501ebe39711
attribute "code" declares concrete and abstract code equations uniformly; added explicit "code equation" instead
haftmann
parents:
48985
diff
changeset
|
257 |
lemma %quote [code]: |
38502 | 258 |
"list_of_dlist (Dlist.remove x dxs) = remove1 x (list_of_dlist dxs)" |
259 |
by (fact list_of_dlist_remove) |
|
260 |
||
59377 | 261 |
text \<open> |
38502 | 262 |
\noindent Then the corresponding code is as follows: |
59377 | 263 |
\<close> |
38502 | 264 |
|
59377 | 265 |
text %quotetypewriter \<open> |
39683 | 266 |
@{code_stmts Dlist.empty Dlist.insert Dlist.remove list_of_dlist (Haskell)} |
59377 | 267 |
\<close> |
38502 | 268 |
|
59377 | 269 |
text \<open> |
58620 | 270 |
See further @{cite "Haftmann-Kraus-Kuncar-Nipkow:2013:data_refinement"} |
53125 | 271 |
for the meta theory of datatype refinement involving invariants. |
272 |
||
38502 | 273 |
Typical data structures implemented by representations involving |
68484 | 274 |
invariants are available in the library, theory @{theory "HOL-Library.Mapping"} |
46516 | 275 |
specifies key-value-mappings (type @{typ "('a, 'b) mapping"}); |
68484 | 276 |
these can be implemented by red-black-trees (theory @{theory "HOL-Library.RBT"}). |
59377 | 277 |
\<close> |
38437 | 278 |
|
38405 | 279 |
end |