| author | wenzelm | 
| Fri, 13 Jan 2023 19:07:18 +0100 | |
| changeset 76966 | 2f91b787f509 | 
| parent 76660 | 89f78f76df1c | 
| child 76987 | 4c275405faae | 
| permissions | -rw-r--r-- | 
| 38405 | 1  | 
theory Refinement  | 
| 
69422
 
472af2d7835d
clarified session dependencies: faster build_doc/build_release;
 
wenzelm 
parents: 
68484 
diff
changeset
 | 
2  | 
imports Setup  | 
| 38405 | 3  | 
begin  | 
4  | 
||
| 59377 | 5  | 
section \<open>Program and datatype refinement \label{sec:refinement}\<close>
 | 
| 38405 | 6  | 
|
| 59377 | 7  | 
text \<open>  | 
| 38451 | 8  | 
  Code generation by shallow embedding (cf.~\secref{sec:principle})
 | 
9  | 
allows to choose code equations and datatype constructors freely,  | 
|
10  | 
given that some very basic syntactic properties are met; this  | 
|
11  | 
flexibility opens up mechanisms for refinement which allow to extend  | 
|
12  | 
the scope and quality of generated code dramatically.  | 
|
| 59377 | 13  | 
\<close>  | 
| 38451 | 14  | 
|
15  | 
||
| 59377 | 16  | 
subsection \<open>Program refinement\<close>  | 
| 38451 | 17  | 
|
| 59377 | 18  | 
text \<open>  | 
| 38451 | 19  | 
Program refinement works by choosing appropriate code equations  | 
| 40352 | 20  | 
  explicitly (cf.~\secref{sec:equations}); as example, we use Fibonacci
 | 
| 38451 | 21  | 
numbers:  | 
| 59377 | 22  | 
\<close>  | 
| 38451 | 23  | 
|
24  | 
fun %quote fib :: "nat \<Rightarrow> nat" where  | 
|
25  | 
"fib 0 = 0"  | 
|
26  | 
| "fib (Suc 0) = Suc 0"  | 
|
27  | 
| "fib (Suc (Suc n)) = fib n + fib (Suc n)"  | 
|
28  | 
||
| 59377 | 29  | 
text \<open>  | 
| 38451 | 30  | 
\noindent The runtime of the corresponding code grows exponential due  | 
31  | 
to two recursive calls:  | 
|
| 59377 | 32  | 
\<close>  | 
| 38451 | 33  | 
|
| 
69660
 
2bc2a8599369
canonical operation to typeset generated code makes dedicated environment obsolete
 
haftmann 
parents: 
69597 
diff
changeset
 | 
34  | 
text %quote \<open>  | 
| 
69697
 
4d95261fab5a
more conventional syntax for code_stmts antiquotation
 
haftmann 
parents: 
69660 
diff
changeset
 | 
35  | 
  @{code_stmts fib constant: fib (Haskell)}
 | 
| 59377 | 36  | 
\<close>  | 
| 38451 | 37  | 
|
| 59377 | 38  | 
text \<open>  | 
| 38451 | 39  | 
\noindent A more efficient implementation would use dynamic  | 
40  | 
programming, e.g.~sharing of common intermediate results between  | 
|
41  | 
recursive calls. This idea is expressed by an auxiliary operation  | 
|
42  | 
which computes a Fibonacci number and its successor simultaneously:  | 
|
| 59377 | 43  | 
\<close>  | 
| 38451 | 44  | 
|
45  | 
definition %quote fib_step :: "nat \<Rightarrow> nat \<times> nat" where  | 
|
46  | 
"fib_step n = (fib (Suc n), fib n)"  | 
|
47  | 
||
| 59377 | 48  | 
text \<open>  | 
| 38451 | 49  | 
\noindent This operation can be implemented by recursion using  | 
50  | 
dynamic programming:  | 
|
| 59377 | 51  | 
\<close>  | 
| 38451 | 52  | 
|
53  | 
lemma %quote [code]:  | 
|
54  | 
"fib_step 0 = (Suc 0, 0)"  | 
|
55  | 
"fib_step (Suc n) = (let (m, q) = fib_step n in (m + q, m))"  | 
|
56  | 
by (simp_all add: fib_step_def)  | 
|
57  | 
||
| 59377 | 58  | 
text \<open>  | 
| 69597 | 59  | 
\noindent What remains is to implement \<^const>\<open>fib\<close> by \<^const>\<open>fib_step\<close> as follows:  | 
| 59377 | 60  | 
\<close>  | 
| 38451 | 61  | 
|
62  | 
lemma %quote [code]:  | 
|
63  | 
"fib 0 = 0"  | 
|
64  | 
"fib (Suc n) = fst (fib_step n)"  | 
|
65  | 
by (simp_all add: fib_step_def)  | 
|
66  | 
||
| 59377 | 67  | 
text \<open>  | 
| 38451 | 68  | 
\noindent The resulting code shows only linear growth of runtime:  | 
| 59377 | 69  | 
\<close>  | 
| 38451 | 70  | 
|
| 
69660
 
2bc2a8599369
canonical operation to typeset generated code makes dedicated environment obsolete
 
haftmann 
parents: 
69597 
diff
changeset
 | 
71  | 
text %quote \<open>  | 
| 
69697
 
4d95261fab5a
more conventional syntax for code_stmts antiquotation
 
haftmann 
parents: 
69660 
diff
changeset
 | 
72  | 
  @{code_stmts fib constant: fib fib_step (Haskell)}
 | 
| 59377 | 73  | 
\<close>  | 
| 38451 | 74  | 
|
75  | 
||
| 59377 | 76  | 
subsection \<open>Datatype refinement\<close>  | 
| 38437 | 77  | 
|
| 59377 | 78  | 
text \<open>  | 
| 38459 | 79  | 
  Selecting specific code equations \emph{and} datatype constructors
 | 
80  | 
leads to datatype refinement. As an example, we will develop an  | 
|
81  | 
alternative representation of the queue example given in  | 
|
82  | 
  \secref{sec:queue_example}.  The amortised representation is
 | 
|
83  | 
  convenient for generating code but exposes its \qt{implementation}
 | 
|
84  | 
details, which may be cumbersome when proving theorems about it.  | 
|
85  | 
Therefore, here is a simple, straightforward representation of  | 
|
86  | 
queues:  | 
|
| 59377 | 87  | 
\<close>  | 
| 38437 | 88  | 
|
| 58310 | 89  | 
datatype %quote 'a queue = Queue "'a list"  | 
| 38437 | 90  | 
|
91  | 
definition %quote empty :: "'a queue" where  | 
|
92  | 
"empty = Queue []"  | 
|
93  | 
||
94  | 
primrec %quote enqueue :: "'a \<Rightarrow> 'a queue \<Rightarrow> 'a queue" where  | 
|
95  | 
"enqueue x (Queue xs) = Queue (xs @ [x])"  | 
|
96  | 
||
97  | 
fun %quote dequeue :: "'a queue \<Rightarrow> 'a option \<times> 'a queue" where  | 
|
98  | 
"dequeue (Queue []) = (None, Queue [])"  | 
|
99  | 
| "dequeue (Queue (x # xs)) = (Some x, Queue xs)"  | 
|
100  | 
||
| 59377 | 101  | 
text \<open>  | 
| 38437 | 102  | 
\noindent This we can use directly for proving; for executing,  | 
103  | 
we provide an alternative characterisation:  | 
|
| 59377 | 104  | 
\<close>  | 
| 38437 | 105  | 
|
106  | 
definition %quote AQueue :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a queue" where  | 
|
107  | 
"AQueue xs ys = Queue (ys @ rev xs)"  | 
|
108  | 
||
109  | 
code_datatype %quote AQueue  | 
|
110  | 
||
| 59377 | 111  | 
text \<open>  | 
| 69597 | 112  | 
  \noindent Here we define a \qt{constructor} \<^const>\<open>AQueue\<close> which
 | 
| 69505 | 113  | 
is defined in terms of \<open>Queue\<close> and interprets its arguments  | 
| 38437 | 114  | 
  according to what the \emph{content} of an amortised queue is supposed
 | 
| 38459 | 115  | 
to be.  | 
116  | 
||
117  | 
The prerequisite for datatype constructors is only syntactical: a  | 
|
| 69505 | 118  | 
  constructor must be of type \<open>\<tau> = \<dots> \<Rightarrow> \<kappa> \<alpha>\<^sub>1 \<dots> \<alpha>\<^sub>n\<close> where \<open>{\<alpha>\<^sub>1, \<dots>, \<alpha>\<^sub>n}\<close> is exactly the set of \emph{all} type variables in
 | 
119  | 
\<open>\<tau>\<close>; then \<open>\<kappa>\<close> is its corresponding datatype. The  | 
|
| 38459 | 120  | 
HOL datatype package by default registers any new datatype with its  | 
| 38511 | 121  | 
  constructors, but this may be changed using @{command_def
 | 
| 38459 | 122  | 
code_datatype}; the currently chosen constructors can be inspected  | 
123  | 
  using the @{command print_codesetup} command.
 | 
|
124  | 
||
125  | 
Equipped with this, we are able to prove the following equations  | 
|
| 38437 | 126  | 
  for our primitive queue operations which \qt{implement} the simple
 | 
127  | 
queues in an amortised fashion:  | 
|
| 59377 | 128  | 
\<close>  | 
| 38437 | 129  | 
|
130  | 
lemma %quote empty_AQueue [code]:  | 
|
131  | 
"empty = AQueue [] []"  | 
|
| 40754 | 132  | 
by (simp add: AQueue_def empty_def)  | 
| 38437 | 133  | 
|
134  | 
lemma %quote enqueue_AQueue [code]:  | 
|
135  | 
"enqueue x (AQueue xs ys) = AQueue (x # xs) ys"  | 
|
| 40754 | 136  | 
by (simp add: AQueue_def)  | 
| 38437 | 137  | 
|
138  | 
lemma %quote dequeue_AQueue [code]:  | 
|
139  | 
"dequeue (AQueue xs []) =  | 
|
140  | 
(if xs = [] then (None, AQueue [] [])  | 
|
141  | 
else dequeue (AQueue [] (rev xs)))"  | 
|
142  | 
"dequeue (AQueue xs (y # ys)) = (Some y, AQueue xs ys)"  | 
|
| 40754 | 143  | 
by (simp_all add: AQueue_def)  | 
| 38437 | 144  | 
|
| 59377 | 145  | 
text \<open>  | 
| 40754 | 146  | 
\noindent It is good style, although no absolute requirement, to  | 
147  | 
provide code equations for the original artefacts of the implemented  | 
|
148  | 
type, if possible; in our case, these are the datatype constructor  | 
|
| 69597 | 149  | 
\<^const>\<open>Queue\<close> and the case combinator \<^const>\<open>case_queue\<close>:  | 
| 59377 | 150  | 
\<close>  | 
| 38437 | 151  | 
|
| 40754 | 152  | 
lemma %quote Queue_AQueue [code]:  | 
153  | 
"Queue = AQueue []"  | 
|
154  | 
by (simp add: AQueue_def fun_eq_iff)  | 
|
155  | 
||
| 55422 | 156  | 
lemma %quote case_queue_AQueue [code]:  | 
157  | 
"case_queue f (AQueue xs ys) = f (ys @ rev xs)"  | 
|
| 40754 | 158  | 
by (simp add: AQueue_def)  | 
| 38437 | 159  | 
|
| 59377 | 160  | 
text \<open>  | 
| 38437 | 161  | 
\noindent The resulting code looks as expected:  | 
| 59377 | 162  | 
\<close>  | 
| 38437 | 163  | 
|
| 
69660
 
2bc2a8599369
canonical operation to typeset generated code makes dedicated environment obsolete
 
haftmann 
parents: 
69597 
diff
changeset
 | 
164  | 
text %quote \<open>  | 
| 55422 | 165  | 
  @{code_stmts empty enqueue dequeue Queue case_queue (SML)}
 | 
| 59377 | 166  | 
\<close>  | 
| 38437 | 167  | 
|
| 59377 | 168  | 
text \<open>  | 
| 38459 | 169  | 
The same techniques can also be applied to types which are not  | 
| 69597 | 170  | 
specified as datatypes, e.g.~type \<^typ>\<open>int\<close> is originally specified  | 
| 38511 | 171  | 
  as quotient type by means of @{command_def typedef}, but for code
 | 
| 38459 | 172  | 
generation constants allowing construction of binary numeral values  | 
| 69597 | 173  | 
are used as constructors for \<^typ>\<open>int\<close>.  | 
| 38437 | 174  | 
|
| 38459 | 175  | 
This approach however fails if the representation of a type demands  | 
176  | 
invariants; this issue is discussed in the next section.  | 
|
| 59377 | 177  | 
\<close>  | 
| 38459 | 178  | 
|
| 38437 | 179  | 
|
| 59377 | 180  | 
subsection \<open>Datatype refinement involving invariants \label{sec:invariant}\<close>
 | 
| 38437 | 181  | 
|
| 59377 | 182  | 
text \<open>  | 
| 38502 | 183  | 
Datatype representation involving invariants require a dedicated  | 
184  | 
setup for the type and its primitive operations. As a running  | 
|
| 69597 | 185  | 
example, we implement a type \<^typ>\<open>'a dlist\<close> of lists consisting  | 
| 38502 | 186  | 
of distinct elements.  | 
187  | 
||
| 69597 | 188  | 
The specification of \<^typ>\<open>'a dlist\<close> itself can be found in theory  | 
189  | 
\<^theory>\<open>HOL-Library.Dlist\<close>.  | 
|
| 66405 | 190  | 
|
| 38502 | 191  | 
The first step is to decide on which representation the abstract  | 
| 69597 | 192  | 
type (in our example \<^typ>\<open>'a dlist\<close>) should be implemented.  | 
193  | 
Here we choose \<^typ>\<open>'a list\<close>. Then a conversion from the concrete  | 
|
| 38502 | 194  | 
type to the abstract type must be specified, here:  | 
| 59377 | 195  | 
\<close>  | 
| 38502 | 196  | 
|
| 59377 | 197  | 
text %quote \<open>  | 
| 69597 | 198  | 
\<^term_type>\<open>Dlist\<close>  | 
| 59377 | 199  | 
\<close>  | 
| 38502 | 200  | 
|
| 59377 | 201  | 
text \<open>  | 
| 38502 | 202  | 
  \noindent Next follows the specification of a suitable \emph{projection},
 | 
203  | 
i.e.~a conversion from abstract to concrete type:  | 
|
| 59377 | 204  | 
\<close>  | 
| 38502 | 205  | 
|
| 59377 | 206  | 
text %quote \<open>  | 
| 69597 | 207  | 
\<^term_type>\<open>list_of_dlist\<close>  | 
| 59377 | 208  | 
\<close>  | 
| 38502 | 209  | 
|
| 59377 | 210  | 
text \<open>  | 
| 38502 | 211  | 
\noindent This projection must be specified such that the following  | 
212  | 
  \emph{abstract datatype certificate} can be proven:
 | 
|
| 59377 | 213  | 
\<close>  | 
| 38502 | 214  | 
|
215  | 
lemma %quote [code abstype]:  | 
|
216  | 
"Dlist (list_of_dlist dxs) = dxs"  | 
|
217  | 
by (fact Dlist_list_of_dlist)  | 
|
218  | 
||
| 59377 | 219  | 
text \<open>  | 
| 38502 | 220  | 
\noindent Note that so far the invariant on representations  | 
| 69597 | 221  | 
(\<^term_type>\<open>distinct\<close>) has never been mentioned explicitly:  | 
| 38502 | 222  | 
the invariant is only referred to implicitly: all values in  | 
| 69597 | 223  | 
  set \<^term>\<open>{xs. list_of_dlist (Dlist xs) = xs}\<close> are invariant,
 | 
224  | 
  and in our example this is exactly \<^term>\<open>{xs. distinct xs}\<close>.
 | 
|
| 38502 | 225  | 
|
| 69597 | 226  | 
The primitive operations on \<^typ>\<open>'a dlist\<close> are specified  | 
227  | 
indirectly using the projection \<^const>\<open>list_of_dlist\<close>. For  | 
|
228  | 
the empty \<open>dlist\<close>, \<^const>\<open>Dlist.empty\<close>, we finally want  | 
|
| 38502 | 229  | 
the code equation  | 
| 59377 | 230  | 
\<close>  | 
| 38502 | 231  | 
|
| 59377 | 232  | 
text %quote \<open>  | 
| 69597 | 233  | 
\<^term>\<open>Dlist.empty = Dlist []\<close>  | 
| 59377 | 234  | 
\<close>  | 
| 38502 | 235  | 
|
| 59377 | 236  | 
text \<open>  | 
| 38502 | 237  | 
\noindent This we have to prove indirectly as follows:  | 
| 59377 | 238  | 
\<close>  | 
| 38502 | 239  | 
|
| 
52637
 
1501ebe39711
attribute "code" declares concrete and abstract code equations uniformly; added explicit "code equation" instead
 
haftmann 
parents: 
48985 
diff
changeset
 | 
240  | 
lemma %quote [code]:  | 
| 38502 | 241  | 
"list_of_dlist Dlist.empty = []"  | 
242  | 
by (fact list_of_dlist_empty)  | 
|
243  | 
||
| 59377 | 244  | 
text \<open>  | 
| 38502 | 245  | 
\noindent This equation logically encodes both the desired code  | 
| 69597 | 246  | 
equation and that the expression \<^const>\<open>Dlist\<close> is applied to obeys  | 
| 38502 | 247  | 
the implicit invariant. Equations for insertion and removal are  | 
248  | 
similar:  | 
|
| 59377 | 249  | 
\<close>  | 
| 38502 | 250  | 
|
| 
52637
 
1501ebe39711
attribute "code" declares concrete and abstract code equations uniformly; added explicit "code equation" instead
 
haftmann 
parents: 
48985 
diff
changeset
 | 
251  | 
lemma %quote [code]:  | 
| 38502 | 252  | 
"list_of_dlist (Dlist.insert x dxs) = List.insert x (list_of_dlist dxs)"  | 
253  | 
by (fact list_of_dlist_insert)  | 
|
254  | 
||
| 
52637
 
1501ebe39711
attribute "code" declares concrete and abstract code equations uniformly; added explicit "code equation" instead
 
haftmann 
parents: 
48985 
diff
changeset
 | 
255  | 
lemma %quote [code]:  | 
| 38502 | 256  | 
"list_of_dlist (Dlist.remove x dxs) = remove1 x (list_of_dlist dxs)"  | 
257  | 
by (fact list_of_dlist_remove)  | 
|
258  | 
||
| 59377 | 259  | 
text \<open>  | 
| 38502 | 260  | 
\noindent Then the corresponding code is as follows:  | 
| 59377 | 261  | 
\<close>  | 
| 38502 | 262  | 
|
| 
69660
 
2bc2a8599369
canonical operation to typeset generated code makes dedicated environment obsolete
 
haftmann 
parents: 
69597 
diff
changeset
 | 
263  | 
text %quote \<open>  | 
| 76660 | 264  | 
  @{code_stmts Dlist.empty Dlist.insert Dlist.remove list_of_dlist (SML)}
 | 
| 59377 | 265  | 
\<close>  | 
| 38502 | 266  | 
|
| 59377 | 267  | 
text \<open>  | 
| 58620 | 268  | 
  See further @{cite "Haftmann-Kraus-Kuncar-Nipkow:2013:data_refinement"}
 | 
| 53125 | 269  | 
for the meta theory of datatype refinement involving invariants.  | 
270  | 
||
| 38502 | 271  | 
Typical data structures implemented by representations involving  | 
| 69597 | 272  | 
invariants are available in the library, theory \<^theory>\<open>HOL-Library.Mapping\<close>  | 
273  | 
  specifies key-value-mappings (type \<^typ>\<open>('a, 'b) mapping\<close>);
 | 
|
274  | 
these can be implemented by red-black-trees (theory \<^theory>\<open>HOL-Library.RBT\<close>).  | 
|
| 59377 | 275  | 
\<close>  | 
| 38437 | 276  | 
|
| 38405 | 277  | 
end  |