src/Doc/Codegen/Refinement.thy
author blanchet
Tue Nov 07 15:16:42 2017 +0100 (20 months ago)
changeset 67022 49309fe530fd
parent 66453 cc19f7ca2ed6
child 68484 59793df7f853
permissions -rw-r--r--
more robust parsing for THF proofs (esp. polymorphic Leo-III proofs)
haftmann@38405
     1
theory Refinement
wenzelm@66453
     2
imports Codegen_Basics.Setup
haftmann@38405
     3
begin
haftmann@38405
     4
haftmann@59377
     5
section \<open>Program and datatype refinement \label{sec:refinement}\<close>
haftmann@38405
     6
haftmann@59377
     7
text \<open>
haftmann@38451
     8
  Code generation by shallow embedding (cf.~\secref{sec:principle})
haftmann@38451
     9
  allows to choose code equations and datatype constructors freely,
haftmann@38451
    10
  given that some very basic syntactic properties are met; this
haftmann@38451
    11
  flexibility opens up mechanisms for refinement which allow to extend
haftmann@38451
    12
  the scope and quality of generated code dramatically.
haftmann@59377
    13
\<close>
haftmann@38451
    14
haftmann@38451
    15
haftmann@59377
    16
subsection \<open>Program refinement\<close>
haftmann@38451
    17
haftmann@59377
    18
text \<open>
haftmann@38451
    19
  Program refinement works by choosing appropriate code equations
haftmann@40352
    20
  explicitly (cf.~\secref{sec:equations}); as example, we use Fibonacci
haftmann@38451
    21
  numbers:
haftmann@59377
    22
\<close>
haftmann@38451
    23
haftmann@38451
    24
fun %quote fib :: "nat \<Rightarrow> nat" where
haftmann@38451
    25
    "fib 0 = 0"
haftmann@38451
    26
  | "fib (Suc 0) = Suc 0"
haftmann@38451
    27
  | "fib (Suc (Suc n)) = fib n + fib (Suc n)"
haftmann@38451
    28
haftmann@59377
    29
text \<open>
haftmann@38451
    30
  \noindent The runtime of the corresponding code grows exponential due
haftmann@38451
    31
  to two recursive calls:
haftmann@59377
    32
\<close>
haftmann@38451
    33
haftmann@59377
    34
text %quotetypewriter \<open>
haftmann@39683
    35
  @{code_stmts fib (consts) fib (Haskell)}
haftmann@59377
    36
\<close>
haftmann@38451
    37
haftmann@59377
    38
text \<open>
haftmann@38451
    39
  \noindent A more efficient implementation would use dynamic
haftmann@38451
    40
  programming, e.g.~sharing of common intermediate results between
haftmann@38451
    41
  recursive calls.  This idea is expressed by an auxiliary operation
haftmann@38451
    42
  which computes a Fibonacci number and its successor simultaneously:
haftmann@59377
    43
\<close>
haftmann@38451
    44
haftmann@38451
    45
definition %quote fib_step :: "nat \<Rightarrow> nat \<times> nat" where
haftmann@38451
    46
  "fib_step n = (fib (Suc n), fib n)"
haftmann@38451
    47
haftmann@59377
    48
text \<open>
haftmann@38451
    49
  \noindent This operation can be implemented by recursion using
haftmann@38451
    50
  dynamic programming:
haftmann@59377
    51
\<close>
haftmann@38451
    52
haftmann@38451
    53
lemma %quote [code]:
haftmann@38451
    54
  "fib_step 0 = (Suc 0, 0)"
haftmann@38451
    55
  "fib_step (Suc n) = (let (m, q) = fib_step n in (m + q, m))"
haftmann@38451
    56
  by (simp_all add: fib_step_def)
haftmann@38451
    57
haftmann@59377
    58
text \<open>
haftmann@38451
    59
  \noindent What remains is to implement @{const fib} by @{const
haftmann@38451
    60
  fib_step} as follows:
haftmann@59377
    61
\<close>
haftmann@38451
    62
haftmann@38451
    63
lemma %quote [code]:
haftmann@38451
    64
  "fib 0 = 0"
haftmann@38451
    65
  "fib (Suc n) = fst (fib_step n)"
haftmann@38451
    66
  by (simp_all add: fib_step_def)
haftmann@38451
    67
haftmann@59377
    68
text \<open>
haftmann@38451
    69
  \noindent The resulting code shows only linear growth of runtime:
haftmann@59377
    70
\<close>
haftmann@38451
    71
haftmann@59377
    72
text %quotetypewriter \<open>
haftmann@39683
    73
  @{code_stmts fib (consts) fib fib_step (Haskell)}
haftmann@59377
    74
\<close>
haftmann@38451
    75
haftmann@38451
    76
haftmann@59377
    77
subsection \<open>Datatype refinement\<close>
haftmann@38437
    78
haftmann@59377
    79
text \<open>
haftmann@38459
    80
  Selecting specific code equations \emph{and} datatype constructors
haftmann@38459
    81
  leads to datatype refinement.  As an example, we will develop an
haftmann@38459
    82
  alternative representation of the queue example given in
haftmann@38459
    83
  \secref{sec:queue_example}.  The amortised representation is
haftmann@38459
    84
  convenient for generating code but exposes its \qt{implementation}
haftmann@38459
    85
  details, which may be cumbersome when proving theorems about it.
haftmann@38459
    86
  Therefore, here is a simple, straightforward representation of
haftmann@38459
    87
  queues:
haftmann@59377
    88
\<close>
haftmann@38437
    89
blanchet@58310
    90
datatype %quote 'a queue = Queue "'a list"
haftmann@38437
    91
haftmann@38437
    92
definition %quote empty :: "'a queue" where
haftmann@38437
    93
  "empty = Queue []"
haftmann@38437
    94
haftmann@38437
    95
primrec %quote enqueue :: "'a \<Rightarrow> 'a queue \<Rightarrow> 'a queue" where
haftmann@38437
    96
  "enqueue x (Queue xs) = Queue (xs @ [x])"
haftmann@38437
    97
haftmann@38437
    98
fun %quote dequeue :: "'a queue \<Rightarrow> 'a option \<times> 'a queue" where
haftmann@38437
    99
    "dequeue (Queue []) = (None, Queue [])"
haftmann@38437
   100
  | "dequeue (Queue (x # xs)) = (Some x, Queue xs)"
haftmann@38437
   101
haftmann@59377
   102
text \<open>
haftmann@38437
   103
  \noindent This we can use directly for proving;  for executing,
haftmann@38437
   104
  we provide an alternative characterisation:
haftmann@59377
   105
\<close>
haftmann@38437
   106
haftmann@38437
   107
definition %quote AQueue :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a queue" where
haftmann@38437
   108
  "AQueue xs ys = Queue (ys @ rev xs)"
haftmann@38437
   109
haftmann@38437
   110
code_datatype %quote AQueue
haftmann@38437
   111
haftmann@59377
   112
text \<open>
haftmann@38437
   113
  \noindent Here we define a \qt{constructor} @{const "AQueue"} which
haftmann@38437
   114
  is defined in terms of @{text "Queue"} and interprets its arguments
haftmann@38437
   115
  according to what the \emph{content} of an amortised queue is supposed
haftmann@38459
   116
  to be.
haftmann@38459
   117
haftmann@38459
   118
  The prerequisite for datatype constructors is only syntactical: a
wenzelm@53015
   119
  constructor must be of type @{text "\<tau> = \<dots> \<Rightarrow> \<kappa> \<alpha>\<^sub>1 \<dots> \<alpha>\<^sub>n"} where @{text
wenzelm@53015
   120
  "{\<alpha>\<^sub>1, \<dots>, \<alpha>\<^sub>n}"} is exactly the set of \emph{all} type variables in
haftmann@38459
   121
  @{text "\<tau>"}; then @{text "\<kappa>"} is its corresponding datatype.  The
haftmann@38459
   122
  HOL datatype package by default registers any new datatype with its
haftmann@38511
   123
  constructors, but this may be changed using @{command_def
haftmann@38459
   124
  code_datatype}; the currently chosen constructors can be inspected
haftmann@38459
   125
  using the @{command print_codesetup} command.
haftmann@38459
   126
haftmann@38459
   127
  Equipped with this, we are able to prove the following equations
haftmann@38437
   128
  for our primitive queue operations which \qt{implement} the simple
haftmann@38437
   129
  queues in an amortised fashion:
haftmann@59377
   130
\<close>
haftmann@38437
   131
haftmann@38437
   132
lemma %quote empty_AQueue [code]:
haftmann@38437
   133
  "empty = AQueue [] []"
haftmann@40754
   134
  by (simp add: AQueue_def empty_def)
haftmann@38437
   135
haftmann@38437
   136
lemma %quote enqueue_AQueue [code]:
haftmann@38437
   137
  "enqueue x (AQueue xs ys) = AQueue (x # xs) ys"
haftmann@40754
   138
  by (simp add: AQueue_def)
haftmann@38437
   139
haftmann@38437
   140
lemma %quote dequeue_AQueue [code]:
haftmann@38437
   141
  "dequeue (AQueue xs []) =
haftmann@38437
   142
    (if xs = [] then (None, AQueue [] [])
haftmann@38437
   143
    else dequeue (AQueue [] (rev xs)))"
haftmann@38437
   144
  "dequeue (AQueue xs (y # ys)) = (Some y, AQueue xs ys)"
haftmann@40754
   145
  by (simp_all add: AQueue_def)
haftmann@38437
   146
haftmann@59377
   147
text \<open>
haftmann@40754
   148
  \noindent It is good style, although no absolute requirement, to
haftmann@40754
   149
  provide code equations for the original artefacts of the implemented
haftmann@40754
   150
  type, if possible; in our case, these are the datatype constructor
blanchet@55422
   151
  @{const Queue} and the case combinator @{const case_queue}:
haftmann@59377
   152
\<close>
haftmann@38437
   153
haftmann@40754
   154
lemma %quote Queue_AQueue [code]:
haftmann@40754
   155
  "Queue = AQueue []"
haftmann@40754
   156
  by (simp add: AQueue_def fun_eq_iff)
haftmann@40754
   157
blanchet@55422
   158
lemma %quote case_queue_AQueue [code]:
blanchet@55422
   159
  "case_queue f (AQueue xs ys) = f (ys @ rev xs)"
haftmann@40754
   160
  by (simp add: AQueue_def)
haftmann@38437
   161
haftmann@59377
   162
text \<open>
haftmann@38437
   163
  \noindent The resulting code looks as expected:
haftmann@59377
   164
\<close>
haftmann@38437
   165
haftmann@59377
   166
text %quotetypewriter \<open>
blanchet@55422
   167
  @{code_stmts empty enqueue dequeue Queue case_queue (SML)}
haftmann@59377
   168
\<close>
haftmann@38437
   169
haftmann@59377
   170
text \<open>
haftmann@38459
   171
  The same techniques can also be applied to types which are not
haftmann@38459
   172
  specified as datatypes, e.g.~type @{typ int} is originally specified
haftmann@38511
   173
  as quotient type by means of @{command_def typedef}, but for code
haftmann@38459
   174
  generation constants allowing construction of binary numeral values
haftmann@38459
   175
  are used as constructors for @{typ int}.
haftmann@38437
   176
haftmann@38459
   177
  This approach however fails if the representation of a type demands
haftmann@38459
   178
  invariants; this issue is discussed in the next section.
haftmann@59377
   179
\<close>
haftmann@38459
   180
haftmann@38437
   181
haftmann@59377
   182
subsection \<open>Datatype refinement involving invariants \label{sec:invariant}\<close>
haftmann@38437
   183
haftmann@59377
   184
text \<open>
haftmann@38502
   185
  Datatype representation involving invariants require a dedicated
haftmann@38502
   186
  setup for the type and its primitive operations.  As a running
haftmann@66405
   187
  example, we implement a type @{typ "'a dlist"} of lists consisting
haftmann@38502
   188
  of distinct elements.
haftmann@38502
   189
haftmann@66405
   190
  The specification of @{typ "'a dlist"} itself can be found in theory
haftmann@66405
   191
  @{theory Dlist}.
haftmann@66405
   192
haftmann@38502
   193
  The first step is to decide on which representation the abstract
haftmann@66405
   194
  type (in our example @{typ "'a dlist"}) should be implemented.
haftmann@66405
   195
  Here we choose @{typ "'a list"}.  Then a conversion from the concrete
haftmann@38502
   196
  type to the abstract type must be specified, here:
haftmann@59377
   197
\<close>
haftmann@38502
   198
haftmann@59377
   199
text %quote \<open>
haftmann@38502
   200
  @{term_type Dlist}
haftmann@59377
   201
\<close>
haftmann@38502
   202
haftmann@59377
   203
text \<open>
haftmann@38502
   204
  \noindent Next follows the specification of a suitable \emph{projection},
haftmann@38502
   205
  i.e.~a conversion from abstract to concrete type:
haftmann@59377
   206
\<close>
haftmann@38502
   207
haftmann@59377
   208
text %quote \<open>
haftmann@38502
   209
  @{term_type list_of_dlist}
haftmann@59377
   210
\<close>
haftmann@38502
   211
haftmann@59377
   212
text \<open>
haftmann@38502
   213
  \noindent This projection must be specified such that the following
haftmann@38502
   214
  \emph{abstract datatype certificate} can be proven:
haftmann@59377
   215
\<close>
haftmann@38502
   216
haftmann@38502
   217
lemma %quote [code abstype]:
haftmann@38502
   218
  "Dlist (list_of_dlist dxs) = dxs"
haftmann@38502
   219
  by (fact Dlist_list_of_dlist)
haftmann@38502
   220
haftmann@59377
   221
text \<open>
haftmann@38502
   222
  \noindent Note that so far the invariant on representations
haftmann@38502
   223
  (@{term_type distinct}) has never been mentioned explicitly:
haftmann@38502
   224
  the invariant is only referred to implicitly: all values in
haftmann@38502
   225
  set @{term "{xs. list_of_dlist (Dlist xs) = xs}"} are invariant,
haftmann@38502
   226
  and in our example this is exactly @{term "{xs. distinct xs}"}.
haftmann@38502
   227
  
haftmann@38502
   228
  The primitive operations on @{typ "'a dlist"} are specified
haftmann@38502
   229
  indirectly using the projection @{const list_of_dlist}.  For
haftmann@38502
   230
  the empty @{text "dlist"}, @{const Dlist.empty}, we finally want
haftmann@38502
   231
  the code equation
haftmann@59377
   232
\<close>
haftmann@38502
   233
haftmann@59377
   234
text %quote \<open>
haftmann@38502
   235
  @{term "Dlist.empty = Dlist []"}
haftmann@59377
   236
\<close>
haftmann@38502
   237
haftmann@59377
   238
text \<open>
haftmann@38502
   239
  \noindent This we have to prove indirectly as follows:
haftmann@59377
   240
\<close>
haftmann@38502
   241
haftmann@52637
   242
lemma %quote [code]:
haftmann@38502
   243
  "list_of_dlist Dlist.empty = []"
haftmann@38502
   244
  by (fact list_of_dlist_empty)
haftmann@38502
   245
haftmann@59377
   246
text \<open>
haftmann@38502
   247
  \noindent This equation logically encodes both the desired code
haftmann@38502
   248
  equation and that the expression @{const Dlist} is applied to obeys
haftmann@38502
   249
  the implicit invariant.  Equations for insertion and removal are
haftmann@38502
   250
  similar:
haftmann@59377
   251
\<close>
haftmann@38502
   252
haftmann@52637
   253
lemma %quote [code]:
haftmann@38502
   254
  "list_of_dlist (Dlist.insert x dxs) = List.insert x (list_of_dlist dxs)"
haftmann@38502
   255
  by (fact list_of_dlist_insert)
haftmann@38502
   256
haftmann@52637
   257
lemma %quote [code]:
haftmann@38502
   258
  "list_of_dlist (Dlist.remove x dxs) = remove1 x (list_of_dlist dxs)"
haftmann@38502
   259
  by (fact list_of_dlist_remove)
haftmann@38502
   260
haftmann@59377
   261
text \<open>
haftmann@38502
   262
  \noindent Then the corresponding code is as follows:
haftmann@59377
   263
\<close>
haftmann@38502
   264
haftmann@59377
   265
text %quotetypewriter \<open>
haftmann@39683
   266
  @{code_stmts Dlist.empty Dlist.insert Dlist.remove list_of_dlist (Haskell)}
haftmann@59377
   267
\<close>
haftmann@38502
   268
haftmann@59377
   269
text \<open>
wenzelm@58620
   270
  See further @{cite "Haftmann-Kraus-Kuncar-Nipkow:2013:data_refinement"}
haftmann@53125
   271
  for the meta theory of datatype refinement involving invariants.
haftmann@53125
   272
haftmann@38502
   273
  Typical data structures implemented by representations involving
haftmann@46516
   274
  invariants are available in the library, theory @{theory Mapping}
haftmann@46516
   275
  specifies key-value-mappings (type @{typ "('a, 'b) mapping"});
haftmann@46516
   276
  these can be implemented by red-black-trees (theory @{theory RBT}).
haftmann@59377
   277
\<close>
haftmann@38437
   278
haftmann@38405
   279
end