doc-src/Codegen/Refinement.thy
changeset 48951 b9238cbcdd41
parent 46516 92f981f4a61b
equal deleted inserted replaced
48950:9965099f51ad 48951:b9238cbcdd41
       
     1 theory Refinement
       
     2 imports Setup
       
     3 begin
       
     4 
       
     5 section {* Program and datatype refinement \label{sec:refinement} *}
       
     6 
       
     7 text {*
       
     8   Code generation by shallow embedding (cf.~\secref{sec:principle})
       
     9   allows to choose code equations and datatype constructors freely,
       
    10   given that some very basic syntactic properties are met; this
       
    11   flexibility opens up mechanisms for refinement which allow to extend
       
    12   the scope and quality of generated code dramatically.
       
    13 *}
       
    14 
       
    15 
       
    16 subsection {* Program refinement *}
       
    17 
       
    18 text {*
       
    19   Program refinement works by choosing appropriate code equations
       
    20   explicitly (cf.~\secref{sec:equations}); as example, we use Fibonacci
       
    21   numbers:
       
    22 *}
       
    23 
       
    24 fun %quote fib :: "nat \<Rightarrow> nat" where
       
    25     "fib 0 = 0"
       
    26   | "fib (Suc 0) = Suc 0"
       
    27   | "fib (Suc (Suc n)) = fib n + fib (Suc n)"
       
    28 
       
    29 text {*
       
    30   \noindent The runtime of the corresponding code grows exponential due
       
    31   to two recursive calls:
       
    32 *}
       
    33 
       
    34 text %quotetypewriter {*
       
    35   @{code_stmts fib (consts) fib (Haskell)}
       
    36 *}
       
    37 
       
    38 text {*
       
    39   \noindent A more efficient implementation would use dynamic
       
    40   programming, e.g.~sharing of common intermediate results between
       
    41   recursive calls.  This idea is expressed by an auxiliary operation
       
    42   which computes a Fibonacci number and its successor simultaneously:
       
    43 *}
       
    44 
       
    45 definition %quote fib_step :: "nat \<Rightarrow> nat \<times> nat" where
       
    46   "fib_step n = (fib (Suc n), fib n)"
       
    47 
       
    48 text {*
       
    49   \noindent This operation can be implemented by recursion using
       
    50   dynamic programming:
       
    51 *}
       
    52 
       
    53 lemma %quote [code]:
       
    54   "fib_step 0 = (Suc 0, 0)"
       
    55   "fib_step (Suc n) = (let (m, q) = fib_step n in (m + q, m))"
       
    56   by (simp_all add: fib_step_def)
       
    57 
       
    58 text {*
       
    59   \noindent What remains is to implement @{const fib} by @{const
       
    60   fib_step} as follows:
       
    61 *}
       
    62 
       
    63 lemma %quote [code]:
       
    64   "fib 0 = 0"
       
    65   "fib (Suc n) = fst (fib_step n)"
       
    66   by (simp_all add: fib_step_def)
       
    67 
       
    68 text {*
       
    69   \noindent The resulting code shows only linear growth of runtime:
       
    70 *}
       
    71 
       
    72 text %quotetypewriter {*
       
    73   @{code_stmts fib (consts) fib fib_step (Haskell)}
       
    74 *}
       
    75 
       
    76 
       
    77 subsection {* Datatype refinement *}
       
    78 
       
    79 text {*
       
    80   Selecting specific code equations \emph{and} datatype constructors
       
    81   leads to datatype refinement.  As an example, we will develop an
       
    82   alternative representation of the queue example given in
       
    83   \secref{sec:queue_example}.  The amortised representation is
       
    84   convenient for generating code but exposes its \qt{implementation}
       
    85   details, which may be cumbersome when proving theorems about it.
       
    86   Therefore, here is a simple, straightforward representation of
       
    87   queues:
       
    88 *}
       
    89 
       
    90 datatype %quote 'a queue = Queue "'a list"
       
    91 
       
    92 definition %quote empty :: "'a queue" where
       
    93   "empty = Queue []"
       
    94 
       
    95 primrec %quote enqueue :: "'a \<Rightarrow> 'a queue \<Rightarrow> 'a queue" where
       
    96   "enqueue x (Queue xs) = Queue (xs @ [x])"
       
    97 
       
    98 fun %quote dequeue :: "'a queue \<Rightarrow> 'a option \<times> 'a queue" where
       
    99     "dequeue (Queue []) = (None, Queue [])"
       
   100   | "dequeue (Queue (x # xs)) = (Some x, Queue xs)"
       
   101 
       
   102 text {*
       
   103   \noindent This we can use directly for proving;  for executing,
       
   104   we provide an alternative characterisation:
       
   105 *}
       
   106 
       
   107 definition %quote AQueue :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a queue" where
       
   108   "AQueue xs ys = Queue (ys @ rev xs)"
       
   109 
       
   110 code_datatype %quote AQueue
       
   111 
       
   112 text {*
       
   113   \noindent Here we define a \qt{constructor} @{const "AQueue"} which
       
   114   is defined in terms of @{text "Queue"} and interprets its arguments
       
   115   according to what the \emph{content} of an amortised queue is supposed
       
   116   to be.
       
   117 
       
   118   The prerequisite for datatype constructors is only syntactical: a
       
   119   constructor must be of type @{text "\<tau> = \<dots> \<Rightarrow> \<kappa> \<alpha>\<^isub>1 \<dots> \<alpha>\<^isub>n"} where @{text
       
   120   "{\<alpha>\<^isub>1, \<dots>, \<alpha>\<^isub>n}"} is exactly the set of \emph{all} type variables in
       
   121   @{text "\<tau>"}; then @{text "\<kappa>"} is its corresponding datatype.  The
       
   122   HOL datatype package by default registers any new datatype with its
       
   123   constructors, but this may be changed using @{command_def
       
   124   code_datatype}; the currently chosen constructors can be inspected
       
   125   using the @{command print_codesetup} command.
       
   126 
       
   127   Equipped with this, we are able to prove the following equations
       
   128   for our primitive queue operations which \qt{implement} the simple
       
   129   queues in an amortised fashion:
       
   130 *}
       
   131 
       
   132 lemma %quote empty_AQueue [code]:
       
   133   "empty = AQueue [] []"
       
   134   by (simp add: AQueue_def empty_def)
       
   135 
       
   136 lemma %quote enqueue_AQueue [code]:
       
   137   "enqueue x (AQueue xs ys) = AQueue (x # xs) ys"
       
   138   by (simp add: AQueue_def)
       
   139 
       
   140 lemma %quote dequeue_AQueue [code]:
       
   141   "dequeue (AQueue xs []) =
       
   142     (if xs = [] then (None, AQueue [] [])
       
   143     else dequeue (AQueue [] (rev xs)))"
       
   144   "dequeue (AQueue xs (y # ys)) = (Some y, AQueue xs ys)"
       
   145   by (simp_all add: AQueue_def)
       
   146 
       
   147 text {*
       
   148   \noindent It is good style, although no absolute requirement, to
       
   149   provide code equations for the original artefacts of the implemented
       
   150   type, if possible; in our case, these are the datatype constructor
       
   151   @{const Queue} and the case combinator @{const queue_case}:
       
   152 *}
       
   153 
       
   154 lemma %quote Queue_AQueue [code]:
       
   155   "Queue = AQueue []"
       
   156   by (simp add: AQueue_def fun_eq_iff)
       
   157 
       
   158 lemma %quote queue_case_AQueue [code]:
       
   159   "queue_case f (AQueue xs ys) = f (ys @ rev xs)"
       
   160   by (simp add: AQueue_def)
       
   161 
       
   162 text {*
       
   163   \noindent The resulting code looks as expected:
       
   164 *}
       
   165 
       
   166 text %quotetypewriter {*
       
   167   @{code_stmts empty enqueue dequeue Queue queue_case (SML)}
       
   168 *}
       
   169 
       
   170 text {*
       
   171   The same techniques can also be applied to types which are not
       
   172   specified as datatypes, e.g.~type @{typ int} is originally specified
       
   173   as quotient type by means of @{command_def typedef}, but for code
       
   174   generation constants allowing construction of binary numeral values
       
   175   are used as constructors for @{typ int}.
       
   176 
       
   177   This approach however fails if the representation of a type demands
       
   178   invariants; this issue is discussed in the next section.
       
   179 *}
       
   180 
       
   181 
       
   182 subsection {* Datatype refinement involving invariants \label{sec:invariant} *}
       
   183 
       
   184 text {*
       
   185   Datatype representation involving invariants require a dedicated
       
   186   setup for the type and its primitive operations.  As a running
       
   187   example, we implement a type @{text "'a dlist"} of list consisting
       
   188   of distinct elements.
       
   189 
       
   190   The first step is to decide on which representation the abstract
       
   191   type (in our example @{text "'a dlist"}) should be implemented.
       
   192   Here we choose @{text "'a list"}.  Then a conversion from the concrete
       
   193   type to the abstract type must be specified, here:
       
   194 *}
       
   195 
       
   196 text %quote {*
       
   197   @{term_type Dlist}
       
   198 *}
       
   199 
       
   200 text {*
       
   201   \noindent Next follows the specification of a suitable \emph{projection},
       
   202   i.e.~a conversion from abstract to concrete type:
       
   203 *}
       
   204 
       
   205 text %quote {*
       
   206   @{term_type list_of_dlist}
       
   207 *}
       
   208 
       
   209 text {*
       
   210   \noindent This projection must be specified such that the following
       
   211   \emph{abstract datatype certificate} can be proven:
       
   212 *}
       
   213 
       
   214 lemma %quote [code abstype]:
       
   215   "Dlist (list_of_dlist dxs) = dxs"
       
   216   by (fact Dlist_list_of_dlist)
       
   217 
       
   218 text {*
       
   219   \noindent Note that so far the invariant on representations
       
   220   (@{term_type distinct}) has never been mentioned explicitly:
       
   221   the invariant is only referred to implicitly: all values in
       
   222   set @{term "{xs. list_of_dlist (Dlist xs) = xs}"} are invariant,
       
   223   and in our example this is exactly @{term "{xs. distinct xs}"}.
       
   224   
       
   225   The primitive operations on @{typ "'a dlist"} are specified
       
   226   indirectly using the projection @{const list_of_dlist}.  For
       
   227   the empty @{text "dlist"}, @{const Dlist.empty}, we finally want
       
   228   the code equation
       
   229 *}
       
   230 
       
   231 text %quote {*
       
   232   @{term "Dlist.empty = Dlist []"}
       
   233 *}
       
   234 
       
   235 text {*
       
   236   \noindent This we have to prove indirectly as follows:
       
   237 *}
       
   238 
       
   239 lemma %quote [code abstract]:
       
   240   "list_of_dlist Dlist.empty = []"
       
   241   by (fact list_of_dlist_empty)
       
   242 
       
   243 text {*
       
   244   \noindent This equation logically encodes both the desired code
       
   245   equation and that the expression @{const Dlist} is applied to obeys
       
   246   the implicit invariant.  Equations for insertion and removal are
       
   247   similar:
       
   248 *}
       
   249 
       
   250 lemma %quote [code abstract]:
       
   251   "list_of_dlist (Dlist.insert x dxs) = List.insert x (list_of_dlist dxs)"
       
   252   by (fact list_of_dlist_insert)
       
   253 
       
   254 lemma %quote [code abstract]:
       
   255   "list_of_dlist (Dlist.remove x dxs) = remove1 x (list_of_dlist dxs)"
       
   256   by (fact list_of_dlist_remove)
       
   257 
       
   258 text {*
       
   259   \noindent Then the corresponding code is as follows:
       
   260 *}
       
   261 
       
   262 text %quotetypewriter {*
       
   263   @{code_stmts Dlist.empty Dlist.insert Dlist.remove list_of_dlist (Haskell)}
       
   264 *} (*(types) dlist (consts) dempty dinsert dremove list_of List.member insert remove *)
       
   265 
       
   266 text {*
       
   267   Typical data structures implemented by representations involving
       
   268   invariants are available in the library, theory @{theory Mapping}
       
   269   specifies key-value-mappings (type @{typ "('a, 'b) mapping"});
       
   270   these can be implemented by red-black-trees (theory @{theory RBT}).
       
   271 *}
       
   272 
       
   273 end
       
   274