| 8744 |      1 | (*<*)
 | 
| 16417 |      2 | theory CodeGen imports Main begin
 | 
| 8744 |      3 | (*>*)
 | 
|  |      4 | 
 | 
| 10885 |      5 | section{*Case Study: Compiling Expressions*}
 | 
| 9844 |      6 | 
 | 
|  |      7 | text{*\label{sec:ExprCompiler}
 | 
| 11458 |      8 | \index{compiling expressions example|(}%
 | 
| 8744 |      9 | The task is to develop a compiler from a generic type of expressions (built
 | 
| 10795 |     10 | from variables, constants and binary operations) to a stack machine.  This
 | 
| 8744 |     11 | generic type of expressions is a generalization of the boolean expressions in
 | 
|  |     12 | \S\ref{sec:boolex}.  This time we do not commit ourselves to a particular
 | 
|  |     13 | type of variables or values but make them type parameters.  Neither is there
 | 
|  |     14 | a fixed set of binary operations: instead the expression contains the
 | 
|  |     15 | appropriate function itself.
 | 
|  |     16 | *}
 | 
|  |     17 | 
 | 
| 10171 |     18 | types 'v binop = "'v \<Rightarrow> 'v \<Rightarrow> 'v";
 | 
| 8744 |     19 | datatype ('a,'v)expr = Cex 'v
 | 
|  |     20 |                      | Vex 'a
 | 
|  |     21 |                      | Bex "'v binop"  "('a,'v)expr"  "('a,'v)expr";
 | 
|  |     22 | 
 | 
|  |     23 | text{*\noindent
 | 
| 8771 |     24 | The three constructors represent constants, variables and the application of
 | 
|  |     25 | a binary operation to two subexpressions.
 | 
| 8744 |     26 | 
 | 
| 10795 |     27 | The value of an expression with respect to an environment that maps variables to
 | 
| 8744 |     28 | values is easily defined:
 | 
|  |     29 | *}
 | 
|  |     30 | 
 | 
| 27015 |     31 | primrec "value" :: "('a,'v)expr \<Rightarrow> ('a \<Rightarrow> 'v) \<Rightarrow> 'v" where
 | 
|  |     32 | "value (Cex v) env = v" |
 | 
|  |     33 | "value (Vex a) env = env a" |
 | 
|  |     34 | "value (Bex f e1 e2) env = f (value e1 env) (value e2 env)"
 | 
| 8744 |     35 | 
 | 
|  |     36 | text{*
 | 
|  |     37 | The stack machine has three instructions: load a constant value onto the
 | 
| 10795 |     38 | stack, load the contents of an address onto the stack, and apply a
 | 
| 8744 |     39 | binary operation to the two topmost elements of the stack, replacing them by
 | 
| 9792 |     40 | the result. As for @{text"expr"}, addresses and values are type parameters:
 | 
| 8744 |     41 | *}
 | 
|  |     42 | 
 | 
|  |     43 | datatype ('a,'v) instr = Const 'v
 | 
|  |     44 |                        | Load 'a
 | 
|  |     45 |                        | Apply "'v binop";
 | 
|  |     46 | 
 | 
|  |     47 | text{*
 | 
| 8771 |     48 | The execution of the stack machine is modelled by a function
 | 
| 9792 |     49 | @{text"exec"} that takes a list of instructions, a store (modelled as a
 | 
| 8771 |     50 | function from addresses to values, just like the environment for
 | 
|  |     51 | evaluating expressions), and a stack (modelled as a list) of values,
 | 
| 10971 |     52 | and returns the stack at the end of the execution --- the store remains
 | 
| 8771 |     53 | unchanged:
 | 
| 8744 |     54 | *}
 | 
|  |     55 | 
 | 
| 27015 |     56 | primrec exec :: "('a,'v)instr list \<Rightarrow> ('a\<Rightarrow>'v) \<Rightarrow> 'v list \<Rightarrow> 'v list"
 | 
|  |     57 | where
 | 
|  |     58 | "exec [] s vs = vs" |
 | 
| 8771 |     59 | "exec (i#is) s vs = (case i of
 | 
| 10171 |     60 |     Const v  \<Rightarrow> exec is s (v#vs)
 | 
|  |     61 |   | Load a   \<Rightarrow> exec is s ((s a)#vs)
 | 
| 27015 |     62 |   | Apply f  \<Rightarrow> exec is s ((f (hd vs) (hd(tl vs)))#(tl(tl vs))))"
 | 
| 8744 |     63 | 
 | 
|  |     64 | text{*\noindent
 | 
| 9792 |     65 | Recall that @{term"hd"} and @{term"tl"}
 | 
| 8744 |     66 | return the first element and the remainder of a list.
 | 
| 11458 |     67 | Because all functions are total, \cdx{hd} is defined even for the empty
 | 
| 8744 |     68 | list, although we do not know what the result is. Thus our model of the
 | 
| 10795 |     69 | machine always terminates properly, although the definition above does not
 | 
| 9792 |     70 | tell us much about the result in situations where @{term"Apply"} was executed
 | 
| 8744 |     71 | with fewer than two elements on the stack.
 | 
|  |     72 | 
 | 
|  |     73 | The compiler is a function from expressions to a list of instructions. Its
 | 
| 10795 |     74 | definition is obvious:
 | 
| 8744 |     75 | *}
 | 
|  |     76 | 
 | 
| 27015 |     77 | primrec compile :: "('a,'v)expr \<Rightarrow> ('a,'v)instr list" where
 | 
|  |     78 | "compile (Cex v)       = [Const v]" |
 | 
|  |     79 | "compile (Vex a)       = [Load a]" |
 | 
| 17212 |     80 | "compile (Bex f e1 e2) = (compile e2) @ (compile e1) @ [Apply f]"
 | 
| 8744 |     81 | 
 | 
|  |     82 | text{*
 | 
|  |     83 | Now we have to prove the correctness of the compiler, i.e.\ that the
 | 
|  |     84 | execution of a compiled expression results in the value of the expression:
 | 
|  |     85 | *}
 | 
| 17212 |     86 | theorem "exec (compile e) s [] = [value e s]";
 | 
| 8744 |     87 | (*<*)oops;(*>*)
 | 
|  |     88 | text{*\noindent
 | 
| 11458 |     89 | This theorem needs to be generalized:
 | 
| 8744 |     90 | *}
 | 
|  |     91 | 
 | 
| 17212 |     92 | theorem "\<forall>vs. exec (compile e) s vs = (value e s) # vs";
 | 
| 8744 |     93 | 
 | 
|  |     94 | txt{*\noindent
 | 
| 11458 |     95 | It will be proved by induction on @{term"e"} followed by simplification.  
 | 
|  |     96 | First, we must prove a lemma about executing the concatenation of two
 | 
| 8744 |     97 | instruction sequences:
 | 
|  |     98 | *}
 | 
|  |     99 | (*<*)oops;(*>*)
 | 
|  |    100 | lemma exec_app[simp]:
 | 
| 10171 |    101 |   "\<forall>vs. exec (xs@ys) s vs = exec ys s (exec xs s vs)"; 
 | 
| 8744 |    102 | 
 | 
|  |    103 | txt{*\noindent
 | 
| 9792 |    104 | This requires induction on @{term"xs"} and ordinary simplification for the
 | 
| 8744 |    105 | base cases. In the induction step, simplification leaves us with a formula
 | 
| 9792 |    106 | that contains two @{text"case"}-expressions over instructions. Thus we add
 | 
| 11458 |    107 | automatic case splitting, which finishes the proof:
 | 
| 8744 |    108 | *}
 | 
| 10171 |    109 | apply(induct_tac xs, simp, simp split: instr.split);
 | 
|  |    110 | (*<*)done(*>*)
 | 
| 8744 |    111 | text{*\noindent
 | 
| 11428 |    112 | Note that because both \methdx{simp_all} and \methdx{auto} perform simplification, they can
 | 
|  |    113 | be modified in the same way as @{text simp}.  Thus the proof can be
 | 
| 8744 |    114 | rewritten as
 | 
|  |    115 | *}
 | 
|  |    116 | (*<*)
 | 
| 9933 |    117 | declare exec_app[simp del];
 | 
| 10171 |    118 | lemma [simp]: "\<forall>vs. exec (xs@ys) s vs = exec ys s (exec xs s vs)"; 
 | 
| 8744 |    119 | (*>*)
 | 
| 10971 |    120 | apply(induct_tac xs, simp_all split: instr.split);
 | 
| 10171 |    121 | (*<*)done(*>*)
 | 
| 8744 |    122 | text{*\noindent
 | 
|  |    123 | Although this is more compact, it is less clear for the reader of the proof.
 | 
|  |    124 | 
 | 
| 17212 |    125 | We could now go back and prove @{prop"exec (compile e) s [] = [value e s]"}
 | 
| 8744 |    126 | merely by simplification with the generalized version we just proved.
 | 
|  |    127 | However, this is unnecessary because the generalized version fully subsumes
 | 
| 11458 |    128 | its instance.%
 | 
|  |    129 | \index{compiling expressions example|)}
 | 
| 8744 |    130 | *}
 | 
|  |    131 | (*<*)
 | 
| 17212 |    132 | theorem "\<forall>vs. exec (compile e) s vs = (value e s) # vs";
 | 
| 9458 |    133 | by(induct_tac e, auto);
 | 
| 8744 |    134 | end
 | 
|  |    135 | (*>*)
 |