| 8745 |      1 | (*<*)
 | 
|  |      2 | theory Trie = Main:;
 | 
|  |      3 | (*>*)
 | 
|  |      4 | text{*
 | 
|  |      5 | To minimize running time, each node of a trie should contain an array that maps
 | 
| 10795 |      6 | letters to subtries. We have chosen a
 | 
| 8745 |      7 | representation where the subtries are held in an association list, i.e.\ a
 | 
| 9792 |      8 | list of (letter,trie) pairs.  Abstracting over the alphabet @{typ"'a"} and the
 | 
|  |      9 | values @{typ"'v"} we define a trie as follows:
 | 
| 9458 |     10 | *};
 | 
| 8745 |     11 | 
 | 
|  |     12 | datatype ('a,'v)trie = Trie  "'v option"  "('a * ('a,'v)trie)list";
 | 
|  |     13 | 
 | 
|  |     14 | text{*\noindent
 | 
| 11458 |     15 | \index{datatypes!and nested recursion}%
 | 
| 8745 |     16 | The first component is the optional value, the second component the
 | 
|  |     17 | association list of subtries.  This is an example of nested recursion involving products,
 | 
|  |     18 | which is fine because products are datatypes as well.
 | 
|  |     19 | We define two selector functions:
 | 
| 9458 |     20 | *};
 | 
| 8745 |     21 | 
 | 
| 10171 |     22 | consts value :: "('a,'v)trie \<Rightarrow> 'v option"
 | 
|  |     23 |        alist :: "('a,'v)trie \<Rightarrow> ('a * ('a,'v)trie)list";
 | 
| 8745 |     24 | primrec "value(Trie ov al) = ov";
 | 
|  |     25 | primrec "alist(Trie ov al) = al";
 | 
|  |     26 | 
 | 
|  |     27 | text{*\noindent
 | 
| 11309 |     28 | Association lists come with a generic lookup function.  Its result
 | 
|  |     29 | involves type @{text option} because a lookup can fail:
 | 
| 9458 |     30 | *};
 | 
| 8745 |     31 | 
 | 
| 10171 |     32 | consts   assoc :: "('key * 'val)list \<Rightarrow> 'key \<Rightarrow> 'val option";
 | 
| 8745 |     33 | primrec "assoc [] x = None"
 | 
|  |     34 |         "assoc (p#ps) x =
 | 
|  |     35 |            (let (a,b) = p in if a=x then Some b else assoc ps x)";
 | 
|  |     36 | 
 | 
|  |     37 | text{*
 | 
|  |     38 | Now we can define the lookup function for tries. It descends into the trie
 | 
|  |     39 | examining the letters of the search string one by one. As
 | 
|  |     40 | recursion on lists is simpler than on tries, let us express this as primitive
 | 
|  |     41 | recursion on the search string argument:
 | 
| 9458 |     42 | *};
 | 
| 8745 |     43 | 
 | 
| 10171 |     44 | consts   lookup :: "('a,'v)trie \<Rightarrow> 'a list \<Rightarrow> 'v option";
 | 
| 8745 |     45 | primrec "lookup t [] = value t"
 | 
|  |     46 |         "lookup t (a#as) = (case assoc (alist t) a of
 | 
| 10171 |     47 |                               None \<Rightarrow> None
 | 
|  |     48 |                             | Some at \<Rightarrow> lookup at as)";
 | 
| 8745 |     49 | 
 | 
|  |     50 | text{*
 | 
|  |     51 | As a first simple property we prove that looking up a string in the empty
 | 
| 10171 |     52 | trie @{term"Trie None []"} always returns @{term None}. The proof merely
 | 
| 8745 |     53 | distinguishes the two cases whether the search string is empty or not:
 | 
| 9458 |     54 | *};
 | 
| 8745 |     55 | 
 | 
|  |     56 | lemma [simp]: "lookup (Trie None []) as = None";
 | 
| 10171 |     57 | apply(case_tac as, simp_all);
 | 
|  |     58 | done
 | 
| 8745 |     59 | 
 | 
|  |     60 | text{*
 | 
|  |     61 | Things begin to get interesting with the definition of an update function
 | 
| 12328 |     62 | that adds a new (string, value) pair to a trie, overwriting the old value
 | 
| 8745 |     63 | associated with that string:
 | 
| 9458 |     64 | *};
 | 
| 8745 |     65 | 
 | 
| 10171 |     66 | consts update :: "('a,'v)trie \<Rightarrow> 'a list \<Rightarrow> 'v \<Rightarrow> ('a,'v)trie";
 | 
| 8745 |     67 | primrec
 | 
|  |     68 |   "update t []     v = Trie (Some v) (alist t)"
 | 
|  |     69 |   "update t (a#as) v =
 | 
|  |     70 |      (let tt = (case assoc (alist t) a of
 | 
| 10171 |     71 |                   None \<Rightarrow> Trie None [] | Some at \<Rightarrow> at)
 | 
| 10795 |     72 |       in Trie (value t) ((a,update tt as v) # alist t))";
 | 
| 8745 |     73 | 
 | 
|  |     74 | text{*\noindent
 | 
|  |     75 | The base case is obvious. In the recursive case the subtrie
 | 
| 10171 |     76 | @{term tt} associated with the first letter @{term a} is extracted,
 | 
| 8745 |     77 | recursively updated, and then placed in front of the association list.
 | 
| 10171 |     78 | The old subtrie associated with @{term a} is still in the association list
 | 
|  |     79 | but no longer accessible via @{term assoc}. Clearly, there is room here for
 | 
| 8745 |     80 | optimizations!
 | 
|  |     81 | 
 | 
| 10171 |     82 | Before we start on any proofs about @{term update} we tell the simplifier to
 | 
|  |     83 | expand all @{text let}s and to split all @{text case}-constructs over
 | 
| 8745 |     84 | options:
 | 
| 9458 |     85 | *};
 | 
| 8745 |     86 | 
 | 
| 9933 |     87 | declare Let_def[simp] option.split[split]
 | 
| 8745 |     88 | 
 | 
|  |     89 | text{*\noindent
 | 
|  |     90 | The reason becomes clear when looking (probably after a failed proof
 | 
| 10171 |     91 | attempt) at the body of @{term update}: it contains both
 | 
|  |     92 | @{text let} and a case distinction over type @{text option}.
 | 
| 8745 |     93 | 
 | 
| 10171 |     94 | Our main goal is to prove the correct interaction of @{term update} and
 | 
|  |     95 | @{term lookup}:
 | 
| 9458 |     96 | *};
 | 
| 8745 |     97 | 
 | 
| 10171 |     98 | theorem "\<forall>t v bs. lookup (update t as v) bs =
 | 
| 8745 |     99 |                     (if as=bs then Some v else lookup t bs)";
 | 
|  |    100 | 
 | 
|  |    101 | txt{*\noindent
 | 
| 10171 |    102 | Our plan is to induct on @{term as}; hence the remaining variables are
 | 
| 8745 |    103 | quantified. From the definitions it is clear that induction on either
 | 
| 11458 |    104 | @{term as} or @{term bs} is required. The choice of @{term as} is 
 | 
| 10171 |    105 | guided by the intuition that simplification of @{term lookup} might be easier
 | 
|  |    106 | if @{term update} has already been simplified, which can only happen if
 | 
|  |    107 | @{term as} is instantiated.
 | 
| 11458 |    108 | The start of the proof is conventional:
 | 
| 9458 |    109 | *};
 | 
| 8771 |    110 | apply(induct_tac as, auto);
 | 
| 8745 |    111 | 
 | 
|  |    112 | txt{*\noindent
 | 
|  |    113 | Unfortunately, this time we are left with three intimidating looking subgoals:
 | 
| 9723 |    114 | \begin{isabelle}
 | 
| 8745 |    115 | ~1.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs\isanewline
 | 
|  |    116 | ~2.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs\isanewline
 | 
| 9792 |    117 | ~3.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs
 | 
| 9723 |    118 | \end{isabelle}
 | 
| 10171 |    119 | Clearly, if we want to make headway we have to instantiate @{term bs} as
 | 
| 8745 |    120 | well now. It turns out that instead of induction, case distinction
 | 
|  |    121 | suffices:
 | 
| 9458 |    122 | *};
 | 
| 10171 |    123 | apply(case_tac[!] bs, auto);
 | 
|  |    124 | done
 | 
| 8745 |    125 | 
 | 
|  |    126 | text{*\noindent
 | 
| 11458 |    127 | \index{subgoal numbering}%
 | 
| 10171 |    128 | All methods ending in @{text tac} take an optional first argument that
 | 
| 9792 |    129 | specifies the range of subgoals they are applied to, where @{text"[!]"} means
 | 
|  |    130 | all subgoals, i.e.\ @{text"[1-3]"} in our case. Individual subgoal numbers,
 | 
|  |    131 | e.g. @{text"[2]"} are also allowed.
 | 
| 8745 |    132 | 
 | 
|  |    133 | This proof may look surprisingly straightforward. However, note that this
 | 
| 9792 |    134 | comes at a cost: the proof script is unreadable because the intermediate
 | 
|  |    135 | proof states are invisible, and we rely on the (possibly brittle) magic of
 | 
| 10971 |    136 | @{text auto} (@{text simp_all} will not do --- try it) to split the subgoals
 | 
| 10171 |    137 | of the induction up in such a way that case distinction on @{term bs} makes
 | 
| 11458 |    138 | sense and solves the proof. 
 | 
| 11294 |    139 | 
 | 
|  |    140 | \begin{exercise}
 | 
|  |    141 |   Modify @{term update} (and its type) such that it allows both insertion and
 | 
| 11303 |    142 |   deletion of entries with a single function.  Prove the corresponding version 
 | 
|  |    143 |   of the main theorem above.
 | 
|  |    144 |   Optimize your function such that it shrinks tries after
 | 
|  |    145 |   deletion if possible.
 | 
| 11294 |    146 | \end{exercise}
 | 
|  |    147 | 
 | 
|  |    148 | \begin{exercise}
 | 
|  |    149 |   Write an improved version of @{term update} that does not suffer from the
 | 
|  |    150 |   space leak (pointed out above) caused by not deleting overwritten entries
 | 
|  |    151 |   from the association list. Prove the main theorem for your improved
 | 
|  |    152 |   @{term update}.
 | 
|  |    153 | \end{exercise}
 | 
|  |    154 | 
 | 
|  |    155 | \begin{exercise}
 | 
|  |    156 |   Conceptually, each node contains a mapping from letters to optional
 | 
|  |    157 |   subtries. Above we have implemented this by means of an association
 | 
|  |    158 |   list. Replay the development replacing @{typ "('a * ('a,'v)trie)list"}
 | 
|  |    159 |   with @{typ"'a \<Rightarrow> ('a,'v)trie option"}.
 | 
|  |    160 | \end{exercise}
 | 
|  |    161 | 
 | 
| 9458 |    162 | *};
 | 
| 8745 |    163 | 
 | 
|  |    164 | (*<*)
 | 
| 11294 |    165 | 
 | 
|  |    166 | (* Exercise 1. Solution by Getrud Bauer *)
 | 
|  |    167 | 
 | 
|  |    168 | consts update1 :: "('a, 'v) trie \<Rightarrow> 'a list \<Rightarrow> 'v option \<Rightarrow> ('a, 'v) trie"
 | 
|  |    169 | primrec
 | 
|  |    170 |   "update1 t []     vo = Trie vo (alist t)"
 | 
|  |    171 |   "update1 t (a#as) vo =
 | 
|  |    172 |      (let tt = (case assoc (alist t) a of
 | 
|  |    173 |                   None \<Rightarrow> Trie None [] 
 | 
|  |    174 |                 | Some at \<Rightarrow> at)
 | 
|  |    175 |       in Trie (value t) ((a, update1 tt as vo) # alist t))"
 | 
|  |    176 | 
 | 
|  |    177 | theorem [simp]: "\<forall>t v bs. lookup (update1 t as v) bs =
 | 
|  |    178 |                     (if as = bs then v else lookup t bs)";
 | 
|  |    179 | apply (induct_tac as, auto);
 | 
|  |    180 | apply (case_tac[!] bs, auto);
 | 
|  |    181 | done
 | 
|  |    182 | 
 | 
|  |    183 | 
 | 
|  |    184 | (* Exercise 2. Solution by Getrud Bauer *)
 | 
|  |    185 | 
 | 
|  |    186 | consts overwrite :: "'a \<Rightarrow> 'b \<Rightarrow> ('a * 'b) list \<Rightarrow> ('a * 'b) list"
 | 
|  |    187 | primrec
 | 
|  |    188 |   "overwrite a v [] = [(a,v)]"
 | 
|  |    189 |   "overwrite a v (p#ps) = (if a = fst p then (a,v)#ps else p # overwrite a v ps)"
 | 
|  |    190 | 
 | 
|  |    191 | lemma [simp]: "\<forall> a v b. assoc (overwrite a v ps) b = assoc ((a,v)#ps) b"
 | 
|  |    192 | apply (induct_tac ps, auto)
 | 
|  |    193 | apply (case_tac[!] a)
 | 
|  |    194 | done
 | 
|  |    195 | 
 | 
|  |    196 | consts update2 :: "('a, 'v) trie \<Rightarrow> 'a list \<Rightarrow> 'v option \<Rightarrow> ('a, 'v) trie";
 | 
|  |    197 | primrec
 | 
|  |    198 |   "update2 t []     vo = Trie vo (alist t)"
 | 
|  |    199 |   "update2 t (a#as) vo =
 | 
|  |    200 |      (let tt = (case assoc (alist t) a of 
 | 
|  |    201 |                   None \<Rightarrow> Trie None []  
 | 
|  |    202 |                 | Some at \<Rightarrow> at) 
 | 
|  |    203 |       in Trie (value t) (overwrite a (update2 tt as vo) (alist t)))"; 
 | 
|  |    204 | 
 | 
|  |    205 | theorem "\<forall>t v bs. lookup (update2 t as vo) bs =
 | 
|  |    206 |                     (if as = bs then vo else lookup t bs)";
 | 
|  |    207 | apply (induct_tac as, auto);
 | 
|  |    208 | apply (case_tac[!] bs, auto);
 | 
|  |    209 | done
 | 
|  |    210 | 
 | 
|  |    211 | 
 | 
|  |    212 | (* Exercise 3. Solution by Getrud Bauer *)
 | 
|  |    213 | datatype ('a,'v) triem = Triem  "'v option" "'a \<Rightarrow> ('a,'v) triem option";
 | 
|  |    214 | 
 | 
|  |    215 | consts valuem :: "('a, 'v) triem \<Rightarrow> 'v option"
 | 
|  |    216 | primrec "valuem (Triem ov m) = ov";
 | 
|  |    217 | 
 | 
|  |    218 | consts mapping :: "('a,'v) triem \<Rightarrow> 'a \<Rightarrow> ('a, 'v) triem option";
 | 
|  |    219 | primrec "mapping (Triem ov m) = m";
 | 
|  |    220 | 
 | 
|  |    221 | consts lookupm :: "('a,'v) triem \<Rightarrow> 'a list \<Rightarrow> 'v option"
 | 
|  |    222 | primrec
 | 
|  |    223 |   "lookupm t [] = valuem t"
 | 
|  |    224 |   "lookupm t (a#as) = (case mapping t a of
 | 
|  |    225 |                         None \<Rightarrow> None
 | 
|  |    226 |                       | Some at \<Rightarrow> lookupm at as)";
 | 
|  |    227 | 
 | 
|  |    228 | lemma [simp]: "lookupm (Triem None  (\<lambda>c. None)) as = None";
 | 
|  |    229 | apply (case_tac as, simp_all);
 | 
|  |    230 | done
 | 
|  |    231 | 
 | 
|  |    232 | consts updatem :: "('a,'v)triem \<Rightarrow> 'a list \<Rightarrow> 'v \<Rightarrow> ('a,'v)triem"
 | 
|  |    233 | primrec
 | 
|  |    234 |   "updatem t []     v = Triem (Some v) (mapping t)"
 | 
|  |    235 |   "updatem t (a#as) v =
 | 
|  |    236 |      (let tt = (case mapping t a of
 | 
|  |    237 |                   None \<Rightarrow> Triem None (\<lambda>c. None) 
 | 
|  |    238 |                 | Some at \<Rightarrow> at)
 | 
|  |    239 |       in Triem (valuem t) 
 | 
|  |    240 |               (\<lambda>c. if c = a then Some (updatem tt as v) else mapping t c))";
 | 
|  |    241 | 
 | 
|  |    242 | theorem "\<forall>t v bs. lookupm (updatem t as v) bs = 
 | 
|  |    243 |                     (if as = bs then Some v else lookupm t bs)";
 | 
|  |    244 | apply (induct_tac as, auto);
 | 
|  |    245 | apply (case_tac[!] bs, auto);
 | 
|  |    246 | done
 | 
|  |    247 | 
 | 
| 9458 |    248 | end;
 | 
| 8745 |    249 | (*>*)
 |