| 8745 |      1 | (*<*)
 | 
| 16417 |      2 | theory Trie imports Main begin;
 | 
| 8745 |      3 | (*>*)
 | 
|  |      4 | text{*
 | 
|  |      5 | To minimize running time, each node of a trie should contain an array that maps
 | 
| 10795 |      6 | letters to subtries. We have chosen a
 | 
| 8745 |      7 | representation where the subtries are held in an association list, i.e.\ a
 | 
| 9792 |      8 | list of (letter,trie) pairs.  Abstracting over the alphabet @{typ"'a"} and the
 | 
|  |      9 | values @{typ"'v"} we define a trie as follows:
 | 
| 9458 |     10 | *};
 | 
| 8745 |     11 | 
 | 
|  |     12 | datatype ('a,'v)trie = Trie  "'v option"  "('a * ('a,'v)trie)list";
 | 
|  |     13 | 
 | 
|  |     14 | text{*\noindent
 | 
| 11458 |     15 | \index{datatypes!and nested recursion}%
 | 
| 8745 |     16 | The first component is the optional value, the second component the
 | 
|  |     17 | association list of subtries.  This is an example of nested recursion involving products,
 | 
|  |     18 | which is fine because products are datatypes as well.
 | 
|  |     19 | We define two selector functions:
 | 
| 9458 |     20 | *};
 | 
| 8745 |     21 | 
 | 
| 27015 |     22 | primrec "value" :: "('a,'v)trie \<Rightarrow> 'v option" where
 | 
|  |     23 | "value(Trie ov al) = ov"
 | 
|  |     24 | primrec alist :: "('a,'v)trie \<Rightarrow> ('a * ('a,'v)trie)list" where
 | 
|  |     25 | "alist(Trie ov al) = al"
 | 
| 8745 |     26 | 
 | 
|  |     27 | text{*\noindent
 | 
| 11309 |     28 | Association lists come with a generic lookup function.  Its result
 | 
|  |     29 | involves type @{text option} because a lookup can fail:
 | 
| 9458 |     30 | *};
 | 
| 8745 |     31 | 
 | 
| 27015 |     32 | primrec assoc :: "('key * 'val)list \<Rightarrow> 'key \<Rightarrow> 'val option" where
 | 
|  |     33 | "assoc [] x = None" |
 | 
|  |     34 | "assoc (p#ps) x =
 | 
|  |     35 |    (let (a,b) = p in if a=x then Some b else assoc ps x)"
 | 
| 8745 |     36 | 
 | 
|  |     37 | text{*
 | 
|  |     38 | Now we can define the lookup function for tries. It descends into the trie
 | 
|  |     39 | examining the letters of the search string one by one. As
 | 
|  |     40 | recursion on lists is simpler than on tries, let us express this as primitive
 | 
|  |     41 | recursion on the search string argument:
 | 
| 9458 |     42 | *};
 | 
| 8745 |     43 | 
 | 
| 27015 |     44 | primrec lookup :: "('a,'v)trie \<Rightarrow> 'a list \<Rightarrow> 'v option" where
 | 
|  |     45 | "lookup t [] = value t" |
 | 
|  |     46 | "lookup t (a#as) = (case assoc (alist t) a of
 | 
|  |     47 |                       None \<Rightarrow> None
 | 
|  |     48 |                     | Some at \<Rightarrow> lookup at as)"
 | 
| 8745 |     49 | 
 | 
|  |     50 | text{*
 | 
|  |     51 | As a first simple property we prove that looking up a string in the empty
 | 
| 15905 |     52 | trie @{term"Trie None []"} always returns @{const None}. The proof merely
 | 
| 8745 |     53 | distinguishes the two cases whether the search string is empty or not:
 | 
| 9458 |     54 | *};
 | 
| 8745 |     55 | 
 | 
|  |     56 | lemma [simp]: "lookup (Trie None []) as = None";
 | 
| 10171 |     57 | apply(case_tac as, simp_all);
 | 
|  |     58 | done
 | 
| 8745 |     59 | 
 | 
|  |     60 | text{*
 | 
|  |     61 | Things begin to get interesting with the definition of an update function
 | 
| 12328 |     62 | that adds a new (string, value) pair to a trie, overwriting the old value
 | 
| 8745 |     63 | associated with that string:
 | 
| 9458 |     64 | *};
 | 
| 8745 |     65 | 
 | 
| 27015 |     66 | primrec update:: "('a,'v)trie \<Rightarrow> 'a list \<Rightarrow> 'v \<Rightarrow> ('a,'v)trie" where
 | 
|  |     67 | "update t []     v = Trie (Some v) (alist t)" |
 | 
|  |     68 | "update t (a#as) v =
 | 
|  |     69 |    (let tt = (case assoc (alist t) a of
 | 
|  |     70 |                 None \<Rightarrow> Trie None [] | Some at \<Rightarrow> at)
 | 
|  |     71 |     in Trie (value t) ((a,update tt as v) # alist t))"
 | 
| 8745 |     72 | 
 | 
|  |     73 | text{*\noindent
 | 
|  |     74 | The base case is obvious. In the recursive case the subtrie
 | 
| 10171 |     75 | @{term tt} associated with the first letter @{term a} is extracted,
 | 
| 8745 |     76 | recursively updated, and then placed in front of the association list.
 | 
| 10171 |     77 | The old subtrie associated with @{term a} is still in the association list
 | 
| 15905 |     78 | but no longer accessible via @{const assoc}. Clearly, there is room here for
 | 
| 8745 |     79 | optimizations!
 | 
|  |     80 | 
 | 
| 15905 |     81 | Before we start on any proofs about @{const update} we tell the simplifier to
 | 
| 10171 |     82 | expand all @{text let}s and to split all @{text case}-constructs over
 | 
| 8745 |     83 | options:
 | 
| 9458 |     84 | *};
 | 
| 8745 |     85 | 
 | 
| 9933 |     86 | declare Let_def[simp] option.split[split]
 | 
| 8745 |     87 | 
 | 
|  |     88 | text{*\noindent
 | 
|  |     89 | The reason becomes clear when looking (probably after a failed proof
 | 
| 15905 |     90 | attempt) at the body of @{const update}: it contains both
 | 
| 10171 |     91 | @{text let} and a case distinction over type @{text option}.
 | 
| 8745 |     92 | 
 | 
| 15905 |     93 | Our main goal is to prove the correct interaction of @{const update} and
 | 
|  |     94 | @{const lookup}:
 | 
| 9458 |     95 | *};
 | 
| 8745 |     96 | 
 | 
| 10171 |     97 | theorem "\<forall>t v bs. lookup (update t as v) bs =
 | 
| 8745 |     98 |                     (if as=bs then Some v else lookup t bs)";
 | 
|  |     99 | 
 | 
|  |    100 | txt{*\noindent
 | 
| 10171 |    101 | Our plan is to induct on @{term as}; hence the remaining variables are
 | 
| 8745 |    102 | quantified. From the definitions it is clear that induction on either
 | 
| 11458 |    103 | @{term as} or @{term bs} is required. The choice of @{term as} is 
 | 
| 15905 |    104 | guided by the intuition that simplification of @{const lookup} might be easier
 | 
|  |    105 | if @{const update} has already been simplified, which can only happen if
 | 
| 10171 |    106 | @{term as} is instantiated.
 | 
| 11458 |    107 | The start of the proof is conventional:
 | 
| 9458 |    108 | *};
 | 
| 8771 |    109 | apply(induct_tac as, auto);
 | 
| 8745 |    110 | 
 | 
|  |    111 | txt{*\noindent
 | 
|  |    112 | Unfortunately, this time we are left with three intimidating looking subgoals:
 | 
| 9723 |    113 | \begin{isabelle}
 | 
| 8745 |    114 | ~1.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs\isanewline
 | 
|  |    115 | ~2.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs\isanewline
 | 
| 9792 |    116 | ~3.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs
 | 
| 9723 |    117 | \end{isabelle}
 | 
| 10171 |    118 | Clearly, if we want to make headway we have to instantiate @{term bs} as
 | 
| 8745 |    119 | well now. It turns out that instead of induction, case distinction
 | 
|  |    120 | suffices:
 | 
| 9458 |    121 | *};
 | 
| 10171 |    122 | apply(case_tac[!] bs, auto);
 | 
|  |    123 | done
 | 
| 8745 |    124 | 
 | 
|  |    125 | text{*\noindent
 | 
| 11458 |    126 | \index{subgoal numbering}%
 | 
| 10171 |    127 | All methods ending in @{text tac} take an optional first argument that
 | 
| 9792 |    128 | specifies the range of subgoals they are applied to, where @{text"[!]"} means
 | 
|  |    129 | all subgoals, i.e.\ @{text"[1-3]"} in our case. Individual subgoal numbers,
 | 
|  |    130 | e.g. @{text"[2]"} are also allowed.
 | 
| 8745 |    131 | 
 | 
|  |    132 | This proof may look surprisingly straightforward. However, note that this
 | 
| 9792 |    133 | comes at a cost: the proof script is unreadable because the intermediate
 | 
|  |    134 | proof states are invisible, and we rely on the (possibly brittle) magic of
 | 
| 10971 |    135 | @{text auto} (@{text simp_all} will not do --- try it) to split the subgoals
 | 
| 10171 |    136 | of the induction up in such a way that case distinction on @{term bs} makes
 | 
| 11458 |    137 | sense and solves the proof. 
 | 
| 11294 |    138 | 
 | 
|  |    139 | \begin{exercise}
 | 
| 15905 |    140 |   Modify @{const update} (and its type) such that it allows both insertion and
 | 
| 11303 |    141 |   deletion of entries with a single function.  Prove the corresponding version 
 | 
|  |    142 |   of the main theorem above.
 | 
|  |    143 |   Optimize your function such that it shrinks tries after
 | 
|  |    144 |   deletion if possible.
 | 
| 11294 |    145 | \end{exercise}
 | 
|  |    146 | 
 | 
|  |    147 | \begin{exercise}
 | 
| 15905 |    148 |   Write an improved version of @{const update} that does not suffer from the
 | 
| 11294 |    149 |   space leak (pointed out above) caused by not deleting overwritten entries
 | 
|  |    150 |   from the association list. Prove the main theorem for your improved
 | 
| 15905 |    151 |   @{const update}.
 | 
| 11294 |    152 | \end{exercise}
 | 
|  |    153 | 
 | 
|  |    154 | \begin{exercise}
 | 
|  |    155 |   Conceptually, each node contains a mapping from letters to optional
 | 
|  |    156 |   subtries. Above we have implemented this by means of an association
 | 
|  |    157 |   list. Replay the development replacing @{typ "('a * ('a,'v)trie)list"}
 | 
|  |    158 |   with @{typ"'a \<Rightarrow> ('a,'v)trie option"}.
 | 
|  |    159 | \end{exercise}
 | 
|  |    160 | 
 | 
| 9458 |    161 | *};
 | 
| 8745 |    162 | 
 | 
|  |    163 | (*<*)
 | 
| 11294 |    164 | 
 | 
|  |    165 | (* Exercise 1. Solution by Getrud Bauer *)
 | 
|  |    166 | 
 | 
| 27015 |    167 | primrec update1 :: "('a, 'v) trie \<Rightarrow> 'a list \<Rightarrow> 'v option \<Rightarrow> ('a, 'v) trie"
 | 
|  |    168 | where
 | 
|  |    169 |   "update1 t []     vo = Trie vo (alist t)" |
 | 
| 11294 |    170 |   "update1 t (a#as) vo =
 | 
|  |    171 |      (let tt = (case assoc (alist t) a of
 | 
|  |    172 |                   None \<Rightarrow> Trie None [] 
 | 
|  |    173 |                 | Some at \<Rightarrow> at)
 | 
|  |    174 |       in Trie (value t) ((a, update1 tt as vo) # alist t))"
 | 
|  |    175 | 
 | 
|  |    176 | theorem [simp]: "\<forall>t v bs. lookup (update1 t as v) bs =
 | 
|  |    177 |                     (if as = bs then v else lookup t bs)";
 | 
|  |    178 | apply (induct_tac as, auto);
 | 
|  |    179 | apply (case_tac[!] bs, auto);
 | 
|  |    180 | done
 | 
|  |    181 | 
 | 
|  |    182 | 
 | 
|  |    183 | (* Exercise 2. Solution by Getrud Bauer *)
 | 
|  |    184 | 
 | 
| 27015 |    185 | primrec overwrite :: "'a \<Rightarrow> 'b \<Rightarrow> ('a * 'b) list \<Rightarrow> ('a * 'b) list" where
 | 
|  |    186 | "overwrite a v [] = [(a,v)]" |
 | 
|  |    187 | "overwrite a v (p#ps) = (if a = fst p then (a,v)#ps else p # overwrite a v ps)"
 | 
| 11294 |    188 | 
 | 
|  |    189 | lemma [simp]: "\<forall> a v b. assoc (overwrite a v ps) b = assoc ((a,v)#ps) b"
 | 
|  |    190 | apply (induct_tac ps, auto)
 | 
|  |    191 | apply (case_tac[!] a)
 | 
|  |    192 | done
 | 
|  |    193 | 
 | 
| 27015 |    194 | primrec update2 :: "('a, 'v) trie \<Rightarrow> 'a list \<Rightarrow> 'v option \<Rightarrow> ('a, 'v) trie"
 | 
|  |    195 | where
 | 
|  |    196 |   "update2 t []     vo = Trie vo (alist t)" |
 | 
| 11294 |    197 |   "update2 t (a#as) vo =
 | 
|  |    198 |      (let tt = (case assoc (alist t) a of 
 | 
|  |    199 |                   None \<Rightarrow> Trie None []  
 | 
|  |    200 |                 | Some at \<Rightarrow> at) 
 | 
|  |    201 |       in Trie (value t) (overwrite a (update2 tt as vo) (alist t)))"; 
 | 
|  |    202 | 
 | 
|  |    203 | theorem "\<forall>t v bs. lookup (update2 t as vo) bs =
 | 
|  |    204 |                     (if as = bs then vo else lookup t bs)";
 | 
|  |    205 | apply (induct_tac as, auto);
 | 
|  |    206 | apply (case_tac[!] bs, auto);
 | 
|  |    207 | done
 | 
|  |    208 | 
 | 
|  |    209 | 
 | 
|  |    210 | (* Exercise 3. Solution by Getrud Bauer *)
 | 
|  |    211 | datatype ('a,'v) triem = Triem  "'v option" "'a \<Rightarrow> ('a,'v) triem option";
 | 
|  |    212 | 
 | 
| 27015 |    213 | primrec valuem :: "('a, 'v) triem \<Rightarrow> 'v option" where
 | 
|  |    214 | "valuem (Triem ov m) = ov"
 | 
| 11294 |    215 | 
 | 
| 27015 |    216 | primrec mapping :: "('a,'v) triem \<Rightarrow> 'a \<Rightarrow> ('a, 'v) triem option" where
 | 
|  |    217 | "mapping (Triem ov m) = m"
 | 
| 11294 |    218 | 
 | 
| 27015 |    219 | primrec lookupm :: "('a,'v) triem \<Rightarrow> 'a list \<Rightarrow> 'v option" where
 | 
|  |    220 |   "lookupm t [] = valuem t" |
 | 
| 11294 |    221 |   "lookupm t (a#as) = (case mapping t a of
 | 
|  |    222 |                         None \<Rightarrow> None
 | 
|  |    223 |                       | Some at \<Rightarrow> lookupm at as)";
 | 
|  |    224 | 
 | 
|  |    225 | lemma [simp]: "lookupm (Triem None  (\<lambda>c. None)) as = None";
 | 
|  |    226 | apply (case_tac as, simp_all);
 | 
|  |    227 | done
 | 
|  |    228 | 
 | 
| 27015 |    229 | primrec updatem :: "('a,'v)triem \<Rightarrow> 'a list \<Rightarrow> 'v \<Rightarrow> ('a,'v)triem" where
 | 
|  |    230 |   "updatem t []     v = Triem (Some v) (mapping t)" |
 | 
| 11294 |    231 |   "updatem t (a#as) v =
 | 
|  |    232 |      (let tt = (case mapping t a of
 | 
|  |    233 |                   None \<Rightarrow> Triem None (\<lambda>c. None) 
 | 
|  |    234 |                 | Some at \<Rightarrow> at)
 | 
|  |    235 |       in Triem (valuem t) 
 | 
|  |    236 |               (\<lambda>c. if c = a then Some (updatem tt as v) else mapping t c))";
 | 
|  |    237 | 
 | 
|  |    238 | theorem "\<forall>t v bs. lookupm (updatem t as v) bs = 
 | 
|  |    239 |                     (if as = bs then Some v else lookupm t bs)";
 | 
|  |    240 | apply (induct_tac as, auto);
 | 
|  |    241 | apply (case_tac[!] bs, auto);
 | 
|  |    242 | done
 | 
|  |    243 | 
 | 
| 9458 |    244 | end;
 | 
| 8745 |    245 | (*>*)
 |