src/Doc/Tutorial/Trie/Trie.thy
changeset 48985 5386df44a037
parent 27015 f8537d69f514
child 58305 57752a91eec4
equal deleted inserted replaced
48984:f51d4a302962 48985:5386df44a037
       
     1 (*<*)
       
     2 theory Trie imports Main begin;
       
     3 (*>*)
       
     4 text{*
       
     5 To minimize running time, each node of a trie should contain an array that maps
       
     6 letters to subtries. We have chosen a
       
     7 representation where the subtries are held in an association list, i.e.\ a
       
     8 list of (letter,trie) pairs.  Abstracting over the alphabet @{typ"'a"} and the
       
     9 values @{typ"'v"} we define a trie as follows:
       
    10 *};
       
    11 
       
    12 datatype ('a,'v)trie = Trie  "'v option"  "('a * ('a,'v)trie)list";
       
    13 
       
    14 text{*\noindent
       
    15 \index{datatypes!and nested recursion}%
       
    16 The first component is the optional value, the second component the
       
    17 association list of subtries.  This is an example of nested recursion involving products,
       
    18 which is fine because products are datatypes as well.
       
    19 We define two selector functions:
       
    20 *};
       
    21 
       
    22 primrec "value" :: "('a,'v)trie \<Rightarrow> 'v option" where
       
    23 "value(Trie ov al) = ov"
       
    24 primrec alist :: "('a,'v)trie \<Rightarrow> ('a * ('a,'v)trie)list" where
       
    25 "alist(Trie ov al) = al"
       
    26 
       
    27 text{*\noindent
       
    28 Association lists come with a generic lookup function.  Its result
       
    29 involves type @{text option} because a lookup can fail:
       
    30 *};
       
    31 
       
    32 primrec assoc :: "('key * 'val)list \<Rightarrow> 'key \<Rightarrow> 'val option" where
       
    33 "assoc [] x = None" |
       
    34 "assoc (p#ps) x =
       
    35    (let (a,b) = p in if a=x then Some b else assoc ps x)"
       
    36 
       
    37 text{*
       
    38 Now we can define the lookup function for tries. It descends into the trie
       
    39 examining the letters of the search string one by one. As
       
    40 recursion on lists is simpler than on tries, let us express this as primitive
       
    41 recursion on the search string argument:
       
    42 *};
       
    43 
       
    44 primrec lookup :: "('a,'v)trie \<Rightarrow> 'a list \<Rightarrow> 'v option" where
       
    45 "lookup t [] = value t" |
       
    46 "lookup t (a#as) = (case assoc (alist t) a of
       
    47                       None \<Rightarrow> None
       
    48                     | Some at \<Rightarrow> lookup at as)"
       
    49 
       
    50 text{*
       
    51 As a first simple property we prove that looking up a string in the empty
       
    52 trie @{term"Trie None []"} always returns @{const None}. The proof merely
       
    53 distinguishes the two cases whether the search string is empty or not:
       
    54 *};
       
    55 
       
    56 lemma [simp]: "lookup (Trie None []) as = None";
       
    57 apply(case_tac as, simp_all);
       
    58 done
       
    59 
       
    60 text{*
       
    61 Things begin to get interesting with the definition of an update function
       
    62 that adds a new (string, value) pair to a trie, overwriting the old value
       
    63 associated with that string:
       
    64 *};
       
    65 
       
    66 primrec update:: "('a,'v)trie \<Rightarrow> 'a list \<Rightarrow> 'v \<Rightarrow> ('a,'v)trie" where
       
    67 "update t []     v = Trie (Some v) (alist t)" |
       
    68 "update t (a#as) v =
       
    69    (let tt = (case assoc (alist t) a of
       
    70                 None \<Rightarrow> Trie None [] | Some at \<Rightarrow> at)
       
    71     in Trie (value t) ((a,update tt as v) # alist t))"
       
    72 
       
    73 text{*\noindent
       
    74 The base case is obvious. In the recursive case the subtrie
       
    75 @{term tt} associated with the first letter @{term a} is extracted,
       
    76 recursively updated, and then placed in front of the association list.
       
    77 The old subtrie associated with @{term a} is still in the association list
       
    78 but no longer accessible via @{const assoc}. Clearly, there is room here for
       
    79 optimizations!
       
    80 
       
    81 Before we start on any proofs about @{const update} we tell the simplifier to
       
    82 expand all @{text let}s and to split all @{text case}-constructs over
       
    83 options:
       
    84 *};
       
    85 
       
    86 declare Let_def[simp] option.split[split]
       
    87 
       
    88 text{*\noindent
       
    89 The reason becomes clear when looking (probably after a failed proof
       
    90 attempt) at the body of @{const update}: it contains both
       
    91 @{text let} and a case distinction over type @{text option}.
       
    92 
       
    93 Our main goal is to prove the correct interaction of @{const update} and
       
    94 @{const lookup}:
       
    95 *};
       
    96 
       
    97 theorem "\<forall>t v bs. lookup (update t as v) bs =
       
    98                     (if as=bs then Some v else lookup t bs)";
       
    99 
       
   100 txt{*\noindent
       
   101 Our plan is to induct on @{term as}; hence the remaining variables are
       
   102 quantified. From the definitions it is clear that induction on either
       
   103 @{term as} or @{term bs} is required. The choice of @{term as} is 
       
   104 guided by the intuition that simplification of @{const lookup} might be easier
       
   105 if @{const update} has already been simplified, which can only happen if
       
   106 @{term as} is instantiated.
       
   107 The start of the proof is conventional:
       
   108 *};
       
   109 apply(induct_tac as, auto);
       
   110 
       
   111 txt{*\noindent
       
   112 Unfortunately, this time we are left with three intimidating looking subgoals:
       
   113 \begin{isabelle}
       
   114 ~1.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs\isanewline
       
   115 ~2.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs\isanewline
       
   116 ~3.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs
       
   117 \end{isabelle}
       
   118 Clearly, if we want to make headway we have to instantiate @{term bs} as
       
   119 well now. It turns out that instead of induction, case distinction
       
   120 suffices:
       
   121 *};
       
   122 apply(case_tac[!] bs, auto);
       
   123 done
       
   124 
       
   125 text{*\noindent
       
   126 \index{subgoal numbering}%
       
   127 All methods ending in @{text tac} take an optional first argument that
       
   128 specifies the range of subgoals they are applied to, where @{text"[!]"} means
       
   129 all subgoals, i.e.\ @{text"[1-3]"} in our case. Individual subgoal numbers,
       
   130 e.g. @{text"[2]"} are also allowed.
       
   131 
       
   132 This proof may look surprisingly straightforward. However, note that this
       
   133 comes at a cost: the proof script is unreadable because the intermediate
       
   134 proof states are invisible, and we rely on the (possibly brittle) magic of
       
   135 @{text auto} (@{text simp_all} will not do --- try it) to split the subgoals
       
   136 of the induction up in such a way that case distinction on @{term bs} makes
       
   137 sense and solves the proof. 
       
   138 
       
   139 \begin{exercise}
       
   140   Modify @{const update} (and its type) such that it allows both insertion and
       
   141   deletion of entries with a single function.  Prove the corresponding version 
       
   142   of the main theorem above.
       
   143   Optimize your function such that it shrinks tries after
       
   144   deletion if possible.
       
   145 \end{exercise}
       
   146 
       
   147 \begin{exercise}
       
   148   Write an improved version of @{const update} that does not suffer from the
       
   149   space leak (pointed out above) caused by not deleting overwritten entries
       
   150   from the association list. Prove the main theorem for your improved
       
   151   @{const update}.
       
   152 \end{exercise}
       
   153 
       
   154 \begin{exercise}
       
   155   Conceptually, each node contains a mapping from letters to optional
       
   156   subtries. Above we have implemented this by means of an association
       
   157   list. Replay the development replacing @{typ "('a * ('a,'v)trie)list"}
       
   158   with @{typ"'a \<Rightarrow> ('a,'v)trie option"}.
       
   159 \end{exercise}
       
   160 
       
   161 *};
       
   162 
       
   163 (*<*)
       
   164 
       
   165 (* Exercise 1. Solution by Getrud Bauer *)
       
   166 
       
   167 primrec update1 :: "('a, 'v) trie \<Rightarrow> 'a list \<Rightarrow> 'v option \<Rightarrow> ('a, 'v) trie"
       
   168 where
       
   169   "update1 t []     vo = Trie vo (alist t)" |
       
   170   "update1 t (a#as) vo =
       
   171      (let tt = (case assoc (alist t) a of
       
   172                   None \<Rightarrow> Trie None [] 
       
   173                 | Some at \<Rightarrow> at)
       
   174       in Trie (value t) ((a, update1 tt as vo) # alist t))"
       
   175 
       
   176 theorem [simp]: "\<forall>t v bs. lookup (update1 t as v) bs =
       
   177                     (if as = bs then v else lookup t bs)";
       
   178 apply (induct_tac as, auto);
       
   179 apply (case_tac[!] bs, auto);
       
   180 done
       
   181 
       
   182 
       
   183 (* Exercise 2. Solution by Getrud Bauer *)
       
   184 
       
   185 primrec overwrite :: "'a \<Rightarrow> 'b \<Rightarrow> ('a * 'b) list \<Rightarrow> ('a * 'b) list" where
       
   186 "overwrite a v [] = [(a,v)]" |
       
   187 "overwrite a v (p#ps) = (if a = fst p then (a,v)#ps else p # overwrite a v ps)"
       
   188 
       
   189 lemma [simp]: "\<forall> a v b. assoc (overwrite a v ps) b = assoc ((a,v)#ps) b"
       
   190 apply (induct_tac ps, auto)
       
   191 apply (case_tac[!] a)
       
   192 done
       
   193 
       
   194 primrec update2 :: "('a, 'v) trie \<Rightarrow> 'a list \<Rightarrow> 'v option \<Rightarrow> ('a, 'v) trie"
       
   195 where
       
   196   "update2 t []     vo = Trie vo (alist t)" |
       
   197   "update2 t (a#as) vo =
       
   198      (let tt = (case assoc (alist t) a of 
       
   199                   None \<Rightarrow> Trie None []  
       
   200                 | Some at \<Rightarrow> at) 
       
   201       in Trie (value t) (overwrite a (update2 tt as vo) (alist t)))"; 
       
   202 
       
   203 theorem "\<forall>t v bs. lookup (update2 t as vo) bs =
       
   204                     (if as = bs then vo else lookup t bs)";
       
   205 apply (induct_tac as, auto);
       
   206 apply (case_tac[!] bs, auto);
       
   207 done
       
   208 
       
   209 
       
   210 (* Exercise 3. Solution by Getrud Bauer *)
       
   211 datatype ('a,'v) triem = Triem  "'v option" "'a \<Rightarrow> ('a,'v) triem option";
       
   212 
       
   213 primrec valuem :: "('a, 'v) triem \<Rightarrow> 'v option" where
       
   214 "valuem (Triem ov m) = ov"
       
   215 
       
   216 primrec mapping :: "('a,'v) triem \<Rightarrow> 'a \<Rightarrow> ('a, 'v) triem option" where
       
   217 "mapping (Triem ov m) = m"
       
   218 
       
   219 primrec lookupm :: "('a,'v) triem \<Rightarrow> 'a list \<Rightarrow> 'v option" where
       
   220   "lookupm t [] = valuem t" |
       
   221   "lookupm t (a#as) = (case mapping t a of
       
   222                         None \<Rightarrow> None
       
   223                       | Some at \<Rightarrow> lookupm at as)";
       
   224 
       
   225 lemma [simp]: "lookupm (Triem None  (\<lambda>c. None)) as = None";
       
   226 apply (case_tac as, simp_all);
       
   227 done
       
   228 
       
   229 primrec updatem :: "('a,'v)triem \<Rightarrow> 'a list \<Rightarrow> 'v \<Rightarrow> ('a,'v)triem" where
       
   230   "updatem t []     v = Triem (Some v) (mapping t)" |
       
   231   "updatem t (a#as) v =
       
   232      (let tt = (case mapping t a of
       
   233                   None \<Rightarrow> Triem None (\<lambda>c. None) 
       
   234                 | Some at \<Rightarrow> at)
       
   235       in Triem (valuem t) 
       
   236               (\<lambda>c. if c = a then Some (updatem tt as v) else mapping t c))";
       
   237 
       
   238 theorem "\<forall>t v bs. lookupm (updatem t as v) bs = 
       
   239                     (if as = bs then Some v else lookupm t bs)";
       
   240 apply (induct_tac as, auto);
       
   241 apply (case_tac[!] bs, auto);
       
   242 done
       
   243 
       
   244 end;
       
   245 (*>*)