| 9722 |      1 | %
 | 
|  |      2 | \begin{isabellebody}%
 | 
| 9924 |      3 | \def\isabellecontext{Trie}%
 | 
| 17056 |      4 | %
 | 
|  |      5 | \isadelimtheory
 | 
|  |      6 | %
 | 
|  |      7 | \endisadelimtheory
 | 
|  |      8 | %
 | 
|  |      9 | \isatagtheory
 | 
|  |     10 | %
 | 
|  |     11 | \endisatagtheory
 | 
|  |     12 | {\isafoldtheory}%
 | 
|  |     13 | %
 | 
|  |     14 | \isadelimtheory
 | 
|  |     15 | %
 | 
|  |     16 | \endisadelimtheory
 | 
| 8749 |     17 | %
 | 
|  |     18 | \begin{isamarkuptext}%
 | 
|  |     19 | To minimize running time, each node of a trie should contain an array that maps
 | 
| 10795 |     20 | letters to subtries. We have chosen a
 | 
| 8749 |     21 | representation where the subtries are held in an association list, i.e.\ a
 | 
| 9792 |     22 | list of (letter,trie) pairs.  Abstracting over the alphabet \isa{{\isacharprime}a} and the
 | 
|  |     23 | values \isa{{\isacharprime}v} we define a trie as follows:%
 | 
| 8749 |     24 | \end{isamarkuptext}%
 | 
| 17175 |     25 | \isamarkuptrue%
 | 
|  |     26 | \isacommand{datatype}\isamarkupfalse%
 | 
|  |     27 | \ {\isacharparenleft}{\isacharprime}a{\isacharcomma}{\isacharprime}v{\isacharparenright}trie\ {\isacharequal}\ Trie\ \ {\isachardoublequoteopen}{\isacharprime}v\ option{\isachardoublequoteclose}\ \ {\isachardoublequoteopen}{\isacharparenleft}{\isacharprime}a\ {\isacharasterisk}\ {\isacharparenleft}{\isacharprime}a{\isacharcomma}{\isacharprime}v{\isacharparenright}trie{\isacharparenright}list{\isachardoublequoteclose}%
 | 
| 8749 |     28 | \begin{isamarkuptext}%
 | 
|  |     29 | \noindent
 | 
| 11480 |     30 | \index{datatypes!and nested recursion}%
 | 
| 8749 |     31 | The first component is the optional value, the second component the
 | 
|  |     32 | association list of subtries.  This is an example of nested recursion involving products,
 | 
|  |     33 | which is fine because products are datatypes as well.
 | 
|  |     34 | We define two selector functions:%
 | 
|  |     35 | \end{isamarkuptext}%
 | 
| 17175 |     36 | \isamarkuptrue%
 | 
|  |     37 | \isacommand{primrec}\isamarkupfalse%
 | 
| 27015 |     38 | \ {\isachardoublequoteopen}value{\isachardoublequoteclose}\ {\isacharcolon}{\isacharcolon}\ {\isachardoublequoteopen}{\isacharparenleft}{\isacharprime}a{\isacharcomma}{\isacharprime}v{\isacharparenright}trie\ {\isasymRightarrow}\ {\isacharprime}v\ option{\isachardoublequoteclose}\ \isakeyword{where}\isanewline
 | 
|  |     39 | {\isachardoublequoteopen}value{\isacharparenleft}Trie\ ov\ al{\isacharparenright}\ {\isacharequal}\ ov{\isachardoublequoteclose}\isanewline
 | 
| 17175 |     40 | \isacommand{primrec}\isamarkupfalse%
 | 
| 27015 |     41 | \ alist\ {\isacharcolon}{\isacharcolon}\ {\isachardoublequoteopen}{\isacharparenleft}{\isacharprime}a{\isacharcomma}{\isacharprime}v{\isacharparenright}trie\ {\isasymRightarrow}\ {\isacharparenleft}{\isacharprime}a\ {\isacharasterisk}\ {\isacharparenleft}{\isacharprime}a{\isacharcomma}{\isacharprime}v{\isacharparenright}trie{\isacharparenright}list{\isachardoublequoteclose}\ \isakeyword{where}\isanewline
 | 
|  |     42 | {\isachardoublequoteopen}alist{\isacharparenleft}Trie\ ov\ al{\isacharparenright}\ {\isacharequal}\ al{\isachardoublequoteclose}%
 | 
| 8749 |     43 | \begin{isamarkuptext}%
 | 
|  |     44 | \noindent
 | 
| 11309 |     45 | Association lists come with a generic lookup function.  Its result
 | 
|  |     46 | involves type \isa{option} because a lookup can fail:%
 | 
| 8749 |     47 | \end{isamarkuptext}%
 | 
| 17175 |     48 | \isamarkuptrue%
 | 
|  |     49 | \isacommand{primrec}\isamarkupfalse%
 | 
| 27015 |     50 | \ assoc\ {\isacharcolon}{\isacharcolon}\ {\isachardoublequoteopen}{\isacharparenleft}{\isacharprime}key\ {\isacharasterisk}\ {\isacharprime}val{\isacharparenright}list\ {\isasymRightarrow}\ {\isacharprime}key\ {\isasymRightarrow}\ {\isacharprime}val\ option{\isachardoublequoteclose}\ \isakeyword{where}\isanewline
 | 
|  |     51 | {\isachardoublequoteopen}assoc\ {\isacharbrackleft}{\isacharbrackright}\ x\ {\isacharequal}\ None{\isachardoublequoteclose}\ {\isacharbar}\isanewline
 | 
|  |     52 | {\isachardoublequoteopen}assoc\ {\isacharparenleft}p{\isacharhash}ps{\isacharparenright}\ x\ {\isacharequal}\isanewline
 | 
|  |     53 | \ \ \ {\isacharparenleft}let\ {\isacharparenleft}a{\isacharcomma}b{\isacharparenright}\ {\isacharequal}\ p\ in\ if\ a{\isacharequal}x\ then\ Some\ b\ else\ assoc\ ps\ x{\isacharparenright}{\isachardoublequoteclose}%
 | 
| 8749 |     54 | \begin{isamarkuptext}%
 | 
|  |     55 | Now we can define the lookup function for tries. It descends into the trie
 | 
|  |     56 | examining the letters of the search string one by one. As
 | 
|  |     57 | recursion on lists is simpler than on tries, let us express this as primitive
 | 
|  |     58 | recursion on the search string argument:%
 | 
|  |     59 | \end{isamarkuptext}%
 | 
| 17175 |     60 | \isamarkuptrue%
 | 
|  |     61 | \isacommand{primrec}\isamarkupfalse%
 | 
| 27015 |     62 | \ lookup\ {\isacharcolon}{\isacharcolon}\ {\isachardoublequoteopen}{\isacharparenleft}{\isacharprime}a{\isacharcomma}{\isacharprime}v{\isacharparenright}trie\ {\isasymRightarrow}\ {\isacharprime}a\ list\ {\isasymRightarrow}\ {\isacharprime}v\ option{\isachardoublequoteclose}\ \isakeyword{where}\isanewline
 | 
|  |     63 | {\isachardoublequoteopen}lookup\ t\ {\isacharbrackleft}{\isacharbrackright}\ {\isacharequal}\ value\ t{\isachardoublequoteclose}\ {\isacharbar}\isanewline
 | 
|  |     64 | {\isachardoublequoteopen}lookup\ t\ {\isacharparenleft}a{\isacharhash}as{\isacharparenright}\ {\isacharequal}\ {\isacharparenleft}case\ assoc\ {\isacharparenleft}alist\ t{\isacharparenright}\ a\ of\isanewline
 | 
|  |     65 | \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ None\ {\isasymRightarrow}\ None\isanewline
 | 
|  |     66 | \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ {\isacharbar}\ Some\ at\ {\isasymRightarrow}\ lookup\ at\ as{\isacharparenright}{\isachardoublequoteclose}%
 | 
| 8749 |     67 | \begin{isamarkuptext}%
 | 
|  |     68 | As a first simple property we prove that looking up a string in the empty
 | 
| 9792 |     69 | trie \isa{Trie\ None\ {\isacharbrackleft}{\isacharbrackright}} always returns \isa{None}. The proof merely
 | 
| 8749 |     70 | distinguishes the two cases whether the search string is empty or not:%
 | 
|  |     71 | \end{isamarkuptext}%
 | 
| 17175 |     72 | \isamarkuptrue%
 | 
|  |     73 | \isacommand{lemma}\isamarkupfalse%
 | 
|  |     74 | \ {\isacharbrackleft}simp{\isacharbrackright}{\isacharcolon}\ {\isachardoublequoteopen}lookup\ {\isacharparenleft}Trie\ None\ {\isacharbrackleft}{\isacharbrackright}{\isacharparenright}\ as\ {\isacharequal}\ None{\isachardoublequoteclose}\isanewline
 | 
| 17056 |     75 | %
 | 
|  |     76 | \isadelimproof
 | 
|  |     77 | %
 | 
|  |     78 | \endisadelimproof
 | 
|  |     79 | %
 | 
|  |     80 | \isatagproof
 | 
| 17175 |     81 | \isacommand{apply}\isamarkupfalse%
 | 
|  |     82 | {\isacharparenleft}case{\isacharunderscore}tac\ as{\isacharcomma}\ simp{\isacharunderscore}all{\isacharparenright}\isanewline
 | 
|  |     83 | \isacommand{done}\isamarkupfalse%
 | 
|  |     84 | %
 | 
| 17056 |     85 | \endisatagproof
 | 
|  |     86 | {\isafoldproof}%
 | 
|  |     87 | %
 | 
|  |     88 | \isadelimproof
 | 
|  |     89 | %
 | 
|  |     90 | \endisadelimproof
 | 
| 11866 |     91 | %
 | 
| 8749 |     92 | \begin{isamarkuptext}%
 | 
|  |     93 | Things begin to get interesting with the definition of an update function
 | 
| 12328 |     94 | that adds a new (string, value) pair to a trie, overwriting the old value
 | 
| 8749 |     95 | associated with that string:%
 | 
|  |     96 | \end{isamarkuptext}%
 | 
| 17175 |     97 | \isamarkuptrue%
 | 
|  |     98 | \isacommand{primrec}\isamarkupfalse%
 | 
| 27015 |     99 | \ update{\isacharcolon}{\isacharcolon}\ {\isachardoublequoteopen}{\isacharparenleft}{\isacharprime}a{\isacharcomma}{\isacharprime}v{\isacharparenright}trie\ {\isasymRightarrow}\ {\isacharprime}a\ list\ {\isasymRightarrow}\ {\isacharprime}v\ {\isasymRightarrow}\ {\isacharparenleft}{\isacharprime}a{\isacharcomma}{\isacharprime}v{\isacharparenright}trie{\isachardoublequoteclose}\ \isakeyword{where}\isanewline
 | 
|  |    100 | {\isachardoublequoteopen}update\ t\ {\isacharbrackleft}{\isacharbrackright}\ \ \ \ \ v\ {\isacharequal}\ Trie\ {\isacharparenleft}Some\ v{\isacharparenright}\ {\isacharparenleft}alist\ t{\isacharparenright}{\isachardoublequoteclose}\ {\isacharbar}\isanewline
 | 
|  |    101 | {\isachardoublequoteopen}update\ t\ {\isacharparenleft}a{\isacharhash}as{\isacharparenright}\ v\ {\isacharequal}\isanewline
 | 
|  |    102 | \ \ \ {\isacharparenleft}let\ tt\ {\isacharequal}\ {\isacharparenleft}case\ assoc\ {\isacharparenleft}alist\ t{\isacharparenright}\ a\ of\isanewline
 | 
|  |    103 | \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ None\ {\isasymRightarrow}\ Trie\ None\ {\isacharbrackleft}{\isacharbrackright}\ {\isacharbar}\ Some\ at\ {\isasymRightarrow}\ at{\isacharparenright}\isanewline
 | 
|  |    104 | \ \ \ \ in\ Trie\ {\isacharparenleft}value\ t{\isacharparenright}\ {\isacharparenleft}{\isacharparenleft}a{\isacharcomma}update\ tt\ as\ v{\isacharparenright}\ {\isacharhash}\ alist\ t{\isacharparenright}{\isacharparenright}{\isachardoublequoteclose}%
 | 
| 8749 |    105 | \begin{isamarkuptext}%
 | 
|  |    106 | \noindent
 | 
|  |    107 | The base case is obvious. In the recursive case the subtrie
 | 
|  |    108 | \isa{tt} associated with the first letter \isa{a} is extracted,
 | 
|  |    109 | recursively updated, and then placed in front of the association list.
 | 
|  |    110 | The old subtrie associated with \isa{a} is still in the association list
 | 
|  |    111 | but no longer accessible via \isa{assoc}. Clearly, there is room here for
 | 
|  |    112 | optimizations!
 | 
|  |    113 | 
 | 
|  |    114 | Before we start on any proofs about \isa{update} we tell the simplifier to
 | 
|  |    115 | expand all \isa{let}s and to split all \isa{case}-constructs over
 | 
|  |    116 | options:%
 | 
|  |    117 | \end{isamarkuptext}%
 | 
| 17175 |    118 | \isamarkuptrue%
 | 
|  |    119 | \isacommand{declare}\isamarkupfalse%
 | 
|  |    120 | \ Let{\isacharunderscore}def{\isacharbrackleft}simp{\isacharbrackright}\ option{\isachardot}split{\isacharbrackleft}split{\isacharbrackright}%
 | 
| 8749 |    121 | \begin{isamarkuptext}%
 | 
|  |    122 | \noindent
 | 
|  |    123 | The reason becomes clear when looking (probably after a failed proof
 | 
|  |    124 | attempt) at the body of \isa{update}: it contains both
 | 
|  |    125 | \isa{let} and a case distinction over type \isa{option}.
 | 
|  |    126 | 
 | 
|  |    127 | Our main goal is to prove the correct interaction of \isa{update} and
 | 
|  |    128 | \isa{lookup}:%
 | 
|  |    129 | \end{isamarkuptext}%
 | 
| 17175 |    130 | \isamarkuptrue%
 | 
|  |    131 | \isacommand{theorem}\isamarkupfalse%
 | 
|  |    132 | \ {\isachardoublequoteopen}{\isasymforall}t\ v\ bs{\isachardot}\ lookup\ {\isacharparenleft}update\ t\ as\ v{\isacharparenright}\ bs\ {\isacharequal}\isanewline
 | 
|  |    133 | \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ {\isacharparenleft}if\ as{\isacharequal}bs\ then\ Some\ v\ else\ lookup\ t\ bs{\isacharparenright}{\isachardoublequoteclose}%
 | 
| 17056 |    134 | \isadelimproof
 | 
|  |    135 | %
 | 
|  |    136 | \endisadelimproof
 | 
|  |    137 | %
 | 
|  |    138 | \isatagproof
 | 
| 16069 |    139 | %
 | 
|  |    140 | \begin{isamarkuptxt}%
 | 
|  |    141 | \noindent
 | 
|  |    142 | Our plan is to induct on \isa{as}; hence the remaining variables are
 | 
|  |    143 | quantified. From the definitions it is clear that induction on either
 | 
|  |    144 | \isa{as} or \isa{bs} is required. The choice of \isa{as} is 
 | 
|  |    145 | guided by the intuition that simplification of \isa{lookup} might be easier
 | 
|  |    146 | if \isa{update} has already been simplified, which can only happen if
 | 
|  |    147 | \isa{as} is instantiated.
 | 
|  |    148 | The start of the proof is conventional:%
 | 
|  |    149 | \end{isamarkuptxt}%
 | 
| 17175 |    150 | \isamarkuptrue%
 | 
|  |    151 | \isacommand{apply}\isamarkupfalse%
 | 
|  |    152 | {\isacharparenleft}induct{\isacharunderscore}tac\ as{\isacharcomma}\ auto{\isacharparenright}%
 | 
| 16069 |    153 | \begin{isamarkuptxt}%
 | 
|  |    154 | \noindent
 | 
|  |    155 | Unfortunately, this time we are left with three intimidating looking subgoals:
 | 
|  |    156 | \begin{isabelle}
 | 
|  |    157 | ~1.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs\isanewline
 | 
|  |    158 | ~2.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs\isanewline
 | 
|  |    159 | ~3.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs
 | 
|  |    160 | \end{isabelle}
 | 
|  |    161 | Clearly, if we want to make headway we have to instantiate \isa{bs} as
 | 
|  |    162 | well now. It turns out that instead of induction, case distinction
 | 
|  |    163 | suffices:%
 | 
|  |    164 | \end{isamarkuptxt}%
 | 
| 17175 |    165 | \isamarkuptrue%
 | 
|  |    166 | \isacommand{apply}\isamarkupfalse%
 | 
|  |    167 | {\isacharparenleft}case{\isacharunderscore}tac{\isacharbrackleft}{\isacharbang}{\isacharbrackright}\ bs{\isacharcomma}\ auto{\isacharparenright}\isanewline
 | 
|  |    168 | \isacommand{done}\isamarkupfalse%
 | 
|  |    169 | %
 | 
| 17056 |    170 | \endisatagproof
 | 
|  |    171 | {\isafoldproof}%
 | 
|  |    172 | %
 | 
|  |    173 | \isadelimproof
 | 
|  |    174 | %
 | 
|  |    175 | \endisadelimproof
 | 
| 11866 |    176 | %
 | 
| 8749 |    177 | \begin{isamarkuptext}%
 | 
|  |    178 | \noindent
 | 
| 11480 |    179 | \index{subgoal numbering}%
 | 
| 9541 |    180 | All methods ending in \isa{tac} take an optional first argument that
 | 
| 9792 |    181 | specifies the range of subgoals they are applied to, where \isa{{\isacharbrackleft}{\isacharbang}{\isacharbrackright}} means
 | 
| 10187 |    182 | all subgoals, i.e.\ \isa{{\isacharbrackleft}{\isadigit{1}}{\isacharminus}{\isadigit{3}}{\isacharbrackright}} in our case. Individual subgoal numbers,
 | 
|  |    183 | e.g. \isa{{\isacharbrackleft}{\isadigit{2}}{\isacharbrackright}} are also allowed.
 | 
| 8749 |    184 | 
 | 
|  |    185 | This proof may look surprisingly straightforward. However, note that this
 | 
| 9792 |    186 | comes at a cost: the proof script is unreadable because the intermediate
 | 
|  |    187 | proof states are invisible, and we rely on the (possibly brittle) magic of
 | 
| 10971 |    188 | \isa{auto} (\isa{simp{\isacharunderscore}all} will not do --- try it) to split the subgoals
 | 
| 9792 |    189 | of the induction up in such a way that case distinction on \isa{bs} makes
 | 
| 11480 |    190 | sense and solves the proof. 
 | 
| 11294 |    191 | 
 | 
|  |    192 | \begin{exercise}
 | 
|  |    193 |   Modify \isa{update} (and its type) such that it allows both insertion and
 | 
| 11304 |    194 |   deletion of entries with a single function.  Prove the corresponding version 
 | 
|  |    195 |   of the main theorem above.
 | 
|  |    196 |   Optimize your function such that it shrinks tries after
 | 
|  |    197 |   deletion if possible.
 | 
| 11294 |    198 | \end{exercise}
 | 
|  |    199 | 
 | 
|  |    200 | \begin{exercise}
 | 
|  |    201 |   Write an improved version of \isa{update} that does not suffer from the
 | 
|  |    202 |   space leak (pointed out above) caused by not deleting overwritten entries
 | 
|  |    203 |   from the association list. Prove the main theorem for your improved
 | 
|  |    204 |   \isa{update}.
 | 
|  |    205 | \end{exercise}
 | 
|  |    206 | 
 | 
|  |    207 | \begin{exercise}
 | 
|  |    208 |   Conceptually, each node contains a mapping from letters to optional
 | 
|  |    209 |   subtries. Above we have implemented this by means of an association
 | 
|  |    210 |   list. Replay the development replacing \isa{{\isacharparenleft}{\isacharprime}a\ {\isasymtimes}\ {\isacharparenleft}{\isacharprime}a{\isacharcomma}\ {\isacharprime}v{\isacharparenright}\ trie{\isacharparenright}\ list}
 | 
| 14179 |    211 |   with \isa{{\isacharprime}a\ {\isasymrightharpoonup}\ {\isacharparenleft}{\isacharprime}a{\isacharcomma}\ {\isacharprime}v{\isacharparenright}\ trie}.
 | 
| 11294 |    212 | \end{exercise}%
 | 
| 8749 |    213 | \end{isamarkuptext}%
 | 
| 17175 |    214 | \isamarkuptrue%
 | 
| 17056 |    215 | %
 | 
|  |    216 | \isadelimproof
 | 
|  |    217 | %
 | 
|  |    218 | \endisadelimproof
 | 
|  |    219 | %
 | 
|  |    220 | \isatagproof
 | 
|  |    221 | %
 | 
|  |    222 | \endisatagproof
 | 
|  |    223 | {\isafoldproof}%
 | 
|  |    224 | %
 | 
|  |    225 | \isadelimproof
 | 
|  |    226 | %
 | 
|  |    227 | \endisadelimproof
 | 
|  |    228 | %
 | 
|  |    229 | \isadelimproof
 | 
|  |    230 | %
 | 
|  |    231 | \endisadelimproof
 | 
|  |    232 | %
 | 
|  |    233 | \isatagproof
 | 
|  |    234 | %
 | 
|  |    235 | \endisatagproof
 | 
|  |    236 | {\isafoldproof}%
 | 
|  |    237 | %
 | 
|  |    238 | \isadelimproof
 | 
|  |    239 | %
 | 
|  |    240 | \endisadelimproof
 | 
|  |    241 | %
 | 
|  |    242 | \isadelimproof
 | 
|  |    243 | %
 | 
|  |    244 | \endisadelimproof
 | 
|  |    245 | %
 | 
|  |    246 | \isatagproof
 | 
|  |    247 | %
 | 
|  |    248 | \endisatagproof
 | 
|  |    249 | {\isafoldproof}%
 | 
|  |    250 | %
 | 
|  |    251 | \isadelimproof
 | 
|  |    252 | %
 | 
|  |    253 | \endisadelimproof
 | 
|  |    254 | %
 | 
|  |    255 | \isadelimproof
 | 
|  |    256 | %
 | 
|  |    257 | \endisadelimproof
 | 
|  |    258 | %
 | 
|  |    259 | \isatagproof
 | 
|  |    260 | %
 | 
|  |    261 | \endisatagproof
 | 
|  |    262 | {\isafoldproof}%
 | 
|  |    263 | %
 | 
|  |    264 | \isadelimproof
 | 
|  |    265 | %
 | 
|  |    266 | \endisadelimproof
 | 
|  |    267 | %
 | 
|  |    268 | \isadelimproof
 | 
|  |    269 | %
 | 
|  |    270 | \endisadelimproof
 | 
|  |    271 | %
 | 
|  |    272 | \isatagproof
 | 
|  |    273 | %
 | 
|  |    274 | \endisatagproof
 | 
|  |    275 | {\isafoldproof}%
 | 
|  |    276 | %
 | 
|  |    277 | \isadelimproof
 | 
|  |    278 | %
 | 
|  |    279 | \endisadelimproof
 | 
|  |    280 | %
 | 
|  |    281 | \isadelimtheory
 | 
|  |    282 | %
 | 
|  |    283 | \endisadelimtheory
 | 
|  |    284 | %
 | 
|  |    285 | \isatagtheory
 | 
|  |    286 | %
 | 
|  |    287 | \endisatagtheory
 | 
|  |    288 | {\isafoldtheory}%
 | 
|  |    289 | %
 | 
|  |    290 | \isadelimtheory
 | 
|  |    291 | %
 | 
|  |    292 | \endisadelimtheory
 | 
| 9722 |    293 | \end{isabellebody}%
 | 
| 9145 |    294 | %%% Local Variables:
 | 
|  |    295 | %%% mode: latex
 | 
|  |    296 | %%% TeX-master: "root"
 | 
|  |    297 | %%% End:
 |