diff -r 619531d87ce4 -r 4e2ee88276d2 doc-src/TutorialI/document/Trie.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc-src/TutorialI/document/Trie.tex Thu Jul 26 19:59:06 2012 +0200 @@ -0,0 +1,297 @@ +% +\begin{isabellebody}% +\def\isabellecontext{Trie}% +% +\isadelimtheory +% +\endisadelimtheory +% +\isatagtheory +% +\endisatagtheory +{\isafoldtheory}% +% +\isadelimtheory +% +\endisadelimtheory +% +\begin{isamarkuptext}% +To minimize running time, each node of a trie should contain an array that maps +letters to subtries. We have chosen a +representation where the subtries are held in an association list, i.e.\ a +list of (letter,trie) pairs. Abstracting over the alphabet \isa{{\isaliteral{27}{\isacharprime}}a} and the +values \isa{{\isaliteral{27}{\isacharprime}}v} we define a trie as follows:% +\end{isamarkuptext}% +\isamarkuptrue% +\isacommand{datatype}\isamarkupfalse% +\ {\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}a{\isaliteral{2C}{\isacharcomma}}{\isaliteral{27}{\isacharprime}}v{\isaliteral{29}{\isacharparenright}}trie\ {\isaliteral{3D}{\isacharequal}}\ Trie\ \ {\isaliteral{22}{\isachardoublequoteopen}}{\isaliteral{27}{\isacharprime}}v\ option{\isaliteral{22}{\isachardoublequoteclose}}\ \ {\isaliteral{22}{\isachardoublequoteopen}}{\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}a\ {\isaliteral{2A}{\isacharasterisk}}\ {\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}a{\isaliteral{2C}{\isacharcomma}}{\isaliteral{27}{\isacharprime}}v{\isaliteral{29}{\isacharparenright}}trie{\isaliteral{29}{\isacharparenright}}list{\isaliteral{22}{\isachardoublequoteclose}}% +\begin{isamarkuptext}% +\noindent +\index{datatypes!and nested recursion}% +The first component is the optional value, the second component the +association list of subtries. This is an example of nested recursion involving products, +which is fine because products are datatypes as well. +We define two selector functions:% +\end{isamarkuptext}% +\isamarkuptrue% +\isacommand{primrec}\isamarkupfalse% +\ {\isaliteral{22}{\isachardoublequoteopen}}value{\isaliteral{22}{\isachardoublequoteclose}}\ {\isaliteral{3A}{\isacharcolon}}{\isaliteral{3A}{\isacharcolon}}\ {\isaliteral{22}{\isachardoublequoteopen}}{\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}a{\isaliteral{2C}{\isacharcomma}}{\isaliteral{27}{\isacharprime}}v{\isaliteral{29}{\isacharparenright}}trie\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ {\isaliteral{27}{\isacharprime}}v\ option{\isaliteral{22}{\isachardoublequoteclose}}\ \isakeyword{where}\isanewline +{\isaliteral{22}{\isachardoublequoteopen}}value{\isaliteral{28}{\isacharparenleft}}Trie\ ov\ al{\isaliteral{29}{\isacharparenright}}\ {\isaliteral{3D}{\isacharequal}}\ ov{\isaliteral{22}{\isachardoublequoteclose}}\isanewline +\isacommand{primrec}\isamarkupfalse% +\ alist\ {\isaliteral{3A}{\isacharcolon}}{\isaliteral{3A}{\isacharcolon}}\ {\isaliteral{22}{\isachardoublequoteopen}}{\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}a{\isaliteral{2C}{\isacharcomma}}{\isaliteral{27}{\isacharprime}}v{\isaliteral{29}{\isacharparenright}}trie\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ {\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}a\ {\isaliteral{2A}{\isacharasterisk}}\ {\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}a{\isaliteral{2C}{\isacharcomma}}{\isaliteral{27}{\isacharprime}}v{\isaliteral{29}{\isacharparenright}}trie{\isaliteral{29}{\isacharparenright}}list{\isaliteral{22}{\isachardoublequoteclose}}\ \isakeyword{where}\isanewline +{\isaliteral{22}{\isachardoublequoteopen}}alist{\isaliteral{28}{\isacharparenleft}}Trie\ ov\ al{\isaliteral{29}{\isacharparenright}}\ {\isaliteral{3D}{\isacharequal}}\ al{\isaliteral{22}{\isachardoublequoteclose}}% +\begin{isamarkuptext}% +\noindent +Association lists come with a generic lookup function. Its result +involves type \isa{option} because a lookup can fail:% +\end{isamarkuptext}% +\isamarkuptrue% +\isacommand{primrec}\isamarkupfalse% +\ assoc\ {\isaliteral{3A}{\isacharcolon}}{\isaliteral{3A}{\isacharcolon}}\ {\isaliteral{22}{\isachardoublequoteopen}}{\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}key\ {\isaliteral{2A}{\isacharasterisk}}\ {\isaliteral{27}{\isacharprime}}val{\isaliteral{29}{\isacharparenright}}list\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ {\isaliteral{27}{\isacharprime}}key\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ {\isaliteral{27}{\isacharprime}}val\ option{\isaliteral{22}{\isachardoublequoteclose}}\ \isakeyword{where}\isanewline +{\isaliteral{22}{\isachardoublequoteopen}}assoc\ {\isaliteral{5B}{\isacharbrackleft}}{\isaliteral{5D}{\isacharbrackright}}\ x\ {\isaliteral{3D}{\isacharequal}}\ None{\isaliteral{22}{\isachardoublequoteclose}}\ {\isaliteral{7C}{\isacharbar}}\isanewline +{\isaliteral{22}{\isachardoublequoteopen}}assoc\ {\isaliteral{28}{\isacharparenleft}}p{\isaliteral{23}{\isacharhash}}ps{\isaliteral{29}{\isacharparenright}}\ x\ {\isaliteral{3D}{\isacharequal}}\isanewline +\ \ \ {\isaliteral{28}{\isacharparenleft}}let\ {\isaliteral{28}{\isacharparenleft}}a{\isaliteral{2C}{\isacharcomma}}b{\isaliteral{29}{\isacharparenright}}\ {\isaliteral{3D}{\isacharequal}}\ p\ in\ if\ a{\isaliteral{3D}{\isacharequal}}x\ then\ Some\ b\ else\ assoc\ ps\ x{\isaliteral{29}{\isacharparenright}}{\isaliteral{22}{\isachardoublequoteclose}}% +\begin{isamarkuptext}% +Now we can define the lookup function for tries. It descends into the trie +examining the letters of the search string one by one. As +recursion on lists is simpler than on tries, let us express this as primitive +recursion on the search string argument:% +\end{isamarkuptext}% +\isamarkuptrue% +\isacommand{primrec}\isamarkupfalse% +\ lookup\ {\isaliteral{3A}{\isacharcolon}}{\isaliteral{3A}{\isacharcolon}}\ {\isaliteral{22}{\isachardoublequoteopen}}{\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}a{\isaliteral{2C}{\isacharcomma}}{\isaliteral{27}{\isacharprime}}v{\isaliteral{29}{\isacharparenright}}trie\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ {\isaliteral{27}{\isacharprime}}a\ list\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ {\isaliteral{27}{\isacharprime}}v\ option{\isaliteral{22}{\isachardoublequoteclose}}\ \isakeyword{where}\isanewline +{\isaliteral{22}{\isachardoublequoteopen}}lookup\ t\ {\isaliteral{5B}{\isacharbrackleft}}{\isaliteral{5D}{\isacharbrackright}}\ {\isaliteral{3D}{\isacharequal}}\ value\ t{\isaliteral{22}{\isachardoublequoteclose}}\ {\isaliteral{7C}{\isacharbar}}\isanewline +{\isaliteral{22}{\isachardoublequoteopen}}lookup\ t\ {\isaliteral{28}{\isacharparenleft}}a{\isaliteral{23}{\isacharhash}}as{\isaliteral{29}{\isacharparenright}}\ {\isaliteral{3D}{\isacharequal}}\ {\isaliteral{28}{\isacharparenleft}}case\ assoc\ {\isaliteral{28}{\isacharparenleft}}alist\ t{\isaliteral{29}{\isacharparenright}}\ a\ of\isanewline +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ None\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ None\isanewline +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ {\isaliteral{7C}{\isacharbar}}\ Some\ at\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ lookup\ at\ as{\isaliteral{29}{\isacharparenright}}{\isaliteral{22}{\isachardoublequoteclose}}% +\begin{isamarkuptext}% +As a first simple property we prove that looking up a string in the empty +trie \isa{Trie\ None\ {\isaliteral{5B}{\isacharbrackleft}}{\isaliteral{5D}{\isacharbrackright}}} always returns \isa{None}. The proof merely +distinguishes the two cases whether the search string is empty or not:% +\end{isamarkuptext}% +\isamarkuptrue% +\isacommand{lemma}\isamarkupfalse% +\ {\isaliteral{5B}{\isacharbrackleft}}simp{\isaliteral{5D}{\isacharbrackright}}{\isaliteral{3A}{\isacharcolon}}\ {\isaliteral{22}{\isachardoublequoteopen}}lookup\ {\isaliteral{28}{\isacharparenleft}}Trie\ None\ {\isaliteral{5B}{\isacharbrackleft}}{\isaliteral{5D}{\isacharbrackright}}{\isaliteral{29}{\isacharparenright}}\ as\ {\isaliteral{3D}{\isacharequal}}\ None{\isaliteral{22}{\isachardoublequoteclose}}\isanewline +% +\isadelimproof +% +\endisadelimproof +% +\isatagproof +\isacommand{apply}\isamarkupfalse% +{\isaliteral{28}{\isacharparenleft}}case{\isaliteral{5F}{\isacharunderscore}}tac\ as{\isaliteral{2C}{\isacharcomma}}\ simp{\isaliteral{5F}{\isacharunderscore}}all{\isaliteral{29}{\isacharparenright}}\isanewline +\isacommand{done}\isamarkupfalse% +% +\endisatagproof +{\isafoldproof}% +% +\isadelimproof +% +\endisadelimproof +% +\begin{isamarkuptext}% +Things begin to get interesting with the definition of an update function +that adds a new (string, value) pair to a trie, overwriting the old value +associated with that string:% +\end{isamarkuptext}% +\isamarkuptrue% +\isacommand{primrec}\isamarkupfalse% +\ update{\isaliteral{3A}{\isacharcolon}}{\isaliteral{3A}{\isacharcolon}}\ {\isaliteral{22}{\isachardoublequoteopen}}{\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}a{\isaliteral{2C}{\isacharcomma}}{\isaliteral{27}{\isacharprime}}v{\isaliteral{29}{\isacharparenright}}trie\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ {\isaliteral{27}{\isacharprime}}a\ list\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ {\isaliteral{27}{\isacharprime}}v\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ {\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}a{\isaliteral{2C}{\isacharcomma}}{\isaliteral{27}{\isacharprime}}v{\isaliteral{29}{\isacharparenright}}trie{\isaliteral{22}{\isachardoublequoteclose}}\ \isakeyword{where}\isanewline +{\isaliteral{22}{\isachardoublequoteopen}}update\ t\ {\isaliteral{5B}{\isacharbrackleft}}{\isaliteral{5D}{\isacharbrackright}}\ \ \ \ \ v\ {\isaliteral{3D}{\isacharequal}}\ Trie\ {\isaliteral{28}{\isacharparenleft}}Some\ v{\isaliteral{29}{\isacharparenright}}\ {\isaliteral{28}{\isacharparenleft}}alist\ t{\isaliteral{29}{\isacharparenright}}{\isaliteral{22}{\isachardoublequoteclose}}\ {\isaliteral{7C}{\isacharbar}}\isanewline +{\isaliteral{22}{\isachardoublequoteopen}}update\ t\ {\isaliteral{28}{\isacharparenleft}}a{\isaliteral{23}{\isacharhash}}as{\isaliteral{29}{\isacharparenright}}\ v\ {\isaliteral{3D}{\isacharequal}}\isanewline +\ \ \ {\isaliteral{28}{\isacharparenleft}}let\ tt\ {\isaliteral{3D}{\isacharequal}}\ {\isaliteral{28}{\isacharparenleft}}case\ assoc\ {\isaliteral{28}{\isacharparenleft}}alist\ t{\isaliteral{29}{\isacharparenright}}\ a\ of\isanewline +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ None\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ Trie\ None\ {\isaliteral{5B}{\isacharbrackleft}}{\isaliteral{5D}{\isacharbrackright}}\ {\isaliteral{7C}{\isacharbar}}\ Some\ at\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ at{\isaliteral{29}{\isacharparenright}}\isanewline +\ \ \ \ in\ Trie\ {\isaliteral{28}{\isacharparenleft}}value\ t{\isaliteral{29}{\isacharparenright}}\ {\isaliteral{28}{\isacharparenleft}}{\isaliteral{28}{\isacharparenleft}}a{\isaliteral{2C}{\isacharcomma}}update\ tt\ as\ v{\isaliteral{29}{\isacharparenright}}\ {\isaliteral{23}{\isacharhash}}\ alist\ t{\isaliteral{29}{\isacharparenright}}{\isaliteral{29}{\isacharparenright}}{\isaliteral{22}{\isachardoublequoteclose}}% +\begin{isamarkuptext}% +\noindent +The base case is obvious. In the recursive case the subtrie +\isa{tt} associated with the first letter \isa{a} is extracted, +recursively updated, and then placed in front of the association list. +The old subtrie associated with \isa{a} is still in the association list +but no longer accessible via \isa{assoc}. Clearly, there is room here for +optimizations! + +Before we start on any proofs about \isa{update} we tell the simplifier to +expand all \isa{let}s and to split all \isa{case}-constructs over +options:% +\end{isamarkuptext}% +\isamarkuptrue% +\isacommand{declare}\isamarkupfalse% +\ Let{\isaliteral{5F}{\isacharunderscore}}def{\isaliteral{5B}{\isacharbrackleft}}simp{\isaliteral{5D}{\isacharbrackright}}\ option{\isaliteral{2E}{\isachardot}}split{\isaliteral{5B}{\isacharbrackleft}}split{\isaliteral{5D}{\isacharbrackright}}% +\begin{isamarkuptext}% +\noindent +The reason becomes clear when looking (probably after a failed proof +attempt) at the body of \isa{update}: it contains both +\isa{let} and a case distinction over type \isa{option}. + +Our main goal is to prove the correct interaction of \isa{update} and +\isa{lookup}:% +\end{isamarkuptext}% +\isamarkuptrue% +\isacommand{theorem}\isamarkupfalse% +\ {\isaliteral{22}{\isachardoublequoteopen}}{\isaliteral{5C3C666F72616C6C3E}{\isasymforall}}t\ v\ bs{\isaliteral{2E}{\isachardot}}\ lookup\ {\isaliteral{28}{\isacharparenleft}}update\ t\ as\ v{\isaliteral{29}{\isacharparenright}}\ bs\ {\isaliteral{3D}{\isacharequal}}\isanewline +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ {\isaliteral{28}{\isacharparenleft}}if\ as{\isaliteral{3D}{\isacharequal}}bs\ then\ Some\ v\ else\ lookup\ t\ bs{\isaliteral{29}{\isacharparenright}}{\isaliteral{22}{\isachardoublequoteclose}}% +\isadelimproof +% +\endisadelimproof +% +\isatagproof +% +\begin{isamarkuptxt}% +\noindent +Our plan is to induct on \isa{as}; hence the remaining variables are +quantified. From the definitions it is clear that induction on either +\isa{as} or \isa{bs} is required. The choice of \isa{as} is +guided by the intuition that simplification of \isa{lookup} might be easier +if \isa{update} has already been simplified, which can only happen if +\isa{as} is instantiated. +The start of the proof is conventional:% +\end{isamarkuptxt}% +\isamarkuptrue% +\isacommand{apply}\isamarkupfalse% +{\isaliteral{28}{\isacharparenleft}}induct{\isaliteral{5F}{\isacharunderscore}}tac\ as{\isaliteral{2C}{\isacharcomma}}\ auto{\isaliteral{29}{\isacharparenright}}% +\begin{isamarkuptxt}% +\noindent +Unfortunately, this time we are left with three intimidating looking subgoals: +\begin{isabelle} +~1.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs\isanewline +~2.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs\isanewline +~3.~\dots~{\isasymLongrightarrow}~lookup~\dots~bs~=~lookup~t~bs +\end{isabelle} +Clearly, if we want to make headway we have to instantiate \isa{bs} as +well now. It turns out that instead of induction, case distinction +suffices:% +\end{isamarkuptxt}% +\isamarkuptrue% +\isacommand{apply}\isamarkupfalse% +{\isaliteral{28}{\isacharparenleft}}case{\isaliteral{5F}{\isacharunderscore}}tac{\isaliteral{5B}{\isacharbrackleft}}{\isaliteral{21}{\isacharbang}}{\isaliteral{5D}{\isacharbrackright}}\ bs{\isaliteral{2C}{\isacharcomma}}\ auto{\isaliteral{29}{\isacharparenright}}\isanewline +\isacommand{done}\isamarkupfalse% +% +\endisatagproof +{\isafoldproof}% +% +\isadelimproof +% +\endisadelimproof +% +\begin{isamarkuptext}% +\noindent +\index{subgoal numbering}% +All methods ending in \isa{tac} take an optional first argument that +specifies the range of subgoals they are applied to, where \isa{{\isaliteral{5B}{\isacharbrackleft}}{\isaliteral{21}{\isacharbang}}{\isaliteral{5D}{\isacharbrackright}}} means +all subgoals, i.e.\ \isa{{\isaliteral{5B}{\isacharbrackleft}}{\isadigit{1}}{\isaliteral{2D}{\isacharminus}}{\isadigit{3}}{\isaliteral{5D}{\isacharbrackright}}} in our case. Individual subgoal numbers, +e.g. \isa{{\isaliteral{5B}{\isacharbrackleft}}{\isadigit{2}}{\isaliteral{5D}{\isacharbrackright}}} are also allowed. + +This proof may look surprisingly straightforward. However, note that this +comes at a cost: the proof script is unreadable because the intermediate +proof states are invisible, and we rely on the (possibly brittle) magic of +\isa{auto} (\isa{simp{\isaliteral{5F}{\isacharunderscore}}all} will not do --- try it) to split the subgoals +of the induction up in such a way that case distinction on \isa{bs} makes +sense and solves the proof. + +\begin{exercise} + Modify \isa{update} (and its type) such that it allows both insertion and + deletion of entries with a single function. Prove the corresponding version + of the main theorem above. + Optimize your function such that it shrinks tries after + deletion if possible. +\end{exercise} + +\begin{exercise} + Write an improved version of \isa{update} that does not suffer from the + space leak (pointed out above) caused by not deleting overwritten entries + from the association list. Prove the main theorem for your improved + \isa{update}. +\end{exercise} + +\begin{exercise} + Conceptually, each node contains a mapping from letters to optional + subtries. Above we have implemented this by means of an association + list. Replay the development replacing \isa{{\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}a\ {\isaliteral{5C3C74696D65733E}{\isasymtimes}}\ {\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}a{\isaliteral{2C}{\isacharcomma}}\ {\isaliteral{27}{\isacharprime}}v{\isaliteral{29}{\isacharparenright}}\ trie{\isaliteral{29}{\isacharparenright}}\ list} + with \isa{{\isaliteral{27}{\isacharprime}}a\ {\isaliteral{5C3C52696768746172726F773E}{\isasymRightarrow}}\ {\isaliteral{28}{\isacharparenleft}}{\isaliteral{27}{\isacharprime}}a{\isaliteral{2C}{\isacharcomma}}\ {\isaliteral{27}{\isacharprime}}v{\isaliteral{29}{\isacharparenright}}\ trie\ option}. +\end{exercise}% +\end{isamarkuptext}% +\isamarkuptrue% +% +\isadelimproof +% +\endisadelimproof +% +\isatagproof +% +\endisatagproof +{\isafoldproof}% +% +\isadelimproof +% +\endisadelimproof +% +\isadelimproof +% +\endisadelimproof +% +\isatagproof +% +\endisatagproof +{\isafoldproof}% +% +\isadelimproof +% +\endisadelimproof +% +\isadelimproof +% +\endisadelimproof +% +\isatagproof +% +\endisatagproof +{\isafoldproof}% +% +\isadelimproof +% +\endisadelimproof +% +\isadelimproof +% +\endisadelimproof +% +\isatagproof +% +\endisatagproof +{\isafoldproof}% +% +\isadelimproof +% +\endisadelimproof +% +\isadelimproof +% +\endisadelimproof +% +\isatagproof +% +\endisatagproof +{\isafoldproof}% +% +\isadelimproof +% +\endisadelimproof +% +\isadelimtheory +% +\endisadelimtheory +% +\isatagtheory +% +\endisatagtheory +{\isafoldtheory}% +% +\isadelimtheory +% +\endisadelimtheory +\end{isabellebody}% +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "root" +%%% End: