70250
|
1 |
(* Author: Tobias Nipkow *)
|
|
2 |
|
|
3 |
section "Binary Tries and Patricia Tries"
|
|
4 |
|
|
5 |
theory Tries_Binary
|
|
6 |
imports Set_Specs
|
|
7 |
begin
|
|
8 |
|
|
9 |
(* Hide the unqualified name of an already existing constant "insert" so that this theory
   can define its own function called insert further down.
   NOTE(review): presumably the hidden constant is the set insertion from HOL; confirm. *)
hide_const (open) insert
|
|
10 |
|
|
11 |
(* Make the simplifier unfold let-expressions by default in the rest of the theory;
   several definitions below (isinP, lcp, insertP, prefix_trie) are written with let. *)
declare Let_def[simp]
|
|
12 |
|
|
13 |
(* sel2 b (a1,a2) selects one component of a pair: a2 if b is True, a1 otherwise. *)
fun sel2 :: "bool \<Rightarrow> 'a * 'a \<Rightarrow> 'a" where
"sel2 b (a1,a2) = (if b then a2 else a1)"
|
|
15 |
|
|
16 |
(* mod2 f b (a1,a2) applies f to exactly one component of the pair:
   to a2 if b is True, to a1 otherwise. The other component is returned unchanged. *)
fun mod2 :: "('a \<Rightarrow> 'a) \<Rightarrow> bool \<Rightarrow> 'a * 'a \<Rightarrow> 'a * 'a" where
"mod2 f b (a1,a2) = (if b then (a1,f a2) else (f a1,a2))"
|
|
18 |
|
|
19 |
|
|
20 |
subsection "Trie"
|
|
21 |
|
|
22 |
(* Binary trie over bool-list keys. Lf is the empty trie; Nd b (l,r) carries a flag b and a
   pair of subtries. As used by isin: b decides membership of the empty key at this node,
   and the next key bit selects the subtrie via sel2 (False: first component, True: second). *)
datatype trie = Lf | Nd bool "trie * trie"
|
|
23 |
|
70267
|
24 |
(* The empty trie is the leaf Lf. The defining equation is declared [simp],
   so the simplifier unfolds empty automatically. *)
definition empty :: trie where
[simp]: "empty = Lf"
|
|
26 |
|
70250
|
27 |
(* Membership test: isin t ks decides whether key ks is in trie t.
   A leaf contains no key. At a node, the empty key is decided by the flag b;
   a key k#ks continues in the subtrie selected by its first bit k. *)
fun isin :: "trie \<Rightarrow> bool list \<Rightarrow> bool" where
"isin Lf ks = False" |
"isin (Nd b lr) ks =
   (case ks of
      [] \<Rightarrow> b |
      k#ks \<Rightarrow> isin (sel2 k lr) ks)"
|
|
33 |
|
|
34 |
(* insert ks t adds key ks to trie t.
   Empty key: the flag of the current node becomes True (a leaf becomes Nd True (Lf,Lf)).
   Key k#ks: the rest ks is inserted into the child selected by k; a leaf is first
   expanded to a node with flag False and two leaf children. *)
fun insert :: "bool list \<Rightarrow> trie \<Rightarrow> trie" where
"insert [] Lf = Nd True (Lf,Lf)" |
"insert [] (Nd b lr) = Nd True lr" |
"insert (k#ks) Lf = Nd False (mod2 (insert ks) k (Lf,Lf))" |
"insert (k#ks) (Nd b lr) = Nd b (mod2 (insert ks) k lr)"
|
|
39 |
|
70267
|
40 |
(* Correctness of insert w.r.t. isin: after inserting xs, a key ys is a member
   iff it is xs itself or was already a member of t.
   Proof: induction along the recursion of insert, generalizing ys. *)
lemma isin_insert: "isin (insert xs t) ys = (xs = ys \<or> isin t ys)"
apply(induction xs t arbitrary: ys rule: insert.induct)
apply (auto split: list.splits if_splits)
done
|
|
44 |
|
|
45 |
text \<open>A simple implementation of delete; does not shrink the trie!\<close>
|
|
46 |
|
|
47 |
(* delete0 ks t removes key ks by setting the flag of the node addressed by ks to False.
   A leaf is returned unchanged. Nodes are never removed, so the result keeps the shape of t. *)
fun delete0 :: "bool list \<Rightarrow> trie \<Rightarrow> trie" where
"delete0 ks Lf = Lf" |
"delete0 ks (Nd b lr) =
   (case ks of
      [] \<Rightarrow> Nd False lr |
      k#ks' \<Rightarrow> Nd b (mod2 (delete0 ks') k lr))"
|
|
53 |
|
|
54 |
(* Correctness of delete0 w.r.t. isin: after deleting as, a key bs is a member
   iff it differs from as and was a member of t.
   Proof: induction along the recursion of delete0, generalizing bs. *)
lemma isin_delete0: "isin (delete0 as t) bs = (as \<noteq> bs \<and> isin t bs)"
apply(induction as t arbitrary: bs rule: delete0.induct)
apply (auto split: list.splits if_splits)
done
|
|
58 |
|
|
59 |
text \<open>Now deletion with shrinking:\<close>
|
|
60 |
|
|
61 |
(* Smart constructor used by delete: node b lr is Lf when the flag b is False and
   both children are Lf, and the ordinary node Nd b lr otherwise. *)
fun node :: "bool \<Rightarrow> trie * trie \<Rightarrow> trie" where
"node b lr = (if \<not> b \<and> lr = (Lf,Lf) then Lf else Nd b lr)"
|
|
63 |
|
|
64 |
(* delete ks t removes key ks like delete0, but rebuilds every node on the search path
   with the smart constructor node, so a node whose flag is False and whose children
   are both Lf collapses to Lf. *)
fun delete :: "bool list \<Rightarrow> trie \<Rightarrow> trie" where
"delete ks Lf = Lf" |
"delete ks (Nd b lr) =
   (case ks of
      [] \<Rightarrow> node False lr |
      k#ks' \<Rightarrow> node b (mod2 (delete ks') k lr))"
|
|
70 |
|
70267
|
71 |
(* Correctness of delete w.r.t. isin: after deleting xs, a key ys is a member
   iff it differs from xs and was a member of t.
   Proof: induction along the recursion of delete; simp is applied to the first subgoal,
   auto to all subgoals, and the two subgoals that auto leaves open are closed by metis
   with the first isin equation (isin Lf ks = False). *)
lemma isin_delete: "isin (delete xs t) ys = (xs \<noteq> ys \<and> isin t ys)"
apply(induction xs t arbitrary: ys rule: delete.induct)
 apply simp
apply (auto split: list.splits if_splits)
 apply (metis isin.simps(1))
apply (metis isin.simps(1))
done
|
|
78 |
|
|
79 |
(* Abstraction function: the set of all keys for which isin holds on t. *)
definition set_trie :: "trie \<Rightarrow> bool list set" where
"set_trie t = {xs. isin t xs}"
|
|
81 |
|
70267
|
82 |
(* The empty trie represents the empty set of keys. *)
lemma set_trie_empty: "set_trie empty = {}"
by(simp add: set_trie_def)
|
|
84 |
|
|
85 |
(* isin agrees with set membership in the abstraction set_trie. *)
lemma set_trie_isin: "isin t xs = (xs \<in> set_trie t)"
by(simp add: set_trie_def)
|
|
87 |
|
70250
|
88 |
(* insert adds exactly the inserted key to the represented set; follows from isin_insert. *)
lemma set_trie_insert: "set_trie(insert xs t) = set_trie t \<union> {xs}"
by(auto simp add: isin_insert set_trie_def)
|
|
90 |
|
70267
|
91 |
(* delete removes exactly the deleted key from the represented set; follows from isin_delete. *)
lemma set_trie_delete: "set_trie(delete xs t) = set_trie t - {xs}"
by(auto simp add: isin_delete set_trie_def)
|
|
93 |
|
77830
|
94 |
text \<open>Invariant: tries are fully shrunk:\<close>
|
|
95 |
(* Shrunkenness invariant: a leaf satisfies it; a node satisfies it if both children do
   and, whenever both children are Lf, its flag is True. *)
fun invar where
"invar Lf = True" |
"invar (Nd b (l,r)) = (invar l \<and> invar r \<and> (l = Lf \<and> r = Lf \<longrightarrow> b))"
|
|
98 |
|
|
99 |
(* insert never returns a leaf: every equation of insert builds an Nd node. *)
lemma insert_Lf: "insert xs t \<noteq> Lf"
using insert.elims by blast
|
|
101 |
|
|
102 |
(* insert preserves the invariant. The four cases follow the four defining equations of
   insert; case 4 uses insert_Lf to know that the modified child is not a leaf. *)
lemma invar_insert: "invar t \<Longrightarrow> invar(insert xs t)"
proof(induction xs t rule: insert.induct)
  case 1 thus ?case by simp
next
  case (2 b lr)
  thus ?case by(cases lr; simp)
next
  case (3 k ks)
  thus ?case by(simp; cases ks; auto)
next
  case (4 k ks b lr)
  then show ?case by(cases lr; auto simp: insert_Lf)
qed
|
|
115 |
|
|
116 |
(* delete preserves the invariant. Proof by structural induction on the trie,
   generalizing the key; the node case splits the pair of children and the key. *)
lemma invar_delete: "invar t \<Longrightarrow> invar(delete xs t)"
proof(induction t arbitrary: xs)
  case Lf thus ?case by simp
next
  case (Nd b lr)
  thus ?case by(cases lr)(auto split: list.split)
qed
|
|
123 |
|
70250
|
124 |
(* Binary tries implement the Set interface: the locale Set is interpreted with the trie
   operations, set_trie as abstraction function and invar as invariant.
   The seven proof obligations are discharged in order by the lemmas proved above.
   NOTE(review): the locale Set comes from the imported theory Set_Specs, which is not
   visible here; the reading of the individual goals is inferred from the lemmas used. *)
interpretation S: Set
where empty = empty and isin = isin and insert = insert and delete = delete
and set = set_trie and invar = invar
proof (standard, goal_cases)
  case 1 show ?case by (rule set_trie_empty)
next
  case 2 show ?case by(rule set_trie_isin)
next
  case 3 thus ?case by(auto simp: set_trie_insert)
next
  case 4 show ?case by(rule set_trie_delete)
next
  case 5 show ?case by(simp)
next
  case 6 thus ?case by(rule invar_insert)
next
  case 7 thus ?case by(rule invar_delete)
qed
|
70250
|
142 |
|
|
143 |
|
|
144 |
subsection "Patricia Trie"
|
|
145 |
|
70268
|
146 |
(* Patricia trie. LfP is the empty trie; NdP ps b (l,r) additionally stores a bit list ps.
   As used by isinP: a key must start with ps; after that prefix the node behaves like a
   binary trie node with flag b and children (l,r). *)
datatype trieP = LfP | NdP "bool list" bool "trieP * trieP"
|
70250
|
147 |
|
77830
|
148 |
text \<open>Fully shrunk:\<close>
|
|
149 |
(* Shrunkenness invariant for Patricia tries: a leaf satisfies it; a node satisfies it if
   both children do and, whenever at least one child is LfP, its flag is True.
   Equivalently, a node with flag False has two non-leaf children. *)
fun invarP where
"invarP LfP = True" |
"invarP (NdP ps b (l,r)) = (invarP l \<and> invarP r \<and> (l = LfP \<or> r = LfP \<longrightarrow> b))"
|
|
152 |
|
70268
|
153 |
(* Membership test for Patricia tries. A leaf contains no key. At a node with prefix ps,
   the key must start with ps (otherwise the result is False); the rest of the key after
   the prefix is then treated as in isin: if it is empty the flag b decides, otherwise its
   first bit selects the child in which the search continues. *)
fun isinP :: "trieP \<Rightarrow> bool list \<Rightarrow> bool" where
"isinP LfP ks = False" |
"isinP (NdP ps b lr) ks =
  (let n = length ps in
   if ps = take n ks
   then case drop n ks of [] \<Rightarrow> b | k#ks' \<Rightarrow> isinP (sel2 k lr) ks'
   else False)"
|
|
160 |
|
70268
|
161 |
(* The empty Patricia trie is the leaf LfP. The defining equation is declared [simp],
   so the simplifier unfolds emptyP automatically. *)
definition emptyP :: trieP where
[simp]: "emptyP = LfP"
|
|
163 |
|
77830
|
164 |
(* lcp xs ys = (ps, xs', ys') splits off the longest common prefix ps of the two lists;
   xs' and ys' are what remains of xs and ys after ps. The recursion stops as soon as one
   list is empty or the two heads differ. *)
fun lcp :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a list \<times> 'a list \<times> 'a list" where
"lcp [] ys = ([],[],ys)" |
"lcp xs [] = ([],xs,[])" |
"lcp (x#xs) (y#ys) =
  (if x\<noteq>y then ([],x#xs,y#ys)
   else let (ps,xs',ys') = lcp xs ys in (x#ps,xs',ys'))"
|
70250
|
170 |
|
|
171 |
|
|
172 |
(* Congruence rule for mod2: two applications of mod2 are equal if the pairs and the bits
   are equal and the two functions agree on both components of the pair.
   It is registered as fundef_cong, i.e. for use by the function package.
   NOTE(review): presumably needed so that the recursive calls passed to mod2 in insertP
   and deleteP are handled in their termination proofs; confirm. *)
lemma mod2_cong[fundef_cong]:
  "\<lbrakk> lr = lr'; k = k'; \<And>a b. lr'=(a,b) \<Longrightarrow> f (a) = f' (a) ; \<And>a b. lr'=(a,b) \<Longrightarrow> f (b) = f' (b) \<rbrakk>
  \<Longrightarrow> mod2 f k lr= mod2 f' k' lr'"
by(cases lr, cases lr', auto)
|
|
176 |
|
70268
|
177 |
|
|
178 |
(* insertP ks t adds key ks to Patricia trie t. Inserting into a leaf creates a single node
   with prefix ks and flag True. At a node with prefix ps, lcp ks ps = (qs, rest of ks,
   rest of ps) distinguishes four cases:
   1. both rests non-empty: a new node with prefix qs and flag False is created; its
      children are the old node with prefix shortened to ps' and a fresh node holding the
      rest ks' of the key, the latter on the side given by bit k;
   2. the prefix is used up but the key is not: ks' is inserted into the child selected by k;
   3. the key is used up but the prefix is not: a new node with prefix qs and flag True is
      created whose only non-leaf child, on the side given by bit p, is the old node with
      prefix shortened to ps';
   4. both are used up: the flag of the node is set to True. *)
fun insertP :: "bool list \<Rightarrow> trieP \<Rightarrow> trieP" where
"insertP ks LfP  = NdP ks True (LfP,LfP)" |
"insertP ks (NdP ps b lr) =
  (case lcp ks ps of
     (qs, k#ks', p#ps') \<Rightarrow>
       let tp = NdP ps' b lr; tk = NdP ks' True (LfP,LfP) in
       NdP qs False (if k then (tp,tk) else (tk,tp)) |
     (qs, k#ks', []) \<Rightarrow>
       NdP ps b (mod2 (insertP ks') k lr) |
     (qs, [], p#ps') \<Rightarrow>
       let t = NdP ps' b lr in
       NdP qs True (if p then (LfP,t) else (t,LfP)) |
     (qs,[],[]) \<Rightarrow> NdP ps True lr)"
|
|
191 |
|
|
192 |
|
77830
|
193 |
text \<open>Smart constructor that shrinks:\<close>
|
|
194 |
(* Smart constructor used by deleteP. With flag True it is just NdP. With flag False:
   two leaf children yield LfP; exactly one non-leaf child is merged into the node, i.e.
   the result is that child with its prefix extended in front by ps and the branch bit
   (True for the second component, False for the first); two non-leaf children yield the
   ordinary node. Note that in the two merging cases the pattern variables b and lr
   shadow the parameters of nodeP. *)
definition nodeP :: "bool list \<Rightarrow> bool \<Rightarrow> trieP * trieP \<Rightarrow> trieP" where
"nodeP ps b lr =
 (if b then  NdP ps b lr
  else case lr of
   (LfP,LfP) \<Rightarrow> LfP |
   (LfP, NdP ks b lr) \<Rightarrow> NdP (ps @ True # ks) b lr |
   (NdP ks b lr, LfP) \<Rightarrow> NdP (ps @ False # ks) b lr |
   _ \<Rightarrow> NdP ps b lr)"
|
70250
|
202 |
|
70268
|
203 |
(* deleteP ks t removes key ks from Patricia trie t. A leaf is returned unchanged.
   At a node with prefix ps, the result of lcp ks ps distinguishes three cases:
   1. part of the prefix is left over: the node is returned unchanged;
   2. the prefix is used up but the key is not: the rest of the key is deleted from the
      child selected by its first bit and the node is rebuilt with nodeP;
   3. both are used up: the node is rebuilt with nodeP and flag False. *)
fun deleteP :: "bool list \<Rightarrow> trieP \<Rightarrow> trieP" where
"deleteP ks LfP  = LfP" |
"deleteP ks (NdP ps b lr) =
  (case lcp ks ps of
     (_, _, _#_) \<Rightarrow> NdP ps b lr |
     (_, k#ks', []) \<Rightarrow> nodeP ps b (mod2 (deleteP ks') k lr) |
     (_, [], []) \<Rightarrow> nodeP ps False lr)"
|
70250
|
210 |
|
|
211 |
|
77830
|
212 |
|
70250
|
213 |
subsubsection \<open>Functional Correctness\<close>
|
|
214 |
|
70268
|
215 |
text \<open>First step: @{typ trieP} implements @{typ trie} via the abstraction function \<open>abs_trieP\<close>:\<close>
|
70250
|
216 |
|
|
217 |
(* prefix_trie ks t puts the path ks in front of the binary trie t: for every bit of ks
   one node with flag False is created whose child on the side of that bit is the rest of
   the construction and whose other child is Lf. For the empty path the result is t. *)
fun prefix_trie :: "bool list \<Rightarrow> trie \<Rightarrow> trie" where
"prefix_trie [] t = t" |
"prefix_trie (k#ks) t =
  (let t' = prefix_trie ks t in Nd False (if k then (Lf,t') else (t',Lf)))"
|
|
221 |
|
70268
|
222 |
(* Abstraction function from Patricia tries to binary tries: a leaf becomes Lf; a node
   becomes the binary node with the same flag and the abstracted children, preceded by
   the path given by its prefix ps. *)
fun abs_trieP :: "trieP \<Rightarrow> trie" where
"abs_trieP LfP = Lf" |
"abs_trieP (NdP ps b (l,r)) = prefix_trie ps (Nd b (abs_trieP l, abs_trieP r))"
|
70250
|
225 |
|
|
226 |
|
|
227 |
text \<open>Correctness of @{const isinP}:\<close>
|
|
228 |
|
|
229 |
(* Membership in prefix_trie ps t: the key must start with ps and the rest of the key
   after ps must be a member of t. Proof by induction on ps, generalizing the key. *)
lemma isin_prefix_trie:
  "isin (prefix_trie ps t) ks
   = (ps = take (length ps) ks \<and> isin t (drop (length ps) ks))"
apply(induction ps arbitrary: ks)
apply(auto split: list.split)
done
|
|
235 |
|
70269
|
236 |
(* isinP on a Patricia trie agrees with isin on its abstraction.
   Proof by induction along abs_trieP, using isin_prefix_trie. *)
lemma abs_trieP_isinP:
  "isinP t ks = isin (abs_trieP t) ks"
apply(induction t arbitrary: ks rule: abs_trieP.induct)
 apply(auto simp: isin_prefix_trie split: list.split)
done
|
|
241 |
|
|
242 |
|
|
243 |
text \<open>Correctness of @{const insertP}:\<close>
|
|
244 |
|
|
245 |
(* Prefixing the one-key trie Nd True (Lf,Lf) with ks is the same as inserting ks
   into the empty trie. *)
lemma prefix_trie_Lfs: "prefix_trie ks (Nd True (Lf,Lf)) = insert ks Lf"
apply(induction ks)
apply auto
done
|
|
249 |
|
|
250 |
(* Inserting exactly the path ps into a trie prefixed by ps sets the flag of the node
   at the end of the path to True. *)
lemma insert_prefix_trie_same:
  "insert ps (prefix_trie ps (Nd b lr)) = prefix_trie ps (Nd True lr)"
apply(induction ps)
apply auto
done
|
|
255 |
|
|
256 |
(* Inserting a key that starts with ks into a trie prefixed by ks amounts to inserting
   the rest of the key below the prefix. *)
lemma insert_append: "insert (ks @ ks') (prefix_trie ks t) = prefix_trie ks (insert ks' t)"
apply(induction ks)
apply auto
done
|
|
260 |
|
|
261 |
(* Prefixing with a concatenated path equals prefixing in two steps. *)
lemma prefix_trie_append: "prefix_trie (ps @ qs) t = prefix_trie ps (prefix_trie qs t)"
apply(induction ps)
apply auto
done
|
|
265 |
|
77830
|
266 |
(* Characterizes any result (qs, ks', ps') of lcp ks ps: both inputs are qs followed by
   their respective rest, and if both rests are non-empty their heads differ.
   Used as a destruction rule (dest!) in the correctness proofs of insertP and deleteP. *)
lemma lcp_if: "lcp ks ps = (qs, ks', ps') \<Longrightarrow>
  ks = qs @ ks' \<and> ps = qs @ ps' \<and> (ks' \<noteq> [] \<and> ps' \<noteq> [] \<longrightarrow> hd ks' \<noteq> hd ps')"
apply(induction ks ps arbitrary: qs ks' ps' rule: lcp.induct)
apply(auto split: prod.splits if_splits)
done
|
|
271 |
|
70268
|
272 |
(* insertP simulates insert: abstracting after insertP equals insert on the abstraction.
   Proof by induction on the Patricia trie; lcp_if turns each lcp result into equations
   on lists, and the prefix_trie lemmas above rewrite the resulting goals. *)
lemma abs_trieP_insertP:
  "abs_trieP (insertP ks t) = insert ks (abs_trieP t)"
apply(induction t arbitrary: ks)
apply(auto simp: prefix_trie_Lfs insert_prefix_trie_same insert_append prefix_trie_append
           dest!: lcp_if split: list.split prod.split if_splits)
done
|
|
278 |
|
|
279 |
|
|
280 |
text \<open>Correctness of @{const deleteP}:\<close>
|
|
281 |
|
|
282 |
(* prefix_trie yields a leaf only for the empty path applied to a leaf. *)
lemma prefix_trie_Lf: "prefix_trie xs t = Lf \<longleftrightarrow> xs = [] \<and> t = Lf"
by(cases xs)(auto)
|
|
284 |
|
70268
|
285 |
(* The abstraction of a Patricia trie is a leaf iff the Patricia trie itself is a leaf. *)
lemma abs_trieP_Lf: "abs_trieP t = Lf \<longleftrightarrow> t = LfP"
by(cases t) (auto simp: prefix_trie_Lf)
|
|
287 |
|
|
288 |
(* Deleting exactly the path xs from a node prefixed by xs: if the node has two leaf
   children the whole trie collapses to Lf, otherwise only the flag of the node at the
   end of the path is set to False. *)
lemma delete_prefix_trie:
  "delete xs (prefix_trie xs (Nd b (l,r)))
   = (if (l,r) = (Lf,Lf) then Lf else prefix_trie xs (Nd False (l,r)))"
by(induction xs)(auto simp: prefix_trie_Lf)
|
|
292 |
|
|
293 |
(* Deleting a key that starts with xs from a trie prefixed by xs: the rest ys is deleted
   below the prefix; if that leaves a leaf, the prefix disappears as well. *)
lemma delete_append_prefix_trie:
  "delete (xs @ ys) (prefix_trie xs t)
   = (if delete ys t = Lf then Lf else prefix_trie xs (delete ys t))"
by(induction xs)(auto simp: prefix_trie_Lf)
|
|
297 |
|
77830
|
298 |
(* nodeP with flag False and two leaf children yields the leaf LfP. *)
lemma nodeP_LfP2: "nodeP xs False (LfP, LfP) = LfP"
by(simp add: nodeP_def)
|
|
300 |
|
|
301 |
text \<open>Some non-inductive aux. lemmas:\<close>
|
|
302 |
|
|
303 |
(* If at least one child is not a leaf, the result of nodeP abstracts to the same binary
   trie as the ordinary node with prefix xs, flag f and children (a, b). *)
lemma abs_trieP_nodeP: "a\<noteq>LfP \<or> b \<noteq> LfP \<Longrightarrow>
  abs_trieP (nodeP xs f (a, b)) = prefix_trie xs (Nd f (abs_trieP a, abs_trieP b))"
by(auto simp add: nodeP_def prefix_trie_append split: trieP.split)
|
|
306 |
|
|
307 |
(* With flag True, nodeP is just the constructor NdP. *)
lemma nodeP_True: "nodeP ps True lr = NdP ps True lr"
by(simp add: nodeP_def)
|
|
309 |
|
70268
|
310 |
(* deleteP simulates delete: delete on the abstraction equals the abstraction after
   deleteP. Proof by induction on the Patricia trie; lcp_if turns each lcp result into
   equations on lists, and the delete, prefix_trie and nodeP lemmas above rewrite the
   resulting goals. *)
lemma delete_abs_trieP:
  "delete ks (abs_trieP t) = abs_trieP (deleteP ks t)"
apply(induction t arbitrary: ks)
apply(auto simp: delete_prefix_trie delete_append_prefix_trie
        prefix_trie_append prefix_trie_Lf abs_trieP_Lf nodeP_LfP2 abs_trieP_nodeP nodeP_True
        dest!: lcp_if split: if_splits list.split prod.split)
done
|
|
317 |
|
77830
|
318 |
text \<open>Invariant preservation:\<close>
|
|
319 |
|
|
320 |
(* insertP never returns a leaf: every branch of insertP builds an NdP node. *)
lemma insertP_LfP: "insertP xs t \<noteq> LfP"
by(cases t)(auto split: prod.split list.split)
|
|
322 |
|
|
323 |
(* insertP preserves the invariant. Proof by structural induction on the Patricia trie,
   generalizing the key; the node case splits the pair of children and uses insertP_LfP
   to know that a modified child is not a leaf. *)
lemma invarP_insertP: "invarP t \<Longrightarrow> invarP(insertP xs t)"
proof(induction t arbitrary: xs)
  case LfP thus ?case by simp
next
  case (NdP bs b lr)
  then show ?case
    by(cases lr)(auto simp: insertP_LfP split: prod.split list.split)
qed
|
|
331 |
|
|
332 |
(* Inlining this proof leads to nontermination *)
|
|
333 |
(* nodeP applied to two children that satisfy the invariant yields a trie that
   satisfies the invariant, for any prefix and flag. *)
lemma invarP_nodeP: "\<lbrakk> invarP t1; invarP t2\<rbrakk> \<Longrightarrow> invarP (nodeP xs b (t1, t2))"
by (auto simp add: nodeP_def split: trieP.split)
|
|
335 |
|
|
336 |
(* deleteP preserves the invariant. Proof by structural induction on the Patricia trie,
   generalizing the key; the node case splits the pair of children and relies on
   invarP_nodeP for the nodes rebuilt with nodeP. *)
lemma invarP_deleteP: "invarP t \<Longrightarrow> invarP(deleteP xs t)"
proof(induction t arbitrary: xs)
  case LfP thus ?case by simp
next
  case (NdP ks b lr)
  thus ?case by(cases lr)(auto simp: invarP_nodeP split: prod.split list.split)
qed
|
|
343 |
|
70250
|
344 |
|
|
345 |
text \<open>The overall correctness proof. Simply composes correctness lemmas.\<close>
|
|
346 |
|
70268
|
347 |
(* Abstraction function for Patricia tries: the set represented by the binary trie
   obtained via abs_trieP. *)
definition set_trieP :: "trieP \<Rightarrow> bool list set" where
"set_trieP = set_trie o abs_trieP"
|
70250
|
349 |
|
70269
|
350 |
(* isinP agrees with set membership in the abstraction set_trieP;
   composed from abs_trieP_isinP and set_trie_isin. *)
lemma isinP_set_trieP: "isinP t xs = (xs \<in> set_trieP t)"
by(simp add: abs_trieP_isinP set_trie_isin set_trieP_def)
|
|
352 |
|
70268
|
353 |
(* insertP adds exactly the inserted key to the represented set;
   composed from abs_trieP_insertP and set_trie_insert. *)
lemma set_trieP_insertP: "set_trieP (insertP xs t) = set_trieP t \<union> {xs}"
by(simp add: abs_trieP_insertP set_trie_insert set_trieP_def)
|
70250
|
355 |
|
70269
|
356 |
(* deleteP removes exactly the deleted key from the represented set;
   delete_abs_trieP is used from right to left (simp flip) together with set_trie_delete. *)
lemma set_trieP_deleteP: "set_trieP (deleteP xs t) = set_trieP t - {xs}"
by(auto simp: set_trie_delete set_trieP_def simp flip: delete_abs_trieP)
|
|
358 |
|
70250
|
359 |
(* Patricia tries implement the Set interface: the locale Set is interpreted with the
   Patricia trie operations, set_trieP as abstraction function and invarP as invariant.
   The seven proof obligations are discharged in order by the lemmas proved above.
   NOTE(review): the locale Set comes from the imported theory Set_Specs, which is not
   visible here; the reading of the individual goals is inferred from the lemmas used. *)
interpretation SP: Set
where empty = emptyP and isin = isinP and insert = insertP and delete = deleteP
and set = set_trieP and invar = invarP
proof (standard, goal_cases)
  case 1 show ?case by (simp add: set_trieP_def set_trie_def)
next
  case 2 show ?case by(rule isinP_set_trieP)
next
  case 3 thus ?case by (auto simp: set_trieP_insertP)
next
  case 4 thus ?case by(auto simp: set_trieP_deleteP)
next
  case 5 thus ?case by(simp)
next
  case 6 thus ?case by(rule invarP_insertP)
next
  case 7 thus ?case by(rule invarP_deleteP)
qed
|
70250
|
377 |
|
|
378 |
end
|