71810
|
1 |
(* Author: Tobias Nipkow *)
|
71801
|
2 |
|
|
3 |
section "Height-Balanced Trees"
|
|
4 |
|
|
5 |
theory Height_Balanced_Tree
|
|
6 |
imports
|
|
7 |
Cmp
|
|
8 |
Isin2
|
|
9 |
begin
|
|
10 |
|
|
11 |
text \<open>Height-balanced trees (HBTs) can be seen as a generalization of AVL trees.
|
|
12 |
The code and the proofs were obtained by small modifications of the AVL theories.
|
|
13 |
This is an implementation of sets via HBTs.\<close>
|
|
14 |
|
71818
|
15 |
type_synonym 'a tree_ht = "('a*nat) tree"
|
71801
|
16 |
|
71818
|
17 |
definition empty :: "'a tree_ht" where
|
71801
|
18 |
"empty = Leaf"
|
|
19 |
|
71812
|
20 |
text \<open>The maximal amount by which the height of two siblings may differ:\<close>
|
71801
|
21 |
|
71812
|
22 |
locale HBT =
|
|
23 |
fixes m :: nat
|
|
24 |
assumes [arith]: "m > 0"
|
|
25 |
begin
|
71801
|
26 |
|
|
27 |
text \<open>Invariant:\<close>
|
|
28 |
|
71818
|
29 |
fun hbt :: "'a tree_ht \<Rightarrow> bool" where
|
71801
|
30 |
"hbt Leaf = True" |
|
|
31 |
"hbt (Node l (a,n) r) =
|
71812
|
32 |
(abs(int(height l) - int(height r)) \<le> int(m) \<and>
|
71801
|
33 |
n = max (height l) (height r) + 1 \<and> hbt l \<and> hbt r)"
|
|
34 |
|
71818
|
35 |
fun ht :: "'a tree_ht \<Rightarrow> nat" where
|
71801
|
36 |
"ht Leaf = 0" |
|
|
37 |
"ht (Node l (a,n) r) = n"
|
|
38 |
|
71818
|
39 |
definition node :: "'a tree_ht \<Rightarrow> 'a \<Rightarrow> 'a tree_ht \<Rightarrow> 'a tree_ht" where
|
71801
|
40 |
"node l a r = Node l (a, max (ht l) (ht r) + 1) r"
|
|
41 |
|
71818
|
42 |
definition balL :: "'a tree_ht \<Rightarrow> 'a \<Rightarrow> 'a tree_ht \<Rightarrow> 'a tree_ht" where
|
71801
|
43 |
"balL AB b C =
|
71812
|
44 |
(if ht AB = ht C + m + 1 then
|
71801
|
45 |
case AB of
|
|
46 |
Node A (a, _) B \<Rightarrow>
|
|
47 |
if ht A \<ge> ht B then node A a (node B b C)
|
|
48 |
else
|
|
49 |
case B of
|
|
50 |
Node B\<^sub>1 (ab, _) B\<^sub>2 \<Rightarrow> node (node A a B\<^sub>1) ab (node B\<^sub>2 b C)
|
|
51 |
else node AB b C)"
|
|
52 |
|
71818
|
53 |
definition balR :: "'a tree_ht \<Rightarrow> 'a \<Rightarrow> 'a tree_ht \<Rightarrow> 'a tree_ht" where
|
71801
|
54 |
"balR A a BC =
|
71812
|
55 |
(if ht BC = ht A + m + 1 then
|
71801
|
56 |
case BC of
|
|
57 |
Node B (b, _) C \<Rightarrow>
|
|
58 |
if ht B \<le> ht C then node (node A a B) b C
|
|
59 |
else
|
|
60 |
case B of
|
|
61 |
Node B\<^sub>1 (ab, _) B\<^sub>2 \<Rightarrow> node (node A a B\<^sub>1) ab (node B\<^sub>2 b C)
|
|
62 |
else node A a BC)"
|
|
63 |
|
71818
|
64 |
fun insert :: "'a::linorder \<Rightarrow> 'a tree_ht \<Rightarrow> 'a tree_ht" where
|
71801
|
65 |
"insert x Leaf = Node Leaf (x, 1) Leaf" |
|
|
66 |
"insert x (Node l (a, n) r) = (case cmp x a of
|
|
67 |
EQ \<Rightarrow> Node l (a, n) r |
|
|
68 |
LT \<Rightarrow> balL (insert x l) a r |
|
|
69 |
GT \<Rightarrow> balR l a (insert x r))"
|
|
70 |
|
71818
|
71 |
fun split_max :: "'a tree_ht \<Rightarrow> 'a tree_ht * 'a" where
|
71801
|
72 |
"split_max (Node l (a, _) r) =
|
|
73 |
(if r = Leaf then (l,a) else let (r',a') = split_max r in (balL l a r', a'))"
|
|
74 |
|
|
75 |
lemmas split_max_induct = split_max.induct[case_names Node Leaf]
|
|
76 |
|
71818
|
77 |
fun delete :: "'a::linorder \<Rightarrow> 'a tree_ht \<Rightarrow> 'a tree_ht" where
|
71801
|
78 |
"delete _ Leaf = Leaf" |
|
|
79 |
"delete x (Node l (a, n) r) =
|
|
80 |
(case cmp x a of
|
|
81 |
EQ \<Rightarrow> if l = Leaf then r
|
|
82 |
else let (l', a') = split_max l in balR l' a' r |
|
|
83 |
LT \<Rightarrow> balR (delete x l) a r |
|
|
84 |
GT \<Rightarrow> balL l a (delete x r))"
|
|
85 |
|
|
86 |
|
|
87 |
subsection \<open>Functional Correctness Proofs\<close>
|
|
88 |
|
|
89 |
|
|
90 |
subsubsection "Proofs for insert"
|
|
91 |
|
|
92 |
lemma inorder_balL:
|
|
93 |
"inorder (balL l a r) = inorder l @ a # inorder r"
|
|
94 |
by (auto simp: node_def balL_def split:tree.splits)
|
|
95 |
|
|
96 |
lemma inorder_balR:
|
|
97 |
"inorder (balR l a r) = inorder l @ a # inorder r"
|
|
98 |
by (auto simp: node_def balR_def split:tree.splits)
|
|
99 |
|
|
100 |
theorem inorder_insert:
|
|
101 |
"sorted(inorder t) \<Longrightarrow> inorder(insert x t) = ins_list x (inorder t)"
|
|
102 |
by (induct t)
|
|
103 |
(auto simp: ins_list_simps inorder_balL inorder_balR)
|
|
104 |
|
|
105 |
|
|
106 |
subsubsection "Proofs for delete"
|
|
107 |
|
|
108 |
lemma inorder_split_maxD:
|
|
109 |
"\<lbrakk> split_max t = (t',a); t \<noteq> Leaf \<rbrakk> \<Longrightarrow>
|
|
110 |
inorder t' @ [a] = inorder t"
|
|
111 |
by(induction t arbitrary: t' rule: split_max.induct)
|
|
112 |
(auto simp: inorder_balL split: if_splits prod.splits tree.split)
|
|
113 |
|
|
114 |
theorem inorder_delete:
|
|
115 |
"sorted(inorder t) \<Longrightarrow> inorder (delete x t) = del_list x (inorder t)"
|
|
116 |
by(induction t)
|
|
117 |
(auto simp: del_list_simps inorder_balL inorder_balR inorder_split_maxD split: prod.splits)
|
|
118 |
|
|
119 |
|
|
120 |
subsection \<open>Invariant preservation\<close>
|
|
121 |
|
|
122 |
|
|
123 |
subsubsection \<open>Insertion maintains balance\<close>
|
|
124 |
|
|
125 |
declare Let_def [simp]
|
|
126 |
|
|
127 |
lemma ht_height[simp]: "hbt t \<Longrightarrow> ht t = height t"
|
|
128 |
by (cases t rule: tree2_cases) simp_all
|
|
129 |
|
|
130 |
text \<open>First, a fast but relatively manual proof with many lemmas:\<close>
|
|
131 |
|
|
132 |
lemma height_balL:
|
71812
|
133 |
"\<lbrakk> hbt l; hbt r; height l = height r + m + 1 \<rbrakk> \<Longrightarrow>
|
|
134 |
height (balL l a r) \<in> {height r + m + 1, height r + m + 2}"
|
71818
|
135 |
by (auto simp:node_def balL_def split:tree.split)
|
71801
|
136 |
|
|
137 |
lemma height_balR:
|
71812
|
138 |
"\<lbrakk> hbt l; hbt r; height r = height l + m + 1 \<rbrakk> \<Longrightarrow>
|
|
139 |
height (balR l a r) \<in> {height l + m + 1, height l + m + 2}"
|
71818
|
140 |
by(auto simp add:node_def balR_def split:tree.split)
|
71801
|
141 |
|
|
142 |
lemma height_node[simp]: "height(node l a r) = max (height l) (height r) + 1"
|
|
143 |
by (simp add: node_def)
|
|
144 |
|
|
145 |
lemma height_balL2:
|
71812
|
146 |
"\<lbrakk> hbt l; hbt r; height l \<noteq> height r + m + 1 \<rbrakk> \<Longrightarrow>
|
71801
|
147 |
height (balL l a r) = 1 + max (height l) (height r)"
|
71818
|
148 |
by (simp_all add: balL_def)
|
71801
|
149 |
|
|
150 |
lemma height_balR2:
|
71812
|
151 |
"\<lbrakk> hbt l; hbt r; height r \<noteq> height l + m + 1 \<rbrakk> \<Longrightarrow>
|
71801
|
152 |
height (balR l a r) = 1 + max (height l) (height r)"
|
71818
|
153 |
by (simp_all add: balR_def)
|
71801
|
154 |
|
|
155 |
lemma hbt_balL:
|
71812
|
156 |
"\<lbrakk> hbt l; hbt r; height r - m \<le> height l \<and> height l \<le> height r + m + 1 \<rbrakk> \<Longrightarrow> hbt(balL l a r)"
|
71818
|
157 |
by(auto simp: balL_def node_def max_def split!: if_splits tree.split)
|
71801
|
158 |
|
|
159 |
lemma hbt_balR:
|
71812
|
160 |
"\<lbrakk> hbt l; hbt r; height l - m \<le> height r \<and> height r \<le> height l + m + 1 \<rbrakk> \<Longrightarrow> hbt(balR l a r)"
|
71818
|
161 |
by(auto simp: balR_def node_def max_def split!: if_splits tree.split)
|
71801
|
162 |
|
|
163 |
text\<open>Insertion maintains @{const hbt}. Requires simultaneous proof.\<close>
|
|
164 |
|
|
165 |
theorem hbt_insert:
|
71812
|
166 |
"hbt t \<Longrightarrow> hbt(insert x t)"
|
|
167 |
"hbt t \<Longrightarrow> height (insert x t) \<in> {height t, height t + 1}"
|
71801
|
168 |
proof (induction t rule: tree2_induct)
|
|
169 |
case (Node l a _ r)
|
|
170 |
case 1
|
|
171 |
show ?case
|
|
172 |
proof(cases "x = a")
|
|
173 |
case True with Node 1 show ?thesis by simp
|
|
174 |
next
|
|
175 |
case False
|
|
176 |
show ?thesis
|
|
177 |
proof(cases "x<a")
|
71810
|
178 |
case True with 1 Node(1,2) show ?thesis by (auto intro!: hbt_balL)
|
71801
|
179 |
next
|
71810
|
180 |
case False with 1 Node(3,4) \<open>x\<noteq>a\<close> show ?thesis by (auto intro!: hbt_balR)
|
71801
|
181 |
qed
|
|
182 |
qed
|
|
183 |
case 2
|
|
184 |
show ?case
|
|
185 |
proof(cases "x = a")
|
71810
|
186 |
case True with 2 show ?thesis by simp
|
71801
|
187 |
next
|
|
188 |
case False
|
|
189 |
show ?thesis
|
|
190 |
proof(cases "x<a")
|
|
191 |
case True
|
|
192 |
show ?thesis
|
71812
|
193 |
proof(cases "height (insert x l) = height r + m + 1")
|
71810
|
194 |
case False with 2 Node(1,2) \<open>x < a\<close> show ?thesis by (auto simp: height_balL2)
|
71801
|
195 |
next
|
|
196 |
case True
|
71812
|
197 |
hence "(height (balL (insert x l) a r) = height r + m + 1) \<or>
|
|
198 |
(height (balL (insert x l) a r) = height r + m + 2)" (is "?A \<or> ?B")
|
71810
|
199 |
using 2 Node(1,2) height_balL[OF _ _ True] by simp
|
71801
|
200 |
thus ?thesis
|
|
201 |
proof
|
71810
|
202 |
assume ?A with 2 Node(2) True \<open>x < a\<close> show ?thesis by (auto)
|
71801
|
203 |
next
|
71810
|
204 |
assume ?B with 2 Node(2) True \<open>x < a\<close> show ?thesis by (simp) arith
|
71801
|
205 |
qed
|
|
206 |
qed
|
|
207 |
next
|
|
208 |
case False
|
|
209 |
show ?thesis
|
71812
|
210 |
proof(cases "height (insert x r) = height l + m + 1")
|
71810
|
211 |
case False with 2 Node(3,4) \<open>\<not>x < a\<close> show ?thesis by (auto simp: height_balR2)
|
71801
|
212 |
next
|
|
213 |
case True
|
71812
|
214 |
hence "(height (balR l a (insert x r)) = height l + m + 1) \<or>
|
|
215 |
(height (balR l a (insert x r)) = height l + m + 2)" (is "?A \<or> ?B")
|
71810
|
216 |
using Node 2 height_balR[OF _ _ True] by simp
|
71801
|
217 |
thus ?thesis
|
|
218 |
proof
|
71810
|
219 |
assume ?A with 2 Node(4) True \<open>\<not>x < a\<close> show ?thesis by (auto)
|
71801
|
220 |
next
|
71810
|
221 |
assume ?B with 2 Node(4) True \<open>\<not>x < a\<close> show ?thesis by (simp) arith
|
71801
|
222 |
qed
|
|
223 |
qed
|
|
224 |
qed
|
|
225 |
qed
|
|
226 |
qed simp_all
|
|
227 |
|
|
228 |
text \<open>Now an automatic proof without lemmas:\<close>
|
|
229 |
|
|
230 |
theorem hbt_insert_auto: "hbt t \<Longrightarrow>
|
|
231 |
hbt(insert x t) \<and> height (insert x t) \<in> {height t, height t + 1}"
|
|
232 |
apply (induction t rule: tree2_induct)
|
|
233 |
(* if you want to save a few secs: apply (auto split!: if_split) *)
|
|
234 |
apply (auto simp: balL_def balR_def node_def max_absorb1 max_absorb2 split!: if_split tree.split)
|
|
235 |
done
|
|
236 |
|
|
237 |
|
|
238 |
subsubsection \<open>Deletion maintains balance\<close>
|
|
239 |
|
|
240 |
lemma hbt_split_max:
|
|
241 |
"\<lbrakk> hbt t; t \<noteq> Leaf \<rbrakk> \<Longrightarrow>
|
|
242 |
hbt (fst (split_max t)) \<and>
|
|
243 |
height t \<in> {height(fst (split_max t)), height(fst (split_max t)) + 1}"
|
|
244 |
by(induct t rule: split_max_induct)
|
|
245 |
(auto simp: balL_def node_def max_absorb2 split!: prod.split if_split tree.split)
|
|
246 |
|
|
247 |
text\<open>Deletion maintains @{const hbt}:\<close>
|
|
248 |
|
|
249 |
theorem hbt_delete:
|
71810
|
250 |
"hbt t \<Longrightarrow> hbt(delete x t)"
|
|
251 |
"hbt t \<Longrightarrow> height t \<in> {height (delete x t), height (delete x t) + 1}"
|
71801
|
252 |
proof (induct t rule: tree2_induct)
|
|
253 |
case (Node l a n r)
|
|
254 |
case 1
|
|
255 |
thus ?case
|
|
256 |
using Node hbt_split_max[of l] by (auto intro!: hbt_balL hbt_balR split: prod.split)
|
|
257 |
case 2
|
|
258 |
show ?case
|
|
259 |
proof(cases "x = a")
|
|
260 |
case True then show ?thesis using 1 hbt_split_max[of l]
|
|
261 |
by(auto simp: balR_def max_absorb2 split!: if_splits prod.split tree.split)
|
|
262 |
next
|
|
263 |
case False
|
|
264 |
show ?thesis
|
|
265 |
proof(cases "x<a")
|
|
266 |
case True
|
|
267 |
show ?thesis
|
71812
|
268 |
proof(cases "height r = height (delete x l) + m + 1")
|
71801
|
269 |
case False with Node 1 \<open>x < a\<close> show ?thesis by(auto simp: balR_def)
|
|
270 |
next
|
|
271 |
case True
|
71812
|
272 |
hence "(height (balR (delete x l) a r) = height (delete x l) + m + 1) \<or>
|
|
273 |
height (balR (delete x l) a r) = height (delete x l) + m + 2" (is "?A \<or> ?B")
|
71810
|
274 |
using Node 2height_balR[OF _ _ True] by simp
|
71801
|
275 |
thus ?thesis
|
|
276 |
proof
|
|
277 |
assume ?A with \<open>x < a\<close> Node 2 show ?thesis by(auto simp: balR_def split!: if_splits)
|
|
278 |
next
|
|
279 |
assume ?B with \<open>x < a\<close> Node 2 show ?thesis by(auto simp: balR_def split!: if_splits)
|
|
280 |
qed
|
|
281 |
qed
|
|
282 |
next
|
|
283 |
case False
|
|
284 |
show ?thesis
|
71812
|
285 |
proof(cases "height l = height (delete x r) + m + 1")
|
71801
|
286 |
case False with Node 1 \<open>\<not>x < a\<close> \<open>x \<noteq> a\<close> show ?thesis by(auto simp: balL_def)
|
|
287 |
next
|
|
288 |
case True
|
71812
|
289 |
hence "(height (balL l a (delete x r)) = height (delete x r) + m + 1) \<or>
|
|
290 |
height (balL l a (delete x r)) = height (delete x r) + m + 2" (is "?A \<or> ?B")
|
71810
|
291 |
using Node 2 height_balL[OF _ _ True] by simp
|
71801
|
292 |
thus ?thesis
|
|
293 |
proof
|
|
294 |
assume ?A with \<open>\<not>x < a\<close> \<open>x \<noteq> a\<close> Node 2 show ?thesis by(auto simp: balL_def split: if_splits)
|
|
295 |
next
|
|
296 |
assume ?B with \<open>\<not>x < a\<close> \<open>x \<noteq> a\<close> Node 2 show ?thesis by(auto simp: balL_def split: if_splits)
|
|
297 |
qed
|
|
298 |
qed
|
|
299 |
qed
|
|
300 |
qed
|
|
301 |
qed simp_all
|
|
302 |
|
|
303 |
text \<open>A more automatic proof.
|
|
304 |
Complete automation as for insertion seems hard due to resource requirements.\<close>
|
|
305 |
|
|
306 |
theorem hbt_delete_auto:
|
71810
|
307 |
"hbt t \<Longrightarrow> hbt(delete x t)"
|
|
308 |
"hbt t \<Longrightarrow> height t \<in> {height (delete x t), height (delete x t) + 1}"
|
71801
|
309 |
proof (induct t rule: tree2_induct)
|
|
310 |
case (Node l a n r)
|
|
311 |
case 1
|
71810
|
312 |
thus ?case
|
|
313 |
using Node hbt_split_max[of l] by (auto intro!: hbt_balL hbt_balR split: prod.split)
|
71801
|
314 |
case 2
|
|
315 |
show ?case
|
|
316 |
proof(cases "x = a")
|
|
317 |
case True thus ?thesis
|
71810
|
318 |
using 2 hbt_split_max[of l]
|
71801
|
319 |
by(auto simp: balR_def max_absorb2 split!: if_splits prod.split tree.split)
|
|
320 |
next
|
|
321 |
case False thus ?thesis
|
71810
|
322 |
using height_balL[of l "delete x r" a] height_balR[of "delete x l" r a] 2 Node
|
71801
|
323 |
by(auto simp: balL_def balR_def split!: if_split)
|
|
324 |
qed
|
|
325 |
qed simp_all
|
|
326 |
|
|
327 |
|
|
328 |
subsection "Overall correctness"
|
|
329 |
|
|
330 |
interpretation S: Set_by_Ordered
|
|
331 |
where empty = empty and isin = isin and insert = insert and delete = delete
|
|
332 |
and inorder = inorder and inv = hbt
|
|
333 |
proof (standard, goal_cases)
|
|
334 |
case 1 show ?case by (simp add: empty_def)
|
|
335 |
next
|
|
336 |
case 2 thus ?case by(simp add: isin_set_inorder)
|
|
337 |
next
|
|
338 |
case 3 thus ?case by(simp add: inorder_insert)
|
|
339 |
next
|
|
340 |
case 4 thus ?case by(simp add: inorder_delete)
|
|
341 |
next
|
|
342 |
case 5 thus ?case by (simp add: empty_def)
|
|
343 |
next
|
|
344 |
case 6 thus ?case by (simp add: hbt_insert(1))
|
|
345 |
next
|
|
346 |
case 7 thus ?case by (simp add: hbt_delete(1))
|
|
347 |
qed
|
|
348 |
|
|
349 |
end
|
71812
|
350 |
|
|
351 |
end
|