# HG changeset patch # User nipkow # Date 1587825103 -7200 # Node ID 49d8d8ad7e431f01b82892a180d0d026c9cd2560 # Parent 35a951ed2e826ba9ac37a65c21dc19d3e230396e added Height_Balanced_Trees diff -r 35a951ed2e82 -r 49d8d8ad7e43 src/HOL/Data_Structures/AVL_Set.thy --- a/src/HOL/Data_Structures/AVL_Set.thy Fri Apr 24 13:16:42 2020 +0000 +++ b/src/HOL/Data_Structures/AVL_Set.thy Sat Apr 25 16:31:43 2020 +0200 @@ -1,9 +1,8 @@ (* -Author: Tobias Nipkow, Daniel Stüwe -Based on the AFP entry AVL. +Author: Tobias Nipkow *) -subsection \Invariant\ +subsection \Invariant expressed via 3-way cases\ theory AVL_Set imports diff -r 35a951ed2e82 -r 49d8d8ad7e43 src/HOL/Data_Structures/Height_Balanced_Tree.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Data_Structures/Height_Balanced_Tree.thy Sat Apr 25 16:31:43 2020 +0200 @@ -0,0 +1,366 @@ +(* +Author: Tobias Nipkow +*) + +section "Height-Balanced Trees" + +theory Height_Balanced_Tree +imports + Cmp + Isin2 +begin + +text \Height-balanced trees (HBTs) can be seen as a generalization of AVL trees. +The code and the proofs were obtained by small modifications of the AVL theories. +This is an implementation of sets via HBTs.\ + +type_synonym 'a hbt = "('a*nat) tree" + +definition empty :: "'a hbt" where +"empty = Leaf" + +text \The maximal amount that the height of two siblings may differ.\ + +consts m :: nat + +text \Invariant:\ + +fun hbt :: "'a hbt \ bool" where +"hbt Leaf = True" | +"hbt (Node l (a,n) r) = + (abs(int(height l) - int(height r)) \ int(m+1) \ + n = max (height l) (height r) + 1 \ hbt l \ hbt r)" + +fun ht :: "'a hbt \ nat" where +"ht Leaf = 0" | +"ht (Node l (a,n) r) = n" + +definition node :: "'a hbt \ 'a \ 'a hbt \ 'a hbt" where +"node l a r = Node l (a, max (ht l) (ht r) + 1) r" + +definition balL :: "'a hbt \ 'a \ 'a hbt \ 'a hbt" where +"balL AB b C = + (if ht AB = ht C + m + 2 then + case AB of + Node A (a, _) B \ + if ht A \ ht B then node A a (node B b C) + else + case B of + Node B\<^sub>1 (ab, _) B\<^sub>2 \ node (node A a B\<^sub>1) ab (node B\<^sub>2 b C) + else node AB b C)" + +definition balR :: "'a hbt \ 'a \ 'a hbt \ 'a hbt" where +"balR A a BC = + (if ht BC = ht A + m + 2 then + case BC of + Node B (b, _) C \ + if ht B \ ht C then node (node A a B) b C + else + case B of + Node B\<^sub>1 (ab, _) B\<^sub>2 \ node (node A a B\<^sub>1) ab (node B\<^sub>2 b C) + else node A a BC)" + +fun insert :: "'a::linorder \ 'a hbt \ 'a hbt" where +"insert x Leaf = Node Leaf (x, 1) Leaf" | +"insert x (Node l (a, n) r) = (case cmp x a of + EQ \ Node l (a, n) r | + LT \ balL (insert x l) a r | + GT \ balR l a (insert x r))" + +fun split_max :: "'a hbt \ 'a hbt * 'a" where +"split_max (Node l (a, _) r) = + (if r = Leaf then (l,a) else let (r',a') = split_max r in (balL l a r', a'))" + +lemmas split_max_induct = split_max.induct[case_names Node Leaf] + +fun delete :: "'a::linorder \ 'a hbt \ 'a hbt" where +"delete _ Leaf = Leaf" | +"delete x (Node l (a, n) r) = + (case cmp x a of + EQ \ if l = Leaf then r + else let (l', a') = split_max l in balR l' a' r | + LT \ balR (delete x l) a r | + GT \ balL l a (delete x r))" + + +subsection \Functional Correctness Proofs\ + + +subsubsection "Proofs for insert" + +lemma inorder_balL: + "inorder (balL l a r) = inorder l @ a # inorder r" +by (auto simp: node_def balL_def split:tree.splits) + +lemma inorder_balR: + "inorder (balR l a r) = inorder l @ a # inorder r" +by (auto simp: node_def balR_def split:tree.splits) + +theorem inorder_insert: + "sorted(inorder t) \ inorder(insert x t) = ins_list x (inorder t)" +by (induct t) + (auto simp: ins_list_simps inorder_balL inorder_balR) + + +subsubsection "Proofs for delete" + +lemma inorder_split_maxD: + "\ split_max t = (t',a); t \ Leaf \ \ + inorder t' @ [a] = inorder t" +by(induction t arbitrary: t' rule: split_max.induct) + (auto simp: inorder_balL split: if_splits prod.splits tree.split) + +theorem inorder_delete: + "sorted(inorder t) \ inorder (delete x t) = del_list x (inorder t)" +by(induction t) + (auto simp: del_list_simps inorder_balL inorder_balR inorder_split_maxD split: prod.splits) + + +subsection \Invariant preservation\ + + +subsubsection \Insertion maintains balance\ + +declare Let_def [simp] + +lemma ht_height[simp]: "hbt t \ ht t = height t" +by (cases t rule: tree2_cases) simp_all + +text \First, a fast but relatively manual proof with many lemmas:\ + +lemma height_balL: + "\ hbt l; hbt r; height l = height r + m + 2 \ \ + height (balL l a r) = height r + m + 2 \ + height (balL l a r) = height r + m + 3" +apply (cases l) + apply (auto simp:node_def balL_def split:tree.split) + by arith+ + +lemma height_balR: + "\ hbt l; hbt r; height r = height l + m + 2 \ \ + height (balR l a r) = height l + m + 2 \ + height (balR l a r) = height l + m + 3" +apply (cases r) + apply(auto simp add:node_def balR_def split:tree.split) + by arith+ + +lemma height_node[simp]: "height(node l a r) = max (height l) (height r) + 1" +by (simp add: node_def) + +lemma height_balL2: + "\ hbt l; hbt r; height l \ height r + m + 2 \ \ + height (balL l a r) = 1 + max (height l) (height r)" +by (cases l, cases r) (simp_all add: balL_def) + +lemma height_balR2: + "\ hbt l; hbt r; height r \ height l + m + 2 \ \ + height (balR l a r) = 1 + max (height l) (height r)" +by (cases l, cases r) (simp_all add: balR_def) + +lemma hbt_balL: + "\ hbt l; hbt r; height r - m - 1 \ height l \ height l \ height r + m + 2 \ \ hbt(balL l a r)" +by(cases l, cases r) + (auto simp: balL_def node_def split!: if_split tree.split) + +lemma hbt_balR: + "\ hbt l; hbt r; height l - m - 1 \ height r \ height r \ height l + m + 2 \ \ hbt(balR l a r)" +by(cases r, cases l) + (auto simp: balR_def node_def split!: if_split tree.split) + +text\Insertion maintains @{const hbt}. Requires simultaneous proof.\ + +theorem hbt_insert: + assumes "hbt t" + shows "hbt(insert x t)" + "(height (insert x t) = height t \ height (insert x t) = height t + 1)" +using assms +proof (induction t rule: tree2_induct) + case (Node l a _ r) + case 1 + show ?case + proof(cases "x = a") + case True with Node 1 show ?thesis by simp + next + case False + show ?thesis + proof(cases "xx\a\ show ?thesis by (auto intro!:hbt_balR) + qed + qed + case 2 + show ?case + proof(cases "x = a") + case True with Node 1 show ?thesis by simp + next + case False + show ?thesis + proof(cases "xx < a\ show ?thesis by (auto simp: height_balL2) + next + case True + hence "(height (balL (insert x l) a r) = height r + m + 2) \ + (height (balL (insert x l) a r) = height r + m + 3)" (is "?A \ ?B") + using Node 2 by (intro height_balL) simp_all + thus ?thesis + proof + assume ?A with Node(2) 2 True \x < a\ show ?thesis by (auto) + next + assume ?B with Node(2) 1 True \x < a\ show ?thesis by (simp) arith + qed + qed + next + case False + show ?thesis + proof(cases "height (insert x r) = height l + m + 2") + case False with Node 2 \\x < a\ show ?thesis by (auto simp: height_balR2) + next + case True + hence "(height (balR l a (insert x r)) = height l + m + 2) \ + (height (balR l a (insert x r)) = height l + m + 3)" (is "?A \ ?B") + using Node 2 by (intro height_balR) simp_all + thus ?thesis + proof + assume ?A with Node(4) 2 True \\x < a\ show ?thesis by (auto) + next + assume ?B with Node(4) 1 True \\x < a\ show ?thesis by (simp) arith + qed + qed + qed + qed +qed simp_all + +text \Now an automatic proof without lemmas:\ + +theorem hbt_insert_auto: "hbt t \ + hbt(insert x t) \ height (insert x t) \ {height t, height t + 1}" +apply (induction t rule: tree2_induct) + (* if you want to save a few secs: apply (auto split!: if_split) *) + apply (auto simp: balL_def balR_def node_def max_absorb1 max_absorb2 split!: if_split tree.split) +done + + +subsubsection \Deletion maintains balance\ + +lemma hbt_split_max: + "\ hbt t; t \ Leaf \ \ + hbt (fst (split_max t)) \ + height t \ {height(fst (split_max t)), height(fst (split_max t)) + 1}" +by(induct t rule: split_max_induct) + (auto simp: balL_def node_def max_absorb2 split!: prod.split if_split tree.split) + +text\Deletion maintains @{const hbt}:\ + +theorem hbt_delete: + assumes "hbt t" + shows "hbt(delete x t)" and "height t = (height (delete x t)) \ height t = height (delete x t) + 1" +using assms +proof (induct t rule: tree2_induct) + case (Node l a n r) + case 1 + thus ?case + using Node hbt_split_max[of l] by (auto intro!: hbt_balL hbt_balR split: prod.split) + case 2 + show ?case + proof(cases "x = a") + case True then show ?thesis using 1 hbt_split_max[of l] + by(auto simp: balR_def max_absorb2 split!: if_splits prod.split tree.split) + next + case False + show ?thesis + proof(cases "xx < a\ show ?thesis by(auto simp: balR_def) + next + case True + hence "(height (balR (delete x l) a r) = height (delete x l) + m + 2) \ + height (balR (delete x l) a r) = height (delete x l) + m + 3" (is "?A \ ?B") + using Node 2 by (intro height_balR) auto + thus ?thesis + proof + assume ?A with \x < a\ Node 2 show ?thesis by(auto simp: balR_def split!: if_splits) + next + assume ?B with \x < a\ Node 2 show ?thesis by(auto simp: balR_def split!: if_splits) + qed + qed + next + case False + show ?thesis + proof(cases "height l = height (delete x r) + m + 2") + case False with Node 1 \\x < a\ \x \ a\ show ?thesis by(auto simp: balL_def) + next + case True + hence "(height (balL l a (delete x r)) = height (delete x r) + m + 2) \ + height (balL l a (delete x r)) = height (delete x r) + m + 3" (is "?A \ ?B") + using Node 2 by (intro height_balL) auto + thus ?thesis + proof + assume ?A with \\x < a\ \x \ a\ Node 2 show ?thesis by(auto simp: balL_def split: if_splits) + next + assume ?B with \\x < a\ \x \ a\ Node 2 show ?thesis by(auto simp: balL_def split: if_splits) + qed + qed + qed + qed +qed simp_all + +text \A more automatic proof. +Complete automation as for insertion seems hard due to resource requirements.\ + +theorem hbt_delete_auto: + assumes "hbt t" + shows "hbt(delete x t)" and "height t \ {height (delete x t), height (delete x t) + 1}" +using assms +proof (induct t rule: tree2_induct) + case (Node l a n r) + case 1 + show ?case + proof(cases "x = a") + case True thus ?thesis + using 1 hbt_split_max[of l] by (auto intro!: hbt_balR split: prod.split) + next + case False thus ?thesis + using Node 1 by (auto intro!: hbt_balL hbt_balR) + qed + case 2 + show ?case + proof(cases "x = a") + case True thus ?thesis + using 1 hbt_split_max[of l] + by(auto simp: balR_def max_absorb2 split!: if_splits prod.split tree.split) + next + case False thus ?thesis + using height_balL[of l "delete x r" a] height_balR[of "delete x l" r a] Node 1 + by(auto simp: balL_def balR_def split!: if_split) + qed +qed simp_all + + +subsection "Overall correctness" + +interpretation S: Set_by_Ordered +where empty = empty and isin = isin and insert = insert and delete = delete +and inorder = inorder and inv = hbt +proof (standard, goal_cases) + case 1 show ?case by (simp add: empty_def) +next + case 2 thus ?case by(simp add: isin_set_inorder) +next + case 3 thus ?case by(simp add: inorder_insert) +next + case 4 thus ?case by(simp add: inorder_delete) +next + case 5 thus ?case by (simp add: empty_def) +next + case 6 thus ?case by (simp add: hbt_insert(1)) +next + case 7 thus ?case by (simp add: hbt_delete(1)) +qed + +end diff -r 35a951ed2e82 -r 49d8d8ad7e43 src/HOL/ROOT --- a/src/HOL/ROOT Fri Apr 24 13:16:42 2020 +0000 +++ b/src/HOL/ROOT Sat Apr 25 16:31:43 2020 +0200 @@ -240,6 +240,7 @@ Tree_Map Interval_Tree AVL_Map + Height_Balanced_Tree RBT_Set2 RBT_Map Tree23_Map