| 71810 |      1 | (* Author: Tobias Nipkow *)
 | 
| 71801 |      2 | 
 | 
|  |      3 | section "Height-Balanced Trees"
 | 
|  |      4 | 
 | 
|  |      5 | theory Height_Balanced_Tree
 | 
|  |      6 | imports
 | 
|  |      7 |   Cmp
 | 
|  |      8 |   Isin2
 | 
|  |      9 | begin
 | 
|  |     10 | 
 | 
|  |     11 | text \<open>Height-balanced trees (HBTs) can be seen as a generalization of AVL trees.
 | 
|  |     12 | The code and the proofs were obtained by small modifications of the AVL theories.
 | 
|  |     13 | This is an implementation of sets via HBTs.\<close>
 | 
|  |     14 | 
 | 
| 71818 |     15 | type_synonym 'a tree_ht = "('a*nat) tree"
 | 
| 71801 |     16 | 
 | 
| 71818 |     17 | definition empty :: "'a tree_ht" where
 | 
| 71801 |     18 | "empty = Leaf"
 | 
|  |     19 | 
 | 
| 71812 |     20 | text \<open>The maximal amount by which the height of two siblings may differ:\<close>
 | 
| 71801 |     21 | 
 | 
| 71812 |     22 | locale HBT =
 | 
|  |     23 | fixes m :: nat
 | 
|  |     24 | assumes [arith]: "m > 0"
 | 
|  |     25 | begin
 | 
| 71801 |     26 | 
 | 
|  |     27 | text \<open>Invariant:\<close>
 | 
|  |     28 | 
 | 
| 71818 |     29 | fun hbt :: "'a tree_ht \<Rightarrow> bool" where
 | 
| 71801 |     30 | "hbt Leaf = True" |
 | 
|  |     31 | "hbt (Node l (a,n) r) =
 | 
| 71812 |     32 |  (abs(int(height l) - int(height r)) \<le> int(m) \<and>
 | 
| 71801 |     33 |   n = max (height l) (height r) + 1 \<and> hbt l \<and> hbt r)"
 | 
|  |     34 | 
 | 
| 71818 |     35 | fun ht :: "'a tree_ht \<Rightarrow> nat" where
 | 
| 71801 |     36 | "ht Leaf = 0" |
 | 
|  |     37 | "ht (Node l (a,n) r) = n"
 | 
|  |     38 | 
 | 
| 71818 |     39 | definition node :: "'a tree_ht \<Rightarrow> 'a \<Rightarrow> 'a tree_ht \<Rightarrow> 'a tree_ht" where
 | 
| 71801 |     40 | "node l a r = Node l (a, max (ht l) (ht r) + 1) r"
 | 
|  |     41 | 
 | 
| 71818 |     42 | definition balL :: "'a tree_ht \<Rightarrow> 'a \<Rightarrow> 'a tree_ht \<Rightarrow> 'a tree_ht" where
 | 
| 71801 |     43 | "balL AB b C =
 | 
| 71812 |     44 |   (if ht AB = ht C + m + 1 then
 | 
| 71801 |     45 |      case AB of 
 | 
|  |     46 |        Node A (a, _) B \<Rightarrow>
 | 
|  |     47 |          if ht A \<ge> ht B then node A a (node B b C)
 | 
|  |     48 |          else
 | 
|  |     49 |            case B of
 | 
|  |     50 |              Node B\<^sub>1 (ab, _) B\<^sub>2 \<Rightarrow> node (node A a B\<^sub>1) ab (node B\<^sub>2 b C)
 | 
|  |     51 |    else node AB b C)"
 | 
|  |     52 | 
 | 
| 71818 |     53 | definition balR :: "'a tree_ht \<Rightarrow> 'a \<Rightarrow> 'a tree_ht \<Rightarrow> 'a tree_ht" where
 | 
| 71801 |     54 | "balR A a BC =
 | 
| 71812 |     55 |    (if ht BC = ht A + m + 1 then
 | 
| 71801 |     56 |       case BC of
 | 
|  |     57 |         Node B (b, _) C \<Rightarrow>
 | 
|  |     58 |           if ht B \<le> ht C then node (node A a B) b C
 | 
|  |     59 |           else
 | 
|  |     60 |             case B of
 | 
|  |     61 |               Node B\<^sub>1 (ab, _) B\<^sub>2 \<Rightarrow> node (node A a B\<^sub>1) ab (node B\<^sub>2 b C)
 | 
|  |     62 |   else node A a BC)"
 | 
|  |     63 | 
 | 
| 71818 |     64 | fun insert :: "'a::linorder \<Rightarrow> 'a tree_ht \<Rightarrow> 'a tree_ht" where
 | 
| 71801 |     65 | "insert x Leaf = Node Leaf (x, 1) Leaf" |
 | 
|  |     66 | "insert x (Node l (a, n) r) = (case cmp x a of
 | 
|  |     67 |    EQ \<Rightarrow> Node l (a, n) r |
 | 
|  |     68 |    LT \<Rightarrow> balL (insert x l) a r |
 | 
|  |     69 |    GT \<Rightarrow> balR l a (insert x r))"
 | 
|  |     70 | 
 | 
| 71818 |     71 | fun split_max :: "'a tree_ht \<Rightarrow> 'a tree_ht * 'a" where
 | 
| 71801 |     72 | "split_max (Node l (a, _) r) =
 | 
|  |     73 |   (if r = Leaf then (l,a) else let (r',a') = split_max r in (balL l a r', a'))"
 | 
|  |     74 | 
 | 
|  |     75 | lemmas split_max_induct = split_max.induct[case_names Node Leaf]
 | 
|  |     76 | 
 | 
| 71818 |     77 | fun delete :: "'a::linorder \<Rightarrow> 'a tree_ht \<Rightarrow> 'a tree_ht" where
 | 
| 71801 |     78 | "delete _ Leaf = Leaf" |
 | 
|  |     79 | "delete x (Node l (a, n) r) =
 | 
|  |     80 |   (case cmp x a of
 | 
|  |     81 |      EQ \<Rightarrow> if l = Leaf then r
 | 
|  |     82 |            else let (l', a') = split_max l in balR l' a' r |
 | 
|  |     83 |      LT \<Rightarrow> balR (delete x l) a r |
 | 
|  |     84 |      GT \<Rightarrow> balL l a (delete x r))"
 | 
|  |     85 | 
 | 
|  |     86 | 
 | 
|  |     87 | subsection \<open>Functional Correctness Proofs\<close>
 | 
|  |     88 | 
 | 
|  |     89 | 
 | 
|  |     90 | subsubsection "Proofs for insert"
 | 
|  |     91 | 
 | 
|  |     92 | lemma inorder_balL:
 | 
|  |     93 |   "inorder (balL l a r) = inorder l @ a # inorder r"
 | 
|  |     94 | by (auto simp: node_def balL_def split:tree.splits)
 | 
|  |     95 | 
 | 
|  |     96 | lemma inorder_balR:
 | 
|  |     97 |   "inorder (balR l a r) = inorder l @ a # inorder r"
 | 
|  |     98 | by (auto simp: node_def balR_def split:tree.splits)
 | 
|  |     99 | 
 | 
|  |    100 | theorem inorder_insert:
 | 
|  |    101 |   "sorted(inorder t) \<Longrightarrow> inorder(insert x t) = ins_list x (inorder t)"
 | 
|  |    102 | by (induct t) 
 | 
|  |    103 |    (auto simp: ins_list_simps inorder_balL inorder_balR)
 | 
|  |    104 | 
 | 
|  |    105 | 
 | 
|  |    106 | subsubsection "Proofs for delete"
 | 
|  |    107 | 
 | 
|  |    108 | lemma inorder_split_maxD:
 | 
|  |    109 |   "\<lbrakk> split_max t = (t',a); t \<noteq> Leaf \<rbrakk> \<Longrightarrow>
 | 
|  |    110 |    inorder t' @ [a] = inorder t"
 | 
|  |    111 | by(induction t arbitrary: t' rule: split_max.induct)
 | 
|  |    112 |   (auto simp: inorder_balL split: if_splits prod.splits tree.split)
 | 
|  |    113 | 
 | 
|  |    114 | theorem inorder_delete:
 | 
|  |    115 |   "sorted(inorder t) \<Longrightarrow> inorder (delete x t) = del_list x (inorder t)"
 | 
|  |    116 | by(induction t)
 | 
|  |    117 |   (auto simp: del_list_simps inorder_balL inorder_balR inorder_split_maxD split: prod.splits)
 | 
|  |    118 | 
 | 
|  |    119 | 
 | 
|  |    120 | subsection \<open>Invariant preservation\<close>
 | 
|  |    121 | 
 | 
|  |    122 | 
 | 
|  |    123 | subsubsection \<open>Insertion maintains balance\<close>
 | 
|  |    124 | 
 | 
|  |    125 | declare Let_def [simp]
 | 
|  |    126 | 
 | 
|  |    127 | lemma ht_height[simp]: "hbt t \<Longrightarrow> ht t = height t"
 | 
|  |    128 | by (cases t rule: tree2_cases) simp_all
 | 
|  |    129 | 
 | 
|  |    130 | text \<open>First, a fast but relatively manual proof with many lemmas:\<close>
 | 
|  |    131 | 
 | 
|  |    132 | lemma height_balL:
 | 
| 71812 |    133 |   "\<lbrakk> hbt l; hbt r; height l = height r + m + 1 \<rbrakk> \<Longrightarrow>
 | 
|  |    134 |    height (balL l a r) \<in> {height r + m + 1, height r + m + 2}"
 | 
| 71818 |    135 | by (auto simp:node_def balL_def split:tree.split)
 | 
| 71801 |    136 | 
 | 
|  |    137 | lemma height_balR:
 | 
| 71812 |    138 |   "\<lbrakk> hbt l; hbt r; height r = height l + m + 1 \<rbrakk> \<Longrightarrow>
 | 
|  |    139 |    height (balR l a r) \<in> {height l + m + 1, height l + m + 2}"
 | 
| 71818 |    140 | by(auto simp add:node_def balR_def split:tree.split)
 | 
| 71801 |    141 | 
 | 
|  |    142 | lemma height_node[simp]: "height(node l a r) = max (height l) (height r) + 1"
 | 
|  |    143 | by (simp add: node_def)
 | 
|  |    144 | 
 | 
|  |    145 | lemma height_balL2:
 | 
| 71812 |    146 |   "\<lbrakk> hbt l; hbt r; height l \<noteq> height r + m + 1 \<rbrakk> \<Longrightarrow>
 | 
| 71801 |    147 |    height (balL l a r) = 1 + max (height l) (height r)"
 | 
| 71818 |    148 | by (simp_all add: balL_def)
 | 
| 71801 |    149 | 
 | 
|  |    150 | lemma height_balR2:
 | 
| 71812 |    151 |   "\<lbrakk> hbt l;  hbt r;  height r \<noteq> height l + m + 1 \<rbrakk> \<Longrightarrow>
 | 
| 71801 |    152 |    height (balR l a r) = 1 + max (height l) (height r)"
 | 
| 71818 |    153 | by (simp_all add: balR_def)
 | 
| 71801 |    154 | 
 | 
|  |    155 | lemma hbt_balL: 
 | 
| 71812 |    156 |   "\<lbrakk> hbt l; hbt r; height r - m \<le> height l \<and> height l \<le> height r + m + 1 \<rbrakk> \<Longrightarrow> hbt(balL l a r)"
 | 
| 71818 |    157 | by(auto simp: balL_def node_def max_def split!: if_splits tree.split)
 | 
| 71801 |    158 | 
 | 
|  |    159 | lemma hbt_balR: 
 | 
| 71812 |    160 |   "\<lbrakk> hbt l; hbt r; height l - m \<le> height r \<and> height r \<le> height l + m + 1 \<rbrakk> \<Longrightarrow> hbt(balR l a r)"
 | 
| 71818 |    161 | by(auto simp: balR_def node_def max_def split!: if_splits tree.split)
 | 
| 71801 |    162 | 
 | 
|  |    163 | text\<open>Insertion maintains @{const hbt}. Requires simultaneous proof.\<close>
 | 
|  |    164 | 
 | 
|  |    165 | theorem hbt_insert:
 | 
| 71812 |    166 |   "hbt t \<Longrightarrow> hbt(insert x t)"
 | 
|  |    167 |   "hbt t \<Longrightarrow> height (insert x t) \<in> {height t, height t + 1}"
 | 
| 71801 |    168 | proof (induction t rule: tree2_induct)
 | 
|  |    169 |   case (Node l a _ r)
 | 
|  |    170 |   case 1
 | 
|  |    171 |   show ?case
 | 
|  |    172 |   proof(cases "x = a")
 | 
|  |    173 |     case True with Node 1 show ?thesis by simp
 | 
|  |    174 |   next
 | 
|  |    175 |     case False
 | 
|  |    176 |     show ?thesis 
 | 
|  |    177 |     proof(cases "x<a")
 | 
| 71810 |    178 |       case True with 1 Node(1,2) show ?thesis by (auto intro!: hbt_balL)
 | 
| 71801 |    179 |     next
 | 
| 71810 |    180 |       case False with 1 Node(3,4) \<open>x\<noteq>a\<close> show ?thesis by (auto intro!: hbt_balR)
 | 
| 71801 |    181 |     qed
 | 
|  |    182 |   qed
 | 
|  |    183 |   case 2
 | 
|  |    184 |   show ?case
 | 
|  |    185 |   proof(cases "x = a")
 | 
| 71810 |    186 |     case True with 2 show ?thesis by simp
 | 
| 71801 |    187 |   next
 | 
|  |    188 |     case False
 | 
|  |    189 |     show ?thesis 
 | 
|  |    190 |     proof(cases "x<a")
 | 
|  |    191 |       case True
 | 
|  |    192 |       show ?thesis
 | 
| 71812 |    193 |       proof(cases "height (insert x l) = height r + m + 1")
 | 
| 71810 |    194 |         case False with 2 Node(1,2) \<open>x < a\<close> show ?thesis by (auto simp: height_balL2)
 | 
| 71801 |    195 |       next
 | 
|  |    196 |         case True 
 | 
| 71812 |    197 |         hence "(height (balL (insert x l) a r) = height r + m + 1) \<or>
 | 
|  |    198 |           (height (balL (insert x l) a r) = height r + m + 2)" (is "?A \<or> ?B")
 | 
| 71810 |    199 |           using 2 Node(1,2) height_balL[OF _ _ True] by simp
 | 
| 71801 |    200 |         thus ?thesis
 | 
|  |    201 |         proof
 | 
| 71810 |    202 |           assume ?A with 2 Node(2) True \<open>x < a\<close> show ?thesis by (auto)
 | 
| 71801 |    203 |         next
 | 
| 71810 |    204 |           assume ?B with 2 Node(2) True \<open>x < a\<close> show ?thesis by (simp) arith
 | 
| 71801 |    205 |         qed
 | 
|  |    206 |       qed
 | 
|  |    207 |     next
 | 
|  |    208 |       case False
 | 
|  |    209 |       show ?thesis 
 | 
| 71812 |    210 |       proof(cases "height (insert x r) = height l + m + 1")
 | 
| 71810 |    211 |         case False with 2 Node(3,4) \<open>\<not>x < a\<close> show ?thesis by (auto simp: height_balR2)
 | 
| 71801 |    212 |       next
 | 
|  |    213 |         case True 
 | 
| 71812 |    214 |         hence "(height (balR l a (insert x r)) = height l + m + 1) \<or>
 | 
|  |    215 |           (height (balR l a (insert x r)) = height l + m + 2)"  (is "?A \<or> ?B")
 | 
| 71810 |    216 |           using Node 2 height_balR[OF _ _ True] by simp
 | 
| 71801 |    217 |         thus ?thesis 
 | 
|  |    218 |         proof
 | 
| 71810 |    219 |           assume ?A with 2 Node(4) True \<open>\<not>x < a\<close> show ?thesis by (auto)
 | 
| 71801 |    220 |         next
 | 
| 71810 |    221 |           assume ?B with 2 Node(4) True \<open>\<not>x < a\<close> show ?thesis by (simp) arith
 | 
| 71801 |    222 |         qed
 | 
|  |    223 |       qed
 | 
|  |    224 |     qed
 | 
|  |    225 |   qed
 | 
|  |    226 | qed simp_all
 | 
|  |    227 | 
 | 
|  |    228 | text \<open>Now an automatic proof without lemmas:\<close>
 | 
|  |    229 | 
 | 
|  |    230 | theorem hbt_insert_auto: "hbt t \<Longrightarrow>
 | 
|  |    231 |   hbt(insert x t) \<and> height (insert x t) \<in> {height t, height t + 1}"
 | 
|  |    232 | apply (induction t rule: tree2_induct)
 | 
|  |    233 |  (* if you want to save a few secs: apply (auto split!: if_split) *)
 | 
|  |    234 |  apply (auto simp: balL_def balR_def node_def max_absorb1 max_absorb2 split!: if_split tree.split)
 | 
|  |    235 | done
 | 
|  |    236 | 
 | 
|  |    237 | 
 | 
|  |    238 | subsubsection \<open>Deletion maintains balance\<close>
 | 
|  |    239 | 
 | 
|  |    240 | lemma hbt_split_max:
 | 
|  |    241 |   "\<lbrakk> hbt t; t \<noteq> Leaf \<rbrakk> \<Longrightarrow>
 | 
|  |    242 |   hbt (fst (split_max t)) \<and>
 | 
|  |    243 |   height t \<in> {height(fst (split_max t)), height(fst (split_max t)) + 1}"
 | 
|  |    244 | by(induct t rule: split_max_induct)
 | 
|  |    245 |   (auto simp: balL_def node_def max_absorb2 split!: prod.split if_split tree.split)
 | 
|  |    246 | 
 | 
|  |    247 | text\<open>Deletion maintains @{const hbt}:\<close>
 | 
|  |    248 | 
 | 
|  |    249 | theorem hbt_delete:
 | 
| 71810 |    250 |   "hbt t \<Longrightarrow> hbt(delete x t)"
 | 
|  |    251 |   "hbt t \<Longrightarrow> height t \<in> {height (delete x t), height (delete x t) + 1}"
 | 
| 71801 |    252 | proof (induct t rule: tree2_induct)
 | 
|  |    253 |   case (Node l a n r)
 | 
|  |    254 |   case 1
 | 
|  |    255 |   thus ?case
 | 
|  |    256 |     using Node hbt_split_max[of l] by (auto intro!: hbt_balL hbt_balR split: prod.split)
 | 
|  |    257 |   case 2
 | 
|  |    258 |   show ?case
 | 
|  |    259 |   proof(cases "x = a")
 | 
|  |    260 |     case True then show ?thesis using 1 hbt_split_max[of l]
 | 
|  |    261 |       by(auto simp: balR_def max_absorb2 split!: if_splits prod.split tree.split)
 | 
|  |    262 |   next
 | 
|  |    263 |     case False
 | 
|  |    264 |     show ?thesis 
 | 
|  |    265 |     proof(cases "x<a")
 | 
|  |    266 |       case True
 | 
|  |    267 |       show ?thesis
 | 
| 71812 |    268 |       proof(cases "height r = height (delete x l) + m + 1")
 | 
| 71801 |    269 |         case False with Node 1 \<open>x < a\<close> show ?thesis by(auto simp: balR_def)
 | 
|  |    270 |       next
 | 
|  |    271 |         case True 
 | 
| 71812 |    272 |         hence "(height (balR (delete x l) a r) = height (delete x l) + m + 1) \<or>
 | 
|  |    273 |           height (balR (delete x l) a r) = height (delete x l) + m + 2" (is "?A \<or> ?B")
 | 
| 71810 |    274 |           using Node 2height_balR[OF _ _ True] by simp
 | 
| 71801 |    275 |         thus ?thesis 
 | 
|  |    276 |         proof
 | 
|  |    277 |           assume ?A with \<open>x < a\<close> Node 2 show ?thesis by(auto simp: balR_def split!: if_splits)
 | 
|  |    278 |         next
 | 
|  |    279 |           assume ?B with \<open>x < a\<close> Node 2 show ?thesis by(auto simp: balR_def split!: if_splits)
 | 
|  |    280 |         qed
 | 
|  |    281 |       qed
 | 
|  |    282 |     next
 | 
|  |    283 |       case False
 | 
|  |    284 |       show ?thesis
 | 
| 71812 |    285 |       proof(cases "height l = height (delete x r) + m + 1")
 | 
| 71801 |    286 |         case False with Node 1 \<open>\<not>x < a\<close> \<open>x \<noteq> a\<close> show ?thesis by(auto simp: balL_def)
 | 
|  |    287 |       next
 | 
|  |    288 |         case True 
 | 
| 71812 |    289 |         hence "(height (balL l a (delete x r)) = height (delete x r) + m + 1) \<or>
 | 
|  |    290 |           height (balL l a (delete x r)) = height (delete x r) + m + 2" (is "?A \<or> ?B")
 | 
| 71810 |    291 |           using Node 2 height_balL[OF _ _ True] by simp
 | 
| 71801 |    292 |         thus ?thesis 
 | 
|  |    293 |         proof
 | 
|  |    294 |           assume ?A with \<open>\<not>x < a\<close> \<open>x \<noteq> a\<close> Node 2 show ?thesis by(auto simp: balL_def split: if_splits)
 | 
|  |    295 |         next
 | 
|  |    296 |           assume ?B with \<open>\<not>x < a\<close> \<open>x \<noteq> a\<close> Node 2 show ?thesis by(auto simp: balL_def split: if_splits)
 | 
|  |    297 |         qed
 | 
|  |    298 |       qed
 | 
|  |    299 |     qed
 | 
|  |    300 |   qed
 | 
|  |    301 | qed simp_all
 | 
|  |    302 | 
 | 
|  |    303 | text \<open>A more automatic proof.
 | 
|  |    304 | Complete automation as for insertion seems hard due to resource requirements.\<close>
 | 
|  |    305 | 
 | 
|  |    306 | theorem hbt_delete_auto:
 | 
| 71810 |    307 |   "hbt t \<Longrightarrow> hbt(delete x t)"
 | 
|  |    308 |   "hbt t \<Longrightarrow> height t \<in> {height (delete x t), height (delete x t) + 1}"
 | 
| 71801 |    309 | proof (induct t rule: tree2_induct)
 | 
|  |    310 |   case (Node l a n r)
 | 
|  |    311 |   case 1
 | 
| 71810 |    312 |   thus ?case
 | 
|  |    313 |     using Node hbt_split_max[of l] by (auto intro!: hbt_balL hbt_balR split: prod.split)
 | 
| 71801 |    314 |   case 2
 | 
|  |    315 |   show ?case
 | 
|  |    316 |   proof(cases "x = a")
 | 
|  |    317 |     case True thus ?thesis
 | 
| 71810 |    318 |       using 2 hbt_split_max[of l]
 | 
| 71801 |    319 |       by(auto simp: balR_def max_absorb2 split!: if_splits prod.split tree.split)
 | 
|  |    320 |   next
 | 
|  |    321 |     case False thus ?thesis 
 | 
| 71810 |    322 |       using height_balL[of l "delete x r" a] height_balR[of "delete x l" r a] 2 Node
 | 
| 71801 |    323 |         by(auto simp: balL_def balR_def split!: if_split)
 | 
|  |    324 |   qed
 | 
|  |    325 | qed simp_all
 | 
|  |    326 | 
 | 
|  |    327 | 
 | 
|  |    328 | subsection "Overall correctness"
 | 
|  |    329 | 
 | 
|  |    330 | interpretation S: Set_by_Ordered
 | 
|  |    331 | where empty = empty and isin = isin and insert = insert and delete = delete
 | 
|  |    332 | and inorder = inorder and inv = hbt
 | 
|  |    333 | proof (standard, goal_cases)
 | 
|  |    334 |   case 1 show ?case by (simp add: empty_def)
 | 
|  |    335 | next
 | 
|  |    336 |   case 2 thus ?case by(simp add: isin_set_inorder)
 | 
|  |    337 | next
 | 
|  |    338 |   case 3 thus ?case by(simp add: inorder_insert)
 | 
|  |    339 | next
 | 
|  |    340 |   case 4 thus ?case by(simp add: inorder_delete)
 | 
|  |    341 | next
 | 
|  |    342 |   case 5 thus ?case by (simp add: empty_def)
 | 
|  |    343 | next
 | 
|  |    344 |   case 6 thus ?case by (simp add: hbt_insert(1))
 | 
|  |    345 | next
 | 
|  |    346 |   case 7 thus ?case by (simp add: hbt_delete(1))
 | 
|  |    347 | qed
 | 
|  |    348 | 
 | 
|  |    349 | end
 | 
| 71812 |    350 | 
 | 
|  |    351 | end
 |