src/HOL/Data_Structures/AVL_Set.thy
author nipkow
Fri Nov 13 12:06:50 2015 +0100 (2015-11-13)
changeset 61647 5121b9a57cce
parent 61588 1d2907d0ed73
child 61678 b594e9277be3
permissions -rw-r--r--
tuned
     1 (*
     2 Author:     Tobias Nipkow
     3 Derived from AFP entry AVL.
     4 *)
     5 
     6 section "AVL Tree Implementation of Sets"
     7 
     8 theory AVL_Set
     9 imports Cmp Isin2
    10 begin
    11 
    12 type_synonym 'a avl_tree = "('a,nat) tree"
       (* 'a avl_tree: a tree whose nodes carry an additional nat annotation.
          The invariant avl defined below requires that annotation to be the
          height of the node. *)
    13 
    14 text {* Invariant: *}
    15 
    16 fun avl :: "'a avl_tree \<Rightarrow> bool" where
       (* AVL invariant.  Leaf always satisfies it.  For Node h l a r:
            - the heights of l and r differ by at most one,
            - the annotation h equals max (height l) (height r) + 1,
            - both subtrees satisfy the invariant themselves. *)
    17 "avl Leaf = True" |
    18 "avl (Node h l a r) =
    19  ((height l = height r \<or> height l = height r + 1 \<or> height r = height l + 1) \<and> 
    20   h = max (height l) (height r) + 1 \<and> avl l \<and> avl r)"
    21 
    22 fun ht :: "'a avl_tree \<Rightarrow> nat" where
       (* Returns the annotation stored at the root, or 0 for Leaf.
          It does not recurse into the subtrees. *)
    23 "ht Leaf = 0" |
    24 "ht (Node h l a r) = h"
    25 
    26 definition node :: "'a avl_tree \<Rightarrow> 'a \<Rightarrow> 'a avl_tree \<Rightarrow> 'a avl_tree" where
       (* Smart constructor: builds Node l a r with the annotation computed
          from the annotations (ht) of the two subtrees. *)
    27 "node l a r = Node (max (ht l) (ht r) + 1) l a r"
    28 
    29 definition balL :: "'a avl_tree \<Rightarrow> 'a \<Rightarrow> 'a avl_tree \<Rightarrow> 'a avl_tree" where
       (* Combines l, a, r, rebalancing when the left side is too high.
          If ht l = ht r + 2 the root of l (element b) is rotated up:
            - if l's right child br is strictly higher than its left child bl,
              the root c of br becomes the new root (double rotation);
            - otherwise b becomes the new root (single rotation).
          In every other situation the result is simply node l a r.
          The case expressions give no branch for Leaf. *)
    30 "balL l a r = (
    31   if ht l = ht r + 2 then (case l of 
    32     Node _ bl b br \<Rightarrow> (if ht bl < ht br
    33     then case br of
    34       Node _ cl c cr \<Rightarrow> node (node bl b cl) c (node cr a r)
    35     else node bl b (node br a r)))
    36   else node l a r)"
    37 
    38 definition balR :: "'a avl_tree \<Rightarrow> 'a \<Rightarrow> 'a avl_tree \<Rightarrow> 'a avl_tree" where
       (* Mirror image of balL: combines l, a, r, rebalancing when the right
          side is too high.
          If ht r = ht l + 2 the root of r (element b) is rotated up:
            - if r's left child bl is strictly higher than its right child br,
              the root c of bl becomes the new root (double rotation);
            - otherwise b becomes the new root (single rotation).
          In every other situation the result is simply node l a r.
          The case expressions give no branch for Leaf. *)
    39 "balR l a r = (
    40   if ht r = ht l + 2 then (case r of
    41     Node _ bl b br \<Rightarrow> (if ht bl > ht br
    42     then case bl of
    43       Node _ cl c cr \<Rightarrow> node (node l a cl) c (node cr b br)
    44     else node (node l a bl) b br))
    45   else node l a r)"
    46 
    47 fun insert :: "'a::cmp \<Rightarrow> 'a avl_tree \<Rightarrow> 'a avl_tree" where
       (* Inserts x, descending according to cmp x a.
            - Leaf: a fresh node with annotation 1.
            - EQ:   x is already present, the tree is returned unchanged.
            - LT:   insert into the left subtree, then rebalance with balL.
            - GT:   insert into the right subtree, then rebalance with balR. *)
    48 "insert x Leaf = Node 1 Leaf x Leaf" |
    49 "insert x (Node h l a r) = (case cmp x a of
    50    EQ \<Rightarrow> Node h l a r |
    51    LT \<Rightarrow> balL (insert x l) a r |
    52    GT \<Rightarrow> balR l a (insert x r))"
    53 
    54 fun del_max :: "'a avl_tree \<Rightarrow> 'a avl_tree * 'a" where
       (* Removes the rightmost element of a non-empty tree and returns the
          pair (remaining tree, removed element).  If the right subtree is
          Leaf the root element itself is removed and l remains; otherwise the
          function recurses into r and recombines with balL.
          Only the Node pattern is given; there is no equation for Leaf. *)
    55 "del_max (Node _ l a r) = (if r = Leaf then (l,a)
    56   else let (r',a') = del_max r in (balL l a r', a'))"
    57 
    58 lemmas del_max_induct = del_max.induct[case_names Node Leaf]
       (* del_max.induct with its cases named Node and Leaf, so that structured
          proofs (avl_del_max below) can select them with "case (Node ...)". *)
    59 
    60 fun del_root :: "'a avl_tree \<Rightarrow> 'a avl_tree" where
       (* Removes the root element of a non-empty tree.
            - left subtree is Leaf:  the right subtree is the result;
            - right subtree is Leaf: the left subtree is the result;
            - otherwise: the rightmost element a' of l (obtained with del_max)
              becomes the new root, and the tree is rebalanced with balR.
          There is no equation for Leaf. *)
    61 "del_root (Node h Leaf a r) = r" |
    62 "del_root (Node h l a Leaf) = l" |
    63 "del_root (Node h l a r) = (let (l', a') = del_max l in balR l' a' r)"
    64 
    65 lemmas del_root_cases = del_root.cases[case_names Leaf_t Node_Leaf Node_Node]
       (* del_root.cases with named cases Leaf_t, Node_Leaf and Node_Node; used
          by avl_del_root and height_del_root below, which treat Node_Node
          explicitly. *)
    66 
    67 fun delete :: "'a::cmp \<Rightarrow> 'a avl_tree \<Rightarrow> 'a avl_tree" where
       (* Deletes x, descending according to cmp x a.
            - Leaf: nothing to delete.
            - EQ:   the root holds x; remove it with del_root.
            - LT:   delete from the left subtree, then rebalance with balR
                    (the right side is the one that can now be too high).
            - GT:   delete from the right subtree, then rebalance with balL. *)
    68 "delete _ Leaf = Leaf" |
    69 "delete x (Node h l a r) = (case cmp x a of
    70    EQ \<Rightarrow> del_root (Node h l a r) |
    71    LT \<Rightarrow> balR (delete x l) a r |
    72    GT \<Rightarrow> balL l a (delete x r))"
    73 
    74 
    75 subsection {* Functional Correctness Proofs *}
    76 
    77 text{* Very different from the AFP/AVL proofs *}
    78 
    79 
    80 subsubsection "Proofs for insert"
    81 
    82 lemma inorder_balL:
       (* balL does not change the in-order sequence of elements.
          Proof: unfold balL and node, split the case expressions on trees. *)
    83   "inorder (balL l a r) = inorder l @ a # inorder r"
    84 by (auto simp: node_def balL_def split:tree.splits)
    85 
    86 lemma inorder_balR:
       (* balR does not change the in-order sequence of elements.
          Proof: unfold balR and node, split the case expressions on trees. *)
    87   "inorder (balR l a r) = inorder l @ a # inorder r"
    88 by (auto simp: node_def balR_def split:tree.splits)
    89 
    90 theorem inorder_insert:
       (* On a tree with sorted in-order list, insert behaves like ins_list
          on that list.  Proof by structural induction on t, using the
          inorder lemmas for balL and balR. *)
    91   "sorted(inorder t) \<Longrightarrow> inorder(insert x t) = ins_list x (inorder t)"
    92 by (induct t) 
    93    (auto simp: ins_list_simps inorder_balL inorder_balR)
    94 
    95 
    96 subsubsection "Proofs for delete"
    97 
    98 lemma inorder_del_maxD:
       (* If del_max splits a non-empty tree t into (t', a), then a is the
          last element of inorder t and t' holds the elements before it.
          Proof by the induction rule of del_max with t' generalised. *)
    99   "\<lbrakk> del_max t = (t',a); t \<noteq> Leaf \<rbrakk> \<Longrightarrow>
   100    inorder t' @ [a] = inorder t"
   101 by(induction t arbitrary: t' rule: del_max.induct)
   102   (auto simp: inorder_balL split: if_splits prod.splits tree.split)
   103 
   104 lemma inorder_del_root:
       (* del_root removes exactly the root element a from the in-order list.
          Proof by the induction rule of del_root, applied to the term
          Node h l a r with all of its components generalised. *)
   105   "inorder (del_root (Node h l a r)) = inorder l @ inorder r"
   106 by(induction "Node h l a r" arbitrary: l a r h rule: del_root.induct)
   107   (auto simp: inorder_balL inorder_balR inorder_del_maxD split: if_splits prod.splits)
   108 
   109 theorem inorder_delete:
       (* On a tree with sorted in-order list, delete behaves like del_list
          on that list.  Proof by structural induction on t, using the
          inorder lemmas for balL, balR, del_root and del_max. *)
   110   "sorted(inorder t) \<Longrightarrow> inorder (delete x t) = del_list x (inorder t)"
   111 by(induction t)
   112   (auto simp: del_list_simps inorder_balL inorder_balR
   113     inorder_del_root inorder_del_maxD split: prod.splits)
   114 
   115 
   116 subsubsection "Overall functional correctness"
   117 
   118 interpretation Set_by_Ordered
       (* Instantiates the locale Set_by_Ordered with the AVL operations and
          the trivial invariant \<lambda>_. True, i.e. only functional correctness
          is established here; balance is treated separately further down. *)
   119 where empty = Leaf and isin = isin and insert = insert and delete = delete
   120 and inorder = inorder and inv = "\<lambda>_. True"
   121 proof (standard, goal_cases)
   122   case 1 show ?case by simp
   123 next
   124   case 2 thus ?case by(simp add: isin_set)
   125 next
   126   case 3 thus ?case by(simp add: inorder_insert)
   127 next
   128   case 4 thus ?case by(simp add: inorder_delete)
       (* All remaining goals are closed by TrueI -- presumably the obligations
          about the invariant, which is constantly True here; confirm against
          the locale definition. *)
   129 qed (rule TrueI)+
   130 
   131 
   132 subsection {* AVL invariants *}
   133 
   134 text{* Essentially the AFP/AVL proofs *}
   135 
   136 
   137 subsubsection {* Insertion maintains AVL balance *}
   138 
   139 declare Let_def [simp]
       (* From this point on the simplifier unfolds let-expressions
          automatically; del_max and del_root are defined with let. *)
   140 
   141 lemma [simp]: "avl t \<Longrightarrow> ht t = height t"
       (* For trees satisfying the invariant, the stored annotation read by ht
          is the real height.  Declared [simp] (and left unnamed), so later
          proofs rewrite ht to height automatically whenever avl t is known. *)
   142 by (induct t) simp_all
   143 
   144 lemma height_balL:
       (* Rotation case of balL: if l is exactly two higher than r and both
          satisfy avl, the rebalanced tree has height r + 2 or r + 3. *)
   145   "\<lbrakk> height l = height r + 2; avl l; avl r \<rbrakk> \<Longrightarrow>
   146    height (balL l a r) = height r + 2 \<or>
   147    height (balL l a r) = height r + 3"
   148 by (cases l) (auto simp:node_def balL_def split:tree.split)
   149        
   150 lemma height_balR:
       (* Rotation case of balR: if r is exactly two higher than l and both
          satisfy avl, the rebalanced tree has height l + 2 or l + 3. *)
   151   "\<lbrakk> height r = height l + 2; avl l; avl r \<rbrakk> \<Longrightarrow>
   152    height (balR l a r) = height l + 2 \<or>
   153    height (balR l a r) = height l + 3"
   154 by (cases r) (auto simp add:node_def balR_def split:tree.split)
   155 
   156 lemma [simp]: "height(node l a r) = max (height l) (height r) + 1"
       (* The real height of a tree built by node; holds without any avl
          assumption.  Declared [simp] and left unnamed. *)
   157 by (simp add: node_def)
   158 
   159 lemma avl_node:
       (* node builds an AVL tree from two AVL trees whose heights differ by at
          most one.  Used as an introduction rule in avl_balL and avl_balR. *)
   160   "\<lbrakk> avl l; avl r;
   161      height l = height r \<or> height l = height r + 1 \<or> height r = height l + 1
   162    \<rbrakk> \<Longrightarrow> avl(node l a r)"
   163 by (auto simp add:max_def node_def)
   164 
   165 lemma height_balL2:
       (* Non-rotation case of balL: if l is not exactly two higher than r,
          balL behaves like node and the height is 1 + max of the subtree
          heights. *)
   166   "\<lbrakk> avl l; avl r; height l \<noteq> height r + 2 \<rbrakk> \<Longrightarrow>
   167    height (balL l a r) = (1 + max (height l) (height r))"
   168 by (cases l, cases r) (simp_all add: balL_def)
   169 
   170 lemma height_balR2:
       (* Non-rotation case of balR: if r is not exactly two higher than l,
          balR behaves like node and the height is 1 + max of the subtree
          heights. *)
   171   "\<lbrakk> avl l;  avl r;  height r \<noteq> height l + 2 \<rbrakk> \<Longrightarrow>
   172    height (balR l a r) = (1 + max (height l) (height r))"
   173 by (cases l, cases r) (simp_all add: balR_def)
   174 
   175 lemma avl_balL: 
       (* balL yields an AVL tree when both arguments are AVL trees and either
          their heights differ by at most one or l is exactly two higher
          than r.  Proof by case analysis on l. *)
   176   assumes "avl l" "avl r" and "height l = height r \<or> height l = height r + 1
   177     \<or> height r = height l + 1 \<or> height l = height r + 2" 
   178   shows "avl(balL l a r)"
   179 proof(cases l)
   180   case Leaf
   181   with assms show ?thesis by (simp add: node_def balL_def)
   182 next
         (* The names below are bound positionally in constructor order
            (annotation, left subtree, element, right subtree; compare
            "avl (Node h l a r)"), so they do not reflect the roles their
            spelling suggests.  Only the fact Node is used afterwards. *)
   183   case (Node ln ll lr lh)
   184   with assms show ?thesis
   185   proof(cases "height l = height r + 2")
           (* Rotation case: unfold balL and establish avl for every node that
              is built, via avl_node; the remaining height goals go to arith. *)
   186     case True
   187     from True Node assms show ?thesis
   188       by (auto simp: balL_def intro!: avl_node split: tree.split) arith+
   189   next
           (* No rotation: balL reduces to node, so avl_node applies directly. *)
   190     case False
   191     with assms show ?thesis by (simp add: avl_node balL_def)
   192   qed
   193 qed
   194 
   195 lemma avl_balR: 
       (* balR yields an AVL tree when both arguments are AVL trees and either
          their heights differ by at most one or r is exactly two higher
          than l.  Proof by case analysis on r. *)
   196   assumes "avl l" and "avl r" and "height l = height r \<or> height l = height r + 1
   197     \<or> height r = height l + 1 \<or> height r = height l + 2" 
   198   shows "avl(balR l a r)"
   199 proof(cases r)
   200   case Leaf
   201   with assms show ?thesis by (simp add: node_def balR_def)
   202 next
         (* The names below are bound positionally in constructor order
            (annotation, left subtree, element, right subtree; compare
            "avl (Node h l a r)"), so they do not reflect the roles their
            spelling suggests.  Only the fact Node is used afterwards. *)
   203   case (Node rn rl rr rh)
   204   with assms show ?thesis
   205   proof(cases "height r = height l + 2")
           (* Rotation case: unfold balR and establish avl for every node that
              is built, via avl_node; the remaining height goals go to arith. *)
   206     case True
   207       from True Node assms show ?thesis
   208         by (auto simp: balR_def intro!: avl_node split: tree.split) arith+
   209   next
           (* No rotation: balR reduces to node, so avl_node applies directly. *)
   210     case False
   211     with assms show ?thesis by (simp add: balR_def avl_node)
   212   qed
   213 qed
   214 
   215 (* It appears that these two properties need to be proved simultaneously: *)
   216 
   217 text{* Insertion maintains the AVL property: *}
   218 
   219 theorem avl_insert_aux:
       (* Two facts about insert on an AVL tree, proved in one induction on t:
            1. the result is again an AVL tree;
            2. the height stays the same or grows by exactly one.
          In the Node case the proof distinguishes x = a, x < a and the rest,
          mirroring the EQ / LT / GT branches of insert. *)
   220   assumes "avl t"
   221   shows "avl(insert x t)"
   222         "(height (insert x t) = height t \<or> height (insert x t) = height t + 1)"
   223 using assms
   224 proof (induction t)
   225   case (Node h l a r)
         (* First conclusion: avl (insert x (Node h l a r)). *)
   226   case 1
   227   with Node show ?case
   228   proof(cases "x = a")
   229     case True
   230     with Node 1 show ?thesis by simp
   231   next
   232     case False
   233     with Node 1 show ?thesis 
   234     proof(cases "x<a")
             (* Inserted on the left: the result is a balL, covered by avl_balL. *)
   235       case True
   236       with Node 1 show ?thesis by (auto simp add:avl_balL)
   237     next
             (* Inserted on the right: the result is a balR, covered by avl_balR. *)
   238       case False
   239       with Node 1 `x\<noteq>a` show ?thesis by (auto simp add:avl_balR)
   240     qed
   241   qed
         (* Second conclusion: the height grows by at most one. *)
   242   case 2
   243   from 2 Node show ?case
   244   proof(cases "x = a")
   245     case True
   246     with Node 1 show ?thesis by simp
   247   next
   248     case False
   249     with Node 1 show ?thesis 
   250      proof(cases "x<a")
   251       case True
   252       with Node 2 show ?thesis
             (* Left insertion: did balL have to rotate? *)
   253       proof(cases "height (insert x l) = height r + 2")
               (* No rotation: height_balL2 gives the height directly. *)
   254         case False with Node 2 `x < a` show ?thesis by (auto simp: height_balL2)
   255       next
               (* Rotation: height_balL leaves two possible heights ?A and ?B. *)
   256         case True 
   257         hence "(height (balL (insert x l) a r) = height r + 2) \<or>
   258           (height (balL (insert x l) a r) = height r + 3)" (is "?A \<or> ?B")
   259           using Node 2 by (intro height_balL) simp_all
   260         thus ?thesis
   261         proof
   262           assume ?A
   263           with 2 `x < a` show ?thesis by (auto)
   264         next
   265           assume ?B
                 (* Node(2) is a positional reference into the facts of the
                    Node case -- presumably the height hypothesis for l;
                    confirm before reordering the statement. *)
   266           with True 1 Node(2) `x < a` show ?thesis by (simp) arith
   267         qed
   268       qed
   269     next
   270       case False
   271       with Node 2 show ?thesis 
             (* Right insertion: did balR have to rotate? *)
   272       proof(cases "height (insert x r) = height l + 2")
               (* No rotation: height_balR2 gives the height directly. *)
   273         case False
   274         with Node 2 `\<not>x < a` show ?thesis by (auto simp: height_balR2)
   275       next
               (* Rotation: height_balR leaves two possible heights ?A and ?B. *)
   276         case True 
   277         hence "(height (balR l a (insert x r)) = height l + 2) \<or>
   278           (height (balR l a (insert x r)) = height l + 3)"  (is "?A \<or> ?B")
   279           using Node 2 by (intro height_balR) simp_all
   280         thus ?thesis 
   281         proof
   282           assume ?A
   283           with 2 `\<not>x < a` show ?thesis by (auto)
   284         next
   285           assume ?B
                 (* Node(4) is a positional reference into the facts of the
                    Node case -- presumably the height hypothesis for r;
                    confirm before reordering the statement. *)
   286           with True 1 Node(4) `\<not>x < a` show ?thesis by (simp) arith
   287         qed
   288       qed
   289     qed
   290   qed
       (* The remaining (Leaf) goals are solved by simplification. *)
   291 qed simp_all
   292 
   293 
   294 subsubsection {* Deletion maintains AVL balance *}
   295 
   296 lemma avl_del_max:
       (* Two facts about the tree component of del_max on a non-empty AVL
          tree x, proved together by the induction rule del_max_induct:
            1. it is again an AVL tree;
            2. its height is that of x, or one less. *)
   297   assumes "avl x" and "x \<noteq> Leaf"
   298   shows "avl (fst (del_max x))" "height x = height(fst (del_max x)) \<or>
   299          height x = height(fst (del_max x)) + 1"
   300 using assms
   301 proof (induct x rule: del_max_induct)
   302   case (Node h l a r)
         (* First conclusion: the invariant is preserved. *)
   303   case 1
   304   thus ?case using Node
   305     by (auto simp: height_balL height_balL2 avl_balL
   306       linorder_class.max.absorb1 linorder_class.max.absorb2
   307       split:prod.split)
   308 next
   309   case (Node h l a r)
         (* Second conclusion: the height drops by at most one.  Both height
            lemmas for balL are instantiated with the new right subtree ?r'. *)
   310   case 2
   311   let ?r' = "fst (del_max r)"
   312   from `avl x` Node 2 have "avl l" and "avl r" by simp_all
   313   thus ?case using Node 2 height_balL[of l ?r' a] height_balL2[of l ?r' a]
   314     apply (auto split:prod.splits simp del:avl.simps) by arith+
       (* The remaining cases of the induction rule are solved by auto. *)
   315 qed auto
   316 
   317 lemma avl_del_root:
       (* del_root preserves the AVL invariant on non-empty trees.  Only the
          case where both subtrees are nodes needs an explicit argument; the
          other cases of del_root_cases are solved by simp_all. *)
   318   assumes "avl t" and "t \<noteq> Leaf"
   319   shows "avl(del_root t)" 
   320 using assms
   321 proof (cases t rule:del_root_cases)
   322   case (Node_Node h lh ll ln lr n rh rl rn rr)
         (* ?l and ?r are the two subtrees of t, ?l' is ?l without its
            rightmost element. *)
   323   let ?l = "Node lh ll ln lr"
   324   let ?r = "Node rh rl rn rr"
   325   let ?l' = "fst (del_max ?l)"
   326   from `avl t` and Node_Node have "avl ?r" by simp
   327   from `avl t` and Node_Node have "avl ?l" by simp
         (* Both conclusions of avl_del_max, instantiated for ?l. *)
   328   hence "avl(?l')" "height ?l = height(?l') \<or>
   329          height ?l = height(?l') + 1" by (rule avl_del_max,simp)+
         (* Hence ?l' and ?r satisfy the height premise of avl_balR. *)
   330   with `avl t` Node_Node have "height ?l' = height ?r \<or> height ?l' = height ?r + 1
   331             \<or> height ?r = height ?l' + 1 \<or> height ?r = height ?l' + 2" by fastforce
   332   with `avl ?l'` `avl ?r` have "avl(balR ?l' (snd(del_max ?l)) ?r)"
   333     by (rule avl_balR)
   334   with Node_Node show ?thesis by (auto split:prod.splits)
   335 qed simp_all
   336 
   337 lemma height_del_root:
       (* del_root on a non-empty AVL tree keeps the height or lowers it by
          exactly one.  Only the case where both subtrees are nodes needs an
          explicit argument; the other cases are solved by simp_all. *)
   338   assumes "avl t" and "t \<noteq> Leaf" 
   339   shows "height t = height(del_root t) \<or> height t = height(del_root t) + 1"
   340 using assms
   341 proof (cases t rule: del_root_cases)
   342   case (Node_Node h lh ll ln lr n rh rl rn rr)
         (* ?l, ?r: subtrees of t; ?l': ?l without its rightmost element;
            ?t': the tree that del_root builds in this case. *)
   343   let ?l = "Node lh ll ln lr"
   344   let ?r = "Node rh rl rn rr"
   345   let ?l' = "fst (del_max ?l)"
   346   let ?t' = "balR ?l' (snd(del_max ?l)) ?r"
   347   from `avl t` and Node_Node have "avl ?r" by simp
   348   from `avl t` and Node_Node have "avl ?l" by simp
   349   hence "avl(?l')"  by (rule avl_del_max,simp)
   350   have l'_height: "height ?l = height ?l' \<or> height ?l = height ?l' + 1" using `avl ?l` by (intro avl_del_max) auto
   351   have t_height: "height t = 1 + max (height ?l) (height ?r)" using `avl t` Node_Node by simp
   352   have "height t = height ?t' \<or> height t = height ?t' + 1" using  `avl t` Node_Node
         (* Did balR have to rotate? *)
   353   proof(cases "height ?r = height ?l' + 2")
           (* No rotation: rewrite the height of ?t' with height_balR2. *)
   354     case False
   355     show ?thesis using l'_height t_height False by (subst  height_balR2[OF `avl ?l'` `avl ?r` False])+ arith
   356   next
           (* Rotation: height_balR offers two possible heights; both are
              settled by arithmetic from l'_height and t_height. *)
   357     case True
   358     show ?thesis
   359     proof(cases rule: disjE[OF height_balR[OF True `avl ?l'` `avl ?r`, of "snd (del_max ?l)"]])
   360       case 1
   361       thus ?thesis using l'_height t_height True by arith
   362     next
   363       case 2
   364       thus ?thesis using l'_height t_height True by arith
   365     qed
   366   qed
   367   thus ?thesis using Node_Node by (auto split:prod.splits)
   368 qed simp_all
   369 
   370 text{* Deletion maintains the AVL property: *}
   371 
   372 theorem avl_delete_aux:
       (* Two facts about delete on an AVL tree, proved in one induction on t:
            1. the result is again an AVL tree;
            2. the height stays the same or shrinks by exactly one.
          In the Node case the proof distinguishes x = n, x < n and the rest,
          mirroring the EQ / LT / GT branches of delete. *)
   373   assumes "avl t" 
   374   shows "avl(delete x t)" and "height t = (height (delete x t)) \<or> height t = height (delete x t) + 1"
   375 using assms
   376 proof (induct t)
   377   case (Node h l n r)
         (* First conclusion: avl (delete x (Node h l n r)). *)
   378   case 1
   379   with Node show ?case
   380   proof(cases "x = n")
           (* The root is deleted: avl_del_root applies. *)
   381     case True
   382     with Node 1 show ?thesis by (auto simp:avl_del_root)
   383   next
   384     case False
   385     with Node 1 show ?thesis 
   386     proof(cases "x<n")
             (* Deleted on the left: the result is a balR, covered by avl_balR. *)
   387       case True
   388       with Node 1 show ?thesis by (auto simp add:avl_balR)
   389     next
             (* Deleted on the right: the result is a balL, covered by avl_balL. *)
   390       case False
   391       with Node 1 `x\<noteq>n` show ?thesis by (auto simp add:avl_balL)
   392     qed
   393   qed
         (* Second conclusion: the height shrinks by at most one. *)
   394   case 2
   395   with Node show ?case
   396   proof(cases "x = n")
           (* The root is deleted: height_del_root gives the claim. *)
   397     case True
   398     with 1 have "height (Node h l n r) = height(del_root (Node h l n r))
   399       \<or> height (Node h l n r) = height(del_root (Node h l n r)) + 1"
   400       by (subst height_del_root,simp_all)
   401     with True show ?thesis by simp
   402   next
   403     case False
   404     with Node 1 show ?thesis 
   405      proof(cases "x<n")
   406       case True
   407       show ?thesis
             (* Left deletion: did balR have to rotate? *)
   408       proof(cases "height r = height (delete x l) + 2")
   409         case False with Node 1 `x < n` show ?thesis by(auto simp: balR_def)
   410       next
               (* Rotation: height_balR leaves two possible heights ?A and ?B. *)
   411         case True 
   412         hence "(height (balR (delete x l) n r) = height (delete x l) + 2) \<or>
   413           height (balR (delete x l) n r) = height (delete x l) + 3" (is "?A \<or> ?B")
   414           using Node 2 by (intro height_balR) auto
   415         thus ?thesis 
   416         proof
   417           assume ?A
   418           with `x < n` Node 2 show ?thesis by(auto simp: balR_def)
   419         next
   420           assume ?B
   421           with `x < n` Node 2 show ?thesis by(auto simp: balR_def)
   422         qed
   423       qed
   424     next
   425       case False
   426       show ?thesis
             (* Right deletion: did balL have to rotate? *)
   427       proof(cases "height l = height (delete x r) + 2")
   428         case False with Node 1 `\<not>x < n` `x \<noteq> n` show ?thesis by(auto simp: balL_def)
   429       next
               (* Rotation: height_balL leaves two possible heights ?A and ?B. *)
   430         case True 
   431         hence "(height (balL l n (delete x r)) = height (delete x r) + 2) \<or>
   432           height (balL l n (delete x r)) = height (delete x r) + 3" (is "?A \<or> ?B")
   433           using Node 2 by (intro height_balL) auto
   434         thus ?thesis 
   435         proof
   436           assume ?A
   437           with `\<not>x < n` `x \<noteq> n` Node 2 show ?thesis by(auto simp: balL_def)
   438         next
   439           assume ?B
   440           with `\<not>x < n` `x \<noteq> n` Node 2 show ?thesis by(auto simp: balL_def)
   441         qed
   442       qed
   443     qed
   444   qed
       (* The remaining (Leaf) goals are solved by simplification. *)
   445 qed simp_all
   446 
   447 end