# HG changeset patch # User Andreas Lochbihler # Date 1349867030 -7200 # Node ID cf6a78acf4457757ab41fdcf97cb0d99abfe1024 # Parent ecf1d5d87e5ef9f5d964a039fcccfaf1e7bb1351 efficient construction of red black trees from sorted associative lists diff -r ecf1d5d87e5e -r cf6a78acf445 CONTRIBUTORS --- a/CONTRIBUTORS Tue Oct 09 17:33:46 2012 +0200 +++ b/CONTRIBUTORS Wed Oct 10 13:03:50 2012 +0200 @@ -9,6 +9,9 @@ * 2012: Makarius Wenzel, Université Paris-Sud / LRI Improvements of Isabelle/Scala and Isabelle/jEdit Prover IDE. +* October 2012: Andreas Lochbihler, KIT + Efficient construction of red black trees from sorted associative lists. + * September 2012: Florian Haftmann, TUM Lattice instances for type option. diff -r ecf1d5d87e5e -r cf6a78acf445 NEWS --- a/NEWS Tue Oct 09 17:33:46 2012 +0200 +++ b/NEWS Wed Oct 10 13:03:50 2012 +0200 @@ -134,6 +134,10 @@ appear in a constant's type. This alternative to adding TYPE('a) as another parameter avoids unnecessary closures in generated code. +* Library/RBT_Impl.thy: efficient construction of red-black trees +from sorted associative lists. Merging two trees with rbt_union may +return a structurally different tree than before. MINOR INCOMPATIBILITY. + * Simproc "finite_Collect" rewrites set comprehensions into pointfree expressions. diff -r ecf1d5d87e5e -r cf6a78acf445 src/HOL/Library/RBT_Impl.thy --- a/src/HOL/Library/RBT_Impl.thy Tue Oct 09 17:33:46 2012 +0200 +++ b/src/HOL/Library/RBT_Impl.thy Wed Oct 10 13:03:50 2012 +0200 @@ -113,13 +113,13 @@ context linorder begin lemma rbt_sorted_entries: - "rbt_sorted t \ List.sorted (List.map fst (entries t))" + "rbt_sorted t \ List.sorted (map fst (entries t))" by (induct t) (force simp: sorted_append sorted_Cons rbt_ord_props dest!: entry_in_tree_keys)+ lemma distinct_entries: - "rbt_sorted t \ distinct (List.map fst (entries t))" + "rbt_sorted t \ distinct (map fst (entries t))" by (induct t) (force simp: sorted_append sorted_Cons rbt_ord_props dest!: entry_in_tree_keys)+ @@ -998,89 +998,6 @@ end -subsection {* Union *} - -context ord begin - -primrec rbt_union_with_key :: "('a \ 'b \ 'b \ 'b) \ ('a,'b) rbt \ ('a,'b) rbt \ ('a,'b) rbt" -where - "rbt_union_with_key f t Empty = t" -| "rbt_union_with_key f t (Branch c lt k v rt) = rbt_union_with_key f (rbt_union_with_key f (rbt_insert_with_key f k v t) lt) rt" - -definition rbt_union_with where - "rbt_union_with f = rbt_union_with_key (\_. f)" - -definition rbt_union where - "rbt_union = rbt_union_with_key (%_ _ rv. rv)" - -end - -context linorder begin - -lemma rbt_unionwk_rbt_sorted: "rbt_sorted lt \ rbt_sorted (rbt_union_with_key f lt rt)" - by (induct rt arbitrary: lt) (auto simp: rbt_insertwk_rbt_sorted) -theorem rbt_unionwk_is_rbt[simp]: "is_rbt lt \ is_rbt (rbt_union_with_key f lt rt)" - by (induct rt arbitrary: lt) (simp add: rbt_insertwk_is_rbt)+ - -theorem rbt_unionw_is_rbt: "is_rbt lt \ is_rbt (rbt_union_with f lt rt)" unfolding rbt_union_with_def by simp - -theorem rbt_union_is_rbt: "is_rbt lt \ is_rbt (rbt_union lt rt)" unfolding rbt_union_def by simp - -lemma (in ord) rbt_union_Branch[simp]: - "rbt_union t (Branch c lt k v rt) = rbt_union (rbt_union (rbt_insert k v t) lt) rt" - unfolding rbt_union_def rbt_insert_def - by simp - -lemma rbt_lookup_rbt_union: - assumes "is_rbt s" "rbt_sorted t" - shows "rbt_lookup (rbt_union s t) = rbt_lookup s ++ rbt_lookup t" -using assms -proof (induct t arbitrary: s) - case Empty thus ?case by (auto simp: rbt_union_def) -next - case (Branch c l k v r s) - then have "rbt_sorted r" "rbt_sorted l" "l |\ k" "k \| r" by auto - - have meq: "rbt_lookup s(k \ v) ++ rbt_lookup l ++ rbt_lookup r = - rbt_lookup s ++ - (\a. if a < k then rbt_lookup l a - else if k < a then rbt_lookup r a else Some v)" (is "?m1 = ?m2") - proof (rule ext) - fix a - - have "k < a \ k = a \ k > a" by auto - thus "?m1 a = ?m2 a" - proof (elim disjE) - assume "k < a" - with `l |\ k` have "l |\ a" by (rule rbt_less_trans) - with `k < a` show ?thesis - by (auto simp: map_add_def split: option.splits) - next - assume "k = a" - with `l |\ k` `k \| r` - show ?thesis by (auto simp: map_add_def) - next - assume "a < k" - from this `k \| r` have "a \| r" by (rule rbt_greater_trans) - with `a < k` show ?thesis - by (auto simp: map_add_def split: option.splits) - qed - qed - - from Branch have is_rbt: "is_rbt (rbt_union (rbt_insert k v s) l)" - by (auto intro: rbt_union_is_rbt rbt_insert_is_rbt) - with Branch have IHs: - "rbt_lookup (rbt_union (rbt_union (rbt_insert k v s) l) r) = rbt_lookup (rbt_union (rbt_insert k v s) l) ++ rbt_lookup r" - "rbt_lookup (rbt_union (rbt_insert k v s) l) = rbt_lookup (rbt_insert k v s) ++ rbt_lookup l" - by auto - - with meq show ?case - by (auto simp: rbt_lookup_rbt_insert[OF Branch(3)]) - -qed - -end - subsection {* Modifying existing entries *} context ord begin @@ -1146,16 +1063,23 @@ (* FIXME: simproc "antisym less" does not work for linorder context, only for linorder type class by (induct t) auto *) +hide_const (open) map + subsection {* Folding over entries *} definition fold :: "('a \ 'b \ 'c \ 'c) \ ('a, 'b) rbt \ 'c \ 'c" where "fold f t = List.fold (prod_case f) (entries t)" -lemma fold_simps [simp, code]: +lemma fold_simps [simp]: "fold f Empty = id" "fold f (Branch c lt k v rt) = fold f rt \ f k v \ fold f lt" by (simp_all add: fold_def fun_eq_iff) +lemma fold_code [code]: + "fold f Empty c = c" + "fold f (Branch c lt k v rt) c = fold f rt (f k v (fold f lt c))" +by(simp_all) + (* fold with continuation predicate *) fun foldi :: "('c \ bool) \ ('a \ 'b \ 'c \ 'c) \ ('a :: linorder, 'b) rbt \ 'c \ 'c" @@ -1197,6 +1121,884 @@ end + + +subsection {* Building a RBT from a sorted list *} + +text {* + These functions have been adapted from + Andrew W. Appel, Efficient Verified Red-Black Trees (September 2011) +*} + +fun rbtreeify_f :: "nat \ ('a \ 'b) list \ ('a, 'b) rbt \ ('a \ 'b) list" + and rbtreeify_g :: "nat \ ('a \ 'b) list \ ('a, 'b) rbt \ ('a \ 'b) list" +where + "rbtreeify_f n kvs = + (if n = 0 then (Empty, kvs) + else if n = 1 then + case kvs of (k, v) # kvs' \ (Branch R Empty k v Empty, kvs') + else if (n mod 2 = 0) then + case rbtreeify_f (n div 2) kvs of (t1, (k, v) # kvs') \ + apfst (Branch B t1 k v) (rbtreeify_g (n div 2) kvs') + else case rbtreeify_f (n div 2) kvs of (t1, (k, v) # kvs') \ + apfst (Branch B t1 k v) (rbtreeify_f (n div 2) kvs'))" + +| "rbtreeify_g n kvs = + (if n = 0 \ n = 1 then (Empty, kvs) + else if n mod 2 = 0 then + case rbtreeify_g (n div 2) kvs of (t1, (k, v) # kvs') \ + apfst (Branch B t1 k v) (rbtreeify_g (n div 2) kvs') + else case rbtreeify_f (n div 2) kvs of (t1, (k, v) # kvs') \ + apfst (Branch B t1 k v) (rbtreeify_g (n div 2) kvs'))" + +definition rbtreeify :: "('a \ 'b) list \ ('a, 'b) rbt" +where "rbtreeify kvs = fst (rbtreeify_g (Suc (length kvs)) kvs)" + +declare rbtreeify_f.simps [simp del] rbtreeify_g.simps [simp del] + +lemma rbtreeify_f_code [code]: + "rbtreeify_f n kvs = + (if n = 0 then (Empty, kvs) + else if n = 1 then + case kvs of (k, v) # kvs' \ + (Branch R Empty k v Empty, kvs') + else let (n', r) = divmod_nat n 2 in + if r = 0 then + case rbtreeify_f n' kvs of (t1, (k, v) # kvs') \ + apfst (Branch B t1 k v) (rbtreeify_g n' kvs') + else case rbtreeify_f n' kvs of (t1, (k, v) # kvs') \ + apfst (Branch B t1 k v) (rbtreeify_f n' kvs'))" +by(subst rbtreeify_f.simps)(simp only: Let_def divmod_nat_div_mod prod.simps) + +lemma rbtreeify_g_code [code]: + "rbtreeify_g n kvs = + (if n = 0 \ n = 1 then (Empty, kvs) + else let (n', r) = divmod_nat n 2 in + if r = 0 then + case rbtreeify_g n' kvs of (t1, (k, v) # kvs') \ + apfst (Branch B t1 k v) (rbtreeify_g n' kvs') + else case rbtreeify_f n' kvs of (t1, (k, v) # kvs') \ + apfst (Branch B t1 k v) (rbtreeify_g n' kvs'))" +by(subst rbtreeify_g.simps)(simp only: Let_def divmod_nat_div_mod prod.simps) + +lemma Suc_double_half: "Suc (2 * n) div 2 = n" +by simp + +lemma div2_plus_div2: "n div 2 + n div 2 = (n :: nat) - n mod 2" +by arith + +lemma rbtreeify_f_rec_aux_lemma: + "\k - n div 2 = Suc k'; n \ k; n mod 2 = Suc 0\ + \ k' - n div 2 = k - n" +apply(rule add_right_imp_eq[where a = "n - n div 2"]) +apply(subst add_diff_assoc2, arith) +apply(simp add: div2_plus_div2) +done + +lemma rbtreeify_f_simps: + "rbtreeify_f 0 kvs = (RBT_Impl.Empty, kvs)" + "rbtreeify_f (Suc 0) ((k, v) # kvs) = + (Branch R Empty k v Empty, kvs)" + "0 < n \ rbtreeify_f (2 * n) kvs = + (case rbtreeify_f n kvs of (t1, (k, v) # kvs') \ + apfst (Branch B t1 k v) (rbtreeify_g n kvs'))" + "0 < n \ rbtreeify_f (Suc (2 * n)) kvs = + (case rbtreeify_f n kvs of (t1, (k, v) # kvs') \ + apfst (Branch B t1 k v) (rbtreeify_f n kvs'))" +by(subst (1) rbtreeify_f.simps, simp add: Suc_double_half)+ + +lemma rbtreeify_g_simps: + "rbtreeify_g 0 kvs = (Empty, kvs)" + "rbtreeify_g (Suc 0) kvs = (Empty, kvs)" + "0 < n \ rbtreeify_g (2 * n) kvs = + (case rbtreeify_g n kvs of (t1, (k, v) # kvs') \ + apfst (Branch B t1 k v) (rbtreeify_g n kvs'))" + "0 < n \ rbtreeify_g (Suc (2 * n)) kvs = + (case rbtreeify_f n kvs of (t1, (k, v) # kvs') \ + apfst (Branch B t1 k v) (rbtreeify_g n kvs'))" +by(subst (1) rbtreeify_g.simps, simp add: Suc_double_half)+ + +declare rbtreeify_f_simps[simp] rbtreeify_g_simps[simp] + +lemma length_rbtreeify_f: "n \ length kvs + \ length (snd (rbtreeify_f n kvs)) = length kvs - n" + and length_rbtreeify_g:"\ 0 < n; n \ Suc (length kvs) \ + \ length (snd (rbtreeify_g n kvs)) = Suc (length kvs) - n" +proof(induction n kvs and n kvs rule: rbtreeify_f_rbtreeify_g.induct) + case (1 n kvs) + show ?case + proof(cases "n \ 1") + case True thus ?thesis using "1.prems" + by(cases n kvs rule: nat.exhaust[case_product list.exhaust]) auto + next + case False + hence "n \ 0" "n \ 1" by simp_all + note IH = "1.IH"[OF this] + show ?thesis + proof(cases "n mod 2 = 0") + case True + hence "length (snd (rbtreeify_f n kvs)) = + length (snd (rbtreeify_f (2 * (n div 2)) kvs))" + by(metis minus_nat.diff_0 mult_div_cancel) + also from "1.prems" False obtain k v kvs' + where kvs: "kvs = (k, v) # kvs'" by(cases kvs) auto + also have "0 < n div 2" using False by(simp) + note rbtreeify_f_simps(3)[OF this] + also note kvs[symmetric] + also let ?rest1 = "snd (rbtreeify_f (n div 2) kvs)" + from "1.prems" have "n div 2 \ length kvs" by simp + with True have len: "length ?rest1 = length kvs - n div 2" by(rule IH) + with "1.prems" False obtain t1 k' v' kvs'' + where kvs'': "rbtreeify_f (n div 2) kvs = (t1, (k', v') # kvs'')" + by(cases ?rest1)(auto simp add: snd_def split: prod.split_asm) + note this also note prod.simps(2) also note list.simps(5) + also note prod.simps(2) also note snd_apfst + also have "0 < n div 2" "n div 2 \ Suc (length kvs'')" + using len "1.prems" False unfolding kvs'' by simp_all + with True kvs''[symmetric] refl refl + have "length (snd (rbtreeify_g (n div 2) kvs'')) = + Suc (length kvs'') - n div 2" by(rule IH) + finally show ?thesis using len[unfolded kvs''] "1.prems" True + by(simp add: Suc_diff_le[symmetric] mult_2[symmetric] mult_div_cancel) + next + case False + hence "length (snd (rbtreeify_f n kvs)) = + length (snd (rbtreeify_f (Suc (2 * (n div 2))) kvs))" + by(metis Suc_eq_plus1_left comm_semiring_1_class.normalizing_semiring_rules(7) + mod_2_not_eq_zero_eq_one_nat semiring_div_class.mod_div_equality') + also from "1.prems" `\ n \ 1` obtain k v kvs' + where kvs: "kvs = (k, v) # kvs'" by(cases kvs) auto + also have "0 < n div 2" using `\ n \ 1` by(simp) + note rbtreeify_f_simps(4)[OF this] + also note kvs[symmetric] + also let ?rest1 = "snd (rbtreeify_f (n div 2) kvs)" + from "1.prems" have "n div 2 \ length kvs" by simp + with False have len: "length ?rest1 = length kvs - n div 2" by(rule IH) + with "1.prems" `\ n \ 1` obtain t1 k' v' kvs'' + where kvs'': "rbtreeify_f (n div 2) kvs = (t1, (k', v') # kvs'')" + by(cases ?rest1)(auto simp add: snd_def split: prod.split_asm) + note this also note prod.simps(2) also note list.simps(5) + also note prod.simps(2) also note snd_apfst + also have "n div 2 \ length kvs''" + using len "1.prems" False unfolding kvs'' by simp arith + with False kvs''[symmetric] refl refl + have "length (snd (rbtreeify_f (n div 2) kvs'')) = length kvs'' - n div 2" + by(rule IH) + finally show ?thesis using len[unfolded kvs''] "1.prems" False + by simp(rule rbtreeify_f_rec_aux_lemma[OF sym]) + qed + qed +next + case (2 n kvs) + show ?case + proof(cases "n > 1") + case False with `0 < n` show ?thesis + by(cases n kvs rule: nat.exhaust[case_product list.exhaust]) simp_all + next + case True + hence "\ (n = 0 \ n = 1)" by simp + note IH = "2.IH"[OF this] + show ?thesis + proof(cases "n mod 2 = 0") + case True + hence "length (snd (rbtreeify_g n kvs)) = + length (snd (rbtreeify_g (2 * (n div 2)) kvs))" + by(metis minus_nat.diff_0 mult_div_cancel) + also from "2.prems" True obtain k v kvs' + where kvs: "kvs = (k, v) # kvs'" by(cases kvs) auto + also have "0 < n div 2" using `1 < n` by(simp) + note rbtreeify_g_simps(3)[OF this] + also note kvs[symmetric] + also let ?rest1 = "snd (rbtreeify_g (n div 2) kvs)" + from "2.prems" `1 < n` + have "0 < n div 2" "n div 2 \ Suc (length kvs)" by simp_all + with True have len: "length ?rest1 = Suc (length kvs) - n div 2" by(rule IH) + with "2.prems" obtain t1 k' v' kvs'' + where kvs'': "rbtreeify_g (n div 2) kvs = (t1, (k', v') # kvs'')" + by(cases ?rest1)(auto simp add: snd_def split: prod.split_asm) + note this also note prod.simps(2) also note list.simps(5) + also note prod.simps(2) also note snd_apfst + also have "n div 2 \ Suc (length kvs'')" + using len "2.prems" unfolding kvs'' by simp + with True kvs''[symmetric] refl refl `0 < n div 2` + have "length (snd (rbtreeify_g (n div 2) kvs'')) = Suc (length kvs'') - n div 2" + by(rule IH) + finally show ?thesis using len[unfolded kvs''] "2.prems" True + by(simp add: Suc_diff_le[symmetric] mult_2[symmetric] mult_div_cancel) + next + case False + hence "length (snd (rbtreeify_g n kvs)) = + length (snd (rbtreeify_g (Suc (2 * (n div 2))) kvs))" + by(metis Suc_eq_plus1_left comm_semiring_1_class.normalizing_semiring_rules(7) + mod_2_not_eq_zero_eq_one_nat semiring_div_class.mod_div_equality') + also from "2.prems" `1 < n` obtain k v kvs' + where kvs: "kvs = (k, v) # kvs'" by(cases kvs) auto + also have "0 < n div 2" using `1 < n` by(simp) + note rbtreeify_g_simps(4)[OF this] + also note kvs[symmetric] + also let ?rest1 = "snd (rbtreeify_f (n div 2) kvs)" + from "2.prems" have "n div 2 \ length kvs" by simp + with False have len: "length ?rest1 = length kvs - n div 2" by(rule IH) + with "2.prems" `1 < n` False obtain t1 k' v' kvs'' + where kvs'': "rbtreeify_f (n div 2) kvs = (t1, (k', v') # kvs'')" + by(cases ?rest1)(auto simp add: snd_def split: prod.split_asm, arith) + note this also note prod.simps(2) also note list.simps(5) + also note prod.simps(2) also note snd_apfst + also have "n div 2 \ Suc (length kvs'')" + using len "2.prems" False unfolding kvs'' by simp arith + with False kvs''[symmetric] refl refl `0 < n div 2` + have "length (snd (rbtreeify_g (n div 2) kvs'')) = Suc (length kvs'') - n div 2" + by(rule IH) + finally show ?thesis using len[unfolded kvs''] "2.prems" False + by(simp add: div2_plus_div2) + qed + qed +qed + +lemma rbtreeify_induct [consumes 1, case_names f_0 f_1 f_even f_odd g_0 g_1 g_even g_odd]: + fixes P Q + defines "f0 == (\kvs. P 0 kvs)" + and "f1 == (\k v kvs. P (Suc 0) ((k, v) # kvs))" + and "feven == + (\n kvs t k v kvs'. \ n > 0; n \ length kvs; P n kvs; + rbtreeify_f n kvs = (t, (k, v) # kvs'); n \ Suc (length kvs'); Q n kvs' \ + \ P (2 * n) kvs)" + and "fodd == + (\n kvs t k v kvs'. \ n > 0; n \ length kvs; P n kvs; + rbtreeify_f n kvs = (t, (k, v) # kvs'); n \ length kvs'; P n kvs' \ + \ P (Suc (2 * n)) kvs)" + and "g0 == (\kvs. Q 0 kvs)" + and "g1 == (\kvs. Q (Suc 0) kvs)" + and "geven == + (\n kvs t k v kvs'. \ n > 0; n \ Suc (length kvs); Q n kvs; + rbtreeify_g n kvs = (t, (k, v) # kvs'); n \ Suc (length kvs'); Q n kvs' \ + \ Q (2 * n) kvs)" + and "godd == + (\n kvs t k v kvs'. \ n > 0; n \ length kvs; P n kvs; + rbtreeify_f n kvs = (t, (k, v) # kvs'); n \ Suc (length kvs'); Q n kvs' \ + \ Q (Suc (2 * n)) kvs)" + shows "\ n \ length kvs; + PROP f0; PROP f1; PROP feven; PROP fodd; + PROP g0; PROP g1; PROP geven; PROP godd \ + \ P n kvs" + and "\ n \ Suc (length kvs); + PROP f0; PROP f1; PROP feven; PROP fodd; + PROP g0; PROP g1; PROP geven; PROP godd \ + \ Q n kvs" +proof - + assume f0: "PROP f0" and f1: "PROP f1" and feven: "PROP feven" and fodd: "PROP fodd" + and g0: "PROP g0" and g1: "PROP g1" and geven: "PROP geven" and godd: "PROP godd" + show "n \ length kvs \ P n kvs" and "n \ Suc (length kvs) \ Q n kvs" + proof(induction rule: rbtreeify_f_rbtreeify_g.induct) + case (1 n kvs) + show ?case + proof(cases "n \ 1") + case True thus ?thesis using "1.prems" + by(cases n kvs rule: nat.exhaust[case_product list.exhaust]) + (auto simp add: f0[unfolded f0_def] f1[unfolded f1_def]) + next + case False + hence ns: "n \ 0" "n \ 1" by simp_all + hence ge0: "n div 2 > 0" by simp + note IH = "1.IH"[OF ns] + show ?thesis + proof(cases "n mod 2 = 0") + case True note ge0 + moreover from "1.prems" have n2: "n div 2 \ length kvs" by simp + moreover with True have "P (n div 2) kvs" by(rule IH) + moreover from length_rbtreeify_f[OF n2] ge0 "1.prems" obtain t k v kvs' + where kvs': "rbtreeify_f (n div 2) kvs = (t, (k, v) # kvs')" + by(cases "snd (rbtreeify_f (n div 2) kvs)") + (auto simp add: snd_def split: prod.split_asm) + moreover from "1.prems" length_rbtreeify_f[OF n2] ge0 + have "n div 2 \ Suc (length kvs')" by(simp add: kvs') + moreover with True kvs'[symmetric] refl refl + have "Q (n div 2) kvs'" by(rule IH) + moreover note feven[unfolded feven_def] + (* FIXME: why does by(rule feven[unfolded feven_def]) not work? *) + ultimately have "P (2 * (n div 2)) kvs" by - + thus ?thesis using True by (metis div_mod_equality' minus_nat.diff_0 nat_mult_commute) + next + case False note ge0 + moreover from "1.prems" have n2: "n div 2 \ length kvs" by simp + moreover with False have "P (n div 2) kvs" by(rule IH) + moreover from length_rbtreeify_f[OF n2] ge0 "1.prems" obtain t k v kvs' + where kvs': "rbtreeify_f (n div 2) kvs = (t, (k, v) # kvs')" + by(cases "snd (rbtreeify_f (n div 2) kvs)") + (auto simp add: snd_def split: prod.split_asm) + moreover from "1.prems" length_rbtreeify_f[OF n2] ge0 False + have "n div 2 \ length kvs'" by(simp add: kvs') arith + moreover with False kvs'[symmetric] refl refl have "P (n div 2) kvs'" by(rule IH) + moreover note fodd[unfolded fodd_def] + ultimately have "P (Suc (2 * (n div 2))) kvs" by - + thus ?thesis using False + by simp (metis One_nat_def Suc_eq_plus1_left le_add_diff_inverse mod_less_eq_dividend mult_div_cancel) + qed + qed + next + case (2 n kvs) + show ?case + proof(cases "n \ 1") + case True thus ?thesis using "2.prems" + by(cases n kvs rule: nat.exhaust[case_product list.exhaust]) + (auto simp add: g0[unfolded g0_def] g1[unfolded g1_def]) + next + case False + hence ns: "\ (n = 0 \ n = 1)" by simp + hence ge0: "n div 2 > 0" by simp + note IH = "2.IH"[OF ns] + show ?thesis + proof(cases "n mod 2 = 0") + case True note ge0 + moreover from "2.prems" have n2: "n div 2 \ Suc (length kvs)" by simp + moreover with True have "Q (n div 2) kvs" by(rule IH) + moreover from length_rbtreeify_g[OF ge0 n2] ge0 "2.prems" obtain t k v kvs' + where kvs': "rbtreeify_g (n div 2) kvs = (t, (k, v) # kvs')" + by(cases "snd (rbtreeify_g (n div 2) kvs)") + (auto simp add: snd_def split: prod.split_asm) + moreover from "2.prems" length_rbtreeify_g[OF ge0 n2] ge0 + have "n div 2 \ Suc (length kvs')" by(simp add: kvs') + moreover with True kvs'[symmetric] refl refl + have "Q (n div 2) kvs'" by(rule IH) + moreover note geven[unfolded geven_def] + ultimately have "Q (2 * (n div 2)) kvs" by - + thus ?thesis using True + by(metis div_mod_equality' minus_nat.diff_0 nat_mult_commute) + next + case False note ge0 + moreover from "2.prems" have n2: "n div 2 \ length kvs" by simp + moreover with False have "P (n div 2) kvs" by(rule IH) + moreover from length_rbtreeify_f[OF n2] ge0 "2.prems" False obtain t k v kvs' + where kvs': "rbtreeify_f (n div 2) kvs = (t, (k, v) # kvs')" + by(cases "snd (rbtreeify_f (n div 2) kvs)") + (auto simp add: snd_def split: prod.split_asm, arith) + moreover from "2.prems" length_rbtreeify_f[OF n2] ge0 False + have "n div 2 \ Suc (length kvs')" by(simp add: kvs') arith + moreover with False kvs'[symmetric] refl refl + have "Q (n div 2) kvs'" by(rule IH) + moreover note godd[unfolded godd_def] + ultimately have "Q (Suc (2 * (n div 2))) kvs" by - + thus ?thesis using False + by simp (metis One_nat_def Suc_eq_plus1_left le_add_diff_inverse mod_less_eq_dividend mult_div_cancel) + qed + qed + qed +qed + +lemma inv1_rbtreeify_f: "n \ length kvs + \ inv1 (fst (rbtreeify_f n kvs))" + and inv1_rbtreeify_g: "n \ Suc (length kvs) + \ inv1 (fst (rbtreeify_g n kvs))" +by(induct n kvs and n kvs rule: rbtreeify_induct) simp_all + +fun plog2 :: "nat \ nat" +where "plog2 n = (if n \ 1 then 0 else plog2 (n div 2) + 1)" + +declare plog2.simps [simp del] + +lemma plog2_simps [simp]: + "plog2 0 = 0" "plog2 (Suc 0) = 0" + "0 < n \ plog2 (2 * n) = 1 + plog2 n" + "0 < n \ plog2 (Suc (2 * n)) = 1 + plog2 n" +by(subst plog2.simps, simp add: Suc_double_half)+ + +lemma bheight_rbtreeify_f: "n \ length kvs + \ bheight (fst (rbtreeify_f n kvs)) = plog2 n" + and bheight_rbtreeify_g: "n \ Suc (length kvs) + \ bheight (fst (rbtreeify_g n kvs)) = plog2 n" +by(induct n kvs and n kvs rule: rbtreeify_induct) simp_all + +lemma bheight_rbtreeify_f_eq_plog2I: + "\ rbtreeify_f n kvs = (t, kvs'); n \ length kvs \ + \ bheight t = plog2 n" +using bheight_rbtreeify_f[of n kvs] by simp + +lemma bheight_rbtreeify_g_eq_plog2I: + "\ rbtreeify_g n kvs = (t, kvs'); n \ Suc (length kvs) \ + \ bheight t = plog2 n" +using bheight_rbtreeify_g[of n kvs] by simp + +hide_const (open) plog2 + +lemma inv2_rbtreeify_f: "n \ length kvs + \ inv2 (fst (rbtreeify_f n kvs))" + and inv2_rbtreeify_g: "n \ Suc (length kvs) + \ inv2 (fst (rbtreeify_g n kvs))" +by(induct n kvs and n kvs rule: rbtreeify_induct) + (auto simp add: bheight_rbtreeify_f bheight_rbtreeify_g + intro: bheight_rbtreeify_f_eq_plog2I bheight_rbtreeify_g_eq_plog2I) + +lemma "n \ length kvs \ True" + and color_of_rbtreeify_g: + "\ n \ Suc (length kvs); 0 < n \ + \ color_of (fst (rbtreeify_g n kvs)) = B" +by(induct n kvs and n kvs rule: rbtreeify_induct) simp_all + +lemma entries_rbtreeify_f_append: + "n \ length kvs + \ entries (fst (rbtreeify_f n kvs)) @ snd (rbtreeify_f n kvs) = kvs" + and entries_rbtreeify_g_append: + "n \ Suc (length kvs) + \ entries (fst (rbtreeify_g n kvs)) @ snd (rbtreeify_g n kvs) = kvs" +by(induction rule: rbtreeify_induct) simp_all + +lemma length_entries_rbtreeify_f: + "n \ length kvs \ length (entries (fst (rbtreeify_f n kvs))) = n" + and length_entries_rbtreeify_g: + "n \ Suc (length kvs) \ length (entries (fst (rbtreeify_g n kvs))) = n - 1" +by(induct rule: rbtreeify_induct) simp_all + +lemma rbtreeify_f_conv_drop: + "n \ length kvs \ snd (rbtreeify_f n kvs) = drop n kvs" +using entries_rbtreeify_f_append[of n kvs] +by(simp add: append_eq_conv_conj length_entries_rbtreeify_f) + +lemma rbtreeify_g_conv_drop: + "n \ Suc (length kvs) \ snd (rbtreeify_g n kvs) = drop (n - 1) kvs" +using entries_rbtreeify_g_append[of n kvs] +by(simp add: append_eq_conv_conj length_entries_rbtreeify_g) + +lemma entries_rbtreeify_f [simp]: + "n \ length kvs \ entries (fst (rbtreeify_f n kvs)) = take n kvs" +using entries_rbtreeify_f_append[of n kvs] +by(simp add: append_eq_conv_conj length_entries_rbtreeify_f) + +lemma entries_rbtreeify_g [simp]: + "n \ Suc (length kvs) \ + entries (fst (rbtreeify_g n kvs)) = take (n - 1) kvs" +using entries_rbtreeify_g_append[of n kvs] +by(simp add: append_eq_conv_conj length_entries_rbtreeify_g) + +lemma keys_rbtreeify_f [simp]: "n \ length kvs + \ keys (fst (rbtreeify_f n kvs)) = take n (map fst kvs)" +by(simp add: keys_def take_map) + +lemma keys_rbtreeify_g [simp]: "n \ Suc (length kvs) + \ keys (fst (rbtreeify_g n kvs)) = take (n - 1) (map fst kvs)" +by(simp add: keys_def take_map) + +lemma rbtreeify_fD: + "\ rbtreeify_f n kvs = (t, kvs'); n \ length kvs \ + \ entries t = take n kvs \ kvs' = drop n kvs" +using rbtreeify_f_conv_drop[of n kvs] entries_rbtreeify_f[of n kvs] by simp + +lemma rbtreeify_gD: + "\ rbtreeify_g n kvs = (t, kvs'); n \ Suc (length kvs) \ + \ entries t = take (n - 1) kvs \ kvs' = drop (n - 1) kvs" +using rbtreeify_g_conv_drop[of n kvs] entries_rbtreeify_g[of n kvs] by simp + +lemma entries_rbtreeify [simp]: "entries (rbtreeify kvs) = kvs" +by(simp add: rbtreeify_def entries_rbtreeify_g) + +context linorder begin + +lemma rbt_sorted_rbtreeify_f: + "\ n \ length kvs; sorted (map fst kvs); distinct (map fst kvs) \ + \ rbt_sorted (fst (rbtreeify_f n kvs))" + and rbt_sorted_rbtreeify_g: + "\ n \ Suc (length kvs); sorted (map fst kvs); distinct (map fst kvs) \ + \ rbt_sorted (fst (rbtreeify_g n kvs))" +proof(induction n kvs and n kvs rule: rbtreeify_induct) + case (f_even n kvs t k v kvs') + from rbtreeify_fD[OF `rbtreeify_f n kvs = (t, (k, v) # kvs')` `n \ length kvs`] + have "entries t = take n kvs" + and kvs': "drop n kvs = (k, v) # kvs'" by simp_all + hence unfold: "kvs = take n kvs @ (k, v) # kvs'" by(metis append_take_drop_id) + from `sorted (map fst kvs)` kvs' + have "(\(x, y) \ set (take n kvs). x \ k) \ (\(x, y) \ set kvs'. k \ x)" + by(subst (asm) unfold)(auto simp add: sorted_append sorted_Cons) + moreover from `distinct (map fst kvs)` kvs' + have "(\(x, y) \ set (take n kvs). x \ k) \ (\(x, y) \ set kvs'. x \ k)" + by(subst (asm) unfold)(auto intro: rev_image_eqI) + ultimately have "(\(x, y) \ set (take n kvs). x < k) \ (\(x, y) \ set kvs'. k < x)" + by fastforce + hence "fst (rbtreeify_f n kvs) |\ k" "k \| fst (rbtreeify_g n kvs')" + using `n \ Suc (length kvs')` `n \ length kvs` set_take_subset[of "n - 1" kvs'] + by(auto simp add: ord.rbt_greater_prop ord.rbt_less_prop take_map split_def) + moreover from `sorted (map fst kvs)` `distinct (map fst kvs)` + have "rbt_sorted (fst (rbtreeify_f n kvs))" by(rule f_even.IH) + moreover have "sorted (map fst kvs')" "distinct (map fst kvs')" + using `sorted (map fst kvs)` `distinct (map fst kvs)` + by(subst (asm) (1 2) unfold, simp add: sorted_append sorted_Cons)+ + hence "rbt_sorted (fst (rbtreeify_g n kvs'))" by(rule f_even.IH) + ultimately show ?case + using `0 < n` `rbtreeify_f n kvs = (t, (k, v) # kvs')` by simp +next + case (f_odd n kvs t k v kvs') + from rbtreeify_fD[OF `rbtreeify_f n kvs = (t, (k, v) # kvs')` `n \ length kvs`] + have "entries t = take n kvs" + and kvs': "drop n kvs = (k, v) # kvs'" by simp_all + hence unfold: "kvs = take n kvs @ (k, v) # kvs'" by(metis append_take_drop_id) + from `sorted (map fst kvs)` kvs' + have "(\(x, y) \ set (take n kvs). x \ k) \ (\(x, y) \ set kvs'. k \ x)" + by(subst (asm) unfold)(auto simp add: sorted_append sorted_Cons) + moreover from `distinct (map fst kvs)` kvs' + have "(\(x, y) \ set (take n kvs). x \ k) \ (\(x, y) \ set kvs'. x \ k)" + by(subst (asm) unfold)(auto intro: rev_image_eqI) + ultimately have "(\(x, y) \ set (take n kvs). x < k) \ (\(x, y) \ set kvs'. k < x)" + by fastforce + hence "fst (rbtreeify_f n kvs) |\ k" "k \| fst (rbtreeify_f n kvs')" + using `n \ length kvs'` `n \ length kvs` set_take_subset[of n kvs'] + by(auto simp add: rbt_greater_prop rbt_less_prop take_map split_def) + moreover from `sorted (map fst kvs)` `distinct (map fst kvs)` + have "rbt_sorted (fst (rbtreeify_f n kvs))" by(rule f_odd.IH) + moreover have "sorted (map fst kvs')" "distinct (map fst kvs')" + using `sorted (map fst kvs)` `distinct (map fst kvs)` + by(subst (asm) (1 2) unfold, simp add: sorted_append sorted_Cons)+ + hence "rbt_sorted (fst (rbtreeify_f n kvs'))" by(rule f_odd.IH) + ultimately show ?case + using `0 < n` `rbtreeify_f n kvs = (t, (k, v) # kvs')` by simp +next + case (g_even n kvs t k v kvs') + from rbtreeify_gD[OF `rbtreeify_g n kvs = (t, (k, v) # kvs')` `n \ Suc (length kvs)`] + have t: "entries t = take (n - 1) kvs" + and kvs': "drop (n - 1) kvs = (k, v) # kvs'" by simp_all + hence unfold: "kvs = take (n - 1) kvs @ (k, v) # kvs'" by(metis append_take_drop_id) + from `sorted (map fst kvs)` kvs' + have "(\(x, y) \ set (take (n - 1) kvs). x \ k) \ (\(x, y) \ set kvs'. k \ x)" + by(subst (asm) unfold)(auto simp add: sorted_append sorted_Cons) + moreover from `distinct (map fst kvs)` kvs' + have "(\(x, y) \ set (take (n - 1) kvs). x \ k) \ (\(x, y) \ set kvs'. x \ k)" + by(subst (asm) unfold)(auto intro: rev_image_eqI) + ultimately have "(\(x, y) \ set (take (n - 1) kvs). x < k) \ (\(x, y) \ set kvs'. k < x)" + by fastforce + hence "fst (rbtreeify_g n kvs) |\ k" "k \| fst (rbtreeify_g n kvs')" + using `n \ Suc (length kvs')` `n \ Suc (length kvs)` set_take_subset[of "n - 1" kvs'] + by(auto simp add: rbt_greater_prop rbt_less_prop take_map split_def) + moreover from `sorted (map fst kvs)` `distinct (map fst kvs)` + have "rbt_sorted (fst (rbtreeify_g n kvs))" by(rule g_even.IH) + moreover have "sorted (map fst kvs')" "distinct (map fst kvs')" + using `sorted (map fst kvs)` `distinct (map fst kvs)` + by(subst (asm) (1 2) unfold, simp add: sorted_append sorted_Cons)+ + hence "rbt_sorted (fst (rbtreeify_g n kvs'))" by(rule g_even.IH) + ultimately show ?case using `0 < n` `rbtreeify_g n kvs = (t, (k, v) # kvs')` by simp +next + case (g_odd n kvs t k v kvs') + from rbtreeify_fD[OF `rbtreeify_f n kvs = (t, (k, v) # kvs')` `n \ length kvs`] + have "entries t = take n kvs" + and kvs': "drop n kvs = (k, v) # kvs'" by simp_all + hence unfold: "kvs = take n kvs @ (k, v) # kvs'" by(metis append_take_drop_id) + from `sorted (map fst kvs)` kvs' + have "(\(x, y) \ set (take n kvs). x \ k) \ (\(x, y) \ set kvs'. k \ x)" + by(subst (asm) unfold)(auto simp add: sorted_append sorted_Cons) + moreover from `distinct (map fst kvs)` kvs' + have "(\(x, y) \ set (take n kvs). x \ k) \ (\(x, y) \ set kvs'. x \ k)" + by(subst (asm) unfold)(auto intro: rev_image_eqI) + ultimately have "(\(x, y) \ set (take n kvs). x < k) \ (\(x, y) \ set kvs'. k < x)" + by fastforce + hence "fst (rbtreeify_f n kvs) |\ k" "k \| fst (rbtreeify_g n kvs')" + using `n \ Suc (length kvs')` `n \ length kvs` set_take_subset[of "n - 1" kvs'] + by(auto simp add: rbt_greater_prop rbt_less_prop take_map split_def) + moreover from `sorted (map fst kvs)` `distinct (map fst kvs)` + have "rbt_sorted (fst (rbtreeify_f n kvs))" by(rule g_odd.IH) + moreover have "sorted (map fst kvs')" "distinct (map fst kvs')" + using `sorted (map fst kvs)` `distinct (map fst kvs)` + by(subst (asm) (1 2) unfold, simp add: sorted_append sorted_Cons)+ + hence "rbt_sorted (fst (rbtreeify_g n kvs'))" by(rule g_odd.IH) + ultimately show ?case + using `0 < n` `rbtreeify_f n kvs = (t, (k, v) # kvs')` by simp +qed simp_all + +lemma rbt_sorted_rbtreeify: + "\ sorted (map fst kvs); distinct (map fst kvs) \ \ rbt_sorted (rbtreeify kvs)" +by(simp add: rbtreeify_def rbt_sorted_rbtreeify_g) + +lemma is_rbt_rbtreeify: + "\ sorted (map fst kvs); distinct (map fst kvs) \ + \ is_rbt (rbtreeify kvs)" +by(simp add: is_rbt_def rbtreeify_def inv1_rbtreeify_g inv2_rbtreeify_g rbt_sorted_rbtreeify_g color_of_rbtreeify_g) + +lemma rbt_lookup_rbtreeify: + "\ sorted (map fst kvs); distinct (map fst kvs) \ \ + rbt_lookup (rbtreeify kvs) = map_of kvs" +by(simp add: map_of_entries[symmetric] rbt_sorted_rbtreeify) + +end + +text {* + Functions to compare the height of two rbt trees, taken from + Andrew W. Appel, Efficient Verified Red-Black Trees (September 2011) +*} + +fun skip_red :: "('a, 'b) rbt \ ('a, 'b) rbt" +where + "skip_red (Branch color.R l k v r) = l" +| "skip_red t = t" + +fun skip_black :: "('a, 'b) rbt \ ('a, 'b) rbt" +where + "skip_black (Branch color.B l k v r) = l" +| "skip_black t = t" + +datatype compare = LT | GT | EQ + +partial_function (tailrec) compare_height :: "('a, 'b) rbt \ ('a, 'b) rbt \ ('a, 'b) rbt \ ('a, 'b) rbt \ compare" +where + "compare_height sx s t tx = + (case (skip_red sx, skip_red s, skip_red t, skip_red tx) of + (Branch _ sx' _ _ _, Branch _ s' _ _ _, Branch _ t' _ _ _, Branch _ tx' _ _ _) \ + compare_height (skip_black sx') s' t' (skip_black tx') + | (_, rbt.Empty, _, Branch _ _ _ _ _) \ LT + | (Branch _ _ _ _ _, _, rbt.Empty, _) \ GT + | (Branch _ sx' _ _ _, Branch _ s' _ _ _, Branch _ t' _ _ _, rbt.Empty) \ + compare_height (skip_black sx') s' t' rbt.Empty + | (rbt.Empty, Branch _ s' _ _ _, Branch _ t' _ _ _, Branch _ tx' _ _ _) \ + compare_height rbt.Empty s' t' (skip_black tx') + | _ \ EQ)" + +declare compare_height.simps [code] + +hide_type (open) compare +hide_const (open) + compare_height skip_black skip_red LT GT EQ compare_case compare_rec + Abs_compare Rep_compare compare_rep_set +hide_fact (open) + Abs_compare_cases Abs_compare_induct Abs_compare_inject Abs_compare_inverse + Rep_compare Rep_compare_cases Rep_compare_induct Rep_compare_inject Rep_compare_inverse + compare.simps compare.exhaust compare.induct compare.recs compare.simps + compare.size compare.case_cong compare.weak_case_cong compare.cases + compare.nchotomy compare.split compare.split_asm compare_rec_def + compare.eq.refl compare.eq.simps + compare.EQ_def compare.GT_def compare.LT_def + equal_compare_def + skip_red_def skip_red.simps skip_red.induct + skip_black_def skip_black.simps skip_black.induct + compare_height.simps + +subsection {* union and intersection of sorted associative lists *} + +context ord begin + +function sunion_with :: "('a \ 'b \ 'b \ 'b) \ ('a \ 'b) list \ ('a \ 'b) list \ ('a \ 'b) list" +where + "sunion_with f ((k, v) # as) ((k', v') # bs) = + (if k > k' then (k', v') # sunion_with f ((k, v) # as) bs + else if k < k' then (k, v) # sunion_with f as ((k', v') # bs) + else (k, f k v v') # sunion_with f as bs)" +| "sunion_with f [] bs = bs" +| "sunion_with f as [] = as" +by pat_completeness auto +termination by lexicographic_order + +function sinter_with :: "('a \ 'b \ 'b \ 'b) \ ('a \ 'b) list \ ('a \ 'b) list \ ('a \ 'b) list" +where + "sinter_with f ((k, v) # as) ((k', v') # bs) = + (if k > k' then sinter_with f ((k, v) # as) bs + else if k < k' then sinter_with f as ((k', v') # bs) + else (k, f k v v') # sinter_with f as bs)" +| "sinter_with f [] _ = []" +| "sinter_with f _ [] = []" +by pat_completeness auto +termination by lexicographic_order + +end + +declare ord.sunion_with.simps [code] ord.sinter_with.simps[code] + +context linorder begin + +lemma set_fst_sunion_with: + "set (map fst (sunion_with f xs ys)) = set (map fst xs) \ set (map fst ys)" +by(induct f xs ys rule: sunion_with.induct) auto + +lemma sorted_sunion_with [simp]: + "\ sorted (map fst xs); sorted (map fst ys) \ + \ sorted (map fst (sunion_with f xs ys))" +by(induct f xs ys rule: sunion_with.induct) + (auto simp add: sorted_Cons set_fst_sunion_with simp del: set_map) + +lemma distinct_sunion_with [simp]: + "\ distinct (map fst xs); distinct (map fst ys); sorted (map fst xs); sorted (map fst ys) \ + \ distinct (map fst (sunion_with f xs ys))" +proof(induct f xs ys rule: sunion_with.induct) + case (1 f k v xs k' v' ys) + have "\ \ k < k'; \ k' < k \ \ k = k'" by simp + thus ?case using "1" + by(auto simp add: set_fst_sunion_with sorted_Cons simp del: set_map) +qed simp_all + +lemma map_of_sunion_with: + "\ sorted (map fst xs); sorted (map fst ys) \ + \ map_of (sunion_with f xs ys) k = + (case map_of xs k of None \ map_of ys k + | Some v \ case map_of ys k of None \ Some v + | Some w \ Some (f k v w))" +by(induct f xs ys rule: sunion_with.induct)(auto simp add: sorted_Cons split: option.split dest: map_of_SomeD bspec) + +lemma set_fst_sinter_with [simp]: + "\ sorted (map fst xs); sorted (map fst ys) \ + \ set (map fst (sinter_with f xs ys)) = set (map fst xs) \ set (map fst ys)" +by(induct f xs ys rule: sinter_with.induct)(auto simp add: sorted_Cons simp del: set_map) + +lemma set_fst_sinter_with_subset1: + "set (map fst (sinter_with f xs ys)) \ set (map fst xs)" +by(induct f xs ys rule: sinter_with.induct) auto + +lemma set_fst_sinter_with_subset2: + "set (map fst (sinter_with f xs ys)) \ set (map fst ys)" +by(induct f xs ys rule: sinter_with.induct)(auto simp del: set_map) + +lemma sorted_sinter_with [simp]: + "\ sorted (map fst xs); sorted (map fst ys) \ + \ sorted (map fst (sinter_with f xs ys))" +by(induct f xs ys rule: sinter_with.induct)(auto simp add: sorted_Cons simp del: set_map) + +lemma distinct_sinter_with [simp]: + "\ distinct (map fst xs); distinct (map fst ys) \ + \ distinct (map fst (sinter_with f xs ys))" +proof(induct f xs ys rule: sinter_with.induct) + case (1 f k v as k' v' bs) + have "\ \ k < k'; \ k' < k \ \ k = k'" by simp + thus ?case using "1" set_fst_sinter_with_subset1[of f as bs] + set_fst_sinter_with_subset2[of f as bs] + by(auto simp del: set_map) +qed simp_all + +lemma map_of_sinter_with: + "\ sorted (map fst xs); sorted (map fst ys) \ + \ map_of (sinter_with f xs ys) k = + (case map_of xs k of None \