merged
authorwenzelm
Sat, 20 Feb 2021 17:04:26 +0100
changeset 73257 446085672b74
parent 73253 f6bb31879698 (diff)
parent 73256 69177d552afb (current diff)
child 73258 fdc76c10bb61
merged
--- a/Admin/PLATFORMS	Sat Feb 20 13:49:24 2021 +0100
+++ b/Admin/PLATFORMS	Sat Feb 20 17:04:26 2021 +0100
@@ -38,7 +38,7 @@
   x86_64-darwin     macOS 10.13 High Sierra (lapbroy68 MacBookPro11,2)
                     macOS 10.14 Mojave (mini2 Macmini8,1)
                     macOS 10.15 Catalina (laramac01 Macmini8,1)
-                    macOS 11.1 Big Sur
+                    macOS 11.1 Big Sur (mini1 Macmini8,1)
 
   x86_64-windows    Windows 10
   x86_64-cygwin     Cygwin 3.1.x https://isabelle.sketis.net/cygwin_2021 (x86_64/release)
--- a/CONTRIBUTORS	Sat Feb 20 13:49:24 2021 +0100
+++ b/CONTRIBUTORS	Sat Feb 20 17:04:26 2021 +0100
@@ -3,6 +3,19 @@
 listed as an author in one of the source files of this Isabelle distribution.
 
 
+Contributions to this Isabelle version
+--------------------------------------
+
+* February 2021: Manuel Eberl
+  New material in HOL-Analysis/HOL-Probability, most notably Hoeffding's
+  inequality and the negative binomial distribution
+
+* January 2021: Jakub Kądziołka
+  Some lemmas for HOL-Computational_Algebra.
+
+* January 2021: Martin Rasyzk
+  Fast set operations for red-black trees.
+
 Contributions to Isabelle2021
 -----------------------------
 
--- a/NEWS	Sat Feb 20 13:49:24 2021 +0100
+++ b/NEWS	Sat Feb 20 17:04:26 2021 +0100
@@ -4,6 +4,18 @@
 (Note: Isabelle/jEdit shows a tree-view of the NEWS file in Sidekick.)
 
 
+New in this Isabelle version
+----------------------------
+
+*** HOL ***
+
+* HOL-Analysis/HOL-Probability: indexed products of discrete
+distributions, negative binomial distribution, Hoeffding's inequality,
+Chernoff bounds, Cauchy–Schwarz inequality for nn_integral, and some
+more small lemmas. Some theorems that were stated awkwardly before were
+corrected. Minor INCOMPATIBILITY.
+
+
 New in Isabelle2021 (February 2021)
 -----------------------------------
 
--- a/src/HOL/Analysis/Bochner_Integration.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Analysis/Bochner_Integration.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -1315,6 +1315,27 @@
   using integrableI_bounded[of f M] has_bochner_integral_implies_finite_norm[of M f]
   unfolding integrable.simps has_bochner_integral.simps[abs_def] by auto
 
+lemma (in finite_measure) square_integrable_imp_integrable:
+  fixes f :: "'a \<Rightarrow> 'b :: {second_countable_topology, banach, real_normed_div_algebra}"
+  assumes [measurable]: "f \<in> borel_measurable M"
+  assumes "integrable M (\<lambda>x. f x ^ 2)"
+  shows   "integrable M f"
+proof -
+  have less_top: "emeasure M (space M) < top"
+    using finite_emeasure_space by (meson top.not_eq_extremum)
+  have "nn_integral M (\<lambda>x. norm (f x)) ^ 2 \<le>
+        nn_integral M (\<lambda>x. norm (f x) ^ 2) * emeasure M (space M)"
+    using Cauchy_Schwarz_nn_integral[of "\<lambda>x. norm (f x)" M "\<lambda>_. 1"]
+    by (simp add: ennreal_power)
+  also have "\<dots> < \<infinity>"
+    using assms(2) less_top
+    by (subst (asm) integrable_iff_bounded) (auto simp: norm_power ennreal_mult_less_top)
+  finally have "nn_integral M (\<lambda>x. norm (f x)) < \<infinity>"
+    by (simp add: power_less_top_ennreal)
+  thus ?thesis
+    by (simp add: integrable_iff_bounded)
+qed
+
 lemma integrable_bound:
   fixes f :: "'a \<Rightarrow> 'b::{banach, second_countable_topology}"
     and g :: "'a \<Rightarrow> 'c::{banach, second_countable_topology}"
@@ -2065,16 +2086,50 @@
 
   have "{x \<in> space M. u x \<ge> c} = {x \<in> space M. ennreal(1/c) * u x \<ge> 1} \<inter> (space M)"
     using \<open>c>0\<close> by (auto simp: ennreal_mult'[symmetric])
-  then have "emeasure M {x \<in> space M. u x \<ge> c} = emeasure M ({x \<in> space M. ennreal(1/c) * u x \<ge> 1} \<inter> (space M))"
+  then have "emeasure M {x \<in> space M. u x \<ge> c} = emeasure M ({x \<in> space M. ennreal(1/c) * u x \<ge> 1})"
     by simp
   also have "... \<le> ennreal(1/c) * (\<integral>\<^sup>+ x. ennreal(u x) * indicator (space M) x \<partial>M)"
     by (rule nn_integral_Markov_inequality) (auto simp add: assms)
   also have "... \<le> ennreal(1/c) * (\<integral>x. u x \<partial>M)"
-    apply (rule mult_left_mono) using * \<open>c>0\<close> by auto
+    by (rule mult_left_mono) (use * \<open>c > 0\<close> in auto)
   finally show ?thesis
     using \<open>0<c\<close> by (simp add: ennreal_mult'[symmetric])
 qed
 
+theorem integral_Markov_inequality_measure:
+  assumes [measurable]: "integrable M u" and "A \<in> sets M" and "AE x in M. 0 \<le> u x" "0 < (c::real)"
+  shows "measure M {x\<in>space M. u x \<ge> c} \<le> (\<integral>x. u x \<partial>M) / c"
+proof -
+  have le: "emeasure M {x\<in>space M. u x \<ge> c} \<le> ennreal ((1/c) * (\<integral>x. u x \<partial>M))"
+    by (rule integral_Markov_inequality) (use assms in auto)
+  also have "\<dots> < top"
+    by auto
+  finally have "ennreal (measure M {x\<in>space M. u x \<ge> c}) = emeasure M {x\<in>space M. u x \<ge> c}"
+    by (intro emeasure_eq_ennreal_measure [symmetric]) auto
+  also note le
+  finally show ?thesis
+    by (subst (asm) ennreal_le_iff)
+       (auto intro!: divide_nonneg_pos Bochner_Integration.integral_nonneg_AE assms)
+qed
+
+theorem%important (in finite_measure) second_moment_method:
+  assumes [measurable]: "f \<in> M \<rightarrow>\<^sub>M borel"
+  assumes "integrable M (\<lambda>x. f x ^ 2)"
+  defines "\<mu> \<equiv> lebesgue_integral M f"
+  assumes "a > 0"
+  shows "measure M {x\<in>space M. \<bar>f x\<bar> \<ge> a} \<le> lebesgue_integral M (\<lambda>x. f x ^ 2) / a\<^sup>2"
+proof -
+  have integrable: "integrable M f"
+    using assms by (blast dest: square_integrable_imp_integrable)
+  have "{x\<in>space M. \<bar>f x\<bar> \<ge> a} = {x\<in>space M. f x ^ 2 \<ge> a\<^sup>2}"
+    using \<open>a > 0\<close> by (simp flip: abs_le_square_iff)
+  hence "measure M {x\<in>space M. \<bar>f x\<bar> \<ge> a} = measure M {x\<in>space M. f x ^ 2 \<ge> a\<^sup>2}"
+    by simp
+  also have "\<dots> \<le> lebesgue_integral M (\<lambda>x. f x ^ 2) / a\<^sup>2"
+    using assms by (intro integral_Markov_inequality_measure) auto
+  finally show ?thesis .
+qed
+
 lemma integral_ineq_eq_0_then_AE:
   fixes f::"_ \<Rightarrow> real"
   assumes "AE x in M. f x \<le> g x" "integrable M f" "integrable M g"
--- a/src/HOL/Analysis/Borel_Space.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Analysis/Borel_Space.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -1640,6 +1640,12 @@
   shows "f \<in> M \<rightarrow>\<^sub>M borel \<Longrightarrow> g \<in> M \<rightarrow>\<^sub>M borel \<Longrightarrow> (\<lambda>x. f x - g x) \<in> M \<rightarrow>\<^sub>M borel"
   unfolding is_borel_def[symmetric] by transfer simp
 
+lemma borel_measurable_power_ennreal [measurable (raw)]:
+  fixes f :: "_ \<Rightarrow> ennreal"
+  assumes f: "f \<in> borel_measurable M"
+  shows "(\<lambda>x. (f x) ^ n) \<in> borel_measurable M"
+  by (induction n) (use f in auto)
+
 lemma borel_measurable_prod_ennreal[measurable (raw)]:
   fixes f :: "'c \<Rightarrow> 'a \<Rightarrow> ennreal"
   assumes "\<And>i. i \<in> S \<Longrightarrow> f i \<in> borel_measurable M"
--- a/src/HOL/Analysis/Nonnegative_Lebesgue_Integration.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Analysis/Nonnegative_Lebesgue_Integration.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -1174,13 +1174,18 @@
 qed
 
 theorem nn_integral_Markov_inequality:
-  assumes u: "u \<in> borel_measurable M" and "A \<in> sets M"
-  shows "(emeasure M) ({x\<in>space M. 1 \<le> c * u x} \<inter> A) \<le> c * (\<integral>\<^sup>+ x. u x * indicator A x \<partial>M)"
+  assumes u: "(\<lambda>x. u x * indicator A x) \<in> borel_measurable M" and "A \<in> sets M"
+  shows "(emeasure M) ({x\<in>A. 1 \<le> c * u x}) \<le> c * (\<integral>\<^sup>+ x. u x * indicator A x \<partial>M)"
     (is "(emeasure M) ?A \<le> _ * ?PI")
 proof -
-  have "?A \<in> sets M"
-    using \<open>A \<in> sets M\<close> u by auto
-  hence "(emeasure M) ?A = (\<integral>\<^sup>+ x. indicator ?A x \<partial>M)"
+  define u' where "u' = (\<lambda>x. u x * indicator A x)"
+  have [measurable]: "u' \<in> borel_measurable M"
+    using u unfolding u'_def .
+  have "{x\<in>space M. c * u' x \<ge> 1} \<in> sets M"
+    by measurable
+  also have "{x\<in>space M. c * u' x \<ge> 1} = ?A"
+    using sets.sets_into_space[OF \<open>A \<in> sets M\<close>] by (auto simp: u'_def indicator_def)
+  finally have "(emeasure M) ?A = (\<integral>\<^sup>+ x. indicator ?A x \<partial>M)"
     using nn_integral_indicator by simp
   also have "\<dots> \<le> (\<integral>\<^sup>+ x. c * (u x * indicator A x) \<partial>M)"
     using u by (auto intro!: nn_integral_mono_AE simp: indicator_def)
@@ -1189,6 +1194,37 @@
   finally show ?thesis .
 qed
 
+lemma Chernoff_ineq_nn_integral_ge:
+  assumes s: "s > 0" and [measurable]: "A \<in> sets M"
+  assumes [measurable]: "(\<lambda>x. f x * indicator A x) \<in> borel_measurable M"
+  shows   "emeasure M {x\<in>A. f x \<ge> a} \<le>
+           ennreal (exp (-s * a)) * nn_integral M (\<lambda>x. ennreal (exp (s * f x)) * indicator A x)"
+proof -
+  define f' where "f' = (\<lambda>x. f x * indicator A x)"
+  have [measurable]: "f' \<in> borel_measurable M"
+    using assms(3) unfolding f'_def by assumption
+  have "(\<lambda>x. ennreal (exp (s * f' x)) * indicator A x) \<in> borel_measurable M"
+    by simp
+  also have "(\<lambda>x. ennreal (exp (s * f' x)) * indicator A x) =
+             (\<lambda>x. ennreal (exp (s * f x)) * indicator A x)"
+    by (auto simp: f'_def indicator_def fun_eq_iff)
+  finally have meas: "\<dots> \<in> borel_measurable M" .
+
+  have "{x\<in>A. f x \<ge> a} = {x\<in>A. ennreal (exp (-s * a)) * ennreal (exp (s * f x)) \<ge> 1}"
+    using s by (auto simp: exp_minus field_simps simp flip: ennreal_mult)
+  also have "emeasure M \<dots> \<le> ennreal (exp (-s * a)) *
+               (\<integral>\<^sup>+x. ennreal (exp (s * f x)) * indicator A x \<partial>M)"
+    by (intro order.trans[OF nn_integral_Markov_inequality] meas) auto
+  finally show ?thesis .
+qed
+
+lemma Chernoff_ineq_nn_integral_le:
+  assumes s: "s > 0" and [measurable]: "A \<in> sets M"
+  assumes [measurable]: "f \<in> borel_measurable M"
+  shows   "emeasure M {x\<in>A. f x \<le> a} \<le>
+           ennreal (exp (s * a)) * nn_integral M (\<lambda>x. ennreal (exp (-s * f x)) * indicator A x)"
+  using Chernoff_ineq_nn_integral_ge[of s A M "\<lambda>x. -f x" "-a"] assms by simp
+
 lemma nn_integral_noteq_infinite:
   assumes g: "g \<in> borel_measurable M" and "integral\<^sup>N M g \<noteq> \<infinity>"
   shows "AE x in M. g x \<noteq> \<infinity>"
@@ -1432,7 +1468,7 @@
 qed
 
 lemma nn_integral_0_iff:
-  assumes u: "u \<in> borel_measurable M"
+  assumes u [measurable]: "u \<in> borel_measurable M"
   shows "integral\<^sup>N M u = 0 \<longleftrightarrow> emeasure M {x\<in>space M. u x \<noteq> 0} = 0"
     (is "_ \<longleftrightarrow> (emeasure M) ?A = 0")
 proof -
@@ -1449,9 +1485,13 @@
     have "0 = (SUP n. (emeasure M) (?M n \<inter> ?A))"
     proof -
       { fix n :: nat
-        from nn_integral_Markov_inequality[OF u, of ?A "of_nat n"] u
-        have "(emeasure M) (?M n \<inter> ?A) \<le> 0"
-          by (simp add: ennreal_of_nat_eq_real_of_nat u_eq *)
+        have "emeasure M {x \<in> ?A. 1 \<le> of_nat n * u x} \<le>
+                of_nat n * \<integral>\<^sup>+ x. u x * indicator ?A x \<partial>M"
+          by (intro nn_integral_Markov_inequality) auto
+        also have "{x \<in> ?A. 1 \<le> of_nat n * u x} = (?M n \<inter> ?A)"
+          by (auto simp: ennreal_of_nat_eq_real_of_nat u_eq * )
+        finally have "emeasure M (?M n \<inter> ?A) \<le> 0"
+          by (simp add: ennreal_of_nat_eq_real_of_nat u_eq * )
         moreover have "0 \<le> (emeasure M) (?M n \<inter> ?A)" using u by auto
         ultimately have "(emeasure M) (?M n \<inter> ?A) = 0" by auto }
       thus ?thesis by simp
@@ -1617,6 +1657,93 @@
     by (subst step) auto
 qed (insert bound, auto simp add: le_fun_def INF_apply[abs_def] top_fun_def intro!: meas f g)
 
+
+text \<open>Cauchy--Schwarz inequality for \<^const>\<open>nn_integral\<close>\<close>
+
+lemma sum_of_squares_ge_ennreal:
+  fixes a b :: ennreal
+  shows "2 * a * b \<le> a\<^sup>2 + b\<^sup>2"
+proof (cases a; cases b)
+  fix x y
+  assume xy: "x \<ge> 0" "y \<ge> 0" and [simp]: "a = ennreal x" "b = ennreal y"
+  have "0 \<le> (x - y)\<^sup>2"
+    by simp
+  also have "\<dots> = x\<^sup>2 + y\<^sup>2 - 2 * x * y"
+    by (simp add: algebra_simps power2_eq_square)
+  finally have "2 * x * y \<le> x\<^sup>2 + y\<^sup>2"
+    by simp
+  hence "ennreal (2 * x * y) \<le> ennreal (x\<^sup>2 + y\<^sup>2)"
+    by (intro ennreal_leI)
+  thus ?thesis using xy
+    by (simp add: ennreal_mult ennreal_power)
+qed auto
+
+lemma Cauchy_Schwarz_nn_integral:
+  assumes [measurable]: "f \<in> borel_measurable M" "g \<in> borel_measurable M"
+  shows "(\<integral>\<^sup>+x. f x * g x \<partial>M)\<^sup>2 \<le> (\<integral>\<^sup>+x. f x ^ 2 \<partial>M) * (\<integral>\<^sup>+x. g x ^ 2 \<partial>M)"
+proof (cases "(\<integral>\<^sup>+x. f x * g x \<partial>M) = 0")
+  case False
+  define F where "F = nn_integral M (\<lambda>x. f x ^ 2)"
+  define G where "G = nn_integral M (\<lambda>x. g x ^ 2)"
+  from False have "\<not>(AE x in M. f x = 0 \<or> g x = 0)"
+    by (auto simp: nn_integral_0_iff_AE)
+  hence "\<not>(AE x in M. f x = 0)" and "\<not>(AE x in M. g x = 0)"
+    by (auto intro: AE_disjI1 AE_disjI2)
+  hence nz: "F \<noteq> 0" "G \<noteq> 0"
+    by (auto simp: nn_integral_0_iff_AE F_def G_def)
+
+  show ?thesis
+  proof (cases "F = \<infinity> \<or> G = \<infinity>")
+    case True
+    thus ?thesis using nz
+      by (auto simp: F_def G_def)
+  next
+    case False
+    define F' where "F' = ennreal (sqrt (enn2real F))"
+    define G' where "G' = ennreal (sqrt (enn2real G))"
+    from False have fin: "F < top" "G < top"
+      by (simp_all add: top.not_eq_extremum)
+    have F'_sqr: "F'\<^sup>2 = F"
+      using False by (cases F) (auto simp: F'_def ennreal_power)
+    have G'_sqr: "G'\<^sup>2 = G"
+      using False by (cases G) (auto simp: G'_def ennreal_power)
+    have nz': "F' \<noteq> 0" "G' \<noteq> 0" and fin': "F' \<noteq> \<infinity>" "G' \<noteq> \<infinity>"
+      using F'_sqr G'_sqr nz fin by auto
+    from fin' have fin'': "F' < top" "G' < top"
+      by (auto simp: top.not_eq_extremum)
+
+    have "2 * (F' / F') * (G' / G') * (\<integral>\<^sup>+x. f x * g x \<partial>M) =
+          F' * G' * (\<integral>\<^sup>+x. 2 * (f x / F') * (g x / G') \<partial>M)"
+      using nz' fin''
+      by (simp add: divide_ennreal_def algebra_simps ennreal_inverse_mult flip: nn_integral_cmult)
+    also have "F'/ F' = 1"
+      using nz' fin'' by simp
+    also have "G'/ G' = 1"
+      using nz' fin'' by simp
+    also have "2 * 1 * 1 = (2 :: ennreal)" by simp
+    also have "F' * G' * (\<integral>\<^sup>+ x. 2 * (f x / F') * (g x / G') \<partial>M) \<le>
+               F' * G' * (\<integral>\<^sup>+x. (f x / F')\<^sup>2 + (g x / G')\<^sup>2 \<partial>M)"
+      by (intro mult_left_mono nn_integral_mono sum_of_squares_ge_ennreal) auto
+    also have "\<dots> = F' * G' * (F / F'\<^sup>2 + G / G'\<^sup>2)" using nz
+      by (auto simp: nn_integral_add algebra_simps nn_integral_divide F_def G_def)
+    also have "F / F'\<^sup>2 = 1"
+      using nz F'_sqr fin by simp
+    also have "G / G'\<^sup>2 = 1"
+      using nz G'_sqr fin by simp
+    also have "F' * G' * (1 + 1) = 2 * (F' * G')"
+      by (simp add: mult_ac)
+    finally have "(\<integral>\<^sup>+x. f x * g x \<partial>M) \<le> F' * G'"
+      by (subst (asm) ennreal_mult_le_mult_iff) auto
+    hence "(\<integral>\<^sup>+x. f x * g x \<partial>M)\<^sup>2 \<le> (F' * G')\<^sup>2"
+      by (intro power_mono_ennreal)
+    also have "\<dots> = F * G"
+      by (simp add: algebra_simps F'_sqr G'_sqr)
+    finally show ?thesis
+      by (simp add: F_def G_def)
+  qed
+qed auto
+
+
 (* TODO: rename? *)
 subsection \<open>Integral under concrete measures\<close>
 
--- a/src/HOL/Analysis/Set_Integral.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Analysis/Set_Integral.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -1079,4 +1079,87 @@
   qed
 qed
 
+
+
+theorem integral_Markov_inequality':
+  fixes u :: "'a \<Rightarrow> real"
+  assumes [measurable]: "set_integrable M A u" and "A \<in> sets M"
+  assumes "AE x in M. x \<in> A \<longrightarrow> u x \<ge> 0" and "0 < (c::real)"
+  shows "emeasure M {x\<in>A. u x \<ge> c} \<le> (1/c::real) * (\<integral>x\<in>A. u x \<partial>M)"
+proof -
+  have "(\<lambda>x. u x * indicator A x) \<in> borel_measurable M" 
+    using assms by (auto simp: set_integrable_def mult_ac)
+  hence "(\<lambda>x. ennreal (u x * indicator A x)) \<in> borel_measurable M"
+    by measurable
+  also have "(\<lambda>x. ennreal (u x * indicator A x)) = (\<lambda>x. ennreal (u x) * indicator A x)"
+    by (intro ext) (auto simp: indicator_def)
+  finally have meas: "\<dots> \<in> borel_measurable M" .
+  from assms(3) have AE: "AE x in M. 0 \<le> u x * indicator A x"
+    by eventually_elim (auto simp: indicator_def)
+  have nonneg: "set_lebesgue_integral M A u \<ge> 0"
+    unfolding set_lebesgue_integral_def
+    by (intro Bochner_Integration.integral_nonneg_AE eventually_mono[OF AE]) (auto simp: mult_ac)
+
+  have A: "A \<subseteq> space M"
+    using \<open>A \<in> sets M\<close> by (simp add: sets.sets_into_space)
+  have "{x \<in> A. u x \<ge> c} = {x \<in> A. ennreal(1/c) * u x \<ge> 1}"
+    using \<open>c>0\<close> A by (auto simp: ennreal_mult'[symmetric])
+  then have "emeasure M {x \<in> A. u x \<ge> c} = emeasure M ({x \<in> A. ennreal(1/c) * u x \<ge> 1})"
+    by simp
+  also have "... \<le> ennreal(1/c) * (\<integral>\<^sup>+ x. ennreal(u x) * indicator A x \<partial>M)"
+    by (intro nn_integral_Markov_inequality meas assms)
+  also have "(\<integral>\<^sup>+ x. ennreal(u x) * indicator A x \<partial>M) = ennreal (set_lebesgue_integral M A u)"
+    unfolding set_lebesgue_integral_def nn_integral_set_ennreal using assms AE
+    by (subst nn_integral_eq_integral) (simp_all add: mult_ac set_integrable_def)
+  finally show ?thesis
+    using \<open>c > 0\<close> nonneg by (subst ennreal_mult) auto
+qed
+
+theorem integral_Markov_inequality'_measure:
+  assumes [measurable]: "set_integrable M A u" and "A \<in> sets M" 
+     and "AE x in M. x \<in> A \<longrightarrow> 0 \<le> u x" "0 < (c::real)"
+  shows "measure M {x\<in>A. u x \<ge> c} \<le> (\<integral>x\<in>A. u x \<partial>M) / c"
+proof -
+  have nonneg: "set_lebesgue_integral M A u \<ge> 0"
+    unfolding set_lebesgue_integral_def
+    by (intro Bochner_Integration.integral_nonneg_AE eventually_mono[OF assms(3)])
+       (auto simp: mult_ac)
+  have le: "emeasure M {x\<in>A. u x \<ge> c} \<le> ennreal ((1/c) * (\<integral>x\<in>A. u x \<partial>M))"
+    by (rule integral_Markov_inequality') (use assms in auto)
+  also have "\<dots> < top"
+    by auto
+  finally have "ennreal (measure M {x\<in>A. u x \<ge> c}) = emeasure M {x\<in>A. u x \<ge> c}"
+    by (intro emeasure_eq_ennreal_measure [symmetric]) auto
+  also note le
+  finally show ?thesis using nonneg
+    by (subst (asm) ennreal_le_iff)
+       (auto intro!: divide_nonneg_pos Bochner_Integration.integral_nonneg_AE assms)
+qed
+
+theorem%important (in finite_measure) Chernoff_ineq_ge:
+  assumes s: "s > 0"
+  assumes integrable: "set_integrable M A (\<lambda>x. exp (s * f x))" and "A \<in> sets M"
+  shows   "measure M {x\<in>A. f x \<ge> a} \<le> exp (-s * a) * (\<integral>x\<in>A. exp (s * f x) \<partial>M)"
+proof -
+  have "{x\<in>A. f x \<ge> a} = {x\<in>A. exp (s * f x) \<ge> exp (s * a)}"
+    using s by auto
+  also have "measure M \<dots> \<le> set_lebesgue_integral M A (\<lambda>x. exp (s * f x)) / exp (s * a)"
+    by (intro integral_Markov_inequality'_measure assms) auto
+  finally show ?thesis 
+    by (simp add: exp_minus field_simps)
+qed
+
+theorem%important (in finite_measure) Chernoff_ineq_le:
+  assumes s: "s > 0"
+  assumes integrable: "set_integrable M A (\<lambda>x. exp (-s * f x))" and "A \<in> sets M"
+  shows   "measure M {x\<in>A. f x \<le> a} \<le> exp (s * a) * (\<integral>x\<in>A. exp (-s * f x) \<partial>M)"
+proof -
+  have "{x\<in>A. f x \<le> a} = {x\<in>A. exp (-s * f x) \<ge> exp (-s * a)}"
+    using s by auto
+  also have "measure M \<dots> \<le> set_lebesgue_integral M A (\<lambda>x. exp (-s * f x)) / exp (-s * a)"
+    by (intro integral_Markov_inequality'_measure assms) auto
+  finally show ?thesis 
+    by (simp add: exp_minus field_simps)
+qed
+
 end
--- a/src/HOL/Complete_Partial_Order.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Complete_Partial_Order.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -123,11 +123,7 @@
       proof (cases "\<exists>z\<in>M. f x \<le> z")
         case True
         then have "f x \<le> Sup M"
-          apply rule
-          apply (erule order_trans)
-          apply (rule ccpo_Sup_upper[OF chM])
-          apply assumption
-          done
+          by (blast intro: ccpo_Sup_upper[OF chM] order_trans)
         then show ?thesis ..
       next
         case False
@@ -141,11 +137,7 @@
     proof (cases "\<exists>x\<in>M. y \<le> x")
       case True
       then have "y \<le> Sup M"
-        apply rule
-        apply (erule order_trans)
-        apply (rule ccpo_Sup_upper[OF Sup(1)])
-        apply assumption
-        done
+        by (blast intro: ccpo_Sup_upper[OF Sup(1)] order_trans)
       then show ?thesis ..
     next
       case False with Sup
@@ -328,20 +320,17 @@
     qed
   qed
   moreover
-  have "Sup A = Sup {x \<in> A. P x}" if "\<forall>x\<in>A. \<exists>y\<in>A. x \<le> y \<and> P y" for P
+  have "Sup A = Sup {x \<in> A. P x}" if "\<And>x. x\<in>A \<Longrightarrow> \<exists>y\<in>A. x \<le> y \<and> P y" for P
   proof (rule antisym)
     have chain_P: "chain (\<le>) {x \<in> A. P x}"
       by (rule chain_compr [OF chain])
     show "Sup A \<le> Sup {x \<in> A. P x}"
-      apply (rule ccpo_Sup_least [OF chain])
-      apply (drule that [rule_format])
-      apply clarify
-      apply (erule order_trans)
-      apply (simp add: ccpo_Sup_upper [OF chain_P])
-      done
+    proof (rule ccpo_Sup_least [OF chain])
+      show "\<And>x. x \<in> A \<Longrightarrow> x \<le> \<Squnion> {x \<in> A. P x}"
+          by (blast intro: ccpo_Sup_upper[OF chain_P] order_trans dest: that)
+      qed
     show "Sup {x \<in> A. P x} \<le> Sup A"
       apply (rule ccpo_Sup_least [OF chain_P])
-      apply clarify
       apply (simp add: ccpo_Sup_upper [OF chain])
       done
   qed
@@ -350,13 +339,15 @@
     | "\<exists>x. x \<in> A \<and> Q x" "Sup A = Sup {x \<in> A. Q x}"
     by blast
   then show "P (Sup A) \<or> Q (Sup A)"
-    apply cases
-     apply simp_all
-     apply (rule disjI1)
-     apply (rule ccpo.admissibleD [OF P chain_compr [OF chain]]; simp)
-    apply (rule disjI2)
-    apply (rule ccpo.admissibleD [OF Q chain_compr [OF chain]]; simp)
-    done
+  proof cases
+    case 1
+    then show ?thesis
+      using ccpo.admissibleD [OF P chain_compr [OF chain]] by force
+  next
+    case 2
+    then show ?thesis 
+      using ccpo.admissibleD [OF Q chain_compr [OF chain]] by force
+  qed
 qed
 
 end
--- a/src/HOL/Computational_Algebra/Factorial_Ring.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Computational_Algebra/Factorial_Ring.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -898,6 +898,26 @@
   ultimately show ?thesis by (rule finite_subset)
 qed
 
+lemma infinite_unit_divisor_powers:
+ assumes "y \<noteq> 0"
+ assumes "is_unit x"
+ shows "infinite {n. x^n dvd y}"
+proof -
+ from `is_unit x` have "is_unit (x^n)" for n
+   using is_unit_power_iff by auto
+ hence "x^n dvd y" for n
+   by auto
+ hence "{n. x^n dvd y} = UNIV"
+   by auto
+ thus ?thesis
+   by auto
+qed
+
+corollary is_unit_iff_infinite_divisor_powers:
+ assumes "y \<noteq> 0"
+ shows "is_unit x \<longleftrightarrow> infinite {n. x^n dvd y}"
+ using infinite_unit_divisor_powers finite_divisor_powers assms by auto
+
 lemma prime_elem_iff_irreducible: "prime_elem x \<longleftrightarrow> irreducible x"
   by (blast intro: irreducible_imp_prime_elem prime_elem_imp_irreducible)
 
@@ -1606,8 +1626,8 @@
   thus ?thesis by simp
 qed
 
-lemma multiplicity_zero_1 [simp]: "multiplicity 0 x = 0"
-  by (metis (mono_tags) local.dvd_0_left multiplicity_zero not_dvd_imp_multiplicity_0)
+lemma multiplicity_zero_left [simp]: "multiplicity 0 x = 0"
+ by (cases "x = 0") (auto intro: not_dvd_imp_multiplicity_0)
 
 lemma inj_on_Prod_primes:
   assumes "\<And>P p. P \<in> A \<Longrightarrow> p \<in> P \<Longrightarrow> prime p"
@@ -2129,6 +2149,91 @@
   with assms show False by auto
 qed
 
+text \<open>Now a string of results due to Jakub Kądziołka\<close>
+
+lemma multiplicity_dvd_iff_dvd:
+ assumes "x \<noteq> 0"
+ shows "p^k dvd x \<longleftrightarrow> p^k dvd p^multiplicity p x"
+proof (cases "is_unit p")
+ case True
+ then have "is_unit (p^k)"
+   using is_unit_power_iff by simp
+ hence "p^k dvd x"
+   by auto
+ moreover from `is_unit p` have "p^k dvd p^multiplicity p x"
+   using multiplicity_unit_left is_unit_power_iff by simp
+ ultimately show ?thesis by simp
+next
+ case False
+ show ?thesis
+ proof (cases "p = 0")
+   case True
+   then have "p^multiplicity p x = 1"
+     by simp
+   moreover have "p^k dvd x \<Longrightarrow> k = 0"
+   proof (rule ccontr)
+     assume "p^k dvd x" and "k \<noteq> 0"
+     with `p = 0` have "p^k = 0" by auto
+     with `p^k dvd x` have "0 dvd x" by auto
+     hence "x = 0" by auto
+     with `x \<noteq> 0` show False by auto
+   qed
+   ultimately show ?thesis
+     by (auto simp add: is_unit_power_iff `\<not> is_unit p`)
+ next
+   case False
+   with `x \<noteq> 0` `\<not> is_unit p` show ?thesis
+     by (simp add: power_dvd_iff_le_multiplicity dvd_power_iff multiplicity_same_power)
+ qed
+qed
+
+lemma multiplicity_decomposeI:
+ assumes "x = p^k * x'" and "\<not> p dvd x'" and "p \<noteq> 0"
+ shows "multiplicity p x = k"
+  using assms local.multiplicity_eqI local.power_Suc2 by force
+
+lemma multiplicity_sum_lt:
+ assumes "multiplicity p a < multiplicity p b" "a \<noteq> 0" "b \<noteq> 0"
+ shows "multiplicity p (a + b) = multiplicity p a"
+proof -
+ let ?vp = "multiplicity p"
+ have unit: "\<not> is_unit p"
+ proof
+   assume "is_unit p"
+   then have "?vp a = 0" and "?vp b = 0" using multiplicity_unit_left by auto
+   with assms show False by auto
+ qed
+
+ from multiplicity_decompose' obtain a' where a': "a = p^?vp a * a'" "\<not> p dvd a'"
+   using unit assms by metis
+ from multiplicity_decompose' obtain b' where b': "b = p^?vp b * b'"
+   using unit assms by metis
+
+ show "?vp (a + b) = ?vp a"
+ proof (rule multiplicity_decomposeI)
+   let ?k = "?vp b - ?vp a"
+   from assms have k: "?k > 0" by simp
+   with b' have "b = p^?vp a * p^?k * b'"
+     by (simp flip: power_add)
+   with a' show *: "a + b = p^?vp a * (a' + p^?k * b')"
+     by (simp add: ac_simps distrib_left)
+   moreover show "\<not> p dvd a' + p^?k * b'"
+     using a' k dvd_add_left_iff by auto
+   show "p \<noteq> 0" using assms by auto
+ qed
+qed
+
+corollary multiplicity_sum_min:
+ assumes "multiplicity p a \<noteq> multiplicity p b" "a \<noteq> 0" "b \<noteq> 0"
+ shows "multiplicity p (a + b) = min (multiplicity p a) (multiplicity p b)"
+proof -
+ let ?vp = "multiplicity p"
+ from assms have "?vp a < ?vp b \<or> ?vp a > ?vp b"
+   by auto
+ then show ?thesis
+   by (metis assms multiplicity_sum_lt min.commute add_commute min.strict_order_iff)    
+qed
+
 end
 
 end
--- a/src/HOL/Equiv_Relations.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Equiv_Relations.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -355,6 +355,50 @@
     by (simp add: quotient_def card_UN_disjoint)
 qed
 
+text \<open>By Jakub Kądziołka:\<close>
+
+lemma sum_fun_comp:
+  assumes "finite S" "finite R" "g ` S \<subseteq> R"
+  shows "(\<Sum>x \<in> S. f (g x)) = (\<Sum>y \<in> R. of_nat (card {x \<in> S. g x = y}) * f y)"
+proof -
+  let ?r = "relation_of (\<lambda>p q. g p = g q) S"
+  have eqv: "equiv S ?r"
+    unfolding relation_of_def by (auto intro: comp_equivI)
+  have finite: "C \<in> S//?r \<Longrightarrow> finite C" for C
+    by (fact finite_equiv_class[OF `finite S` equiv_type[OF `equiv S ?r`]])
+  have disjoint: "A \<in> S//?r \<Longrightarrow> B \<in> S//?r \<Longrightarrow> A \<noteq> B \<Longrightarrow> A \<inter> B = {}" for A B
+    using eqv quotient_disj by blast
+
+  let ?cls = "\<lambda>y. {x \<in> S. y = g x}"
+  have quot_as_img: "S//?r = ?cls ` g ` S"
+    by (auto simp add: relation_of_def quotient_def)
+  have cls_inj: "inj_on ?cls (g ` S)"
+    by (auto intro: inj_onI)
+
+  have rest_0: "(\<Sum>y \<in> R - g ` S. of_nat (card (?cls y)) * f y) = 0"
+  proof -
+    have "of_nat (card (?cls y)) * f y = 0" if asm: "y \<in> R - g ` S" for y
+    proof -
+      from asm have *: "?cls y = {}" by auto
+      show ?thesis unfolding * by simp
+    qed
+    thus ?thesis by simp
+  qed
+
+  have "(\<Sum>x \<in> S. f (g x)) = (\<Sum>C \<in> S//?r. \<Sum>x \<in> C. f (g x))"
+    using eqv finite disjoint
+    by (simp flip: sum.Union_disjoint[simplified] add: Union_quotient)
+  also have "... = (\<Sum>y \<in> g ` S. \<Sum>x \<in> ?cls y. f (g x))"
+    unfolding quot_as_img by (simp add: sum.reindex[OF cls_inj])
+  also have "... = (\<Sum>y \<in> g ` S. \<Sum>x \<in> ?cls y. f y)"
+    by auto
+  also have "... = (\<Sum>y \<in> g ` S. of_nat (card (?cls y)) * f y)"
+    by (simp flip: sum_constant)
+  also have "... = (\<Sum>y \<in> R. of_nat (card (?cls y)) * f y)"
+    using rest_0 by (simp add: sum.subset_diff[OF \<open>g ` S \<subseteq> R\<close> \<open>finite R\<close>])
+  finally show ?thesis
+    by (simp add: eq_commute)
+qed
 
 subsection \<open>Projection\<close>
 
--- a/src/HOL/Lattices_Big.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Lattices_Big.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -560,6 +560,12 @@
   ultimately show ?thesis by (simp add: max.absorb1)
 qed
 
+lemma Max_const[simp]: "\<lbrakk> finite A; A \<noteq> {} \<rbrakk> \<Longrightarrow> Max ((\<lambda>_. c) ` A) = c"
+using Max_in image_is_empty by blast
+
+lemma Min_const[simp]: "\<lbrakk> finite A; A \<noteq> {} \<rbrakk> \<Longrightarrow> Min ((\<lambda>_. c) ` A) = c"
+using Min_in image_is_empty by blast
+
 lemma Min_le [simp]:
   assumes "finite A" and "x \<in> A"
   shows "Min A \<le> x"
@@ -671,6 +677,12 @@
 
 end
 
+lemma sum_le_card_Max: "finite A \<Longrightarrow> sum f A \<le> card A * Max (f ` A)"
+using sum_bounded_above[of A f "Max (f ` A)"] by simp
+
+lemma card_Min_le_sum: "finite A \<Longrightarrow> card A * Min (f ` A) \<le> sum f A"
+using sum_bounded_below[of A "Min (f ` A)" f] by simp
+
 context linorder  (* FIXME *)
 begin
 
--- a/src/HOL/Library/Extended_Nonnegative_Real.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Library/Extended_Nonnegative_Real.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -555,6 +555,32 @@
 lemma ennreal_add_bot[simp]: "bot + x = (x::ennreal)"
   by transfer simp
 
+lemma add_top_right_ennreal [simp]: "x + top = (top :: ennreal)"
+  by (cases x) auto
+
+lemma add_top_left_ennreal [simp]: "top + x = (top :: ennreal)"
+  by (cases x) auto
+
+lemma ennreal_top_mult_left [simp]: "x \<noteq> 0 \<Longrightarrow> x * top = (top :: ennreal)"
+  by (subst ennreal_mult_eq_top_iff) auto
+
+lemma ennreal_top_mult_right [simp]: "x \<noteq> 0 \<Longrightarrow> top * x = (top :: ennreal)"
+  by (subst ennreal_mult_eq_top_iff) auto
+
+
+lemma power_top_ennreal [simp]: "n > 0 \<Longrightarrow> top ^ n = (top :: ennreal)"
+  by (induction n) auto
+
+lemma power_eq_top_ennreal_iff: "x ^ n = top \<longleftrightarrow> x = (top :: ennreal) \<and> n > 0"
+  by (induction n) (auto simp: ennreal_mult_eq_top_iff)
+
+lemma ennreal_mult_le_mult_iff: "c \<noteq> 0 \<Longrightarrow> c \<noteq> top \<Longrightarrow> c * a \<le> c * b \<longleftrightarrow> a \<le> (b :: ennreal)"
+  including ennreal.lifting
+  by (transfer, subst ereal_mult_le_mult_iff) (auto simp: top_ereal_def)
+
+lemma power_mono_ennreal: "x \<le> y \<Longrightarrow> x ^ n \<le> (y ^ n :: ennreal)"
+  by (induction n) (auto intro!: mult_mono)
+
 instance ennreal :: semiring_char_0
 proof (standard, safe intro!: linorder_injI)
   have *: "1 + of_nat k \<noteq> (0::ennreal)" for k
@@ -683,6 +709,9 @@
   unfolding divide_ennreal_def
   by transfer (auto simp: ereal_zero_less_0_iff top_ereal_def ereal_0_gt_inverse)
 
+lemma add_divide_distrib_ennreal: "(a + b) / c = a / c + b / (c :: ennreal)"
+  by (simp add: divide_ennreal_def ring_distribs)
+
 lemma divide_right_mono_ennreal:
   fixes a b c :: ennreal
   shows "a \<le> b \<Longrightarrow> a / c \<le> b / c"
@@ -976,6 +1005,10 @@
   qed
 qed
 
+lemma power_divide_distrib_ennreal [algebra_simps]:
+  "(x / y) ^ n = x ^ n / (y ^ n :: ennreal)"
+  by (simp add: divide_ennreal_def algebra_simps ennreal_inverse_power)
+
 lemma ennreal_divide_numeral: "0 \<le> x \<Longrightarrow> ennreal x / numeral b = ennreal (x / numeral b)"
   by (subst divide_ennreal[symmetric]) auto
 
@@ -983,6 +1016,11 @@
   by (induction A rule: infinite_finite_induct)
      (auto simp: ennreal_mult prod_nonneg)
 
+lemma prod_mono_ennreal:
+  assumes "\<And>x. x \<in> A \<Longrightarrow> f x \<le> (g x :: ennreal)"
+  shows   "prod f A \<le> prod g A"
+  using assms by (induction A rule: infinite_finite_induct) (auto intro!: mult_mono)
+
 lemma mult_right_ennreal_cancel: "a * ennreal c = b * ennreal c \<longleftrightarrow> (a = b \<or> c \<le> 0)"
 proof (cases "0 \<le> c")
   case True
--- a/src/HOL/Library/RBT_Impl.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Library/RBT_Impl.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -1869,35 +1869,506 @@
 lemma set_map_filter: "set (List.map_filter P xs) = the ` (P ` set xs - {None})"
 by(auto simp add: List.map_filter_def intro: rev_image_eqI)
 
+(* Split and Join *)
+
+definition is_rbt_empty :: "('a, 'b) rbt \<Rightarrow> bool" where
+  "is_rbt_empty t \<longleftrightarrow> (case t of RBT_Impl.Empty \<Rightarrow> True | _ \<Rightarrow> False)"
+
+lemma is_rbt_empty_prop[simp]: "is_rbt_empty t \<longleftrightarrow> t = RBT_Impl.Empty"
+  by (auto simp: is_rbt_empty_def split: RBT_Impl.rbt.splits)
+
+definition small_rbt :: "('a, 'b) rbt \<Rightarrow> bool" where
+  "small_rbt t \<longleftrightarrow> bheight t < 4"
+
+definition flip_rbt :: "('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt \<Rightarrow> bool" where
+  "flip_rbt t1 t2 \<longleftrightarrow> bheight t2 < bheight t1"
+
+abbreviation (input) MR where "MR l a b r \<equiv> Branch RBT_Impl.R l a b r"
+abbreviation (input) MB where "MB l a b r \<equiv> Branch RBT_Impl.B l a b r"
+
+fun rbt_baliL :: "('a, 'b) rbt \<Rightarrow> 'a \<Rightarrow> 'b \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_baliL (MR (MR t1 a b t2) a' b' t3) a'' b'' t4 = MR (MB t1 a b t2) a' b' (MB t3 a'' b'' t4)"
+| "rbt_baliL (MR t1 a b (MR t2 a' b' t3)) a'' b'' t4 = MR (MB t1 a b t2) a' b' (MB t3 a'' b'' t4)"
+| "rbt_baliL t1 a b t2 = MB t1 a b t2"
+
+fun rbt_baliR :: "('a, 'b) rbt \<Rightarrow> 'a \<Rightarrow> 'b \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_baliR t1 a b (MR t2 a' b' (MR t3 a'' b'' t4)) = MR (MB t1 a b t2) a' b' (MB t3 a'' b'' t4)"
+| "rbt_baliR t1 a b (MR (MR t2 a' b' t3) a'' b'' t4) = MR (MB t1 a b t2) a' b' (MB t3 a'' b'' t4)"
+| "rbt_baliR t1 a b t2 = MB t1 a b t2"
+
+fun rbt_baldL :: "('a, 'b) rbt \<Rightarrow> 'a \<Rightarrow> 'b \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_baldL (MR t1 a b t2) a' b' t3 = MR (MB t1 a b t2) a' b' t3"
+| "rbt_baldL t1 a b (MB t2 a' b' t3) = rbt_baliR t1 a b (MR t2 a' b' t3)"
+| "rbt_baldL t1 a b (MR (MB t2 a' b' t3) a'' b'' t4) =
+  MR (MB t1 a b t2) a' b' (rbt_baliR t3 a'' b'' (paint RBT_Impl.R t4))"
+| "rbt_baldL t1 a b t2 = MR t1 a b t2"
+
+fun rbt_baldR :: "('a, 'b) rbt \<Rightarrow> 'a \<Rightarrow> 'b \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_baldR t1 a b (MR t2 a' b' t3) = MR t1 a b (MB t2 a' b' t3)"
+| "rbt_baldR (MB t1 a b t2) a' b' t3 = rbt_baliL (MR t1 a b t2) a' b' t3"
+| "rbt_baldR (MR t1 a b (MB t2 a' b' t3)) a'' b'' t4 =
+  MR (rbt_baliL (paint RBT_Impl.R t1) a b t2) a' b' (MB t3 a'' b'' t4)"
+| "rbt_baldR t1 a b t2 = MR t1 a b t2"
+
+fun rbt_app :: "('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_app RBT_Impl.Empty t = t"
+| "rbt_app t RBT_Impl.Empty = t"
+| "rbt_app (MR t1 a b t2) (MR t3 a'' b'' t4) = (case rbt_app t2 t3 of
+    MR u2 a' b' u3 \<Rightarrow> (MR (MR t1 a b u2) a' b' (MR u3 a'' b'' t4))
+  | t23 \<Rightarrow> MR t1 a b (MR t23 a'' b'' t4))"
+| "rbt_app (MB t1 a b t2) (MB t3 a'' b'' t4) = (case rbt_app t2 t3 of
+    MR u2 a' b' u3 \<Rightarrow> MR (MB t1 a b u2) a' b' (MB u3 a'' b'' t4)
+  | t23 \<Rightarrow> rbt_baldL t1 a b (MB t23 a'' b'' t4))"
+| "rbt_app t1 (MR t2 a b t3) = MR (rbt_app t1 t2) a b t3"
+| "rbt_app (MR t1 a b t2) t3 = MR t1 a b (rbt_app t2 t3)"
+
+fun rbt_joinL :: "('a, 'b) rbt \<Rightarrow> 'a \<Rightarrow> 'b \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_joinL l a b r = (if bheight l \<ge> bheight r then MR l a b r
+    else case r of MB l' a' b' r' \<Rightarrow> rbt_baliL (rbt_joinL l a b l') a' b' r'
+    | MR l' a' b' r' \<Rightarrow> MR (rbt_joinL l a b l') a' b' r')"
+
+declare rbt_joinL.simps[simp del]
+
+fun rbt_joinR :: "('a, 'b) rbt \<Rightarrow> 'a \<Rightarrow> 'b \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_joinR l a b r = (if bheight l \<le> bheight r then MR l a b r
+    else case l of MB l' a' b' r' \<Rightarrow> rbt_baliR l' a' b' (rbt_joinR r' a b r)
+    | MR l' a' b' r' \<Rightarrow> MR l' a' b' (rbt_joinR r' a b r))"
+
+declare rbt_joinR.simps[simp del]
+
+definition rbt_join :: "('a, 'b) rbt \<Rightarrow> 'a \<Rightarrow> 'b \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_join l a b r =
+    (let bhl = bheight l; bhr = bheight r
+    in if bhl > bhr
+    then paint RBT_Impl.B (rbt_joinR l a b r)
+    else if bhl < bhr
+    then paint RBT_Impl.B (rbt_joinL l a b r)
+    else MB l a b r)"
+
+lemma size_paint[simp]: "size (paint c t) = size t"
+  by (cases t) auto
+
+lemma size_baliL[simp]: "size (rbt_baliL t1 a b t2) = Suc (size t1 + size t2)"
+  by (induction t1 a b t2 rule: rbt_baliL.induct) auto
+
+lemma size_baliR[simp]: "size (rbt_baliR t1 a b t2) = Suc (size t1 + size t2)"
+  by (induction t1 a b t2 rule: rbt_baliR.induct) auto
+
+lemma size_baldL[simp]: "size (rbt_baldL t1 a b t2) = Suc (size t1 + size t2)"
+  by (induction t1 a b t2 rule: rbt_baldL.induct) auto
+
+lemma size_baldR[simp]: "size (rbt_baldR t1 a b t2) = Suc (size t1 + size t2)"
+  by (induction t1 a b t2 rule: rbt_baldR.induct) auto
+
+lemma size_rbt_app[simp]: "size (rbt_app t1 t2) = size t1 + size t2"
+  by (induction t1 t2 rule: rbt_app.induct)
+     (auto split: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+
+lemma size_rbt_joinL[simp]: "size (rbt_joinL t1 a b t2) = Suc (size t1 + size t2)"
+  by (induction t1 a b t2 rule: rbt_joinL.induct)
+     (auto simp: rbt_joinL.simps split: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+
+lemma size_rbt_joinR[simp]: "size (rbt_joinR t1 a b t2) = Suc (size t1 + size t2)"
+  by (induction t1 a b t2 rule: rbt_joinR.induct)
+     (auto simp: rbt_joinR.simps split: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+
+lemma size_rbt_join[simp]: "size (rbt_join t1 a b t2) = Suc (size t1 + size t2)"
+  by (auto simp: rbt_join_def Let_def)
+
+definition "inv_12 t \<longleftrightarrow> inv1 t \<and> inv2 t"
+
+lemma rbt_Node: "inv_12 (RBT_Impl.Branch c l a b r) \<Longrightarrow> inv_12 l \<and> inv_12 r"
+  by (auto simp: inv_12_def)
+
+lemma paint2: "paint c2 (paint c1 t) = paint c2 t"
+  by (cases t) auto
+
+lemma inv1_rbt_baliL: "inv1l l \<Longrightarrow> inv1 r \<Longrightarrow> inv1 (rbt_baliL l a b r)"
+  by (induct l a b r rule: rbt_baliL.induct) auto
+
+lemma inv1_rbt_baliR: "inv1 l \<Longrightarrow> inv1l r \<Longrightarrow> inv1 (rbt_baliR l a b r)"
+  by (induct l a b r rule: rbt_baliR.induct) auto
+
+lemma rbt_bheight_rbt_baliL: "bheight l = bheight r \<Longrightarrow> bheight (rbt_baliL l a b r) = Suc (bheight l)"
+  by (induct l a b r rule: rbt_baliL.induct) auto
+
+lemma rbt_bheight_rbt_baliR: "bheight l = bheight r \<Longrightarrow> bheight (rbt_baliR l a b r) = Suc (bheight l)"
+  by (induct l a b r rule: rbt_baliR.induct) auto
+
+lemma inv2_rbt_baliL: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> bheight l = bheight r \<Longrightarrow> inv2 (rbt_baliL l a b r)"
+  by (induct l a b r rule: rbt_baliL.induct) auto
+
+lemma inv2_rbt_baliR: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> bheight l = bheight r \<Longrightarrow> inv2 (rbt_baliR l a b r)"
+  by (induct l a b r rule: rbt_baliR.induct) auto
+
+lemma inv_rbt_baliR: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> inv1 l \<Longrightarrow> inv1l r \<Longrightarrow> bheight l = bheight r \<Longrightarrow>
+  inv1 (rbt_baliR l a b r) \<and> inv2 (rbt_baliR l a b r) \<and> bheight (rbt_baliR l a b r) = Suc (bheight l)"
+  by (induct l a b r rule: rbt_baliR.induct) auto
+
+lemma inv_rbt_baliL: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> inv1l l \<Longrightarrow> inv1 r \<Longrightarrow> bheight l = bheight r \<Longrightarrow>
+  inv1 (rbt_baliL l a b r) \<and> inv2 (rbt_baliL l a b r) \<and> bheight (rbt_baliL l a b r) = Suc (bheight l)"
+  by (induct l a b r rule: rbt_baliL.induct) auto
+
+lemma inv2_rbt_baldL_inv1: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> bheight l + 1 = bheight r \<Longrightarrow> inv1 r \<Longrightarrow>
+  inv2 (rbt_baldL l a b r) \<and> bheight (rbt_baldL l a b r) = bheight r"
+  by (induct l a b r rule: rbt_baldL.induct) (auto simp: inv2_rbt_baliR rbt_bheight_rbt_baliR)
+
+lemma inv2_rbt_baldL_B: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> bheight l + 1 = bheight r \<Longrightarrow> color_of r = RBT_Impl.B \<Longrightarrow>
+  inv2 (rbt_baldL l a b r) \<and> bheight (rbt_baldL l a b r) = bheight r"
+  by (induct l a b r rule: rbt_baldL.induct) (auto simp add: inv2_rbt_baliR rbt_bheight_rbt_baliR)
+
+lemma inv1_rbt_baldL: "inv1l l \<Longrightarrow> inv1 r \<Longrightarrow> color_of r = RBT_Impl.B \<Longrightarrow> inv1 (rbt_baldL l a b r)"
+  by (induct l a b r rule: rbt_baldL.induct) (simp_all add: inv1_rbt_baliR)
+
+lemma inv1lI: "inv1 t \<Longrightarrow> inv1l t"
+  by (cases t) auto
+
+lemma neq_Black[simp]: "(c \<noteq> RBT_Impl.B) = (c = RBT_Impl.R)"
+  by (cases c) auto
+
+lemma inv1l_rbt_baldL: "inv1l l \<Longrightarrow> inv1 r \<Longrightarrow> inv1l (rbt_baldL l a b r)"
+  by (induct l a b r rule: rbt_baldL.induct) (auto simp: inv1_rbt_baliR paint2)
+
+lemma inv2_rbt_baldR_inv1: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> bheight l = bheight r + 1 \<Longrightarrow> inv1 l \<Longrightarrow>
+  inv2 (rbt_baldR l a b r) \<and> bheight (rbt_baldR l a b r) = bheight l"
+  by (induct l a b r rule: rbt_baldR.induct) (auto simp: inv2_rbt_baliL rbt_bheight_rbt_baliL)
+
+lemma inv1_rbt_baldR: "inv1 l \<Longrightarrow> inv1l r \<Longrightarrow> color_of l = RBT_Impl.B \<Longrightarrow> inv1 (rbt_baldR l a b r)"
+  by (induct l a b r rule: rbt_baldR.induct) (simp_all add: inv1_rbt_baliL)
+
+lemma inv1l_rbt_baldR: "inv1 l \<Longrightarrow> inv1l r \<Longrightarrow>inv1l (rbt_baldR l a b r)"
+  by (induct l a b r rule: rbt_baldR.induct) (auto simp: inv1_rbt_baliL paint2)
+
+lemma inv2_rbt_app: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> bheight l = bheight r \<Longrightarrow>
+  inv2 (rbt_app l r) \<and> bheight (rbt_app l r) = bheight l"
+  by (induct l r rule: rbt_app.induct)
+     (auto simp: inv2_rbt_baldL_B split: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+
+lemma inv1_rbt_app: "inv1 l \<Longrightarrow> inv1 r \<Longrightarrow> (color_of l = RBT_Impl.B \<and>
+  color_of r = RBT_Impl.B \<longrightarrow> inv1 (rbt_app l r)) \<and> inv1l (rbt_app l r)"
+  by (induct l r rule: rbt_app.induct)
+     (auto simp: inv1_rbt_baldL split: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+
+lemma inv_rbt_baldL: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> bheight l + 1 = bheight r \<Longrightarrow> inv1l l \<Longrightarrow> inv1 r \<Longrightarrow>
+  inv2 (rbt_baldL l a b r) \<and> bheight (rbt_baldL l a b r) = bheight r \<and>
+  inv1l (rbt_baldL l a b r) \<and> (color_of r = RBT_Impl.B \<longrightarrow> inv1 (rbt_baldL l a b r))"
+  by (induct l a b r rule: rbt_baldL.induct) (auto simp: inv_rbt_baliR rbt_bheight_rbt_baliR paint2)
+
+lemma inv_rbt_baldR: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> bheight l = bheight r + 1 \<Longrightarrow> inv1 l \<Longrightarrow> inv1l r \<Longrightarrow>
+  inv2 (rbt_baldR l a b r) \<and> bheight (rbt_baldR l a b r) = bheight l \<and>
+  inv1l (rbt_baldR l a b r) \<and> (color_of l = RBT_Impl.B \<longrightarrow> inv1 (rbt_baldR l a b r))"
+  by (induct l a b r rule: rbt_baldR.induct) (auto simp: inv_rbt_baliL rbt_bheight_rbt_baliL paint2)
+
+lemma inv_rbt_app: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> bheight l = bheight r \<Longrightarrow> inv1 l \<Longrightarrow> inv1 r \<Longrightarrow>
+  inv2 (rbt_app l r) \<and> bheight (rbt_app l r) = bheight l \<and>
+  inv1l (rbt_app l r) \<and> (color_of l = RBT_Impl.B \<and> color_of r = RBT_Impl.B \<longrightarrow> inv1 (rbt_app l r))"
+  by (induct l r rule: rbt_app.induct)
+     (auto simp: inv2_rbt_baldL_B inv_rbt_baldL split: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+
+lemma inv1l_rbt_joinL: "inv1 l \<Longrightarrow> inv1 r \<Longrightarrow> bheight l \<le> bheight r \<Longrightarrow>
+  inv1l (rbt_joinL l a b r) \<and>
+  (bheight l \<noteq> bheight r \<and> color_of r = RBT_Impl.B \<longrightarrow> inv1 (rbt_joinL l a b r))"
+proof (induct l a b r rule: rbt_joinL.induct)
+  case (1 l a b r)
+  then show ?case
+    by (auto simp: inv1_rbt_baliL rbt_joinL.simps[of l a b r]
+        split!: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+qed
+
+lemma inv1l_rbt_joinR: "inv1 l \<Longrightarrow> inv2 l \<Longrightarrow> inv1 r \<Longrightarrow> inv2 r \<Longrightarrow> bheight l \<ge> bheight r \<Longrightarrow>
+  inv1l (rbt_joinR l a b r) \<and>
+  (bheight l \<noteq> bheight r \<and> color_of l = RBT_Impl.B \<longrightarrow> inv1 (rbt_joinR l a b r))"
+proof (induct l a b r rule: rbt_joinR.induct)
+  case (1 l a b r)
+  then show ?case
+    by (fastforce simp: inv1_rbt_baliR rbt_joinR.simps[of l a b r]
+        split!: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+qed
+
+lemma bheight_rbt_joinL: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> bheight l \<le> bheight r \<Longrightarrow>
+  bheight (rbt_joinL l a b r) = bheight r"
+proof (induct l a b r rule: rbt_joinL.induct)
+  case (1 l a b r)
+  then show ?case
+    by (auto simp: rbt_bheight_rbt_baliL rbt_joinL.simps[of l a b r]
+        split!: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+qed
+
+lemma inv2_rbt_joinL: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> bheight l \<le> bheight r \<Longrightarrow> inv2 (rbt_joinL l a b r)"
+proof (induct l a b r rule: rbt_joinL.induct)
+  case (1 l a b r)
+  then show ?case
+    by (auto simp: inv2_rbt_baliL bheight_rbt_joinL rbt_joinL.simps[of l a b r]
+        split!: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+qed
+
+lemma bheight_rbt_joinR: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> bheight l \<ge> bheight r \<Longrightarrow>
+  bheight (rbt_joinR l a b r) = bheight l"
+proof (induct l a b r rule: rbt_joinR.induct)
+  case (1 l a b r)
+  then show ?case
+    by (fastforce simp: rbt_bheight_rbt_baliR rbt_joinR.simps[of l a b r]
+        split!: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+qed
+
+lemma inv2_rbt_joinR: "inv2 l \<Longrightarrow> inv2 r \<Longrightarrow> bheight l \<ge> bheight r \<Longrightarrow> inv2 (rbt_joinR l a b r)"
+proof (induct l a b r rule: rbt_joinR.induct)
+  case (1 l a b r)
+  then show ?case
+    by (fastforce simp: inv2_rbt_baliR bheight_rbt_joinR rbt_joinR.simps[of l a b r]
+        split!: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+qed
+
+lemma keys_paint[simp]: "RBT_Impl.keys (paint c t) = RBT_Impl.keys t"
+  by (cases t) auto
+
+lemma keys_rbt_baliL: "RBT_Impl.keys (rbt_baliL l a b r) = RBT_Impl.keys l @ a # RBT_Impl.keys r"
+  by (cases "(l,a,b,r)" rule: rbt_baliL.cases) auto
+
+lemma keys_rbt_baliR: "RBT_Impl.keys (rbt_baliR l a b r) = RBT_Impl.keys l @ a # RBT_Impl.keys r"
+  by (cases "(l,a,b,r)" rule: rbt_baliR.cases) auto
+
+lemma keys_rbt_baldL: "RBT_Impl.keys (rbt_baldL l a b r) = RBT_Impl.keys l @ a # RBT_Impl.keys r"
+  by (cases "(l,a,b,r)" rule: rbt_baldL.cases) (auto simp: keys_rbt_baliL keys_rbt_baliR)
+
+lemma keys_rbt_baldR: "RBT_Impl.keys (rbt_baldR l a b r) = RBT_Impl.keys l @ a # RBT_Impl.keys r"
+  by (cases "(l,a,b,r)" rule: rbt_baldR.cases) (auto simp: keys_rbt_baliL keys_rbt_baliR)
+
+lemma keys_rbt_app: "RBT_Impl.keys (rbt_app l r) = RBT_Impl.keys l @ RBT_Impl.keys r"
+  by (induction l r rule: rbt_app.induct)
+     (auto simp: keys_rbt_baldL keys_rbt_baldR split: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+
+lemma keys_rbt_joinL: "bheight l \<le> bheight r \<Longrightarrow>
+  RBT_Impl.keys (rbt_joinL l a b r) = RBT_Impl.keys l @ a # RBT_Impl.keys r"
+proof (induction l a b r rule: rbt_joinL.induct)
+  case (1 l a b r)
+  thus ?case
+    by (auto simp: keys_rbt_baliL rbt_joinL.simps[of l a b r]
+        split!: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+qed
+
+lemma keys_rbt_joinR: "RBT_Impl.keys (rbt_joinR l a b r) = RBT_Impl.keys l @ a # RBT_Impl.keys r"
+proof (induction l a b r rule: rbt_joinR.induct)
+  case (1 l a b r)
+  thus ?case
+    by (force simp: keys_rbt_baliR rbt_joinR.simps[of l a b r]
+        split!: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+qed
+
+lemma rbt_set_rbt_baliL: "set (RBT_Impl.keys (rbt_baliL l a b r)) =
+  set (RBT_Impl.keys l) \<union> {a} \<union> set (RBT_Impl.keys r)"
+  by (cases "(l,a,b,r)" rule: rbt_baliL.cases) auto
+
+lemma set_rbt_joinL: "set (RBT_Impl.keys (rbt_joinL l a b r)) =
+  set (RBT_Impl.keys l) \<union> {a} \<union> set (RBT_Impl.keys r)"
+proof (induction l a b r rule: rbt_joinL.induct)
+  case (1 l a b r)
+  thus ?case
+    by (auto simp: rbt_set_rbt_baliL rbt_joinL.simps[of l a b r]
+        split!: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+qed
+
+lemma rbt_set_rbt_baliR: "set (RBT_Impl.keys (rbt_baliR l a b r)) =
+  set (RBT_Impl.keys l) \<union> {a} \<union> set (RBT_Impl.keys r)"
+  by (cases "(l,a,b,r)" rule: rbt_baliR.cases) auto
+
+lemma set_rbt_joinR: "set (RBT_Impl.keys (rbt_joinR l a b r)) =
+  set (RBT_Impl.keys l) \<union> {a} \<union> set (RBT_Impl.keys r)"
+proof (induction l a b r rule: rbt_joinR.induct)
+  case (1 l a b r)
+  thus ?case
+    by (force simp: rbt_set_rbt_baliR rbt_joinR.simps[of l a b r]
+        split!: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+qed
+
+lemma set_keys_paint: "set (RBT_Impl.keys (paint c t)) = set (RBT_Impl.keys t)"
+  by (cases t) auto
+
+lemma set_rbt_join: "set (RBT_Impl.keys (rbt_join l a b r)) =
+  set (RBT_Impl.keys l) \<union> {a} \<union> set (RBT_Impl.keys r)"
+  by (simp add: set_rbt_joinL set_rbt_joinR set_keys_paint rbt_join_def Let_def)
+
+lemma inv_rbt_join: "inv_12 l \<Longrightarrow> inv_12 r \<Longrightarrow> inv_12 (rbt_join l a b r)"
+  by (auto simp: rbt_join_def Let_def inv1l_rbt_joinL inv1l_rbt_joinR
+      inv2_rbt_joinL inv2_rbt_joinR inv_12_def)
+
+fun rbt_recolor :: "('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_recolor (Branch RBT_Impl.R t1 k v t2) =
+    (if color_of t1 = RBT_Impl.B \<and> color_of t2 = RBT_Impl.B then Branch RBT_Impl.B t1 k v t2
+    else Branch RBT_Impl.R t1 k v t2)"
+| "rbt_recolor t = t"
+
+lemma rbt_recolor: "inv_12 t \<Longrightarrow> inv_12 (rbt_recolor t)"
+  by (induction t rule: rbt_recolor.induct) (auto simp: inv_12_def)
+
+fun rbt_split_min :: "('a, 'b) rbt \<Rightarrow> 'a \<times> 'b \<times> ('a, 'b) rbt" where
+  "rbt_split_min RBT_Impl.Empty = undefined"
+| "rbt_split_min (RBT_Impl.Branch _ l a b r) =
+    (if is_rbt_empty l then (a,b,r) else let (a',b',l') = rbt_split_min l in (a',b',rbt_join l' a b r))"
+
+lemma rbt_split_min_set:
+  "rbt_split_min t = (a,b,t') \<Longrightarrow> t \<noteq> RBT_Impl.Empty \<Longrightarrow>
+  a \<in> set (RBT_Impl.keys t) \<and> set (RBT_Impl.keys t) = {a} \<union> set (RBT_Impl.keys t')"
+  by (induction t arbitrary: t') (auto simp: set_rbt_join split: prod.splits if_splits)
+
+lemma rbt_split_min_inv: "rbt_split_min t = (a,b,t') \<Longrightarrow> inv_12 t \<Longrightarrow> t \<noteq> RBT_Impl.Empty \<Longrightarrow> inv_12 t'"
+  by (induction t arbitrary: t')
+     (auto simp: inv_rbt_join split: if_splits prod.splits dest: rbt_Node)
+
+definition rbt_join2 :: "('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_join2 l r = (if is_rbt_empty r then l else let (a,b,r') = rbt_split_min r in rbt_join l a b r')"
+
+lemma set_rbt_join2[simp]: "set (RBT_Impl.keys (rbt_join2 l r)) =
+  set (RBT_Impl.keys l) \<union> set (RBT_Impl.keys r)"
+  by (simp add: rbt_join2_def rbt_split_min_set set_rbt_join split: prod.split)
+
+lemma inv_rbt_join2: "inv_12 l \<Longrightarrow> inv_12 r \<Longrightarrow> inv_12 (rbt_join2 l r)"
+  by (simp add: rbt_join2_def inv_rbt_join rbt_split_min_set rbt_split_min_inv split: prod.split)
+
 context ord begin
 
-definition rbt_union_with_key :: "('a \<Rightarrow> 'b \<Rightarrow> 'b \<Rightarrow> 'b) \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt"
-where
-  "rbt_union_with_key f t1 t2 =
-  (case RBT_Impl.compare_height t1 t1 t2 t2
-   of compare.EQ \<Rightarrow> rbtreeify (sunion_with f (entries t1) (entries t2))
-    | compare.LT \<Rightarrow> fold (rbt_insert_with_key (\<lambda>k v w. f k w v)) t1 t2
-    | compare.GT \<Rightarrow> fold (rbt_insert_with_key f) t2 t1)"
-
-definition rbt_union_with where
-  "rbt_union_with f = rbt_union_with_key (\<lambda>_. f)"
-
-definition rbt_union where
-  "rbt_union = rbt_union_with_key (%_ _ rv. rv)"
-
-definition rbt_inter_with_key :: "('a \<Rightarrow> 'b \<Rightarrow> 'b \<Rightarrow> 'b) \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt"
-where
-  "rbt_inter_with_key f t1 t2 =
-  (case RBT_Impl.compare_height t1 t1 t2 t2 
-   of compare.EQ \<Rightarrow> rbtreeify (sinter_with f (entries t1) (entries t2))
-    | compare.LT \<Rightarrow> rbtreeify (List.map_filter (\<lambda>(k, v). map_option (\<lambda>w. (k, f k v w)) (rbt_lookup t2 k)) (entries t1))
-    | compare.GT \<Rightarrow> rbtreeify (List.map_filter (\<lambda>(k, v). map_option (\<lambda>w. (k, f k w v)) (rbt_lookup t1 k)) (entries t2)))"
-
-definition rbt_inter_with where
-  "rbt_inter_with f = rbt_inter_with_key (\<lambda>_. f)"
-
-definition rbt_inter where
-  "rbt_inter = rbt_inter_with_key (\<lambda>_ _ rv. rv)"
+fun rbt_split :: "('a, 'b) rbt \<Rightarrow> 'a \<Rightarrow> ('a, 'b) rbt \<times> 'b option \<times> ('a, 'b) rbt" where
+  "rbt_split RBT_Impl.Empty k = (RBT_Impl.Empty, None, RBT_Impl.Empty)"
+| "rbt_split (RBT_Impl.Branch _ l a b r) x =
+  (if x < a then (case rbt_split l x of (l1, \<beta>, l2) \<Rightarrow> (l1, \<beta>, rbt_join l2 a b r))
+  else if a < x then (case rbt_split r x of (r1, \<beta>, r2) \<Rightarrow> (rbt_join l a b r1, \<beta>, r2))
+  else (l, Some b, r))"
+
+lemma rbt_split: "rbt_split t x = (l,\<beta>,r) \<Longrightarrow> inv_12 t \<Longrightarrow> inv_12 l \<and> inv_12 r"
+  by (induction t arbitrary: l r)
+     (auto simp: set_rbt_join inv_rbt_join rbt_greater_prop rbt_less_prop
+      split: if_splits prod.splits dest!: rbt_Node)
+
+lemma rbt_split_size: "(l2,\<beta>,r2) = rbt_split t2 a \<Longrightarrow> size l2 + size r2 \<le> size t2"
+  by (induction t2 a arbitrary: l2 r2 rule: rbt_split.induct) (auto split: if_splits prod.splits)
+
+function rbt_union_rec :: "('a \<Rightarrow> 'b \<Rightarrow> 'b \<Rightarrow> 'b) \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_union_rec f t1 t2 = (let (f, t2, t1) =
+    if flip_rbt t2 t1 then (\<lambda>k v v'. f k v' v, t1, t2) else (f, t2, t1) in
+    if small_rbt t2 then RBT_Impl.fold (rbt_insert_with_key f) t2 t1
+    else (case t1 of RBT_Impl.Empty \<Rightarrow> t2
+      | RBT_Impl.Branch _ l1 a b r1 \<Rightarrow>
+        case rbt_split t2 a of (l2, \<beta>, r2) \<Rightarrow>
+          rbt_join (rbt_union_rec f l1 l2) a (case \<beta> of None \<Rightarrow> b | Some b' \<Rightarrow> f a b b') (rbt_union_rec f r1 r2)))"
+  by pat_completeness auto
+termination
+  using rbt_split_size
+  by (relation "measure (\<lambda>(f,t1,t2). size t1 + size t2)") (fastforce split: if_splits)+
+
+declare rbt_union_rec.simps[simp del]
+
+function rbt_union_swap_rec :: "('a \<Rightarrow> 'b \<Rightarrow> 'b \<Rightarrow> 'b) \<Rightarrow> bool \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_union_swap_rec f \<gamma> t1 t2 = (let (\<gamma>, t2, t1) =
+    if flip_rbt t2 t1 then (\<not>\<gamma>, t1, t2) else (\<gamma>, t2, t1);
+    f' = (if \<gamma> then (\<lambda>k v v'. f k v' v) else f) in
+    if small_rbt t2 then RBT_Impl.fold (rbt_insert_with_key f') t2 t1
+    else (case t1 of RBT_Impl.Empty \<Rightarrow> t2
+      | RBT_Impl.Branch _ l1 a b r1 \<Rightarrow>
+        case rbt_split t2 a of (l2, \<beta>, r2) \<Rightarrow>
+          rbt_join (rbt_union_swap_rec f \<gamma> l1 l2) a (case \<beta> of None \<Rightarrow> b | Some b' \<Rightarrow> f' a b b') (rbt_union_swap_rec f \<gamma> r1 r2)))"
+  by pat_completeness auto
+termination
+  using rbt_split_size
+  by (relation "measure (\<lambda>(f,\<gamma>,t1,t2). size t1 + size t2)") (fastforce split: if_splits)+
+
+declare rbt_union_swap_rec.simps[simp del]
+
+lemma rbt_union_swap_rec: "rbt_union_swap_rec f \<gamma> t1 t2 =
+  rbt_union_rec (if \<gamma> then (\<lambda>k v v'. f k v' v) else f) t1 t2"
+proof (induction f \<gamma> t1 t2 rule: rbt_union_swap_rec.induct)
+  case (1 f \<gamma> t1 t2)
+  show ?case
+    using 1[OF refl _ refl refl _ refl _ refl]
+    unfolding rbt_union_swap_rec.simps[of _ _ t1] rbt_union_rec.simps[of _ t1]
+    by (auto simp: Let_def split: rbt.splits prod.splits option.splits) (* slow *)
+qed
+
+lemma rbt_fold_rbt_insert:
+  assumes "inv_12 t2"
+  shows "inv_12 (RBT_Impl.fold (rbt_insert_with_key f) t1 t2)"
+proof -
+  define xs where "xs = RBT_Impl.entries t1"
+  from assms show ?thesis
+    unfolding RBT_Impl.fold_def xs_def[symmetric]
+    by (induct xs rule: rev_induct)
+       (auto simp: inv_12_def rbt_insert_with_key_def ins_inv1_inv2)
+qed
+
+lemma rbt_union_rec: "inv_12 t1 \<Longrightarrow> inv_12 t2 \<Longrightarrow> inv_12 (rbt_union_rec f t1 t2)"
+proof (induction f t1 t2 rule: rbt_union_rec.induct)
+  case (1 t1 t2)
+  thus ?case
+    by (auto simp: rbt_union_rec.simps[of t1 t2] inv_rbt_join rbt_split rbt_fold_rbt_insert
+        split!: RBT_Impl.rbt.splits RBT_Impl.color.splits prod.split if_splits dest: rbt_Node)
+qed
+
+definition "map_filter_inter f t1 t2 = List.map_filter (\<lambda>(k, v).
+  case rbt_lookup t1 k of None \<Rightarrow> None
+  | Some v' \<Rightarrow> Some (k, f k v' v)) (RBT_Impl.entries t2)"
+
+function rbt_inter_rec :: "('a \<Rightarrow> 'b \<Rightarrow> 'b \<Rightarrow> 'b) \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_inter_rec f t1 t2 = (let (f, t2, t1) =
+    if flip_rbt t2 t1 then (\<lambda>k v v'. f k v' v, t1, t2) else (f, t2, t1) in
+    if small_rbt t2 then rbtreeify (map_filter_inter f t1 t2)
+    else case t1 of RBT_Impl.Empty \<Rightarrow> RBT_Impl.Empty
+    | RBT_Impl.Branch _ l1 a b r1 \<Rightarrow>
+      case rbt_split t2 a of (l2, \<beta>, r2) \<Rightarrow> let l' = rbt_inter_rec f l1 l2; r' = rbt_inter_rec f r1 r2 in
+      (case \<beta> of None \<Rightarrow> rbt_join2 l' r' | Some b' \<Rightarrow> rbt_join l' a (f a b b') r'))"
+  by pat_completeness auto
+termination
+  using rbt_split_size
+  by (relation "measure (\<lambda>(f,t1,t2). size t1 + size t2)") (fastforce split: if_splits)+
+
+declare rbt_inter_rec.simps[simp del]
+
+function rbt_inter_swap_rec :: "('a \<Rightarrow> 'b \<Rightarrow> 'b \<Rightarrow> 'b) \<Rightarrow> bool \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_inter_swap_rec f \<gamma> t1 t2 = (let (\<gamma>, t2, t1) =
+    if flip_rbt t2 t1 then (\<not>\<gamma>, t1, t2) else (\<gamma>, t2, t1);
+    f' = (if \<gamma> then (\<lambda>k v v'. f k v' v) else f) in
+    if small_rbt t2 then rbtreeify (map_filter_inter f' t1 t2)
+    else case t1 of RBT_Impl.Empty \<Rightarrow> RBT_Impl.Empty
+    | RBT_Impl.Branch _ l1 a b r1 \<Rightarrow>
+      case rbt_split t2 a of (l2, \<beta>, r2) \<Rightarrow> let l' = rbt_inter_swap_rec f \<gamma> l1 l2; r' = rbt_inter_swap_rec f \<gamma> r1 r2 in
+      (case \<beta> of None \<Rightarrow> rbt_join2 l' r' | Some b' \<Rightarrow> rbt_join l' a (f' a b b') r'))"
+  by pat_completeness auto
+termination
+  using rbt_split_size
+  by (relation "measure (\<lambda>(f,\<gamma>,t1,t2). size t1 + size t2)") (fastforce split: if_splits)+
+
+declare rbt_inter_swap_rec.simps[simp del]
+
+lemma rbt_inter_swap_rec: "rbt_inter_swap_rec f \<gamma> t1 t2 =
+  rbt_inter_rec (if \<gamma> then (\<lambda>k v v'. f k v' v) else f) t1 t2"
+proof (induction f \<gamma> t1 t2 rule: rbt_inter_swap_rec.induct)
+  case (1 f \<gamma> t1 t2)
+  show ?case
+    using 1[OF refl _ refl refl _ refl _ refl]
+    unfolding rbt_inter_swap_rec.simps[of _ _ t1] rbt_inter_rec.simps[of _ t1]
+    by (auto simp add: Let_def split: rbt.splits prod.splits option.splits)
+qed
+
+lemma rbt_rbtreeify[simp]: "inv_12 (rbtreeify kvs)"
+  by (simp add: inv_12_def rbtreeify_def inv1_rbtreeify_g inv2_rbtreeify_g)
+
+lemma rbt_inter_rec: "inv_12 t1 \<Longrightarrow> inv_12 t2 \<Longrightarrow> inv_12 (rbt_inter_rec f t1 t2)"
+proof(induction f t1 t2 rule: rbt_inter_rec.induct)
+  case (1 t1 t2)
+  thus ?case
+    by (auto simp: rbt_inter_rec.simps[of t1 t2] inv_rbt_join inv_rbt_join2 rbt_split Let_def
+        split!: RBT_Impl.rbt.splits RBT_Impl.color.splits prod.split if_splits
+        option.splits dest!: rbt_Node)
+qed
+
+definition "filter_minus t1 t2 = filter (\<lambda>(k, _). rbt_lookup t2 k = None) (RBT_Impl.entries t1)"
+
+fun rbt_minus_rec :: "('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt" where
+  "rbt_minus_rec t1 t2 = (if small_rbt t2 then RBT_Impl.fold (\<lambda>k _ t. rbt_delete k t) t2 t1
+    else if small_rbt t1 then rbtreeify (filter_minus t1 t2)
+    else case t2 of RBT_Impl.Empty \<Rightarrow> t1
+      | RBT_Impl.Branch _ l2 a b r2 \<Rightarrow>
+        case rbt_split t1 a of (l1, _, r1) \<Rightarrow> rbt_join2 (rbt_minus_rec l1 l2) (rbt_minus_rec r1 r2))"
+
+declare rbt_minus_rec.simps[simp del]
 
 end
 
@@ -1921,6 +2392,173 @@
     by(induct xs rule: rev_induct)(auto simp add: rbt_insertwk_is_rbt)
 qed
 
+lemma rbt_delete: "inv_12 t \<Longrightarrow> inv_12 (rbt_delete x t)"
+  using rbt_del_inv1_inv2[of t x]
+  by (auto simp: inv_12_def rbt_delete_def rbt_del_inv1_inv2)
+
+lemma rbt_sorted_delete: "rbt_sorted t \<Longrightarrow> rbt_sorted (rbt_delete x t)"
+  by (auto simp: rbt_delete_def rbt_del_rbt_sorted)
+
+lemma rbt_fold_rbt_delete:
+  assumes "inv_12 t2"
+  shows "inv_12 (RBT_Impl.fold (\<lambda>k _ t. rbt_delete k t) t1 t2)"
+proof -
+  define xs where "xs = RBT_Impl.entries t1"
+  from assms show ?thesis
+    unfolding RBT_Impl.fold_def xs_def[symmetric]
+    by (induct xs rule: rev_induct) (auto simp: rbt_delete)
+qed
+
+lemma rbt_minus_rec: "inv_12 t1 \<Longrightarrow> inv_12 t2 \<Longrightarrow> inv_12 (rbt_minus_rec t1 t2)"
+proof(induction t1 t2 rule: rbt_minus_rec.induct)
+  case (1 t1 t2)
+  thus ?case
+    by (auto simp: rbt_minus_rec.simps[of t1 t2] inv_rbt_join inv_rbt_join2 rbt_split
+        rbt_fold_rbt_delete split!: RBT_Impl.rbt.splits RBT_Impl.color.splits prod.split if_splits
+        dest: rbt_Node)
+qed
+
+end
+
+context linorder begin
+
+lemma rbt_sorted_rbt_baliL: "rbt_sorted l \<Longrightarrow> rbt_sorted r \<Longrightarrow> l |\<guillemotleft> a \<Longrightarrow> a \<guillemotleft>| r \<Longrightarrow>
+  rbt_sorted (rbt_baliL l a b r)"
+  using rbt_greater_trans rbt_less_trans
+  by (cases "(l,a,b,r)" rule: rbt_baliL.cases) fastforce+
+
+lemma rbt_lookup_rbt_baliL: "rbt_sorted l \<Longrightarrow> rbt_sorted r \<Longrightarrow> l |\<guillemotleft> a \<Longrightarrow> a \<guillemotleft>| r \<Longrightarrow>
+  rbt_lookup (rbt_baliL l a b r) k =
+  (if k < a then rbt_lookup l k else if k = a then Some b else rbt_lookup r k)"
+  by (cases "(l,a,b,r)" rule: rbt_baliL.cases) (auto split!: if_splits)
+
+lemma rbt_sorted_rbt_baliR: "rbt_sorted l \<Longrightarrow> rbt_sorted r \<Longrightarrow> l |\<guillemotleft> a \<Longrightarrow> a \<guillemotleft>| r \<Longrightarrow>
+  rbt_sorted (rbt_baliR l a b r)"
+  using rbt_greater_trans rbt_less_trans
+  by (cases "(l,a,b,r)" rule: rbt_baliR.cases) fastforce+
+
+lemma rbt_lookup_rbt_baliR: "rbt_sorted l \<Longrightarrow> rbt_sorted r \<Longrightarrow> l |\<guillemotleft> a \<Longrightarrow> a \<guillemotleft>| r \<Longrightarrow>
+  rbt_lookup (rbt_baliR l a b r) k =
+  (if k < a then rbt_lookup l k else if k = a then Some b else rbt_lookup r k)"
+  by (cases "(l,a,b,r)" rule: rbt_baliR.cases) (auto split!: if_splits)
+
+lemma rbt_sorted_rbt_joinL: "rbt_sorted (RBT_Impl.Branch c l a b r) \<Longrightarrow> bheight l \<le> bheight r \<Longrightarrow>
+  rbt_sorted (rbt_joinL l a b r)"
+proof (induction l a b r arbitrary: c rule: rbt_joinL.induct)
+  case (1 l a b r)
+  thus ?case
+    by (auto simp: rbt_set_rbt_baliL rbt_joinL.simps[of l a b r] set_rbt_joinL rbt_less_prop
+        intro!: rbt_sorted_rbt_baliL split!: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+qed
+
+lemma rbt_lookup_rbt_joinL: "rbt_sorted l \<Longrightarrow> rbt_sorted r \<Longrightarrow> l |\<guillemotleft> a \<Longrightarrow> a \<guillemotleft>| r \<Longrightarrow>
+  rbt_lookup (rbt_joinL l a b r) k =
+  (if k < a then rbt_lookup l k else if k = a then Some b else rbt_lookup r k)"
+proof (induction l a b r rule: rbt_joinL.induct)
+  case (1 l a b r)
+  have less_rbt_joinL:
+    "rbt_sorted r1 \<Longrightarrow> r1 |\<guillemotleft> x \<Longrightarrow> a \<guillemotleft>| r1 \<Longrightarrow> a < x \<Longrightarrow> rbt_joinL l a b r1 |\<guillemotleft> x" for x r1
+    using 1(5)
+    by (auto simp: rbt_less_prop rbt_greater_prop set_rbt_joinL)
+  show ?case
+    using 1 less_rbt_joinL rbt_lookup_rbt_baliL[OF rbt_sorted_rbt_joinL[of _ l a b], where ?k=k]
+    by (auto simp: rbt_joinL.simps[of l a b r] split!: if_splits rbt.splits color.splits)
+qed
+
+lemma rbt_sorted_rbt_joinR: "rbt_sorted l \<Longrightarrow> rbt_sorted r \<Longrightarrow> l |\<guillemotleft> a \<Longrightarrow> a \<guillemotleft>| r \<Longrightarrow>
+  rbt_sorted (rbt_joinR l a b r)"
+proof (induction l a b r rule: rbt_joinR.induct)
+  case (1 l a b r)
+  thus ?case
+    by (auto simp: rbt_set_rbt_baliR rbt_joinR.simps[of l a b r] set_rbt_joinR rbt_greater_prop
+        intro!: rbt_sorted_rbt_baliR split!: RBT_Impl.rbt.splits RBT_Impl.color.splits)
+qed
+
+lemma rbt_lookup_rbt_joinR: "rbt_sorted l \<Longrightarrow> rbt_sorted r \<Longrightarrow> l |\<guillemotleft> a \<Longrightarrow> a \<guillemotleft>| r \<Longrightarrow>
+  rbt_lookup (rbt_joinR l a b r) k =
+  (if k < a then rbt_lookup l k else if k = a then Some b else rbt_lookup r k)"
+proof (induction l a b r rule: rbt_joinR.induct)
+  case (1 l a b r)
+  have less_rbt_joinR:
+    "rbt_sorted l1 \<Longrightarrow> x \<guillemotleft>| l1 \<Longrightarrow> l1 |\<guillemotleft> a \<Longrightarrow> x < a \<Longrightarrow> x \<guillemotleft>| rbt_joinR l1 a b r" for x l1
+    using 1(6)
+    by (auto simp: rbt_less_prop rbt_greater_prop set_rbt_joinR)
+  show ?case
+    using 1 less_rbt_joinR rbt_lookup_rbt_baliR[OF _ rbt_sorted_rbt_joinR[of _ r a b], where ?k=k]
+    by (auto simp: rbt_joinR.simps[of l a b r] split!: if_splits rbt.splits color.splits)
+qed
+
+lemma rbt_sorted_paint: "rbt_sorted (paint c t) = rbt_sorted t"
+  by (cases t) auto
+
+lemma rbt_sorted_rbt_join: "rbt_sorted (RBT_Impl.Branch c l a b r) \<Longrightarrow>
+  rbt_sorted (rbt_join l a b r)"
+  by (auto simp: rbt_sorted_paint rbt_sorted_rbt_joinL rbt_sorted_rbt_joinR rbt_join_def Let_def)
+
+lemma rbt_lookup_rbt_join: "rbt_sorted l \<Longrightarrow> rbt_sorted r \<Longrightarrow> l |\<guillemotleft> a \<Longrightarrow> a \<guillemotleft>| r \<Longrightarrow>
+  rbt_lookup (rbt_join l a b r) k =
+  (if k < a then rbt_lookup l k else if k = a then Some b else rbt_lookup r k)"
+  by (auto simp: rbt_join_def Let_def rbt_lookup_rbt_joinL rbt_lookup_rbt_joinR)
+
+lemma rbt_split_min_rbt_sorted: "rbt_split_min t = (a,b,t') \<Longrightarrow> rbt_sorted t \<Longrightarrow> t \<noteq> RBT_Impl.Empty \<Longrightarrow>
+  rbt_sorted t' \<and> (\<forall>x \<in> set (RBT_Impl.keys t'). a < x)"
+  by (induction t arbitrary: t')
+     (fastforce simp: rbt_split_min_set rbt_sorted_rbt_join set_rbt_join rbt_less_prop rbt_greater_prop
+      split: if_splits prod.splits)+
+
+lemma rbt_split_min_rbt_lookup: "rbt_split_min t = (a,b,t') \<Longrightarrow> rbt_sorted t \<Longrightarrow> t \<noteq> RBT_Impl.Empty \<Longrightarrow>
+  rbt_lookup t k = (if k < a then None else if k = a then Some b else rbt_lookup t' k)"
+  by (induction t arbitrary: a b t')
+     (auto simp: rbt_less_trans antisym_conv3 rbt_less_prop rbt_split_min_set rbt_lookup_rbt_join
+      rbt_split_min_rbt_sorted split!: if_splits prod.splits)
+
+lemma rbt_sorted_rbt_join2: "rbt_sorted l \<Longrightarrow> rbt_sorted r \<Longrightarrow>
+  \<forall>x \<in> set (RBT_Impl.keys l). \<forall>y \<in> set (RBT_Impl.keys r). x < y \<Longrightarrow> rbt_sorted (rbt_join2 l r)"
+  by (simp add: rbt_join2_def rbt_sorted_rbt_join rbt_split_min_set rbt_split_min_rbt_sorted set_rbt_join
+      rbt_greater_prop rbt_less_prop split: prod.split)
+
+lemma rbt_lookup_rbt_join2: "rbt_sorted l \<Longrightarrow> rbt_sorted r \<Longrightarrow>
+  \<forall>x \<in> set (RBT_Impl.keys l). \<forall>y \<in> set (RBT_Impl.keys r). x < y \<Longrightarrow>
+  rbt_lookup (rbt_join2 l r) k = (case rbt_lookup l k of None \<Rightarrow> rbt_lookup r k | Some v \<Rightarrow> Some v)"
+  using rbt_lookup_keys
+  by (fastforce simp: rbt_join2_def rbt_greater_prop rbt_less_prop rbt_lookup_rbt_join
+      rbt_split_min_rbt_lookup rbt_split_min_rbt_sorted rbt_split_min_set split: option.splits prod.splits)
+
+lemma rbt_split_props: "rbt_split t x = (l,\<beta>,r) \<Longrightarrow> rbt_sorted t \<Longrightarrow>
+  set (RBT_Impl.keys l) = {a \<in> set (RBT_Impl.keys t). a < x} \<and>
+  set (RBT_Impl.keys r) = {a \<in> set (RBT_Impl.keys t). x < a} \<and>
+  rbt_sorted l \<and> rbt_sorted r"
+  by (induction t arbitrary: l r)
+     (force simp: Let_def set_rbt_join rbt_greater_prop rbt_less_prop
+      split: if_splits prod.splits intro!: rbt_sorted_rbt_join)+
+
+lemma rbt_split_lookup: "rbt_split t x = (l,\<beta>,r) \<Longrightarrow> rbt_sorted t \<Longrightarrow>
+  rbt_lookup t k = (if k < x then rbt_lookup l k else if k = x then \<beta> else rbt_lookup r k)"
+proof (induction t arbitrary: x l \<beta> r)
+  case (Branch c t1 a b t2)
+  have "rbt_sorted r1" "r1 |\<guillemotleft> a" if "rbt_split t1 x = (l, \<beta>, r1)" for r1
+    using rbt_split_props Branch(4) that
+    by (fastforce simp: rbt_less_prop)+
+  moreover have "rbt_sorted l1" "a \<guillemotleft>| l1" if "rbt_split t2 x = (l1, \<beta>, r)" for l1
+    using rbt_split_props Branch(4) that
+    by (fastforce simp: rbt_greater_prop)+
+  ultimately show ?case
+    using Branch rbt_lookup_rbt_join[of t1 _ a b k] rbt_lookup_rbt_join[of _ t2 a b k]
+    by (auto split!: if_splits prod.splits)
+qed simp
+
+lemma rbt_sorted_fold_insertwk: "rbt_sorted t \<Longrightarrow>
+  rbt_sorted (RBT_Impl.fold (rbt_insert_with_key f) t' t)"
+  by (induct t' arbitrary: t)
+     (simp_all add: rbt_insertwk_rbt_sorted)
+
+lemma rbt_lookup_iff_keys:
+  "rbt_sorted t \<Longrightarrow> set (RBT_Impl.keys t) = {k. \<exists>v. rbt_lookup t k = Some v}"
+  "rbt_sorted t \<Longrightarrow> rbt_lookup t k = None \<longleftrightarrow> k \<notin> set (RBT_Impl.keys t)"
+  "rbt_sorted t \<Longrightarrow> (\<exists>v. rbt_lookup t k = Some v) \<longleftrightarrow> k \<in> set (RBT_Impl.keys t)"
+  using entry_in_tree_keys rbt_lookup_keys[of t]
+  by force+
+
 lemma rbt_lookup_fold_rbt_insertwk:
   assumes t1: "rbt_sorted t1" and t2: "rbt_sorted t2"
   shows "rbt_lookup (fold (rbt_insert_with_key f) t1 t2) k =
@@ -1939,9 +2577,363 @@
     done
 qed
 
+lemma rbt_lookup_union_rec: "rbt_sorted t1 \<Longrightarrow> rbt_sorted t2 \<Longrightarrow>
+  rbt_sorted (rbt_union_rec f t1 t2) \<and> rbt_lookup (rbt_union_rec f t1 t2) k =
+  (case rbt_lookup t1 k of None \<Rightarrow> rbt_lookup t2 k
+  | Some v \<Rightarrow> (case rbt_lookup t2 k of None \<Rightarrow> Some v
+              | Some w \<Rightarrow> Some (f k v w)))"
+proof(induction f t1 t2 arbitrary: k rule: rbt_union_rec.induct)
+  case (1 f t1 t2)
+  obtain f' t1' t2' where flip: "(f', t2', t1') =
+    (if flip_rbt t2 t1 then (\<lambda>k v v'. f k v' v, t1, t2) else (f, t2, t1))"
+    by fastforce
+  have rbt_sorted': "rbt_sorted t1'" "rbt_sorted t2'"
+    using 1(3,4) flip
+    by (auto split: if_splits)
+  show ?case
+  proof (cases t1')
+    case Empty
+    show ?thesis
+      unfolding rbt_union_rec.simps[of _ t1] flip[symmetric]
+      using flip rbt_sorted' rbt_split_props[of t2]
+      by (auto simp: Empty rbt_lookup_fold_rbt_insertwk
+          intro!: rbt_sorted_fold_insertwk split: if_splits option.splits)
+  next
+    case (Branch c l1 a b r1)
+    {
+      assume not_small: "\<not>small_rbt t2'"
+      obtain l2 \<beta> r2 where rbt_split_t2': "rbt_split t2' a = (l2, \<beta>, r2)"
+        by (cases "rbt_split t2' a") auto
+      have rbt_sort: "rbt_sorted l1" "rbt_sorted r1"
+        using 1(3,4) flip
+        by (auto simp: Branch split: if_splits)
+      note rbt_split_t2'_props = rbt_split_props[OF rbt_split_t2' rbt_sorted'(2)]
+      have union_l1_l2: "rbt_sorted (rbt_union_rec f' l1 l2)" "rbt_lookup (rbt_union_rec f' l1 l2) k =
+        (case rbt_lookup l1 k of None \<Rightarrow> rbt_lookup l2 k
+        | Some v \<Rightarrow> (case rbt_lookup l2 k of None \<Rightarrow> Some v | Some w \<Rightarrow> Some (f' k v w)))" for k
+        using 1(1)[OF flip refl refl _ Branch rbt_split_t2'[symmetric]] rbt_sort rbt_split_t2'_props
+        by (auto simp: not_small)
+      have union_r1_r2: "rbt_sorted (rbt_union_rec f' r1 r2)" "rbt_lookup (rbt_union_rec f' r1 r2) k =
+        (case rbt_lookup r1 k of None \<Rightarrow> rbt_lookup r2 k
+        | Some v \<Rightarrow> (case rbt_lookup r2 k of None \<Rightarrow> Some v | Some w \<Rightarrow> Some (f' k v w)))" for k
+        using 1(2)[OF flip refl refl _ Branch rbt_split_t2'[symmetric]] rbt_sort rbt_split_t2'_props
+        by (auto simp: not_small)
+      have union_l1_l2_keys: "set (RBT_Impl.keys (rbt_union_rec f' l1 l2)) =
+       set (RBT_Impl.keys l1) \<union> set (RBT_Impl.keys l2)"
+        using rbt_sorted'(1) rbt_split_t2'_props
+        by (auto simp: Branch rbt_lookup_iff_keys(1) union_l1_l2 split: option.splits)
+      have union_r1_r2_keys: "set (RBT_Impl.keys (rbt_union_rec f' r1 r2)) =
+        set (RBT_Impl.keys r1) \<union> set (RBT_Impl.keys r2)"
+        using rbt_sorted'(1) rbt_split_t2'_props
+        by (auto simp: Branch rbt_lookup_iff_keys(1) union_r1_r2 split: option.splits)
+      have union_l1_l2_less: "rbt_union_rec f' l1 l2 |\<guillemotleft> a"
+        using rbt_sorted'(1) rbt_split_t2'_props
+        by (auto simp: Branch rbt_less_prop union_l1_l2_keys)
+      have union_r1_r2_greater: "a \<guillemotleft>| rbt_union_rec f' r1 r2"
+        using rbt_sorted'(1) rbt_split_t2'_props
+        by (auto simp: Branch rbt_greater_prop union_r1_r2_keys)
+      have "rbt_lookup (rbt_union_rec f t1 t2) k =
+       (case rbt_lookup t1' k of None \<Rightarrow> rbt_lookup t2' k
+       | Some v \<Rightarrow> (case rbt_lookup t2' k of None \<Rightarrow> Some v | Some w \<Rightarrow> Some (f' k v w)))"
+        using rbt_sorted' union_l1_l2 union_r1_r2 rbt_split_t2'_props
+          union_l1_l2_less union_r1_r2_greater not_small
+        by (auto simp: rbt_union_rec.simps[of _ t1] flip[symmetric] Branch
+            rbt_split_t2' rbt_lookup_rbt_join rbt_split_lookup[OF rbt_split_t2'] split: option.splits)
+      moreover have "rbt_sorted (rbt_union_rec f t1 t2)"
+        using rbt_sorted' rbt_split_t2'_props not_small
+        by (auto simp: rbt_union_rec.simps[of _ t1] flip[symmetric] Branch rbt_split_t2'
+            union_l1_l2 union_r1_r2 union_l1_l2_keys union_r1_r2_keys rbt_less_prop
+            rbt_greater_prop intro!: rbt_sorted_rbt_join)
+      ultimately have ?thesis
+        using flip
+        by (auto split: if_splits option.splits)
+    }
+    then show ?thesis
+      unfolding rbt_union_rec.simps[of _ t1] flip[symmetric]
+      using rbt_sorted' flip
+      by (auto simp: rbt_sorted_fold_insertwk rbt_lookup_fold_rbt_insertwk split: option.splits)
+  qed
+qed
+
+lemma rbtreeify_map_filter_inter:
+  fixes f :: "'a \<Rightarrow> 'b \<Rightarrow> 'b \<Rightarrow> 'b"
+  assumes "rbt_sorted t2"
+  shows "rbt_sorted (rbtreeify (map_filter_inter f t1 t2))"
+    "rbt_lookup (rbtreeify (map_filter_inter f t1 t2)) k =
+    (case rbt_lookup t1 k of None \<Rightarrow> None
+    | Some v \<Rightarrow> (case rbt_lookup t2 k of None \<Rightarrow> None | Some w \<Rightarrow> Some (f k v w)))"
+proof -
+  have map_of_map_filter: "map_of (List.map_filter (\<lambda>(k, v).
+    case rbt_lookup t1 k of None \<Rightarrow> None | Some v' \<Rightarrow> Some (k, f k v' v)) xs) k =
+    (case rbt_lookup t1 k of None \<Rightarrow> None
+    | Some v \<Rightarrow> (case map_of xs k of None \<Rightarrow> None | Some w \<Rightarrow> Some (f k v w)))" for xs k
+    by (induction xs) (auto simp: List.map_filter_def split: option.splits) (* slow *)
+  have map_fst_map_filter: "map fst (List.map_filter (\<lambda>(k, v).
+    case rbt_lookup t1 k of None \<Rightarrow> None | Some v' \<Rightarrow> Some (k, f k v' v)) xs) =
+    filter (\<lambda>k. rbt_lookup t1 k \<noteq> None) (map fst xs)" for xs
+    by (induction xs) (auto simp: List.map_filter_def split: option.splits)
+  have "sorted (map fst (map_filter_inter f t1 t2))"
+    using sorted_filter[of id] rbt_sorted_entries[OF assms]
+    by (auto simp: map_filter_inter_def map_fst_map_filter)
+  moreover have "distinct (map fst (map_filter_inter f t1 t2))"
+    using distinct_filter distinct_entries[OF assms]
+    by (auto simp: map_filter_inter_def map_fst_map_filter)
+  ultimately show
+    "rbt_sorted (rbtreeify (map_filter_inter f t1 t2))"
+    "rbt_lookup (rbtreeify (map_filter_inter f t1 t2)) k =
+      (case rbt_lookup t1 k of None \<Rightarrow> None
+      | Some v \<Rightarrow> (case rbt_lookup t2 k of None \<Rightarrow> None | Some w \<Rightarrow> Some (f k v w)))"
+    using rbt_sorted_rbtreeify
+    by (auto simp: rbt_lookup_rbtreeify map_filter_inter_def map_of_map_filter
+        map_of_entries[OF assms] split: option.splits)
+qed
+
+lemma rbt_lookup_inter_rec: "rbt_sorted t1 \<Longrightarrow> rbt_sorted t2 \<Longrightarrow>
+  rbt_sorted (rbt_inter_rec f t1 t2) \<and> rbt_lookup (rbt_inter_rec f t1 t2) k =
+  (case rbt_lookup t1 k of None \<Rightarrow> None
+  | Some v \<Rightarrow> (case rbt_lookup t2 k of None \<Rightarrow> None | Some w \<Rightarrow> Some (f k v w)))"
+proof(induction f t1 t2 arbitrary: k rule: rbt_inter_rec.induct)
+  case (1 f t1 t2)
+  obtain f' t1' t2' where flip: "(f', t2', t1') =
+    (if flip_rbt t2 t1 then (\<lambda>k v v'. f k v' v, t1, t2) else (f, t2, t1))"
+    by fastforce
+  have rbt_sorted': "rbt_sorted t1'" "rbt_sorted t2'"
+    using 1(3,4) flip
+    by (auto split: if_splits)
+  show ?case
+  proof (cases t1')
+    case Empty
+    show ?thesis
+      unfolding rbt_inter_rec.simps[of _ t1] flip[symmetric]
+      using flip rbt_sorted' rbt_split_props[of t2] rbtreeify_map_filter_inter[OF rbt_sorted'(2)]
+      by (auto simp: Empty split: option.splits)
+  next
+    case (Branch c l1 a b r1)
+    {
+      assume not_small: "\<not>small_rbt t2'"
+      obtain l2 \<beta> r2 where rbt_split_t2': "rbt_split t2' a = (l2, \<beta>, r2)"
+        by (cases "rbt_split t2' a") auto
+      note rbt_split_t2'_props = rbt_split_props[OF rbt_split_t2' rbt_sorted'(2)]
+      have rbt_sort: "rbt_sorted l1" "rbt_sorted r1" "rbt_sorted l2" "rbt_sorted r2"
+        using 1(3,4) flip
+        by (auto simp: Branch rbt_split_t2'_props split: if_splits)
+      have inter_l1_l2: "rbt_sorted (rbt_inter_rec f' l1 l2)" "rbt_lookup (rbt_inter_rec f' l1 l2) k =
+        (case rbt_lookup l1 k of None \<Rightarrow> None
+        | Some v \<Rightarrow> (case rbt_lookup l2 k of None \<Rightarrow> None | Some w \<Rightarrow> Some (f' k v w)))" for k
+        using 1(1)[OF flip refl refl _ Branch rbt_split_t2'[symmetric]] rbt_sort rbt_split_t2'_props
+        by (auto simp: not_small)
+      have inter_r1_r2: "rbt_sorted (rbt_inter_rec f' r1 r2)" "rbt_lookup (rbt_inter_rec f' r1 r2) k =
+        (case rbt_lookup r1 k of None \<Rightarrow> None
+        | Some v \<Rightarrow> (case rbt_lookup r2 k of None \<Rightarrow> None | Some w \<Rightarrow> Some (f' k v w)))" for k
+        using 1(2)[OF flip refl refl _ Branch rbt_split_t2'[symmetric]] rbt_sort rbt_split_t2'_props
+        by (auto simp: not_small)
+      have inter_l1_l2_keys: "set (RBT_Impl.keys (rbt_inter_rec f' l1 l2)) =
+        set (RBT_Impl.keys l1) \<inter> set (RBT_Impl.keys l2)"
+        using inter_l1_l2(1)
+        by (auto simp: rbt_lookup_iff_keys(1) inter_l1_l2(2) rbt_sort split: option.splits)
+      have inter_r1_r2_keys: "set (RBT_Impl.keys (rbt_inter_rec f' r1 r2)) =
+        set (RBT_Impl.keys r1) \<inter> set (RBT_Impl.keys r2)"
+        using inter_r1_r2(1)
+        by (auto simp: rbt_lookup_iff_keys(1) inter_r1_r2(2) rbt_sort split: option.splits)
+      have inter_l1_l2_less: "rbt_inter_rec f' l1 l2 |\<guillemotleft> a"
+        using rbt_sorted'(1) rbt_split_t2'_props
+        by (auto simp: Branch rbt_less_prop inter_l1_l2_keys)
+      have inter_r1_r2_greater: "a \<guillemotleft>| rbt_inter_rec f' r1 r2"
+        using rbt_sorted'(1) rbt_split_t2'_props
+        by (auto simp: Branch rbt_greater_prop inter_r1_r2_keys)
+      have rbt_lookup_join2: "rbt_lookup (rbt_join2 (rbt_inter_rec f' l1 l2) (rbt_inter_rec f' r1 r2)) k =
+        (case rbt_lookup (rbt_inter_rec f' l1 l2) k of None \<Rightarrow> rbt_lookup (rbt_inter_rec f' r1 r2) k
+        | Some v \<Rightarrow> Some v)" for k
+        using rbt_lookup_rbt_join2[OF inter_l1_l2(1) inter_r1_r2(1)] rbt_sorted'(1)
+        by (fastforce simp: Branch inter_l1_l2_keys inter_r1_r2_keys rbt_less_prop rbt_greater_prop)
+      have rbt_lookup_l1_k: "rbt_lookup l1 k = Some v \<Longrightarrow> k < a" for k v
+        using rbt_sorted'(1) rbt_lookup_iff_keys(3)
+        by (auto simp: Branch rbt_less_prop)
+      have rbt_lookup_r1_k: "rbt_lookup r1 k = Some v \<Longrightarrow> a < k" for k v
+        using rbt_sorted'(1) rbt_lookup_iff_keys(3)
+        by (auto simp: Branch rbt_greater_prop)
+      have "rbt_lookup (rbt_inter_rec f t1 t2) k =
+        (case rbt_lookup t1' k of None \<Rightarrow> None
+        | Some v \<Rightarrow> (case rbt_lookup t2' k of None \<Rightarrow> None | Some w \<Rightarrow> Some (f' k v w)))"
+        by (auto simp: Let_def rbt_inter_rec.simps[of _ t1] flip[symmetric] not_small Branch
+            rbt_split_t2' rbt_lookup_join2 rbt_lookup_rbt_join inter_l1_l2_less inter_r1_r2_greater
+            rbt_split_lookup[OF rbt_split_t2' rbt_sorted'(2)] inter_l1_l2 inter_r1_r2
+            split!: if_splits option.splits dest: rbt_lookup_l1_k rbt_lookup_r1_k)
+      moreover have "rbt_sorted (rbt_inter_rec f t1 t2)"
+        using rbt_sorted' inter_l1_l2 inter_r1_r2 rbt_split_t2'_props not_small
+        by (auto simp: Let_def rbt_inter_rec.simps[of _ t1] flip[symmetric] Branch rbt_split_t2'
+            rbt_less_prop rbt_greater_prop inter_l1_l2_less inter_r1_r2_greater
+            inter_l1_l2_keys inter_r1_r2_keys intro!: rbt_sorted_rbt_join rbt_sorted_rbt_join2
+            split: if_splits option.splits dest!: bspec)
+      ultimately have ?thesis
+        using flip
+        by (auto split: if_splits split: option.splits)
+    }
+    then show ?thesis
+      unfolding rbt_inter_rec.simps[of _ t1] flip[symmetric]
+      using rbt_sorted' flip rbtreeify_map_filter_inter[OF rbt_sorted'(2)]
+      by (auto split: option.splits)
+  qed
+qed
+
+lemma rbt_lookup_delete:
+  assumes "inv_12 t" "rbt_sorted t"
+  shows "rbt_lookup (rbt_delete x t) k = (if x = k then None else rbt_lookup t k)"
+proof -
+  note rbt_sorted_del = rbt_del_rbt_sorted[OF assms(2), of x]
+  show ?thesis
+    using assms rbt_sorted_del rbt_del_in_tree rbt_lookup_from_in_tree[OF assms(2) rbt_sorted_del]
+    by (fastforce simp: inv_12_def rbt_delete_def rbt_lookup_iff_keys(2) keys_entries)
+qed
+
+lemma fold_rbt_delete:
+  assumes "inv_12 t1" "rbt_sorted t1" "rbt_sorted t2"
+  shows "inv_12 (RBT_Impl.fold (\<lambda>k _ t. rbt_delete k t) t2 t1) \<and>
+    rbt_sorted (RBT_Impl.fold (\<lambda>k _ t. rbt_delete k t) t2 t1) \<and>
+    rbt_lookup (RBT_Impl.fold (\<lambda>k _ t. rbt_delete k t) t2 t1) k =
+    (case rbt_lookup t1 k of None \<Rightarrow> None
+    | Some v \<Rightarrow> (case rbt_lookup t2 k of None \<Rightarrow> Some v | _ \<Rightarrow> None))"
+proof -
+  define xs where "xs = RBT_Impl.entries t2"
+  show "inv_12 (RBT_Impl.fold (\<lambda>k _ t. rbt_delete k t) t2 t1) \<and>
+    rbt_sorted (RBT_Impl.fold (\<lambda>k _ t. rbt_delete k t) t2 t1) \<and>
+    rbt_lookup (RBT_Impl.fold (\<lambda>k _ t. rbt_delete k t) t2 t1) k =
+    (case rbt_lookup t1 k of None \<Rightarrow> None
+    | Some v \<Rightarrow> (case rbt_lookup t2 k of None \<Rightarrow> Some v | _ \<Rightarrow> None))"
+    using assms(1,2)
+    unfolding map_of_entries[OF assms(3), symmetric] RBT_Impl.fold_def xs_def[symmetric]
+    by (induction xs arbitrary: t1 rule: rev_induct)
+       (auto simp: rbt_delete rbt_sorted_delete rbt_lookup_delete split!: option.splits)
+qed
+
+lemma rbtreeify_filter_minus:
+  assumes "rbt_sorted t1"
+  shows "rbt_sorted (rbtreeify (filter_minus t1 t2)) \<and>
+    rbt_lookup (rbtreeify (filter_minus t1 t2)) k =
+    (case rbt_lookup t1 k of None \<Rightarrow> None
+    | Some v \<Rightarrow> (case rbt_lookup t2 k of None \<Rightarrow> Some v | _ \<Rightarrow> None))"
+proof -
+  have map_of_filter: "map_of (filter (\<lambda>(k, _). rbt_lookup t2 k = None) xs) k =
+    (case map_of xs k of None \<Rightarrow> None
+    | Some v \<Rightarrow> (case rbt_lookup t2 k of None \<Rightarrow> Some v | Some x \<Rightarrow> Map.empty x))"
+      for xs :: "('a \<times> 'b) list"
+    by (induction xs) (auto split: option.splits)
+  have map_fst_filter_minus: "map fst (filter_minus t1 t2) =
+    filter (\<lambda>k. rbt_lookup t2 k = None) (map fst (RBT_Impl.entries t1))"
+    by (auto simp: filter_minus_def filter_map comp_def case_prod_unfold)
+  have "sorted (map fst (filter_minus t1 t2))" "distinct (map fst (filter_minus t1 t2))"
+    using distinct_filter distinct_entries[OF assms]
+      sorted_filter[of id] rbt_sorted_entries[OF assms]
+    by (auto simp: map_fst_filter_minus intro!: rbt_sorted_rbtreeify)
+  then show ?thesis
+    by (auto simp: rbt_lookup_rbtreeify filter_minus_def map_of_filter map_of_entries[OF assms]
+        intro!: rbt_sorted_rbtreeify)
+qed
+
+lemma rbt_lookup_minus_rec: "inv_12 t1 \<Longrightarrow> rbt_sorted t1 \<Longrightarrow> rbt_sorted t2 \<Longrightarrow>
+  rbt_sorted (rbt_minus_rec t1 t2) \<and> rbt_lookup (rbt_minus_rec t1 t2) k =
+  (case rbt_lookup t1 k of None \<Rightarrow> None
+  | Some v \<Rightarrow> (case rbt_lookup t2 k of None \<Rightarrow> Some v | _ \<Rightarrow> None))"
+proof(induction t1 t2 arbitrary: k rule: rbt_minus_rec.induct)
+  case (1 t1 t2)
+  show ?case
+  proof (cases t2)
+    case Empty
+    show ?thesis
+      using rbtreeify_filter_minus[OF 1(4)] 1(4)
+      by (auto simp: rbt_minus_rec.simps[of t1] Empty split: option.splits)
+  next
+    case (Branch c l2 a b r2)
+    {
+      assume not_small: "\<not>small_rbt t2" "\<not>small_rbt t1"
+      obtain l1 \<beta> r1 where rbt_split_t1: "rbt_split t1 a = (l1, \<beta>, r1)"
+        by (cases "rbt_split t1 a") auto
+      note rbt_split_t1_props = rbt_split_props[OF rbt_split_t1 1(4)]
+      have minus_l1_l2: "rbt_sorted (rbt_minus_rec l1 l2)"
+        "rbt_lookup (rbt_minus_rec l1 l2) k =
+        (case rbt_lookup l1 k of None \<Rightarrow> None
+        | Some v \<Rightarrow> (case rbt_lookup l2 k of None \<Rightarrow> Some v | Some x \<Rightarrow> None))" for k
+        using 1(1)[OF not_small Branch rbt_split_t1[symmetric] refl] 1(5) rbt_split_t1_props
+          rbt_split[OF rbt_split_t1 1(3)]
+        by (auto simp: Branch)
+      have minus_r1_r2: "rbt_sorted (rbt_minus_rec r1 r2)"
+        "rbt_lookup (rbt_minus_rec r1 r2) k =
+        (case rbt_lookup r1 k of None \<Rightarrow> None
+        | Some v \<Rightarrow> (case rbt_lookup r2 k of None \<Rightarrow> Some v | Some x \<Rightarrow> None))" for k
+        using 1(2)[OF not_small Branch rbt_split_t1[symmetric] refl] 1(5) rbt_split_t1_props
+          rbt_split[OF rbt_split_t1 1(3)]
+        by (auto simp: Branch)
+      have minus_l1_l2_keys: "set (RBT_Impl.keys (rbt_minus_rec l1 l2)) =
+        set (RBT_Impl.keys l1) - set (RBT_Impl.keys l2)"
+        using minus_l1_l2(1) 1(5) rbt_lookup_iff_keys(3) rbt_split_t1_props
+        by (auto simp: Branch rbt_lookup_iff_keys(1) minus_l1_l2(2) split: option.splits)
+      have minus_r1_r2_keys: "set (RBT_Impl.keys (rbt_minus_rec r1 r2)) =
+        set (RBT_Impl.keys r1) - set (RBT_Impl.keys r2)"
+        using minus_r1_r2(1) 1(5) rbt_lookup_iff_keys(3) rbt_split_t1_props
+        by (auto simp: Branch rbt_lookup_iff_keys(1) minus_r1_r2(2) split: option.splits)
+      have rbt_lookup_join2: "rbt_lookup (rbt_join2 (rbt_minus_rec l1 l2) (rbt_minus_rec r1 r2)) k =
+        (case rbt_lookup (rbt_minus_rec l1 l2) k of None \<Rightarrow> rbt_lookup (rbt_minus_rec r1 r2) k
+        | Some v \<Rightarrow> Some v)" for k
+        using rbt_lookup_rbt_join2[OF minus_l1_l2(1) minus_r1_r2(1)] rbt_split_t1_props
+        by (fastforce simp: minus_l1_l2_keys minus_r1_r2_keys)
+      have lookup_l1_r1_a: "rbt_lookup l1 a = None" "rbt_lookup r1 a = None"
+        using rbt_split_t1_props
+        by (auto simp: rbt_lookup_iff_keys(2))
+      have "rbt_lookup (rbt_minus_rec t1 t2) k =
+        (case rbt_lookup t1 k of None \<Rightarrow> None
+        | Some v \<Rightarrow> (case rbt_lookup t2 k of None \<Rightarrow> Some v | _ \<Rightarrow> None))"
+        using not_small rbt_lookup_iff_keys(2)[of l1] rbt_lookup_iff_keys(3)[of l1]
+          rbt_lookup_iff_keys(3)[of r1] rbt_split_t1_props
+        by (auto simp: rbt_minus_rec.simps[of t1] Branch rbt_split_t1 rbt_lookup_join2
+            minus_l1_l2(2) minus_r1_r2(2) rbt_split_lookup[OF rbt_split_t1 1(4)] lookup_l1_r1_a
+            split: option.splits)
+      moreover have "rbt_sorted (rbt_minus_rec t1 t2)"
+        using not_small minus_l1_l2(1) minus_r1_r2(1) rbt_split_t1_props rbt_sorted_rbt_join2
+        by (fastforce simp: rbt_minus_rec.simps[of t1] Branch rbt_split_t1 minus_l1_l2_keys minus_r1_r2_keys)
+      ultimately have ?thesis
+        by (auto split: if_splits split: option.splits)
+    }
+    then show ?thesis
+      using fold_rbt_delete[OF 1(3,4,5)] rbtreeify_filter_minus[OF 1(4)]
+      by (auto simp: rbt_minus_rec.simps[of t1])
+  qed
+qed
+
+end
+
+context ord begin
+
+definition rbt_union_with_key :: "('a \<Rightarrow> 'b \<Rightarrow> 'b \<Rightarrow> 'b) \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt"
+where
+  "rbt_union_with_key f t1 t2 = paint B (rbt_union_swap_rec f False t1 t2)"
+
+definition rbt_union_with where
+  "rbt_union_with f = rbt_union_with_key (\<lambda>_. f)"
+
+definition rbt_union where
+  "rbt_union = rbt_union_with_key (%_ _ rv. rv)"
+
+definition rbt_inter_with_key :: "('a \<Rightarrow> 'b \<Rightarrow> 'b \<Rightarrow> 'b) \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt \<Rightarrow> ('a, 'b) rbt"
+where
+  "rbt_inter_with_key f t1 t2 = paint B (rbt_inter_swap_rec f False t1 t2)"
+
+definition rbt_inter_with where
+  "rbt_inter_with f = rbt_inter_with_key (\<lambda>_. f)"
+
+definition rbt_inter where
+  "rbt_inter = rbt_inter_with_key (\<lambda>_ _ rv. rv)"
+
+definition rbt_minus where
+  "rbt_minus t1 t2 = paint B (rbt_minus_rec t1 t2)"
+
+end
+
+context linorder begin
+
 lemma is_rbt_rbt_unionwk [simp]:
   "\<lbrakk> is_rbt t1; is_rbt t2 \<rbrakk> \<Longrightarrow> is_rbt (rbt_union_with_key f t1 t2)"
-by(simp add: rbt_union_with_key_def Let_def is_rbt_fold_rbt_insertwk is_rbt_rbtreeify rbt_sorted_entries distinct_entries split: compare.split)
+  using rbt_union_rec rbt_lookup_union_rec
+  by (fastforce simp: rbt_union_with_key_def rbt_union_swap_rec is_rbt_def inv_12_def)
 
 lemma rbt_lookup_rbt_unionwk:
   "\<lbrakk> rbt_sorted t1; rbt_sorted t2 \<rbrakk> 
@@ -1949,7 +2941,8 @@
   (case rbt_lookup t1 k of None \<Rightarrow> rbt_lookup t2 k 
    | Some v \<Rightarrow> case rbt_lookup t2 k of None \<Rightarrow> Some v 
               | Some w \<Rightarrow> Some (f k v w))"
-by(auto simp add: rbt_union_with_key_def Let_def rbt_lookup_fold_rbt_insertwk rbt_sorted_entries distinct_entries map_of_sunion_with map_of_entries rbt_lookup_rbtreeify split: option.split compare.split)
+  using rbt_lookup_union_rec
+  by (auto simp: rbt_union_with_key_def rbt_union_swap_rec)
 
 lemma rbt_unionw_is_rbt: "\<lbrakk> is_rbt lt; is_rbt rt \<rbrakk> \<Longrightarrow> is_rbt (rbt_union_with f lt rt)"
 by(simp add: rbt_union_with_def)
@@ -1963,15 +2956,16 @@
 by(rule ext)(simp add: rbt_lookup_rbt_unionwk rbt_union_def map_add_def split: option.split)
 
 lemma rbt_interwk_is_rbt [simp]:
-  "\<lbrakk> rbt_sorted t1; rbt_sorted t2 \<rbrakk> \<Longrightarrow> is_rbt (rbt_inter_with_key f t1 t2)"
-by(auto simp add: rbt_inter_with_key_def Let_def map_map_filter split_def o_def option.map_comp map_filter_map_option_const sorted_filter[where f=id, simplified] rbt_sorted_entries distinct_entries intro: is_rbt_rbtreeify split: compare.split)
+  "\<lbrakk> is_rbt t1; is_rbt t2 \<rbrakk> \<Longrightarrow> is_rbt (rbt_inter_with_key f t1 t2)"
+  using rbt_inter_rec rbt_lookup_inter_rec
+  by (fastforce simp: rbt_inter_with_key_def rbt_inter_swap_rec is_rbt_def inv_12_def rbt_sorted_paint)
 
 lemma rbt_interw_is_rbt:
-  "\<lbrakk> rbt_sorted t1; rbt_sorted t2 \<rbrakk> \<Longrightarrow> is_rbt (rbt_inter_with f t1 t2)"
+  "\<lbrakk> is_rbt t1; is_rbt t2 \<rbrakk> \<Longrightarrow> is_rbt (rbt_inter_with f t1 t2)"
 by(simp add: rbt_inter_with_def)
 
 lemma rbt_inter_is_rbt:
-  "\<lbrakk> rbt_sorted t1; rbt_sorted t2 \<rbrakk> \<Longrightarrow> is_rbt (rbt_inter t1 t2)"
+  "\<lbrakk> is_rbt t1; is_rbt t2 \<rbrakk> \<Longrightarrow> is_rbt (rbt_inter t1 t2)"
 by(simp add: rbt_inter_def)
 
 lemma rbt_lookup_rbt_interwk:
@@ -1980,13 +2974,26 @@
   (case rbt_lookup t1 k of None \<Rightarrow> None 
    | Some v \<Rightarrow> case rbt_lookup t2 k of None \<Rightarrow> None
                | Some w \<Rightarrow> Some (f k v w))"
-by(auto 4 3 simp add: rbt_inter_with_key_def Let_def map_of_entries[symmetric] rbt_lookup_rbtreeify map_map_filter split_def o_def option.map_comp map_filter_map_option_const sorted_filter[where f=id, simplified] rbt_sorted_entries distinct_entries map_of_sinter_with map_of_eq_None_iff set_map_filter split: option.split compare.split intro: rev_image_eqI dest: rbt_sorted_entries_right_unique)
+  using rbt_lookup_inter_rec
+  by (auto simp: rbt_inter_with_key_def rbt_inter_swap_rec)
 
 lemma rbt_lookup_rbt_inter:
   "\<lbrakk> rbt_sorted t1; rbt_sorted t2 \<rbrakk>
   \<Longrightarrow> rbt_lookup (rbt_inter t1 t2) = rbt_lookup t2 |` dom (rbt_lookup t1)"
 by(auto simp add: rbt_inter_def rbt_lookup_rbt_interwk restrict_map_def split: option.split)
 
+lemma rbt_minus_is_rbt:
+  "\<lbrakk> is_rbt t1; is_rbt t2 \<rbrakk> \<Longrightarrow> is_rbt (rbt_minus t1 t2)"
+  using rbt_minus_rec[of t1 t2] rbt_lookup_minus_rec[of t1 t2]
+  by (auto simp: rbt_minus_def is_rbt_def inv_12_def)
+
+lemma rbt_lookup_rbt_minus:
+  "\<lbrakk> is_rbt t1; is_rbt t2 \<rbrakk>
+  \<Longrightarrow> rbt_lookup (rbt_minus t1 t2) = rbt_lookup t1 |` (- dom (rbt_lookup t2))"
+  by (rule ext)
+     (auto simp: rbt_minus_def is_rbt_def inv_12_def restrict_map_def rbt_lookup_minus_rec
+      split: option.splits)
+
 end
 
 
@@ -2006,14 +3013,19 @@
   ord.rbt_del_from_right.simps
   ord.rbt_del.simps
   ord.rbt_delete_def
-  ord.sunion_with.simps
-  ord.sinter_with.simps
+  ord.rbt_split.simps
+  ord.rbt_union_swap_rec.simps
+  ord.map_filter_inter_def
+  ord.rbt_inter_swap_rec.simps
+  ord.filter_minus_def
+  ord.rbt_minus_rec.simps
   ord.rbt_union_with_key_def
   ord.rbt_union_with_def
   ord.rbt_union_def
   ord.rbt_inter_with_key_def
   ord.rbt_inter_with_def
   ord.rbt_inter_def
+  ord.rbt_minus_def
   ord.rbt_map_entry.simps
   ord.rbt_bulkload_def
 
@@ -2070,6 +3082,6 @@
      (\<^const_name>\<open>rbt_bulkload\<close>, SOME \<^typ>\<open>('a \<times> 'b) list \<Rightarrow> ('a::linorder,'b) rbt\<close>)]
 \<close>
 
-hide_const (open) R B Empty entries keys fold gen_keys gen_entries
+hide_const (open) MR MB R B Empty entries keys fold gen_keys gen_entries
 
 end
--- a/src/HOL/Library/Sublist.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Library/Sublist.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -476,40 +476,19 @@
   obtains "prefix xs ys" | "strict_prefix ys xs" | "xs \<parallel> ys"
   unfolding parallel_def strict_prefix_def by blast
 
+lemma parallel_cancel:  "a#xs \<parallel> a#ys \<Longrightarrow> xs \<parallel> ys"
+  by (simp add: parallel_def)
+
 theorem parallel_decomp:
   "xs \<parallel> ys \<Longrightarrow> \<exists>as b bs c cs. b \<noteq> c \<and> xs = as @ b # bs \<and> ys = as @ c # cs"
-proof (induct xs rule: rev_induct)
-  case Nil
-  then have False by auto
-  then show ?case ..
-next
-  case (snoc x xs)
-  show ?case
-  proof (rule prefix_cases)
-    assume le: "prefix xs ys"
-    then obtain ys' where ys: "ys = xs @ ys'" ..
-    show ?thesis
-    proof (cases ys')
-      assume "ys' = []"
-      then show ?thesis by (metis append_Nil2 parallelE prefixI snoc.prems ys)
-    next
-      fix c cs assume ys': "ys' = c # cs"
-      have "x \<noteq> c" using snoc.prems ys ys' by fastforce
-      thus "\<exists>as b bs c cs. b \<noteq> c \<and> xs @ [x] = as @ b # bs \<and> ys = as @ c # cs"
-        using ys ys' by blast
-    qed
-  next
-    assume "strict_prefix ys xs"
-    then have "prefix ys (xs @ [x])" by (simp add: strict_prefix_def)
-    with snoc have False by blast
-    then show ?thesis ..
-  next
-    assume "xs \<parallel> ys"
-    with snoc obtain as b bs c cs where neq: "(b::'a) \<noteq> c"
-      and xs: "xs = as @ b # bs" and ys: "ys = as @ c # cs"
-      by blast
-    from xs have "xs @ [x] = as @ b # (bs @ [x])" by simp
-    with neq ys show ?thesis by blast
+proof (induct rule: list_induct2', blast, force, force)
+  case (4 x xs y ys)
+  then show ?case
+  proof (cases "x \<noteq> y", blast)
+    assume "\<not> x \<noteq> y" hence "x = y" by blast
+    then show ?thesis
+      using "4.hyps"[OF parallel_cancel[OF "4.prems"[folded \<open>x = y\<close>]]]
+      by (meson Cons_eq_appendI)
   qed
 qed
 
--- a/src/HOL/Probability/Central_Limit_Theorem.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Probability/Central_Limit_Theorem.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -14,7 +14,6 @@
     and \<sigma> :: real
     and S :: "nat \<Rightarrow> 'a \<Rightarrow> real"
   assumes X_indep: "indep_vars (\<lambda>i. borel) X UNIV"
-    and X_integrable: "\<And>n. integrable M (X n)"
     and X_mean_0: "\<And>n. expectation (X n) = 0"
     and \<sigma>_pos: "\<sigma> > 0"
     and X_square_integrable: "\<And>n. integrable M (\<lambda>x. (X n x)\<^sup>2)"
@@ -27,8 +26,10 @@
   define \<phi> where "\<phi> n = char (distr M borel (?S' n))" for n
   define \<psi> where "\<psi> n t = char \<mu> (t / sqrt (\<sigma>\<^sup>2 * n))" for n t
 
-  have X_rv [simp, measurable]: "\<And>n. random_variable borel (X n)"
+  have X_rv [simp, measurable]: "random_variable borel (X n)" for n
     using X_indep unfolding indep_vars_def2 by simp
+  have X_integrable [simp, intro]: "integrable M (X n)" for n
+    by (rule square_integrable_imp_integrable[OF _ X_square_integrable]) simp_all
   interpret \<mu>: real_distribution \<mu>
     by (subst X_distrib [symmetric, of 0], rule real_distribution_distr, simp)
 
@@ -120,7 +121,6 @@
     and \<sigma> :: real
     and S :: "nat \<Rightarrow> 'a \<Rightarrow> real"
   assumes X_indep: "indep_vars (\<lambda>i. borel) X UNIV"
-    and X_integrable: "\<And>n. integrable M (X n)"
     and X_mean: "\<And>n. expectation (X n) = m"
     and \<sigma>_pos: "\<sigma> > 0"
     and X_square_integrable: "\<And>n. integrable M (\<lambda>x. (X n x)\<^sup>2)"
@@ -131,8 +131,12 @@
 proof (intro central_limit_theorem_zero_mean)
   show "indep_vars (\<lambda>i. borel) X' UNIV"
     unfolding X'_def[abs_def] using X_indep by (rule indep_vars_compose2) auto
-  show "integrable M (X' n)" "expectation (X' n) = 0" for n
-    using X_integrable X_mean by (auto simp: X'_def[abs_def] prob_space)
+  have X_rv [simp, measurable]: "random_variable borel (X n)" for n
+    using X_indep unfolding indep_vars_def2 by simp
+  have X_integrable [simp, intro]: "integrable M (X n)" for n
+    by (rule square_integrable_imp_integrable[OF _ X_square_integrable]) simp_all
+  show "expectation (X' n) = 0" for n
+    using X_mean by (auto simp: X'_def[abs_def] prob_space)
   show "\<sigma> > 0" "integrable M (\<lambda>x. (X' n x)\<^sup>2)" "variance (X' n) = \<sigma>\<^sup>2" for n
     using \<open>0 < \<sigma>\<close> X_integrable X_mean X_square_integrable X_variance unfolding X'_def
     by (auto simp: prob_space power2_diff)
--- a/src/HOL/Probability/Conditional_Expectation.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Probability/Conditional_Expectation.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -1291,7 +1291,7 @@
         finally have "\<bar>q y\<bar> > \<bar>q 0\<bar> - e" by auto
         then show ?thesis unfolding e_def by simp
       qed
-      have "emeasure M {x \<in> space M. \<bar>X x\<bar> < d} \<le> emeasure M ({x \<in> space M. 1 \<le> ennreal(1/e) * \<bar>q(X x)\<bar>} \<inter> space M)"
+      have "emeasure M {x \<in> space M. \<bar>X x\<bar> < d} \<le> emeasure M ({x \<in> space M. 1 \<le> ennreal(1/e) * \<bar>q(X x)\<bar>})"
         by (rule emeasure_mono, auto simp add: * \<open>e>0\<close> less_imp_le ennreal_mult''[symmetric])
       also have "... \<le> (1/e) * (\<integral>\<^sup>+x. ennreal(\<bar>q(X x)\<bar>) * indicator (space M) x \<partial>M)"
         by (rule nn_integral_Markov_inequality, auto)
@@ -1304,7 +1304,7 @@
 
       have "{x \<in> space M. \<bar>X x\<bar> \<ge> d} = {x \<in> space M. 1 \<le> ennreal(1/d) * \<bar>X x\<bar>} \<inter> space M"
         by (auto simp add: \<open>d>0\<close> ennreal_mult''[symmetric])
-      then have "emeasure M {x \<in> space M. \<bar>X x\<bar> \<ge> d} = emeasure M ({x \<in> space M. 1 \<le> ennreal(1/d) * \<bar>X x\<bar>} \<inter> space M)"
+      then have "emeasure M {x \<in> space M. \<bar>X x\<bar> \<ge> d} = emeasure M ({x \<in> space M. 1 \<le> ennreal(1/d) * \<bar>X x\<bar>})"
         by auto
       also have "... \<le> (1/d) * (\<integral>\<^sup>+x. ennreal(\<bar>X x\<bar>) * indicator (space M) x \<partial>M)"
         by (rule nn_integral_Markov_inequality, auto)
--- a/src/HOL/Probability/Giry_Monad.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Probability/Giry_Monad.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -2,15 +2,17 @@
     Author:     Johannes Hölzl, TU München
     Author:     Manuel Eberl, TU München
 
-Defines the subprobability spaces, the subprobability functor and the Giry monad on subprobability
+Defines subprobability spaces, the subprobability functor and the Giry monad on subprobability
 spaces.
 *)
 
+section \<open>The Giry monad\<close>
+
 theory Giry_Monad
   imports Probability_Measure "HOL-Library.Monad_Syntax"
 begin
 
-section \<open>Sub-probability spaces\<close>
+subsection \<open>Sub-probability spaces\<close>
 
 locale subprob_space = finite_measure +
   assumes emeasure_space_le_1: "emeasure M (space M) \<le> 1"
@@ -521,7 +523,7 @@
   qed
 qed
 
-section \<open>Properties of return\<close>
+subsection \<open>Properties of ``return''\<close>
 
 definition return :: "'a measure \<Rightarrow> 'a \<Rightarrow> 'a measure" where
   "return R x = measure_of (space R) (sets R) (\<lambda>A. indicator A x)"
@@ -757,7 +759,7 @@
   "sets M = sets (subprob_algebra N) \<Longrightarrow> space (select_sets M) = space N"
   by (intro sets_eq_imp_space_eq sets_select_sets)
 
-section \<open>Join\<close>
+subsection \<open>Join\<close>
 
 definition join :: "'a measure measure \<Rightarrow> 'a measure" where
   "join M = measure_of (space (select_sets M)) (sets (select_sets M)) (\<lambda>B. \<integral>\<^sup>+ M'. emeasure M' B \<partial>M)"
@@ -1784,4 +1786,28 @@
   by (subst bind_distr[OF _ measurable_compose[OF _ return_measurable]])
      (auto intro!: bind_return_distr')
 
+lemma (in prob_space) AE_eq_constD:
+  assumes "AE x in M. x = y"
+  shows   "M = return M y" "y \<in> space M"
+proof -
+  have "AE x in M. x \<in> space M"
+    by auto
+  with assms have "AE x in M. y \<in> space M"
+    by eventually_elim auto
+  thus "y \<in> space M"
+    by simp
+
+  show "M = return M y"
+  proof (rule measure_eqI)
+    fix X assume X: "X \<in> sets M"
+    have "AE x in M. (x \<in> X) = (x \<in> (if y \<in> X then space M else {}))"
+      using assms by eventually_elim (use X \<open>y \<in> space M\<close> in auto)
+    hence "emeasure M X = emeasure M (if y \<in> X then space M else {})"
+      using X by (intro emeasure_eq_AE) auto
+    also have "\<dots> = emeasure (return M y) X"
+      using X by (auto simp: emeasure_space_1)
+    finally show "emeasure M X = \<dots>" .
+  qed auto
+qed
+
 end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Probability/Hoeffding.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -0,0 +1,923 @@
+(*
+  File:    Hoeffding.thy
+  Author:  Manuel Eberl, TU München
+*)
+section \<open>Hoeffding's Lemma and Hoeffding's Inequality\<close>
+theory Hoeffding
+  imports Product_PMF Independent_Family
+begin
+
+text \<open>
+  Hoeffding's inequality shows that a sum of bounded independent random variables is concentrated
+  around its mean, with an exponential decay of the tail probabilities.
+\<close>
+
+subsection \<open>Hoeffding's Lemma\<close>
+
+lemma convex_on_exp: 
+  fixes l :: real
+  assumes "l \<ge> 0"
+  shows   "convex_on UNIV (\<lambda>x. exp(l*x))"
+  using assms
+  by (intro convex_on_realI[where f' = "\<lambda>x. l * exp (l * x)"])
+     (auto intro!: derivative_eq_intros mult_left_mono)
+
+lemma mult_const_minus_self_real_le:
+  fixes x :: real
+  shows "x * (c - x) \<le> c\<^sup>2 / 4"
+proof -
+  have "x * (c - x) = -(x - c / 2)\<^sup>2 + c\<^sup>2 / 4"
+    by (simp add: field_simps power2_eq_square)
+  also have "\<dots> \<le> 0 + c\<^sup>2 / 4"
+    by (intro add_mono) auto
+  finally show ?thesis by simp
+qed
+
+lemma Hoeffdings_lemma_aux:
+  fixes h p :: real
+  assumes "h \<ge> 0" and "p \<ge> 0"
+  defines "L \<equiv> (\<lambda>h. -h * p + ln (1 + p * (exp h - 1)))"
+  shows   "L h \<le> h\<^sup>2 / 8"
+proof (cases "h = 0")
+  case False
+  hence h: "h > 0"
+    using \<open>h \<ge> 0\<close> by simp
+  define L' where "L' = (\<lambda>h. -p + p * exp h / (1 + p * (exp h - 1)))"
+  define L'' where "L'' = (\<lambda>h. -(p\<^sup>2) * exp h * exp h / (1 + p * (exp h - 1))\<^sup>2 +
+                              p * exp h / (1 + p * (exp h - 1)))"
+  define Ls where "Ls = (\<lambda>n. [L, L', L''] ! n)"
+
+  have [simp]: "L 0 = 0" "L' 0 = 0"
+    by (auto simp: L_def L'_def)
+
+  have L': "(L has_real_derivative L' x) (at x)" if "x \<in> {0..h}" for x
+  proof -
+    have "1 + p * (exp x - 1) > 0"
+      using \<open>p \<ge> 0\<close> that by (intro add_pos_nonneg mult_nonneg_nonneg) auto
+    thus ?thesis
+      unfolding L_def L'_def by (auto intro!: derivative_eq_intros)
+  qed
+
+  have L'': "(L' has_real_derivative L'' x) (at x)" if "x \<in> {0..h}" for x
+  proof -
+    have *: "1 + p * (exp x - 1) > 0"
+      using \<open>p \<ge> 0\<close> that by (intro add_pos_nonneg mult_nonneg_nonneg) auto
+    show ?thesis
+      unfolding L'_def L''_def
+      by (insert *, (rule derivative_eq_intros refl | simp)+) (auto simp: divide_simps; algebra)
+  qed
+
+  have diff: "\<forall>m t. m < 2 \<and> 0 \<le> t \<and> t \<le> h \<longrightarrow> (Ls m has_real_derivative Ls (Suc m) t) (at t)"
+    using L' L'' by (auto simp: Ls_def nth_Cons split: nat.splits)
+  from Taylor[of 2 Ls L 0 h 0 h, OF _ _ diff]
+    obtain t where t: "t \<in> {0<..<h}" "L h = L'' t * h\<^sup>2 / 2"
+      using \<open>h > 0\<close> by (auto simp: Ls_def lessThan_nat_numeral)
+  define u where "u = p * exp t / (1 + p * (exp t - 1))"
+
+  have "L'' t = u * (1 - u)"
+    by (simp add: L''_def u_def divide_simps; algebra)
+  also have "\<dots> \<le> 1 / 4"
+    using mult_const_minus_self_real_le[of u 1] by simp
+  finally have "L'' t \<le> 1 / 4" .
+
+  note t(2)
+  also have "L'' t * h\<^sup>2 / 2 \<le> (1 / 4) * h\<^sup>2 / 2"
+    using \<open>L'' t \<le> 1 / 4\<close> by (intro mult_right_mono divide_right_mono) auto
+  finally show "L h \<le> h\<^sup>2 / 8" by simp
+qed (auto simp: L_def)
+
+
+locale interval_bounded_random_variable = prob_space +
+  fixes f :: "'a \<Rightarrow> real" and a b :: real
+  assumes random_variable [measurable]: "random_variable borel f"
+  assumes AE_in_interval: "AE x in M. f x \<in> {a..b}"
+begin
+
+lemma integrable [intro]: "integrable M f"
+proof (rule integrable_const_bound)
+  show "AE x in M. norm (f x) \<le> max \<bar>a\<bar> \<bar>b\<bar>"
+    by (intro eventually_mono[OF AE_in_interval]) auto
+qed (fact random_variable)
+
+text \<open>
+  We first show Hoeffding's lemma for distributions whose expectation is 0. The general
+  case will easily follow from this later.
+\<close>
+lemma Hoeffdings_lemma_nn_integral_0:
+  assumes "l > 0" and E0: "expectation f = 0"
+  shows   "nn_integral M (\<lambda>x. exp (l * f x)) \<le> ennreal (exp (l\<^sup>2 * (b - a)\<^sup>2 / 8))"
+proof (cases "AE x in M. f x = 0")
+  case True
+  hence "nn_integral M (\<lambda>x. exp (l * f x)) = nn_integral M (\<lambda>x. ennreal 1)"
+    by (intro nn_integral_cong_AE) auto
+  also have "\<dots> = ennreal (expectation (\<lambda>_. 1))"
+    by (intro nn_integral_eq_integral) auto
+  finally show ?thesis by (simp add: prob_space)
+next
+  case False
+  have "a < 0"
+  proof (rule ccontr)
+    assume a: "\<not>(a < 0)"
+    have "AE x in M. f x = 0"
+    proof (subst integral_nonneg_eq_0_iff_AE [symmetric])
+      show "AE x in M. f x \<ge> 0"
+        using AE_in_interval by eventually_elim (use a in auto)
+    qed (use E0 in \<open>auto simp: id_def integrable\<close>)
+    with False show False by contradiction
+  qed
+
+  have "b > 0"
+  proof (rule ccontr)
+    assume b: "\<not>(b > 0)"
+    have "AE x in M. -f x = 0"
+    proof (subst integral_nonneg_eq_0_iff_AE [symmetric])
+      show "AE x in M. -f x \<ge> 0"
+        using AE_in_interval by eventually_elim (use b in auto)
+    qed (use E0 in \<open>auto simp: id_def integrable\<close>)
+    with False show False by simp
+  qed
+    
+  have "a < b"
+    using \<open>a < 0\<close> \<open>b > 0\<close> by linarith
+
+  define p where "p = -a / (b - a)"
+  define L where "L = (\<lambda>t. -t* p + ln (1 - p + p * exp t))"
+  define z where "z = l * (b - a)"
+  have "z > 0"
+    unfolding z_def using \<open>a < b\<close> \<open>l > 0\<close> by auto
+  have "p > 0"
+    using \<open>a < 0\<close> \<open>a < b\<close> unfolding p_def by (intro divide_pos_pos) auto
+
+  have "(\<integral>\<^sup>+x. exp (l * f x) \<partial>M) \<le>
+        (\<integral>\<^sup>+x. (b - f x) / (b - a) * exp (l * a) + (f x - a) / (b - a) * exp (l * b) \<partial>M)"
+  proof (intro nn_integral_mono_AE eventually_mono[OF AE_in_interval] ennreal_leI)
+    fix x assume x: "f x \<in> {a..b}"
+    define y where "y = (b - f x) / (b-a)"
+    have y: "y \<in> {0..1}"
+      using x \<open>a < b\<close> by (auto simp: y_def)
+    have conv: "convex_on UNIV (\<lambda>x. exp(l*x))"
+      using \<open>l > 0\<close> by (intro convex_on_exp) auto
+    have "exp (l * ((1 - y) *\<^sub>R b + y *\<^sub>R a)) \<le> (1 - y) * exp (l * b) + y * exp (l * a)"
+      using y \<open>l > 0\<close> by (intro convex_onD[OF convex_on_exp]) auto
+    also have "(1 - y) *\<^sub>R b + y *\<^sub>R a = f x"
+      using \<open>a < b\<close> by (simp add: y_def divide_simps) (simp add: algebra_simps)?
+    also have "1 - y = (f x - a) / (b - a)"
+      using \<open>a < b\<close> by (simp add: field_simps y_def)
+    finally show "exp (l * f x) \<le> (b - f x) / (b - a) * exp (l*a) + (f x - a)/(b-a) * exp (l*b)"
+      by (simp add: y_def)
+  qed
+  also have "\<dots> = (\<integral>\<^sup>+x. ennreal (b - f x) * exp (l * a) / (b - a) +
+                        ennreal (f x - a) * exp (l * b) / (b - a) \<partial>M)"
+    using \<open>a < 0\<close> \<open>b > 0\<close>
+    by (intro nn_integral_cong_AE eventually_mono[OF AE_in_interval])
+       (simp add: ennreal_plus ennreal_mult flip: divide_ennreal)
+  also have "\<dots> = ((\<integral>\<^sup>+ x. ennreal (b - f x) \<partial>M) * ennreal (exp (l * a)) +
+                   (\<integral>\<^sup>+ x. ennreal (f x - a) \<partial>M) * ennreal (exp (l * b))) / ennreal (b - a)"
+    by (simp add: nn_integral_add nn_integral_divide nn_integral_multc add_divide_distrib_ennreal)
+  also have "(\<integral>\<^sup>+ x. ennreal (b - f x) \<partial>M) = ennreal (expectation (\<lambda>x. b - f x))"
+    by (intro nn_integral_eq_integral Bochner_Integration.integrable_diff
+              eventually_mono[OF AE_in_interval] integrable_const integrable) auto
+  also have "expectation (\<lambda>x. b - f x) = b"
+    using assms by (subst Bochner_Integration.integral_diff) (auto simp: prob_space)
+  also have "(\<integral>\<^sup>+ x. ennreal (f x - a) \<partial>M) = ennreal (expectation (\<lambda>x. f x - a))"
+    by (intro nn_integral_eq_integral Bochner_Integration.integrable_diff
+              eventually_mono[OF AE_in_interval] integrable_const integrable) auto
+  also have "expectation (\<lambda>x. f x - a) = (-a)"
+    using assms by (subst Bochner_Integration.integral_diff) (auto simp: prob_space)
+  also have "(ennreal b * (exp (l * a)) + ennreal (-a) * (exp (l * b))) / (b - a) =
+             ennreal (b * exp (l * a) - a * exp (l * b)) / ennreal (b - a)"
+    using \<open>a < 0\<close> \<open>b > 0\<close>
+    by (simp flip: ennreal_mult ennreal_plus add: mult_nonpos_nonneg divide_ennreal mult_mono)
+  also have "b * exp (l * a) - a * exp (l * b) = exp (L z) * (b - a)"
+  proof -
+    have pos: "1 - p + p * exp z > 0"
+    proof -
+      have "exp z > 1" using \<open>l > 0\<close> and \<open>a < b\<close>
+        by (subst one_less_exp_iff) (auto simp: z_def intro!: mult_pos_pos)
+      hence "(exp z - 1) * p \<ge> 0"
+        unfolding p_def using \<open>a < 0\<close> and \<open>a < b\<close>
+        by (intro mult_nonneg_nonneg divide_nonneg_pos) auto
+      thus ?thesis
+        by (simp add: algebra_simps)
+    qed
+
+    have "exp (L z) * (b - a) = exp (-z * p) * (1 - p + p * exp z) * (b - a)"
+      using pos by (simp add: exp_add L_def exp_diff exp_minus divide_simps)
+    also have "\<dots> = b * exp (l * a) - a * exp (l * b)" using \<open>a < b\<close>
+      by (simp add: p_def z_def divide_simps) (simp add:  exp_diff algebra_simps)?
+    finally show ?thesis by simp
+  qed
+  also have "ennreal (exp (L z) * (b - a)) / ennreal (b - a) = ennreal (exp (L z))"
+    using \<open>a < b\<close> by (simp add: divide_ennreal)
+  also have "L z = -z * p + ln (1 + p * (exp z - 1))"
+    by (simp add: L_def algebra_simps)
+  also have "\<dots> \<le> z\<^sup>2 / 8"
+    unfolding L_def by (rule Hoeffdings_lemma_aux[where p = p]) (use \<open>z > 0\<close> \<open>p > 0\<close> in simp_all)
+  hence "ennreal (exp (-z * p + ln (1 + p * (exp z - 1)))) \<le> ennreal (exp (z\<^sup>2 / 8))"
+    by (intro ennreal_leI) auto
+  finally show ?thesis
+    by (simp add: z_def power_mult_distrib)
+qed
+
+context
+begin
+
+interpretation shift: interval_bounded_random_variable M "\<lambda>x. f x - \<mu>" "a - \<mu>" "b - \<mu>"
+  rewrites "b - \<mu> - (a - \<mu>) \<equiv> b - a"
+  by unfold_locales (auto intro!: eventually_mono[OF AE_in_interval])
+
+lemma expectation_shift: "expectation (\<lambda>x. f x - expectation f) = 0"
+  by (subst Bochner_Integration.integral_diff) (auto simp: integrable prob_space)
+
+lemmas Hoeffdings_lemma_nn_integral = shift.Hoeffdings_lemma_nn_integral_0[OF _ expectation_shift]
+
+end
+
+end
+
+
+
+subsection \<open>Hoeffding's Inequality\<close>
+
+text \<open>
+  Consider \<open>n\<close> independent real random variables $X_1, \ldots, X_n$ that each almost surely lie
+  in a compact interval $[a_i, b_i]$. Hoeffding's inequality states that the distribution of the
+  sum of the $X_i$ is tightly concentrated around the sum of the expected values: the probability
+  of it being above or below the sum of the expected values by more than some \<open>\<epsilon>\<close> decreases
+  exponentially with \<open>\<epsilon>\<close>.
+\<close>
+
+locale indep_interval_bounded_random_variables = prob_space +
+  fixes I :: "'b set" and X :: "'b \<Rightarrow> 'a \<Rightarrow> real"
+  fixes a b :: "'b \<Rightarrow> real"
+  assumes fin: "finite I"
+  assumes indep: "indep_vars (\<lambda>_. borel) X I"
+  assumes AE_in_interval: "\<And>i. i \<in> I \<Longrightarrow> AE x in M. X i x \<in> {a i..b i}"
+begin
+
+lemma random_variable [measurable]:
+  assumes i: "i \<in> I"
+  shows "random_variable borel (X i)"
+  using i indep unfolding indep_vars_def by blast
+
+lemma bounded_random_variable [intro]:
+  assumes i: "i \<in> I"
+  shows   "interval_bounded_random_variable M (X i) (a i) (b i)"
+  by unfold_locales (use AE_in_interval[OF i] i in auto)
+
+end
+
+
+locale Hoeffding_ineq = indep_interval_bounded_random_variables +
+  fixes \<mu> :: real
+  defines "\<mu> \<equiv> (\<Sum>i\<in>I. expectation (X i))"
+begin
+
+theorem%important Hoeffding_ineq_ge:
+  assumes "\<epsilon> \<ge> 0"
+  assumes "(\<Sum>i\<in>I. (b i - a i)\<^sup>2) > 0"
+  shows   "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+proof (cases "\<epsilon> = 0")
+  case [simp]: True
+  have "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} \<le> 1"
+    by simp
+  thus ?thesis by simp
+next
+  case False
+  with \<open>\<epsilon> \<ge> 0\<close> have \<epsilon>: "\<epsilon> > 0"
+    by auto
+
+  define d where "d = (\<Sum>i\<in>I. (b i - a i)\<^sup>2)"
+  define l :: real where "l = 4 * \<epsilon> / d"
+  have d: "d > 0"
+    using assms by (simp add: d_def)
+  have l: "l > 0"
+    using \<epsilon> d by (simp add: l_def)
+  define \<mu>' where "\<mu>' = (\<lambda>i. expectation (X i))"
+
+  have "{x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} = {x\<in>space M. (\<Sum>i\<in>I. X i x) - \<mu> \<ge> \<epsilon>}"
+    by (simp add: algebra_simps)
+  hence "ennreal (prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>}) = emeasure M \<dots>"
+    by (simp add: emeasure_eq_measure)
+  also have "\<dots> \<le> ennreal (exp (-l*\<epsilon>)) * (\<integral>\<^sup>+x\<in>space M. exp (l * ((\<Sum>i\<in>I. X i x) - \<mu>)) \<partial>M)"
+    by (intro Chernoff_ineq_nn_integral_ge l) auto
+  also have "(\<lambda>x. (\<Sum>i\<in>I. X i x) - \<mu>) = (\<lambda>x. (\<Sum>i\<in>I. X i x - \<mu>' i))"
+    by (simp add: \<mu>_def sum_subtractf \<mu>'_def)
+  also have "(\<integral>\<^sup>+x\<in>space M. exp (l * ((\<Sum>i\<in>I. X i x - \<mu>' i))) \<partial>M) =
+             (\<integral>\<^sup>+x. (\<Prod>i\<in>I. ennreal (exp (l * (X i x - \<mu>' i)))) \<partial>M)"
+    by (intro nn_integral_cong)
+       (simp_all add: sum_distrib_left ring_distribs exp_diff exp_sum fin prod_ennreal)
+  also have "\<dots> = (\<Prod>i\<in>I. \<integral>\<^sup>+x. ennreal (exp (l * (X i x - \<mu>' i))) \<partial>M)"
+    by (intro indep_vars_nn_integral fin indep_vars_compose2[OF indep]) auto
+  also have "ennreal (exp (-l * \<epsilon>)) * \<dots> \<le>
+               ennreal (exp (-l * \<epsilon>)) * (\<Prod>i\<in>I. ennreal (exp (l\<^sup>2 * (b i - a i)\<^sup>2 / 8)))"
+  proof (intro mult_left_mono prod_mono_ennreal)
+    fix i assume i: "i \<in> I"
+    from i interpret interval_bounded_random_variable M "X i" "a i" "b i" ..
+    show "(\<integral>\<^sup>+x. ennreal (exp (l * (X i x - \<mu>' i))) \<partial>M) \<le> ennreal (exp (l\<^sup>2 * (b i - a i)\<^sup>2 / 8))"
+      unfolding \<mu>'_def by (rule Hoeffdings_lemma_nn_integral) fact+
+  qed auto
+  also have "\<dots> = ennreal (exp (-l*\<epsilon>) * (\<Prod>i\<in>I. exp (l\<^sup>2 * (b i - a i)\<^sup>2 / 8)))"
+    by (simp add: prod_ennreal prod_nonneg flip: ennreal_mult)
+  also have "exp (-l*\<epsilon>) * (\<Prod>i\<in>I. exp (l\<^sup>2 * (b i - a i)\<^sup>2 / 8)) = exp (d * l\<^sup>2 / 8 - l * \<epsilon>)"
+    by (simp add: exp_diff exp_minus sum_divide_distrib sum_distrib_left
+                  sum_distrib_right exp_sum fin divide_simps mult_ac d_def)
+  also have "d * l\<^sup>2 / 8 - l * \<epsilon> = -2 * \<epsilon>\<^sup>2 / d"
+    using d by (simp add: l_def field_simps power2_eq_square)
+  finally show ?thesis
+    by (subst (asm) ennreal_le_iff) (simp_all add: d_def)
+qed
+
+corollary Hoeffding_ineq_le:
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  assumes "(\<Sum>i\<in>I. (b i - a i)\<^sup>2) > 0"
+  shows   "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> - \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+proof -
+  interpret flip: Hoeffding_ineq M I "\<lambda>i x. -X i x" "\<lambda>i. -b i" "\<lambda>i. -a i" "-\<mu>"
+  proof unfold_locales
+    fix i assume "i \<in> I"
+    then interpret interval_bounded_random_variable M "X i" "a i" "b i" ..
+    show "AE x in M. - X i x \<in> {- b i..- a i}"
+      by (intro eventually_mono[OF AE_in_interval]) auto
+  qed (auto simp: fin \<mu>_def sum_negf intro: indep_vars_compose2[OF indep])
+
+  have "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> - \<epsilon>} = prob {x\<in>space M. (\<Sum>i\<in>I. -X i x) \<ge> -\<mu> + \<epsilon>}"
+    by (simp add: sum_negf algebra_simps)
+  also have "\<dots> \<le> exp (- 2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+    using flip.Hoeffding_ineq_ge[OF \<epsilon>] assms(2) by simp
+  finally show ?thesis .
+qed
+
+corollary Hoeffding_ineq_abs_ge:
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  assumes "(\<Sum>i\<in>I. (b i - a i)\<^sup>2) > 0"
+  shows   "prob {x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) - \<mu>\<bar> \<ge> \<epsilon>} \<le> 2 * exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+proof -
+  have "{x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) - \<mu>\<bar> \<ge> \<epsilon>} =
+        {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} \<union> {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> - \<epsilon>}"
+    by auto
+  also have "prob \<dots> \<le> prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} +
+                       prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> - \<epsilon>}"
+    by (intro measure_Un_le) auto
+  also have "\<dots> \<le> exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2)) + exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+    by (intro add_mono Hoeffding_ineq_ge Hoeffding_ineq_le assms)
+  finally show ?thesis by simp
+qed
+
+end
+
+
+subsection \<open>Hoeffding's inequality for i.i.d. bounded random variables\<close>
+
+text \<open>
+  If we have \<open>n\<close> even identically-distributed random variables, the statement of Hoeffding's
+  lemma simplifies a bit more: it shows that the probability that the average of the $X_i$
+  is more than \<open>\<epsilon>\<close> above the expected value is no greater than $e^{\frac{-2ny^2}{(b-a)^2}}$.
+
+  This essentially gives us a more concrete version of the weak law of large numbers: the law
+  states that the probability vanishes for \<open>n \<rightarrow> \<infinity>\<close> for any \<open>\<epsilon> > 0\<close>. Unlike Hoeffding's inequality,
+  it does not assume the variables to have bounded support, but it does not provide concrete bounds.
+\<close>
+
+locale iid_interval_bounded_random_variables = prob_space +
+  fixes I :: "'b set" and X :: "'b \<Rightarrow> 'a \<Rightarrow> real" and Y :: "'a \<Rightarrow> real"
+  fixes a b :: real
+  assumes fin: "finite I"
+  assumes indep: "indep_vars (\<lambda>_. borel) X I"
+  assumes distr_X: "i \<in> I \<Longrightarrow> distr M borel (X i) = distr M borel Y"
+  assumes rv_Y [measurable]: "random_variable borel Y"
+  assumes AE_in_interval: "AE x in M. Y x \<in> {a..b}"
+begin
+
+lemma random_variable [measurable]:
+  assumes i: "i \<in> I"
+  shows "random_variable borel (X i)"
+  using i indep unfolding indep_vars_def by blast
+
+sublocale X: indep_interval_bounded_random_variables M I X "\<lambda>_. a" "\<lambda>_. b"
+proof
+  fix i assume i: "i \<in> I"
+  have "AE x in M. Y x \<in> {a..b}"
+    by (fact AE_in_interval)
+  also have "?this \<longleftrightarrow> (AE x in distr M borel Y. x \<in> {a..b})"
+    by (subst AE_distr_iff) auto
+  also have "distr M borel Y = distr M borel (X i)"
+    using i by (simp add: distr_X)
+  also have "(AE x in \<dots>. x \<in> {a..b}) \<longleftrightarrow> (AE x in M. X i x \<in> {a..b})"
+    using i by (subst AE_distr_iff) auto
+  finally show "AE x in M. X i x \<in> {a..b}" .
+qed (simp_all add: fin indep)
+
+lemma expectation_X [simp]:
+  assumes i: "i \<in> I"
+  shows "expectation (X i) = expectation Y"
+proof -
+  have "expectation (X i) = lebesgue_integral (distr M borel (X i)) (\<lambda>x. x)"
+    using i by (intro integral_distr [symmetric]) auto
+  also have "distr M borel (X i) = distr M borel Y"
+    using i by (rule distr_X)
+  also have "lebesgue_integral \<dots> (\<lambda>x. x) = expectation Y"
+    by (rule integral_distr) auto
+  finally show "expectation (X i) = expectation Y" .
+qed
+
+end
+
+
+locale Hoeffding_ineq_iid = iid_interval_bounded_random_variables +
+  fixes \<mu> :: real
+  defines "\<mu> \<equiv> expectation Y"
+begin
+
+sublocale X: Hoeffding_ineq M I X "\<lambda>_. a" "\<lambda>_. b" "real (card I) * \<mu>"
+  by unfold_locales (simp_all add: \<mu>_def)
+
+corollary
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  assumes "a < b" "I \<noteq> {}"
+  defines "n \<equiv> card I"
+  shows   Hoeffding_ineq_ge:
+            "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> n * \<mu> + \<epsilon>} \<le>
+               exp (-2 * \<epsilon>\<^sup>2 / (n * (b - a)\<^sup>2))" (is ?le)
+    and   Hoeffding_ineq_le:
+            "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> n * \<mu> - \<epsilon>} \<le>
+               exp (-2 * \<epsilon>\<^sup>2 / (n * (b - a)\<^sup>2))" (is ?ge)
+    and   Hoeffding_ineq_abs_ge:
+            "prob {x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) - n * \<mu>\<bar> \<ge> \<epsilon>} \<le>
+               2 * exp (-2 * \<epsilon>\<^sup>2 / (n * (b - a)\<^sup>2))" (is ?abs_ge)
+proof -
+  have pos: "(\<Sum>i\<in>I. (b - a)\<^sup>2) > 0"
+    using \<open>a < b\<close> \<open>I \<noteq> {}\<close> fin by (intro sum_pos) auto
+  show ?le
+    using X.Hoeffding_ineq_ge[OF \<epsilon> pos] by (simp add: n_def)
+  show ?ge
+    using X.Hoeffding_ineq_le[OF \<epsilon> pos] by (simp add: n_def)
+  show ?abs_ge
+    using X.Hoeffding_ineq_abs_ge[OF \<epsilon> pos] by (simp add: n_def)
+qed
+
+lemma 
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  assumes "a < b" "I \<noteq> {}"
+  defines "n \<equiv> card I"
+  shows   Hoeffding_ineq_ge':
+            "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) / n \<ge> \<mu> + \<epsilon>} \<le>
+               exp (-2 * n * \<epsilon>\<^sup>2 / (b - a)\<^sup>2)" (is ?ge)
+    and   Hoeffding_ineq_le':
+            "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) / n \<le> \<mu> - \<epsilon>} \<le>
+               exp (-2 * n * \<epsilon>\<^sup>2 / (b - a)\<^sup>2)" (is ?le)
+    and   Hoeffding_ineq_abs_ge':
+            "prob {x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) / n - \<mu>\<bar> \<ge> \<epsilon>} \<le>
+               2 * exp (-2 * n * \<epsilon>\<^sup>2 / (b - a)\<^sup>2)" (is ?abs_ge)
+proof -
+  have "n > 0"
+    using assms fin by (auto simp: field_simps)
+  have \<epsilon>': "\<epsilon> * n \<ge> 0"
+    using \<open>n > 0\<close> \<open>\<epsilon> \<ge> 0\<close> by auto
+  have eq: "- (2 * (\<epsilon> * real n)\<^sup>2 / (real (card I) * (b - a)\<^sup>2)) =
+            - (2 * real n * \<epsilon>\<^sup>2 / (b - a)\<^sup>2)"
+    using \<open>n > 0\<close> by (simp add: power2_eq_square divide_simps n_def)
+
+  have "{x\<in>space M. (\<Sum>i\<in>I. X i x) / n \<ge> \<mu> + \<epsilon>} =
+        {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> * n + \<epsilon> * n}"
+    using \<open>n > 0\<close> by (intro Collect_cong conj_cong refl) (auto simp: field_simps)
+  with Hoeffding_ineq_ge[OF \<epsilon>' \<open>a < b\<close> \<open>I \<noteq> {}\<close>] \<open>n > 0\<close> eq show ?ge
+    by (simp add: n_def mult_ac)
+
+  have "{x\<in>space M. (\<Sum>i\<in>I. X i x) / n \<le> \<mu> - \<epsilon>} =
+        {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> * n - \<epsilon> * n}"
+    using \<open>n > 0\<close> by (intro Collect_cong conj_cong refl) (auto simp: field_simps)
+  with Hoeffding_ineq_le[OF \<epsilon>' \<open>a < b\<close> \<open>I \<noteq> {}\<close>] \<open>n > 0\<close> eq show ?le
+    by (simp add: n_def mult_ac)
+
+  have "{x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) / n - \<mu>\<bar> \<ge> \<epsilon>} =
+        {x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) - \<mu> * n\<bar> \<ge> \<epsilon> * n}"
+    using \<open>n > 0\<close> by (intro Collect_cong conj_cong refl) (auto simp: field_simps)
+  with Hoeffding_ineq_abs_ge[OF \<epsilon>' \<open>a < b\<close> \<open>I \<noteq> {}\<close>] \<open>n > 0\<close> eq show ?abs_ge
+    by (simp add: n_def mult_ac)
+qed
+
+end
+
+
+subsection \<open>Hoeffding's Inequality for the Binomial distribution\<close>
+
+text \<open>
+  We can now apply Hoeffding's inequality to the Binomial distribution, which can be seen
+  as the sum of \<open>n\<close> i.i.d. coin flips (the support of each of which is contained in $[0,1]$).
+\<close>
+
+locale binomial_distribution =
+  fixes n :: nat and p :: real
+  assumes p: "p \<in> {0..1}"
+begin
+
+context
+  fixes coins :: "(nat \<Rightarrow> bool) pmf" and \<mu>
+  assumes n: "n > 0"
+  defines "coins \<equiv> Pi_pmf {..<n} False (\<lambda>_. bernoulli_pmf p)"
+begin
+
+lemma coins_component:
+  assumes i: "i < n"
+  shows   "distr (measure_pmf coins) borel (\<lambda>f. if f i then 1 else 0) =
+             distr (measure_pmf (bernoulli_pmf p)) borel (\<lambda>b. if b then 1 else 0)"
+proof -
+  have "distr (measure_pmf coins) borel (\<lambda>f. if f i then 1 else 0) =
+        distr (measure_pmf (map_pmf (\<lambda>f. f i) coins)) borel (\<lambda>b. if b then 1 else 0)"
+    unfolding map_pmf_rep_eq by (subst distr_distr) (auto simp: o_def)
+  also have "map_pmf (\<lambda>f. f i) coins = bernoulli_pmf p"
+    unfolding coins_def using i by (subst Pi_pmf_component) auto
+  finally show ?thesis
+    unfolding map_pmf_rep_eq .
+qed
+
+lemma prob_binomial_pmf_conv_coins:
+  "measure_pmf.prob (binomial_pmf n p) {x. P (real x)} = 
+   measure_pmf.prob coins {x. P (\<Sum>i<n. if x i then 1 else 0)}"
+proof -
+  have eq1: "(\<Sum>i<n. if x i then 1 else 0) = real (card {i\<in>{..<n}. x i})" for x
+  proof -
+    have "(\<Sum>i<n. if x i then 1 else (0::real)) = (\<Sum>i\<in>{i\<in>{..<n}. x i}. 1)"
+      by (intro sum.mono_neutral_cong_right) auto
+    thus ?thesis by simp
+  qed
+  have eq2: "binomial_pmf n p = map_pmf (\<lambda>v. card {i\<in>{..<n}. v i}) coins"
+    unfolding coins_def by (rule binomial_pmf_altdef') (use p in auto)
+  show ?thesis
+    by (subst eq2) (simp_all add: eq1)
+qed
+
+interpretation Hoeffding_ineq_iid
+  coins "{..<n}" "\<lambda>i f. if f i then 1 else 0" "\<lambda>f. if f 0 then 1 else 0" 0 1 p
+proof unfold_locales
+  show "prob_space.indep_vars (measure_pmf coins) (\<lambda>_. borel) (\<lambda>i f. if f i then 1 else 0) {..<n}"
+    unfolding coins_def
+    by (intro prob_space.indep_vars_compose2[OF _ indep_vars_Pi_pmf])
+       (auto simp: measure_pmf.prob_space_axioms)
+next
+  have "measure_pmf.expectation coins (\<lambda>f. if f 0 then 1 else 0 :: real) =
+        measure_pmf.expectation (map_pmf (\<lambda>f. f 0) coins) (\<lambda>b. if b then 1 else 0 :: real)"
+    by (simp add: coins_def)
+  also have "map_pmf (\<lambda>f. f 0) coins = bernoulli_pmf p"
+    using n by (simp add: coins_def Pi_pmf_component)
+  also have "measure_pmf.expectation \<dots> (\<lambda>b. if b then 1 else 0) = p"
+    using p by simp
+  finally show "p \<equiv> measure_pmf.expectation coins (\<lambda>f. if f 0 then 1 else 0)" by simp
+qed (auto simp: coins_component)
+
+corollary
+  fixes \<epsilon> :: real
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  shows prob_ge: "measure_pmf.prob (binomial_pmf n p) {x. x \<ge> n * p + \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / n)"
+    and prob_le: "measure_pmf.prob (binomial_pmf n p) {x. x \<le> n * p - \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / n)"
+    and prob_abs_ge:
+          "measure_pmf.prob (binomial_pmf n p) {x. \<bar>x - n * p\<bar> \<ge> \<epsilon>} \<le> 2 * exp (-2 * \<epsilon>\<^sup>2 / n)"
+proof -
+  have [simp]: "{..<n} \<noteq> {}"
+    using n by auto
+  show "measure_pmf.prob (binomial_pmf n p) {x. x \<ge> n * p + \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / n)"
+    using Hoeffding_ineq_ge[of \<epsilon>] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
+  show "measure_pmf.prob (binomial_pmf n p) {x. x \<le> n * p - \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / n)"
+    using Hoeffding_ineq_le[of \<epsilon>] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
+  show "measure_pmf.prob (binomial_pmf n p) {x. \<bar>x - n * p\<bar> \<ge> \<epsilon>} \<le> 2 *  exp (-2 * \<epsilon>\<^sup>2 / n)"
+    using Hoeffding_ineq_abs_ge[of \<epsilon>]
+    by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
+qed
+
+corollary
+  fixes \<epsilon> :: real
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  shows prob_ge': "measure_pmf.prob (binomial_pmf n p) {x. x / n \<ge> p + \<epsilon>} \<le> exp (-2 * n * \<epsilon>\<^sup>2)"
+    and prob_le': "measure_pmf.prob (binomial_pmf n p) {x. x / n \<le> p - \<epsilon>} \<le> exp (-2 * n * \<epsilon>\<^sup>2)"
+    and prob_abs_ge':
+          "measure_pmf.prob (binomial_pmf n p) {x. \<bar>x / n - p\<bar> \<ge> \<epsilon>} \<le> 2 * exp (-2 * n * \<epsilon>\<^sup>2)"
+proof -
+  have [simp]: "{..<n} \<noteq> {}"
+    using n by auto
+  show "measure_pmf.prob (binomial_pmf n p) {x. x / n \<ge> p + \<epsilon>} \<le> exp (-2 * n * \<epsilon>\<^sup>2)"
+    using Hoeffding_ineq_ge'[of \<epsilon>] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
+  show "measure_pmf.prob (binomial_pmf n p) {x. x / n \<le> p - \<epsilon>} \<le> exp (-2 * n * \<epsilon>\<^sup>2)"
+    using Hoeffding_ineq_le'[of \<epsilon>] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
+  show "measure_pmf.prob (binomial_pmf n p) {x. \<bar>x / n - p\<bar> \<ge> \<epsilon>} \<le> 2 * exp (-2 * n * \<epsilon>\<^sup>2)"
+    using Hoeffding_ineq_abs_ge'[of \<epsilon>]
+    by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
+qed
+
+end
+
+end
+
+
+subsection \<open>Tail bounds for the negative binomial distribution\<close>
+
+text \<open>
+  Since the tail probabilities of a negative Binomial distribution are equal to the
+  tail probabilities of some Binomial distribution, we can obtain tail bounds for the
+  negative Binomial distribution through the Hoeffding tail bounds for the Binomial
+  distribution.
+\<close>
+
+context
+  fixes p q :: real
+  assumes p: "p \<in> {0<..<1}"
+  defines "q \<equiv> 1 - p"
+begin
+
+lemma prob_neg_binomial_pmf_ge_bound:
+  fixes n :: nat and k :: real
+  defines "\<mu> \<equiv> real n * q / p"
+  assumes k: "k \<ge> 0"
+  shows "measure_pmf.prob (neg_binomial_pmf n p) {x. real x \<ge> \<mu> + k}
+         \<le> exp (- 2 * p ^ 3 * k\<^sup>2 / (n + p * k))"
+proof -
+  consider "n = 0" | "p = 1" | "n > 0" "p \<noteq> 1"
+    by blast
+  thus ?thesis
+  proof cases
+    assume [simp]: "n = 0"
+    show ?thesis using k
+      by (simp add: indicator_def \<mu>_def)
+  next
+    assume [simp]: "p = 1"
+    show ?thesis using k
+      by (auto simp add: indicator_def \<mu>_def q_def)
+  next
+    assume n: "n > 0" and "p \<noteq> 1"
+    from \<open>p \<noteq> 1\<close> and p have p: "p \<in> {0<..<1}"
+      by auto
+    from p have q: "q \<in> {0<..<1}"
+      by (auto simp: q_def)
+
+    define k1 where "k1 = \<mu> + k"
+    have k1: "k1 \<ge> \<mu>"
+      using k by (simp add: k1_def)
+    have "k1 > 0"
+      by (rule less_le_trans[OF _ k1]) (use p n in \<open>auto simp: q_def \<mu>_def\<close>)
+  
+    define k1' where "k1' = nat (ceiling k1)"
+    have "\<mu> \<ge> 0" using p
+      by (auto simp: \<mu>_def q_def)
+    have "\<not>(x < k1') \<longleftrightarrow> real x \<ge> k1" for x
+      unfolding k1'_def by linarith
+    hence eq: "UNIV - {..<k1'} = {x. x \<ge> k1}"
+      by auto
+    hence "measure_pmf.prob (neg_binomial_pmf n p) {n. n \<ge> k1} =
+          1 - measure_pmf.prob (neg_binomial_pmf n p) {..<k1'}"
+      using measure_pmf.prob_compl[of "{..<k1'}" "neg_binomial_pmf n p"] by simp
+    also have "measure_pmf.prob (neg_binomial_pmf n p) {..<k1'} =
+               measure_pmf.prob (binomial_pmf (n + k1' - 1) q) {..<k1'}"
+      unfolding q_def using p by (intro prob_neg_binomial_pmf_lessThan) auto
+    also have "1 - \<dots> = measure_pmf.prob (binomial_pmf (n + k1' - 1) q) {n. n \<ge> k1}"
+      using measure_pmf.prob_compl[of "{..<k1'}" "binomial_pmf (n + k1' - 1) q"] eq by simp
+    also have "{x. real x \<ge> k1} = {x. x \<ge> real (n + k1' - 1) * q + (k1 - real (n + k1' - 1) * q)}"
+      by simp
+    also have "measure_pmf.prob (binomial_pmf (n + k1' - 1) q) \<dots> \<le>
+                 exp (-2 * (k1 - real (n + k1' - 1) * q)\<^sup>2 / real (n + k1' - 1))"
+    proof (rule binomial_distribution.prob_ge)
+      show "binomial_distribution q"
+        by unfold_locales (use q in auto)
+    next
+      show "n + k1' - 1 > 0"
+        using \<open>k1 > 0\<close> n unfolding k1'_def by linarith
+    next
+      have "real (n + nat \<lceil>k1\<rceil> - 1) \<le> real n + k1"
+        using \<open>k1 > 0\<close> by linarith
+      hence "real (n + k1' - 1) * q  \<le> (real n + k1) * q"
+        unfolding k1'_def by (intro mult_right_mono) (use p in \<open>simp_all add: q_def\<close>)
+      also have "\<dots> \<le> k1"
+        using k1 p by (simp add: q_def field_simps \<mu>_def)
+      finally show "0 \<le> k1 - real (n + k1' - 1) * q"
+        by simp
+    qed
+    also have "{x. real (n + k1' - 1) * q + (k1 - real (n + k1' - 1) * q) \<le> real x} = {x. real x \<ge> k1}"
+      by simp
+    also have "exp (-2 * (k1 - real (n + k1' - 1) * q)\<^sup>2 / real (n + k1' - 1)) \<le>
+               exp (-2 * (k1 - (n + k1) * q)\<^sup>2 / (n + k1))"
+    proof -
+      have "real (n + k1' - Suc 0) \<le> real n + k1"
+        unfolding k1'_def using \<open>k1 > 0\<close> by linarith
+      moreover have "(real n + k1) * q \<le> k1"
+        using k1 p by (auto simp: q_def field_simps \<mu>_def)
+      moreover have "1 < n + k1'"
+        using n \<open>k1 > 0\<close> unfolding k1'_def by linarith
+      ultimately have "2 * (k1 - real (n + k1' - 1) * q)\<^sup>2 / real (n + k1' - 1) \<ge>
+                       2 * (k1 - (n + k1) * q)\<^sup>2 / (n + k1)"
+        by (intro frac_le mult_left_mono power_mono mult_nonneg_nonneg mult_right_mono diff_mono)
+           (use q in simp_all)
+      thus ?thesis
+        by simp
+    qed
+    also have "\<dots> = exp (-2 * (p * k1 - q * n)\<^sup>2 / (k1 + n))"
+      by (simp add: q_def algebra_simps)
+    also have "-2 * (p * k1 - q * n)\<^sup>2 = -2 * p\<^sup>2 * (k1 - \<mu>)\<^sup>2"
+      using p by (auto simp: field_simps \<mu>_def)
+    also have "k1 - \<mu> = k"
+      by (simp add: k1_def \<mu>_def)
+    also note k1_def
+    also have "\<mu> + k + real n = real n / p + k"
+      using p by (simp add: \<mu>_def q_def field_simps)
+    also have "- 2 * p\<^sup>2 * k\<^sup>2 / (real n / p + k) = - 2 * p ^ 3 * k\<^sup>2 / (p * k + n)"
+      using p by (simp add: field_simps power3_eq_cube power2_eq_square)
+    finally show ?thesis by (simp add: add_ac)
+  qed
+qed
+
+lemma prob_neg_binomial_pmf_le_bound:
+  fixes n :: nat and k :: real
+  defines "\<mu> \<equiv> real n * q / p"
+  assumes k: "k \<ge> 0"
+  shows "measure_pmf.prob (neg_binomial_pmf n p) {x. real x \<le> \<mu> - k}
+         \<le> exp (-2 * p ^ 3 * k\<^sup>2 / (n - p * k))"
+proof -
+  consider "n = 0" | "p = 1" | "k > \<mu>" | "n > 0" "p \<noteq> 1" "k \<le> \<mu>"
+    by force
+  thus ?thesis
+  proof cases
+    assume [simp]: "n = 0"
+    show ?thesis using k
+      by (simp add: indicator_def \<mu>_def)
+  next
+    assume [simp]: "p = 1"
+    show ?thesis using k
+      by (auto simp add: indicator_def \<mu>_def q_def)
+  next
+    assume "k > \<mu>"
+    hence "{x. real x \<le> \<mu> - k} = {}"
+      by auto
+    thus ?thesis by simp
+  next
+    assume n: "n > 0" and "p \<noteq> 1" and "k \<le> \<mu>"
+    from \<open>p \<noteq> 1\<close> and p have p: "p \<in> {0<..<1}"
+      by auto
+    from p have q: "q \<in> {0<..<1}"
+      by (auto simp: q_def)
+
+    define f :: "real \<Rightarrow> real" where "f = (\<lambda>x. (p * x - q * n)\<^sup>2 / (x + n))"
+    have f_mono: "f x \<ge> f y" if "x \<ge> 0" "y \<le> n * q / p" "x \<le> y" for x y :: real
+      using that(3)
+    proof (rule DERIV_nonpos_imp_nonincreasing)
+      fix t assume t: "t \<ge> x" "t \<le> y"
+      have "x > -n"
+        using n \<open>x \<ge> 0\<close> by linarith
+      hence "(f has_field_derivative ((p * t - q * n) * (n * (1 + p) + p * t) / (n + t) ^ 2)) (at t)"
+        unfolding f_def using t
+        by (auto intro!: derivative_eq_intros simp: algebra_simps q_def power2_eq_square)
+      moreover {
+        have "p * t \<le> p * y"
+          using p by (intro mult_left_mono t) auto
+        also have "p * y \<le> q * n"
+          using \<open>y \<le> n * q / p\<close> p by (simp add: field_simps)
+        finally have "p * t \<le> q * n" .
+      }
+      hence "(p * t - q * n) * (n * (1 + p) + p * t) / (n + t) ^ 2 \<le> 0"
+        using p \<open>x \<ge> 0\<close> t
+        by (intro mult_nonpos_nonneg divide_nonpos_nonneg add_nonneg_nonneg mult_nonneg_nonneg) auto
+      ultimately show "\<exists>y. (f has_real_derivative y) (at t) \<and> y \<le> 0"
+        by blast
+    qed
+
+    define k1 where "k1 = \<mu> - k"
+    have k1: "k1 \<le> real n * q / p"
+      using assms by (simp add: \<mu>_def k1_def)
+    have "k1 \<ge> 0"
+      using k \<open>k \<le> \<mu>\<close> by (simp add: \<mu>_def k1_def)
+  
+    define k1' where "k1' = nat (floor k1)"
+    have "\<mu> \<ge> 0" using p
+      by (auto simp: \<mu>_def q_def)
+    have "(x \<le> k1') \<longleftrightarrow> real x \<le> k1" for x
+      unfolding k1'_def not_less using \<open>k1 \<ge> 0\<close> by linarith
+    hence eq: "{n. n \<le> k1}  = {..k1'}"
+      by auto
+    hence "measure_pmf.prob (neg_binomial_pmf n p) {n. n \<le> k1} =
+           measure_pmf.prob (neg_binomial_pmf n p) {..k1'}"
+      by simp
+    also have "measure_pmf.prob (neg_binomial_pmf n p) {..k1'} =
+               measure_pmf.prob (binomial_pmf (n + k1') q) {..k1'}"
+      unfolding q_def using p by (intro prob_neg_binomial_pmf_atMost) auto
+    also note eq [symmetric]
+    also have "{x. real x \<le> k1} = {x. x \<le> real (n + k1') * q - (real (n + k1') * q - real k1')}"
+      using eq by auto
+    also have "measure_pmf.prob (binomial_pmf (n + k1') q) \<dots> \<le>
+                 exp (-2 * (real (n + k1') * q - real k1')\<^sup>2 / real (n + k1'))"
+    proof (rule binomial_distribution.prob_le)
+      show "binomial_distribution q"
+        by unfold_locales (use q in auto)
+    next
+      show "n + k1' > 0"
+        using \<open>k1 \<ge> 0\<close> n unfolding k1'_def by linarith
+    next
+      have "p * k1' \<le> p * k1"
+        using p \<open>k1 \<ge> 0\<close> by (intro mult_left_mono) (auto simp: k1'_def)
+      also have "\<dots> \<le> q * n"
+        using k1 p by (simp add: field_simps)
+      finally show "0 \<le> real (n + k1') * q - real k1'"
+        by (simp add: algebra_simps q_def)
+    qed
+    also have "{x. real x \<le> real (n + k1') * q - (real (n + k1') * q - k1')} = {..k1'}"
+      by auto
+    also have "real (n + k1') * q - k1' = -(p * k1' - q * n)"
+      by (simp add: q_def algebra_simps)
+    also have "\<dots> ^ 2 = (p * k1' - q * n) ^ 2"
+      by algebra
+    also have "- 2 * (p * real k1' - q * real n)\<^sup>2 / real (n + k1') = -2 * f (real k1')"
+      by (simp add: f_def)
+    also have "f (real k1') \<ge> f k1"
+      by (rule f_mono) (use \<open>k1 \<ge> 0\<close> k1 in \<open>auto simp: k1'_def\<close>)
+    hence "exp (-2 * f (real k1')) \<le> exp (-2 * f k1)"
+      by simp
+    also have "\<dots> = exp (-2 * (p * k1 - q * n)\<^sup>2 / (k1 + n))"
+      by (simp add: f_def)
+
+    also have "-2 * (p * k1 - q * n)\<^sup>2 = -2 * p\<^sup>2 * (k1 - \<mu>)\<^sup>2"
+      using p by (auto simp: field_simps \<mu>_def)
+    also have "(k1 - \<mu>) ^ 2 = k ^ 2"
+      by (simp add: k1_def \<mu>_def)
+    also note k1_def
+    also have "\<mu> - k + real n = real n / p - k"
+      using p by (simp add: \<mu>_def q_def field_simps)
+    also have "- 2 * p\<^sup>2 * k\<^sup>2 / (real n / p - k) = - 2 * p ^ 3 * k\<^sup>2 / (n - p * k)"
+      using p by (simp add: field_simps power3_eq_cube power2_eq_square)
+    also have "{..k1'} = {x. real x \<le> \<mu> - k}"
+      using eq by (simp add: k1_def)
+    finally show ?thesis .
+  qed
+qed
+
+text \<open>
+  Due to the function $exp(-l/x)$ being concave for $x \geq \frac{l}{2}$, the above two
+  bounds can be combined into the following one for moderate values of \<open>k\<close>.
+  (cf. \<^url>\<open>https://math.stackexchange.com/questions/1565559\<close>)
+\<close>
+lemma prob_neg_binomial_pmf_abs_ge_bound:
+  fixes n :: nat and k :: real
+  defines "\<mu> \<equiv> real n * q / p"
+  assumes "k \<ge> 0" and n_ge: "n \<ge> p * k * (p\<^sup>2 * k + 1)"
+  shows "measure_pmf.prob (neg_binomial_pmf n p) {x. \<bar>real x - \<mu>\<bar> \<ge> k} \<le>
+           2 * exp (-2 * p ^ 3 * k ^ 2 / n)"
+proof (cases "k = 0")
+  case False
+  with \<open>k \<ge> 0\<close> have k: "k > 0"
+    by auto
+  define l :: real where "l = 2 * p ^ 3 * k ^ 2"
+  have l: "l > 0"
+    using p k by (auto simp: l_def)
+  define f :: "real \<Rightarrow> real" where "f = (\<lambda>x. exp (-l / x))"
+  define f' where "f' = (\<lambda>x. -l * exp (-l / x) / x ^ 2)"
+
+  have f'_mono: "f' x \<le> f' y" if "x \<ge> l / 2" "x \<le> y" for x y :: real
+    using that(2)
+  proof (rule DERIV_nonneg_imp_nondecreasing)
+    fix t assume t: "x \<le> t" "t \<le> y"
+    have "t > 0"
+      using that l t by auto
+    have "(f' has_field_derivative (l * (2 * t - l) / (exp (l / t) * t ^ 4))) (at t)"
+      unfolding f'_def using t that \<open>t > 0\<close>
+      by (auto intro!: derivative_eq_intros simp: field_simps exp_minus simp flip: power_Suc)
+    moreover have "l * (2 * t - l) / (exp (l / t) * t ^ 4) \<ge> 0"
+      using that t l by (intro divide_nonneg_pos mult_nonneg_nonneg) auto
+    ultimately show "\<exists>y. (f' has_real_derivative y) (at t) \<and> 0 \<le> y" by blast
+  qed
+
+  have convex: "convex_on {l/2..} (\<lambda>x. -f x)" unfolding f_def
+  proof (intro convex_on_realI[where f' = f'])
+    show "((\<lambda>x. - exp (- l / x)) has_real_derivative f' x) (at x)" if "x \<in> {l/2..}" for x
+      using that l
+      by (auto intro!: derivative_eq_intros simp: f'_def power2_eq_square algebra_simps)
+  qed (use l in \<open>auto intro!: f'_mono\<close>)
+
+  have eq: "{x. \<bar>real x - \<mu>\<bar> \<ge> k} = {x. real x \<le> \<mu> - k} \<union> {x. real x \<ge> \<mu> + k}"
+    by auto
+  have "measure_pmf.prob (neg_binomial_pmf n p) {x. \<bar>real x - \<mu>\<bar> \<ge> k} \<le>
+        measure_pmf.prob (neg_binomial_pmf n p) {x. real x \<le> \<mu> - k} +
+        measure_pmf.prob (neg_binomial_pmf n p) {x. real x \<ge> \<mu> + k}"
+    by (subst eq, rule measure_Un_le) auto
+  also have "\<dots> \<le> exp (-2 * p ^ 3 * k\<^sup>2 / (n - p * k)) + exp (-2 * p ^ 3 * k\<^sup>2 / (n + p * k))"
+    unfolding \<mu>_def
+    by (intro prob_neg_binomial_pmf_le_bound prob_neg_binomial_pmf_ge_bound add_mono \<open>k \<ge> 0\<close>)
+  also have "\<dots> = 2 * (1/2 * f (n - p * k) + 1/2 * f (n + p * k))"
+    by (simp add: f_def l_def)
+  also have "1/2 * f (n - p * k) + 1/2 * f (n + p * k) \<le> f (1/2 * (n - p * k) + 1/2 * (n + p * k))"
+  proof -
+    let ?x = "n - p * k" and ?y = "n + p * k"
+    have le1: "l / 2 \<le> ?x" using n_ge
+      by (simp add: l_def power2_eq_square power3_eq_cube algebra_simps)
+    also have "\<dots> \<le> ?y"
+      using p k by simp
+    finally have le2: "l / 2 \<le> ?y" .
+    have "-f ((1 - 1 / 2) *\<^sub>R ?x + (1 / 2) *\<^sub>R ?y) \<le> (1 - 1 / 2) * - f ?x + 1 / 2 * - f ?y"
+      using le1 le2 by (intro convex_onD[OF convex]) auto
+    thus ?thesis by simp
+  qed
+  also have "1/2 * (n - p * k) + 1/2 * (n + p * k) = n"
+    by (simp add: algebra_simps)
+  also have "2 * f n = 2 * exp (-l / n)"
+    by (simp add: f_def)
+  finally show ?thesis
+    by (simp add: l_def)
+qed auto
+
+end
+
+end
--- a/src/HOL/Probability/Independent_Family.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Probability/Independent_Family.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -1138,6 +1138,33 @@
   qed
 qed
 
+lemma (in prob_space) indep_vars_iff_distr_eq_PiM':
+  fixes I :: "'i set" and X :: "'i \<Rightarrow> 'a \<Rightarrow> 'b"
+  assumes "I \<noteq> {}"
+  assumes rv: "\<And>i. i \<in> I \<Longrightarrow> random_variable (M' i) (X i)"
+  shows "indep_vars M' X I \<longleftrightarrow>
+           distr M (\<Pi>\<^sub>M i\<in>I. M' i) (\<lambda>x. \<lambda>i\<in>I. X i x) = (\<Pi>\<^sub>M i\<in>I. distr M (M' i) (X i))"
+proof -
+  from assms obtain j where j: "j \<in> I"
+    by auto
+  define N' where "N' = (\<lambda>i. if i \<in> I then M' i else M' j)"
+  define Y where "Y = (\<lambda>i. if i \<in> I then X i else X j)"
+  have rv: "random_variable (N' i) (Y i)" for i
+    using j by (auto simp: N'_def Y_def intro: assms)
+
+  have "indep_vars M' X I = indep_vars N' Y I"
+    by (intro indep_vars_cong) (auto simp: N'_def Y_def)
+  also have "\<dots> \<longleftrightarrow> distr M (\<Pi>\<^sub>M i\<in>I. N' i) (\<lambda>x. \<lambda>i\<in>I. Y i x) = (\<Pi>\<^sub>M i\<in>I. distr M (N' i) (Y i))"
+    by (intro indep_vars_iff_distr_eq_PiM rv assms)
+  also have "(\<Pi>\<^sub>M i\<in>I. N' i) = (\<Pi>\<^sub>M i\<in>I. M' i)"
+    by (intro PiM_cong) (simp_all add: N'_def)
+  also have "(\<lambda>x. \<lambda>i\<in>I. Y i x) = (\<lambda>x. \<lambda>i\<in>I. X i x)"
+    by (simp_all add: Y_def fun_eq_iff)
+  also have "(\<Pi>\<^sub>M i\<in>I. distr M (N' i) (Y i)) = (\<Pi>\<^sub>M i\<in>I. distr M (M' i) (X i))"
+    by (intro PiM_cong distr_cong) (simp_all add: N'_def Y_def)
+  finally show ?thesis .
+qed
+
 lemma (in prob_space) indep_varD:
   assumes indep: "indep_var Ma A Mb B"
   assumes sets: "Xa \<in> sets Ma" "Xb \<in> sets Mb"
--- a/src/HOL/Probability/Infinite_Product_Measure.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Probability/Infinite_Product_Measure.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -392,4 +392,51 @@
 
 end
 
+lemma PiM_return:
+  assumes "finite I"
+  assumes [measurable]: "\<And>i. i \<in> I \<Longrightarrow> {a i} \<in> sets (M i)"
+  shows "PiM I (\<lambda>i. return (M i) (a i)) = return (PiM I M) (restrict a I)"
+proof -
+  have [simp]: "a i \<in> space (M i)" if "i \<in> I" for i
+    using assms(2)[OF that] by (meson insert_subset sets.sets_into_space)
+  interpret prob_space "PiM I (\<lambda>i. return (M i) (a i))"
+    by (intro prob_space_PiM prob_space_return) auto
+  have "AE x in PiM I (\<lambda>i. return (M i) (a i)). \<forall>i\<in>I. x i = restrict a I i"
+    by (intro eventually_ball_finite ballI AE_PiM_component prob_space_return assms)
+       (auto simp: AE_return)
+  moreover have "AE x in PiM I (\<lambda>i. return (M i) (a i)). x \<in> space (PiM I (\<lambda>i. return (M i) (a i)))"
+    by simp
+  ultimately have "AE x in PiM I (\<lambda>i. return (M i) (a i)). x = restrict a I"
+    by eventually_elim (auto simp: fun_eq_iff space_PiM)
+  hence "Pi\<^sub>M I (\<lambda>i. return (M i) (a i)) = return (Pi\<^sub>M I (\<lambda>i. return (M i) (a i))) (restrict a I)"
+    by (rule AE_eq_constD)
+  also have "\<dots> = return (PiM I M) (restrict a I)"
+    by (intro return_cong sets_PiM_cong) auto
+  finally show ?thesis .
+qed
+
+lemma distr_PiM_finite_prob_space':
+  assumes fin: "finite I"
+  assumes "\<And>i. i \<in> I \<Longrightarrow> prob_space (M i)"
+  assumes "\<And>i. i \<in> I \<Longrightarrow> prob_space (M' i)"
+  assumes [measurable]: "\<And>i. i \<in> I \<Longrightarrow> f \<in> measurable (M i) (M' i)"
+  shows   "distr (PiM I M) (PiM I M') (compose I f) = PiM I (\<lambda>i. distr (M i) (M' i) f)"
+proof -
+  define N where "N = (\<lambda>i. if i \<in> I then M i else return (count_space UNIV) undefined)"
+  define N' where "N' = (\<lambda>i. if i \<in> I then M' i else return (count_space UNIV) undefined)"
+  have [simp]: "PiM I N = PiM I M" "PiM I N' = PiM I M'"
+    by (intro PiM_cong; simp add: N_def N'_def)+
+
+  have "distr (PiM I N) (PiM I N') (compose I f) = PiM I (\<lambda>i. distr (N i) (N' i) f)"
+  proof (rule distr_PiM_finite_prob_space)
+    show "product_prob_space N"
+      by (rule product_prob_spaceI) (auto simp: N_def intro!: prob_space_return assms)
+    show "product_prob_space N'"
+      by (rule product_prob_spaceI) (auto simp: N'_def intro!: prob_space_return assms)
+  qed (auto simp: N_def N'_def fin)
+  also have "Pi\<^sub>M I (\<lambda>i. distr (N i) (N' i) f) = Pi\<^sub>M I (\<lambda>i. distr (M i) (M' i) f)"
+    by (intro PiM_cong) (simp_all add: N_def N'_def)
+  finally show ?thesis by simp
+qed
+
 end
--- a/src/HOL/Probability/Probability.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Probability/Probability.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -10,6 +10,8 @@
   Projective_Limit
   Random_Permutations
   SPMF
+  Product_PMF
+  Hoeffding
   Stream_Space
   Tree_Space
   Conditional_Expectation
--- a/src/HOL/Probability/Probability_Mass_Function.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Probability/Probability_Mass_Function.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -1,6 +1,7 @@
 (*  Title:      HOL/Probability/Probability_Mass_Function.thy
     Author:     Johannes Hölzl, TU München
     Author:     Andreas Lochbihler, ETH Zurich
+    Author:     Manuel Eberl, TU München
 *)
 
 section \<open> Probability mass function \<close>
@@ -529,6 +530,16 @@
   shows "integral\<^sup>L (map_pmf g p) f = integral\<^sup>L p (\<lambda>x. f (g x))"
   by (simp add: integral_distr map_pmf_rep_eq)
 
+lemma integrable_map_pmf_eq [simp]:
+  fixes g :: "'a \<Rightarrow> 'b::{banach, second_countable_topology}"
+  shows "integrable (map_pmf f p) g \<longleftrightarrow> integrable (measure_pmf p) (\<lambda>x. g (f x))"              
+  by (subst map_pmf_rep_eq, subst integrable_distr_eq) auto
+
+lemma integrable_map_pmf [intro]:
+  fixes g :: "'a \<Rightarrow> 'b::{banach, second_countable_topology}"
+  shows "integrable (measure_pmf p) (\<lambda>x. g (f x)) \<Longrightarrow> integrable (map_pmf f p) g"
+  by (subst integrable_map_pmf_eq)
+
 lemma pmf_abs_summable [intro]: "pmf p abs_summable_on A"
   by (rule abs_summable_on_subset[OF _ subset_UNIV])
      (auto simp:  abs_summable_on_def integrable_iff_bounded nn_integral_pmf)
@@ -669,6 +680,9 @@
   by (auto simp: pmf.rep_eq map_pmf_rep_eq measure_distr AE_measure_pmf_iff inj_onD
            intro!: measure_pmf.finite_measure_eq_AE)
 
+lemma pair_return_pmf [simp]: "pair_pmf (return_pmf x) (return_pmf y) = return_pmf (x, y)"
+  by (auto simp: pair_pmf_def bind_return_pmf)
+
 lemma pmf_map_inj': "inj f \<Longrightarrow> pmf (map_pmf f M) (f x) = pmf M x"
 apply(cases "x \<in> set_pmf M")
  apply(simp add: pmf_map_inj[OF subset_inj_on])
@@ -676,6 +690,28 @@
 apply(auto simp add: pmf_eq_0_set_pmf dest: injD)
 done
 
+lemma expectation_pair_pmf_fst [simp]:
+  fixes f :: "'a \<Rightarrow> 'b::{banach, second_countable_topology}"
+  shows "measure_pmf.expectation (pair_pmf p q) (\<lambda>x. f (fst x)) = measure_pmf.expectation p f"
+proof -
+  have "measure_pmf.expectation (pair_pmf p q) (\<lambda>x. f (fst x)) = 
+          measure_pmf.expectation (map_pmf fst (pair_pmf p q)) f" by simp
+  also have "map_pmf fst (pair_pmf p q) = p"
+    by (simp add: map_fst_pair_pmf)
+  finally show ?thesis .
+qed
+
+lemma expectation_pair_pmf_snd [simp]:
+  fixes f :: "'a \<Rightarrow> 'b::{banach, second_countable_topology}"
+  shows "measure_pmf.expectation (pair_pmf p q) (\<lambda>x. f (snd x)) = measure_pmf.expectation q f"
+proof -
+  have "measure_pmf.expectation (pair_pmf p q) (\<lambda>x. f (snd x)) = 
+          measure_pmf.expectation (map_pmf snd (pair_pmf p q)) f" by simp
+  also have "map_pmf snd (pair_pmf p q) = q"
+    by (simp add: map_snd_pair_pmf)
+  finally show ?thesis .
+qed
+
 lemma pmf_map_outside: "x \<notin> f ` set_pmf M \<Longrightarrow> pmf (map_pmf f M) x = 0"
   unfolding pmf_eq_0_set_pmf by simp
 
@@ -1711,9 +1747,108 @@
 
 end
 
+lemma geometric_pmf_1 [simp]: "geometric_pmf 1 = return_pmf 0"
+  by (intro pmf_eqI) (auto simp: indicator_def)
+
 lemma set_pmf_geometric: "0 < p \<Longrightarrow> p < 1 \<Longrightarrow> set_pmf (geometric_pmf p) = UNIV"
   by (auto simp: set_pmf_iff)
 
+lemma geometric_sums_times_n:
+  fixes c::"'a::{banach,real_normed_field}"
+  assumes "norm c < 1"
+  shows "(\<lambda>n. c^n * of_nat n) sums (c / (1 - c)\<^sup>2)"
+proof -
+  have "(\<lambda>n. c * z ^ n) sums (c / (1 - z))" if "norm z < 1" for z
+    using geometric_sums sums_mult that by fastforce
+  moreover have "((\<lambda>z. c / (1 - z)) has_field_derivative (c / (1 - c)\<^sup>2)) (at c)"
+      using assms by (auto intro!: derivative_eq_intros simp add: semiring_normalization_rules)
+  ultimately have "(\<lambda>n. diffs (\<lambda>n. c) n * c ^ n) sums (c / (1 - c)\<^sup>2)"
+    using assms by (intro termdiffs_sums_strong)
+  then have "(\<lambda>n. of_nat (Suc n) * c ^ (Suc n)) sums (c / (1 - c)\<^sup>2)"
+    unfolding diffs_def by (simp add: power_eq_if mult.assoc)
+  then show ?thesis
+    by (subst (asm) sums_Suc_iff) (auto simp add: mult.commute)
+qed
+
+lemma geometric_sums_times_norm:
+  fixes c::"'a::{banach,real_normed_field}"
+  assumes "norm c < 1"
+  shows "(\<lambda>n. norm (c^n * of_nat n)) sums (norm c / (1 - norm c)\<^sup>2)"
+proof -
+  have "norm (c^n * of_nat n) = (norm c) ^ n * of_nat n" for n::nat
+    by (simp add: norm_power norm_mult)
+  then show ?thesis
+    using geometric_sums_times_n[of "norm c"] assms
+    by force
+qed
+
+lemma integrable_real_geometric_pmf:
+  assumes "p \<in> {0<..1}"
+  shows   "integrable (geometric_pmf p) real"
+proof -
+  have "summable (\<lambda>x. p * ((1 - p) ^ x * real x))"
+    using geometric_sums_times_norm[of "1 - p"] assms
+    by (intro summable_mult) (auto simp: sums_iff)
+  hence "summable (\<lambda>x. (1 - p) ^ x * real x)"
+    by (rule summable_mult_D) (use assms in auto)
+  thus ?thesis
+    unfolding measure_pmf_eq_density using assms
+    by (subst integrable_density)
+       (auto simp: integrable_count_space_nat_iff mult_ac)
+qed
+
+lemma expectation_geometric_pmf:
+  assumes "p \<in> {0<..1}"
+  shows   "measure_pmf.expectation (geometric_pmf p) real = (1 - p) / p"
+proof -
+  have "(\<lambda>n. p * ((1 - p) ^ n * n)) sums (p * ((1 - p) / p^2))"
+    using assms geometric_sums_times_n[of "1-p"] by (intro sums_mult) auto
+  moreover have "(\<lambda>n. p * ((1 - p) ^ n * n)) = (\<lambda>n. (1 - p) ^ n * p  * real n)"
+    by auto
+  ultimately have *: "(\<lambda>n. (1 - p) ^ n * p  * real n) sums ((1 - p) / p)"
+    using assms sums_subst by (auto simp add: power2_eq_square)
+  have "measure_pmf.expectation (geometric_pmf p) real =
+        (\<integral>n. pmf (geometric_pmf p) n * real n \<partial>count_space UNIV)"
+    unfolding measure_pmf_eq_density by (subst integral_density) auto
+  also have "integrable (count_space UNIV) (\<lambda>n. pmf (geometric_pmf p) n * real n)"
+    using * assms unfolding integrable_count_space_nat_iff by (simp add: sums_iff)
+  hence "(\<integral>n. pmf (geometric_pmf p) n * real n \<partial>count_space UNIV) = (1 - p) / p"
+    using * assms by (subst integral_count_space_nat) (simp_all add: sums_iff)
+  finally show ?thesis by auto
+qed
+
+lemma geometric_bind_pmf_unfold:
+  assumes "p \<in> {0<..1}"
+  shows "geometric_pmf p =
+     do {b \<leftarrow> bernoulli_pmf p;
+         if b then return_pmf 0 else map_pmf Suc (geometric_pmf p)}"
+proof -
+  have *: "(Suc -` {i}) = (if i = 0 then {} else {i - 1})" for i
+    by force
+  have "pmf (geometric_pmf p) i =
+        pmf (bernoulli_pmf p \<bind>
+            (\<lambda>b. if b then return_pmf 0 else map_pmf Suc (geometric_pmf p)))
+            i" for i
+  proof -
+    have "pmf (geometric_pmf p) i =
+          (if i = 0 then p else (1 - p) * pmf (geometric_pmf p) (i - 1))"
+      using assms by (simp add: power_eq_if)
+    also have "\<dots> = (if i = 0  then p else (1 - p) * pmf (map_pmf Suc (geometric_pmf p)) i)"
+      by (simp add: pmf_map indicator_def measure_pmf_single *)
+    also have "\<dots> = measure_pmf.expectation (bernoulli_pmf p)
+          (\<lambda>x. pmf (if x then return_pmf 0 else map_pmf Suc (geometric_pmf p)) i)"
+      using assms by (auto simp add: pmf_map *)
+    also have "\<dots> = pmf (bernoulli_pmf p \<bind>
+                   (\<lambda>b. if b then return_pmf 0 else map_pmf Suc (geometric_pmf p)))
+                   i"
+      by (auto simp add: pmf_bind)
+    finally show ?thesis .
+  qed
+  then show ?thesis
+    using pmf_eqI by blast
+qed
+
+
 subsubsection \<open> Uniform Multiset Distribution \<close>
 
 context
@@ -1945,6 +2080,23 @@
 lemma set_pmf_binomial[simp]: "0 < p \<Longrightarrow> p < 1 \<Longrightarrow> set_pmf (binomial_pmf n p) = {..n}"
   by (simp add: set_pmf_binomial_eq)
 
+lemma finite_set_pmf_binomial_pmf [intro]: "p \<in> {0..1} \<Longrightarrow> finite (set_pmf (binomial_pmf n p))"
+  by (subst set_pmf_binomial_eq) auto
+
+lemma expectation_binomial_pmf':
+  fixes f :: "nat \<Rightarrow> 'a :: {banach, second_countable_topology}"
+  assumes p: "p \<in> {0..1}"
+  shows   "measure_pmf.expectation (binomial_pmf n p) f =
+             (\<Sum>k\<le>n. (real (n choose k) * p ^ k * (1 - p) ^ (n - k)) *\<^sub>R f k)"
+  using p by (subst integral_measure_pmf[where A = "{..n}"])
+             (auto simp: set_pmf_binomial_eq split: if_splits)
+
+lemma integrable_binomial_pmf [simp, intro]:
+  fixes f :: "nat \<Rightarrow> 'a :: {banach, second_countable_topology}"
+  assumes p: "p \<in> {0..1}"
+  shows "integrable (binomial_pmf n p) f"
+  by (rule integrable_measure_pmf_finite) (use assms in auto)
+
 context includes lifting_syntax
 begin
 
@@ -2010,6 +2162,222 @@
         bind_return_pmf' binomial_pmf_0 intro!: bind_pmf_cong)
 
 
+subsection \<open>Negative Binomial distribution\<close>
+
+text \<open>
+  The negative binomial distribution counts the number of times a weighted coin comes up
+  tails before having come up heads \<open>n\<close> times. In other words: how many failures do we see before
+  seeing the \<open>n\<close>-th success?
+
+  An alternative view is that the negative binomial distribution is the sum of \<open>n\<close> i.i.d.
+  geometric variables (this is the definition that we use).
+
+  Note that there are sometimes different conventions for this distributions in the literature;
+  for instance, sometimes the number of \<^emph>\<open>attempts\<close> is counted instead of the number of failures.
+  This only shifts the entire distribution by a constant number and is thus not a big difference.
+  I think that the convention we use is the most natural one since the support of the distribution
+  starts at 0, whereas for the other convention it starts at \<open>n\<close>.
+\<close>
+primrec neg_binomial_pmf :: "nat \<Rightarrow> real \<Rightarrow> nat pmf" where
+  "neg_binomial_pmf 0 p = return_pmf 0"
+| "neg_binomial_pmf (Suc n) p =
+     map_pmf (\<lambda>(x,y). (x + y)) (pair_pmf (geometric_pmf p) (neg_binomial_pmf n p))"
+
+lemma neg_binomial_pmf_Suc_0 [simp]: "neg_binomial_pmf (Suc 0) p = geometric_pmf p"
+  by (auto simp: pair_pmf_def bind_return_pmf map_pmf_def bind_assoc_pmf bind_return_pmf')
+
+lemmas neg_binomial_pmf_Suc [simp del] = neg_binomial_pmf.simps(2)
+
+lemma neg_binomial_prob_1 [simp]: "neg_binomial_pmf n 1 = return_pmf 0"
+  by (induction n) (simp_all add: neg_binomial_pmf_Suc)
+
+text \<open>
+  We can now show the aforementioned intuition about counting the failures before the
+  \<open>n\<close>-th success with the following recurrence:
+\<close>
+lemma neg_binomial_pmf_unfold:
+  assumes p: "p \<in> {0<..1}"
+  shows "neg_binomial_pmf (Suc n) p =
+           do {b \<leftarrow> bernoulli_pmf p;
+               if b then neg_binomial_pmf n p else map_pmf Suc (neg_binomial_pmf (Suc n) p)}"
+  (is "_ = ?rhs")
+  unfolding neg_binomial_pmf_Suc
+  by (subst geometric_bind_pmf_unfold[OF p])
+     (auto simp: map_pmf_def pair_pmf_def bind_assoc_pmf bind_return_pmf bind_return_pmf'
+           intro!: bind_pmf_cong)
+
+text \<open>
+  Next, we show an explicit formula for the probability mass function of the negative
+  binomial distribution:
+\<close>
+lemma pmf_neg_binomial:
+  assumes p: "p \<in> {0<..1}"
+  shows   "pmf (neg_binomial_pmf n p) k = real ((k + n - 1) choose k) * p ^ n * (1 - p) ^ k"
+proof (induction n arbitrary: k)
+  case 0
+  thus ?case using assms by (auto simp: indicator_def)
+next
+  case (Suc n)
+  show ?case
+  proof (cases "n = 0")
+    case True
+    thus ?thesis using assms by auto
+  next
+    case False
+    let ?f = "pmf (neg_binomial_pmf n p)"
+    have "pmf (neg_binomial_pmf (Suc n) p) k =
+            pmf (geometric_pmf p \<bind> (\<lambda>x. map_pmf ((+) x) (neg_binomial_pmf n p))) k"
+      by (auto simp: pair_pmf_def bind_return_pmf map_pmf_def bind_assoc_pmf neg_binomial_pmf_Suc)
+    also have "\<dots> = measure_pmf.expectation (geometric_pmf p) 
+                      (\<lambda>x. measure_pmf.prob (neg_binomial_pmf n p) ((+) x -` {k}))"
+      by (simp add: pmf_bind pmf_map)
+    also have "(\<lambda>x. (+) x -` {k}) = (\<lambda>x. if x \<le> k then {k - x} else {})"
+      by (auto simp: fun_eq_iff)
+    also have "(\<lambda>x. measure_pmf.prob (neg_binomial_pmf n p) (\<dots> x)) =
+               (\<lambda>x. if x \<le> k then ?f(k - x) else 0)"
+      by (auto simp: fun_eq_iff measure_pmf_single)
+    also have "measure_pmf.expectation (geometric_pmf p) \<dots> =
+                 (\<Sum>i\<le>k. pmf (neg_binomial_pmf n p) (k - i) * pmf (geometric_pmf p) i)"
+      by (subst integral_measure_pmf_real[where A = "{..k}"]) (auto split: if_splits)
+    also have "\<dots> = p^(n+1) * (1-p)^k * real (\<Sum>i\<le>k. (k - i + n - 1) choose (k - i))"
+      unfolding sum_distrib_left of_nat_sum
+    proof (intro sum.cong refl, goal_cases)
+      case (1 i)
+      have "pmf (neg_binomial_pmf n p) (k - i) * pmf (geometric_pmf p) i =
+              real ((k - i + n - 1) choose (k - i)) * p^(n+1) * ((1-p)^(k-i) * (1-p)^i)"
+        using assms Suc.IH by (simp add: mult_ac)
+      also have "(1-p)^(k-i) * (1-p)^i = (1-p)^k"
+        using 1 by (subst power_add [symmetric]) auto
+      finally show ?case by simp
+    qed
+    also have "(\<Sum>i\<le>k. (k - i + n - 1) choose (k - i)) = (\<Sum>i\<le>k. (n - 1 + i) choose i)"
+      by (intro sum.reindex_bij_witness[of _ "\<lambda>i. k - i" "\<lambda>i. k - i"]) 
+         (use \<open>n \<noteq> 0\<close> in \<open>auto simp: algebra_simps\<close>)
+    also have "\<dots> = (n + k) choose k"
+      by (subst sum_choose_lower) (use \<open>n \<noteq> 0\<close> in auto)
+    finally show ?thesis
+      by (simp add: add_ac)
+  qed
+qed
+
+(* TODO: Move? *)
+lemma gbinomial_0_left: "0 gchoose k = (if k = 0 then 1 else 0)"
+  by (cases k) auto
+
+text \<open>
+  The following alternative formula highlights why it is called `negative binomial distribution':
+\<close>
+lemma pmf_neg_binomial':
+  assumes p: "p \<in> {0<..1}"
+  shows   "pmf (neg_binomial_pmf n p) k = (-1) ^ k * ((-real n) gchoose k) * p ^ n * (1 - p) ^ k"
+proof (cases "n > 0")
+  case n: True
+  have "pmf (neg_binomial_pmf n p) k = real ((k + n - 1) choose k) * p ^ n * (1 - p) ^ k"
+    by (rule pmf_neg_binomial) fact+
+  also have "real ((k + n - 1) choose k) = ((real k + real n - 1) gchoose k)"
+    using n by (subst binomial_gbinomial) (auto simp: of_nat_diff)
+  also have "\<dots> = (-1) ^ k * ((-real n) gchoose k)"
+    by (subst gbinomial_negated_upper) auto
+  finally show ?thesis by simp
+qed (auto simp: indicator_def gbinomial_0_left)
+
+text \<open>
+  The cumulative distribution function of the negative binomial distribution can be
+  expressed in terms of that of the `normal' binomial distribution.
+\<close>
+lemma prob_neg_binomial_pmf_atMost:
+  assumes p: "p \<in> {0<..1}"
+  shows "measure_pmf.prob (neg_binomial_pmf n p) {..k} =
+         measure_pmf.prob (binomial_pmf (n + k) (1 - p)) {..k}"
+proof (cases "n = 0")
+  case [simp]: True
+  have "set_pmf (binomial_pmf (n + k) (1 - p)) \<subseteq> {..n+k}"
+    using p by (subst set_pmf_binomial_eq) auto
+  hence "measure_pmf.prob (binomial_pmf (n + k) (1 - p)) {..k} = 1"
+    by (subst measure_pmf.prob_eq_1) (auto intro!: AE_pmfI)
+  thus ?thesis by simp
+next
+  case False
+  hence n: "n > 0" by auto
+  have "measure_pmf.prob (binomial_pmf (n + k) (1 - p)) {..k} = (\<Sum>i\<le>k. pmf (binomial_pmf (n + k) (1 - p)) i)"
+    by (intro measure_measure_pmf_finite) auto
+  also have "\<dots> = (\<Sum>i\<le>k. real ((n + k) choose i) * p ^ (n + k - i) * (1 - p) ^ i)"
+    using p by (simp add: mult_ac)
+  also have "\<dots> = p ^ n * (\<Sum>i\<le>k. real ((n + k) choose i) * (1 - p) ^ i * p ^ (k - i))"
+    unfolding sum_distrib_left by (intro sum.cong) (auto simp: algebra_simps simp flip: power_add)
+  also have "(\<Sum>i\<le>k. real ((n + k) choose i) * (1 - p) ^ i * p ^ (k - i)) =
+             (\<Sum>i\<le>k. ((n + i - 1) choose i) * (1 - p) ^ i)"
+    using gbinomial_partial_sum_poly_xpos[of k "real n" "1 - p" p] n
+    by (simp add: binomial_gbinomial add_ac of_nat_diff)
+  also have "p ^ n * \<dots> = (\<Sum>i\<le>k. pmf (neg_binomial_pmf n p) i)"
+    using p unfolding sum_distrib_left by (simp add: pmf_neg_binomial algebra_simps)
+  also have "\<dots> = measure_pmf.prob (neg_binomial_pmf n p) {..k}"
+    by (intro measure_measure_pmf_finite [symmetric]) auto
+  finally show ?thesis ..
+qed
+
+lemma prob_neg_binomial_pmf_lessThan:
+  assumes p: "p \<in> {0<..1}"
+  shows "measure_pmf.prob (neg_binomial_pmf n p) {..<k} =
+         measure_pmf.prob (binomial_pmf (n + k - 1) (1 - p)) {..<k}"
+proof (cases "k = 0")
+  case False
+  hence "{..<k} = {..k-1}"
+    by auto
+  thus ?thesis
+    using prob_neg_binomial_pmf_atMost[OF p, of n "k - 1"] False by simp
+qed auto  
+
+text \<open>
+  The expected value of the negative binomial distribution is $n(1-p)/p$:
+\<close>
+lemma nn_integral_neg_binomial_pmf_real:
+  assumes p: "p \<in> {0<..1}"
+  shows "nn_integral (measure_pmf (neg_binomial_pmf n p)) of_nat = ennreal (n * (1 - p) / p)"
+proof (induction n)
+  case 0
+  thus ?case by auto
+next
+  case (Suc n)
+  have "nn_integral (measure_pmf (neg_binomial_pmf (Suc n) p)) of_nat =
+        nn_integral (measure_pmf (geometric_pmf p)) of_nat +
+        nn_integral (measure_pmf (neg_binomial_pmf n p)) of_nat"
+    by (simp add: neg_binomial_pmf_Suc case_prod_unfold nn_integral_add nn_integral_pair_pmf')
+  also have "nn_integral (measure_pmf (geometric_pmf p)) of_nat = ennreal ((1-p) / p)"
+    unfolding ennreal_of_nat_eq_real_of_nat
+    using expectation_geometric_pmf[OF p] integrable_real_geometric_pmf[OF p]
+    by (subst nn_integral_eq_integral) auto
+  also have "nn_integral (measure_pmf (neg_binomial_pmf n p)) of_nat = n * (1 - p) / p" using p
+    by (subst Suc.IH)
+       (auto simp: ennreal_of_nat_eq_real_of_nat ennreal_mult simp flip: divide_ennreal ennreal_minus)
+  also have "ennreal ((1 - p) / p) + ennreal (real n * (1 - p) / p) =
+             ennreal ((1-p) / p + real n * (1 - p) / p)"
+    by (intro ennreal_plus [symmetric] divide_nonneg_pos mult_nonneg_nonneg) (use p in auto)
+  also have "(1-p) / p + real n * (1 - p) / p = real (Suc n) * (1 - p) / p"
+    using p by (auto simp: field_simps)
+  finally show ?case
+    by (simp add: ennreal_of_nat_eq_real_of_nat)
+qed
+
+lemma integrable_neg_binomial_pmf_real:
+  assumes p: "p \<in> {0<..1}"
+  shows "integrable (measure_pmf (neg_binomial_pmf n p)) real"
+  using nn_integral_neg_binomial_pmf_real[OF p, of n]
+  by (subst integrable_iff_bounded) (auto simp flip: ennreal_of_nat_eq_real_of_nat)
+
+lemma expectation_neg_binomial_pmf:
+  assumes p: "p \<in> {0<..1}"
+  shows "measure_pmf.expectation (neg_binomial_pmf n p) real = n * (1 - p) / p"
+proof -
+  have "nn_integral (measure_pmf (neg_binomial_pmf n p)) of_nat = ennreal (n * (1 - p) / p)"
+    by (intro nn_integral_neg_binomial_pmf_real p)
+  also have "of_nat = (\<lambda>x. ennreal (real x))"
+    by (simp add: ennreal_of_nat_eq_real_of_nat fun_eq_iff)
+  finally show ?thesis
+    using p by (subst (asm) nn_integral_eq_integrable) auto
+qed
+
+
 subsection \<open>PMFs from association lists\<close>
 
 definition pmf_of_list ::" ('a \<times> real) list \<Rightarrow> 'a pmf" where
--- a/src/HOL/Probability/Probability_Measure.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Probability/Probability_Measure.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -465,6 +465,21 @@
   "expectation X = 0 \<Longrightarrow> variance X = expectation (\<lambda>x. (X x)^2)"
   by simp
 
+theorem%important (in prob_space) Chebyshev_inequality:
+  assumes [measurable]: "random_variable borel f"
+  assumes "integrable M (\<lambda>x. f x ^ 2)"
+  defines "\<mu> \<equiv> expectation f"
+  assumes "a > 0"
+  shows "prob {x\<in>space M. \<bar>f x - \<mu>\<bar> \<ge> a} \<le> variance f / a\<^sup>2"
+  unfolding \<mu>_def
+proof (rule second_moment_method)
+  have integrable: "integrable M f"
+    using assms by (blast dest: square_integrable_imp_integrable)
+  show "integrable M (\<lambda>x. (f x - expectation f)\<^sup>2)"
+    using assms integrable unfolding power2_eq_square ring_distribs
+    by (intro Bochner_Integration.integrable_diff) auto
+qed (use assms in auto)
+
 locale pair_prob_space = pair_sigma_finite M1 M2 + M1: prob_space M1 + M2: prob_space M2 for M1 M2
 
 sublocale pair_prob_space \<subseteq> P?: prob_space "M1 \<Otimes>\<^sub>M M2"
@@ -501,6 +516,18 @@
   finally show ?thesis by (simp add: measure_nonneg prod_nonneg)
 qed
 
+lemma product_prob_spaceI:
+  assumes "\<And>i. prob_space (M i)"
+  shows   "product_prob_space M"
+  unfolding product_prob_space_def product_prob_space_axioms_def product_sigma_finite_def
+proof safe
+  fix i
+  interpret prob_space "M i"
+    by (rule assms)
+  show "sigma_finite_measure (M i)" "prob_space (M i)"
+    by unfold_locales
+qed
+
 subsection \<open>Distributions\<close>
 
 definition distributed :: "'a measure \<Rightarrow> 'b measure \<Rightarrow> ('a \<Rightarrow> 'b) \<Rightarrow> ('b \<Rightarrow> ennreal) \<Rightarrow> bool"
@@ -1226,4 +1253,53 @@
 lemma (in prob_space) prob_space_completion: "prob_space (completion M)"
   by (rule prob_spaceI) (simp add: emeasure_space_1)
 
+lemma distr_PiM_finite_prob_space:
+  assumes fin: "finite I"
+  assumes "product_prob_space M"
+  assumes "product_prob_space M'"
+  assumes [measurable]: "\<And>i. i \<in> I \<Longrightarrow> f \<in> measurable (M i) (M' i)"
+  shows   "distr (PiM I M) (PiM I M') (compose I f) = PiM I (\<lambda>i. distr (M i) (M' i) f)"
+proof -
+  interpret M: product_prob_space M by fact
+  interpret M': product_prob_space M' by fact
+  define N where "N = (\<lambda>i. if i \<in> I then distr (M i) (M' i) f else M' i)"
+  have [intro]: "prob_space (N i)" for i
+    by (auto simp: N_def intro!: M.M.prob_space_distr M'.prob_space)
+
+  interpret N: product_prob_space N
+    by (intro product_prob_spaceI) (auto simp: N_def M'.prob_space intro: M.M.prob_space_distr)
+
+  have "distr (PiM I M) (PiM I M') (compose I f) = PiM I N"
+  proof (rule N.PiM_eqI)
+    have N_events_eq: "sets (Pi\<^sub>M I N) = sets (Pi\<^sub>M I M')"
+      unfolding N_def by (intro sets_PiM_cong) auto
+    also have "\<dots> = sets (distr (Pi\<^sub>M I M) (Pi\<^sub>M I M') (compose I f))"
+      by simp
+    finally show "sets (distr (Pi\<^sub>M I M) (Pi\<^sub>M I M') (compose I f)) = sets (Pi\<^sub>M I N)"  ..
+
+    fix A assume A: "\<And>i. i \<in> I \<Longrightarrow> A i \<in> N.M.events i"
+    have "emeasure (distr (Pi\<^sub>M I M) (Pi\<^sub>M I M') (compose I f)) (Pi\<^sub>E I A) =
+          emeasure (Pi\<^sub>M I M) (compose I f -` Pi\<^sub>E I A \<inter> space (Pi\<^sub>M I M))"
+    proof (intro emeasure_distr)
+      show "compose I f \<in> Pi\<^sub>M I M \<rightarrow>\<^sub>M Pi\<^sub>M I M'"
+        unfolding compose_def by measurable
+      show "Pi\<^sub>E I A \<in> sets (Pi\<^sub>M I M')"
+        unfolding N_events_eq [symmetric] by (intro sets_PiM_I_finite fin A)
+    qed
+    also have "compose I f -` Pi\<^sub>E I A \<inter> space (Pi\<^sub>M I M) = Pi\<^sub>E I (\<lambda>i. f -` A i \<inter> space (M i))"
+      using A by (auto simp: space_PiM PiE_def Pi_def extensional_def N_def compose_def)
+    also have "emeasure (Pi\<^sub>M I M) (Pi\<^sub>E I (\<lambda>i. f -` A i \<inter> space (M i))) =
+          (\<Prod>i\<in>I. emeasure (M i) (f -` A i \<inter> space (M i)))"
+      using A by (intro M.emeasure_PiM fin) (auto simp: N_def)
+    also have "\<dots> = (\<Prod>i\<in>I. emeasure (distr (M i) (M' i) f) (A i))"
+      using A by (intro prod.cong emeasure_distr [symmetric]) (auto simp: N_def)
+    also have "\<dots> = (\<Prod>i\<in>I. emeasure (N i) (A i))"
+      unfolding N_def by (intro prod.cong) (auto simp: N_def)
+    finally show "emeasure (distr (Pi\<^sub>M I M) (Pi\<^sub>M I M') (compose I f)) (Pi\<^sub>E I A) = \<dots>" .
+  qed fact+
+  also have "PiM I N = PiM I (\<lambda>i. distr (M i) (M' i) f)"
+    by (intro PiM_cong) (auto simp: N_def)
+  finally show ?thesis .
+qed
+
 end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Probability/Product_PMF.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -0,0 +1,910 @@
+(*
+  File:    Product_PMF.thy
+  Authors: Manuel Eberl, Max W. Haslbeck
+*)
+section \<open>Indexed products of PMFs\<close>
+theory Product_PMF
+  imports Probability_Mass_Function Independent_Family
+begin
+
+subsection \<open>Preliminaries\<close>
+
+lemma pmf_expectation_eq_infsetsum: "measure_pmf.expectation p f = infsetsum (\<lambda>x. pmf p x * f x) UNIV"
+  unfolding infsetsum_def measure_pmf_eq_density by (subst integral_density) simp_all
+
+lemma measure_pmf_prob_product:
+  assumes "countable A" "countable B"
+  shows "measure_pmf.prob (pair_pmf M N) (A \<times> B) = measure_pmf.prob M A * measure_pmf.prob N B"
+proof -
+  have "measure_pmf.prob (pair_pmf M N) (A \<times> B) = (\<Sum>\<^sub>a(a, b)\<in>A \<times> B. pmf M a * pmf N b)"
+    by (auto intro!: infsetsum_cong simp add: measure_pmf_conv_infsetsum pmf_pair)
+  also have "\<dots> = measure_pmf.prob M A * measure_pmf.prob N B"
+    using assms by (subst infsetsum_product) (auto simp add: measure_pmf_conv_infsetsum)
+  finally show ?thesis
+    by simp
+qed
+
+
+subsection \<open>Definition\<close>
+
+text \<open>
+  In analogy to @{const PiM}, we define an indexed product of PMFs. In the literature, this
+  is typically called taking a vector of independent random variables. Note that the components
+  do not have to be identically distributed.
+
+  The operation takes an explicit index set \<^term>\<open>A :: 'a set\<close> and a function \<^term>\<open>f :: 'a \<Rightarrow> 'b pmf\<close>
+  that maps each element from \<^term>\<open>A\<close> to a PMF and defines the product measure
+  $\bigotimes_{i\in A} f(i)$ , which is represented as a \<^typ>\<open>('a \<Rightarrow> 'b) pmf\<close>.
+
+  Note that unlike @{const PiM}, this only works for \<^emph>\<open>finite\<close> index sets. It could
+  be extended to countable sets and beyond, but the construction becomes somewhat more involved.
+\<close>
+definition Pi_pmf :: "'a set \<Rightarrow> 'b \<Rightarrow> ('a \<Rightarrow> 'b pmf) \<Rightarrow> ('a \<Rightarrow> 'b) pmf" where
+  "Pi_pmf A dflt p =
+     embed_pmf (\<lambda>f. if (\<forall>x. x \<notin> A \<longrightarrow> f x = dflt) then \<Prod>x\<in>A. pmf (p x) (f x) else 0)"
+
+text \<open>
+  A technical subtlety that needs to be addressed is this: Intuitively, the functions in the
+  support of a product distribution have domain \<open>A\<close>. However, since HOL is a total logic, these
+  functions must still return \<^emph>\<open>some\<close> value for inputs outside \<open>A\<close>. The product measure
+  @{const PiM} simply lets these functions return @{const undefined} in these cases. We chose a
+  different solution here, which is to supply a default value \<^term>\<open>dflt :: 'b\<close> that is returned
+  in these cases.
+
+  As one possible application, one could model the result of \<open>n\<close> different independent coin
+  tosses as @{term "Pi_pmf {0..<n} False (\<lambda>_. bernoulli_pmf (1 / 2))"}. This returns a function
+  of type \<^typ>\<open>nat \<Rightarrow> bool\<close> that maps every natural number below \<open>n\<close> to the result of the
+  corresponding coin toss, and every other natural number to \<^term>\<open>False\<close>.
+\<close>
+
+lemma pmf_Pi:
+  assumes A: "finite A"
+  shows   "pmf (Pi_pmf A dflt p) f =
+             (if (\<forall>x. x \<notin> A \<longrightarrow> f x = dflt) then \<Prod>x\<in>A. pmf (p x) (f x) else 0)"
+  unfolding Pi_pmf_def
+proof (rule pmf_embed_pmf, goal_cases)
+  case 2
+  define S where "S = {f. \<forall>x. x \<notin> A \<longrightarrow> f x = dflt}"
+  define B where "B = (\<lambda>x. set_pmf (p x))"
+
+  have neutral_left: "(\<Prod>xa\<in>A. pmf (p xa) (f xa)) = 0"
+    if "f \<in> PiE A B - (\<lambda>f. restrict f A) ` S" for f
+  proof -
+    have "restrict (\<lambda>x. if x \<in> A then f x else dflt) A \<in> (\<lambda>f. restrict f A) ` S"
+      by (intro imageI) (auto simp: S_def)
+    also have "restrict (\<lambda>x. if x \<in> A then f x else dflt) A = f"
+      using that by (auto simp: PiE_def Pi_def extensional_def fun_eq_iff)
+    finally show ?thesis using that by blast
+  qed
+  have neutral_right: "(\<Prod>xa\<in>A. pmf (p xa) (f xa)) = 0"
+    if "f \<in> (\<lambda>f. restrict f A) ` S - PiE A B" for f
+  proof -
+    from that obtain f' where f': "f = restrict f' A" "f' \<in> S" by auto
+    moreover from this and that have "restrict f' A \<notin> PiE A B" by simp
+    then obtain x where "x \<in> A" "pmf (p x) (f' x) = 0" by (auto simp: B_def set_pmf_eq)
+    with f' and A show ?thesis by auto
+  qed
+
+  have "(\<lambda>f. \<Prod>x\<in>A. pmf (p x) (f x)) abs_summable_on PiE A B"
+    by (intro abs_summable_on_prod_PiE A) (auto simp: B_def)
+  also have "?this \<longleftrightarrow> (\<lambda>f. \<Prod>x\<in>A. pmf (p x) (f x)) abs_summable_on (\<lambda>f. restrict f A) ` S"
+    by (intro abs_summable_on_cong_neutral neutral_left neutral_right) auto
+  also have "\<dots> \<longleftrightarrow> (\<lambda>f. \<Prod>x\<in>A. pmf (p x) (restrict f A x)) abs_summable_on S"
+    by (rule abs_summable_on_reindex_iff [symmetric]) (force simp: inj_on_def fun_eq_iff S_def)
+  also have "\<dots> \<longleftrightarrow> (\<lambda>f. if \<forall>x. x \<notin> A \<longrightarrow> f x = dflt then \<Prod>x\<in>A. pmf (p x) (f x) else 0)
+                          abs_summable_on UNIV"
+    by (intro abs_summable_on_cong_neutral) (auto simp: S_def)
+  finally have summable: \<dots> .
+
+  have "1 = (\<Prod>x\<in>A. 1::real)" by simp
+  also have "(\<Prod>x\<in>A. 1) = (\<Prod>x\<in>A. \<Sum>\<^sub>ay\<in>B x. pmf (p x) y)"
+    unfolding B_def by (subst infsetsum_pmf_eq_1) auto
+  also have "(\<Prod>x\<in>A. \<Sum>\<^sub>ay\<in>B x. pmf (p x) y) = (\<Sum>\<^sub>af\<in>Pi\<^sub>E A B. \<Prod>x\<in>A. pmf (p x) (f x))"
+    by (intro infsetsum_prod_PiE [symmetric] A) (auto simp: B_def)
+  also have "\<dots> = (\<Sum>\<^sub>af\<in>(\<lambda>f. restrict f A) ` S. \<Prod>x\<in>A. pmf (p x) (f x))" using A
+    by (intro infsetsum_cong_neutral neutral_left neutral_right refl)
+  also have "\<dots> = (\<Sum>\<^sub>af\<in>S. \<Prod>x\<in>A. pmf (p x) (restrict f A x))"
+    by (rule infsetsum_reindex) (force simp: inj_on_def fun_eq_iff S_def)
+  also have "\<dots> = (\<Sum>\<^sub>af\<in>S. \<Prod>x\<in>A. pmf (p x) (f x))"
+    by (intro infsetsum_cong) (auto simp: S_def)
+  also have "\<dots> = (\<Sum>\<^sub>af. if \<forall>x. x \<notin> A \<longrightarrow> f x = dflt then \<Prod>x\<in>A. pmf (p x) (f x) else 0)"
+    by (intro infsetsum_cong_neutral) (auto simp: S_def)
+  also have "ennreal \<dots> = (\<integral>\<^sup>+f. ennreal (if \<forall>x. x \<notin> A \<longrightarrow> f x = dflt
+                             then \<Prod>x\<in>A. pmf (p x) (f x) else 0) \<partial>count_space UNIV)"
+    by (intro nn_integral_conv_infsetsum [symmetric] summable) (auto simp: prod_nonneg)
+  finally show ?case by simp
+qed (auto simp: prod_nonneg)
+
+lemma Pi_pmf_cong:
+  assumes "A = A'" "dflt = dflt'" "\<And>x. x \<in> A \<Longrightarrow> f x = f' x"
+  shows   "Pi_pmf A dflt f = Pi_pmf A' dflt' f'"
+proof -
+  have "(\<lambda>fa. if \<forall>x. x \<notin> A \<longrightarrow> fa x = dflt then \<Prod>x\<in>A. pmf (f x) (fa x) else 0) =
+        (\<lambda>f. if \<forall>x. x \<notin> A' \<longrightarrow> f x = dflt' then \<Prod>x\<in>A'. pmf (f' x) (f x) else 0)"
+    using assms by (intro ext) (auto intro!: prod.cong)
+  thus ?thesis
+    by (simp only: Pi_pmf_def)
+qed
+
+lemma pmf_Pi':
+  assumes "finite A" "\<And>x. x \<notin> A \<Longrightarrow> f x = dflt"
+  shows   "pmf (Pi_pmf A dflt p) f = (\<Prod>x\<in>A. pmf (p x) (f x))"
+  using assms by (subst pmf_Pi) auto
+
+lemma pmf_Pi_outside:
+  assumes "finite A" "\<exists>x. x \<notin> A \<and> f x \<noteq> dflt"
+  shows   "pmf (Pi_pmf A dflt p) f = 0"
+  using assms by (subst pmf_Pi) auto
+
+lemma pmf_Pi_empty [simp]: "Pi_pmf {} dflt p = return_pmf (\<lambda>_. dflt)"
+  by (intro pmf_eqI, subst pmf_Pi) (auto simp: indicator_def)
+
+lemma set_Pi_pmf_subset: "finite A \<Longrightarrow> set_pmf (Pi_pmf A dflt p) \<subseteq> {f. \<forall>x. x \<notin> A \<longrightarrow> f x = dflt}"
+  by (auto simp: set_pmf_eq pmf_Pi)
+
+
+subsection \<open>Dependent product sets with a default\<close>
+
+text \<open>
+  The following describes a dependent product of sets where the functions are required to return
+  the default value \<^term>\<open>dflt\<close> outside their domain, in analogy to @{const PiE}, which uses
+  @{const undefined}.
+\<close>
+definition PiE_dflt
+  where "PiE_dflt A dflt B = {f. \<forall>x. (x \<in> A \<longrightarrow> f x \<in> B x) \<and> (x \<notin> A \<longrightarrow> f x = dflt)}"
+
+lemma restrict_PiE_dflt: "(\<lambda>h. restrict h A) ` PiE_dflt A dflt B = PiE A B"
+proof (intro equalityI subsetI)
+  fix h assume "h \<in> (\<lambda>h. restrict h A) ` PiE_dflt A dflt B"
+  thus "h \<in> PiE A B"
+    by (auto simp: PiE_dflt_def)
+next
+  fix h assume h: "h \<in> PiE A B"
+  hence "restrict (\<lambda>x. if x \<in> A then h x else dflt) A \<in> (\<lambda>h. restrict h A) ` PiE_dflt A dflt B"
+    by (intro imageI) (auto simp: PiE_def extensional_def PiE_dflt_def)
+  also have "restrict (\<lambda>x. if x \<in> A then h x else dflt) A = h"
+    using h by (auto simp: fun_eq_iff)
+  finally show "h \<in> (\<lambda>h. restrict h A) ` PiE_dflt A dflt B" .
+qed
+
+lemma dflt_image_PiE: "(\<lambda>h x. if x \<in> A then h x else dflt) ` PiE A B = PiE_dflt A dflt B"
+  (is "?f ` ?X = ?Y")
+proof (intro equalityI subsetI)
+  fix h assume "h \<in> ?f ` ?X"
+  thus "h \<in> ?Y"
+    by (auto simp: PiE_dflt_def PiE_def)
+next
+  fix h assume h: "h \<in> ?Y"
+  hence "?f (restrict h A) \<in> ?f ` ?X"
+    by (intro imageI) (auto simp: PiE_def extensional_def PiE_dflt_def)
+  also have "?f (restrict h A) = h"
+    using h by (auto simp: fun_eq_iff PiE_dflt_def)
+  finally show "h \<in> ?f ` ?X" .
+qed
+
+lemma finite_PiE_dflt [intro]:
+  assumes "finite A" "\<And>x. x \<in> A \<Longrightarrow> finite (B x)"
+  shows   "finite (PiE_dflt A d B)"
+proof -
+  have "PiE_dflt A d B = (\<lambda>f x. if x \<in> A then f x else d) ` PiE A B"
+    by (rule dflt_image_PiE [symmetric])
+  also have "finite \<dots>"
+    by (intro finite_imageI finite_PiE assms)
+  finally show ?thesis .
+qed
+
+lemma card_PiE_dflt:
+  assumes "finite A" "\<And>x. x \<in> A \<Longrightarrow> finite (B x)"
+  shows   "card (PiE_dflt A d B) = (\<Prod>x\<in>A. card (B x))"
+proof -
+  from assms have "(\<Prod>x\<in>A. card (B x)) = card (PiE A B)"
+    by (intro card_PiE [symmetric]) auto
+  also have "PiE A B = (\<lambda>f. restrict f A) ` PiE_dflt A d B"
+    by (rule restrict_PiE_dflt [symmetric])
+  also have "card \<dots> = card (PiE_dflt A d B)"
+    by (intro card_image) (force simp: inj_on_def restrict_def fun_eq_iff PiE_dflt_def)
+  finally show ?thesis ..
+qed
+
+lemma PiE_dflt_empty_iff [simp]: "PiE_dflt A dflt B = {} \<longleftrightarrow> (\<exists>x\<in>A. B x = {})"
+  by (simp add: dflt_image_PiE [symmetric] PiE_eq_empty_iff)
+
+lemma set_Pi_pmf_subset':
+  assumes "finite A"
+  shows   "set_pmf (Pi_pmf A dflt p) \<subseteq> PiE_dflt A dflt (set_pmf \<circ> p)"
+  using assms by (auto simp: set_pmf_eq pmf_Pi PiE_dflt_def)
+
+lemma set_Pi_pmf:
+  assumes "finite A"
+  shows   "set_pmf (Pi_pmf A dflt p) = PiE_dflt A dflt (set_pmf \<circ> p)"
+proof (rule equalityI)
+  show "PiE_dflt A dflt (set_pmf \<circ> p) \<subseteq> set_pmf (Pi_pmf A dflt p)"
+  proof safe
+    fix f assume f: "f \<in> PiE_dflt A dflt (set_pmf \<circ> p)"
+    hence "pmf (Pi_pmf A dflt p) f = (\<Prod>x\<in>A. pmf (p x) (f x))"
+      using assms by (auto simp: pmf_Pi PiE_dflt_def)
+    also have "\<dots> > 0"
+      using f by (intro prod_pos) (auto simp: PiE_dflt_def set_pmf_eq)
+    finally show "f \<in> set_pmf (Pi_pmf A dflt p)"
+      by (auto simp: set_pmf_eq)
+  qed
+qed (use set_Pi_pmf_subset'[OF assms, of dflt p] in auto)
+
+text \<open>
+  The probability of an independent combination of events is precisely the product
+  of the probabilities of each individual event.
+\<close>
+lemma measure_Pi_pmf_PiE_dflt:
+  assumes [simp]: "finite A"
+  shows   "measure_pmf.prob (Pi_pmf A dflt p) (PiE_dflt A dflt B) =
+             (\<Prod>x\<in>A. measure_pmf.prob (p x) (B x))"
+proof -
+  define B' where "B' = (\<lambda>x. B x \<inter> set_pmf (p x))"
+  have "measure_pmf.prob (Pi_pmf A dflt p) (PiE_dflt A dflt B) =
+          (\<Sum>\<^sub>ah\<in>PiE_dflt A dflt B. pmf (Pi_pmf A dflt p) h)"
+    by (rule measure_pmf_conv_infsetsum)
+  also have "\<dots> = (\<Sum>\<^sub>ah\<in>PiE_dflt A dflt B. \<Prod>x\<in>A. pmf (p x) (h x))"
+    by (intro infsetsum_cong, subst pmf_Pi') (auto simp: PiE_dflt_def)
+  also have "\<dots> = (\<Sum>\<^sub>ah\<in>(\<lambda>h. restrict h A) ` PiE_dflt A dflt B. \<Prod>x\<in>A. pmf (p x) (h x))"
+    by (subst infsetsum_reindex) (force simp: inj_on_def PiE_dflt_def fun_eq_iff)+
+  also have "(\<lambda>h. restrict h A) ` PiE_dflt A dflt B = PiE A B"
+    by (rule restrict_PiE_dflt)
+  also have "(\<Sum>\<^sub>ah\<in>PiE A B. \<Prod>x\<in>A. pmf (p x) (h x)) = (\<Sum>\<^sub>ah\<in>PiE A B'. \<Prod>x\<in>A. pmf (p x) (h x))"
+    by (intro infsetsum_cong_neutral) (auto simp: B'_def set_pmf_eq)
+  also have "(\<Sum>\<^sub>ah\<in>PiE A B'. \<Prod>x\<in>A. pmf (p x) (h x)) = (\<Prod>x\<in>A. infsetsum (pmf (p x)) (B' x))"
+    by (intro infsetsum_prod_PiE) (auto simp: B'_def)
+  also have "\<dots> = (\<Prod>x\<in>A. infsetsum (pmf (p x)) (B x))"
+    by (intro prod.cong infsetsum_cong_neutral) (auto simp: B'_def set_pmf_eq)
+  also have "\<dots> = (\<Prod>x\<in>A. measure_pmf.prob (p x) (B x))"
+    by (subst measure_pmf_conv_infsetsum) (rule refl)
+  finally show ?thesis .
+qed
+
+lemma measure_Pi_pmf_Pi:
+  fixes t::nat
+  assumes [simp]: "finite A"
+  shows   "measure_pmf.prob (Pi_pmf A dflt p) (Pi A B) =
+             (\<Prod>x\<in>A. measure_pmf.prob (p x) (B x))" (is "?lhs = ?rhs")
+proof -
+  have "?lhs = measure_pmf.prob (Pi_pmf A dflt p) (PiE_dflt A dflt B)"
+    by (intro measure_prob_cong_0)
+       (auto simp: PiE_dflt_def PiE_def intro!: pmf_Pi_outside)+
+  also have "\<dots> = ?rhs"
+    using assms by (simp add: measure_Pi_pmf_PiE_dflt)
+  finally show ?thesis
+    by simp
+qed
+
+
+subsection \<open>Common PMF operations on products\<close>
+
+text \<open>
+  @{const Pi_pmf} distributes over the `bind' operation in the Giry monad:
+\<close>
+lemma Pi_pmf_bind:
+  assumes "finite A"
+  shows   "Pi_pmf A d (\<lambda>x. bind_pmf (p x) (q x)) =
+             do {f \<leftarrow> Pi_pmf A d' p; Pi_pmf A d (\<lambda>x. q x (f x))}" (is "?lhs = ?rhs")
+proof (rule pmf_eqI, goal_cases)
+  case (1 f)
+  show ?case
+  proof (cases "\<exists>x\<in>-A. f x \<noteq> d")
+    case False
+    define B where "B = (\<lambda>x. set_pmf (p x))"
+    have [simp]: "countable (B x)" for x by (auto simp: B_def)
+
+    {
+      fix x :: 'a
+      have "(\<lambda>a. pmf (p x) a * 1) abs_summable_on B x"
+        by (simp add: pmf_abs_summable)
+      moreover have "norm (pmf (p x) a * 1) \<ge> norm (pmf (p x) a * pmf (q x a) (f x))" for a
+        unfolding norm_mult by (intro mult_left_mono) (auto simp: pmf_le_1)
+      ultimately have "(\<lambda>a. pmf (p x) a * pmf (q x a) (f x)) abs_summable_on B x"
+        by (rule abs_summable_on_comparison_test)
+    } note summable = this
+
+    have "pmf ?rhs f = (\<Sum>\<^sub>ag. pmf (Pi_pmf A d' p) g * (\<Prod>x\<in>A. pmf (q x (g x)) (f x)))"
+      by (subst pmf_bind, subst pmf_Pi')
+         (insert assms False, simp_all add: pmf_expectation_eq_infsetsum)
+    also have "\<dots> = (\<Sum>\<^sub>ag\<in>PiE_dflt A d' B.
+                      pmf (Pi_pmf A d' p) g * (\<Prod>x\<in>A. pmf (q x (g x)) (f x)))" unfolding B_def
+      using assms by (intro infsetsum_cong_neutral) (auto simp: pmf_Pi PiE_dflt_def set_pmf_eq)
+    also have "\<dots> = (\<Sum>\<^sub>ag\<in>PiE_dflt A d' B.
+                      (\<Prod>x\<in>A. pmf (p x) (g x) * pmf (q x (g x)) (f x)))"
+      using assms by (intro infsetsum_cong) (auto simp: pmf_Pi PiE_dflt_def prod.distrib)
+    also have "\<dots> = (\<Sum>\<^sub>ag\<in>(\<lambda>g. restrict g A) ` PiE_dflt A d' B.
+                      (\<Prod>x\<in>A. pmf (p x) (g x) * pmf (q x (g x)) (f x)))"
+      by (subst infsetsum_reindex) (force simp: PiE_dflt_def inj_on_def fun_eq_iff)+
+    also have "(\<lambda>g. restrict g A) ` PiE_dflt A d' B = PiE A B"
+      by (rule restrict_PiE_dflt)
+    also have "(\<Sum>\<^sub>ag\<in>\<dots>. (\<Prod>x\<in>A. pmf (p x) (g x) * pmf (q x (g x)) (f x))) =
+                 (\<Prod>x\<in>A. \<Sum>\<^sub>aa\<in>B x. pmf (p x) a * pmf (q x a) (f x))"
+      using assms summable by (subst infsetsum_prod_PiE) simp_all
+    also have "\<dots> = (\<Prod>x\<in>A. \<Sum>\<^sub>aa. pmf (p x) a * pmf (q x a) (f x))"
+      by (intro prod.cong infsetsum_cong_neutral) (auto simp: B_def set_pmf_eq)
+    also have "\<dots> = pmf ?lhs f"
+      using False assms by (subst pmf_Pi') (simp_all add: pmf_bind pmf_expectation_eq_infsetsum)
+    finally show ?thesis ..
+  next
+    case True
+    have "pmf ?rhs f =
+            measure_pmf.expectation (Pi_pmf A d' p) (\<lambda>x. pmf (Pi_pmf A d (\<lambda>xa. q xa (x xa))) f)"
+      using assms by (simp add: pmf_bind)
+    also have "\<dots> = measure_pmf.expectation (Pi_pmf A d' p) (\<lambda>x. 0)"
+      using assms True by (intro Bochner_Integration.integral_cong pmf_Pi_outside) auto
+    also have "\<dots> = pmf ?lhs f"
+      using assms True by (subst pmf_Pi_outside) auto
+    finally show ?thesis ..
+  qed
+qed
+
+lemma Pi_pmf_return_pmf [simp]:
+  assumes "finite A"
+  shows   "Pi_pmf A dflt (\<lambda>x. return_pmf (f x)) = return_pmf (\<lambda>x. if x \<in> A then f x else dflt)"
+  using assms by (intro pmf_eqI) (auto simp: pmf_Pi simp: indicator_def)
+
+text \<open>
+  Analogously any componentwise mapping can be pulled outside the product:
+\<close>
+lemma Pi_pmf_map:
+  assumes [simp]: "finite A" and "f dflt = dflt'"
+  shows   "Pi_pmf A dflt' (\<lambda>x. map_pmf f (g x)) = map_pmf (\<lambda>h. f \<circ> h) (Pi_pmf A dflt g)"
+proof -
+  have "Pi_pmf A dflt' (\<lambda>x. map_pmf f (g x)) =
+          Pi_pmf A dflt' (\<lambda>x. g x \<bind> (\<lambda>x. return_pmf (f x)))"
+    using assms by (simp add: map_pmf_def Pi_pmf_bind)
+  also have "\<dots> = Pi_pmf A dflt g \<bind> (\<lambda>h. return_pmf (\<lambda>x. if x \<in> A then f (h x) else dflt'))"
+   by (subst Pi_pmf_bind[where d' = dflt]) (auto simp: )
+  also have "\<dots> = map_pmf (\<lambda>h. f \<circ> h) (Pi_pmf A dflt g)"
+    unfolding map_pmf_def using set_Pi_pmf_subset'[of A dflt g]
+    by (intro bind_pmf_cong refl arg_cong[of _ _ return_pmf])
+       (auto dest: simp: fun_eq_iff PiE_dflt_def assms(2))
+  finally show ?thesis .
+qed
+
+text \<open>
+  We can exchange the default value in a product of PMFs like this:
+\<close>
+lemma Pi_pmf_default_swap:
+  assumes "finite A"
+  shows   "map_pmf (\<lambda>f x. if x \<in> A then f x else dflt') (Pi_pmf A dflt p) =
+             Pi_pmf A dflt' p" (is "?lhs = ?rhs")
+proof (rule pmf_eqI, goal_cases)
+  case (1 f)
+  let ?B = "(\<lambda>f x. if x \<in> A then f x else dflt') -` {f} \<inter> PiE_dflt A dflt (\<lambda>_. UNIV)"
+  show ?case
+  proof (cases "\<exists>x\<in>-A. f x \<noteq> dflt'")
+    case False
+    let ?f' = "\<lambda>x. if x \<in> A then f x else dflt"
+    from False have "pmf ?lhs f = measure_pmf.prob (Pi_pmf A dflt p) ?B"
+      using assms unfolding pmf_map
+      by (intro measure_prob_cong_0) (auto simp: PiE_dflt_def pmf_Pi_outside)
+    also from False have "?B = {?f'}"
+      by (auto simp: fun_eq_iff PiE_dflt_def)
+    also have "measure_pmf.prob (Pi_pmf A dflt p) {?f'} = pmf (Pi_pmf A dflt p) ?f'"
+      by (simp add: measure_pmf_single)
+    also have "\<dots> = pmf ?rhs f"
+      using False assms by (subst (1 2) pmf_Pi) auto
+    finally show ?thesis .
+  next
+    case True
+    have "pmf ?lhs f = measure_pmf.prob (Pi_pmf A dflt p) ?B"
+      using assms unfolding pmf_map
+      by (intro measure_prob_cong_0) (auto simp: PiE_dflt_def pmf_Pi_outside)
+    also from True have "?B = {}" by auto
+    also have "measure_pmf.prob (Pi_pmf A dflt p) \<dots> = 0"
+      by simp
+    also have "0 = pmf ?rhs f"
+      using True assms by (intro pmf_Pi_outside [symmetric]) auto
+    finally show ?thesis .
+  qed
+qed
+
+text \<open>
+  The following rule allows reindexing the product:
+\<close>
+lemma Pi_pmf_bij_betw:
+  assumes "finite A" "bij_betw h A B" "\<And>x. x \<notin> A \<Longrightarrow> h x \<notin> B"
+  shows "Pi_pmf A dflt (\<lambda>_. f) = map_pmf (\<lambda>g. g \<circ> h) (Pi_pmf B dflt (\<lambda>_. f))"
+    (is "?lhs = ?rhs")
+proof -
+  have B: "finite B"
+    using assms bij_betw_finite by auto
+  have "pmf ?lhs g = pmf ?rhs g" for g
+  proof (cases "\<forall>a. a \<notin> A \<longrightarrow> g a = dflt")
+    case True
+    define h' where "h' = the_inv_into A h"
+    have h': "h' (h x) = x" if "x \<in> A" for x
+      unfolding h'_def using that assms by (auto simp add: bij_betw_def the_inv_into_f_f)
+    have h: "h (h' x) = x" if "x \<in> B" for x
+      unfolding h'_def using that assms f_the_inv_into_f_bij_betw by fastforce
+    have "pmf ?rhs g = measure_pmf.prob (Pi_pmf B dflt (\<lambda>_. f)) ((\<lambda>g. g \<circ> h) -` {g})"
+      unfolding pmf_map by simp
+    also have "\<dots> = measure_pmf.prob (Pi_pmf B dflt (\<lambda>_. f))
+                                (((\<lambda>g. g \<circ> h) -` {g}) \<inter> PiE_dflt B dflt (\<lambda>_. UNIV))"
+      using B by (intro measure_prob_cong_0) (auto simp: PiE_dflt_def pmf_Pi_outside)
+    also have "\<dots> = pmf (Pi_pmf B dflt (\<lambda>_. f)) (\<lambda>x. if x \<in> B then g (h' x) else dflt)"
+    proof -
+      have "(if h x \<in> B then g (h' (h x)) else dflt) = g x" for x
+        using h' assms True by (cases "x \<in> A") (auto simp add: bij_betwE)
+      then have "(\<lambda>g. g \<circ> h) -` {g} \<inter> PiE_dflt B dflt (\<lambda>_. UNIV) =
+            {(\<lambda>x. if x \<in> B then g (h' x) else dflt)}"
+        using assms h' h True unfolding PiE_dflt_def by auto
+      then show ?thesis
+        by (simp add: measure_pmf_single)
+    qed
+    also have "\<dots> = pmf (Pi_pmf A dflt (\<lambda>_. f)) g"
+      using B assms True  h'_def
+      by (auto simp add: pmf_Pi intro!: prod.reindex_bij_betw bij_betw_the_inv_into)
+    finally show ?thesis
+      by simp
+  next
+    case False
+    have "pmf ?rhs g = infsetsum (pmf (Pi_pmf B dflt (\<lambda>_. f))) ((\<lambda>g. g \<circ> h) -` {g})"
+      using assms by (auto simp add: measure_pmf_conv_infsetsum pmf_map)
+    also have "\<dots> = infsetsum (\<lambda>_. 0) ((\<lambda>g x. g (h x)) -` {g})"
+      using B False assms by (intro infsetsum_cong pmf_Pi_outside) fastforce+
+    also have "\<dots> = 0"
+      by simp
+    finally show ?thesis
+      using assms False by (auto simp add: pmf_Pi pmf_map)
+  qed
+  then show ?thesis
+    by (rule pmf_eqI)
+qed
+
+text \<open>
+  A product of uniform random choices is again a uniform distribution.
+\<close>
+lemma Pi_pmf_of_set:
+  assumes "finite A" "\<And>x. x \<in> A \<Longrightarrow> finite (B x)" "\<And>x. x \<in> A \<Longrightarrow> B x \<noteq> {}"
+  shows   "Pi_pmf A d (\<lambda>x. pmf_of_set (B x)) = pmf_of_set (PiE_dflt A d B)" (is "?lhs = ?rhs")
+proof (rule pmf_eqI, goal_cases)
+  case (1 f)
+  show ?case
+  proof (cases "\<exists>x. x \<notin> A \<and> f x \<noteq> d")
+    case True
+    hence "pmf ?lhs f = 0"
+      using assms by (intro pmf_Pi_outside) (auto simp: PiE_dflt_def)
+    also from True have "f \<notin> PiE_dflt A d B"
+      by (auto simp: PiE_dflt_def)
+    hence "0 = pmf ?rhs f"
+      using assms by (subst pmf_of_set) auto
+    finally show ?thesis .
+  next
+    case False
+    hence "pmf ?lhs f = (\<Prod>x\<in>A. pmf (pmf_of_set (B x)) (f x))"
+      using assms by (subst pmf_Pi') auto
+    also have "\<dots> = (\<Prod>x\<in>A. indicator (B x) (f x) / real (card (B x)))"
+      by (intro prod.cong refl, subst pmf_of_set) (use assms False in auto)
+    also have "\<dots> = (\<Prod>x\<in>A. indicator (B x) (f x)) / real (\<Prod>x\<in>A. card (B x))"
+      by (subst prod_dividef) simp_all
+    also have "(\<Prod>x\<in>A. indicator (B x) (f x) :: real) = indicator (PiE_dflt A d B) f"
+      using assms False by (auto simp: indicator_def PiE_dflt_def)
+    also have "(\<Prod>x\<in>A. card (B x)) = card (PiE_dflt A d B)"
+      using assms by (intro card_PiE_dflt [symmetric]) auto
+    also have "indicator (PiE_dflt A d B) f / \<dots> = pmf ?rhs f"
+      using assms by (intro pmf_of_set [symmetric]) auto
+    finally show ?thesis .
+  qed
+qed
+
+
+subsection \<open>Merging and splitting PMF products\<close>
+
+text \<open>
+  The following lemma shows that we can add a single PMF to a product:
+\<close>
+lemma Pi_pmf_insert:
+  assumes "finite A" "x \<notin> A"
+  shows   "Pi_pmf (insert x A) dflt p = map_pmf (\<lambda>(y,f). f(x:=y)) (pair_pmf (p x) (Pi_pmf A dflt p))"
+proof (intro pmf_eqI)
+  fix f
+  let ?M = "pair_pmf (p x) (Pi_pmf A dflt p)"
+  have "pmf (map_pmf (\<lambda>(y, f). f(x := y)) ?M) f =
+          measure_pmf.prob ?M ((\<lambda>(y, f). f(x := y)) -` {f})"
+    by (subst pmf_map) auto
+  also have "((\<lambda>(y, f). f(x := y)) -` {f}) = (\<Union>y'. {(f x, f(x := y'))})"
+    by (auto simp: fun_upd_def fun_eq_iff)
+  also have "measure_pmf.prob ?M \<dots> = measure_pmf.prob ?M {(f x, f(x := dflt))}"
+    using assms by (intro measure_prob_cong_0) (auto simp: pmf_pair pmf_Pi split: if_splits)
+  also have "\<dots> = pmf (p x) (f x) * pmf (Pi_pmf A dflt p) (f(x := dflt))"
+    by (simp add: measure_pmf_single pmf_pair pmf_Pi)
+  also have "\<dots> = pmf (Pi_pmf (insert x A) dflt p) f"
+  proof (cases "\<forall>y. y \<notin> insert x A \<longrightarrow> f y = dflt")
+    case True
+    with assms have "pmf (p x) (f x) * pmf (Pi_pmf A dflt p) (f(x := dflt)) =
+                       pmf (p x) (f x) * (\<Prod>xa\<in>A. pmf (p xa) ((f(x := dflt)) xa))"
+      by (subst pmf_Pi') auto
+    also have "(\<Prod>xa\<in>A. pmf (p xa) ((f(x := dflt)) xa)) = (\<Prod>xa\<in>A. pmf (p xa) (f xa))"
+      using assms by (intro prod.cong) auto
+    also have "pmf (p x) (f x) * \<dots> = pmf (Pi_pmf (insert x A) dflt p) f"
+      using assms True by (subst pmf_Pi') auto
+    finally show ?thesis .
+  qed (insert assms, auto simp: pmf_Pi)
+  finally show "\<dots> = pmf (map_pmf (\<lambda>(y, f). f(x := y)) ?M) f" ..
+qed
+
+lemma Pi_pmf_insert':
+  assumes "finite A"  "x \<notin> A"
+  shows   "Pi_pmf (insert x A) dflt p =
+             do {y \<leftarrow> p x; f \<leftarrow> Pi_pmf A dflt p; return_pmf (f(x := y))}"
+  using assms
+  by (subst Pi_pmf_insert)
+     (auto simp add: map_pmf_def pair_pmf_def case_prod_beta' bind_return_pmf bind_assoc_pmf)
+
+lemma Pi_pmf_singleton:
+  "Pi_pmf {x} dflt p = map_pmf (\<lambda>a b. if b = x then a else dflt) (p x)"
+proof -
+  have "Pi_pmf {x} dflt p = map_pmf (fun_upd (\<lambda>_. dflt) x) (p x)"
+    by (subst Pi_pmf_insert) (simp_all add: pair_return_pmf2 pmf.map_comp o_def)
+  also have "fun_upd (\<lambda>_. dflt) x = (\<lambda>z y. if y = x then z else dflt)"
+    by (simp add: fun_upd_def fun_eq_iff)
+  finally show ?thesis .
+qed
+
+text \<open>
+  Projecting a product of PMFs onto a component yields the expected result:
+\<close>
+lemma Pi_pmf_component:
+  assumes "finite A"
+  shows   "map_pmf (\<lambda>f. f x) (Pi_pmf A dflt p) = (if x \<in> A then p x else return_pmf dflt)"
+proof (cases "x \<in> A")
+  case True
+  define A' where "A' = A - {x}"
+  from assms and True have A': "A = insert x A'"
+    by (auto simp: A'_def)
+  from assms have "map_pmf (\<lambda>f. f x) (Pi_pmf A dflt p) = p x" unfolding A'
+    by (subst Pi_pmf_insert)
+       (auto simp: A'_def pmf.map_comp o_def case_prod_unfold map_fst_pair_pmf)
+  with True show ?thesis by simp
+next
+  case False
+  have "map_pmf (\<lambda>f. f x) (Pi_pmf A dflt p) = map_pmf (\<lambda>_. dflt) (Pi_pmf A dflt p)"
+    using assms False set_Pi_pmf_subset[of A dflt p]
+    by (intro pmf.map_cong refl) (auto simp: set_pmf_eq pmf_Pi_outside)
+  with False show ?thesis by simp
+qed
+
+text \<open>
+  We can take merge two PMF products on disjoint sets like this:
+\<close>
+lemma Pi_pmf_union:
+  assumes "finite A" "finite B" "A \<inter> B = {}"
+  shows   "Pi_pmf (A \<union> B) dflt p =
+             map_pmf (\<lambda>(f,g) x. if x \<in> A then f x else g x)
+             (pair_pmf (Pi_pmf A dflt p) (Pi_pmf B dflt p))" (is "_ = map_pmf (?h A) (?q A)")
+  using assms(1,3)
+proof (induction rule: finite_induct)
+  case (insert x A)
+  have "map_pmf (?h (insert x A)) (?q (insert x A)) =
+          do {v \<leftarrow> p x; (f, g) \<leftarrow> pair_pmf (Pi_pmf A dflt p) (Pi_pmf B dflt p);
+              return_pmf (\<lambda>y. if y \<in> insert x A then (f(x := v)) y else g y)}"
+    by (subst Pi_pmf_insert)
+       (insert insert.hyps insert.prems,
+        simp_all add: pair_pmf_def map_bind_pmf bind_map_pmf bind_assoc_pmf bind_return_pmf)
+  also have "\<dots> = do {v \<leftarrow> p x; (f, g) \<leftarrow> ?q A; return_pmf ((?h A (f,g))(x := v))}"
+    by (intro bind_pmf_cong refl) (auto simp: fun_eq_iff)
+  also have "\<dots> = do {v \<leftarrow> p x; f \<leftarrow> map_pmf (?h A) (?q A); return_pmf (f(x := v))}"
+    by (simp add: bind_map_pmf map_bind_pmf case_prod_unfold cong: if_cong)
+  also have "\<dots> = do {v \<leftarrow> p x; f \<leftarrow> Pi_pmf (A \<union> B) dflt p; return_pmf (f(x := v))}"
+    using insert.hyps and insert.prems by (intro bind_pmf_cong insert.IH [symmetric] refl) auto
+  also have "\<dots> = Pi_pmf (insert x (A \<union> B)) dflt p"
+    by (subst Pi_pmf_insert)
+       (insert assms insert.hyps insert.prems, auto simp: pair_pmf_def map_bind_pmf)
+  also have "insert x (A \<union> B) = insert x A \<union> B"
+    by simp
+  finally show ?case ..
+qed (simp_all add: case_prod_unfold map_snd_pair_pmf)
+
+text \<open>
+  We can also project a product to a subset of the indices by mapping all the other
+  indices to the default value:
+\<close>
+lemma Pi_pmf_subset:
+  assumes "finite A" "A' \<subseteq> A"
+  shows   "Pi_pmf A' dflt p = map_pmf (\<lambda>f x. if x \<in> A' then f x else dflt) (Pi_pmf A dflt p)"
+proof -
+  let ?P = "pair_pmf (Pi_pmf A' dflt p) (Pi_pmf (A - A') dflt p)"
+  from assms have [simp]: "finite A'"
+    by (blast dest: finite_subset)
+  from assms have "A = A' \<union> (A - A')"
+    by blast
+  also have "Pi_pmf \<dots> dflt p = map_pmf (\<lambda>(f,g) x. if x \<in> A' then f x else g x) ?P"
+    using assms by (intro Pi_pmf_union) auto
+  also have "map_pmf (\<lambda>f x. if x \<in> A' then f x else dflt) \<dots> = map_pmf fst ?P"
+    unfolding map_pmf_comp o_def case_prod_unfold
+    using set_Pi_pmf_subset[of A' dflt p] by (intro map_pmf_cong refl) (auto simp: fun_eq_iff)
+  also have "\<dots> = Pi_pmf A' dflt p"
+    by (simp add: map_fst_pair_pmf)
+  finally show ?thesis ..
+qed
+
+lemma Pi_pmf_subset':
+  fixes f :: "'a \<Rightarrow> 'b pmf"
+  assumes "finite A" "B \<subseteq> A" "\<And>x. x \<in> A - B \<Longrightarrow> f x = return_pmf dflt"
+  shows "Pi_pmf A dflt f = Pi_pmf B dflt f"
+proof -
+  have "Pi_pmf (B \<union> (A - B)) dflt f =
+          map_pmf (\<lambda>(f, g) x. if x \<in> B then f x else g x)
+                  (pair_pmf (Pi_pmf B dflt f) (Pi_pmf (A - B) dflt f))"
+    using assms by (intro Pi_pmf_union) (auto dest: finite_subset)
+  also have "Pi_pmf (A - B) dflt f = Pi_pmf (A - B) dflt (\<lambda>_. return_pmf dflt)"
+    using assms by (intro Pi_pmf_cong) auto
+  also have "\<dots> = return_pmf (\<lambda>_. dflt)"
+    using assms by simp
+  also have "map_pmf (\<lambda>(f, g) x. if x \<in> B then f x else g x)
+                  (pair_pmf (Pi_pmf B dflt f) (return_pmf (\<lambda>_. dflt))) =
+             map_pmf (\<lambda>f x. if x \<in> B then f x else dflt) (Pi_pmf B dflt f)"
+    by (simp add: map_pmf_def pair_pmf_def bind_assoc_pmf bind_return_pmf bind_return_pmf')
+  also have "\<dots> = Pi_pmf B dflt f"
+    using assms by (intro Pi_pmf_default_swap) (auto dest: finite_subset)
+  also have "B \<union> (A - B) = A"
+    using assms by auto
+  finally show ?thesis .
+qed
+
+lemma Pi_pmf_if_set:
+  assumes "finite A"
+  shows "Pi_pmf A dflt (\<lambda>x. if b x then f x else return_pmf dflt) =
+           Pi_pmf {x\<in>A. b x} dflt f"
+proof -
+  have "Pi_pmf A dflt (\<lambda>x. if b x then f x else return_pmf dflt) =
+          Pi_pmf {x\<in>A. b x} dflt (\<lambda>x. if b x then f x else return_pmf dflt)"
+    using assms by (intro Pi_pmf_subset') auto
+  also have "\<dots> = Pi_pmf {x\<in>A. b x} dflt f"
+    by (intro Pi_pmf_cong) auto
+  finally show ?thesis .
+qed
+
+lemma Pi_pmf_if_set':
+  assumes "finite A"
+  shows "Pi_pmf A dflt (\<lambda>x. if b x then return_pmf dflt else f x) =
+         Pi_pmf {x\<in>A. \<not>b x} dflt f"
+proof -
+  have "Pi_pmf A dflt (\<lambda>x. if b x then return_pmf dflt else  f x) =
+          Pi_pmf {x\<in>A. \<not>b x} dflt (\<lambda>x. if b x then return_pmf dflt else  f x)"
+    using assms by (intro Pi_pmf_subset') auto
+  also have "\<dots> = Pi_pmf {x\<in>A. \<not>b x} dflt f"
+    by (intro Pi_pmf_cong) auto
+  finally show ?thesis .
+qed
+
+text \<open>
+  Lastly, we can delete a single component from a product:
+\<close>
+lemma Pi_pmf_remove:
+  assumes "finite A"
+  shows   "Pi_pmf (A - {x}) dflt p = map_pmf (\<lambda>f. f(x := dflt)) (Pi_pmf A dflt p)"
+proof -
+  have "Pi_pmf (A - {x}) dflt p =
+          map_pmf (\<lambda>f xa. if xa \<in> A - {x} then f xa else dflt) (Pi_pmf A dflt p)"
+    using assms by (intro Pi_pmf_subset) auto
+  also have "\<dots> = map_pmf (\<lambda>f. f(x := dflt)) (Pi_pmf A dflt p)"
+    using set_Pi_pmf_subset[of A dflt p] assms
+    by (intro map_pmf_cong refl) (auto simp: fun_eq_iff)
+  finally show ?thesis .
+qed
+
+
+subsection \<open>Additional properties\<close>
+
+lemma nn_integral_prod_Pi_pmf:
+  assumes "finite A"
+  shows   "nn_integral (Pi_pmf A dflt p) (\<lambda>y. \<Prod>x\<in>A. f x (y x)) = (\<Prod>x\<in>A. nn_integral (p x) (f x))"
+  using assms
+proof (induction rule: finite_induct)
+  case (insert x A)
+  have "nn_integral (Pi_pmf (insert x A) dflt p) (\<lambda>y. \<Prod>z\<in>insert x A. f z (y z)) =
+          (\<integral>\<^sup>+a. \<integral>\<^sup>+b. f x a * (\<Prod>z\<in>A. f z (if z = x then a else b z)) \<partial>Pi_pmf A dflt p \<partial>p x)"
+    using insert by (auto simp: Pi_pmf_insert case_prod_unfold nn_integral_pair_pmf' cong: if_cong)
+  also have "(\<lambda>a b. \<Prod>z\<in>A. f z (if z = x then a else b z)) = (\<lambda>a b. \<Prod>z\<in>A. f z (b z))"
+    by (intro ext prod.cong) (use insert.hyps in auto)
+  also have "(\<integral>\<^sup>+a. \<integral>\<^sup>+b. f x a * (\<Prod>z\<in>A. f z (b z)) \<partial>Pi_pmf A dflt p \<partial>p x) =
+             (\<integral>\<^sup>+y. f x y \<partial>(p x)) * (\<integral>\<^sup>+y. (\<Prod>z\<in>A. f z (y z)) \<partial>(Pi_pmf A dflt p))"
+    by (simp add: nn_integral_multc nn_integral_cmult)
+  also have "(\<integral>\<^sup>+y. (\<Prod>z\<in>A. f z (y z)) \<partial>(Pi_pmf A dflt p)) = (\<Prod>x\<in>A. nn_integral (p x) (f x))"
+    by (rule insert.IH)
+  also have "(\<integral>\<^sup>+y. f x y \<partial>(p x)) * \<dots> = (\<Prod>x\<in>insert x A. nn_integral (p x) (f x))"
+    using insert.hyps by simp
+  finally show ?case .
+qed auto
+
+lemma integrable_prod_Pi_pmf:
+  fixes f :: "'a \<Rightarrow> 'b \<Rightarrow> 'c :: {real_normed_field, second_countable_topology, banach}"
+  assumes "finite A" and "\<And>x. x \<in> A \<Longrightarrow> integrable (measure_pmf (p x)) (f x)"
+  shows   "integrable (measure_pmf (Pi_pmf A dflt p)) (\<lambda>h. \<Prod>x\<in>A. f x (h x))"
+proof (intro integrableI_bounded)
+  have "(\<integral>\<^sup>+ x. ennreal (norm (\<Prod>xa\<in>A. f xa (x xa))) \<partial>measure_pmf (Pi_pmf A dflt p)) =
+        (\<integral>\<^sup>+ x. (\<Prod>y\<in>A. ennreal (norm (f y (x y)))) \<partial>measure_pmf (Pi_pmf A dflt p))"
+    by (simp flip: prod_norm prod_ennreal)
+  also have "\<dots> = (\<Prod>x\<in>A. \<integral>\<^sup>+ a. ennreal (norm (f x a)) \<partial>measure_pmf (p x))"
+    by (intro nn_integral_prod_Pi_pmf) fact
+  also have "(\<integral>\<^sup>+a. ennreal (norm (f i a)) \<partial>measure_pmf (p i)) \<noteq> top" if i: "i \<in> A" for i
+    using assms(2)[OF i] by (simp add: integrable_iff_bounded)
+  hence "(\<Prod>x\<in>A. \<integral>\<^sup>+ a. ennreal (norm (f x a)) \<partial>measure_pmf (p x)) \<noteq> top"
+    by (subst ennreal_prod_eq_top) auto
+  finally show "(\<integral>\<^sup>+ x. ennreal (norm (\<Prod>xa\<in>A. f xa (x xa))) \<partial>measure_pmf (Pi_pmf A dflt p)) < \<infinity>"
+    by (simp add: top.not_eq_extremum)
+qed auto
+
+lemma expectation_prod_Pi_pmf:
+  fixes f :: "_ \<Rightarrow> _ \<Rightarrow> real"
+  assumes "finite A"
+  assumes "\<And>x. x \<in> A \<Longrightarrow> integrable (measure_pmf (p x)) (f x)"
+  assumes "\<And>x y. x \<in> A \<Longrightarrow> y \<in> set_pmf (p x) \<Longrightarrow> f x y \<ge> 0"
+  shows   "measure_pmf.expectation (Pi_pmf A dflt p) (\<lambda>y. \<Prod>x\<in>A. f x (y x)) =
+             (\<Prod>x\<in>A. measure_pmf.expectation (p x) (\<lambda>v. f x v))"
+proof -
+  have nonneg: "measure_pmf.expectation (p x) (f x) \<ge> 0" if "x \<in> A" for x
+    using that by (intro Bochner_Integration.integral_nonneg_AE AE_pmfI assms)
+  have nonneg': "0 \<le> measure_pmf.expectation (Pi_pmf A dflt p) (\<lambda>y. \<Prod>x\<in>A. f x (y x))"
+    by (intro Bochner_Integration.integral_nonneg_AE AE_pmfI assms prod_nonneg)
+       (use assms in \<open>auto simp: set_Pi_pmf PiE_dflt_def\<close>)
+
+  have "ennreal (measure_pmf.expectation (Pi_pmf A dflt p) (\<lambda>y. \<Prod>x\<in>A. f x (y x))) =
+          nn_integral (Pi_pmf A dflt p) (\<lambda>y. ennreal (\<Prod>x\<in>A. f x (y x)))" using assms
+    by (intro nn_integral_eq_integral [symmetric] assms integrable_prod_Pi_pmf)
+       (auto simp: AE_measure_pmf_iff set_Pi_pmf PiE_dflt_def prod_nonneg)
+  also have "\<dots> = nn_integral (Pi_pmf A dflt p) (\<lambda>y. (\<Prod>x\<in>A. ennreal (f x (y x))))"
+    by (intro nn_integral_cong_AE AE_pmfI prod_ennreal [symmetric])
+       (use assms(1) in \<open>auto simp: set_Pi_pmf PiE_dflt_def intro!: assms(3)\<close>)
+  also have "\<dots> = (\<Prod>x\<in>A. \<integral>\<^sup>+ a. ennreal (f x a) \<partial>measure_pmf (p x))"
+    by (rule nn_integral_prod_Pi_pmf) fact+
+  also have "\<dots> = (\<Prod>x\<in>A. ennreal (measure_pmf.expectation (p x) (f x)))"
+    by (intro prod.cong nn_integral_eq_integral assms AE_pmfI) auto
+  also have "\<dots> = ennreal (\<Prod>x\<in>A. measure_pmf.expectation (p x) (f x))"
+    by (intro prod_ennreal nonneg)
+  finally show ?thesis
+    using nonneg nonneg' by (subst (asm) ennreal_inj) (auto intro!: prod_nonneg)
+qed
+
+lemma indep_vars_Pi_pmf:
+  assumes fin: "finite I"
+  shows   "prob_space.indep_vars (measure_pmf (Pi_pmf I dflt p))
+             (\<lambda>_. count_space UNIV) (\<lambda>x f. f x) I"
+proof (cases "I = {}")
+  case True
+  show ?thesis 
+    by (subst prob_space.indep_vars_def [OF measure_pmf.prob_space_axioms],
+        subst prob_space.indep_sets_def [OF measure_pmf.prob_space_axioms]) (simp_all add: True)
+next
+  case [simp]: False
+  show ?thesis
+  proof (subst prob_space.indep_vars_iff_distr_eq_PiM')
+    show "distr (measure_pmf (Pi_pmf I dflt p)) (Pi\<^sub>M I (\<lambda>i. count_space UNIV)) (\<lambda>x. restrict x I) =
+          Pi\<^sub>M I (\<lambda>i. distr (measure_pmf (Pi_pmf I dflt p)) (count_space UNIV) (\<lambda>f. f i))"
+    proof (rule product_sigma_finite.PiM_eqI, goal_cases)
+      case 1
+      interpret product_prob_space "\<lambda>i. distr (measure_pmf (Pi_pmf I dflt p)) (count_space UNIV) (\<lambda>f. f i)"
+        by (intro product_prob_spaceI prob_space.prob_space_distr measure_pmf.prob_space_axioms)
+           simp_all
+      show ?case by unfold_locales
+    next
+      case 3
+      have "sets (Pi\<^sub>M I (\<lambda>i. distr (measure_pmf (Pi_pmf I dflt p)) (count_space UNIV) (\<lambda>f. f i))) =
+            sets (Pi\<^sub>M I (\<lambda>_. count_space UNIV))"
+        by (intro sets_PiM_cong) simp_all
+      thus ?case by simp
+    next
+      case (4 A)
+      have "Pi\<^sub>E I A \<in> sets (Pi\<^sub>M I (\<lambda>i. count_space UNIV))"
+        using 4 by (intro sets_PiM_I_finite fin) auto
+      hence "emeasure (distr (measure_pmf (Pi_pmf I dflt p)) (Pi\<^sub>M I (\<lambda>i. count_space UNIV))
+              (\<lambda>x. restrict x I)) (Pi\<^sub>E I A) =
+             emeasure (measure_pmf (Pi_pmf I dflt p)) ((\<lambda>x. restrict x I) -` Pi\<^sub>E I A)"
+        using 4 by (subst emeasure_distr) (auto simp: space_PiM)
+      also have "\<dots> = emeasure (measure_pmf (Pi_pmf I dflt p)) (PiE_dflt I dflt A)"
+        by (intro emeasure_eq_AE AE_pmfI) (auto simp: PiE_dflt_def set_Pi_pmf fin)
+      also have "\<dots> = (\<Prod>i\<in>I. emeasure (measure_pmf (p i)) (A i))"
+        by (simp add: measure_pmf.emeasure_eq_measure measure_Pi_pmf_PiE_dflt fin prod_ennreal)
+      also have "\<dots> = (\<Prod>i\<in>I. emeasure (measure_pmf (map_pmf (\<lambda>f. f i) (Pi_pmf I dflt p))) (A i))"
+        by (intro prod.cong refl, subst Pi_pmf_component) (auto simp: fin)
+      finally show ?case
+        by (simp add: map_pmf_rep_eq)
+    qed fact+
+  qed (simp_all add: measure_pmf.prob_space_axioms)
+qed
+
+lemma
+  fixes h :: "'a :: comm_monoid_add \<Rightarrow> 'b::{banach, second_countable_topology}"
+  assumes fin: "finite I"
+  assumes integrable: "\<And>i. i \<in> I \<Longrightarrow> integrable (measure_pmf (D i)) h"
+  shows   integrable_sum_Pi_pmf: "integrable (Pi_pmf I dflt D) (\<lambda>g. \<Sum>i\<in>I. h (g i))"
+    and   expectation_sum_Pi_pmf:
+            "measure_pmf.expectation (Pi_pmf I dflt D) (\<lambda>g. \<Sum>i\<in>I. h (g i)) =
+             (\<Sum>i\<in>I. measure_pmf.expectation (D i) h)"
+proof -
+  have integrable': "integrable (Pi_pmf I dflt D) (\<lambda>g. h (g i))" if i: "i \<in> I" for i
+  proof -
+    have "integrable (D i) h"
+      using i by (rule assms)
+    also have "D i = map_pmf (\<lambda>g. g i) (Pi_pmf I dflt D)"
+      by (subst Pi_pmf_component) (use fin i in auto)
+    finally show "integrable (measure_pmf (Pi_pmf I dflt D)) (\<lambda>x. h (x i))"
+      by simp
+  qed
+  thus "integrable (Pi_pmf I dflt D) (\<lambda>g. \<Sum>i\<in>I. h (g i))"
+    by (intro Bochner_Integration.integrable_sum)
+
+  have "measure_pmf.expectation (Pi_pmf I dflt D) (\<lambda>x. \<Sum>i\<in>I. h (x i)) =
+               (\<Sum>i\<in>I. measure_pmf.expectation (map_pmf (\<lambda>x. x i) (Pi_pmf I dflt D)) h)"
+    using integrable' by (subst Bochner_Integration.integral_sum) auto
+  also have "\<dots> = (\<Sum>i\<in>I. measure_pmf.expectation (D i) h)"
+    by (intro sum.cong refl, subst Pi_pmf_component) (use fin in auto)
+  finally show "measure_pmf.expectation (Pi_pmf I dflt D) (\<lambda>g. \<Sum>i\<in>I. h (g i)) =
+                  (\<Sum>i\<in>I. measure_pmf.expectation (D i) h)" .
+qed
+
+
+subsection \<open>Applications\<close>
+
+text \<open>
+  Choosing a subset of a set uniformly at random is equivalent to tossing a fair coin
+  independently for each element and collecting all the elements that came up heads.
+\<close>
+lemma pmf_of_set_Pow_conv_bernoulli:
+  assumes "finite (A :: 'a set)"
+  shows "map_pmf (\<lambda>b. {x\<in>A. b x}) (Pi_pmf A P (\<lambda>_. bernoulli_pmf (1/2))) = pmf_of_set (Pow A)"
+proof -
+  have "Pi_pmf A P (\<lambda>_. bernoulli_pmf (1/2)) = pmf_of_set (PiE_dflt A P (\<lambda>x. UNIV))"
+    using assms by (simp add: bernoulli_pmf_half_conv_pmf_of_set Pi_pmf_of_set)
+  also have "map_pmf (\<lambda>b. {x\<in>A. b x}) \<dots> = pmf_of_set (Pow A)"
+  proof -
+    have "bij_betw (\<lambda>b. {x \<in> A. b x}) (PiE_dflt A P (\<lambda>_. UNIV)) (Pow A)"
+      by (rule bij_betwI[of _ _ _ "\<lambda>B b. if b \<in> A then b \<in> B else P"]) (auto simp add: PiE_dflt_def)
+    then show ?thesis
+      using assms by (intro map_pmf_of_set_bij_betw) auto
+  qed
+  finally show ?thesis
+    by simp
+qed
+
+text \<open>
+  A binomial distribution can be seen as the number of successes in \<open>n\<close> independent coin tosses.
+\<close>
+lemma binomial_pmf_altdef':
+  fixes A :: "'a set"
+  assumes "finite A" and "card A = n" and p: "p \<in> {0..1}"
+  shows   "binomial_pmf n p =
+             map_pmf (\<lambda>f. card {x\<in>A. f x}) (Pi_pmf A dflt (\<lambda>_. bernoulli_pmf p))" (is "?lhs = ?rhs")
+proof -
+  from assms have "?lhs = binomial_pmf (card A) p"
+    by simp
+  also have "\<dots> = ?rhs"
+  using assms(1)
+  proof (induction rule: finite_induct)
+    case empty
+    with p show ?case by (simp add: binomial_pmf_0)
+  next
+    case (insert x A)
+    from insert.hyps have "card (insert x A) = Suc (card A)"
+      by simp
+    also have "binomial_pmf \<dots> p = do {
+                                     b \<leftarrow> bernoulli_pmf p;
+                                     f \<leftarrow> Pi_pmf A dflt (\<lambda>_. bernoulli_pmf p);
+                                     return_pmf ((if b then 1 else 0) + card {y \<in> A. f y})
+                                   }"
+      using p by (simp add: binomial_pmf_Suc insert.IH bind_map_pmf)
+    also have "\<dots> = do {
+                      b \<leftarrow> bernoulli_pmf p;
+                      f \<leftarrow> Pi_pmf A dflt (\<lambda>_. bernoulli_pmf p);
+                      return_pmf (card {y \<in> insert x A. (f(x := b)) y})
+                    }"
+    proof (intro bind_pmf_cong refl, goal_cases)
+      case (1 b f)
+      have "(if b then 1 else 0) + card {y\<in>A. f y} = card ((if b then {x} else {}) \<union> {y\<in>A. f y})"
+        using insert.hyps by auto
+      also have "(if b then {x} else {}) \<union> {y\<in>A. f y} = {y\<in>insert x A. (f(x := b)) y}"
+        using insert.hyps by auto
+      finally show ?case by simp
+    qed
+    also have "\<dots> = map_pmf (\<lambda>f. card {y\<in>insert x A. f y})
+                      (Pi_pmf (insert x A) dflt (\<lambda>_. bernoulli_pmf p))"
+      using insert.hyps by (subst Pi_pmf_insert) (simp_all add: pair_pmf_def map_bind_pmf)
+    finally show ?case .
+  qed
+  finally show ?thesis .
+qed
+
+end
--- a/src/HOL/Set_Interval.thy	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/HOL/Set_Interval.thy	Sat Feb 20 17:04:26 2021 +0100
@@ -1545,6 +1545,18 @@
   finally show ?thesis.
 qed
 
+lemma card_le_Suc_Max: "finite S \<Longrightarrow> card S \<le> Suc (Max S)"
+proof (rule classical)
+  assume "finite S" and "\<not> Suc (Max S) \<ge> card S"
+  then have "Suc (Max S) < card S"
+    by simp
+  with `finite S` have "S \<subseteq> {0..Max S}"
+    by auto
+  hence "card S \<le> card {0..Max S}"
+    by (intro card_mono; auto)
+  thus "card S \<le> Suc (Max S)"
+    by simp
+qed
 
 subsection \<open>Lemmas useful with the summation operator sum\<close>
 
@@ -2057,6 +2069,30 @@
 
 end
 
+lemma card_sum_le_nat_sum: "\<Sum> {0..<card S} \<le> \<Sum> S"
+proof (cases "finite S")
+  case True
+  then show ?thesis
+  proof (induction "card S" arbitrary: S)
+    case (Suc x)
+    then have "Max S \<ge> x" using card_le_Suc_Max by fastforce
+    let ?S' = "S - {Max S}"
+    from Suc have "Max S \<in> S" by (auto intro: Max_in)
+    hence cards: "card S = Suc (card ?S')"
+      using `finite S` by (intro card.remove; auto)
+    hence "\<Sum> {0..<card ?S'} \<le> \<Sum> ?S'"
+      using Suc by (intro Suc; auto)
+
+    hence "\<Sum> {0..<card ?S'} + x \<le> \<Sum> ?S' + Max S"
+      using `Max S \<ge> x` by simp
+    also have "... = \<Sum> S"
+      using sum.remove[OF `finite S` `Max S \<in> S`, where g="\<lambda>x. x"]
+      by simp
+    finally show ?case
+      using cards Suc by auto
+  qed simp
+qed simp
+
 lemma sum_natinterval_diff:
   fixes f:: "nat \<Rightarrow> ('a::ab_group_add)"
   shows  "sum (\<lambda>k. f k - f(k + 1)) {(m::nat) .. n} =
--- a/src/Pure/Admin/build_history.scala	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/Pure/Admin/build_history.scala	Sat Feb 20 17:04:26 2021 +0100
@@ -107,6 +107,7 @@
     afp_partition: Int = 0,
     isabelle_identifier: String = default_isabelle_identifier,
     ml_statistics_step: Int = 1,
+    component_repository: String = Components.default_component_repository,
     components_base: Path = Components.default_components_base,
     fresh: Boolean = false,
     hostname: String = "",
@@ -161,8 +162,12 @@
     val (afp_build_args, afp_sessions) =
       if (afp_rev.isEmpty) (Nil, Nil)
       else {
-        val afp = AFP.init(options, afp_repos)
-        (List("-d", "~~/AFP/thys"), afp.partition(afp_partition))
+        val (opt, sessions) =
+          try {
+            val afp = AFP.init(options, afp_repos)
+            ("-d", afp.partition(afp_partition))
+          } catch { case ERROR(_) => ("-D", Nil) }
+        (List(opt, "~~/AFP/thys"), sessions)
       }
 
 
@@ -183,9 +188,11 @@
 
       val component_settings =
         other_isabelle.init_components(
-          components_base = components_base, catalogs = List("main", "optional"))
+          component_repository = component_repository,
+          components_base = components_base,
+          catalogs = List("main", "optional"))
       other_isabelle.init_settings(component_settings ::: init_settings)
-      other_isabelle.resolve_components(verbose)
+      other_isabelle.resolve_components(echo = verbose)
       val ml_platform =
         augment_settings(other_isabelle, threads, arch_64, heap, max_heap, more_settings)
 
@@ -198,7 +205,7 @@
       val isabelle_base_log = isabelle_output + Path.explode("../base_log")
 
       if (first_build) {
-        other_isabelle.resolve_components(verbose)
+        other_isabelle.resolve_components(echo = verbose)
 
         if (fresh)
           Isabelle_System.rm_tree(other_isabelle.isabelle_home + Path.explode("lib/classes"))
@@ -403,6 +410,7 @@
       var multicore_list = List(default_multicore)
       var isabelle_identifier = default_isabelle_identifier
       var afp_partition = 0
+      var component_repository = Components.default_component_repository
       var more_settings: List[String] = Nil
       var more_preferences: List[String] = Nil
       var fresh = false
@@ -430,6 +438,8 @@
     -M MULTICORE multicore configurations (see below)
     -N NAME      alternative ISABELLE_IDENTIFIER (default: """ + default_isabelle_identifier + """)
     -P NUMBER    AFP partition number (0, 1, 2, default: 0=unrestricted)
+    -R URL       remote repository for Isabelle components (default: """ +
+      Components.default_component_repository + """)
     -U SIZE      maximal ML heap in MB (default: unbounded)
     -e TEXT      additional text for generated etc/settings
     -f           fresh build of Isabelle/Scala components (recommended)
@@ -458,6 +468,7 @@
         "M:" -> (arg => multicore_list = space_explode(',', arg).map(Multicore.parse)),
         "N:" -> (arg => isabelle_identifier = arg),
         "P:" -> (arg => afp_partition = Value.Int.parse(arg)),
+        "R:" -> (arg => component_repository = arg),
         "U:" -> (arg => max_heap = Some(Value.Int.parse(arg))),
         "e:" -> (arg => more_settings = more_settings ::: List(arg)),
         "f" -> (_ => fresh = true),
@@ -491,8 +502,9 @@
         build_history(Options.init(), root, user_home = user_home, progress = progress, rev = rev,
           afp_rev = afp_rev, afp_partition = afp_partition,
           isabelle_identifier = isabelle_identifier, ml_statistics_step = ml_statistics_step,
-          components_base = components_base, fresh = fresh, hostname = hostname,
-          multicore_base = multicore_base, multicore_list = multicore_list, arch_64 = arch_64,
+          component_repository = component_repository, components_base = components_base,
+          fresh = fresh, hostname = hostname, multicore_base = multicore_base,
+          multicore_list = multicore_list, arch_64 = arch_64,
           heap = heap.getOrElse(if (arch_64) default_heap * 2 else default_heap),
           max_heap = max_heap, init_settings = init_settings, more_settings = more_settings,
           more_preferences = more_preferences, verbose = verbose, build_tags = build_tags,
@@ -570,7 +582,7 @@
       else {
         val afp_repos = isabelle_repos_other + Path.explode("AFP")
         Mercurial.setup_repository(afp_repos_source, afp_repos, ssh = ssh)
-          " -A " + Bash.string(afp_rev.get)
+        " -A " + Bash.string(afp_rev.get)
       }
 
 
@@ -580,11 +592,19 @@
     {
       val output_file = tmp_dir + Path.explode("output")
 
-      execute("Admin/build_history",
-        "-o " + ssh.bash_path(output_file) +
-          (if (rev == "") "" else " -r " + Bash.string(rev_id)) + " " +
-          options + afp_options + " " + ssh.bash_path(isabelle_repos_other) + " " + args,
-        echo = true, strict = false)
+      val rev_options = if (rev == "") "" else " -r " + Bash.string(rev_id)
+
+      try {
+        execute("Admin/build_history",
+          "-o " + ssh.bash_path(output_file) + rev_options + afp_options + " " + options + " " +
+            ssh.bash_path(isabelle_repos_other) + " " + args,
+          echo = true, strict = false)
+      }
+      catch {
+        case ERROR(msg) =>
+          cat_error(msg,
+            "The error(s) above occurred for build_bistory " + rev_options + afp_options)
+      }
 
       for (line <- split_lines(ssh.read(output_file)))
       yield {
--- a/src/Pure/Admin/components.scala	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/Pure/Admin/components.scala	Sat Feb 20 17:04:26 2021 +0100
@@ -38,6 +38,9 @@
 
   /* component collections */
 
+  def default_component_repository: String =
+    Isabelle_System.getenv("ISABELLE_COMPONENT_REPOSITORY")
+
   val default_components_base: Path = Path.explode("$ISABELLE_COMPONENTS_BASE")
 
   def admin(dir: Path): Path = dir + Path.explode("Admin/components")
@@ -63,7 +66,7 @@
       val archive_name = Archive(name)
       val archive = base_dir + Path.explode(archive_name)
       if (!archive.is_file) {
-        val remote = Isabelle_System.getenv("ISABELLE_COMPONENT_REPOSITORY") + "/" + archive_name
+        val remote = Components.default_component_repository + "/" + archive_name
         progress.echo("Getting " + remote)
         Bytes.write(archive, Url.read_bytes(Url(remote)))
       }
--- a/src/Pure/Admin/isabelle_cronjob.scala	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/Pure/Admin/isabelle_cronjob.scala	Sat Feb 20 17:04:26 2021 +0100
@@ -92,6 +92,10 @@
 
   /* integrity test of build_history vs. build_history_base */
 
+  def build_history_options0: String =
+    " -R " + Bash.string(Components.default_component_repository) +
+    " -C '$USER_HOME/.isabelle/contrib' -f "
+
   val build_history_base: Logger_Task =
     Logger_Task("build_history_base", logger =>
       {
@@ -104,7 +108,7 @@
                 isabelle_identifier = "cronjob_build_history",
                 self_update = true,
                 rev = "build_history_base",
-                options = "-f",
+                options = build_history_options0,
                 args = "HOL")
 
             for ((log_name, bytes) <- results) {
@@ -222,6 +226,14 @@
         pick_days(2000, 1)
       })
     }
+
+    def build_history_options: String =
+      " -h " + Bash.string(host) + " " +
+      (java_heap match {
+        case "" => ""
+        case h =>
+          "-e 'ISABELLE_TOOL_JAVA_OPTIONS=\"$ISABELLE_TOOL_JAVA_OPTIONS -Xmx" + h + "\"' "
+      }) + options
   }
 
   val remote_builds_old: List[Remote_Build] =
@@ -326,6 +338,15 @@
         options = "-m32 -B -M1,2,4 -e ISABELLE_GHC_SETUP=true -p pide_session=false",
         self_update = true, args = "-a -d '~~/src/Benchmarks'")),
       List(
+        Remote_Build("macOS 11.1 Big Sur", "mini1",
+          options = "-m32 -B -M1x2,2,4 -p pide_session=false" +
+            " -e ISABELLE_OCAML=ocaml -e ISABELLE_OCAMLC=ocamlc -e ISABELLE_OCAML_SETUP=true" +
+            " -e ISABELLE_GHC_SETUP=true" +
+            " -e ISABELLE_MLTON=/usr/local/bin/mlton" +
+            " -e ISABELLE_SMLNJ=/usr/local/smlnj/bin/sml" +
+            " -e ISABELLE_SWIPL=/usr/local/bin/swipl",
+          self_update = true, args = "-a -d '~~/src/Benchmarks'")),
+      List(
         Remote_Build("macOS 10.14 Mojave", "mini2",
           options = "-m32 -B -M1x2,2,4 -p pide_session=false" +
             " -e ISABELLE_OCAML=ocaml -e ISABELLE_OCAMLC=ocamlc -e ISABELLE_OCAML_SETUP=true" +
@@ -408,12 +429,7 @@
                 afp_rev = afp_rev,
                 options =
                   " -N " + Bash.string(task_name) + (if (i < 0) "" else "_" + (i + 1).toString) +
-                  " -f -h " + Bash.string(r.host) + " " +
-                  (r.java_heap match {
-                    case "" => ""
-                    case h =>
-                      "-e 'ISABELLE_TOOL_JAVA_OPTIONS=\"$ISABELLE_TOOL_JAVA_OPTIONS -Xmx" + h + "\"' "
-                  }) + r.options,
+                  build_history_options0 + r.build_history_options,
                 args = "-o timeout=10800 " + r.args)
 
             for ((log_name, bytes) <- results) {
--- a/src/Pure/Admin/other_isabelle.scala	Sat Feb 20 13:49:24 2021 +0100
+++ b/src/Pure/Admin/other_isabelle.scala	Sat Feb 20 17:04:26 2021 +0100
@@ -50,7 +50,9 @@
     bash("bin/isabelle " + cmdline, redirect = redirect, echo = echo, strict = strict)
 
   def resolve_components(echo: Boolean): Unit =
-    other_isabelle("components -a", redirect = true, echo = echo).check
+    other_isabelle(
+      "env ISABELLE_TOOLS=" + Bash.string(Isabelle_System.getenv("ISABELLE_TOOLS")) +
+      " isabelle components -a", redirect = true, echo = echo).check
 
   def getenv(name: String): String =
     other_isabelle("getenv -b " + Bash.string(name)).check.out
@@ -69,11 +71,14 @@
   /* components */
 
   def init_components(
+    component_repository: String = Components.default_component_repository,
     components_base: Path = Components.default_components_base,
     catalogs: List[String] = Nil,
     components: List[String] = Nil): List[String] =
   {
     val dir = Components.admin(isabelle_home)
+
+    ("ISABELLE_COMPONENT_REPOSITORY=" + Bash.string(component_repository)) ::
     catalogs.map(name =>
       "init_components " + File.bash_path(components_base) + " " +
         File.bash_path(dir + Path.basic(name))) :::