HOL-Analysis/Probability: Hoeffding's inequality, negative binomial distribution, etc.
author Manuel Eberl <eberlm@in.tum.de>
Fri, 19 Feb 2021 13:42:12 +0100
changeset 73253 f6bb31879698
parent 73252 b4552595b04e
child 73257 446085672b74
CONTRIBUTORS
NEWS
src/HOL/Analysis/Bochner_Integration.thy
src/HOL/Analysis/Borel_Space.thy
src/HOL/Analysis/Nonnegative_Lebesgue_Integration.thy
src/HOL/Analysis/Set_Integral.thy
src/HOL/Library/Extended_Nonnegative_Real.thy
src/HOL/Probability/Central_Limit_Theorem.thy
src/HOL/Probability/Conditional_Expectation.thy
src/HOL/Probability/Giry_Monad.thy
src/HOL/Probability/Hoeffding.thy
src/HOL/Probability/Independent_Family.thy
src/HOL/Probability/Infinite_Product_Measure.thy
src/HOL/Probability/Probability.thy
src/HOL/Probability/Probability_Mass_Function.thy
src/HOL/Probability/Probability_Measure.thy
src/HOL/Probability/Product_PMF.thy
--- a/CONTRIBUTORS	Tue Feb 16 17:12:02 2021 +0000
+++ b/CONTRIBUTORS	Fri Feb 19 13:42:12 2021 +0100
@@ -6,6 +6,10 @@
 Contributions to this Isabelle version
 --------------------------------------
 
+* February 2021: Manuel Eberl
+  New material in HOL-Analysis/HOL-Probability, most notably Hoeffding's
+  inequality and the negative binomial distribution
+
 * January 2021: Jakub Kądziołka
   Some lemmas for HOL-Computational_Algebra.
 
--- a/NEWS	Tue Feb 16 17:12:02 2021 +0000
+++ b/NEWS	Fri Feb 19 13:42:12 2021 +0100
@@ -4,6 +4,18 @@
 (Note: Isabelle/jEdit shows a tree-view of the NEWS file in Sidekick.)
 
 
+New in this Isabelle version
+----------------------------
+
+*** HOL ***
+
+* HOL-Analysis/HOL-Probability: indexed products of discrete
+distributions, negative binomial distribution, Hoeffding's inequality,
+Chernoff bounds, Cauchy–Schwarz inequality for nn_integral, and some
+more small lemmas. Some theorems that were stated awkwardly before were
+corrected. Minor INCOMPATIBILITY.
+
+
 New in Isabelle2021 (February 2021)
 -----------------------------------
 
--- a/src/HOL/Analysis/Bochner_Integration.thy	Tue Feb 16 17:12:02 2021 +0000
+++ b/src/HOL/Analysis/Bochner_Integration.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -1315,6 +1315,27 @@
   using integrableI_bounded[of f M] has_bochner_integral_implies_finite_norm[of M f]
   unfolding integrable.simps has_bochner_integral.simps[abs_def] by auto
 
+lemma (in finite_measure) square_integrable_imp_integrable:
+  fixes f :: "'a \<Rightarrow> 'b :: {second_countable_topology, banach, real_normed_div_algebra}"
+  assumes [measurable]: "f \<in> borel_measurable M"
+  assumes "integrable M (\<lambda>x. f x ^ 2)"
+  shows   "integrable M f"
+proof -
+  have less_top: "emeasure M (space M) < top"
+    using finite_emeasure_space by (meson top.not_eq_extremum)
+  have "nn_integral M (\<lambda>x. norm (f x)) ^ 2 \<le>
+        nn_integral M (\<lambda>x. norm (f x) ^ 2) * emeasure M (space M)"
+    using Cauchy_Schwarz_nn_integral[of "\<lambda>x. norm (f x)" M "\<lambda>_. 1"]
+    by (simp add: ennreal_power)
+  also have "\<dots> < \<infinity>"
+    using assms(2) less_top
+    by (subst (asm) integrable_iff_bounded) (auto simp: norm_power ennreal_mult_less_top)
+  finally have "nn_integral M (\<lambda>x. norm (f x)) < \<infinity>"
+    by (simp add: power_less_top_ennreal)
+  thus ?thesis
+    by (simp add: integrable_iff_bounded)
+qed
+
 lemma integrable_bound:
   fixes f :: "'a \<Rightarrow> 'b::{banach, second_countable_topology}"
     and g :: "'a \<Rightarrow> 'c::{banach, second_countable_topology}"
@@ -2065,16 +2086,50 @@
 
   have "{x \<in> space M. u x \<ge> c} = {x \<in> space M. ennreal(1/c) * u x \<ge> 1} \<inter> (space M)"
     using \<open>c>0\<close> by (auto simp: ennreal_mult'[symmetric])
-  then have "emeasure M {x \<in> space M. u x \<ge> c} = emeasure M ({x \<in> space M. ennreal(1/c) * u x \<ge> 1} \<inter> (space M))"
+  then have "emeasure M {x \<in> space M. u x \<ge> c} = emeasure M ({x \<in> space M. ennreal(1/c) * u x \<ge> 1})"
     by simp
   also have "... \<le> ennreal(1/c) * (\<integral>\<^sup>+ x. ennreal(u x) * indicator (space M) x \<partial>M)"
     by (rule nn_integral_Markov_inequality) (auto simp add: assms)
   also have "... \<le> ennreal(1/c) * (\<integral>x. u x \<partial>M)"
-    apply (rule mult_left_mono) using * \<open>c>0\<close> by auto
+    by (rule mult_left_mono) (use * \<open>c > 0\<close> in auto)
   finally show ?thesis
     using \<open>0<c\<close> by (simp add: ennreal_mult'[symmetric])
 qed
 
+theorem integral_Markov_inequality_measure:
+  assumes [measurable]: "integrable M u" and "A \<in> sets M" and "AE x in M. 0 \<le> u x" "0 < (c::real)"
+  shows "measure M {x\<in>space M. u x \<ge> c} \<le> (\<integral>x. u x \<partial>M) / c"
+proof -
+  have le: "emeasure M {x\<in>space M. u x \<ge> c} \<le> ennreal ((1/c) * (\<integral>x. u x \<partial>M))"
+    by (rule integral_Markov_inequality) (use assms in auto)
+  also have "\<dots> < top"
+    by auto
+  finally have "ennreal (measure M {x\<in>space M. u x \<ge> c}) = emeasure M {x\<in>space M. u x \<ge> c}"
+    by (intro emeasure_eq_ennreal_measure [symmetric]) auto
+  also note le
+  finally show ?thesis
+    by (subst (asm) ennreal_le_iff)
+       (auto intro!: divide_nonneg_pos Bochner_Integration.integral_nonneg_AE assms)
+qed
+
+theorem%important (in finite_measure) second_moment_method:
+  assumes [measurable]: "f \<in> M \<rightarrow>\<^sub>M borel"
+  assumes "integrable M (\<lambda>x. f x ^ 2)"
+  defines "\<mu> \<equiv> lebesgue_integral M f"
+  assumes "a > 0"
+  shows "measure M {x\<in>space M. \<bar>f x\<bar> \<ge> a} \<le> lebesgue_integral M (\<lambda>x. f x ^ 2) / a\<^sup>2"
+proof -
+  have integrable: "integrable M f"
+    using assms by (blast dest: square_integrable_imp_integrable)
+  have "{x\<in>space M. \<bar>f x\<bar> \<ge> a} = {x\<in>space M. f x ^ 2 \<ge> a\<^sup>2}"
+    using \<open>a > 0\<close> by (simp flip: abs_le_square_iff)
+  hence "measure M {x\<in>space M. \<bar>f x\<bar> \<ge> a} = measure M {x\<in>space M. f x ^ 2 \<ge> a\<^sup>2}"
+    by simp
+  also have "\<dots> \<le> lebesgue_integral M (\<lambda>x. f x ^ 2) / a\<^sup>2"
+    using assms by (intro integral_Markov_inequality_measure) auto
+  finally show ?thesis .
+qed
+
 lemma integral_ineq_eq_0_then_AE:
   fixes f::"_ \<Rightarrow> real"
   assumes "AE x in M. f x \<le> g x" "integrable M f" "integrable M g"
--- a/src/HOL/Analysis/Borel_Space.thy	Tue Feb 16 17:12:02 2021 +0000
+++ b/src/HOL/Analysis/Borel_Space.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -1640,6 +1640,12 @@
   shows "f \<in> M \<rightarrow>\<^sub>M borel \<Longrightarrow> g \<in> M \<rightarrow>\<^sub>M borel \<Longrightarrow> (\<lambda>x. f x - g x) \<in> M \<rightarrow>\<^sub>M borel"
   unfolding is_borel_def[symmetric] by transfer simp
 
+lemma borel_measurable_power_ennreal [measurable (raw)]:
+  fixes f :: "_ \<Rightarrow> ennreal"
+  assumes f: "f \<in> borel_measurable M"
+  shows "(\<lambda>x. (f x) ^ n) \<in> borel_measurable M"
+  by (induction n) (use f in auto)
+
 lemma borel_measurable_prod_ennreal[measurable (raw)]:
   fixes f :: "'c \<Rightarrow> 'a \<Rightarrow> ennreal"
   assumes "\<And>i. i \<in> S \<Longrightarrow> f i \<in> borel_measurable M"
--- a/src/HOL/Analysis/Nonnegative_Lebesgue_Integration.thy	Tue Feb 16 17:12:02 2021 +0000
+++ b/src/HOL/Analysis/Nonnegative_Lebesgue_Integration.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -1174,13 +1174,18 @@
 qed
 
 theorem nn_integral_Markov_inequality:
-  assumes u: "u \<in> borel_measurable M" and "A \<in> sets M"
-  shows "(emeasure M) ({x\<in>space M. 1 \<le> c * u x} \<inter> A) \<le> c * (\<integral>\<^sup>+ x. u x * indicator A x \<partial>M)"
+  assumes u: "(\<lambda>x. u x * indicator A x) \<in> borel_measurable M" and "A \<in> sets M"
+  shows "(emeasure M) ({x\<in>A. 1 \<le> c * u x}) \<le> c * (\<integral>\<^sup>+ x. u x * indicator A x \<partial>M)"
     (is "(emeasure M) ?A \<le> _ * ?PI")
 proof -
-  have "?A \<in> sets M"
-    using \<open>A \<in> sets M\<close> u by auto
-  hence "(emeasure M) ?A = (\<integral>\<^sup>+ x. indicator ?A x \<partial>M)"
+  define u' where "u' = (\<lambda>x. u x * indicator A x)"
+  have [measurable]: "u' \<in> borel_measurable M"
+    using u unfolding u'_def .
+  have "{x\<in>space M. c * u' x \<ge> 1} \<in> sets M"
+    by measurable
+  also have "{x\<in>space M. c * u' x \<ge> 1} = ?A"
+    using sets.sets_into_space[OF \<open>A \<in> sets M\<close>] by (auto simp: u'_def indicator_def)
+  finally have "(emeasure M) ?A = (\<integral>\<^sup>+ x. indicator ?A x \<partial>M)"
     using nn_integral_indicator by simp
   also have "\<dots> \<le> (\<integral>\<^sup>+ x. c * (u x * indicator A x) \<partial>M)"
     using u by (auto intro!: nn_integral_mono_AE simp: indicator_def)
@@ -1189,6 +1194,37 @@
   finally show ?thesis .
 qed
 
+lemma Chernoff_ineq_nn_integral_ge:
+  assumes s: "s > 0" and [measurable]: "A \<in> sets M"
+  assumes [measurable]: "(\<lambda>x. f x * indicator A x) \<in> borel_measurable M"
+  shows   "emeasure M {x\<in>A. f x \<ge> a} \<le>
+           ennreal (exp (-s * a)) * nn_integral M (\<lambda>x. ennreal (exp (s * f x)) * indicator A x)"
+proof -
+  define f' where "f' = (\<lambda>x. f x * indicator A x)"
+  have [measurable]: "f' \<in> borel_measurable M"
+    using assms(3) unfolding f'_def by assumption
+  have "(\<lambda>x. ennreal (exp (s * f' x)) * indicator A x) \<in> borel_measurable M"
+    by simp
+  also have "(\<lambda>x. ennreal (exp (s * f' x)) * indicator A x) =
+             (\<lambda>x. ennreal (exp (s * f x)) * indicator A x)"
+    by (auto simp: f'_def indicator_def fun_eq_iff)
+  finally have meas: "\<dots> \<in> borel_measurable M" .
+
+  have "{x\<in>A. f x \<ge> a} = {x\<in>A. ennreal (exp (-s * a)) * ennreal (exp (s * f x)) \<ge> 1}"
+    using s by (auto simp: exp_minus field_simps simp flip: ennreal_mult)
+  also have "emeasure M \<dots> \<le> ennreal (exp (-s * a)) *
+               (\<integral>\<^sup>+x. ennreal (exp (s * f x)) * indicator A x \<partial>M)"
+    by (intro order.trans[OF nn_integral_Markov_inequality] meas) auto
+  finally show ?thesis .
+qed
+
+lemma Chernoff_ineq_nn_integral_le:
+  assumes s: "s > 0" and [measurable]: "A \<in> sets M"
+  assumes [measurable]: "f \<in> borel_measurable M"
+  shows   "emeasure M {x\<in>A. f x \<le> a} \<le>
+           ennreal (exp (s * a)) * nn_integral M (\<lambda>x. ennreal (exp (-s * f x)) * indicator A x)"
+  using Chernoff_ineq_nn_integral_ge[of s A M "\<lambda>x. -f x" "-a"] assms by simp
+
 lemma nn_integral_noteq_infinite:
   assumes g: "g \<in> borel_measurable M" and "integral\<^sup>N M g \<noteq> \<infinity>"
   shows "AE x in M. g x \<noteq> \<infinity>"
@@ -1432,7 +1468,7 @@
 qed
 
 lemma nn_integral_0_iff:
-  assumes u: "u \<in> borel_measurable M"
+  assumes u [measurable]: "u \<in> borel_measurable M"
   shows "integral\<^sup>N M u = 0 \<longleftrightarrow> emeasure M {x\<in>space M. u x \<noteq> 0} = 0"
     (is "_ \<longleftrightarrow> (emeasure M) ?A = 0")
 proof -
@@ -1449,9 +1485,13 @@
     have "0 = (SUP n. (emeasure M) (?M n \<inter> ?A))"
     proof -
       { fix n :: nat
-        from nn_integral_Markov_inequality[OF u, of ?A "of_nat n"] u
-        have "(emeasure M) (?M n \<inter> ?A) \<le> 0"
-          by (simp add: ennreal_of_nat_eq_real_of_nat u_eq *)
+        have "emeasure M {x \<in> ?A. 1 \<le> of_nat n * u x} \<le>
+                of_nat n * \<integral>\<^sup>+ x. u x * indicator ?A x \<partial>M"
+          by (intro nn_integral_Markov_inequality) auto
+        also have "{x \<in> ?A. 1 \<le> of_nat n * u x} = (?M n \<inter> ?A)"
+          by (auto simp: ennreal_of_nat_eq_real_of_nat u_eq * )
+        finally have "emeasure M (?M n \<inter> ?A) \<le> 0"
+          by (simp add: ennreal_of_nat_eq_real_of_nat u_eq * )
         moreover have "0 \<le> (emeasure M) (?M n \<inter> ?A)" using u by auto
         ultimately have "(emeasure M) (?M n \<inter> ?A) = 0" by auto }
       thus ?thesis by simp
@@ -1617,6 +1657,93 @@
     by (subst step) auto
 qed (insert bound, auto simp add: le_fun_def INF_apply[abs_def] top_fun_def intro!: meas f g)
 
+
+text \<open>Cauchy--Schwarz inequality for \<^const>\<open>nn_integral\<close>\<close>
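In conventional notation, the lemma \<open>Cauchy_Schwarz_nn_integral\<close> added below states (informal paraphrase, with all integrals read as non-negative Lebesgue integrals that may take the value $\infty$): for measurable $f, g \ge 0$,
\[
  \Bigl(\int f\,g \,\mathrm{d}\mu\Bigr)^{2} \;\le\; \Bigl(\int f^{2} \,\mathrm{d}\mu\Bigr)\Bigl(\int g^{2} \,\mathrm{d}\mu\Bigr).
\]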
+
+lemma sum_of_squares_ge_ennreal:
+  fixes a b :: ennreal
+  shows "2 * a * b \<le> a\<^sup>2 + b\<^sup>2"
+proof (cases a; cases b)
+  fix x y
+  assume xy: "x \<ge> 0" "y \<ge> 0" and [simp]: "a = ennreal x" "b = ennreal y"
+  have "0 \<le> (x - y)\<^sup>2"
+    by simp
+  also have "\<dots> = x\<^sup>2 + y\<^sup>2 - 2 * x * y"
+    by (simp add: algebra_simps power2_eq_square)
+  finally have "2 * x * y \<le> x\<^sup>2 + y\<^sup>2"
+    by simp
+  hence "ennreal (2 * x * y) \<le> ennreal (x\<^sup>2 + y\<^sup>2)"
+    by (intro ennreal_leI)
+  thus ?thesis using xy
+    by (simp add: ennreal_mult ennreal_power)
+qed auto
+
+lemma Cauchy_Schwarz_nn_integral:
+  assumes [measurable]: "f \<in> borel_measurable M" "g \<in> borel_measurable M"
+  shows "(\<integral>\<^sup>+x. f x * g x \<partial>M)\<^sup>2 \<le> (\<integral>\<^sup>+x. f x ^ 2 \<partial>M) * (\<integral>\<^sup>+x. g x ^ 2 \<partial>M)"
+proof (cases "(\<integral>\<^sup>+x. f x * g x \<partial>M) = 0")
+  case False
+  define F where "F = nn_integral M (\<lambda>x. f x ^ 2)"
+  define G where "G = nn_integral M (\<lambda>x. g x ^ 2)"
+  from False have "\<not>(AE x in M. f x = 0 \<or> g x = 0)"
+    by (auto simp: nn_integral_0_iff_AE)
+  hence "\<not>(AE x in M. f x = 0)" and "\<not>(AE x in M. g x = 0)"
+    by (auto intro: AE_disjI1 AE_disjI2)
+  hence nz: "F \<noteq> 0" "G \<noteq> 0"
+    by (auto simp: nn_integral_0_iff_AE F_def G_def)
+
+  show ?thesis
+  proof (cases "F = \<infinity> \<or> G = \<infinity>")
+    case True
+    thus ?thesis using nz
+      by (auto simp: F_def G_def)
+  next
+    case False
+    define F' where "F' = ennreal (sqrt (enn2real F))"
+    define G' where "G' = ennreal (sqrt (enn2real G))"
+    from False have fin: "F < top" "G < top"
+      by (simp_all add: top.not_eq_extremum)
+    have F'_sqr: "F'\<^sup>2 = F"
+      using False by (cases F) (auto simp: F'_def ennreal_power)
+    have G'_sqr: "G'\<^sup>2 = G"
+      using False by (cases G) (auto simp: G'_def ennreal_power)
+    have nz': "F' \<noteq> 0" "G' \<noteq> 0" and fin': "F' \<noteq> \<infinity>" "G' \<noteq> \<infinity>"
+      using F'_sqr G'_sqr nz fin by auto
+    from fin' have fin'': "F' < top" "G' < top"
+      by (auto simp: top.not_eq_extremum)
+
+    have "2 * (F' / F') * (G' / G') * (\<integral>\<^sup>+x. f x * g x \<partial>M) =
+          F' * G' * (\<integral>\<^sup>+x. 2 * (f x / F') * (g x / G') \<partial>M)"
+      using nz' fin''
+      by (simp add: divide_ennreal_def algebra_simps ennreal_inverse_mult flip: nn_integral_cmult)
+    also have "F'/ F' = 1"
+      using nz' fin'' by simp
+    also have "G'/ G' = 1"
+      using nz' fin'' by simp
+    also have "2 * 1 * 1 = (2 :: ennreal)" by simp
+    also have "F' * G' * (\<integral>\<^sup>+ x. 2 * (f x / F') * (g x / G') \<partial>M) \<le>
+               F' * G' * (\<integral>\<^sup>+x. (f x / F')\<^sup>2 + (g x / G')\<^sup>2 \<partial>M)"
+      by (intro mult_left_mono nn_integral_mono sum_of_squares_ge_ennreal) auto
+    also have "\<dots> = F' * G' * (F / F'\<^sup>2 + G / G'\<^sup>2)" using nz
+      by (auto simp: nn_integral_add algebra_simps nn_integral_divide F_def G_def)
+    also have "F / F'\<^sup>2 = 1"
+      using nz F'_sqr fin by simp
+    also have "G / G'\<^sup>2 = 1"
+      using nz G'_sqr fin by simp
+    also have "F' * G' * (1 + 1) = 2 * (F' * G')"
+      by (simp add: mult_ac)
+    finally have "(\<integral>\<^sup>+x. f x * g x \<partial>M) \<le> F' * G'"
+      by (subst (asm) ennreal_mult_le_mult_iff) auto
+    hence "(\<integral>\<^sup>+x. f x * g x \<partial>M)\<^sup>2 \<le> (F' * G')\<^sup>2"
+      by (intro power_mono_ennreal)
+    also have "\<dots> = F * G"
+      by (simp add: algebra_simps F'_sqr G'_sqr)
+    finally show ?thesis
+      by (simp add: F_def G_def)
+  qed
+qed auto
+
+
 (* TODO: rename? *)
 subsection \<open>Integral under concrete measures\<close>
 
--- a/src/HOL/Analysis/Set_Integral.thy	Tue Feb 16 17:12:02 2021 +0000
+++ b/src/HOL/Analysis/Set_Integral.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -1079,4 +1079,87 @@
   qed
 qed
 
+
+
+theorem integral_Markov_inequality':
+  fixes u :: "'a \<Rightarrow> real"
+  assumes [measurable]: "set_integrable M A u" and "A \<in> sets M"
+  assumes "AE x in M. x \<in> A \<longrightarrow> u x \<ge> 0" and "0 < (c::real)"
+  shows "emeasure M {x\<in>A. u x \<ge> c} \<le> (1/c::real) * (\<integral>x\<in>A. u x \<partial>M)"
+proof -
+  have "(\<lambda>x. u x * indicator A x) \<in> borel_measurable M" 
+    using assms by (auto simp: set_integrable_def mult_ac)
+  hence "(\<lambda>x. ennreal (u x * indicator A x)) \<in> borel_measurable M"
+    by measurable
+  also have "(\<lambda>x. ennreal (u x * indicator A x)) = (\<lambda>x. ennreal (u x) * indicator A x)"
+    by (intro ext) (auto simp: indicator_def)
+  finally have meas: "\<dots> \<in> borel_measurable M" .
+  from assms(3) have AE: "AE x in M. 0 \<le> u x * indicator A x"
+    by eventually_elim (auto simp: indicator_def)
+  have nonneg: "set_lebesgue_integral M A u \<ge> 0"
+    unfolding set_lebesgue_integral_def
+    by (intro Bochner_Integration.integral_nonneg_AE eventually_mono[OF AE]) (auto simp: mult_ac)
+
+  have A: "A \<subseteq> space M"
+    using \<open>A \<in> sets M\<close> by (simp add: sets.sets_into_space)
+  have "{x \<in> A. u x \<ge> c} = {x \<in> A. ennreal(1/c) * u x \<ge> 1}"
+    using \<open>c>0\<close> A by (auto simp: ennreal_mult'[symmetric])
+  then have "emeasure M {x \<in> A. u x \<ge> c} = emeasure M ({x \<in> A. ennreal(1/c) * u x \<ge> 1})"
+    by simp
+  also have "... \<le> ennreal(1/c) * (\<integral>\<^sup>+ x. ennreal(u x) * indicator A x \<partial>M)"
+    by (intro nn_integral_Markov_inequality meas assms)
+  also have "(\<integral>\<^sup>+ x. ennreal(u x) * indicator A x \<partial>M) = ennreal (set_lebesgue_integral M A u)"
+    unfolding set_lebesgue_integral_def nn_integral_set_ennreal using assms AE
+    by (subst nn_integral_eq_integral) (simp_all add: mult_ac set_integrable_def)
+  finally show ?thesis
+    using \<open>c > 0\<close> nonneg by (subst ennreal_mult) auto
+qed
+
+theorem integral_Markov_inequality'_measure:
+  assumes [measurable]: "set_integrable M A u" and "A \<in> sets M" 
+     and "AE x in M. x \<in> A \<longrightarrow> 0 \<le> u x" "0 < (c::real)"
+  shows "measure M {x\<in>A. u x \<ge> c} \<le> (\<integral>x\<in>A. u x \<partial>M) / c"
+proof -
+  have nonneg: "set_lebesgue_integral M A u \<ge> 0"
+    unfolding set_lebesgue_integral_def
+    by (intro Bochner_Integration.integral_nonneg_AE eventually_mono[OF assms(3)])
+       (auto simp: mult_ac)
+  have le: "emeasure M {x\<in>A. u x \<ge> c} \<le> ennreal ((1/c) * (\<integral>x\<in>A. u x \<partial>M))"
+    by (rule integral_Markov_inequality') (use assms in auto)
+  also have "\<dots> < top"
+    by auto
+  finally have "ennreal (measure M {x\<in>A. u x \<ge> c}) = emeasure M {x\<in>A. u x \<ge> c}"
+    by (intro emeasure_eq_ennreal_measure [symmetric]) auto
+  also note le
+  finally show ?thesis using nonneg
+    by (subst (asm) ennreal_le_iff)
+       (auto intro!: divide_nonneg_pos Bochner_Integration.integral_nonneg_AE assms)
+qed
+
+theorem%important (in finite_measure) Chernoff_ineq_ge:
+  assumes s: "s > 0"
+  assumes integrable: "set_integrable M A (\<lambda>x. exp (s * f x))" and "A \<in> sets M"
+  shows   "measure M {x\<in>A. f x \<ge> a} \<le> exp (-s * a) * (\<integral>x\<in>A. exp (s * f x) \<partial>M)"
+proof -
+  have "{x\<in>A. f x \<ge> a} = {x\<in>A. exp (s * f x) \<ge> exp (s * a)}"
+    using s by auto
+  also have "measure M \<dots> \<le> set_lebesgue_integral M A (\<lambda>x. exp (s * f x)) / exp (s * a)"
+    by (intro integral_Markov_inequality'_measure assms) auto
+  finally show ?thesis 
+    by (simp add: exp_minus field_simps)
+qed
+
+theorem%important (in finite_measure) Chernoff_ineq_le:
+  assumes s: "s > 0"
+  assumes integrable: "set_integrable M A (\<lambda>x. exp (-s * f x))" and "A \<in> sets M"
+  shows   "measure M {x\<in>A. f x \<le> a} \<le> exp (s * a) * (\<integral>x\<in>A. exp (-s * f x) \<partial>M)"
+proof -
+  have "{x\<in>A. f x \<le> a} = {x\<in>A. exp (-s * f x) \<ge> exp (-s * a)}"
+    using s by auto
+  also have "measure M \<dots> \<le> set_lebesgue_integral M A (\<lambda>x. exp (-s * f x)) / exp (-s * a)"
+    by (intro integral_Markov_inequality'_measure assms) auto
+  finally show ?thesis 
+    by (simp add: exp_minus field_simps)
+qed
+
 end
--- a/src/HOL/Library/Extended_Nonnegative_Real.thy	Tue Feb 16 17:12:02 2021 +0000
+++ b/src/HOL/Library/Extended_Nonnegative_Real.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -555,6 +555,32 @@
 lemma ennreal_add_bot[simp]: "bot + x = (x::ennreal)"
   by transfer simp
 
+lemma add_top_right_ennreal [simp]: "x + top = (top :: ennreal)"
+  by (cases x) auto
+
+lemma add_top_left_ennreal [simp]: "top + x = (top :: ennreal)"
+  by (cases x) auto
+
+lemma ennreal_top_mult_left [simp]: "x \<noteq> 0 \<Longrightarrow> x * top = (top :: ennreal)"
+  by (subst ennreal_mult_eq_top_iff) auto
+
+lemma ennreal_top_mult_right [simp]: "x \<noteq> 0 \<Longrightarrow> top * x = (top :: ennreal)"
+  by (subst ennreal_mult_eq_top_iff) auto
+
+
+lemma power_top_ennreal [simp]: "n > 0 \<Longrightarrow> top ^ n = (top :: ennreal)"
+  by (induction n) auto
+
+lemma power_eq_top_ennreal_iff: "x ^ n = top \<longleftrightarrow> x = (top :: ennreal) \<and> n > 0"
+  by (induction n) (auto simp: ennreal_mult_eq_top_iff)
+
+lemma ennreal_mult_le_mult_iff: "c \<noteq> 0 \<Longrightarrow> c \<noteq> top \<Longrightarrow> c * a \<le> c * b \<longleftrightarrow> a \<le> (b :: ennreal)"
+  including ennreal.lifting
+  by (transfer, subst ereal_mult_le_mult_iff) (auto simp: top_ereal_def)
+
+lemma power_mono_ennreal: "x \<le> y \<Longrightarrow> x ^ n \<le> (y ^ n :: ennreal)"
+  by (induction n) (auto intro!: mult_mono)
+
 instance ennreal :: semiring_char_0
 proof (standard, safe intro!: linorder_injI)
   have *: "1 + of_nat k \<noteq> (0::ennreal)" for k
@@ -683,6 +709,9 @@
   unfolding divide_ennreal_def
   by transfer (auto simp: ereal_zero_less_0_iff top_ereal_def ereal_0_gt_inverse)
 
+lemma add_divide_distrib_ennreal: "(a + b) / c = a / c + b / (c :: ennreal)"
+  by (simp add: divide_ennreal_def ring_distribs)
+
 lemma divide_right_mono_ennreal:
   fixes a b c :: ennreal
   shows "a \<le> b \<Longrightarrow> a / c \<le> b / c"
@@ -976,6 +1005,10 @@
   qed
 qed
 
+lemma power_divide_distrib_ennreal [algebra_simps]:
+  "(x / y) ^ n = x ^ n / (y ^ n :: ennreal)"
+  by (simp add: divide_ennreal_def algebra_simps ennreal_inverse_power)
+
 lemma ennreal_divide_numeral: "0 \<le> x \<Longrightarrow> ennreal x / numeral b = ennreal (x / numeral b)"
   by (subst divide_ennreal[symmetric]) auto
 
@@ -983,6 +1016,11 @@
   by (induction A rule: infinite_finite_induct)
      (auto simp: ennreal_mult prod_nonneg)
 
+lemma prod_mono_ennreal:
+  assumes "\<And>x. x \<in> A \<Longrightarrow> f x \<le> (g x :: ennreal)"
+  shows   "prod f A \<le> prod g A"
+  using assms by (induction A rule: infinite_finite_induct) (auto intro!: mult_mono)
+
 lemma mult_right_ennreal_cancel: "a * ennreal c = b * ennreal c \<longleftrightarrow> (a = b \<or> c \<le> 0)"
 proof (cases "0 \<le> c")
   case True
--- a/src/HOL/Probability/Central_Limit_Theorem.thy	Tue Feb 16 17:12:02 2021 +0000
+++ b/src/HOL/Probability/Central_Limit_Theorem.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -14,7 +14,6 @@
     and \<sigma> :: real
     and S :: "nat \<Rightarrow> 'a \<Rightarrow> real"
   assumes X_indep: "indep_vars (\<lambda>i. borel) X UNIV"
-    and X_integrable: "\<And>n. integrable M (X n)"
     and X_mean_0: "\<And>n. expectation (X n) = 0"
     and \<sigma>_pos: "\<sigma> > 0"
     and X_square_integrable: "\<And>n. integrable M (\<lambda>x. (X n x)\<^sup>2)"
@@ -27,8 +26,10 @@
   define \<phi> where "\<phi> n = char (distr M borel (?S' n))" for n
   define \<psi> where "\<psi> n t = char \<mu> (t / sqrt (\<sigma>\<^sup>2 * n))" for n t
 
-  have X_rv [simp, measurable]: "\<And>n. random_variable borel (X n)"
+  have X_rv [simp, measurable]: "random_variable borel (X n)" for n
     using X_indep unfolding indep_vars_def2 by simp
+  have X_integrable [simp, intro]: "integrable M (X n)" for n
+    by (rule square_integrable_imp_integrable[OF _ X_square_integrable]) simp_all
   interpret \<mu>: real_distribution \<mu>
     by (subst X_distrib [symmetric, of 0], rule real_distribution_distr, simp)
 
@@ -120,7 +121,6 @@
     and \<sigma> :: real
     and S :: "nat \<Rightarrow> 'a \<Rightarrow> real"
   assumes X_indep: "indep_vars (\<lambda>i. borel) X UNIV"
-    and X_integrable: "\<And>n. integrable M (X n)"
     and X_mean: "\<And>n. expectation (X n) = m"
     and \<sigma>_pos: "\<sigma> > 0"
     and X_square_integrable: "\<And>n. integrable M (\<lambda>x. (X n x)\<^sup>2)"
@@ -131,8 +131,12 @@
 proof (intro central_limit_theorem_zero_mean)
   show "indep_vars (\<lambda>i. borel) X' UNIV"
     unfolding X'_def[abs_def] using X_indep by (rule indep_vars_compose2) auto
-  show "integrable M (X' n)" "expectation (X' n) = 0" for n
-    using X_integrable X_mean by (auto simp: X'_def[abs_def] prob_space)
+  have X_rv [simp, measurable]: "random_variable borel (X n)" for n
+    using X_indep unfolding indep_vars_def2 by simp
+  have X_integrable [simp, intro]: "integrable M (X n)" for n
+    by (rule square_integrable_imp_integrable[OF _ X_square_integrable]) simp_all
+  show "expectation (X' n) = 0" for n
+    using X_mean by (auto simp: X'_def[abs_def] prob_space)
   show "\<sigma> > 0" "integrable M (\<lambda>x. (X' n x)\<^sup>2)" "variance (X' n) = \<sigma>\<^sup>2" for n
     using \<open>0 < \<sigma>\<close> X_integrable X_mean X_square_integrable X_variance unfolding X'_def
     by (auto simp: prob_space power2_diff)
--- a/src/HOL/Probability/Conditional_Expectation.thy	Tue Feb 16 17:12:02 2021 +0000
+++ b/src/HOL/Probability/Conditional_Expectation.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -1291,7 +1291,7 @@
         finally have "\<bar>q y\<bar> > \<bar>q 0\<bar> - e" by auto
         then show ?thesis unfolding e_def by simp
       qed
-      have "emeasure M {x \<in> space M. \<bar>X x\<bar> < d} \<le> emeasure M ({x \<in> space M. 1 \<le> ennreal(1/e) * \<bar>q(X x)\<bar>} \<inter> space M)"
+      have "emeasure M {x \<in> space M. \<bar>X x\<bar> < d} \<le> emeasure M ({x \<in> space M. 1 \<le> ennreal(1/e) * \<bar>q(X x)\<bar>})"
         by (rule emeasure_mono, auto simp add: * \<open>e>0\<close> less_imp_le ennreal_mult''[symmetric])
       also have "... \<le> (1/e) * (\<integral>\<^sup>+x. ennreal(\<bar>q(X x)\<bar>) * indicator (space M) x \<partial>M)"
         by (rule nn_integral_Markov_inequality, auto)
@@ -1304,7 +1304,7 @@
 
       have "{x \<in> space M. \<bar>X x\<bar> \<ge> d} = {x \<in> space M. 1 \<le> ennreal(1/d) * \<bar>X x\<bar>} \<inter> space M"
         by (auto simp add: \<open>d>0\<close> ennreal_mult''[symmetric])
-      then have "emeasure M {x \<in> space M. \<bar>X x\<bar> \<ge> d} = emeasure M ({x \<in> space M. 1 \<le> ennreal(1/d) * \<bar>X x\<bar>} \<inter> space M)"
+      then have "emeasure M {x \<in> space M. \<bar>X x\<bar> \<ge> d} = emeasure M ({x \<in> space M. 1 \<le> ennreal(1/d) * \<bar>X x\<bar>})"
         by auto
       also have "... \<le> (1/d) * (\<integral>\<^sup>+x. ennreal(\<bar>X x\<bar>) * indicator (space M) x \<partial>M)"
         by (rule nn_integral_Markov_inequality, auto)
--- a/src/HOL/Probability/Giry_Monad.thy	Tue Feb 16 17:12:02 2021 +0000
+++ b/src/HOL/Probability/Giry_Monad.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -2,15 +2,17 @@
     Author:     Johannes Hölzl, TU München
     Author:     Manuel Eberl, TU München
 
-Defines the subprobability spaces, the subprobability functor and the Giry monad on subprobability
+Defines subprobability spaces, the subprobability functor and the Giry monad on subprobability
 spaces.
 *)
 
+section \<open>The Giry monad\<close>
+
 theory Giry_Monad
   imports Probability_Measure "HOL-Library.Monad_Syntax"
 begin
 
-section \<open>Sub-probability spaces\<close>
+subsection \<open>Sub-probability spaces\<close>
 
 locale subprob_space = finite_measure +
   assumes emeasure_space_le_1: "emeasure M (space M) \<le> 1"
@@ -521,7 +523,7 @@
   qed
 qed
 
-section \<open>Properties of return\<close>
+subsection \<open>Properties of ``return''\<close>
 
 definition return :: "'a measure \<Rightarrow> 'a \<Rightarrow> 'a measure" where
   "return R x = measure_of (space R) (sets R) (\<lambda>A. indicator A x)"
@@ -757,7 +759,7 @@
   "sets M = sets (subprob_algebra N) \<Longrightarrow> space (select_sets M) = space N"
   by (intro sets_eq_imp_space_eq sets_select_sets)
 
-section \<open>Join\<close>
+subsection \<open>Join\<close>
 
 definition join :: "'a measure measure \<Rightarrow> 'a measure" where
   "join M = measure_of (space (select_sets M)) (sets (select_sets M)) (\<lambda>B. \<integral>\<^sup>+ M'. emeasure M' B \<partial>M)"
@@ -1784,4 +1786,28 @@
   by (subst bind_distr[OF _ measurable_compose[OF _ return_measurable]])
      (auto intro!: bind_return_distr')
 
+lemma (in prob_space) AE_eq_constD:
+  assumes "AE x in M. x = y"
+  shows   "M = return M y" "y \<in> space M"
+proof -
+  have "AE x in M. x \<in> space M"
+    by auto
+  with assms have "AE x in M. y \<in> space M"
+    by eventually_elim auto
+  thus "y \<in> space M"
+    by simp
+
+  show "M = return M y"
+  proof (rule measure_eqI)
+    fix X assume X: "X \<in> sets M"
+    have "AE x in M. (x \<in> X) = (x \<in> (if y \<in> X then space M else {}))"
+      using assms by eventually_elim (use X \<open>y \<in> space M\<close> in auto)
+    hence "emeasure M X = emeasure M (if y \<in> X then space M else {})"
+      using X by (intro emeasure_eq_AE) auto
+    also have "\<dots> = emeasure (return M y) X"
+      using X by (auto simp: emeasure_space_1)
+    finally show "emeasure M X = \<dots>" .
+  qed auto
+qed
+
 end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Probability/Hoeffding.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -0,0 +1,923 @@
+(*
+  File:    Hoeffding.thy
+  Author:  Manuel Eberl, TU München
+*)
+section \<open>Hoeffding's Lemma and Hoeffding's Inequality\<close>
+theory Hoeffding
+  imports Product_PMF Independent_Family
+begin
+
+text \<open>
+  Hoeffding's inequality shows that a sum of bounded independent random variables is concentrated
+  around its mean, with an exponential decay of the tail probabilities.
+\<close>
+
+subsection \<open>Hoeffding's Lemma\<close>
+
+lemma convex_on_exp: 
+  fixes l :: real
+  assumes "l \<ge> 0"
+  shows   "convex_on UNIV (\<lambda>x. exp(l*x))"
+  using assms
+  by (intro convex_on_realI[where f' = "\<lambda>x. l * exp (l * x)"])
+     (auto intro!: derivative_eq_intros mult_left_mono)
+
+lemma mult_const_minus_self_real_le:
+  fixes x :: real
+  shows "x * (c - x) \<le> c\<^sup>2 / 4"
+proof -
+  have "x * (c - x) = -(x - c / 2)\<^sup>2 + c\<^sup>2 / 4"
+    by (simp add: field_simps power2_eq_square)
+  also have "\<dots> \<le> 0 + c\<^sup>2 / 4"
+    by (intro add_mono) auto
+  finally show ?thesis by simp
+qed
+
+lemma Hoeffdings_lemma_aux:
+  fixes h p :: real
+  assumes "h \<ge> 0" and "p \<ge> 0"
+  defines "L \<equiv> (\<lambda>h. -h * p + ln (1 + p * (exp h - 1)))"
+  shows   "L h \<le> h\<^sup>2 / 8"
+proof (cases "h = 0")
+  case False
+  hence h: "h > 0"
+    using \<open>h \<ge> 0\<close> by simp
+  define L' where "L' = (\<lambda>h. -p + p * exp h / (1 + p * (exp h - 1)))"
+  define L'' where "L'' = (\<lambda>h. -(p\<^sup>2) * exp h * exp h / (1 + p * (exp h - 1))\<^sup>2 +
+                              p * exp h / (1 + p * (exp h - 1)))"
+  define Ls where "Ls = (\<lambda>n. [L, L', L''] ! n)"
+
+  have [simp]: "L 0 = 0" "L' 0 = 0"
+    by (auto simp: L_def L'_def)
+
+  have L': "(L has_real_derivative L' x) (at x)" if "x \<in> {0..h}" for x
+  proof -
+    have "1 + p * (exp x - 1) > 0"
+      using \<open>p \<ge> 0\<close> that by (intro add_pos_nonneg mult_nonneg_nonneg) auto
+    thus ?thesis
+      unfolding L_def L'_def by (auto intro!: derivative_eq_intros)
+  qed
+
+  have L'': "(L' has_real_derivative L'' x) (at x)" if "x \<in> {0..h}" for x
+  proof -
+    have *: "1 + p * (exp x - 1) > 0"
+      using \<open>p \<ge> 0\<close> that by (intro add_pos_nonneg mult_nonneg_nonneg) auto
+    show ?thesis
+      unfolding L'_def L''_def
+      by (insert *, (rule derivative_eq_intros refl | simp)+) (auto simp: divide_simps; algebra)
+  qed
+
+  have diff: "\<forall>m t. m < 2 \<and> 0 \<le> t \<and> t \<le> h \<longrightarrow> (Ls m has_real_derivative Ls (Suc m) t) (at t)"
+    using L' L'' by (auto simp: Ls_def nth_Cons split: nat.splits)
+  from Taylor[of 2 Ls L 0 h 0 h, OF _ _ diff]
+    obtain t where t: "t \<in> {0<..<h}" "L h = L'' t * h\<^sup>2 / 2"
+      using \<open>h > 0\<close> by (auto simp: Ls_def lessThan_nat_numeral)
+  define u where "u = p * exp t / (1 + p * (exp t - 1))"
+
+  have "L'' t = u * (1 - u)"
+    by (simp add: L''_def u_def divide_simps; algebra)
+  also have "\<dots> \<le> 1 / 4"
+    using mult_const_minus_self_real_le[of u 1] by simp
+  finally have "L'' t \<le> 1 / 4" .
+
+  note t(2)
+  also have "L'' t * h\<^sup>2 / 2 \<le> (1 / 4) * h\<^sup>2 / 2"
+    using \<open>L'' t \<le> 1 / 4\<close> by (intro mult_right_mono divide_right_mono) auto
+  finally show "L h \<le> h\<^sup>2 / 8" by simp
+qed (auto simp: L_def)
+
+
+locale interval_bounded_random_variable = prob_space +
+  fixes f :: "'a \<Rightarrow> real" and a b :: real
+  assumes random_variable [measurable]: "random_variable borel f"
+  assumes AE_in_interval: "AE x in M. f x \<in> {a..b}"
+begin
+
+lemma integrable [intro]: "integrable M f"
+proof (rule integrable_const_bound)
+  show "AE x in M. norm (f x) \<le> max \<bar>a\<bar> \<bar>b\<bar>"
+    by (intro eventually_mono[OF AE_in_interval]) auto
+qed (fact random_variable)
+
+text \<open>
+  We first show Hoeffding's lemma for distributions whose expectation is 0. The general
+  case will easily follow from this later.
+\<close>
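Informally, the zero-mean case proved next (\<open>Hoeffdings_lemma_nn_integral_0\<close>) states: if the random variable $X$ (written \<open>f\<close> in the locale) satisfies $X \in [a,b]$ almost surely and $\mathbb{E}[X] = 0$, then for every $\lambda > 0$
\[
  \mathbb{E}\bigl[e^{\lambda X}\bigr] \;\le\; e^{\lambda^{2}(b-a)^{2}/8},
\]
where the left-hand side appears as a non-negative integral in the formal statement.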
+lemma Hoeffdings_lemma_nn_integral_0:
+  assumes "l > 0" and E0: "expectation f = 0"
+  shows   "nn_integral M (\<lambda>x. exp (l * f x)) \<le> ennreal (exp (l\<^sup>2 * (b - a)\<^sup>2 / 8))"
+proof (cases "AE x in M. f x = 0")
+  case True
+  hence "nn_integral M (\<lambda>x. exp (l * f x)) = nn_integral M (\<lambda>x. ennreal 1)"
+    by (intro nn_integral_cong_AE) auto
+  also have "\<dots> = ennreal (expectation (\<lambda>_. 1))"
+    by (intro nn_integral_eq_integral) auto
+  finally show ?thesis by (simp add: prob_space)
+next
+  case False
+  have "a < 0"
+  proof (rule ccontr)
+    assume a: "\<not>(a < 0)"
+    have "AE x in M. f x = 0"
+    proof (subst integral_nonneg_eq_0_iff_AE [symmetric])
+      show "AE x in M. f x \<ge> 0"
+        using AE_in_interval by eventually_elim (use a in auto)
+    qed (use E0 in \<open>auto simp: id_def integrable\<close>)
+    with False show False by contradiction
+  qed
+
+  have "b > 0"
+  proof (rule ccontr)
+    assume b: "\<not>(b > 0)"
+    have "AE x in M. -f x = 0"
+    proof (subst integral_nonneg_eq_0_iff_AE [symmetric])
+      show "AE x in M. -f x \<ge> 0"
+        using AE_in_interval by eventually_elim (use b in auto)
+    qed (use E0 in \<open>auto simp: id_def integrable\<close>)
+    with False show False by simp
+  qed
+    
+  have "a < b"
+    using \<open>a < 0\<close> \<open>b > 0\<close> by linarith
+
+  define p where "p = -a / (b - a)"
+  define L where "L = (\<lambda>t. -t* p + ln (1 - p + p * exp t))"
+  define z where "z = l * (b - a)"
+  have "z > 0"
+    unfolding z_def using \<open>a < b\<close> \<open>l > 0\<close> by auto
+  have "p > 0"
+    using \<open>a < 0\<close> \<open>a < b\<close> unfolding p_def by (intro divide_pos_pos) auto
+
+  have "(\<integral>\<^sup>+x. exp (l * f x) \<partial>M) \<le>
+        (\<integral>\<^sup>+x. (b - f x) / (b - a) * exp (l * a) + (f x - a) / (b - a) * exp (l * b) \<partial>M)"
+  proof (intro nn_integral_mono_AE eventually_mono[OF AE_in_interval] ennreal_leI)
+    fix x assume x: "f x \<in> {a..b}"
+    define y where "y = (b - f x) / (b-a)"
+    have y: "y \<in> {0..1}"
+      using x \<open>a < b\<close> by (auto simp: y_def)
+    have conv: "convex_on UNIV (\<lambda>x. exp(l*x))"
+      using \<open>l > 0\<close> by (intro convex_on_exp) auto
+    have "exp (l * ((1 - y) *\<^sub>R b + y *\<^sub>R a)) \<le> (1 - y) * exp (l * b) + y * exp (l * a)"
+      using y \<open>l > 0\<close> by (intro convex_onD[OF convex_on_exp]) auto
+    also have "(1 - y) *\<^sub>R b + y *\<^sub>R a = f x"
+      using \<open>a < b\<close> by (simp add: y_def divide_simps) (simp add: algebra_simps)?
+    also have "1 - y = (f x - a) / (b - a)"
+      using \<open>a < b\<close> by (simp add: field_simps y_def)
+    finally show "exp (l * f x) \<le> (b - f x) / (b - a) * exp (l*a) + (f x - a)/(b-a) * exp (l*b)"
+      by (simp add: y_def)
+  qed
+  also have "\<dots> = (\<integral>\<^sup>+x. ennreal (b - f x) * exp (l * a) / (b - a) +
+                        ennreal (f x - a) * exp (l * b) / (b - a) \<partial>M)"
+    using \<open>a < 0\<close> \<open>b > 0\<close>
+    by (intro nn_integral_cong_AE eventually_mono[OF AE_in_interval])
+       (simp add: ennreal_plus ennreal_mult flip: divide_ennreal)
+  also have "\<dots> = ((\<integral>\<^sup>+ x. ennreal (b - f x) \<partial>M) * ennreal (exp (l * a)) +
+                   (\<integral>\<^sup>+ x. ennreal (f x - a) \<partial>M) * ennreal (exp (l * b))) / ennreal (b - a)"
+    by (simp add: nn_integral_add nn_integral_divide nn_integral_multc add_divide_distrib_ennreal)
+  also have "(\<integral>\<^sup>+ x. ennreal (b - f x) \<partial>M) = ennreal (expectation (\<lambda>x. b - f x))"
+    by (intro nn_integral_eq_integral Bochner_Integration.integrable_diff
+              eventually_mono[OF AE_in_interval] integrable_const integrable) auto
+  also have "expectation (\<lambda>x. b - f x) = b"
+    using assms by (subst Bochner_Integration.integral_diff) (auto simp: prob_space)
+  also have "(\<integral>\<^sup>+ x. ennreal (f x - a) \<partial>M) = ennreal (expectation (\<lambda>x. f x - a))"
+    by (intro nn_integral_eq_integral Bochner_Integration.integrable_diff
+              eventually_mono[OF AE_in_interval] integrable_const integrable) auto
+  also have "expectation (\<lambda>x. f x - a) = (-a)"
+    using assms by (subst Bochner_Integration.integral_diff) (auto simp: prob_space)
+  also have "(ennreal b * (exp (l * a)) + ennreal (-a) * (exp (l * b))) / (b - a) =
+             ennreal (b * exp (l * a) - a * exp (l * b)) / ennreal (b - a)"
+    using \<open>a < 0\<close> \<open>b > 0\<close>
+    by (simp flip: ennreal_mult ennreal_plus add: mult_nonpos_nonneg divide_ennreal mult_mono)
+  also have "b * exp (l * a) - a * exp (l * b) = exp (L z) * (b - a)"
+  proof -
+    have pos: "1 - p + p * exp z > 0"
+    proof -
+      have "exp z > 1" using \<open>l > 0\<close> and \<open>a < b\<close>
+        by (subst one_less_exp_iff) (auto simp: z_def intro!: mult_pos_pos)
+      hence "(exp z - 1) * p \<ge> 0"
+        unfolding p_def using \<open>a < 0\<close> and \<open>a < b\<close>
+        by (intro mult_nonneg_nonneg divide_nonneg_pos) auto
+      thus ?thesis
+        by (simp add: algebra_simps)
+    qed
+
+    have "exp (L z) * (b - a) = exp (-z * p) * (1 - p + p * exp z) * (b - a)"
+      using pos by (simp add: exp_add L_def exp_diff exp_minus divide_simps)
+    also have "\<dots> = b * exp (l * a) - a * exp (l * b)" using \<open>a < b\<close>
+      by (simp add: p_def z_def divide_simps) (simp add:  exp_diff algebra_simps)?
+    finally show ?thesis by simp
+  qed
+  also have "ennreal (exp (L z) * (b - a)) / ennreal (b - a) = ennreal (exp (L z))"
+    using \<open>a < b\<close> by (simp add: divide_ennreal)
+  also have "L z = -z * p + ln (1 + p * (exp z - 1))"
+    by (simp add: L_def algebra_simps)
+  also have "\<dots> \<le> z\<^sup>2 / 8"
+    unfolding L_def by (rule Hoeffdings_lemma_aux[where p = p]) (use \<open>z > 0\<close> \<open>p > 0\<close> in simp_all)
+  hence "ennreal (exp (-z * p + ln (1 + p * (exp z - 1)))) \<le> ennreal (exp (z\<^sup>2 / 8))"
+    by (intro ennreal_leI) auto
+  finally show ?thesis
+    by (simp add: z_def power_mult_distrib)
+qed
+
+context
+begin
+
+interpretation shift: interval_bounded_random_variable M "\<lambda>x. f x - \<mu>" "a - \<mu>" "b - \<mu>"
+  rewrites "b - \<mu> - (a - \<mu>) \<equiv> b - a"
+  by unfold_locales (auto intro!: eventually_mono[OF AE_in_interval])
+
+lemma expectation_shift: "expectation (\<lambda>x. f x - expectation f) = 0"
+  by (subst Bochner_Integration.integral_diff) (auto simp: integrable prob_space)
+
+lemmas Hoeffdings_lemma_nn_integral = shift.Hoeffdings_lemma_nn_integral_0[OF _ expectation_shift]
+
+end
+
+end
+
+
+
+subsection \<open>Hoeffding's Inequality\<close>
+
+text \<open>
+  Consider \<open>n\<close> independent real random variables $X_1, \ldots, X_n$ that each almost surely lie
+  in a compact interval $[a_i, b_i]$. Hoeffding's inequality states that the distribution of the
+  sum of the $X_i$ is tightly concentrated around the sum of the expected values: the probability
+  of it being above or below the sum of the expected values by more than some \<open>\<epsilon>\<close> decreases
+  exponentially with \<open>\<epsilon>\<close>.
+\<close>
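As an informal paraphrase of \<open>Hoeffding_ineq_ge\<close> below: for independent $X_1, \ldots, X_n$ with $X_i \in [a_i, b_i]$ almost surely, $\varepsilon \ge 0$ and $\sum_i (b_i - a_i)^2 > 0$,
\[
  \mathbb{P}\Bigl[\,\sum_{i=1}^{n} X_i \;\ge\; \sum_{i=1}^{n} \mathbb{E}[X_i] + \varepsilon\Bigr]
  \;\le\;
  \exp\Bigl(\frac{-2\varepsilon^{2}}{\sum_{i=1}^{n} (b_i - a_i)^{2}}\Bigr),
\]
with analogous lower-tail and two-sided bounds (\<open>Hoeffding_ineq_le\<close>, \<open>Hoeffding_ineq_abs_ge\<close>).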
+
+locale indep_interval_bounded_random_variables = prob_space +
+  fixes I :: "'b set" and X :: "'b \<Rightarrow> 'a \<Rightarrow> real"
+  fixes a b :: "'b \<Rightarrow> real"
+  assumes fin: "finite I"
+  assumes indep: "indep_vars (\<lambda>_. borel) X I"
+  assumes AE_in_interval: "\<And>i. i \<in> I \<Longrightarrow> AE x in M. X i x \<in> {a i..b i}"
+begin
+
+lemma random_variable [measurable]:
+  assumes i: "i \<in> I"
+  shows "random_variable borel (X i)"
+  using i indep unfolding indep_vars_def by blast
+
+lemma bounded_random_variable [intro]:
+  assumes i: "i \<in> I"
+  shows   "interval_bounded_random_variable M (X i) (a i) (b i)"
+  by unfold_locales (use AE_in_interval[OF i] i in auto)
+
+end
+
+
+locale Hoeffding_ineq = indep_interval_bounded_random_variables +
+  fixes \<mu> :: real
+  defines "\<mu> \<equiv> (\<Sum>i\<in>I. expectation (X i))"
+begin
+
+theorem%important Hoeffding_ineq_ge:
+  assumes "\<epsilon> \<ge> 0"
+  assumes "(\<Sum>i\<in>I. (b i - a i)\<^sup>2) > 0"
+  shows   "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+proof (cases "\<epsilon> = 0")
+  case [simp]: True
+  have "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} \<le> 1"
+    by simp
+  thus ?thesis by simp
+next
+  case False
+  with \<open>\<epsilon> \<ge> 0\<close> have \<epsilon>: "\<epsilon> > 0"
+    by auto
+
+  define d where "d = (\<Sum>i\<in>I. (b i - a i)\<^sup>2)"
+  define l :: real where "l = 4 * \<epsilon> / d"
+  have d: "d > 0"
+    using assms by (simp add: d_def)
+  have l: "l > 0"
+    using \<epsilon> d by (simp add: l_def)
+  define \<mu>' where "\<mu>' = (\<lambda>i. expectation (X i))"
+
+  have "{x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} = {x\<in>space M. (\<Sum>i\<in>I. X i x) - \<mu> \<ge> \<epsilon>}"
+    by (simp add: algebra_simps)
+  hence "ennreal (prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>}) = emeasure M \<dots>"
+    by (simp add: emeasure_eq_measure)
+  also have "\<dots> \<le> ennreal (exp (-l*\<epsilon>)) * (\<integral>\<^sup>+x\<in>space M. exp (l * ((\<Sum>i\<in>I. X i x) - \<mu>)) \<partial>M)"
+    by (intro Chernoff_ineq_nn_integral_ge l) auto
+  also have "(\<lambda>x. (\<Sum>i\<in>I. X i x) - \<mu>) = (\<lambda>x. (\<Sum>i\<in>I. X i x - \<mu>' i))"
+    by (simp add: \<mu>_def sum_subtractf \<mu>'_def)
+  also have "(\<integral>\<^sup>+x\<in>space M. exp (l * ((\<Sum>i\<in>I. X i x - \<mu>' i))) \<partial>M) =
+             (\<integral>\<^sup>+x. (\<Prod>i\<in>I. ennreal (exp (l * (X i x - \<mu>' i)))) \<partial>M)"
+    by (intro nn_integral_cong)
+       (simp_all add: sum_distrib_left ring_distribs exp_diff exp_sum fin prod_ennreal)
+  also have "\<dots> = (\<Prod>i\<in>I. \<integral>\<^sup>+x. ennreal (exp (l * (X i x - \<mu>' i))) \<partial>M)"
+    by (intro indep_vars_nn_integral fin indep_vars_compose2[OF indep]) auto
+  also have "ennreal (exp (-l * \<epsilon>)) * \<dots> \<le>
+               ennreal (exp (-l * \<epsilon>)) * (\<Prod>i\<in>I. ennreal (exp (l\<^sup>2 * (b i - a i)\<^sup>2 / 8)))"
+  proof (intro mult_left_mono prod_mono_ennreal)
+    fix i assume i: "i \<in> I"
+    from i interpret interval_bounded_random_variable M "X i" "a i" "b i" ..
+    show "(\<integral>\<^sup>+x. ennreal (exp (l * (X i x - \<mu>' i))) \<partial>M) \<le> ennreal (exp (l\<^sup>2 * (b i - a i)\<^sup>2 / 8))"
+      unfolding \<mu>'_def by (rule Hoeffdings_lemma_nn_integral) fact+
+  qed auto
+  also have "\<dots> = ennreal (exp (-l*\<epsilon>) * (\<Prod>i\<in>I. exp (l\<^sup>2 * (b i - a i)\<^sup>2 / 8)))"
+    by (simp add: prod_ennreal prod_nonneg flip: ennreal_mult)
+  also have "exp (-l*\<epsilon>) * (\<Prod>i\<in>I. exp (l\<^sup>2 * (b i - a i)\<^sup>2 / 8)) = exp (d * l\<^sup>2 / 8 - l * \<epsilon>)"
+    by (simp add: exp_diff exp_minus sum_divide_distrib sum_distrib_left
+                  sum_distrib_right exp_sum fin divide_simps mult_ac d_def)
+  also have "d * l\<^sup>2 / 8 - l * \<epsilon> = -2 * \<epsilon>\<^sup>2 / d"
+    using d by (simp add: l_def field_simps power2_eq_square)
+  finally show ?thesis
+    by (subst (asm) ennreal_le_iff) (simp_all add: d_def)
+qed
+
+corollary Hoeffding_ineq_le:
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  assumes "(\<Sum>i\<in>I. (b i - a i)\<^sup>2) > 0"
+  shows   "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> - \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+proof -
+  interpret flip: Hoeffding_ineq M I "\<lambda>i x. -X i x" "\<lambda>i. -b i" "\<lambda>i. -a i" "-\<mu>"
+  proof unfold_locales
+    fix i assume "i \<in> I"
+    then interpret interval_bounded_random_variable M "X i" "a i" "b i" ..
+    show "AE x in M. - X i x \<in> {- b i..- a i}"
+      by (intro eventually_mono[OF AE_in_interval]) auto
+  qed (auto simp: fin \<mu>_def sum_negf intro: indep_vars_compose2[OF indep])
+
+  have "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> - \<epsilon>} = prob {x\<in>space M. (\<Sum>i\<in>I. -X i x) \<ge> -\<mu> + \<epsilon>}"
+    by (simp add: sum_negf algebra_simps)
+  also have "\<dots> \<le> exp (- 2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+    using flip.Hoeffding_ineq_ge[OF \<epsilon>] assms(2) by simp
+  finally show ?thesis .
+qed
+
+corollary Hoeffding_ineq_abs_ge:
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  assumes "(\<Sum>i\<in>I. (b i - a i)\<^sup>2) > 0"
+  shows   "prob {x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) - \<mu>\<bar> \<ge> \<epsilon>} \<le> 2 * exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+proof -
+  have "{x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) - \<mu>\<bar> \<ge> \<epsilon>} =
+        {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} \<union> {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> - \<epsilon>}"
+    by auto
+  also have "prob \<dots> \<le> prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} +
+                       prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> - \<epsilon>}"
+    by (intro measure_Un_le) auto
+  also have "\<dots> \<le> exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2)) + exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+    by (intro add_mono Hoeffding_ineq_ge Hoeffding_ineq_le assms)
+  finally show ?thesis by simp
+qed
+
+end
+
+
+subsection \<open>Hoeffding's inequality for i.i.d. bounded random variables\<close>
+
+text \<open>
+  If the \<open>n\<close> random variables are additionally identically distributed, the statement of
+  Hoeffding's inequality simplifies a bit more: it shows that the probability that the average
+  of the $X_i$ is more than \<open>\<epsilon>\<close> above the expected value is no greater than $e^{\frac{-2n\epsilon^2}{(b-a)^2}}$.
+
+  This essentially gives us a more concrete version of the weak law of large numbers: the law
+  states that the probability vanishes for \<open>n \<rightarrow> \<infinity>\<close> for any \<open>\<epsilon> > 0\<close>. Unlike Hoeffding's inequality,
+  it does not assume the variables to have bounded support, but it does not provide concrete bounds.
+\<close>
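Informally, the averaged form \<open>Hoeffding_ineq_ge'\<close> proved below reads: for i.i.d. $X_1, \ldots, X_n$ with values in $[a,b]$, $a < b$, common mean $\mu$ and $\varepsilon \ge 0$,
\[
  \mathbb{P}\Bigl[\,\frac{1}{n}\sum_{i=1}^{n} X_i \;\ge\; \mu + \varepsilon\Bigr] \;\le\; e^{-2n\varepsilon^{2}/(b-a)^{2}},
\]
again with lower-tail and two-sided variants.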
+
+locale iid_interval_bounded_random_variables = prob_space +
+  fixes I :: "'b set" and X :: "'b \<Rightarrow> 'a \<Rightarrow> real" and Y :: "'a \<Rightarrow> real"
+  fixes a b :: real
+  assumes fin: "finite I"
+  assumes indep: "indep_vars (\<lambda>_. borel) X I"
+  assumes distr_X: "i \<in> I \<Longrightarrow> distr M borel (X i) = distr M borel Y"
+  assumes rv_Y [measurable]: "random_variable borel Y"
+  assumes AE_in_interval: "AE x in M. Y x \<in> {a..b}"
+begin
+
+lemma random_variable [measurable]:
+  assumes i: "i \<in> I"
+  shows "random_variable borel (X i)"
+  using i indep unfolding indep_vars_def by blast
+
+sublocale X: indep_interval_bounded_random_variables M I X "\<lambda>_. a" "\<lambda>_. b"
+proof
+  fix i assume i: "i \<in> I"
+  have "AE x in M. Y x \<in> {a..b}"
+    by (fact AE_in_interval)
+  also have "?this \<longleftrightarrow> (AE x in distr M borel Y. x \<in> {a..b})"
+    by (subst AE_distr_iff) auto
+  also have "distr M borel Y = distr M borel (X i)"
+    using i by (simp add: distr_X)
+  also have "(AE x in \<dots>. x \<in> {a..b}) \<longleftrightarrow> (AE x in M. X i x \<in> {a..b})"
+    using i by (subst AE_distr_iff) auto
+  finally show "AE x in M. X i x \<in> {a..b}" .
+qed (simp_all add: fin indep)
+
+lemma expectation_X [simp]:
+  assumes i: "i \<in> I"
+  shows "expectation (X i) = expectation Y"
+proof -
+  have "expectation (X i) = lebesgue_integral (distr M borel (X i)) (\<lambda>x. x)"
+    using i by (intro integral_distr [symmetric]) auto
+  also have "distr M borel (X i) = distr M borel Y"
+    using i by (rule distr_X)
+  also have "lebesgue_integral \<dots> (\<lambda>x. x) = expectation Y"
+    by (rule integral_distr) auto
+  finally show "expectation (X i) = expectation Y" .
+qed
+
+end
+
+
+locale Hoeffding_ineq_iid = iid_interval_bounded_random_variables +
+  fixes \<mu> :: real
+  defines "\<mu> \<equiv> expectation Y"
+begin
+
+sublocale X: Hoeffding_ineq M I X "\<lambda>_. a" "\<lambda>_. b" "real (card I) * \<mu>"
+  by unfold_locales (simp_all add: \<mu>_def)
+
+corollary
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  assumes "a < b" "I \<noteq> {}"
+  defines "n \<equiv> card I"
+  shows   Hoeffding_ineq_ge:
+            "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> n * \<mu> + \<epsilon>} \<le>
+               exp (-2 * \<epsilon>\<^sup>2 / (n * (b - a)\<^sup>2))" (is ?le)
+    and   Hoeffding_ineq_le:
+            "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> n * \<mu> - \<epsilon>} \<le>
+               exp (-2 * \<epsilon>\<^sup>2 / (n * (b - a)\<^sup>2))" (is ?ge)
+    and   Hoeffding_ineq_abs_ge:
+            "prob {x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) - n * \<mu>\<bar> \<ge> \<epsilon>} \<le>
+               2 * exp (-2 * \<epsilon>\<^sup>2 / (n * (b - a)\<^sup>2))" (is ?abs_ge)
+proof -
+  have pos: "(\<Sum>i\<in>I. (b - a)\<^sup>2) > 0"
+    using \<open>a < b\<close> \<open>I \<noteq> {}\<close> fin by (intro sum_pos) auto
+  show ?le
+    using X.Hoeffding_ineq_ge[OF \<epsilon> pos] by (simp add: n_def)
+  show ?ge
+    using X.Hoeffding_ineq_le[OF \<epsilon> pos] by (simp add: n_def)
+  show ?abs_ge
+    using X.Hoeffding_ineq_abs_ge[OF \<epsilon> pos] by (simp add: n_def)
+qed
+
+lemma 
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  assumes "a < b" "I \<noteq> {}"
+  defines "n \<equiv> card I"
+  shows   Hoeffding_ineq_ge':
+            "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) / n \<ge> \<mu> + \<epsilon>} \<le>
+               exp (-2 * n * \<epsilon>\<^sup>2 / (b - a)\<^sup>2)" (is ?ge)
+    and   Hoeffding_ineq_le':
+            "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) / n \<le> \<mu> - \<epsilon>} \<le>
+               exp (-2 * n * \<epsilon>\<^sup>2 / (b - a)\<^sup>2)" (is ?le)
+    and   Hoeffding_ineq_abs_ge':
+            "prob {x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) / n - \<mu>\<bar> \<ge> \<epsilon>} \<le>
+               2 * exp (-2 * n * \<epsilon>\<^sup>2 / (b - a)\<^sup>2)" (is ?abs_ge)
+proof -
+  have "n > 0"
+    using assms fin by (auto simp: field_simps)
+  have \<epsilon>': "\<epsilon> * n \<ge> 0"
+    using \<open>n > 0\<close> \<open>\<epsilon> \<ge> 0\<close> by auto
+  have eq: "- (2 * (\<epsilon> * real n)\<^sup>2 / (real (card I) * (b - a)\<^sup>2)) =
+            - (2 * real n * \<epsilon>\<^sup>2 / (b - a)\<^sup>2)"
+    using \<open>n > 0\<close> by (simp add: power2_eq_square divide_simps n_def)
+
+  have "{x\<in>space M. (\<Sum>i\<in>I. X i x) / n \<ge> \<mu> + \<epsilon>} =
+        {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> * n + \<epsilon> * n}"
+    using \<open>n > 0\<close> by (intro Collect_cong conj_cong refl) (auto simp: field_simps)
+  with Hoeffding_ineq_ge[OF \<epsilon>' \<open>a < b\<close> \<open>I \<noteq> {}\<close>] \<open>n > 0\<close> eq show ?ge
+    by (simp add: n_def mult_ac)
+
+  have "{x\<in>space M. (\<Sum>i\<in>I. X i x) / n \<le> \<mu> - \<epsilon>} =
+        {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> * n - \<epsilon> * n}"
+    using \<open>n > 0\<close> by (intro Collect_cong conj_cong refl) (auto simp: field_simps)
+  with Hoeffding_ineq_le[OF \<epsilon>' \<open>a < b\<close> \<open>I \<noteq> {}\<close>] \<open>n > 0\<close> eq show ?le
+    by (simp add: n_def mult_ac)
+
+  have "{x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) / n - \<mu>\<bar> \<ge> \<epsilon>} =
+        {x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) - \<mu> * n\<bar> \<ge> \<epsilon> * n}"
+    using \<open>n > 0\<close> by (intro Collect_cong conj_cong refl) (auto simp: field_simps)
+  with Hoeffding_ineq_abs_ge[OF \<epsilon>' \<open>a < b\<close> \<open>I \<noteq> {}\<close>] \<open>n > 0\<close> eq show ?abs_ge
+    by (simp add: n_def mult_ac)
+qed
+
+end
+
+
+subsection \<open>Hoeffding's Inequality for the Binomial distribution\<close>
+
+text \<open>
+  We can now apply Hoeffding's inequality to the Binomial distribution, which can be seen
+  as the sum of \<open>n\<close> i.i.d. coin flips (the support of each of which is contained in $[0,1]$).
+\<close>
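Specialised to $X \sim \mathrm{Binomial}(n, p)$ with $n > 0$, the corollaries \<open>prob_ge\<close>, \<open>prob_le\<close> and \<open>prob_abs_ge\<close> below read, informally, for $\varepsilon \ge 0$:
\[
  \mathbb{P}[X \ge np + \varepsilon] \;\le\; e^{-2\varepsilon^{2}/n}, \qquad
  \mathbb{P}[X \le np - \varepsilon] \;\le\; e^{-2\varepsilon^{2}/n}, \qquad
  \mathbb{P}\bigl[\,|X - np| \ge \varepsilon\,\bigr] \;\le\; 2 e^{-2\varepsilon^{2}/n}.
\]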
+
+locale binomial_distribution =
+  fixes n :: nat and p :: real
+  assumes p: "p \<in> {0..1}"
+begin
+
+context
+  fixes coins :: "(nat \<Rightarrow> bool) pmf" and \<mu>
+  assumes n: "n > 0"
+  defines "coins \<equiv> Pi_pmf {..<n} False (\<lambda>_. bernoulli_pmf p)"
+begin
+
+lemma coins_component:
+  assumes i: "i < n"
+  shows   "distr (measure_pmf coins) borel (\<lambda>f. if f i then 1 else 0) =
+             distr (measure_pmf (bernoulli_pmf p)) borel (\<lambda>b. if b then 1 else 0)"
+proof -
+  have "distr (measure_pmf coins) borel (\<lambda>f. if f i then 1 else 0) =
+        distr (measure_pmf (map_pmf (\<lambda>f. f i) coins)) borel (\<lambda>b. if b then 1 else 0)"
+    unfolding map_pmf_rep_eq by (subst distr_distr) (auto simp: o_def)
+  also have "map_pmf (\<lambda>f. f i) coins = bernoulli_pmf p"
+    unfolding coins_def using i by (subst Pi_pmf_component) auto
+  finally show ?thesis
+    unfolding map_pmf_rep_eq .
+qed
+
+lemma prob_binomial_pmf_conv_coins:
+  "measure_pmf.prob (binomial_pmf n p) {x. P (real x)} = 
+   measure_pmf.prob coins {x. P (\<Sum>i<n. if x i then 1 else 0)}"
+proof -
+  have eq1: "(\<Sum>i<n. if x i then 1 else 0) = real (card {i\<in>{..<n}. x i})" for x
+  proof -
+    have "(\<Sum>i<n. if x i then 1 else (0::real)) = (\<Sum>i\<in>{i\<in>{..<n}. x i}. 1)"
+      by (intro sum.mono_neutral_cong_right) auto
+    thus ?thesis by simp
+  qed
+  have eq2: "binomial_pmf n p = map_pmf (\<lambda>v. card {i\<in>{..<n}. v i}) coins"
+    unfolding coins_def by (rule binomial_pmf_altdef') (use p in auto)
+  show ?thesis
+    by (subst eq2) (simp_all add: eq1)
+qed
+
+interpretation Hoeffding_ineq_iid
+  coins "{..<n}" "\<lambda>i f. if f i then 1 else 0" "\<lambda>f. if f 0 then 1 else 0" 0 1 p
+proof unfold_locales
+  show "prob_space.indep_vars (measure_pmf coins) (\<lambda>_. borel) (\<lambda>i f. if f i then 1 else 0) {..<n}"
+    unfolding coins_def
+    by (intro prob_space.indep_vars_compose2[OF _ indep_vars_Pi_pmf])
+       (auto simp: measure_pmf.prob_space_axioms)
+next
+  have "measure_pmf.expectation coins (\<lambda>f. if f 0 then 1 else 0 :: real) =
+        measure_pmf.expectation (map_pmf (\<lambda>f. f 0) coins) (\<lambda>b. if b then 1 else 0 :: real)"
+    by (simp add: coins_def)
+  also have "map_pmf (\<lambda>f. f 0) coins = bernoulli_pmf p"
+    using n by (simp add: coins_def Pi_pmf_component)
+  also have "measure_pmf.expectation \<dots> (\<lambda>b. if b then 1 else 0) = p"
+    using p by simp
+  finally show "p \<equiv> measure_pmf.expectation coins (\<lambda>f. if f 0 then 1 else 0)" by simp
+qed (auto simp: coins_component)
+
+corollary
+  fixes \<epsilon> :: real
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  shows prob_ge: "measure_pmf.prob (binomial_pmf n p) {x. x \<ge> n * p + \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / n)"
+    and prob_le: "measure_pmf.prob (binomial_pmf n p) {x. x \<le> n * p - \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / n)"
+    and prob_abs_ge:
+          "measure_pmf.prob (binomial_pmf n p) {x. \<bar>x - n * p\<bar> \<ge> \<epsilon>} \<le> 2 * exp (-2 * \<epsilon>\<^sup>2 / n)"
+proof -
+  have [simp]: "{..<n} \<noteq> {}"
+    using n by auto
+  show "measure_pmf.prob (binomial_pmf n p) {x. x \<ge> n * p + \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / n)"
+    using Hoeffding_ineq_ge[of \<epsilon>] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
+  show "measure_pmf.prob (binomial_pmf n p) {x. x \<le> n * p - \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / n)"
+    using Hoeffding_ineq_le[of \<epsilon>] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
+  show "measure_pmf.prob (binomial_pmf n p) {x. \<bar>x - n * p\<bar> \<ge> \<epsilon>} \<le> 2 *  exp (-2 * \<epsilon>\<^sup>2 / n)"
+    using Hoeffding_ineq_abs_ge[of \<epsilon>]
+    by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
+qed
+
+corollary
+  fixes \<epsilon> :: real
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  shows prob_ge': "measure_pmf.prob (binomial_pmf n p) {x. x / n \<ge> p + \<epsilon>} \<le> exp (-2 * n * \<epsilon>\<^sup>2)"
+    and prob_le': "measure_pmf.prob (binomial_pmf n p) {x. x / n \<le> p - \<epsilon>} \<le> exp (-2 * n * \<epsilon>\<^sup>2)"
+    and prob_abs_ge':
+          "measure_pmf.prob (binomial_pmf n p) {x. \<bar>x / n - p\<bar> \<ge> \<epsilon>} \<le> 2 * exp (-2 * n * \<epsilon>\<^sup>2)"
+proof -
+  have [simp]: "{..<n} \<noteq> {}"
+    using n by auto
+  show "measure_pmf.prob (binomial_pmf n p) {x. x / n \<ge> p + \<epsilon>} \<le> exp (-2 * n * \<epsilon>\<^sup>2)"
+    using Hoeffding_ineq_ge'[of \<epsilon>] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
+  show "measure_pmf.prob (binomial_pmf n p) {x. x / n \<le> p - \<epsilon>} \<le> exp (-2 * n * \<epsilon>\<^sup>2)"
+    using Hoeffding_ineq_le'[of \<epsilon>] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
+  show "measure_pmf.prob (binomial_pmf n p) {x. \<bar>x / n - p\<bar> \<ge> \<epsilon>} \<le> 2 * exp (-2 * n * \<epsilon>\<^sup>2)"
+    using Hoeffding_ineq_abs_ge'[of \<epsilon>]
+    by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
+qed
+
+end
+
+end
+
+
+subsection \<open>Tail bounds for the negative binomial distribution\<close>
+
+text \<open>
+  Since each tail probability of a negative binomial distribution equals a tail probability
+  of a suitable binomial distribution, we can obtain tail bounds for the negative binomial
+  distribution via the Hoeffding tail bounds for the binomial distribution.
+\<close>
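+
+text \<open>
+  Informally, the connection is a counting argument: if, for a threshold $k$, \<open>X\<close> is the
+  number of failures before the \<open>n\<close>-th success and \<open>Y\<close> is the number of failures among the
+  first $n+k-1$ trials, then $X < k$ holds iff the \<open>n\<close>-th success occurs within the first
+  $n+k-1$ trials, which holds iff $Y < k$. Since \<open>Y\<close> is binomially distributed with parameters
+  $n+k-1$ and $1-p$, the corresponding tail probabilities coincide
+  (cf.\ \<open>prob_neg_binomial_pmf_lessThan\<close>, which is used below).
+\<close>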
+
+context
+  fixes p q :: real
+  assumes p: "p \<in> {0<..<1}"
+  defines "q \<equiv> 1 - p"
+begin
+
+lemma prob_neg_binomial_pmf_ge_bound:
+  fixes n :: nat and k :: real
+  defines "\<mu> \<equiv> real n * q / p"
+  assumes k: "k \<ge> 0"
+  shows "measure_pmf.prob (neg_binomial_pmf n p) {x. real x \<ge> \<mu> + k}
+         \<le> exp (- 2 * p ^ 3 * k\<^sup>2 / (n + p * k))"
+proof -
+  consider "n = 0" | "p = 1" | "n > 0" "p \<noteq> 1"
+    by blast
+  thus ?thesis
+  proof cases
+    assume [simp]: "n = 0"
+    show ?thesis using k
+      by (simp add: indicator_def \<mu>_def)
+  next
+    assume [simp]: "p = 1"
+    show ?thesis using k
+      by (auto simp add: indicator_def \<mu>_def q_def)
+  next
+    assume n: "n > 0" and "p \<noteq> 1"
+    from \<open>p \<noteq> 1\<close> and p have p: "p \<in> {0<..<1}"
+      by auto
+    from p have q: "q \<in> {0<..<1}"
+      by (auto simp: q_def)
+
+    define k1 where "k1 = \<mu> + k"
+    have k1: "k1 \<ge> \<mu>"
+      using k by (simp add: k1_def)
+    have "k1 > 0"
+      by (rule less_le_trans[OF _ k1]) (use p n in \<open>auto simp: q_def \<mu>_def\<close>)
+  
+    define k1' where "k1' = nat (ceiling k1)"
+    have "\<mu> \<ge> 0" using p
+      by (auto simp: \<mu>_def q_def)
+    have "\<not>(x < k1') \<longleftrightarrow> real x \<ge> k1" for x
+      unfolding k1'_def by linarith
+    hence eq: "UNIV - {..<k1'} = {x. x \<ge> k1}"
+      by auto
+    hence "measure_pmf.prob (neg_binomial_pmf n p) {n. n \<ge> k1} =
+          1 - measure_pmf.prob (neg_binomial_pmf n p) {..<k1'}"
+      using measure_pmf.prob_compl[of "{..<k1'}" "neg_binomial_pmf n p"] by simp
+    also have "measure_pmf.prob (neg_binomial_pmf n p) {..<k1'} =
+               measure_pmf.prob (binomial_pmf (n + k1' - 1) q) {..<k1'}"
+      unfolding q_def using p by (intro prob_neg_binomial_pmf_lessThan) auto
+    also have "1 - \<dots> = measure_pmf.prob (binomial_pmf (n + k1' - 1) q) {n. n \<ge> k1}"
+      using measure_pmf.prob_compl[of "{..<k1'}" "binomial_pmf (n + k1' - 1) q"] eq by simp
+    also have "{x. real x \<ge> k1} = {x. x \<ge> real (n + k1' - 1) * q + (k1 - real (n + k1' - 1) * q)}"
+      by simp
+    also have "measure_pmf.prob (binomial_pmf (n + k1' - 1) q) \<dots> \<le>
+                 exp (-2 * (k1 - real (n + k1' - 1) * q)\<^sup>2 / real (n + k1' - 1))"
+    proof (rule binomial_distribution.prob_ge)
+      show "binomial_distribution q"
+        by unfold_locales (use q in auto)
+    next
+      show "n + k1' - 1 > 0"
+        using \<open>k1 > 0\<close> n unfolding k1'_def by linarith
+    next
+      have "real (n + nat \<lceil>k1\<rceil> - 1) \<le> real n + k1"
+        using \<open>k1 > 0\<close> by linarith
+      hence "real (n + k1' - 1) * q  \<le> (real n + k1) * q"
+        unfolding k1'_def by (intro mult_right_mono) (use p in \<open>simp_all add: q_def\<close>)
+      also have "\<dots> \<le> k1"
+        using k1 p by (simp add: q_def field_simps \<mu>_def)
+      finally show "0 \<le> k1 - real (n + k1' - 1) * q"
+        by simp
+    qed
+    also have "{x. real (n + k1' - 1) * q + (k1 - real (n + k1' - 1) * q) \<le> real x} = {x. real x \<ge> k1}"
+      by simp
+    also have "exp (-2 * (k1 - real (n + k1' - 1) * q)\<^sup>2 / real (n + k1' - 1)) \<le>
+               exp (-2 * (k1 - (n + k1) * q)\<^sup>2 / (n + k1))"
+    proof -
+      have "real (n + k1' - Suc 0) \<le> real n + k1"
+        unfolding k1'_def using \<open>k1 > 0\<close> by linarith
+      moreover have "(real n + k1) * q \<le> k1"
+        using k1 p by (auto simp: q_def field_simps \<mu>_def)
+      moreover have "1 < n + k1'"
+        using n \<open>k1 > 0\<close> unfolding k1'_def by linarith
+      ultimately have "2 * (k1 - real (n + k1' - 1) * q)\<^sup>2 / real (n + k1' - 1) \<ge>
+                       2 * (k1 - (n + k1) * q)\<^sup>2 / (n + k1)"
+        by (intro frac_le mult_left_mono power_mono mult_nonneg_nonneg mult_right_mono diff_mono)
+           (use q in simp_all)
+      thus ?thesis
+        by simp
+    qed
+    also have "\<dots> = exp (-2 * (p * k1 - q * n)\<^sup>2 / (k1 + n))"
+      by (simp add: q_def algebra_simps)
+    also have "-2 * (p * k1 - q * n)\<^sup>2 = -2 * p\<^sup>2 * (k1 - \<mu>)\<^sup>2"
+      using p by (auto simp: field_simps \<mu>_def)
+    also have "k1 - \<mu> = k"
+      by (simp add: k1_def \<mu>_def)
+    also note k1_def
+    also have "\<mu> + k + real n = real n / p + k"
+      using p by (simp add: \<mu>_def q_def field_simps)
+    also have "- 2 * p\<^sup>2 * k\<^sup>2 / (real n / p + k) = - 2 * p ^ 3 * k\<^sup>2 / (p * k + n)"
+      using p by (simp add: field_simps power3_eq_cube power2_eq_square)
+    finally show ?thesis by (simp add: add_ac)
+  qed
+qed
+
+lemma prob_neg_binomial_pmf_le_bound:
+  fixes n :: nat and k :: real
+  defines "\<mu> \<equiv> real n * q / p"
+  assumes k: "k \<ge> 0"
+  shows "measure_pmf.prob (neg_binomial_pmf n p) {x. real x \<le> \<mu> - k}
+         \<le> exp (-2 * p ^ 3 * k\<^sup>2 / (n - p * k))"
+proof -
+  consider "n = 0" | "p = 1" | "k > \<mu>" | "n > 0" "p \<noteq> 1" "k \<le> \<mu>"
+    by force
+  thus ?thesis
+  proof cases
+    assume [simp]: "n = 0"
+    show ?thesis using k
+      by (simp add: indicator_def \<mu>_def)
+  next
+    assume [simp]: "p = 1"
+    show ?thesis using k
+      by (auto simp add: indicator_def \<mu>_def q_def)
+  next
+    assume "k > \<mu>"
+    hence "{x. real x \<le> \<mu> - k} = {}"
+      by auto
+    thus ?thesis by simp
+  next
+    assume n: "n > 0" and "p \<noteq> 1" and "k \<le> \<mu>"
+    from \<open>p \<noteq> 1\<close> and p have p: "p \<in> {0<..<1}"
+      by auto
+    from p have q: "q \<in> {0<..<1}"
+      by (auto simp: q_def)
+
+    define f :: "real \<Rightarrow> real" where "f = (\<lambda>x. (p * x - q * n)\<^sup>2 / (x + n))"
+    have f_mono: "f x \<ge> f y" if "x \<ge> 0" "y \<le> n * q / p" "x \<le> y" for x y :: real
+      using that(3)
+    proof (rule DERIV_nonpos_imp_nonincreasing)
+      fix t assume t: "t \<ge> x" "t \<le> y"
+      have "x > -n"
+        using n \<open>x \<ge> 0\<close> by linarith
+      hence "(f has_field_derivative ((p * t - q * n) * (n * (1 + p) + p * t) / (n + t) ^ 2)) (at t)"
+        unfolding f_def using t
+        by (auto intro!: derivative_eq_intros simp: algebra_simps q_def power2_eq_square)
+      moreover {
+        have "p * t \<le> p * y"
+          using p by (intro mult_left_mono t) auto
+        also have "p * y \<le> q * n"
+          using \<open>y \<le> n * q / p\<close> p by (simp add: field_simps)
+        finally have "p * t \<le> q * n" .
+      }
+      hence "(p * t - q * n) * (n * (1 + p) + p * t) / (n + t) ^ 2 \<le> 0"
+        using p \<open>x \<ge> 0\<close> t
+        by (intro mult_nonpos_nonneg divide_nonpos_nonneg add_nonneg_nonneg mult_nonneg_nonneg) auto
+      ultimately show "\<exists>y. (f has_real_derivative y) (at t) \<and> y \<le> 0"
+        by blast
+    qed
+
+    define k1 where "k1 = \<mu> - k"
+    have k1: "k1 \<le> real n * q / p"
+      using assms by (simp add: \<mu>_def k1_def)
+    have "k1 \<ge> 0"
+      using k \<open>k \<le> \<mu>\<close> by (simp add: \<mu>_def k1_def)
+  
+    define k1' where "k1' = nat (floor k1)"
+    have "\<mu> \<ge> 0" using p
+      by (auto simp: \<mu>_def q_def)
+    have "(x \<le> k1') \<longleftrightarrow> real x \<le> k1" for x
+      unfolding k1'_def not_less using \<open>k1 \<ge> 0\<close> by linarith
+    hence eq: "{n. n \<le> k1}  = {..k1'}"
+      by auto
+    hence "measure_pmf.prob (neg_binomial_pmf n p) {n. n \<le> k1} =
+           measure_pmf.prob (neg_binomial_pmf n p) {..k1'}"
+      by simp
+    also have "measure_pmf.prob (neg_binomial_pmf n p) {..k1'} =
+               measure_pmf.prob (binomial_pmf (n + k1') q) {..k1'}"
+      unfolding q_def using p by (intro prob_neg_binomial_pmf_atMost) auto
+    also note eq [symmetric]
+    also have "{x. real x \<le> k1} = {x. x \<le> real (n + k1') * q - (real (n + k1') * q - real k1')}"
+      using eq by auto
+    also have "measure_pmf.prob (binomial_pmf (n + k1') q) \<dots> \<le>
+                 exp (-2 * (real (n + k1') * q - real k1')\<^sup>2 / real (n + k1'))"
+    proof (rule binomial_distribution.prob_le)
+      show "binomial_distribution q"
+        by unfold_locales (use q in auto)
+    next
+      show "n + k1' > 0"
+        using \<open>k1 \<ge> 0\<close> n unfolding k1'_def by linarith
+    next
+      have "p * k1' \<le> p * k1"
+        using p \<open>k1 \<ge> 0\<close> by (intro mult_left_mono) (auto simp: k1'_def)
+      also have "\<dots> \<le> q * n"
+        using k1 p by (simp add: field_simps)
+      finally show "0 \<le> real (n + k1') * q - real k1'"
+        by (simp add: algebra_simps q_def)
+    qed
+    also have "{x. real x \<le> real (n + k1') * q - (real (n + k1') * q - k1')} = {..k1'}"
+      by auto
+    also have "real (n + k1') * q - k1' = -(p * k1' - q * n)"
+      by (simp add: q_def algebra_simps)
+    also have "\<dots> ^ 2 = (p * k1' - q * n) ^ 2"
+      by algebra
+    also have "- 2 * (p * real k1' - q * real n)\<^sup>2 / real (n + k1') = -2 * f (real k1')"
+      by (simp add: f_def)
+    also have "f (real k1') \<ge> f k1"
+      by (rule f_mono) (use \<open>k1 \<ge> 0\<close> k1 in \<open>auto simp: k1'_def\<close>)
+    hence "exp (-2 * f (real k1')) \<le> exp (-2 * f k1)"
+      by simp
+    also have "\<dots> = exp (-2 * (p * k1 - q * n)\<^sup>2 / (k1 + n))"
+      by (simp add: f_def)
+
+    also have "-2 * (p * k1 - q * n)\<^sup>2 = -2 * p\<^sup>2 * (k1 - \<mu>)\<^sup>2"
+      using p by (auto simp: field_simps \<mu>_def)
+    also have "(k1 - \<mu>) ^ 2 = k ^ 2"
+      by (simp add: k1_def \<mu>_def)
+    also note k1_def
+    also have "\<mu> - k + real n = real n / p - k"
+      using p by (simp add: \<mu>_def q_def field_simps)
+    also have "- 2 * p\<^sup>2 * k\<^sup>2 / (real n / p - k) = - 2 * p ^ 3 * k\<^sup>2 / (n - p * k)"
+      using p by (simp add: field_simps power3_eq_cube power2_eq_square)
+    also have "{..k1'} = {x. real x \<le> \<mu> - k}"
+      using eq by (simp add: k1_def)
+    finally show ?thesis .
+  qed
+qed
+
+text \<open>
+  Since the function $\exp(-l/x)$ is concave for $x \geq \frac{l}{2}$, the above two
+  bounds can be combined into the following one for moderate values of \<open>k\<close>; the short
+  computation behind this is sketched after the proof.
+  (cf. \<^url>\<open>https://math.stackexchange.com/questions/1565559\<close>)
+\<close>
+lemma prob_neg_binomial_pmf_abs_ge_bound:
+  fixes n :: nat and k :: real
+  defines "\<mu> \<equiv> real n * q / p"
+  assumes "k \<ge> 0" and n_ge: "n \<ge> p * k * (p\<^sup>2 * k + 1)"
+  shows "measure_pmf.prob (neg_binomial_pmf n p) {x. \<bar>real x - \<mu>\<bar> \<ge> k} \<le>
+           2 * exp (-2 * p ^ 3 * k ^ 2 / n)"
+proof (cases "k = 0")
+  case False
+  with \<open>k \<ge> 0\<close> have k: "k > 0"
+    by auto
+  define l :: real where "l = 2 * p ^ 3 * k ^ 2"
+  have l: "l > 0"
+    using p k by (auto simp: l_def)
+  define f :: "real \<Rightarrow> real" where "f = (\<lambda>x. exp (-l / x))"
+  define f' where "f' = (\<lambda>x. -l * exp (-l / x) / x ^ 2)"
+
+  have f'_mono: "f' x \<le> f' y" if "x \<ge> l / 2" "x \<le> y" for x y :: real
+    using that(2)
+  proof (rule DERIV_nonneg_imp_nondecreasing)
+    fix t assume t: "x \<le> t" "t \<le> y"
+    have "t > 0"
+      using that l t by auto
+    have "(f' has_field_derivative (l * (2 * t - l) / (exp (l / t) * t ^ 4))) (at t)"
+      unfolding f'_def using t that \<open>t > 0\<close>
+      by (auto intro!: derivative_eq_intros simp: field_simps exp_minus simp flip: power_Suc)
+    moreover have "l * (2 * t - l) / (exp (l / t) * t ^ 4) \<ge> 0"
+      using that t l by (intro divide_nonneg_pos mult_nonneg_nonneg) auto
+    ultimately show "\<exists>y. (f' has_real_derivative y) (at t) \<and> 0 \<le> y" by blast
+  qed
+
+  have convex: "convex_on {l/2..} (\<lambda>x. -f x)" unfolding f_def
+  proof (intro convex_on_realI[where f' = f'])
+    show "((\<lambda>x. - exp (- l / x)) has_real_derivative f' x) (at x)" if "x \<in> {l/2..}" for x
+      using that l
+      by (auto intro!: derivative_eq_intros simp: f'_def power2_eq_square algebra_simps)
+  qed (use l in \<open>auto intro!: f'_mono\<close>)
+
+  have eq: "{x. \<bar>real x - \<mu>\<bar> \<ge> k} = {x. real x \<le> \<mu> - k} \<union> {x. real x \<ge> \<mu> + k}"
+    by auto
+  have "measure_pmf.prob (neg_binomial_pmf n p) {x. \<bar>real x - \<mu>\<bar> \<ge> k} \<le>
+        measure_pmf.prob (neg_binomial_pmf n p) {x. real x \<le> \<mu> - k} +
+        measure_pmf.prob (neg_binomial_pmf n p) {x. real x \<ge> \<mu> + k}"
+    by (subst eq, rule measure_Un_le) auto
+  also have "\<dots> \<le> exp (-2 * p ^ 3 * k\<^sup>2 / (n - p * k)) + exp (-2 * p ^ 3 * k\<^sup>2 / (n + p * k))"
+    unfolding \<mu>_def
+    by (intro prob_neg_binomial_pmf_le_bound prob_neg_binomial_pmf_ge_bound add_mono \<open>k \<ge> 0\<close>)
+  also have "\<dots> = 2 * (1/2 * f (n - p * k) + 1/2 * f (n + p * k))"
+    by (simp add: f_def l_def)
+  also have "1/2 * f (n - p * k) + 1/2 * f (n + p * k) \<le> f (1/2 * (n - p * k) + 1/2 * (n + p * k))"
+  proof -
+    let ?x = "n - p * k" and ?y = "n + p * k"
+    have le1: "l / 2 \<le> ?x" using n_ge
+      by (simp add: l_def power2_eq_square power3_eq_cube algebra_simps)
+    also have "\<dots> \<le> ?y"
+      using p k by simp
+    finally have le2: "l / 2 \<le> ?y" .
+    have "-f ((1 - 1 / 2) *\<^sub>R ?x + (1 / 2) *\<^sub>R ?y) \<le> (1 - 1 / 2) * - f ?x + 1 / 2 * - f ?y"
+      using le1 le2 by (intro convex_onD[OF convex]) auto
+    thus ?thesis by simp
+  qed
+  also have "1/2 * (n - p * k) + 1/2 * (n + p * k) = n"
+    by (simp add: algebra_simps)
+  also have "2 * f n = 2 * exp (-l / n)"
+    by (simp add: f_def)
+  finally show ?thesis
+    by (simp add: l_def)
+qed auto
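+
+text \<open>
+  To sketch the concavity computation behind the side condition (the formal proof above
+  establishes concavity via a monotone derivative instead): for $f(x) = \exp(-l/x)$ one has
+  $f''(x) = \exp(-l/x)\, l\, (l - 2x) / x^4$, which is nonpositive for $x \geq l/2$, so \<open>f\<close>
+  is concave on that interval. With $l = 2p^3k^2$, concavity then gives
+  $\frac{1}{2} f(n - pk) + \frac{1}{2} f(n + pk) \leq f(n)$ as long as $n - pk \geq l/2$,
+  which is equivalent to the assumption $n \geq pk\,(p^2 k + 1)$ above.
+\<close>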
+
+end
+
+end
--- a/src/HOL/Probability/Independent_Family.thy	Tue Feb 16 17:12:02 2021 +0000
+++ b/src/HOL/Probability/Independent_Family.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -1138,6 +1138,33 @@
   qed
 qed
 
+lemma (in prob_space) indep_vars_iff_distr_eq_PiM':
+  fixes I :: "'i set" and X :: "'i \<Rightarrow> 'a \<Rightarrow> 'b"
+  assumes "I \<noteq> {}"
+  assumes rv: "\<And>i. i \<in> I \<Longrightarrow> random_variable (M' i) (X i)"
+  shows "indep_vars M' X I \<longleftrightarrow>
+           distr M (\<Pi>\<^sub>M i\<in>I. M' i) (\<lambda>x. \<lambda>i\<in>I. X i x) = (\<Pi>\<^sub>M i\<in>I. distr M (M' i) (X i))"
+proof -
+  from assms obtain j where j: "j \<in> I"
+    by auto
+  define N' where "N' = (\<lambda>i. if i \<in> I then M' i else M' j)"
+  define Y where "Y = (\<lambda>i. if i \<in> I then X i else X j)"
+  have rv: "random_variable (N' i) (Y i)" for i
+    using j by (auto simp: N'_def Y_def intro: assms)
+
+  have "indep_vars M' X I = indep_vars N' Y I"
+    by (intro indep_vars_cong) (auto simp: N'_def Y_def)
+  also have "\<dots> \<longleftrightarrow> distr M (\<Pi>\<^sub>M i\<in>I. N' i) (\<lambda>x. \<lambda>i\<in>I. Y i x) = (\<Pi>\<^sub>M i\<in>I. distr M (N' i) (Y i))"
+    by (intro indep_vars_iff_distr_eq_PiM rv assms)
+  also have "(\<Pi>\<^sub>M i\<in>I. N' i) = (\<Pi>\<^sub>M i\<in>I. M' i)"
+    by (intro PiM_cong) (simp_all add: N'_def)
+  also have "(\<lambda>x. \<lambda>i\<in>I. Y i x) = (\<lambda>x. \<lambda>i\<in>I. X i x)"
+    by (simp_all add: Y_def fun_eq_iff)
+  also have "(\<Pi>\<^sub>M i\<in>I. distr M (N' i) (Y i)) = (\<Pi>\<^sub>M i\<in>I. distr M (M' i) (X i))"
+    by (intro PiM_cong distr_cong) (simp_all add: N'_def Y_def)
+  finally show ?thesis .
+qed
+
 lemma (in prob_space) indep_varD:
   assumes indep: "indep_var Ma A Mb B"
   assumes sets: "Xa \<in> sets Ma" "Xb \<in> sets Mb"
--- a/src/HOL/Probability/Infinite_Product_Measure.thy	Tue Feb 16 17:12:02 2021 +0000
+++ b/src/HOL/Probability/Infinite_Product_Measure.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -392,4 +392,51 @@
 
 end
 
+lemma PiM_return:
+  assumes "finite I"
+  assumes [measurable]: "\<And>i. i \<in> I \<Longrightarrow> {a i} \<in> sets (M i)"
+  shows "PiM I (\<lambda>i. return (M i) (a i)) = return (PiM I M) (restrict a I)"
+proof -
+  have [simp]: "a i \<in> space (M i)" if "i \<in> I" for i
+    using assms(2)[OF that] by (meson insert_subset sets.sets_into_space)
+  interpret prob_space "PiM I (\<lambda>i. return (M i) (a i))"
+    by (intro prob_space_PiM prob_space_return) auto
+  have "AE x in PiM I (\<lambda>i. return (M i) (a i)). \<forall>i\<in>I. x i = restrict a I i"
+    by (intro eventually_ball_finite ballI AE_PiM_component prob_space_return assms)
+       (auto simp: AE_return)
+  moreover have "AE x in PiM I (\<lambda>i. return (M i) (a i)). x \<in> space (PiM I (\<lambda>i. return (M i) (a i)))"
+    by simp
+  ultimately have "AE x in PiM I (\<lambda>i. return (M i) (a i)). x = restrict a I"
+    by eventually_elim (auto simp: fun_eq_iff space_PiM)
+  hence "Pi\<^sub>M I (\<lambda>i. return (M i) (a i)) = return (Pi\<^sub>M I (\<lambda>i. return (M i) (a i))) (restrict a I)"
+    by (rule AE_eq_constD)
+  also have "\<dots> = return (PiM I M) (restrict a I)"
+    by (intro return_cong sets_PiM_cong) auto
+  finally show ?thesis .
+qed
+
+lemma distr_PiM_finite_prob_space':
+  assumes fin: "finite I"
+  assumes "\<And>i. i \<in> I \<Longrightarrow> prob_space (M i)"
+  assumes "\<And>i. i \<in> I \<Longrightarrow> prob_space (M' i)"
+  assumes [measurable]: "\<And>i. i \<in> I \<Longrightarrow> f \<in> measurable (M i) (M' i)"
+  shows   "distr (PiM I M) (PiM I M') (compose I f) = PiM I (\<lambda>i. distr (M i) (M' i) f)"
+proof -
+  define N where "N = (\<lambda>i. if i \<in> I then M i else return (count_space UNIV) undefined)"
+  define N' where "N' = (\<lambda>i. if i \<in> I then M' i else return (count_space UNIV) undefined)"
+  have [simp]: "PiM I N = PiM I M" "PiM I N' = PiM I M'"
+    by (intro PiM_cong; simp add: N_def N'_def)+
+
+  have "distr (PiM I N) (PiM I N') (compose I f) = PiM I (\<lambda>i. distr (N i) (N' i) f)"
+  proof (rule distr_PiM_finite_prob_space)
+    show "product_prob_space N"
+      by (rule product_prob_spaceI) (auto simp: N_def intro!: prob_space_return assms)
+    show "product_prob_space N'"
+      by (rule product_prob_spaceI) (auto simp: N'_def intro!: prob_space_return assms)
+  qed (auto simp: N_def N'_def fin)
+  also have "Pi\<^sub>M I (\<lambda>i. distr (N i) (N' i) f) = Pi\<^sub>M I (\<lambda>i. distr (M i) (M' i) f)"
+    by (intro PiM_cong) (simp_all add: N_def N'_def)
+  finally show ?thesis by simp
+qed
+
 end
--- a/src/HOL/Probability/Probability.thy	Tue Feb 16 17:12:02 2021 +0000
+++ b/src/HOL/Probability/Probability.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -10,6 +10,8 @@
   Projective_Limit
   Random_Permutations
   SPMF
+  Product_PMF
+  Hoeffding
   Stream_Space
   Tree_Space
   Conditional_Expectation
--- a/src/HOL/Probability/Probability_Mass_Function.thy	Tue Feb 16 17:12:02 2021 +0000
+++ b/src/HOL/Probability/Probability_Mass_Function.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -1,6 +1,7 @@
 (*  Title:      HOL/Probability/Probability_Mass_Function.thy
     Author:     Johannes Hölzl, TU München
     Author:     Andreas Lochbihler, ETH Zurich
+    Author:     Manuel Eberl, TU München
 *)
 
 section \<open> Probability mass function \<close>
@@ -529,6 +530,16 @@
   shows "integral\<^sup>L (map_pmf g p) f = integral\<^sup>L p (\<lambda>x. f (g x))"
   by (simp add: integral_distr map_pmf_rep_eq)
 
+lemma integrable_map_pmf_eq [simp]:
+  fixes g :: "'a \<Rightarrow> 'b::{banach, second_countable_topology}"
+  shows "integrable (map_pmf f p) g \<longleftrightarrow> integrable (measure_pmf p) (\<lambda>x. g (f x))"              
+  by (subst map_pmf_rep_eq, subst integrable_distr_eq) auto
+
+lemma integrable_map_pmf [intro]:
+  fixes g :: "'a \<Rightarrow> 'b::{banach, second_countable_topology}"
+  shows "integrable (measure_pmf p) (\<lambda>x. g (f x)) \<Longrightarrow> integrable (map_pmf f p) g"
+  by (subst integrable_map_pmf_eq)
+
 lemma pmf_abs_summable [intro]: "pmf p abs_summable_on A"
   by (rule abs_summable_on_subset[OF _ subset_UNIV])
      (auto simp:  abs_summable_on_def integrable_iff_bounded nn_integral_pmf)
@@ -669,6 +680,9 @@
   by (auto simp: pmf.rep_eq map_pmf_rep_eq measure_distr AE_measure_pmf_iff inj_onD
            intro!: measure_pmf.finite_measure_eq_AE)
 
+lemma pair_return_pmf [simp]: "pair_pmf (return_pmf x) (return_pmf y) = return_pmf (x, y)"
+  by (auto simp: pair_pmf_def bind_return_pmf)
+
 lemma pmf_map_inj': "inj f \<Longrightarrow> pmf (map_pmf f M) (f x) = pmf M x"
 apply(cases "x \<in> set_pmf M")
  apply(simp add: pmf_map_inj[OF subset_inj_on])
@@ -676,6 +690,28 @@
 apply(auto simp add: pmf_eq_0_set_pmf dest: injD)
 done
 
+lemma expectation_pair_pmf_fst [simp]:
+  fixes f :: "'a \<Rightarrow> 'b::{banach, second_countable_topology}"
+  shows "measure_pmf.expectation (pair_pmf p q) (\<lambda>x. f (fst x)) = measure_pmf.expectation p f"
+proof -
+  have "measure_pmf.expectation (pair_pmf p q) (\<lambda>x. f (fst x)) = 
+          measure_pmf.expectation (map_pmf fst (pair_pmf p q)) f" by simp
+  also have "map_pmf fst (pair_pmf p q) = p"
+    by (simp add: map_fst_pair_pmf)
+  finally show ?thesis .
+qed
+
+lemma expectation_pair_pmf_snd [simp]:
+  fixes f :: "'a \<Rightarrow> 'b::{banach, second_countable_topology}"
+  shows "measure_pmf.expectation (pair_pmf p q) (\<lambda>x. f (snd x)) = measure_pmf.expectation q f"
+proof -
+  have "measure_pmf.expectation (pair_pmf p q) (\<lambda>x. f (snd x)) = 
+          measure_pmf.expectation (map_pmf snd (pair_pmf p q)) f" by simp
+  also have "map_pmf snd (pair_pmf p q) = q"
+    by (simp add: map_snd_pair_pmf)
+  finally show ?thesis .
+qed
+
 lemma pmf_map_outside: "x \<notin> f ` set_pmf M \<Longrightarrow> pmf (map_pmf f M) x = 0"
   unfolding pmf_eq_0_set_pmf by simp
 
@@ -1711,9 +1747,108 @@
 
 end
 
+lemma geometric_pmf_1 [simp]: "geometric_pmf 1 = return_pmf 0"
+  by (intro pmf_eqI) (auto simp: indicator_def)
+
 lemma set_pmf_geometric: "0 < p \<Longrightarrow> p < 1 \<Longrightarrow> set_pmf (geometric_pmf p) = UNIV"
   by (auto simp: set_pmf_iff)
 
+lemma geometric_sums_times_n:
+  fixes c::"'a::{banach,real_normed_field}"
+  assumes "norm c < 1"
+  shows "(\<lambda>n. c^n * of_nat n) sums (c / (1 - c)\<^sup>2)"
+proof -
+  have "(\<lambda>n. c * z ^ n) sums (c / (1 - z))" if "norm z < 1" for z
+    using geometric_sums sums_mult that by fastforce
+  moreover have "((\<lambda>z. c / (1 - z)) has_field_derivative (c / (1 - c)\<^sup>2)) (at c)"
+      using assms by (auto intro!: derivative_eq_intros simp add: semiring_normalization_rules)
+  ultimately have "(\<lambda>n. diffs (\<lambda>n. c) n * c ^ n) sums (c / (1 - c)\<^sup>2)"
+    using assms by (intro termdiffs_sums_strong)
+  then have "(\<lambda>n. of_nat (Suc n) * c ^ (Suc n)) sums (c / (1 - c)\<^sup>2)"
+    unfolding diffs_def by (simp add: power_eq_if mult.assoc)
+  then show ?thesis
+    by (subst (asm) sums_Suc_iff) (auto simp add: mult.commute)
+qed
+
+lemma geometric_sums_times_norm:
+  fixes c::"'a::{banach,real_normed_field}"
+  assumes "norm c < 1"
+  shows "(\<lambda>n. norm (c^n * of_nat n)) sums (norm c / (1 - norm c)\<^sup>2)"
+proof -
+  have "norm (c^n * of_nat n) = (norm c) ^ n * of_nat n" for n::nat
+    by (simp add: norm_power norm_mult)
+  then show ?thesis
+    using geometric_sums_times_n[of "norm c"] assms
+    by force
+qed
+
+lemma integrable_real_geometric_pmf:
+  assumes "p \<in> {0<..1}"
+  shows   "integrable (geometric_pmf p) real"
+proof -
+  have "summable (\<lambda>x. p * ((1 - p) ^ x * real x))"
+    using geometric_sums_times_norm[of "1 - p"] assms
+    by (intro summable_mult) (auto simp: sums_iff)
+  hence "summable (\<lambda>x. (1 - p) ^ x * real x)"
+    by (rule summable_mult_D) (use assms in auto)
+  thus ?thesis
+    unfolding measure_pmf_eq_density using assms
+    by (subst integrable_density)
+       (auto simp: integrable_count_space_nat_iff mult_ac)
+qed
+
+lemma expectation_geometric_pmf:
+  assumes "p \<in> {0<..1}"
+  shows   "measure_pmf.expectation (geometric_pmf p) real = (1 - p) / p"
+proof -
+  have "(\<lambda>n. p * ((1 - p) ^ n * n)) sums (p * ((1 - p) / p^2))"
+    using assms geometric_sums_times_n[of "1-p"] by (intro sums_mult) auto
+  moreover have "(\<lambda>n. p * ((1 - p) ^ n * n)) = (\<lambda>n. (1 - p) ^ n * p  * real n)"
+    by auto
+  ultimately have *: "(\<lambda>n. (1 - p) ^ n * p  * real n) sums ((1 - p) / p)"
+    using assms sums_subst by (auto simp add: power2_eq_square)
+  have "measure_pmf.expectation (geometric_pmf p) real =
+        (\<integral>n. pmf (geometric_pmf p) n * real n \<partial>count_space UNIV)"
+    unfolding measure_pmf_eq_density by (subst integral_density) auto
+  also have "integrable (count_space UNIV) (\<lambda>n. pmf (geometric_pmf p) n * real n)"
+    using * assms unfolding integrable_count_space_nat_iff by (simp add: sums_iff)
+  hence "(\<integral>n. pmf (geometric_pmf p) n * real n \<partial>count_space UNIV) = (1 - p) / p"
+    using * assms by (subst integral_count_space_nat) (simp_all add: sums_iff)
+  finally show ?thesis by auto
+qed
+
+lemma geometric_bind_pmf_unfold:
+  assumes "p \<in> {0<..1}"
+  shows "geometric_pmf p =
+     do {b \<leftarrow> bernoulli_pmf p;
+         if b then return_pmf 0 else map_pmf Suc (geometric_pmf p)}"
+proof -
+  have *: "(Suc -` {i}) = (if i = 0 then {} else {i - 1})" for i
+    by force
+  have "pmf (geometric_pmf p) i =
+        pmf (bernoulli_pmf p \<bind>
+            (\<lambda>b. if b then return_pmf 0 else map_pmf Suc (geometric_pmf p)))
+            i" for i
+  proof -
+    have "pmf (geometric_pmf p) i =
+          (if i = 0 then p else (1 - p) * pmf (geometric_pmf p) (i - 1))"
+      using assms by (simp add: power_eq_if)
+    also have "\<dots> = (if i = 0  then p else (1 - p) * pmf (map_pmf Suc (geometric_pmf p)) i)"
+      by (simp add: pmf_map indicator_def measure_pmf_single *)
+    also have "\<dots> = measure_pmf.expectation (bernoulli_pmf p)
+          (\<lambda>x. pmf (if x then return_pmf 0 else map_pmf Suc (geometric_pmf p)) i)"
+      using assms by (auto simp add: pmf_map *)
+    also have "\<dots> = pmf (bernoulli_pmf p \<bind>
+                   (\<lambda>b. if b then return_pmf 0 else map_pmf Suc (geometric_pmf p)))
+                   i"
+      by (auto simp add: pmf_bind)
+    finally show ?thesis .
+  qed
+  then show ?thesis
+    using pmf_eqI by blast
+qed
+
+
 subsubsection \<open> Uniform Multiset Distribution \<close>
 
 context
@@ -1945,6 +2080,23 @@
 lemma set_pmf_binomial[simp]: "0 < p \<Longrightarrow> p < 1 \<Longrightarrow> set_pmf (binomial_pmf n p) = {..n}"
   by (simp add: set_pmf_binomial_eq)
 
+lemma finite_set_pmf_binomial_pmf [intro]: "p \<in> {0..1} \<Longrightarrow> finite (set_pmf (binomial_pmf n p))"
+  by (subst set_pmf_binomial_eq) auto
+
+lemma expectation_binomial_pmf':
+  fixes f :: "nat \<Rightarrow> 'a :: {banach, second_countable_topology}"
+  assumes p: "p \<in> {0..1}"
+  shows   "measure_pmf.expectation (binomial_pmf n p) f =
+             (\<Sum>k\<le>n. (real (n choose k) * p ^ k * (1 - p) ^ (n - k)) *\<^sub>R f k)"
+  using p by (subst integral_measure_pmf[where A = "{..n}"])
+             (auto simp: set_pmf_binomial_eq split: if_splits)
+
+lemma integrable_binomial_pmf [simp, intro]:
+  fixes f :: "nat \<Rightarrow> 'a :: {banach, second_countable_topology}"
+  assumes p: "p \<in> {0..1}"
+  shows "integrable (binomial_pmf n p) f"
+  by (rule integrable_measure_pmf_finite) (use assms in auto)
+
 context includes lifting_syntax
 begin
 
@@ -2010,6 +2162,222 @@
         bind_return_pmf' binomial_pmf_0 intro!: bind_pmf_cong)
 
 
+subsection \<open>Negative binomial distribution\<close>
+
+text \<open>
+  The negative binomial distribution counts the number of times a weighted coin comes up
+  tails before having come up heads \<open>n\<close> times. In other words: how many failures do we see before
+  seeing the \<open>n\<close>-th success?
+
+  An alternative view is that the negative binomial distribution is the sum of \<open>n\<close> i.i.d.
+  geometric variables (this is the definition that we use).
+
+  Note that there are sometimes different conventions for this distribution in the literature;
+  for instance, sometimes the number of \<^emph>\<open>attempts\<close> is counted instead of the number of failures.
+  This merely shifts the entire distribution by a constant and thus makes little difference.
+  I think that the convention we use is the most natural one since the support of the distribution
+  starts at 0, whereas for the other convention it starts at \<open>n\<close>.
+\<close>
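+
+text \<open>
+  A small worked example (for illustration only): with a fair coin ($p = 1/2$), the probability
+  of seeing exactly two tails before the second head is obtained by fixing the fourth flip to be
+  a head and placing the single remaining head among the first three flips ($3$ ways), i.e.\
+  $3 \cdot (1/2)^4 = 3/16$. This agrees with the closed form ${k+n-1 \choose k}\, p^n\, (1-p)^k$
+  proved below (\<open>pmf_neg_binomial\<close>) for $n = k = 2$.
+\<close>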
+primrec neg_binomial_pmf :: "nat \<Rightarrow> real \<Rightarrow> nat pmf" where
+  "neg_binomial_pmf 0 p = return_pmf 0"
+| "neg_binomial_pmf (Suc n) p =
+     map_pmf (\<lambda>(x,y). (x + y)) (pair_pmf (geometric_pmf p) (neg_binomial_pmf n p))"
+
+lemma neg_binomial_pmf_Suc_0 [simp]: "neg_binomial_pmf (Suc 0) p = geometric_pmf p"
+  by (auto simp: pair_pmf_def bind_return_pmf map_pmf_def bind_assoc_pmf bind_return_pmf')
+
+lemmas neg_binomial_pmf_Suc [simp del] = neg_binomial_pmf.simps(2)
+
+lemma neg_binomial_prob_1 [simp]: "neg_binomial_pmf n 1 = return_pmf 0"
+  by (induction n) (simp_all add: neg_binomial_pmf_Suc)
+
+text \<open>
+  We can now confirm the aforementioned intuition about counting the failures before the
+  \<open>n\<close>-th success with the following recurrence:
+\<close>
+lemma neg_binomial_pmf_unfold:
+  assumes p: "p \<in> {0<..1}"
+  shows "neg_binomial_pmf (Suc n) p =
+           do {b \<leftarrow> bernoulli_pmf p;
+               if b then neg_binomial_pmf n p else map_pmf Suc (neg_binomial_pmf (Suc n) p)}"
+  (is "_ = ?rhs")
+  unfolding neg_binomial_pmf_Suc
+  by (subst geometric_bind_pmf_unfold[OF p])
+     (auto simp: map_pmf_def pair_pmf_def bind_assoc_pmf bind_return_pmf bind_return_pmf'
+           intro!: bind_pmf_cong)
+
+text \<open>
+  Next, we show an explicit formula for the probability mass function of the negative
+  binomial distribution:
+\<close>
+lemma pmf_neg_binomial:
+  assumes p: "p \<in> {0<..1}"
+  shows   "pmf (neg_binomial_pmf n p) k = real ((k + n - 1) choose k) * p ^ n * (1 - p) ^ k"
+proof (induction n arbitrary: k)
+  case 0
+  thus ?case using assms by (auto simp: indicator_def)
+next
+  case (Suc n)
+  show ?case
+  proof (cases "n = 0")
+    case True
+    thus ?thesis using assms by auto
+  next
+    case False
+    let ?f = "pmf (neg_binomial_pmf n p)"
+    have "pmf (neg_binomial_pmf (Suc n) p) k =
+            pmf (geometric_pmf p \<bind> (\<lambda>x. map_pmf ((+) x) (neg_binomial_pmf n p))) k"
+      by (auto simp: pair_pmf_def bind_return_pmf map_pmf_def bind_assoc_pmf neg_binomial_pmf_Suc)
+    also have "\<dots> = measure_pmf.expectation (geometric_pmf p) 
+                      (\<lambda>x. measure_pmf.prob (neg_binomial_pmf n p) ((+) x -` {k}))"
+      by (simp add: pmf_bind pmf_map)
+    also have "(\<lambda>x. (+) x -` {k}) = (\<lambda>x. if x \<le> k then {k - x} else {})"
+      by (auto simp: fun_eq_iff)
+    also have "(\<lambda>x. measure_pmf.prob (neg_binomial_pmf n p) (\<dots> x)) =
+               (\<lambda>x. if x \<le> k then ?f(k - x) else 0)"
+      by (auto simp: fun_eq_iff measure_pmf_single)
+    also have "measure_pmf.expectation (geometric_pmf p) \<dots> =
+                 (\<Sum>i\<le>k. pmf (neg_binomial_pmf n p) (k - i) * pmf (geometric_pmf p) i)"
+      by (subst integral_measure_pmf_real[where A = "{..k}"]) (auto split: if_splits)
+    also have "\<dots> = p^(n+1) * (1-p)^k * real (\<Sum>i\<le>k. (k - i + n - 1) choose (k - i))"
+      unfolding sum_distrib_left of_nat_sum
+    proof (intro sum.cong refl, goal_cases)
+      case (1 i)
+      have "pmf (neg_binomial_pmf n p) (k - i) * pmf (geometric_pmf p) i =
+              real ((k - i + n - 1) choose (k - i)) * p^(n+1) * ((1-p)^(k-i) * (1-p)^i)"
+        using assms Suc.IH by (simp add: mult_ac)
+      also have "(1-p)^(k-i) * (1-p)^i = (1-p)^k"
+        using 1 by (subst power_add [symmetric]) auto
+      finally show ?case by simp
+    qed
+    also have "(\<Sum>i\<le>k. (k - i + n - 1) choose (k - i)) = (\<Sum>i\<le>k. (n - 1 + i) choose i)"
+      by (intro sum.reindex_bij_witness[of _ "\<lambda>i. k - i" "\<lambda>i. k - i"]) 
+         (use \<open>n \<noteq> 0\<close> in \<open>auto simp: algebra_simps\<close>)
+    also have "\<dots> = (n + k) choose k"
+      by (subst sum_choose_lower) (use \<open>n \<noteq> 0\<close> in auto)
+    finally show ?thesis
+      by (simp add: add_ac)
+  qed
+qed
+
+(* TODO: Move? *)
+lemma gbinomial_0_left: "0 gchoose k = (if k = 0 then 1 else 0)"
+  by (cases k) auto
+
+text \<open>
+  The following alternative formula highlights why it is called `negative binomial distribution':
+\<close>
+lemma pmf_neg_binomial':
+  assumes p: "p \<in> {0<..1}"
+  shows   "pmf (neg_binomial_pmf n p) k = (-1) ^ k * ((-real n) gchoose k) * p ^ n * (1 - p) ^ k"
+proof (cases "n > 0")
+  case n: True
+  have "pmf (neg_binomial_pmf n p) k = real ((k + n - 1) choose k) * p ^ n * (1 - p) ^ k"
+    by (rule pmf_neg_binomial) fact+
+  also have "real ((k + n - 1) choose k) = ((real k + real n - 1) gchoose k)"
+    using n by (subst binomial_gbinomial) (auto simp: of_nat_diff)
+  also have "\<dots> = (-1) ^ k * ((-real n) gchoose k)"
+    by (subst gbinomial_negated_upper) auto
+  finally show ?thesis by simp
+qed (auto simp: indicator_def gbinomial_0_left)
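+
+text \<open>
+  The identity behind this reformulation (applied via \<open>gbinomial_negated_upper\<close> above) is
+  ${-n \choose k} = (-n)(-n-1)\cdots(-n-k+1)/k!
+    = (-1)^k\, n(n+1)\cdots(n+k-1)/k! = (-1)^k {n+k-1 \choose k}$,
+  i.e.\ a binomial coefficient with negated upper argument, which is what gives the
+  distribution its name.
+\<close>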
+
+text \<open>
+  The cumulative distribution function of the negative binomial distribution can be
+  expressed in terms of that of the ordinary binomial distribution.
+\<close>
+lemma prob_neg_binomial_pmf_atMost:
+  assumes p: "p \<in> {0<..1}"
+  shows "measure_pmf.prob (neg_binomial_pmf n p) {..k} =
+         measure_pmf.prob (binomial_pmf (n + k) (1 - p)) {..k}"
+proof (cases "n = 0")
+  case [simp]: True
+  have "set_pmf (binomial_pmf (n + k) (1 - p)) \<subseteq> {..n+k}"
+    using p by (subst set_pmf_binomial_eq) auto
+  hence "measure_pmf.prob (binomial_pmf (n + k) (1 - p)) {..k} = 1"
+    by (subst measure_pmf.prob_eq_1) (auto intro!: AE_pmfI)
+  thus ?thesis by simp
+next
+  case False
+  hence n: "n > 0" by auto
+  have "measure_pmf.prob (binomial_pmf (n + k) (1 - p)) {..k} = (\<Sum>i\<le>k. pmf (binomial_pmf (n + k) (1 - p)) i)"
+    by (intro measure_measure_pmf_finite) auto
+  also have "\<dots> = (\<Sum>i\<le>k. real ((n + k) choose i) * p ^ (n + k - i) * (1 - p) ^ i)"
+    using p by (simp add: mult_ac)
+  also have "\<dots> = p ^ n * (\<Sum>i\<le>k. real ((n + k) choose i) * (1 - p) ^ i * p ^ (k - i))"
+    unfolding sum_distrib_left by (intro sum.cong) (auto simp: algebra_simps simp flip: power_add)
+  also have "(\<Sum>i\<le>k. real ((n + k) choose i) * (1 - p) ^ i * p ^ (k - i)) =
+             (\<Sum>i\<le>k. ((n + i - 1) choose i) * (1 - p) ^ i)"
+    using gbinomial_partial_sum_poly_xpos[of k "real n" "1 - p" p] n
+    by (simp add: binomial_gbinomial add_ac of_nat_diff)
+  also have "p ^ n * \<dots> = (\<Sum>i\<le>k. pmf (neg_binomial_pmf n p) i)"
+    using p unfolding sum_distrib_left by (simp add: pmf_neg_binomial algebra_simps)
+  also have "\<dots> = measure_pmf.prob (neg_binomial_pmf n p) {..k}"
+    by (intro measure_measure_pmf_finite [symmetric]) auto
+  finally show ?thesis ..
+qed
+
+lemma prob_neg_binomial_pmf_lessThan:
+  assumes p: "p \<in> {0<..1}"
+  shows "measure_pmf.prob (neg_binomial_pmf n p) {..<k} =
+         measure_pmf.prob (binomial_pmf (n + k - 1) (1 - p)) {..<k}"
+proof (cases "k = 0")
+  case False
+  hence "{..<k} = {..k-1}"
+    by auto
+  thus ?thesis
+    using prob_neg_binomial_pmf_atMost[OF p, of n "k - 1"] False by simp
+qed auto  
+
+text \<open>
+  The expected value of the negative binomial distribution is $n(1-p)/p$:
+\<close>
+lemma nn_integral_neg_binomial_pmf_real:
+  assumes p: "p \<in> {0<..1}"
+  shows "nn_integral (measure_pmf (neg_binomial_pmf n p)) of_nat = ennreal (n * (1 - p) / p)"
+proof (induction n)
+  case 0
+  thus ?case by auto
+next
+  case (Suc n)
+  have "nn_integral (measure_pmf (neg_binomial_pmf (Suc n) p)) of_nat =
+        nn_integral (measure_pmf (geometric_pmf p)) of_nat +
+        nn_integral (measure_pmf (neg_binomial_pmf n p)) of_nat"
+    by (simp add: neg_binomial_pmf_Suc case_prod_unfold nn_integral_add nn_integral_pair_pmf')
+  also have "nn_integral (measure_pmf (geometric_pmf p)) of_nat = ennreal ((1-p) / p)"
+    unfolding ennreal_of_nat_eq_real_of_nat
+    using expectation_geometric_pmf[OF p] integrable_real_geometric_pmf[OF p]
+    by (subst nn_integral_eq_integral) auto
+  also have "nn_integral (measure_pmf (neg_binomial_pmf n p)) of_nat = n * (1 - p) / p" using p
+    by (subst Suc.IH)
+       (auto simp: ennreal_of_nat_eq_real_of_nat ennreal_mult simp flip: divide_ennreal ennreal_minus)
+  also have "ennreal ((1 - p) / p) + ennreal (real n * (1 - p) / p) =
+             ennreal ((1-p) / p + real n * (1 - p) / p)"
+    by (intro ennreal_plus [symmetric] divide_nonneg_pos mult_nonneg_nonneg) (use p in auto)
+  also have "(1-p) / p + real n * (1 - p) / p = real (Suc n) * (1 - p) / p"
+    using p by (auto simp: field_simps)
+  finally show ?case
+    by (simp add: ennreal_of_nat_eq_real_of_nat)
+qed
+
+lemma integrable_neg_binomial_pmf_real:
+  assumes p: "p \<in> {0<..1}"
+  shows "integrable (measure_pmf (neg_binomial_pmf n p)) real"
+  using nn_integral_neg_binomial_pmf_real[OF p, of n]
+  by (subst integrable_iff_bounded) (auto simp flip: ennreal_of_nat_eq_real_of_nat)
+
+lemma expectation_neg_binomial_pmf:
+  assumes p: "p \<in> {0<..1}"
+  shows "measure_pmf.expectation (neg_binomial_pmf n p) real = n * (1 - p) / p"
+proof -
+  have "nn_integral (measure_pmf (neg_binomial_pmf n p)) of_nat = ennreal (n * (1 - p) / p)"
+    by (intro nn_integral_neg_binomial_pmf_real p)
+  also have "of_nat = (\<lambda>x. ennreal (real x))"
+    by (simp add: ennreal_of_nat_eq_real_of_nat fun_eq_iff)
+  finally show ?thesis
+    using p by (subst (asm) nn_integral_eq_integrable) auto
+qed
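+
+text \<open>
+  Intuitively, this is the value one would expect: each of the \<open>n\<close> independent geometric
+  components contributes an expected $(1-p)/p$ failures, and expectations add up. For a fair
+  coin ($p = 1/2$), this amounts to an average of \<open>n\<close> tails before the \<open>n\<close>-th head.
+\<close>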
+
+
 subsection \<open>PMFs from association lists\<close>
 
 definition pmf_of_list ::" ('a \<times> real) list \<Rightarrow> 'a pmf" where
--- a/src/HOL/Probability/Probability_Measure.thy	Tue Feb 16 17:12:02 2021 +0000
+++ b/src/HOL/Probability/Probability_Measure.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -465,6 +465,21 @@
   "expectation X = 0 \<Longrightarrow> variance X = expectation (\<lambda>x. (X x)^2)"
   by simp
 
+theorem%important (in prob_space) Chebyshev_inequality:
+  assumes [measurable]: "random_variable borel f"
+  assumes "integrable M (\<lambda>x. f x ^ 2)"
+  defines "\<mu> \<equiv> expectation f"
+  assumes "a > 0"
+  shows "prob {x\<in>space M. \<bar>f x - \<mu>\<bar> \<ge> a} \<le> variance f / a\<^sup>2"
+  unfolding \<mu>_def
+proof (rule second_moment_method)
+  have integrable: "integrable M f"
+    using assms by (blast dest: square_integrable_imp_integrable)
+  show "integrable M (\<lambda>x. (f x - expectation f)\<^sup>2)"
+    using assms integrable unfolding power2_eq_square ring_distribs
+    by (intro Bochner_Integration.integrable_diff) auto
+qed (use assms in auto)
+
 locale pair_prob_space = pair_sigma_finite M1 M2 + M1: prob_space M1 + M2: prob_space M2 for M1 M2
 
 sublocale pair_prob_space \<subseteq> P?: prob_space "M1 \<Otimes>\<^sub>M M2"
@@ -501,6 +516,18 @@
   finally show ?thesis by (simp add: measure_nonneg prod_nonneg)
 qed
 
+lemma product_prob_spaceI:
+  assumes "\<And>i. prob_space (M i)"
+  shows   "product_prob_space M"
+  unfolding product_prob_space_def product_prob_space_axioms_def product_sigma_finite_def
+proof safe
+  fix i
+  interpret prob_space "M i"
+    by (rule assms)
+  show "sigma_finite_measure (M i)" "prob_space (M i)"
+    by unfold_locales
+qed
+
 subsection \<open>Distributions\<close>
 
 definition distributed :: "'a measure \<Rightarrow> 'b measure \<Rightarrow> ('a \<Rightarrow> 'b) \<Rightarrow> ('b \<Rightarrow> ennreal) \<Rightarrow> bool"
@@ -1226,4 +1253,53 @@
 lemma (in prob_space) prob_space_completion: "prob_space (completion M)"
   by (rule prob_spaceI) (simp add: emeasure_space_1)
 
+lemma distr_PiM_finite_prob_space:
+  assumes fin: "finite I"
+  assumes "product_prob_space M"
+  assumes "product_prob_space M'"
+  assumes [measurable]: "\<And>i. i \<in> I \<Longrightarrow> f \<in> measurable (M i) (M' i)"
+  shows   "distr (PiM I M) (PiM I M') (compose I f) = PiM I (\<lambda>i. distr (M i) (M' i) f)"
+proof -
+  interpret M: product_prob_space M by fact
+  interpret M': product_prob_space M' by fact
+  define N where "N = (\<lambda>i. if i \<in> I then distr (M i) (M' i) f else M' i)"
+  have [intro]: "prob_space (N i)" for i
+    by (auto simp: N_def intro!: M.M.prob_space_distr M'.prob_space)
+
+  interpret N: product_prob_space N
+    by (intro product_prob_spaceI) (auto simp: N_def M'.prob_space intro: M.M.prob_space_distr)
+
+  have "distr (PiM I M) (PiM I M') (compose I f) = PiM I N"
+  proof (rule N.PiM_eqI)
+    have N_events_eq: "sets (Pi\<^sub>M I N) = sets (Pi\<^sub>M I M')"
+      unfolding N_def by (intro sets_PiM_cong) auto
+    also have "\<dots> = sets (distr (Pi\<^sub>M I M) (Pi\<^sub>M I M') (compose I f))"
+      by simp
+    finally show "sets (distr (Pi\<^sub>M I M) (Pi\<^sub>M I M') (compose I f)) = sets (Pi\<^sub>M I N)"  ..
+
+    fix A assume A: "\<And>i. i \<in> I \<Longrightarrow> A i \<in> N.M.events i"
+    have "emeasure (distr (Pi\<^sub>M I M) (Pi\<^sub>M I M') (compose I f)) (Pi\<^sub>E I A) =
+          emeasure (Pi\<^sub>M I M) (compose I f -` Pi\<^sub>E I A \<inter> space (Pi\<^sub>M I M))"
+    proof (intro emeasure_distr)
+      show "compose I f \<in> Pi\<^sub>M I M \<rightarrow>\<^sub>M Pi\<^sub>M I M'"
+        unfolding compose_def by measurable
+      show "Pi\<^sub>E I A \<in> sets (Pi\<^sub>M I M')"
+        unfolding N_events_eq [symmetric] by (intro sets_PiM_I_finite fin A)
+    qed
+    also have "compose I f -` Pi\<^sub>E I A \<inter> space (Pi\<^sub>M I M) = Pi\<^sub>E I (\<lambda>i. f -` A i \<inter> space (M i))"
+      using A by (auto simp: space_PiM PiE_def Pi_def extensional_def N_def compose_def)
+    also have "emeasure (Pi\<^sub>M I M) (Pi\<^sub>E I (\<lambda>i. f -` A i \<inter> space (M i))) =
+          (\<Prod>i\<in>I. emeasure (M i) (f -` A i \<inter> space (M i)))"
+      using A by (intro M.emeasure_PiM fin) (auto simp: N_def)
+    also have "\<dots> = (\<Prod>i\<in>I. emeasure (distr (M i) (M' i) f) (A i))"
+      using A by (intro prod.cong emeasure_distr [symmetric]) (auto simp: N_def)
+    also have "\<dots> = (\<Prod>i\<in>I. emeasure (N i) (A i))"
+      unfolding N_def by (intro prod.cong) (auto simp: N_def)
+    finally show "emeasure (distr (Pi\<^sub>M I M) (Pi\<^sub>M I M') (compose I f)) (Pi\<^sub>E I A) = \<dots>" .
+  qed fact+
+  also have "PiM I N = PiM I (\<lambda>i. distr (M i) (M' i) f)"
+    by (intro PiM_cong) (auto simp: N_def)
+  finally show ?thesis .
+qed
+
 end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Probability/Product_PMF.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -0,0 +1,910 @@
+(*
+  File:    Product_PMF.thy
+  Authors: Manuel Eberl, Max W. Haslbeck
+*)
+section \<open>Indexed products of PMFs\<close>
+theory Product_PMF
+  imports Probability_Mass_Function Independent_Family
+begin
+
+subsection \<open>Preliminaries\<close>
+
+lemma pmf_expectation_eq_infsetsum: "measure_pmf.expectation p f = infsetsum (\<lambda>x. pmf p x * f x) UNIV"
+  unfolding infsetsum_def measure_pmf_eq_density by (subst integral_density) simp_all
+
+lemma measure_pmf_prob_product:
+  assumes "countable A" "countable B"
+  shows "measure_pmf.prob (pair_pmf M N) (A \<times> B) = measure_pmf.prob M A * measure_pmf.prob N B"
+proof -
+  have "measure_pmf.prob (pair_pmf M N) (A \<times> B) = (\<Sum>\<^sub>a(a, b)\<in>A \<times> B. pmf M a * pmf N b)"
+    by (auto intro!: infsetsum_cong simp add: measure_pmf_conv_infsetsum pmf_pair)
+  also have "\<dots> = measure_pmf.prob M A * measure_pmf.prob N B"
+    using assms by (subst infsetsum_product) (auto simp add: measure_pmf_conv_infsetsum)
+  finally show ?thesis
+    by simp
+qed
+
+
+subsection \<open>Definition\<close>
+
+text \<open>
+  In analogy to @{const PiM}, we define an indexed product of PMFs. In the literature, this
+  is typically described as taking a vector of independent random variables. Note that the components
+  do not have to be identically distributed.
+
+  The operation takes an explicit index set \<^term>\<open>A :: 'a set\<close> and a function \<^term>\<open>f :: 'a \<Rightarrow> 'b pmf\<close>
+  that maps each element from \<^term>\<open>A\<close> to a PMF and defines the product measure
+  $\bigotimes_{i\in A} f(i)$, which is represented as a \<^typ>\<open>('a \<Rightarrow> 'b) pmf\<close>.
+
+  Note that unlike @{const PiM}, this only works for \<^emph>\<open>finite\<close> index sets. It could
+  be extended to countable sets and beyond, but the construction becomes somewhat more involved.
+\<close>
+definition Pi_pmf :: "'a set \<Rightarrow> 'b \<Rightarrow> ('a \<Rightarrow> 'b pmf) \<Rightarrow> ('a \<Rightarrow> 'b) pmf" where
+  "Pi_pmf A dflt p =
+     embed_pmf (\<lambda>f. if (\<forall>x. x \<notin> A \<longrightarrow> f x = dflt) then \<Prod>x\<in>A. pmf (p x) (f x) else 0)"
+
+text \<open>
+  A technical subtlety that needs to be addressed is this: Intuitively, the functions in the
+  support of a product distribution have domain \<open>A\<close>. However, since HOL is a total logic, these
+  functions must still return \<^emph>\<open>some\<close> value for inputs outside \<open>A\<close>. The product measure
+  @{const PiM} simply lets these functions return @{const undefined} in these cases. We chose a
+  different solution here: the functions return a user-supplied default value \<^term>\<open>dflt :: 'b\<close>
+  for all inputs outside \<open>A\<close>.
+
+  As one possible application, one could model the result of \<open>n\<close> different independent coin
+  tosses as @{term "Pi_pmf {0..<n} False (\<lambda>_. bernoulli_pmf (1 / 2))"}. This returns a function
+  of type \<^typ>\<open>nat \<Rightarrow> bool\<close> that maps every natural number below \<open>n\<close> to the result of the
+  corresponding coin toss, and every other natural number to \<^term>\<open>False\<close>.
+\<close>
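+
+text \<open>
+  To make this concrete: under this encoding the function \<open>\<lambda>i. i < n\<close> (which is \<^term>\<open>True\<close> on
+  all of the first \<open>n\<close> tosses and the default \<^term>\<open>False\<close> everywhere else) receives probability
+  $(1/2)^n$, whereas the constant-\<^term>\<open>True\<close> function receives probability $0$ because it
+  deviates from the default outside \<open>{0..<n}\<close>
+  (cf.\ the lemmas \<open>pmf_Pi\<close> and \<open>pmf_Pi_outside\<close> below).
+\<close>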
+
+lemma pmf_Pi:
+  assumes A: "finite A"
+  shows   "pmf (Pi_pmf A dflt p) f =
+             (if (\<forall>x. x \<notin> A \<longrightarrow> f x = dflt) then \<Prod>x\<in>A. pmf (p x) (f x) else 0)"
+  unfolding Pi_pmf_def
+proof (rule pmf_embed_pmf, goal_cases)
+  case 2
+  define S where "S = {f. \<forall>x. x \<notin> A \<longrightarrow> f x = dflt}"
+  define B where "B = (\<lambda>x. set_pmf (p x))"
+
+  have neutral_left: "(\<Prod>xa\<in>A. pmf (p xa) (f xa)) = 0"
+    if "f \<in> PiE A B - (\<lambda>f. restrict f A) ` S" for f
+  proof -
+    have "restrict (\<lambda>x. if x \<in> A then f x else dflt) A \<in> (\<lambda>f. restrict f A) ` S"
+      by (intro imageI) (auto simp: S_def)
+    also have "restrict (\<lambda>x. if x \<in> A then f x else dflt) A = f"
+      using that by (auto simp: PiE_def Pi_def extensional_def fun_eq_iff)
+    finally show ?thesis using that by blast
+  qed
+  have neutral_right: "(\<Prod>xa\<in>A. pmf (p xa) (f xa)) = 0"
+    if "f \<in> (\<lambda>f. restrict f A) ` S - PiE A B" for f
+  proof -
+    from that obtain f' where f': "f = restrict f' A" "f' \<in> S" by auto
+    moreover from this and that have "restrict f' A \<notin> PiE A B" by simp
+    then obtain x where "x \<in> A" "pmf (p x) (f' x) = 0" by (auto simp: B_def set_pmf_eq)
+    with f' and A show ?thesis by auto
+  qed
+
+  have "(\<lambda>f. \<Prod>x\<in>A. pmf (p x) (f x)) abs_summable_on PiE A B"
+    by (intro abs_summable_on_prod_PiE A) (auto simp: B_def)
+  also have "?this \<longleftrightarrow> (\<lambda>f. \<Prod>x\<in>A. pmf (p x) (f x)) abs_summable_on (\<lambda>f. restrict f A) ` S"
+    by (intro abs_summable_on_cong_neutral neutral_left neutral_right) auto
+  also have "\<dots> \<longleftrightarrow> (\<lambda>f. \<Prod>x\<in>A. pmf (p x) (restrict f A x)) abs_summable_on S"
+    by (rule abs_summable_on_reindex_iff [symmetric]) (force simp: inj_on_def fun_eq_iff S_def)
+  also have "\<dots> \<longleftrightarrow> (\<lambda>f. if \<forall>x. x \<notin> A \<longrightarrow> f x = dflt then \<Prod>x\<in>A. pmf (p x) (f x) else 0)
+                          abs_summable_on UNIV"
+    by (intro abs_summable_on_cong_neutral) (auto simp: S_def)
+  finally have summable: \<dots> .
+
+  have "1 = (\<Prod>x\<in>A. 1::real)" by simp
+  also have "(\<Prod>x\<in>A. 1) = (\<Prod>x\<in>A. \<Sum>\<^sub>ay\<in>B x. pmf (p x) y)"
+    unfolding B_def by (subst infsetsum_pmf_eq_1) auto
+  also have "(\<Prod>x\<in>A. \<Sum>\<^sub>ay\<in>B x. pmf (p x) y) = (\<Sum>\<^sub>af\<in>Pi\<^sub>E A B. \<Prod>x\<in>A. pmf (p x) (f x))"
+    by (intro infsetsum_prod_PiE [symmetric] A) (auto simp: B_def)
+  also have "\<dots> = (\<Sum>\<^sub>af\<in>(\<lambda>f. restrict f A) ` S. \<Prod>x\<in>A. pmf (p x) (f x))" using A
+    by (intro infsetsum_cong_neutral neutral_left neutral_right refl)
+  also have "\<dots> = (\<Sum>\<^sub>af\<in>S. \<Prod>x\<in>A. pmf (p x) (restrict f A x))"
+    by (rule infsetsum_reindex) (force simp: inj_on_def fun_eq_iff S_def)
+  also have "\<dots> = (\<Sum>\<^sub>af\<in>S. \<Prod>x\<in>A. pmf (p x) (f x))"
+    by (intro infsetsum_cong) (auto simp: S_def)
+  also have "\<dots> = (\<Sum>\<^sub>af. if \<forall>x. x \<notin> A \<longrightarrow> f x = dflt then \<Prod>x\<in>A. pmf (p x) (f x) else 0)"
+    by (intro infsetsum_cong_neutral) (auto simp: S_def)
+  also have "ennreal \<dots> = (\<integral>\<^sup>+f. ennreal (if \<forall>x. x \<notin> A \<longrightarrow> f x = dflt
+                             then \<Prod>x\<in>A. pmf (p x) (f x) else 0) \<partial>count_space UNIV)"
+    by (intro nn_integral_conv_infsetsum [symmetric] summable) (auto simp: prod_nonneg)
+  finally show ?case by simp
+qed (auto simp: prod_nonneg)
+
+lemma Pi_pmf_cong:
+  assumes "A = A'" "dflt = dflt'" "\<And>x. x \<in> A \<Longrightarrow> f x = f' x"
+  shows   "Pi_pmf A dflt f = Pi_pmf A' dflt' f'"
+proof -
+  have "(\<lambda>fa. if \<forall>x. x \<notin> A \<longrightarrow> fa x = dflt then \<Prod>x\<in>A. pmf (f x) (fa x) else 0) =
+        (\<lambda>f. if \<forall>x. x \<notin> A' \<longrightarrow> f x = dflt' then \<Prod>x\<in>A'. pmf (f' x) (f x) else 0)"
+    using assms by (intro ext) (auto intro!: prod.cong)
+  thus ?thesis
+    by (simp only: Pi_pmf_def)
+qed
+
+lemma pmf_Pi':
+  assumes "finite A" "\<And>x. x \<notin> A \<Longrightarrow> f x = dflt"
+  shows   "pmf (Pi_pmf A dflt p) f = (\<Prod>x\<in>A. pmf (p x) (f x))"
+  using assms by (subst pmf_Pi) auto
+
+lemma pmf_Pi_outside:
+  assumes "finite A" "\<exists>x. x \<notin> A \<and> f x \<noteq> dflt"
+  shows   "pmf (Pi_pmf A dflt p) f = 0"
+  using assms by (subst pmf_Pi) auto
+
+lemma pmf_Pi_empty [simp]: "Pi_pmf {} dflt p = return_pmf (\<lambda>_. dflt)"
+  by (intro pmf_eqI, subst pmf_Pi) (auto simp: indicator_def)
+
+lemma set_Pi_pmf_subset: "finite A \<Longrightarrow> set_pmf (Pi_pmf A dflt p) \<subseteq> {f. \<forall>x. x \<notin> A \<longrightarrow> f x = dflt}"
+  by (auto simp: set_pmf_eq pmf_Pi)
+
+
+subsection \<open>Dependent product sets with a default\<close>
+
+text \<open>
+  The following describes a dependent product of sets where the functions are required to return
+  the default value \<^term>\<open>dflt\<close> outside their domain, in analogy to @{const PiE}, which uses
+  @{const undefined}.
+\<close>
+definition PiE_dflt
+  where "PiE_dflt A dflt B = {f. \<forall>x. (x \<in> A \<longrightarrow> f x \<in> B x) \<and> (x \<notin> A \<longrightarrow> f x = dflt)}"
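+
+text \<open>
+  For example, \<open>PiE_dflt {0, 1} d B\<close> consists of exactly those functions \<open>f\<close> with
+  \<open>f 0 \<in> B 0\<close>, \<open>f 1 \<in> B 1\<close>, and \<open>f x = d\<close> for every other \<open>x\<close>; if \<open>B 0\<close> and \<open>B 1\<close> are
+  finite, there are \<open>card (B 0) * card (B 1)\<close> such functions (cf.\ \<open>card_PiE_dflt\<close> below).
+\<close>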
+
+lemma restrict_PiE_dflt: "(\<lambda>h. restrict h A) ` PiE_dflt A dflt B = PiE A B"
+proof (intro equalityI subsetI)
+  fix h assume "h \<in> (\<lambda>h. restrict h A) ` PiE_dflt A dflt B"
+  thus "h \<in> PiE A B"
+    by (auto simp: PiE_dflt_def)
+next
+  fix h assume h: "h \<in> PiE A B"
+  hence "restrict (\<lambda>x. if x \<in> A then h x else dflt) A \<in> (\<lambda>h. restrict h A) ` PiE_dflt A dflt B"
+    by (intro imageI) (auto simp: PiE_def extensional_def PiE_dflt_def)
+  also have "restrict (\<lambda>x. if x \<in> A then h x else dflt) A = h"
+    using h by (auto simp: fun_eq_iff)
+  finally show "h \<in> (\<lambda>h. restrict h A) ` PiE_dflt A dflt B" .
+qed
+
+lemma dflt_image_PiE: "(\<lambda>h x. if x \<in> A then h x else dflt) ` PiE A B = PiE_dflt A dflt B"
+  (is "?f ` ?X = ?Y")
+proof (intro equalityI subsetI)
+  fix h assume "h \<in> ?f ` ?X"
+  thus "h \<in> ?Y"
+    by (auto simp: PiE_dflt_def PiE_def)
+next
+  fix h assume h: "h \<in> ?Y"
+  hence "?f (restrict h A) \<in> ?f ` ?X"
+    by (intro imageI) (auto simp: PiE_def extensional_def PiE_dflt_def)
+  also have "?f (restrict h A) = h"
+    using h by (auto simp: fun_eq_iff PiE_dflt_def)
+  finally show "h \<in> ?f ` ?X" .
+qed
+
+lemma finite_PiE_dflt [intro]:
+  assumes "finite A" "\<And>x. x \<in> A \<Longrightarrow> finite (B x)"
+  shows   "finite (PiE_dflt A d B)"
+proof -
+  have "PiE_dflt A d B = (\<lambda>f x. if x \<in> A then f x else d) ` PiE A B"
+    by (rule dflt_image_PiE [symmetric])
+  also have "finite \<dots>"
+    by (intro finite_imageI finite_PiE assms)
+  finally show ?thesis .
+qed
+
+lemma card_PiE_dflt:
+  assumes "finite A" "\<And>x. x \<in> A \<Longrightarrow> finite (B x)"
+  shows   "card (PiE_dflt A d B) = (\<Prod>x\<in>A. card (B x))"
+proof -
+  from assms have "(\<Prod>x\<in>A. card (B x)) = card (PiE A B)"
+    by (intro card_PiE [symmetric]) auto
+  also have "PiE A B = (\<lambda>f. restrict f A) ` PiE_dflt A d B"
+    by (rule restrict_PiE_dflt [symmetric])
+  also have "card \<dots> = card (PiE_dflt A d B)"
+    by (intro card_image) (force simp: inj_on_def restrict_def fun_eq_iff PiE_dflt_def)
+  finally show ?thesis ..
+qed
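+
+text \<open>
+  In particular (an illustrative corollary, provable from the lemma above and the
+  standard rule for constant products), for a constant family of finite sets the
+  product space has \<open>card B ^ card A\<close> elements:
+\<close>
+lemma
+  assumes "finite A" "finite B"
+  shows   "card (PiE_dflt A dflt (\<lambda>_. B)) = card B ^ card A"
+  \<comment> \<open>proof sketch\<close>
+  using assms by (simp add: card_PiE_dflt prod_constant)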
+
+lemma PiE_dflt_empty_iff [simp]: "PiE_dflt A dflt B = {} \<longleftrightarrow> (\<exists>x\<in>A. B x = {})"
+  by (simp add: dflt_image_PiE [symmetric] PiE_eq_empty_iff)
+
+lemma set_Pi_pmf_subset':
+  assumes "finite A"
+  shows   "set_pmf (Pi_pmf A dflt p) \<subseteq> PiE_dflt A dflt (set_pmf \<circ> p)"
+  using assms by (auto simp: set_pmf_eq pmf_Pi PiE_dflt_def)
+
+lemma set_Pi_pmf:
+  assumes "finite A"
+  shows   "set_pmf (Pi_pmf A dflt p) = PiE_dflt A dflt (set_pmf \<circ> p)"
+proof (rule equalityI)
+  show "PiE_dflt A dflt (set_pmf \<circ> p) \<subseteq> set_pmf (Pi_pmf A dflt p)"
+  proof safe
+    fix f assume f: "f \<in> PiE_dflt A dflt (set_pmf \<circ> p)"
+    hence "pmf (Pi_pmf A dflt p) f = (\<Prod>x\<in>A. pmf (p x) (f x))"
+      using assms by (auto simp: pmf_Pi PiE_dflt_def)
+    also have "\<dots> > 0"
+      using f by (intro prod_pos) (auto simp: PiE_dflt_def set_pmf_eq)
+    finally show "f \<in> set_pmf (Pi_pmf A dflt p)"
+      by (auto simp: set_pmf_eq)
+  qed
+qed (use set_Pi_pmf_subset'[OF assms, of dflt p] in auto)
+
+text \<open>
+  The probability of an independent combination of events is precisely the product
+  of the probabilities of each individual event.
+\<close>
+lemma measure_Pi_pmf_PiE_dflt:
+  assumes [simp]: "finite A"
+  shows   "measure_pmf.prob (Pi_pmf A dflt p) (PiE_dflt A dflt B) =
+             (\<Prod>x\<in>A. measure_pmf.prob (p x) (B x))"
+proof -
+  define B' where "B' = (\<lambda>x. B x \<inter> set_pmf (p x))"
+  have "measure_pmf.prob (Pi_pmf A dflt p) (PiE_dflt A dflt B) =
+          (\<Sum>\<^sub>ah\<in>PiE_dflt A dflt B. pmf (Pi_pmf A dflt p) h)"
+    by (rule measure_pmf_conv_infsetsum)
+  also have "\<dots> = (\<Sum>\<^sub>ah\<in>PiE_dflt A dflt B. \<Prod>x\<in>A. pmf (p x) (h x))"
+    by (intro infsetsum_cong, subst pmf_Pi') (auto simp: PiE_dflt_def)
+  also have "\<dots> = (\<Sum>\<^sub>ah\<in>(\<lambda>h. restrict h A) ` PiE_dflt A dflt B. \<Prod>x\<in>A. pmf (p x) (h x))"
+    by (subst infsetsum_reindex) (force simp: inj_on_def PiE_dflt_def fun_eq_iff)+
+  also have "(\<lambda>h. restrict h A) ` PiE_dflt A dflt B = PiE A B"
+    by (rule restrict_PiE_dflt)
+  also have "(\<Sum>\<^sub>ah\<in>PiE A B. \<Prod>x\<in>A. pmf (p x) (h x)) = (\<Sum>\<^sub>ah\<in>PiE A B'. \<Prod>x\<in>A. pmf (p x) (h x))"
+    by (intro infsetsum_cong_neutral) (auto simp: B'_def set_pmf_eq)
+  also have "(\<Sum>\<^sub>ah\<in>PiE A B'. \<Prod>x\<in>A. pmf (p x) (h x)) = (\<Prod>x\<in>A. infsetsum (pmf (p x)) (B' x))"
+    by (intro infsetsum_prod_PiE) (auto simp: B'_def)
+  also have "\<dots> = (\<Prod>x\<in>A. infsetsum (pmf (p x)) (B x))"
+    by (intro prod.cong infsetsum_cong_neutral) (auto simp: B'_def set_pmf_eq)
+  also have "\<dots> = (\<Prod>x\<in>A. measure_pmf.prob (p x) (B x))"
+    by (subst measure_pmf_conv_infsetsum) (rule refl)
+  finally show ?thesis .
+qed
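+
+text \<open>
+  A concrete instance of the above (included only as an illustration; the proof is a
+  one-line sketch): two independent coins with success probability \<open>p\<close> both come up
+  \<open>True\<close> with probability \<open>p * p\<close>.
+\<close>
+lemma
+  assumes "0 \<le> p" "(p :: real) \<le> 1"
+  shows   "measure_pmf.prob (Pi_pmf {0, 1 :: nat} False (\<lambda>_. bernoulli_pmf p))
+             (PiE_dflt {0, 1} False (\<lambda>_. {True})) = p * p"
+  \<comment> \<open>proof sketch\<close>
+  using assms by (subst measure_Pi_pmf_PiE_dflt) (auto simp: measure_pmf_single)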
+
+lemma measure_Pi_pmf_Pi:
+  assumes [simp]: "finite A"
+  shows   "measure_pmf.prob (Pi_pmf A dflt p) (Pi A B) =
+             (\<Prod>x\<in>A. measure_pmf.prob (p x) (B x))" (is "?lhs = ?rhs")
+proof -
+  have "?lhs = measure_pmf.prob (Pi_pmf A dflt p) (PiE_dflt A dflt B)"
+    by (intro measure_prob_cong_0)
+       (auto simp: PiE_dflt_def PiE_def intro!: pmf_Pi_outside)+
+  also have "\<dots> = ?rhs"
+    using assms by (simp add: measure_Pi_pmf_PiE_dflt)
+  finally show ?thesis
+    by simp
+qed
+
+
+subsection \<open>Common PMF operations on products\<close>
+
+text \<open>
+  @{const Pi_pmf} distributes over the `bind' operation in the Giry monad:
+\<close>
+lemma Pi_pmf_bind:
+  assumes "finite A"
+  shows   "Pi_pmf A d (\<lambda>x. bind_pmf (p x) (q x)) =
+             do {f \<leftarrow> Pi_pmf A d' p; Pi_pmf A d (\<lambda>x. q x (f x))}" (is "?lhs = ?rhs")
+proof (rule pmf_eqI, goal_cases)
+  case (1 f)
+  show ?case
+  proof (cases "\<exists>x\<in>-A. f x \<noteq> d")
+    case False
+    define B where "B = (\<lambda>x. set_pmf (p x))"
+    have [simp]: "countable (B x)" for x by (auto simp: B_def)
+
+    {
+      fix x :: 'a
+      have "(\<lambda>a. pmf (p x) a * 1) abs_summable_on B x"
+        by (simp add: pmf_abs_summable)
+      moreover have "norm (pmf (p x) a * 1) \<ge> norm (pmf (p x) a * pmf (q x a) (f x))" for a
+        unfolding norm_mult by (intro mult_left_mono) (auto simp: pmf_le_1)
+      ultimately have "(\<lambda>a. pmf (p x) a * pmf (q x a) (f x)) abs_summable_on B x"
+        by (rule abs_summable_on_comparison_test)
+    } note summable = this
+
+    have "pmf ?rhs f = (\<Sum>\<^sub>ag. pmf (Pi_pmf A d' p) g * (\<Prod>x\<in>A. pmf (q x (g x)) (f x)))"
+      by (subst pmf_bind, subst pmf_Pi')
+         (insert assms False, simp_all add: pmf_expectation_eq_infsetsum)
+    also have "\<dots> = (\<Sum>\<^sub>ag\<in>PiE_dflt A d' B.
+                      pmf (Pi_pmf A d' p) g * (\<Prod>x\<in>A. pmf (q x (g x)) (f x)))" unfolding B_def
+      using assms by (intro infsetsum_cong_neutral) (auto simp: pmf_Pi PiE_dflt_def set_pmf_eq)
+    also have "\<dots> = (\<Sum>\<^sub>ag\<in>PiE_dflt A d' B.
+                      (\<Prod>x\<in>A. pmf (p x) (g x) * pmf (q x (g x)) (f x)))"
+      using assms by (intro infsetsum_cong) (auto simp: pmf_Pi PiE_dflt_def prod.distrib)
+    also have "\<dots> = (\<Sum>\<^sub>ag\<in>(\<lambda>g. restrict g A) ` PiE_dflt A d' B.
+                      (\<Prod>x\<in>A. pmf (p x) (g x) * pmf (q x (g x)) (f x)))"
+      by (subst infsetsum_reindex) (force simp: PiE_dflt_def inj_on_def fun_eq_iff)+
+    also have "(\<lambda>g. restrict g A) ` PiE_dflt A d' B = PiE A B"
+      by (rule restrict_PiE_dflt)
+    also have "(\<Sum>\<^sub>ag\<in>\<dots>. (\<Prod>x\<in>A. pmf (p x) (g x) * pmf (q x (g x)) (f x))) =
+                 (\<Prod>x\<in>A. \<Sum>\<^sub>aa\<in>B x. pmf (p x) a * pmf (q x a) (f x))"
+      using assms summable by (subst infsetsum_prod_PiE) simp_all
+    also have "\<dots> = (\<Prod>x\<in>A. \<Sum>\<^sub>aa. pmf (p x) a * pmf (q x a) (f x))"
+      by (intro prod.cong infsetsum_cong_neutral) (auto simp: B_def set_pmf_eq)
+    also have "\<dots> = pmf ?lhs f"
+      using False assms by (subst pmf_Pi') (simp_all add: pmf_bind pmf_expectation_eq_infsetsum)
+    finally show ?thesis ..
+  next
+    case True
+    have "pmf ?rhs f =
+            measure_pmf.expectation (Pi_pmf A d' p) (\<lambda>x. pmf (Pi_pmf A d (\<lambda>xa. q xa (x xa))) f)"
+      using assms by (simp add: pmf_bind)
+    also have "\<dots> = measure_pmf.expectation (Pi_pmf A d' p) (\<lambda>x. 0)"
+      using assms True by (intro Bochner_Integration.integral_cong pmf_Pi_outside) auto
+    also have "\<dots> = pmf ?lhs f"
+      using assms True by (subst pmf_Pi_outside) auto
+    finally show ?thesis ..
+  qed
+qed
+
+lemma Pi_pmf_return_pmf [simp]:
+  assumes "finite A"
+  shows   "Pi_pmf A dflt (\<lambda>x. return_pmf (f x)) = return_pmf (\<lambda>x. if x \<in> A then f x else dflt)"
+  using assms by (intro pmf_eqI) (auto simp: pmf_Pi indicator_def)
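+
+text \<open>
+  On a singleton index set this specialises to the following spelled-out instance
+  (an illustration only; the proof is a sketch that merely unfolds @{const fun_upd}):
+\<close>
+lemma "Pi_pmf {0 :: nat} False (\<lambda>_. return_pmf True) = return_pmf ((\<lambda>_. False)(0 := True))"
+  \<comment> \<open>proof sketch\<close>
+  by (simp add: fun_upd_def)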
+
+text \<open>
+  Analogously, any componentwise mapping can be pulled outside the product:
+\<close>
+lemma Pi_pmf_map:
+  assumes [simp]: "finite A" and "f dflt = dflt'"
+  shows   "Pi_pmf A dflt' (\<lambda>x. map_pmf f (g x)) = map_pmf (\<lambda>h. f \<circ> h) (Pi_pmf A dflt g)"
+proof -
+  have "Pi_pmf A dflt' (\<lambda>x. map_pmf f (g x)) =
+          Pi_pmf A dflt' (\<lambda>x. g x \<bind> (\<lambda>x. return_pmf (f x)))"
+    using assms by (simp add: map_pmf_def Pi_pmf_bind)
+  also have "\<dots> = Pi_pmf A dflt g \<bind> (\<lambda>h. return_pmf (\<lambda>x. if x \<in> A then f (h x) else dflt'))"
+    by (subst Pi_pmf_bind[where d' = dflt]) auto
+  also have "\<dots> = map_pmf (\<lambda>h. f \<circ> h) (Pi_pmf A dflt g)"
+    unfolding map_pmf_def using set_Pi_pmf_subset'[of A dflt g]
+    by (intro bind_pmf_cong refl arg_cong[of _ _ return_pmf])
+       (auto simp: fun_eq_iff PiE_dflt_def assms(2))
+  finally show ?thesis .
+qed
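+
+text \<open>
+  For example (an illustrative instance of the previous lemma; note that negating each
+  coin also swaps the two default values):
+\<close>
+lemma
+  assumes "finite A"
+  shows   "Pi_pmf A True (\<lambda>_. map_pmf Not (bernoulli_pmf p)) =
+             map_pmf (\<lambda>h. Not \<circ> h) (Pi_pmf A False (\<lambda>_. bernoulli_pmf p))"
+  \<comment> \<open>proof sketch\<close>
+  using assms by (intro Pi_pmf_map) auto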
+
+text \<open>
+  We can exchange the default value in a product of PMFs like this:
+\<close>
+lemma Pi_pmf_default_swap:
+  assumes "finite A"
+  shows   "map_pmf (\<lambda>f x. if x \<in> A then f x else dflt') (Pi_pmf A dflt p) =
+             Pi_pmf A dflt' p" (is "?lhs = ?rhs")
+proof (rule pmf_eqI, goal_cases)
+  case (1 f)
+  let ?B = "(\<lambda>f x. if x \<in> A then f x else dflt') -` {f} \<inter> PiE_dflt A dflt (\<lambda>_. UNIV)"
+  show ?case
+  proof (cases "\<exists>x\<in>-A. f x \<noteq> dflt'")
+    case False
+    let ?f' = "\<lambda>x. if x \<in> A then f x else dflt"
+    from False have "pmf ?lhs f = measure_pmf.prob (Pi_pmf A dflt p) ?B"
+      using assms unfolding pmf_map
+      by (intro measure_prob_cong_0) (auto simp: PiE_dflt_def pmf_Pi_outside)
+    also from False have "?B = {?f'}"
+      by (auto simp: fun_eq_iff PiE_dflt_def)
+    also have "measure_pmf.prob (Pi_pmf A dflt p) {?f'} = pmf (Pi_pmf A dflt p) ?f'"
+      by (simp add: measure_pmf_single)
+    also have "\<dots> = pmf ?rhs f"
+      using False assms by (subst (1 2) pmf_Pi) auto
+    finally show ?thesis .
+  next
+    case True
+    have "pmf ?lhs f = measure_pmf.prob (Pi_pmf A dflt p) ?B"
+      using assms unfolding pmf_map
+      by (intro measure_prob_cong_0) (auto simp: PiE_dflt_def pmf_Pi_outside)
+    also from True have "?B = {}" by auto
+    also have "measure_pmf.prob (Pi_pmf A dflt p) \<dots> = 0"
+      by simp
+    also have "0 = pmf ?rhs f"
+      using True assms by (intro pmf_Pi_outside [symmetric]) auto
+    finally show ?thesis .
+  qed
+qed
+
+text \<open>
+  The following rule allows reindexing the product:
+\<close>
+lemma Pi_pmf_bij_betw:
+  assumes "finite A" "bij_betw h A B" "\<And>x. x \<notin> A \<Longrightarrow> h x \<notin> B"
+  shows "Pi_pmf A dflt (\<lambda>_. f) = map_pmf (\<lambda>g. g \<circ> h) (Pi_pmf B dflt (\<lambda>_. f))"
+    (is "?lhs = ?rhs")
+proof -
+  have B: "finite B"
+    using assms bij_betw_finite by auto
+  have "pmf ?lhs g = pmf ?rhs g" for g
+  proof (cases "\<forall>a. a \<notin> A \<longrightarrow> g a = dflt")
+    case True
+    define h' where "h' = the_inv_into A h"
+    have h': "h' (h x) = x" if "x \<in> A" for x
+      unfolding h'_def using that assms by (auto simp add: bij_betw_def the_inv_into_f_f)
+    have h: "h (h' x) = x" if "x \<in> B" for x
+      unfolding h'_def using that assms f_the_inv_into_f_bij_betw by fastforce
+    have "pmf ?rhs g = measure_pmf.prob (Pi_pmf B dflt (\<lambda>_. f)) ((\<lambda>g. g \<circ> h) -` {g})"
+      unfolding pmf_map by simp
+    also have "\<dots> = measure_pmf.prob (Pi_pmf B dflt (\<lambda>_. f))
+                                (((\<lambda>g. g \<circ> h) -` {g}) \<inter> PiE_dflt B dflt (\<lambda>_. UNIV))"
+      using B by (intro measure_prob_cong_0) (auto simp: PiE_dflt_def pmf_Pi_outside)
+    also have "\<dots> = pmf (Pi_pmf B dflt (\<lambda>_. f)) (\<lambda>x. if x \<in> B then g (h' x) else dflt)"
+    proof -
+      have "(if h x \<in> B then g (h' (h x)) else dflt) = g x" for x
+        using h' assms True by (cases "x \<in> A") (auto simp add: bij_betwE)
+      then have "(\<lambda>g. g \<circ> h) -` {g} \<inter> PiE_dflt B dflt (\<lambda>_. UNIV) =
+            {(\<lambda>x. if x \<in> B then g (h' x) else dflt)}"
+        using assms h' h True unfolding PiE_dflt_def by auto
+      then show ?thesis
+        by (simp add: measure_pmf_single)
+    qed
+    also have "\<dots> = pmf (Pi_pmf A dflt (\<lambda>_. f)) g"
+      using B assms True  h'_def
+      by (auto simp add: pmf_Pi intro!: prod.reindex_bij_betw bij_betw_the_inv_into)
+    finally show ?thesis
+      by simp
+  next
+    case False
+    have "pmf ?rhs g = infsetsum (pmf (Pi_pmf B dflt (\<lambda>_. f))) ((\<lambda>g. g \<circ> h) -` {g})"
+      using assms by (auto simp add: measure_pmf_conv_infsetsum pmf_map)
+    also have "\<dots> = infsetsum (\<lambda>_. 0) ((\<lambda>g x. g (h x)) -` {g})"
+      using B False assms by (intro infsetsum_cong pmf_Pi_outside) fastforce+
+    also have "\<dots> = 0"
+      by simp
+    finally show ?thesis
+      using assms False by (auto simp add: pmf_Pi pmf_map)
+  qed
+  then show ?thesis
+    by (rule pmf_eqI)
+qed
+
+text \<open>
+  A product of uniform random choices is again a uniform distribution.
+\<close>
+lemma Pi_pmf_of_set:
+  assumes "finite A" "\<And>x. x \<in> A \<Longrightarrow> finite (B x)" "\<And>x. x \<in> A \<Longrightarrow> B x \<noteq> {}"
+  shows   "Pi_pmf A d (\<lambda>x. pmf_of_set (B x)) = pmf_of_set (PiE_dflt A d B)" (is "?lhs = ?rhs")
+proof (rule pmf_eqI, goal_cases)
+  case (1 f)
+  show ?case
+  proof (cases "\<exists>x. x \<notin> A \<and> f x \<noteq> d")
+    case True
+    hence "pmf ?lhs f = 0"
+      using assms by (intro pmf_Pi_outside) (auto simp: PiE_dflt_def)
+    also from True have "f \<notin> PiE_dflt A d B"
+      by (auto simp: PiE_dflt_def)
+    hence "0 = pmf ?rhs f"
+      using assms by (subst pmf_of_set) auto
+    finally show ?thesis .
+  next
+    case False
+    hence "pmf ?lhs f = (\<Prod>x\<in>A. pmf (pmf_of_set (B x)) (f x))"
+      using assms by (subst pmf_Pi') auto
+    also have "\<dots> = (\<Prod>x\<in>A. indicator (B x) (f x) / real (card (B x)))"
+      by (intro prod.cong refl, subst pmf_of_set) (use assms False in auto)
+    also have "\<dots> = (\<Prod>x\<in>A. indicator (B x) (f x)) / real (\<Prod>x\<in>A. card (B x))"
+      by (subst prod_dividef) simp_all
+    also have "(\<Prod>x\<in>A. indicator (B x) (f x) :: real) = indicator (PiE_dflt A d B) f"
+      using assms False by (auto simp: indicator_def PiE_dflt_def)
+    also have "(\<Prod>x\<in>A. card (B x)) = card (PiE_dflt A d B)"
+      using assms by (intro card_PiE_dflt [symmetric]) auto
+    also have "indicator (PiE_dflt A d B) f / \<dots> = pmf ?rhs f"
+      using assms by (intro pmf_of_set [symmetric]) auto
+    finally show ?thesis .
+  qed
+qed
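+
+text \<open>
+  For instance, a family of independent fair dice rolls indexed by \<open>A\<close> is uniformly
+  distributed over the corresponding function space (an illustrative instance only;
+  the proof is a sketch):
+\<close>
+lemma
+  assumes "finite A"
+  shows   "Pi_pmf A (0 :: nat) (\<lambda>_. pmf_of_set {1, 2, 3, 4, 5, 6}) =
+             pmf_of_set (PiE_dflt A 0 (\<lambda>_. {1, 2, 3, 4, 5, 6}))"
+  \<comment> \<open>proof sketch\<close>
+  using assms by (intro Pi_pmf_of_set) auto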
+
+
+subsection \<open>Merging and splitting PMF products\<close>
+
+text \<open>
+  The following lemma shows that we can add a single PMF to a product:
+\<close>
+lemma Pi_pmf_insert:
+  assumes "finite A" "x \<notin> A"
+  shows   "Pi_pmf (insert x A) dflt p = map_pmf (\<lambda>(y,f). f(x:=y)) (pair_pmf (p x) (Pi_pmf A dflt p))"
+proof (intro pmf_eqI)
+  fix f
+  let ?M = "pair_pmf (p x) (Pi_pmf A dflt p)"
+  have "pmf (map_pmf (\<lambda>(y, f). f(x := y)) ?M) f =
+          measure_pmf.prob ?M ((\<lambda>(y, f). f(x := y)) -` {f})"
+    by (subst pmf_map) auto
+  also have "((\<lambda>(y, f). f(x := y)) -` {f}) = (\<Union>y'. {(f x, f(x := y'))})"
+    by (auto simp: fun_upd_def fun_eq_iff)
+  also have "measure_pmf.prob ?M \<dots> = measure_pmf.prob ?M {(f x, f(x := dflt))}"
+    using assms by (intro measure_prob_cong_0) (auto simp: pmf_pair pmf_Pi split: if_splits)
+  also have "\<dots> = pmf (p x) (f x) * pmf (Pi_pmf A dflt p) (f(x := dflt))"
+    by (simp add: measure_pmf_single pmf_pair pmf_Pi)
+  also have "\<dots> = pmf (Pi_pmf (insert x A) dflt p) f"
+  proof (cases "\<forall>y. y \<notin> insert x A \<longrightarrow> f y = dflt")
+    case True
+    with assms have "pmf (p x) (f x) * pmf (Pi_pmf A dflt p) (f(x := dflt)) =
+                       pmf (p x) (f x) * (\<Prod>xa\<in>A. pmf (p xa) ((f(x := dflt)) xa))"
+      by (subst pmf_Pi') auto
+    also have "(\<Prod>xa\<in>A. pmf (p xa) ((f(x := dflt)) xa)) = (\<Prod>xa\<in>A. pmf (p xa) (f xa))"
+      using assms by (intro prod.cong) auto
+    also have "pmf (p x) (f x) * \<dots> = pmf (Pi_pmf (insert x A) dflt p) f"
+      using assms True by (subst pmf_Pi') auto
+    finally show ?thesis .
+  qed (insert assms, auto simp: pmf_Pi)
+  finally show "\<dots> = pmf (map_pmf (\<lambda>(y, f). f(x := y)) ?M) f" ..
+qed
+
+lemma Pi_pmf_insert':
+  assumes "finite A"  "x \<notin> A"
+  shows   "Pi_pmf (insert x A) dflt p =
+             do {y \<leftarrow> p x; f \<leftarrow> Pi_pmf A dflt p; return_pmf (f(x := y))}"
+  using assms
+  by (subst Pi_pmf_insert)
+     (auto simp add: map_pmf_def pair_pmf_def case_prod_beta' bind_return_pmf bind_assoc_pmf)
+
+lemma Pi_pmf_singleton:
+  "Pi_pmf {x} dflt p = map_pmf (\<lambda>a b. if b = x then a else dflt) (p x)"
+proof -
+  have "Pi_pmf {x} dflt p = map_pmf (fun_upd (\<lambda>_. dflt) x) (p x)"
+    by (subst Pi_pmf_insert) (simp_all add: pair_return_pmf2 pmf.map_comp o_def)
+  also have "fun_upd (\<lambda>_. dflt) x = (\<lambda>z y. if y = x then z else dflt)"
+    by (simp add: fun_upd_def fun_eq_iff)
+  finally show ?thesis .
+qed
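+
+text \<open>
+  In particular, the product over a singleton index set assigns to the function that is
+  \<^term>\<open>dflt\<close> everywhere except at \<open>x\<close> exactly the weight that \<open>p x\<close> assigns to the value
+  at \<open>x\<close> (a spelled-out instance of @{thm [source] pmf_Pi'}, included as an illustration):
+\<close>
+lemma "pmf (Pi_pmf {x} dflt p) ((\<lambda>_. dflt)(x := y)) = pmf (p x) y"
+  \<comment> \<open>proof sketch\<close>
+  by (subst pmf_Pi') auto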
+
+text \<open>
+  Projecting a product of PMFs onto a component yields the expected result:
+\<close>
+lemma Pi_pmf_component:
+  assumes "finite A"
+  shows   "map_pmf (\<lambda>f. f x) (Pi_pmf A dflt p) = (if x \<in> A then p x else return_pmf dflt)"
+proof (cases "x \<in> A")
+  case True
+  define A' where "A' = A - {x}"
+  from assms and True have A': "A = insert x A'"
+    by (auto simp: A'_def)
+  from assms have "map_pmf (\<lambda>f. f x) (Pi_pmf A dflt p) = p x" unfolding A'
+    by (subst Pi_pmf_insert)
+       (auto simp: A'_def pmf.map_comp o_def case_prod_unfold map_fst_pair_pmf)
+  with True show ?thesis by simp
+next
+  case False
+  have "map_pmf (\<lambda>f. f x) (Pi_pmf A dflt p) = map_pmf (\<lambda>_. dflt) (Pi_pmf A dflt p)"
+    using assms False set_Pi_pmf_subset[of A dflt p]
+    by (intro pmf.map_cong refl) (auto simp: set_pmf_eq pmf_Pi_outside)
+  with False show ?thesis by simp
+qed
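+
+text \<open>
+  For example, projecting a product of two independent coins onto the first coordinate
+  recovers that coin (an illustrative instance only; the proof is a sketch):
+\<close>
+lemma "map_pmf (\<lambda>f. f (1 :: nat)) (Pi_pmf {1, 2} False (\<lambda>_. bernoulli_pmf p)) = bernoulli_pmf p"
+  \<comment> \<open>proof sketch\<close>
+  by (subst Pi_pmf_component) auto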
+
+text \<open>
+  We can merge two PMF products on disjoint sets like this:
+\<close>
+lemma Pi_pmf_union:
+  assumes "finite A" "finite B" "A \<inter> B = {}"
+  shows   "Pi_pmf (A \<union> B) dflt p =
+             map_pmf (\<lambda>(f,g) x. if x \<in> A then f x else g x)
+             (pair_pmf (Pi_pmf A dflt p) (Pi_pmf B dflt p))" (is "_ = map_pmf (?h A) (?q A)")
+  using assms(1,3)
+proof (induction rule: finite_induct)
+  case (insert x A)
+  have "map_pmf (?h (insert x A)) (?q (insert x A)) =
+          do {v \<leftarrow> p x; (f, g) \<leftarrow> pair_pmf (Pi_pmf A dflt p) (Pi_pmf B dflt p);
+              return_pmf (\<lambda>y. if y \<in> insert x A then (f(x := v)) y else g y)}"
+    by (subst Pi_pmf_insert)
+       (insert insert.hyps insert.prems,
+        simp_all add: pair_pmf_def map_bind_pmf bind_map_pmf bind_assoc_pmf bind_return_pmf)
+  also have "\<dots> = do {v \<leftarrow> p x; (f, g) \<leftarrow> ?q A; return_pmf ((?h A (f,g))(x := v))}"
+    by (intro bind_pmf_cong refl) (auto simp: fun_eq_iff)
+  also have "\<dots> = do {v \<leftarrow> p x; f \<leftarrow> map_pmf (?h A) (?q A); return_pmf (f(x := v))}"
+    by (simp add: bind_map_pmf map_bind_pmf case_prod_unfold cong: if_cong)
+  also have "\<dots> = do {v \<leftarrow> p x; f \<leftarrow> Pi_pmf (A \<union> B) dflt p; return_pmf (f(x := v))}"
+    using insert.hyps and insert.prems by (intro bind_pmf_cong insert.IH [symmetric] refl) auto
+  also have "\<dots> = Pi_pmf (insert x (A \<union> B)) dflt p"
+    by (subst Pi_pmf_insert)
+       (insert assms insert.hyps insert.prems, auto simp: pair_pmf_def map_bind_pmf)
+  also have "insert x (A \<union> B) = insert x A \<union> B"
+    by simp
+  finally show ?case ..
+qed (simp_all add: case_prod_unfold map_snd_pair_pmf)
+
+text \<open>
+  We can also project a product to a subset of the indices by mapping all the other
+  indices to the default value:
+\<close>
+lemma Pi_pmf_subset:
+  assumes "finite A" "A' \<subseteq> A"
+  shows   "Pi_pmf A' dflt p = map_pmf (\<lambda>f x. if x \<in> A' then f x else dflt) (Pi_pmf A dflt p)"
+proof -
+  let ?P = "pair_pmf (Pi_pmf A' dflt p) (Pi_pmf (A - A') dflt p)"
+  from assms have [simp]: "finite A'"
+    by (blast dest: finite_subset)
+  from assms have "A = A' \<union> (A - A')"
+    by blast
+  also have "Pi_pmf \<dots> dflt p = map_pmf (\<lambda>(f,g) x. if x \<in> A' then f x else g x) ?P"
+    using assms by (intro Pi_pmf_union) auto
+  also have "map_pmf (\<lambda>f x. if x \<in> A' then f x else dflt) \<dots> = map_pmf fst ?P"
+    unfolding map_pmf_comp o_def case_prod_unfold
+    using set_Pi_pmf_subset[of A' dflt p] by (intro map_pmf_cong refl) (auto simp: fun_eq_iff)
+  also have "\<dots> = Pi_pmf A' dflt p"
+    by (simp add: map_fst_pair_pmf)
+  finally show ?thesis ..
+qed
+
+lemma Pi_pmf_subset':
+  fixes f :: "'a \<Rightarrow> 'b pmf"
+  assumes "finite A" "B \<subseteq> A" "\<And>x. x \<in> A - B \<Longrightarrow> f x = return_pmf dflt"
+  shows "Pi_pmf A dflt f = Pi_pmf B dflt f"
+proof -
+  have "Pi_pmf (B \<union> (A - B)) dflt f =
+          map_pmf (\<lambda>(f, g) x. if x \<in> B then f x else g x)
+                  (pair_pmf (Pi_pmf B dflt f) (Pi_pmf (A - B) dflt f))"
+    using assms by (intro Pi_pmf_union) (auto dest: finite_subset)
+  also have "Pi_pmf (A - B) dflt f = Pi_pmf (A - B) dflt (\<lambda>_. return_pmf dflt)"
+    using assms by (intro Pi_pmf_cong) auto
+  also have "\<dots> = return_pmf (\<lambda>_. dflt)"
+    using assms by simp
+  also have "map_pmf (\<lambda>(f, g) x. if x \<in> B then f x else g x)
+                  (pair_pmf (Pi_pmf B dflt f) (return_pmf (\<lambda>_. dflt))) =
+             map_pmf (\<lambda>f x. if x \<in> B then f x else dflt) (Pi_pmf B dflt f)"
+    by (simp add: map_pmf_def pair_pmf_def bind_assoc_pmf bind_return_pmf bind_return_pmf')
+  also have "\<dots> = Pi_pmf B dflt f"
+    using assms by (intro Pi_pmf_default_swap) (auto dest: finite_subset)
+  also have "B \<union> (A - B) = A"
+    using assms by auto
+  finally show ?thesis .
+qed
+
+lemma Pi_pmf_if_set:
+  assumes "finite A"
+  shows "Pi_pmf A dflt (\<lambda>x. if b x then f x else return_pmf dflt) =
+           Pi_pmf {x\<in>A. b x} dflt f"
+proof -
+  have "Pi_pmf A dflt (\<lambda>x. if b x then f x else return_pmf dflt) =
+          Pi_pmf {x\<in>A. b x} dflt (\<lambda>x. if b x then f x else return_pmf dflt)"
+    using assms by (intro Pi_pmf_subset') auto
+  also have "\<dots> = Pi_pmf {x\<in>A. b x} dflt f"
+    by (intro Pi_pmf_cong) auto
+  finally show ?thesis .
+qed
+
+lemma Pi_pmf_if_set':
+  assumes "finite A"
+  shows "Pi_pmf A dflt (\<lambda>x. if b x then return_pmf dflt else f x) =
+         Pi_pmf {x\<in>A. \<not>b x} dflt f"
+proof -
+  have "Pi_pmf A dflt (\<lambda>x. if b x then return_pmf dflt else  f x) =
+          Pi_pmf {x\<in>A. \<not>b x} dflt (\<lambda>x. if b x then return_pmf dflt else  f x)"
+    using assms by (intro Pi_pmf_subset') auto
+  also have "\<dots> = Pi_pmf {x\<in>A. \<not>b x} dflt f"
+    by (intro Pi_pmf_cong) auto
+  finally show ?thesis .
+qed
+
+text \<open>
+  Lastly, we can delete a single component from a product:
+\<close>
+lemma Pi_pmf_remove:
+  assumes "finite A"
+  shows   "Pi_pmf (A - {x}) dflt p = map_pmf (\<lambda>f. f(x := dflt)) (Pi_pmf A dflt p)"
+proof -
+  have "Pi_pmf (A - {x}) dflt p =
+          map_pmf (\<lambda>f xa. if xa \<in> A - {x} then f xa else dflt) (Pi_pmf A dflt p)"
+    using assms by (intro Pi_pmf_subset) auto
+  also have "\<dots> = map_pmf (\<lambda>f. f(x := dflt)) (Pi_pmf A dflt p)"
+    using set_Pi_pmf_subset[of A dflt p] assms
+    by (intro map_pmf_cong refl) (auto simp: fun_eq_iff)
+  finally show ?thesis .
+qed
+
+
+subsection \<open>Additional properties\<close>
+
+lemma nn_integral_prod_Pi_pmf:
+  assumes "finite A"
+  shows   "nn_integral (Pi_pmf A dflt p) (\<lambda>y. \<Prod>x\<in>A. f x (y x)) = (\<Prod>x\<in>A. nn_integral (p x) (f x))"
+  using assms
+proof (induction rule: finite_induct)
+  case (insert x A)
+  have "nn_integral (Pi_pmf (insert x A) dflt p) (\<lambda>y. \<Prod>z\<in>insert x A. f z (y z)) =
+          (\<integral>\<^sup>+a. \<integral>\<^sup>+b. f x a * (\<Prod>z\<in>A. f z (if z = x then a else b z)) \<partial>Pi_pmf A dflt p \<partial>p x)"
+    using insert by (auto simp: Pi_pmf_insert case_prod_unfold nn_integral_pair_pmf' cong: if_cong)
+  also have "(\<lambda>a b. \<Prod>z\<in>A. f z (if z = x then a else b z)) = (\<lambda>a b. \<Prod>z\<in>A. f z (b z))"
+    by (intro ext prod.cong) (use insert.hyps in auto)
+  also have "(\<integral>\<^sup>+a. \<integral>\<^sup>+b. f x a * (\<Prod>z\<in>A. f z (b z)) \<partial>Pi_pmf A dflt p \<partial>p x) =
+             (\<integral>\<^sup>+y. f x y \<partial>(p x)) * (\<integral>\<^sup>+y. (\<Prod>z\<in>A. f z (y z)) \<partial>(Pi_pmf A dflt p))"
+    by (simp add: nn_integral_multc nn_integral_cmult)
+  also have "(\<integral>\<^sup>+y. (\<Prod>z\<in>A. f z (y z)) \<partial>(Pi_pmf A dflt p)) = (\<Prod>x\<in>A. nn_integral (p x) (f x))"
+    by (rule insert.IH)
+  also have "(\<integral>\<^sup>+y. f x y \<partial>(p x)) * \<dots> = (\<Prod>x\<in>insert x A. nn_integral (p x) (f x))"
+    using insert.hyps by simp
+  finally show ?case .
+qed auto
+
+lemma integrable_prod_Pi_pmf:
+  fixes f :: "'a \<Rightarrow> 'b \<Rightarrow> 'c :: {real_normed_field, second_countable_topology, banach}"
+  assumes "finite A" and "\<And>x. x \<in> A \<Longrightarrow> integrable (measure_pmf (p x)) (f x)"
+  shows   "integrable (measure_pmf (Pi_pmf A dflt p)) (\<lambda>h. \<Prod>x\<in>A. f x (h x))"
+proof (intro integrableI_bounded)
+  have "(\<integral>\<^sup>+ x. ennreal (norm (\<Prod>xa\<in>A. f xa (x xa))) \<partial>measure_pmf (Pi_pmf A dflt p)) =
+        (\<integral>\<^sup>+ x. (\<Prod>y\<in>A. ennreal (norm (f y (x y)))) \<partial>measure_pmf (Pi_pmf A dflt p))"
+    by (simp flip: prod_norm prod_ennreal)
+  also have "\<dots> = (\<Prod>x\<in>A. \<integral>\<^sup>+ a. ennreal (norm (f x a)) \<partial>measure_pmf (p x))"
+    by (intro nn_integral_prod_Pi_pmf) fact
+  also have "(\<integral>\<^sup>+a. ennreal (norm (f i a)) \<partial>measure_pmf (p i)) \<noteq> top" if i: "i \<in> A" for i
+    using assms(2)[OF i] by (simp add: integrable_iff_bounded)
+  hence "(\<Prod>x\<in>A. \<integral>\<^sup>+ a. ennreal (norm (f x a)) \<partial>measure_pmf (p x)) \<noteq> top"
+    by (subst ennreal_prod_eq_top) auto
+  finally show "(\<integral>\<^sup>+ x. ennreal (norm (\<Prod>xa\<in>A. f xa (x xa))) \<partial>measure_pmf (Pi_pmf A dflt p)) < \<infinity>"
+    by (simp add: top.not_eq_extremum)
+qed auto
+
+lemma expectation_prod_Pi_pmf:
+  fixes f :: "_ \<Rightarrow> _ \<Rightarrow> real"
+  assumes "finite A"
+  assumes "\<And>x. x \<in> A \<Longrightarrow> integrable (measure_pmf (p x)) (f x)"
+  assumes "\<And>x y. x \<in> A \<Longrightarrow> y \<in> set_pmf (p x) \<Longrightarrow> f x y \<ge> 0"
+  shows   "measure_pmf.expectation (Pi_pmf A dflt p) (\<lambda>y. \<Prod>x\<in>A. f x (y x)) =
+             (\<Prod>x\<in>A. measure_pmf.expectation (p x) (\<lambda>v. f x v))"
+proof -
+  have nonneg: "measure_pmf.expectation (p x) (f x) \<ge> 0" if "x \<in> A" for x
+    using that by (intro Bochner_Integration.integral_nonneg_AE AE_pmfI assms)
+  have nonneg': "0 \<le> measure_pmf.expectation (Pi_pmf A dflt p) (\<lambda>y. \<Prod>x\<in>A. f x (y x))"
+    by (intro Bochner_Integration.integral_nonneg_AE AE_pmfI assms prod_nonneg)
+       (use assms in \<open>auto simp: set_Pi_pmf PiE_dflt_def\<close>)
+
+  have "ennreal (measure_pmf.expectation (Pi_pmf A dflt p) (\<lambda>y. \<Prod>x\<in>A. f x (y x))) =
+          nn_integral (Pi_pmf A dflt p) (\<lambda>y. ennreal (\<Prod>x\<in>A. f x (y x)))" using assms
+    by (intro nn_integral_eq_integral [symmetric] assms integrable_prod_Pi_pmf)
+       (auto simp: AE_measure_pmf_iff set_Pi_pmf PiE_dflt_def prod_nonneg)
+  also have "\<dots> = nn_integral (Pi_pmf A dflt p) (\<lambda>y. (\<Prod>x\<in>A. ennreal (f x (y x))))"
+    by (intro nn_integral_cong_AE AE_pmfI prod_ennreal [symmetric])
+       (use assms(1) in \<open>auto simp: set_Pi_pmf PiE_dflt_def intro!: assms(3)\<close>)
+  also have "\<dots> = (\<Prod>x\<in>A. \<integral>\<^sup>+ a. ennreal (f x a) \<partial>measure_pmf (p x))"
+    by (rule nn_integral_prod_Pi_pmf) fact+
+  also have "\<dots> = (\<Prod>x\<in>A. ennreal (measure_pmf.expectation (p x) (f x)))"
+    by (intro prod.cong nn_integral_eq_integral assms AE_pmfI) auto
+  also have "\<dots> = ennreal (\<Prod>x\<in>A. measure_pmf.expectation (p x) (f x))"
+    by (intro prod_ennreal nonneg)
+  finally show ?thesis
+    using nonneg nonneg' by (subst (asm) ennreal_inj) (auto intro!: prod_nonneg)
+qed
+
+lemma indep_vars_Pi_pmf:
+  assumes fin: "finite I"
+  shows   "prob_space.indep_vars (measure_pmf (Pi_pmf I dflt p))
+             (\<lambda>_. count_space UNIV) (\<lambda>x f. f x) I"
+proof (cases "I = {}")
+  case True
+  show ?thesis 
+    by (subst prob_space.indep_vars_def [OF measure_pmf.prob_space_axioms],
+        subst prob_space.indep_sets_def [OF measure_pmf.prob_space_axioms]) (simp_all add: True)
+next
+  case [simp]: False
+  show ?thesis
+  proof (subst prob_space.indep_vars_iff_distr_eq_PiM')
+    show "distr (measure_pmf (Pi_pmf I dflt p)) (Pi\<^sub>M I (\<lambda>i. count_space UNIV)) (\<lambda>x. restrict x I) =
+          Pi\<^sub>M I (\<lambda>i. distr (measure_pmf (Pi_pmf I dflt p)) (count_space UNIV) (\<lambda>f. f i))"
+    proof (rule product_sigma_finite.PiM_eqI, goal_cases)
+      case 1
+      interpret product_prob_space "\<lambda>i. distr (measure_pmf (Pi_pmf I dflt p)) (count_space UNIV) (\<lambda>f. f i)"
+        by (intro product_prob_spaceI prob_space.prob_space_distr measure_pmf.prob_space_axioms)
+           simp_all
+      show ?case by unfold_locales
+    next
+      case 3
+      have "sets (Pi\<^sub>M I (\<lambda>i. distr (measure_pmf (Pi_pmf I dflt p)) (count_space UNIV) (\<lambda>f. f i))) =
+            sets (Pi\<^sub>M I (\<lambda>_. count_space UNIV))"
+        by (intro sets_PiM_cong) simp_all
+      thus ?case by simp
+    next
+      case (4 A)
+      have "Pi\<^sub>E I A \<in> sets (Pi\<^sub>M I (\<lambda>i. count_space UNIV))"
+        using 4 by (intro sets_PiM_I_finite fin) auto
+      hence "emeasure (distr (measure_pmf (Pi_pmf I dflt p)) (Pi\<^sub>M I (\<lambda>i. count_space UNIV))
+              (\<lambda>x. restrict x I)) (Pi\<^sub>E I A) =
+             emeasure (measure_pmf (Pi_pmf I dflt p)) ((\<lambda>x. restrict x I) -` Pi\<^sub>E I A)"
+        using 4 by (subst emeasure_distr) (auto simp: space_PiM)
+      also have "\<dots> = emeasure (measure_pmf (Pi_pmf I dflt p)) (PiE_dflt I dflt A)"
+        by (intro emeasure_eq_AE AE_pmfI) (auto simp: PiE_dflt_def set_Pi_pmf fin)
+      also have "\<dots> = (\<Prod>i\<in>I. emeasure (measure_pmf (p i)) (A i))"
+        by (simp add: measure_pmf.emeasure_eq_measure measure_Pi_pmf_PiE_dflt fin prod_ennreal)
+      also have "\<dots> = (\<Prod>i\<in>I. emeasure (measure_pmf (map_pmf (\<lambda>f. f i) (Pi_pmf I dflt p))) (A i))"
+        by (intro prod.cong refl, subst Pi_pmf_component) (auto simp: fin)
+      finally show ?case
+        by (simp add: map_pmf_rep_eq)
+    qed fact+
+  qed (simp_all add: measure_pmf.prob_space_axioms)
+qed
+
+lemma
+  fixes h :: "'a :: comm_monoid_add \<Rightarrow> 'b::{banach, second_countable_topology}"
+  assumes fin: "finite I"
+  assumes integrable: "\<And>i. i \<in> I \<Longrightarrow> integrable (measure_pmf (D i)) h"
+  shows   integrable_sum_Pi_pmf: "integrable (Pi_pmf I dflt D) (\<lambda>g. \<Sum>i\<in>I. h (g i))"
+    and   expectation_sum_Pi_pmf:
+            "measure_pmf.expectation (Pi_pmf I dflt D) (\<lambda>g. \<Sum>i\<in>I. h (g i)) =
+             (\<Sum>i\<in>I. measure_pmf.expectation (D i) h)"
+proof -
+  have integrable': "integrable (Pi_pmf I dflt D) (\<lambda>g. h (g i))" if i: "i \<in> I" for i
+  proof -
+    have "integrable (D i) h"
+      using i by (rule assms)
+    also have "D i = map_pmf (\<lambda>g. g i) (Pi_pmf I dflt D)"
+      by (subst Pi_pmf_component) (use fin i in auto)
+    finally show "integrable (measure_pmf (Pi_pmf I dflt D)) (\<lambda>x. h (x i))"
+      by simp
+  qed
+  thus "integrable (Pi_pmf I dflt D) (\<lambda>g. \<Sum>i\<in>I. h (g i))"
+    by (intro Bochner_Integration.integrable_sum)
+
+  have "measure_pmf.expectation (Pi_pmf I dflt D) (\<lambda>x. \<Sum>i\<in>I. h (x i)) =
+               (\<Sum>i\<in>I. measure_pmf.expectation (map_pmf (\<lambda>x. x i) (Pi_pmf I dflt D)) h)"
+    using integrable' by (subst Bochner_Integration.integral_sum) auto
+  also have "\<dots> = (\<Sum>i\<in>I. measure_pmf.expectation (D i) h)"
+    by (intro sum.cong refl, subst Pi_pmf_component) (use fin in auto)
+  finally show "measure_pmf.expectation (Pi_pmf I dflt D) (\<lambda>g. \<Sum>i\<in>I. h (g i)) =
+                  (\<Sum>i\<in>I. measure_pmf.expectation (D i) h)" .
+qed
+
+
+subsection \<open>Applications\<close>
+
+text \<open>
+  Choosing a subset of a set uniformly at random is equivalent to tossing a fair coin
+  independently for each element and collecting all the elements that came up heads.
+\<close>
+lemma pmf_of_set_Pow_conv_bernoulli:
+  assumes "finite (A :: 'a set)"
+  shows "map_pmf (\<lambda>b. {x\<in>A. b x}) (Pi_pmf A P (\<lambda>_. bernoulli_pmf (1/2))) = pmf_of_set (Pow A)"
+proof -
+  have "Pi_pmf A P (\<lambda>_. bernoulli_pmf (1/2)) = pmf_of_set (PiE_dflt A P (\<lambda>x. UNIV))"
+    using assms by (simp add: bernoulli_pmf_half_conv_pmf_of_set Pi_pmf_of_set)
+  also have "map_pmf (\<lambda>b. {x\<in>A. b x}) \<dots> = pmf_of_set (Pow A)"
+  proof -
+    have "bij_betw (\<lambda>b. {x \<in> A. b x}) (PiE_dflt A P (\<lambda>_. UNIV)) (Pow A)"
+      by (rule bij_betwI[of _ _ _ "\<lambda>B b. if b \<in> A then b \<in> B else P"]) (auto simp add: PiE_dflt_def)
+    then show ?thesis
+      using assms by (intro map_pmf_of_set_bij_betw) auto
+  qed
+  finally show ?thesis
+    by simp
+qed
+
+text \<open>
+  A binomial distribution can be seen as the number of successes in \<open>n\<close> independent
+  coin tosses, each with success probability \<open>p\<close>.
+\<close>
+lemma binomial_pmf_altdef':
+  fixes A :: "'a set"
+  assumes "finite A" and "card A = n" and p: "p \<in> {0..1}"
+  shows   "binomial_pmf n p =
+             map_pmf (\<lambda>f. card {x\<in>A. f x}) (Pi_pmf A dflt (\<lambda>_. bernoulli_pmf p))" (is "?lhs = ?rhs")
+proof -
+  from assms have "?lhs = binomial_pmf (card A) p"
+    by simp
+  also have "\<dots> = ?rhs"
+  using assms(1)
+  proof (induction rule: finite_induct)
+    case empty
+    with p show ?case by (simp add: binomial_pmf_0)
+  next
+    case (insert x A)
+    from insert.hyps have "card (insert x A) = Suc (card A)"
+      by simp
+    also have "binomial_pmf \<dots> p = do {
+                                     b \<leftarrow> bernoulli_pmf p;
+                                     f \<leftarrow> Pi_pmf A dflt (\<lambda>_. bernoulli_pmf p);
+                                     return_pmf ((if b then 1 else 0) + card {y \<in> A. f y})
+                                   }"
+      using p by (simp add: binomial_pmf_Suc insert.IH bind_map_pmf)
+    also have "\<dots> = do {
+                      b \<leftarrow> bernoulli_pmf p;
+                      f \<leftarrow> Pi_pmf A dflt (\<lambda>_. bernoulli_pmf p);
+                      return_pmf (card {y \<in> insert x A. (f(x := b)) y})
+                    }"
+    proof (intro bind_pmf_cong refl, goal_cases)
+      case (1 b f)
+      have "(if b then 1 else 0) + card {y\<in>A. f y} = card ((if b then {x} else {}) \<union> {y\<in>A. f y})"
+        using insert.hyps by auto
+      also have "(if b then {x} else {}) \<union> {y\<in>A. f y} = {y\<in>insert x A. (f(x := b)) y}"
+        using insert.hyps by auto
+      finally show ?case by simp
+    qed
+    also have "\<dots> = map_pmf (\<lambda>f. card {y\<in>insert x A. f y})
+                      (Pi_pmf (insert x A) dflt (\<lambda>_. bernoulli_pmf p))"
+      using insert.hyps by (subst Pi_pmf_insert) (simp_all add: pair_pmf_def map_bind_pmf)
+    finally show ?case .
+  qed
+  finally show ?thesis .
+qed
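+
+text \<open>
+  Instantiating the index set with \<open>{..<n}\<close> gives a perhaps more familiar formulation
+  (an illustrative corollary; the proof sketch merely instantiates the lemma above):
+\<close>
+lemma
+  assumes "p \<in> {0..1}"
+  shows   "binomial_pmf n p =
+             map_pmf (\<lambda>f. card {i\<in>{..<n}. f i}) (Pi_pmf {..<n} False (\<lambda>_. bernoulli_pmf p))"
+  \<comment> \<open>proof sketch\<close>
+  using assms by (intro binomial_pmf_altdef') auto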
+
+end