diff -r b4552595b04e -r f6bb31879698 src/HOL/Probability/Hoeffding.thy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Probability/Hoeffding.thy	Fri Feb 19 13:42:12 2021 +0100
@@ -0,0 +1,923 @@
+(*
+   File:    Hoeffding.thy
+   Author:  Manuel Eberl, TU München
+*)
+section \<open>Hoeffding's Lemma and Hoeffding's Inequality\<close>
+theory Hoeffding
+  imports Product_PMF Independent_Family
+begin
+
+text \<open>
+  Hoeffding's inequality shows that a sum of bounded independent random variables is concentrated
+  around its mean, with an exponential decay of the tail probabilities.
+\<close>
+
+subsection \<open>Hoeffding's Lemma\<close>
+
+lemma convex_on_exp: (* convexity of x |-> exp (l * x) on the reals, for non-negative l *)
+  fixes l :: real
+  assumes "l \<ge> 0"
+  shows   "convex_on UNIV (\<lambda>x. exp(l*x))"
+  using assms
+  by (intro convex_on_realI[where f' = "\<lambda>x. l * exp (l * x)"])
+     (auto intro!: derivative_eq_intros mult_left_mono)
+
+lemma mult_const_minus_self_real_le: (* proved by completing the square *)
+  fixes x :: real
+  shows "x * (c - x) \<le> c\<^sup>2 / 4"
+proof -
+  have "x * (c - x) = -(x - c / 2)\<^sup>2 + c\<^sup>2 / 4"
+    by (simp add: field_simps power2_eq_square)
+  also have "\<dots> \<le> 0 + c\<^sup>2 / 4"
+    by (intro add_mono) auto
+  finally show ?thesis by simp
+qed
+
+lemma Hoeffdings_lemma_aux: (* analytic core: bound L by a second-order Taylor expansion at 0 *)
+  fixes h p :: real
+  assumes "h \<ge> 0" and "p \<ge> 0"
+  defines "L \<equiv> (\<lambda>h. -h * p + ln (1 + p * (exp h - 1)))"
+  shows   "L h \<le> h\<^sup>2 / 8"
+proof (cases "h = 0")
+  case False
+  hence h: "h > 0"
+    using \<open>h \<ge> 0\<close> by simp
+  define L' where "L' = (\<lambda>h. -p + p * exp h / (1 + p * (exp h - 1)))" (* first derivative of L, see fact L' below *)
+  define L'' where "L'' = (\<lambda>h. -(p\<^sup>2) * exp h * exp h / (1 + p * (exp h - 1))\<^sup>2 +
+                              p * exp h / (1 + p * (exp h - 1)))" (* second derivative of L, see fact L'' below *)
+  define Ls where "Ls = (\<lambda>n. [L, L', L''] ! n)" (* the derivatives as a sequence, as required by Taylor *)
+
+  have [simp]: "L 0 = 0" "L' 0 = 0"
+    by (auto simp: L_def L'_def)
+
+  have L': "(L has_real_derivative L' x) (at x)" if "x \<in> {0..h}" for x
+  proof -
+    have "1 + p * (exp x - 1) > 0" (* the argument of ln stays positive on [0, h] *)
+      using \<open>p \<ge> 0\<close> that by (intro add_pos_nonneg mult_nonneg_nonneg) auto
+    thus ?thesis
+      unfolding L_def L'_def by (auto intro!: derivative_eq_intros)
+  qed
+
+  have L'': "(L' has_real_derivative L'' x) (at x)" if "x \<in> {0..h}" for x
+  proof -
+    have *: "1 + p * (exp x - 1) > 0"
+      using \<open>p \<ge> 0\<close> that by (intro add_pos_nonneg mult_nonneg_nonneg) auto
+    show ?thesis
+      unfolding L'_def L''_def
+      by (insert *, (rule derivative_eq_intros refl | simp)+) (auto simp: divide_simps; algebra)
+  qed
+
+  have diff: "\<And>m t. m < 2 \<Longrightarrow> 0 \<le> t \<Longrightarrow> t \<le> h \<Longrightarrow> (Ls m has_real_derivative Ls (Suc m) t) (at t)"
+    using L' L'' by (auto simp: Ls_def nth_Cons split: nat.splits)
+  from Taylor[of 2 Ls L 0 h 0 h, OF _ _ diff] (* the terms of order 0 and 1 vanish since L 0 = 0 and L' 0 = 0 *)
+    obtain t where t: "t \<in> {0<..<h}" "L h = L'' t * h\<^sup>2 / 2"
+      using \<open>h > 0\<close> by (auto simp: Ls_def lessThan_nat_numeral)
+  define u where "u = p * exp t / (1 + p * (exp t - 1))"
+
+  have "L'' t = u * (1 - u)"
+    by (simp add: L''_def u_def divide_simps; algebra)
+  also have "\<dots> \<le> 1 / 4"
+    using mult_const_minus_self_real_le[of u 1] by simp
+  finally have "L'' t \<le> 1 / 4" .
+
+  note t(2)
+  also have "L'' t * h\<^sup>2 / 2 \<le> (1 / 4) * h\<^sup>2 / 2"
+    using \<open>L'' t \<le> 1 / 4\<close> by (intro mult_right_mono divide_right_mono) auto
+  finally show "L h \<le> h\<^sup>2 / 8" by simp
+qed (auto simp: L_def)
+
+
+locale interval_bounded_random_variable = prob_space + (* a real random variable that almost surely lies in {a..b} *)
+  fixes f :: "'a \<Rightarrow> real" and a b :: real
+  assumes random_variable [measurable]: "random_variable borel f"
+  assumes AE_in_interval: "AE x in M. f x \<in> {a..b}"
+begin
+
+lemma integrable [intro]: "integrable M f" (* f is almost surely bounded by max \<bar>a\<bar> \<bar>b\<bar> *)
+proof (rule integrable_const_bound)
+  show "AE x in M. norm (f x) \<le> max \<bar>a\<bar> \<bar>b\<bar>"
+    by (intro eventually_mono[OF AE_in_interval]) auto
+qed (fact random_variable)
+
+text \<open>
+  We first show Hoeffding's lemma for distributions whose expectation is 0. The general
+  case will easily follow from this later.
+\<close>
+lemma Hoeffdings_lemma_nn_integral_0:
+  assumes "l > 0" and E0: "expectation f = 0"
+  shows   "nn_integral M (\<lambda>x. exp (l * f x)) \<le> ennreal (exp (l\<^sup>2 * (b - a)\<^sup>2 / 8))"
+proof (cases "AE x in M. f x = 0")
+  case True (* degenerate case: f is almost surely 0 *)
+  hence "nn_integral M (\<lambda>x. exp (l * f x)) = nn_integral M (\<lambda>x. ennreal 1)"
+    by (intro nn_integral_cong_AE) auto
+  also have "\<dots> = ennreal (expectation (\<lambda>_. 1))"
+    by (intro nn_integral_eq_integral) auto
+  finally show ?thesis by (simp add: prob_space)
+next
+  case False
+  have "a < 0" (* otherwise f is a.e. non-negative with expectation 0, hence a.e. 0 *)
+  proof (rule ccontr)
+    assume a: "\<not>(a < 0)"
+    have "AE x in M. f x = 0"
+    proof (subst integral_nonneg_eq_0_iff_AE [symmetric])
+      show "AE x in M. f x \<ge> 0"
+        using AE_in_interval by eventually_elim (use a in auto)
+    qed (use E0 in \<open>auto simp: id_def integrable\<close>)
+    with False show False by contradiction
+  qed
+
+  have "b > 0" (* symmetric argument, applied to -f *)
+  proof (rule ccontr)
+    assume b: "\<not>(b > 0)"
+    have "AE x in M. -f x = 0"
+    proof (subst integral_nonneg_eq_0_iff_AE [symmetric])
+      show "AE x in M. -f x \<ge> 0"
+        using AE_in_interval by eventually_elim (use b in auto)
+    qed (use E0 in \<open>auto simp: id_def integrable\<close>)
+    with False show False by simp
+  qed
+
+  have "a < b"
+    using \<open>a < 0\<close> \<open>b > 0\<close> by linarith
+
+  define p where "p = -a / (b - a)"
+  define L where "L = (\<lambda>t. -t* p + ln (1 - p + p * exp t))"
+  define z where "z = l * (b - a)"
+  have "z > 0"
+    unfolding z_def using \<open>a < b\<close> \<open>l > 0\<close> by auto
+  have "p > 0"
+    using \<open>a < 0\<close> \<open>a < b\<close> unfolding p_def by (intro divide_pos_pos) auto
+
+  have "(\<integral>\<^sup>+x. exp (l * f x) \<partial>M) \<le>
+        (\<integral>\<^sup>+x. (b - f x) / (b - a) * exp (l * a) + (f x - a) / (b - a) * exp (l * b) \<partial>M)"
+  proof (intro nn_integral_mono_AE eventually_mono[OF AE_in_interval] ennreal_leI)
+    fix x assume x: "f x \<in> {a..b}"
+    define y where "y = (b - f x) / (b-a)" (* f x = (1 - y) * b + y * a is a convex combination of b and a *)
+    have y: "y \<in> {0..1}"
+      using x \<open>a < b\<close> by (auto simp: y_def)
+    have conv: "convex_on UNIV (\<lambda>x. exp(l*x))"
+      using \<open>l > 0\<close> by (intro convex_on_exp) auto
+    have "exp (l * ((1 - y) *\<^sub>R b + y *\<^sub>R a)) \<le> (1 - y) * exp (l * b) + y * exp (l * a)"
+      using y \<open>l > 0\<close> by (intro convex_onD[OF convex_on_exp]) auto
+    also have "(1 - y) *\<^sub>R b + y *\<^sub>R a = f x"
+      using \<open>a < b\<close> by (simp add: y_def divide_simps) (simp add: algebra_simps)?
+    also have "1 - y = (f x - a) / (b - a)"
+      using \<open>a < b\<close> by (simp add: field_simps y_def)
+    finally show "exp (l * f x) \<le> (b - f x) / (b - a) * exp (l*a) + (f x - a)/(b-a) * exp (l*b)"
+      by (simp add: y_def)
+  qed
+  also have "\<dots> = (\<integral>\<^sup>+x. ennreal (b - f x) * exp (l * a) / (b - a) +
+                        ennreal (f x - a) * exp (l * b) / (b - a) \<partial>M)"
+    using \<open>a < 0\<close> \<open>b > 0\<close>
+    by (intro nn_integral_cong_AE eventually_mono[OF AE_in_interval])
+       (simp add: ennreal_plus ennreal_mult flip: divide_ennreal)
+  also have "\<dots> = ((\<integral>\<^sup>+ x. ennreal (b - f x) \<partial>M) * ennreal (exp (l * a)) +
+                   (\<integral>\<^sup>+ x. ennreal (f x - a) \<partial>M) * ennreal (exp (l * b))) / ennreal (b - a)"
+    by (simp add: nn_integral_add nn_integral_divide nn_integral_multc add_divide_distrib_ennreal)
+  also have "(\<integral>\<^sup>+ x. ennreal (b - f x) \<partial>M) = ennreal (expectation (\<lambda>x. b - f x))"
+    by (intro nn_integral_eq_integral Bochner_Integration.integrable_diff
+              eventually_mono[OF AE_in_interval] integrable_const integrable) auto
+  also have "expectation (\<lambda>x. b - f x) = b"
+    using assms by (subst Bochner_Integration.integral_diff) (auto simp: prob_space)
+  also have "(\<integral>\<^sup>+ x. ennreal (f x - a) \<partial>M) = ennreal (expectation (\<lambda>x. f x - a))"
+    by (intro nn_integral_eq_integral Bochner_Integration.integrable_diff
+              eventually_mono[OF AE_in_interval] integrable_const integrable) auto
+  also have "expectation (\<lambda>x. f x - a) = (-a)"
+    using assms by (subst Bochner_Integration.integral_diff) (auto simp: prob_space)
+  also have "(ennreal b * (exp (l * a)) + ennreal (-a) * (exp (l * b))) / (b - a) =
+             ennreal (b * exp (l * a) - a * exp (l * b)) / ennreal (b - a)"
+    using \<open>a < 0\<close> \<open>b > 0\<close>
+    by (simp flip: ennreal_mult ennreal_plus add: mult_nonpos_nonneg divide_ennreal mult_mono)
+  also have "b * exp (l * a) - a * exp (l * b) = exp (L z) * (b - a)"
+  proof -
+    have pos: "1 - p + p * exp z > 0"
+    proof -
+      have "exp z > 1" using \<open>l > 0\<close> and \<open>a < b\<close>
+        by (subst one_less_exp_iff) (auto simp: z_def intro!: mult_pos_pos)
+      hence "(exp z - 1) * p \<ge> 0"
+        unfolding p_def using \<open>a < 0\<close> and \<open>a < b\<close>
+        by (intro mult_nonneg_nonneg divide_nonneg_pos) auto
+      thus ?thesis
+        by (simp add: algebra_simps)
+    qed
+
+    have "exp (L z) * (b - a) = exp (-z * p) * (1 - p + p * exp z) * (b - a)"
+      using pos by (simp add: exp_add L_def exp_diff exp_minus divide_simps)
+    also have "\<dots> = b * exp (l * a) - a * exp (l * b)" using \<open>a < b\<close>
+      by (simp add: p_def z_def divide_simps) (simp add: exp_diff algebra_simps)?
+    finally show ?thesis by simp
+  qed
+  also have "ennreal (exp (L z) * (b - a)) / ennreal (b - a) = ennreal (exp (L z))"
+    using \<open>a < b\<close> by (simp add: divide_ennreal)
+  also have "L z = -z * p + ln (1 + p * (exp z - 1))" (* bring L z into the form used in Hoeffdings_lemma_aux *)
+    by (simp add: L_def algebra_simps)
+  also have "\<dots> \<le> z\<^sup>2 / 8"
+    unfolding L_def by (rule Hoeffdings_lemma_aux[where p = p]) (use \<open>z > 0\<close> \<open>p > 0\<close> in simp_all)
+  hence "ennreal (exp (-z * p + ln (1 + p * (exp z - 1)))) \<le> ennreal (exp (z\<^sup>2 / 8))"
+    by (intro ennreal_leI) auto
+  finally show ?thesis
+    by (simp add: z_def power_mult_distrib)
+qed
+
+context
+begin
+
+interpretation shift: interval_bounded_random_variable M "\<lambda>x. f x - \<mu>" "a - \<mu>" "b - \<mu>" (* shifting f and the interval by the same constant keeps the locale assumptions *)
+  rewrites "b - \<mu> - (a - \<mu>) \<equiv> b - a"
+  by unfold_locales (auto intro!: eventually_mono[OF AE_in_interval])
+
+lemma expectation_shift: "expectation (\<lambda>x. f x - expectation f) = 0"
+  by (subst Bochner_Integration.integral_diff) (auto simp: integrable prob_space)
+
+lemmas Hoeffdings_lemma_nn_integral = shift.Hoeffdings_lemma_nn_integral_0[OF _ expectation_shift] (* general case: apply the zero-expectation lemma to f - expectation f *)
+
+end
+
+end
+
+
+
+subsection \<open>Hoeffding's Inequality\<close>
+
+text \<open>
+  Consider \<open>n\<close> independent real random variables $X_1, \ldots, X_n$ that each almost surely lie
+  in a compact interval $[a_i, b_i]$. Hoeffding's inequality states that the distribution of the
+  sum of the $X_i$ is tightly concentrated around the sum of the expected values: the probability
+  of it being above or below the sum of the expected values by more than some \<open>\<epsilon>\<close> decreases
+  exponentially with \<open>\<epsilon>\<close>.
+\<close>
+
+locale indep_interval_bounded_random_variables = prob_space +
+  fixes I :: "'b set" and X :: "'b \<Rightarrow> 'a \<Rightarrow> real"
+  fixes a b :: "'b \<Rightarrow> real"
+  assumes fin: "finite I"
+  assumes indep: "indep_vars (\<lambda>_. borel) X I"
+  assumes AE_in_interval: "\<And>i. i \<in> I \<Longrightarrow> AE x in M. X i x \<in> {a i..b i}"
+begin
+
+lemma random_variable [measurable]:
+  assumes i: "i \<in> I"
+  shows "random_variable borel (X i)"
+  using i indep unfolding indep_vars_def by blast
+
+lemma bounded_random_variable [intro]: (* each X i is an instance of the single-variable locale *)
+  assumes i: "i \<in> I"
+  shows   "interval_bounded_random_variable M (X i) (a i) (b i)"
+  by unfold_locales (use AE_in_interval[OF i] i in auto)
+
+end
+
+
+locale Hoeffding_ineq = indep_interval_bounded_random_variables +
+  fixes \<mu> :: real
+  defines "\<mu> \<equiv> (\<Sum>i\<in>I. expectation (X i))"
+begin
+
+theorem%important Hoeffding_ineq_ge: (* upper tail bound *)
+  assumes "\<epsilon> \<ge> 0"
+  assumes "(\<Sum>i\<in>I. (b i - a i)\<^sup>2) > 0"
+  shows   "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+proof (cases "\<epsilon> = 0")
+  case [simp]: True
+  have "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} \<le> 1"
+    by simp
+  thus ?thesis by simp
+next
+  case False
+  with \<open>\<epsilon> \<ge> 0\<close> have \<epsilon>: "\<epsilon> > 0"
+    by auto
+
+  define d where "d = (\<Sum>i\<in>I. (b i - a i)\<^sup>2)"
+  define l :: real where "l = 4 * \<epsilon> / d" (* the parameter passed to the Chernoff bound below *)
+  have d: "d > 0"
+    using assms by (simp add: d_def)
+  have l: "l > 0"
+    using \<epsilon> d by (simp add: l_def)
+  define \<mu>' where "\<mu>' = (\<lambda>i. expectation (X i))"
+
+  have "{x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} = {x\<in>space M. (\<Sum>i\<in>I. X i x) - \<mu> \<ge> \<epsilon>}"
+    by (simp add: algebra_simps)
+  hence "ennreal (prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>}) = emeasure M \<dots>"
+    by (simp add: emeasure_eq_measure)
+  also have "\<dots> \<le> ennreal (exp (-l*\<epsilon>)) * (\<integral>\<^sup>+x\<in>space M. exp (l * ((\<Sum>i\<in>I. X i x) - \<mu>)) \<partial>M)"
+    by (intro Chernoff_ineq_nn_integral_ge l) auto
+  also have "(\<lambda>x. (\<Sum>i\<in>I. X i x) - \<mu>) = (\<lambda>x. (\<Sum>i\<in>I. X i x - \<mu>' i))"
+    by (simp add: \<mu>_def sum_subtractf \<mu>'_def)
+  also have "(\<integral>\<^sup>+x\<in>space M. exp (l * ((\<Sum>i\<in>I. X i x - \<mu>' i))) \<partial>M) =
+             (\<integral>\<^sup>+x. (\<Prod>i\<in>I. ennreal (exp (l * (X i x - \<mu>' i)))) \<partial>M)"
+    by (intro nn_integral_cong)
+       (simp_all add: sum_distrib_left ring_distribs exp_diff exp_sum fin prod_ennreal)
+  also have "\<dots> = (\<Prod>i\<in>I. \<integral>\<^sup>+x. ennreal (exp (l * (X i x - \<mu>' i))) \<partial>M)" (* independence turns the integral of the product into a product of integrals *)
+    by (intro indep_vars_nn_integral fin indep_vars_compose2[OF indep]) auto
+  also have "ennreal (exp (-l * \<epsilon>)) * \<dots> \<le>
+             ennreal (exp (-l * \<epsilon>)) * (\<Prod>i\<in>I. ennreal (exp (l\<^sup>2 * (b i - a i)\<^sup>2 / 8)))"
+  proof (intro mult_left_mono prod_mono_ennreal)
+    fix i assume i: "i \<in> I"
+    from i interpret interval_bounded_random_variable M "X i" "a i" "b i" ..
+    show "(\<integral>\<^sup>+x. ennreal (exp (l * (X i x - \<mu>' i))) \<partial>M) \<le> ennreal (exp (l\<^sup>2 * (b i - a i)\<^sup>2 / 8))"
+      unfolding \<mu>'_def by (rule Hoeffdings_lemma_nn_integral) fact+
+  qed auto
+  also have "\<dots> = ennreal (exp (-l*\<epsilon>) * (\<Prod>i\<in>I. exp (l\<^sup>2 * (b i - a i)\<^sup>2 / 8)))"
+    by (simp add: prod_ennreal prod_nonneg flip: ennreal_mult)
+  also have "exp (-l*\<epsilon>) * (\<Prod>i\<in>I. exp (l\<^sup>2 * (b i - a i)\<^sup>2 / 8)) = exp (d * l\<^sup>2 / 8 - l * \<epsilon>)"
+    by (simp add: exp_diff exp_minus sum_divide_distrib sum_distrib_left
+                  sum_distrib_right exp_sum fin divide_simps mult_ac d_def)
+  also have "d * l\<^sup>2 / 8 - l * \<epsilon> = -2 * \<epsilon>\<^sup>2 / d"
+    using d by (simp add: l_def field_simps power2_eq_square)
+  finally show ?thesis
+    by (subst (asm) ennreal_le_iff) (simp_all add: d_def)
+qed
+
+corollary Hoeffding_ineq_le: (* lower tail bound: the upper tail bound applied to the negated variables *)
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  assumes "(\<Sum>i\<in>I. (b i - a i)\<^sup>2) > 0"
+  shows   "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> - \<epsilon>} \<le> exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+proof -
+  interpret flip: Hoeffding_ineq M I "\<lambda>i x. -X i x" "\<lambda>i. -b i" "\<lambda>i. -a i" "-\<mu>"
+  proof unfold_locales
+    fix i assume "i \<in> I"
+    then interpret interval_bounded_random_variable M "X i" "a i" "b i" ..
+    show "AE x in M. - X i x \<in> {- b i..- a i}"
+      by (intro eventually_mono[OF AE_in_interval]) auto
+  qed (auto simp: fin \<mu>_def sum_negf intro: indep_vars_compose2[OF indep])
+
+  have "prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> - \<epsilon>} = prob {x\<in>space M. (\<Sum>i\<in>I. -X i x) \<ge> -\<mu> + \<epsilon>}"
+    by (simp add: sum_negf algebra_simps)
+  also have "\<dots> \<le> exp (- 2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+    using flip.Hoeffding_ineq_ge[OF \<epsilon>] assms(2) by simp
+  finally show ?thesis .
+qed
+
+corollary Hoeffding_ineq_abs_ge: (* two-sided bound: sum of the two one-sided bounds *)
+  assumes \<epsilon>: "\<epsilon> \<ge> 0"
+  assumes "(\<Sum>i\<in>I. (b i - a i)\<^sup>2) > 0"
+  shows   "prob {x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) - \<mu>\<bar> \<ge> \<epsilon>} \<le> 2 * exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+proof -
+  have "{x\<in>space M. \<bar>(\<Sum>i\<in>I. X i x) - \<mu>\<bar> \<ge> \<epsilon>} =
+        {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} \<union> {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> - \<epsilon>}"
+    by auto
+  also have "prob \<dots> \<le> prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<ge> \<mu> + \<epsilon>} +
+                       prob {x\<in>space M. (\<Sum>i\<in>I. X i x) \<le> \<mu> - \<epsilon>}"
+    by (intro measure_Un_le) auto
+  also have "\<dots> \<le> exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2)) + exp (-2 * \<epsilon>\<^sup>2 / (\<Sum>i\<in>I. (b i - a i)\<^sup>2))"
+    by (intro add_mono Hoeffding_ineq_ge Hoeffding_ineq_le assms)
+  finally show ?thesis by simp
+qed
+
+end
+
+
+subsection \<open>Hoeffding's inequality for i.i.d. bounded random variables\<close>
+
+text \<open>
+  If we have \<open>