hoelzl@42067: (* Title: HOL/Probability/Information.thy hoelzl@42067: Author: Johannes Hölzl, TU München hoelzl@42067: Author: Armin Heller, TU München hoelzl@42067: *) hoelzl@42067: hoelzl@42067: header {*Information theory*} hoelzl@42067: hoelzl@36080: theory Information wenzelm@41413: imports hoelzl@43340: Independent_Family hoelzl@43556: Radon_Nikodym wenzelm@41413: "~~/src/HOL/Library/Convex" hoelzl@36080: begin hoelzl@36080: hoelzl@39097: lemma log_le: "1 < a \ 0 < x \ x \ y \ log a x \ log a y" hoelzl@39097: by (subst log_le_cancel_iff) auto hoelzl@39097: hoelzl@39097: lemma log_less: "1 < a \ 0 < x \ x < y \ log a x < log a y" hoelzl@39097: by (subst log_less_cancel_iff) auto hoelzl@39097: hoelzl@39097: lemma setsum_cartesian_product': hoelzl@39097: "(\x\A \ B. f x) = (\x\A. setsum (\y. f (x, y)) B)" hoelzl@39097: unfolding setsum_cartesian_product by simp hoelzl@39097: hoelzl@39097: lemma split_pairs: hoelzl@40859: "((A, B) = X) \ (fst X = A \ snd X = B)" and hoelzl@40859: "(X = (A, B)) \ (fst X = A \ snd X = B)" by auto hoelzl@38656: hoelzl@38656: section "Information theory" hoelzl@38656: hoelzl@40859: locale information_space = prob_space + hoelzl@38656: fixes b :: real assumes b_gt_1: "1 < b" hoelzl@38656: hoelzl@40859: context information_space hoelzl@38656: begin hoelzl@38656: hoelzl@40859: text {* Introduce some simplification rules for logarithm of base @{term b}. *} hoelzl@40859: hoelzl@40859: lemma log_neg_const: hoelzl@40859: assumes "x \ 0" hoelzl@40859: shows "log b x = log b 0" hoelzl@36624: proof - hoelzl@40859: { fix u :: real hoelzl@40859: have "x \ 0" by fact hoelzl@40859: also have "0 < exp u" hoelzl@40859: using exp_gt_zero . hoelzl@40859: finally have "exp u \ x" hoelzl@40859: by auto } hoelzl@40859: then show "log b x = log b 0" hoelzl@40859: by (simp add: log_def ln_def) hoelzl@38656: qed hoelzl@38656: hoelzl@40859: lemma log_mult_eq: hoelzl@40859: "log b (A * B) = (if 0 < A * B then log b \A\ + log b \B\ else log b 0)" hoelzl@40859: using log_mult[of b "\A\" "\B\"] b_gt_1 log_neg_const[of "A * B"] hoelzl@40859: by (auto simp: zero_less_mult_iff mult_le_0_iff) hoelzl@38656: hoelzl@40859: lemma log_inverse_eq: hoelzl@40859: "log b (inverse B) = (if 0 < B then - log b B else log b 0)" hoelzl@40859: using log_inverse[of b B] log_neg_const[of "inverse B"] b_gt_1 by simp hoelzl@36080: hoelzl@40859: lemma log_divide_eq: hoelzl@40859: "log b (A / B) = (if 0 < A * B then log b \A\ - log b \B\ else log b 0)" hoelzl@40859: unfolding divide_inverse log_mult_eq log_inverse_eq abs_inverse hoelzl@40859: by (auto simp: zero_less_mult_iff mult_le_0_iff) hoelzl@38656: hoelzl@40859: lemmas log_simps = log_mult_eq log_inverse_eq log_divide_eq hoelzl@38656: hoelzl@38656: end hoelzl@38656: hoelzl@39097: subsection "Kullback$-$Leibler divergence" hoelzl@36080: hoelzl@39097: text {* The Kullback$-$Leibler divergence is also known as relative entropy or hoelzl@39097: Kullback$-$Leibler distance. *} hoelzl@39097: hoelzl@39097: definition hoelzl@47694: "entropy_density b M N = log b \ real \ RN_deriv M N" hoelzl@43340: hoelzl@43340: definition wenzelm@53015: "KL_divergence b M N = integral\<^sup>L N (entropy_density b M N)" hoelzl@43340: hoelzl@43340: lemma (in information_space) measurable_entropy_density: hoelzl@47694: assumes ac: "absolutely_continuous M N" "sets N = events" hoelzl@47694: shows "entropy_density b M N \ borel_measurable M" hoelzl@43340: proof - hoelzl@47694: from borel_measurable_RN_deriv[OF ac] b_gt_1 show ?thesis hoelzl@50003: unfolding entropy_density_def by auto hoelzl@43340: qed hoelzl@43340: hoelzl@50003: lemma borel_measurable_RN_deriv_density[measurable (raw)]: hoelzl@50003: "f \ borel_measurable M \ RN_deriv M (density M f) \ borel_measurable M" hoelzl@50003: using borel_measurable_RN_deriv_density[of "\x. max 0 (f x )" M] hoelzl@50003: by (simp add: density_max_0[symmetric]) hoelzl@50003: hoelzl@47694: lemma (in sigma_finite_measure) KL_density: hoelzl@47694: fixes f :: "'a \ real" hoelzl@47694: assumes "1 < b" hoelzl@47694: assumes f: "f \ borel_measurable M" "AE x in M. 0 \ f x" hoelzl@47694: shows "KL_divergence b M (density M f) = (\x. f x * log b (f x) \M)" hoelzl@47694: unfolding KL_divergence_def hoelzl@47694: proof (subst integral_density) hoelzl@47694: show "entropy_density b M (density M (\x. ereal (f x))) \ borel_measurable M" hoelzl@49776: using f hoelzl@50003: by (auto simp: comp_def entropy_density_def) hoelzl@47694: have "density M (RN_deriv M (density M f)) = density M f" hoelzl@47694: using f by (intro density_RN_deriv_density) auto hoelzl@47694: then have eq: "AE x in M. RN_deriv M (density M f) x = f x" hoelzl@47694: using f hoelzl@47694: by (intro density_unique) hoelzl@47694: (auto intro!: borel_measurable_log borel_measurable_RN_deriv_density simp: RN_deriv_density_nonneg) hoelzl@47694: show "(\x. f x * entropy_density b M (density M (\x. ereal (f x))) x \M) = (\x. f x * log b (f x) \M)" hoelzl@47694: apply (intro integral_cong_AE) hoelzl@47694: using eq hoelzl@47694: apply eventually_elim hoelzl@47694: apply (auto simp: entropy_density_def) hoelzl@47694: done hoelzl@47694: qed fact+ hoelzl@43340: hoelzl@47694: lemma (in sigma_finite_measure) KL_density_density: hoelzl@47694: fixes f g :: "'a \ real" hoelzl@47694: assumes "1 < b" hoelzl@47694: assumes f: "f \ borel_measurable M" "AE x in M. 0 \ f x" hoelzl@47694: assumes g: "g \ borel_measurable M" "AE x in M. 0 \ g x" hoelzl@47694: assumes ac: "AE x in M. f x = 0 \ g x = 0" hoelzl@47694: shows "KL_divergence b (density M f) (density M g) = (\x. g x * log b (g x / f x) \M)" hoelzl@47694: proof - hoelzl@47694: interpret Mf: sigma_finite_measure "density M f" hoelzl@47694: using f by (subst sigma_finite_iff_density_finite) auto hoelzl@47694: have "KL_divergence b (density M f) (density M g) = hoelzl@47694: KL_divergence b (density M f) (density (density M f) (\x. g x / f x))" hoelzl@47694: using f g ac by (subst density_density_divide) simp_all hoelzl@47694: also have "\ = (\x. (g x / f x) * log b (g x / f x) \density M f)" hoelzl@47694: using f g `1 < b` by (intro Mf.KL_density) (auto simp: AE_density divide_nonneg_nonneg) hoelzl@47694: also have "\ = (\x. g x * log b (g x / f x) \M)" hoelzl@47694: using ac f g `1 < b` by (subst integral_density) (auto intro!: integral_cong_AE) hoelzl@47694: finally show ?thesis . hoelzl@47694: qed hoelzl@43340: hoelzl@47694: lemma (in information_space) KL_gt_0: hoelzl@47694: fixes D :: "'a \ real" hoelzl@47694: assumes "prob_space (density M D)" hoelzl@47694: assumes D: "D \ borel_measurable M" "AE x in M. 0 \ D x" hoelzl@47694: assumes int: "integrable M (\x. D x * log b (D x))" hoelzl@47694: assumes A: "density M D \ M" hoelzl@47694: shows "0 < KL_divergence b M (density M D)" hoelzl@47694: proof - hoelzl@47694: interpret N: prob_space "density M D" by fact hoelzl@43340: hoelzl@47694: obtain A where "A \ sets M" "emeasure (density M D) A \ emeasure M A" hoelzl@47694: using measure_eqI[of "density M D" M] `density M D \ M` by auto hoelzl@47694: hoelzl@47694: let ?D_set = "{x\space M. D x \ 0}" hoelzl@47694: have [simp, intro]: "?D_set \ sets M" hoelzl@47694: using D by auto hoelzl@43340: wenzelm@53015: have D_neg: "(\\<^sup>+ x. ereal (- D x) \M) = 0" hoelzl@43340: using D by (subst positive_integral_0_iff_AE) auto hoelzl@43340: wenzelm@53015: have "(\\<^sup>+ x. ereal (D x) \M) = emeasure (density M D) (space M)" hoelzl@47694: using D by (simp add: emeasure_density cong: positive_integral_cong) wenzelm@53015: then have D_pos: "(\\<^sup>+ x. ereal (D x) \M) = 1" hoelzl@47694: using N.emeasure_space_1 by simp hoelzl@43340: wenzelm@53015: have "integrable M D" "integral\<^sup>L M D = 1" hoelzl@47694: using D D_pos D_neg unfolding integrable_def lebesgue_integral_def by simp_all hoelzl@43340: hoelzl@47694: have "0 \ 1 - measure M ?D_set" hoelzl@43340: using prob_le_1 by (auto simp: field_simps) hoelzl@43340: also have "\ = (\ x. D x - indicator ?D_set x \M)" wenzelm@53015: using `integrable M D` `integral\<^sup>L M D = 1` hoelzl@47694: by (simp add: emeasure_eq_measure) hoelzl@47694: also have "\ < (\ x. D x * (ln b * log b (D x)) \M)" hoelzl@43340: proof (rule integral_less_AE) hoelzl@43340: show "integrable M (\x. D x - indicator ?D_set x)" hoelzl@43340: using `integrable M D` hoelzl@43340: by (intro integral_diff integral_indicator) auto hoelzl@43340: next hoelzl@47694: from integral_cmult(1)[OF int, of "ln b"] hoelzl@47694: show "integrable M (\x. D x * (ln b * log b (D x)))" hoelzl@47694: by (simp add: ac_simps) hoelzl@43340: next hoelzl@47694: show "emeasure M {x\space M. D x \ 1 \ D x \ 0} \ 0" hoelzl@43340: proof hoelzl@47694: assume eq_0: "emeasure M {x\space M. D x \ 1 \ D x \ 0} = 0" hoelzl@47694: then have disj: "AE x in M. D x = 1 \ D x = 0" immler@50244: using D(1) by (auto intro!: AE_I[OF subset_refl] sets.sets_Collect) hoelzl@43340: wenzelm@53015: have "emeasure M {x\space M. D x = 1} = (\\<^sup>+ x. indicator {x\space M. D x = 1} x \M)" hoelzl@43340: using D(1) by auto wenzelm@53015: also have "\ = (\\<^sup>+ x. ereal (D x) \M)" hoelzl@43920: using disj by (auto intro!: positive_integral_cong_AE simp: indicator_def one_ereal_def) hoelzl@47694: finally have "AE x in M. D x = 1" hoelzl@47694: using D D_pos by (intro AE_I_eq_1) auto wenzelm@53015: then have "(\\<^sup>+x. indicator A x\M) = (\\<^sup>+x. ereal (D x) * indicator A x\M)" hoelzl@43920: by (intro positive_integral_cong_AE) (auto simp: one_ereal_def[symmetric]) hoelzl@47694: also have "\ = density M D A" hoelzl@47694: using `A \ sets M` D by (simp add: emeasure_density) hoelzl@47694: finally show False using `A \ sets M` `emeasure (density M D) A \ emeasure M A` by simp hoelzl@43340: qed hoelzl@43340: show "{x\space M. D x \ 1 \ D x \ 0} \ sets M" immler@50244: using D(1) by (auto intro: sets.sets_Collect_conj) hoelzl@43340: hoelzl@47694: show "AE t in M. t \ {x\space M. D x \ 1 \ D x \ 0} \ hoelzl@47694: D t - indicator ?D_set t \ D t * (ln b * log b (D t))" hoelzl@43340: using D(2) hoelzl@47694: proof (eventually_elim, safe) hoelzl@47694: fix t assume Dt: "t \ space M" "D t \ 1" "D t \ 0" "0 \ D t" hoelzl@47694: and eq: "D t - indicator ?D_set t = D t * (ln b * log b (D t))" hoelzl@43340: hoelzl@43340: have "D t - 1 = D t - indicator ?D_set t" hoelzl@43340: using Dt by simp hoelzl@43340: also note eq hoelzl@47694: also have "D t * (ln b * log b (D t)) = - D t * ln (1 / D t)" hoelzl@47694: using b_gt_1 `D t \ 0` `0 \ D t` hoelzl@47694: by (simp add: log_def ln_div less_le) hoelzl@43340: finally have "ln (1 / D t) = 1 / D t - 1" hoelzl@43340: using `D t \ 0` by (auto simp: field_simps) hoelzl@43340: from ln_eq_minus_one[OF _ this] `D t \ 0` `0 \ D t` `D t \ 1` hoelzl@43340: show False by auto hoelzl@43340: qed hoelzl@43340: hoelzl@47694: show "AE t in M. D t - indicator ?D_set t \ D t * (ln b * log b (D t))" hoelzl@47694: using D(2) AE_space hoelzl@47694: proof eventually_elim hoelzl@47694: fix t assume "t \ space M" "0 \ D t" hoelzl@47694: show "D t - indicator ?D_set t \ D t * (ln b * log b (D t))" hoelzl@43340: proof cases hoelzl@43340: assume asm: "D t \ 0" hoelzl@43340: then have "0 < D t" using `0 \ D t` by auto hoelzl@43340: then have "0 < 1 / D t" by auto hoelzl@43340: have "D t - indicator ?D_set t \ - D t * (1 / D t - 1)" hoelzl@43340: using asm `t \ space M` by (simp add: field_simps) hoelzl@43340: also have "- D t * (1 / D t - 1) \ - D t * ln (1 / D t)" hoelzl@43340: using ln_le_minus_one `0 < 1 / D t` by (intro mult_left_mono_neg) auto hoelzl@47694: also have "\ = D t * (ln b * log b (D t))" hoelzl@47694: using `0 < D t` b_gt_1 hoelzl@47694: by (simp_all add: log_def ln_div) hoelzl@43340: finally show ?thesis by simp hoelzl@43340: qed simp hoelzl@43340: qed hoelzl@43340: qed hoelzl@47694: also have "\ = (\ x. ln b * (D x * log b (D x)) \M)" hoelzl@47694: by (simp add: ac_simps) hoelzl@47694: also have "\ = ln b * (\ x. D x * log b (D x) \M)" hoelzl@47694: using int by (rule integral_cmult) hoelzl@47694: finally show ?thesis hoelzl@47694: using b_gt_1 D by (subst KL_density) (auto simp: zero_less_mult_iff) hoelzl@43340: qed hoelzl@43340: hoelzl@47694: lemma (in sigma_finite_measure) KL_same_eq_0: "KL_divergence b M M = 0" hoelzl@43340: proof - hoelzl@47694: have "AE x in M. 1 = RN_deriv M M x" hoelzl@43340: proof (rule RN_deriv_unique) hoelzl@47694: show "(\x. 1) \ borel_measurable M" "AE x in M. 0 \ (1 :: ereal)" by auto hoelzl@47694: show "density M (\x. 1) = M" hoelzl@47694: apply (auto intro!: measure_eqI emeasure_density) hoelzl@47694: apply (subst emeasure_density) hoelzl@47694: apply auto hoelzl@47694: done hoelzl@43340: qed hoelzl@47694: then have "AE x in M. log b (real (RN_deriv M M x)) = 0" hoelzl@43340: by (elim AE_mp) simp hoelzl@43340: from integral_cong_AE[OF this] wenzelm@53015: have "integral\<^sup>L M (entropy_density b M M) = 0" hoelzl@43340: by (simp add: entropy_density_def comp_def) hoelzl@47694: then show "KL_divergence b M M = 0" hoelzl@43340: unfolding KL_divergence_def hoelzl@47694: by auto hoelzl@43340: qed hoelzl@43340: hoelzl@47694: lemma (in information_space) KL_eq_0_iff_eq: hoelzl@47694: fixes D :: "'a \ real" hoelzl@47694: assumes "prob_space (density M D)" hoelzl@47694: assumes D: "D \ borel_measurable M" "AE x in M. 0 \ D x" hoelzl@47694: assumes int: "integrable M (\x. D x * log b (D x))" hoelzl@47694: shows "KL_divergence b M (density M D) = 0 \ density M D = M" hoelzl@47694: using KL_same_eq_0[of b] KL_gt_0[OF assms] hoelzl@47694: by (auto simp: less_le) hoelzl@43340: hoelzl@47694: lemma (in information_space) KL_eq_0_iff_eq_ac: hoelzl@47694: fixes D :: "'a \ real" hoelzl@47694: assumes "prob_space N" hoelzl@47694: assumes ac: "absolutely_continuous M N" "sets N = sets M" hoelzl@47694: assumes int: "integrable N (entropy_density b M N)" hoelzl@47694: shows "KL_divergence b M N = 0 \ N = M" hoelzl@41833: proof - hoelzl@47694: interpret N: prob_space N by fact hoelzl@47694: have "finite_measure N" by unfold_locales hoelzl@47694: from real_RN_deriv[OF this ac] guess D . note D = this hoelzl@47694: hoelzl@47694: have "N = density M (RN_deriv M N)" hoelzl@47694: using ac by (rule density_RN_deriv[symmetric]) hoelzl@47694: also have "\ = density M D" hoelzl@47694: using borel_measurable_RN_deriv[OF ac] D by (auto intro!: density_cong) hoelzl@47694: finally have N: "N = density M D" . hoelzl@41833: hoelzl@47694: from absolutely_continuous_AE[OF ac(2,1) D(2)] D b_gt_1 ac measurable_entropy_density hoelzl@47694: have "integrable N (\x. log b (D x))" hoelzl@47694: by (intro integrable_cong_AE[THEN iffD2, OF _ _ _ int]) hoelzl@47694: (auto simp: N entropy_density_def) hoelzl@47694: with D b_gt_1 have "integrable M (\x. D x * log b (D x))" hoelzl@47694: by (subst integral_density(2)[symmetric]) (auto simp: N[symmetric] comp_def) hoelzl@47694: with `prob_space N` D show ?thesis hoelzl@47694: unfolding N hoelzl@47694: by (intro KL_eq_0_iff_eq) auto hoelzl@41833: qed hoelzl@41833: hoelzl@47694: lemma (in information_space) KL_nonneg: hoelzl@47694: assumes "prob_space (density M D)" hoelzl@47694: assumes D: "D \ borel_measurable M" "AE x in M. 0 \ D x" hoelzl@47694: assumes int: "integrable M (\x. D x * log b (D x))" hoelzl@47694: shows "0 \ KL_divergence b M (density M D)" hoelzl@47694: using KL_gt_0[OF assms] by (cases "density M D = M") (auto simp: KL_same_eq_0) hoelzl@40859: hoelzl@47694: lemma (in sigma_finite_measure) KL_density_density_nonneg: hoelzl@47694: fixes f g :: "'a \ real" hoelzl@47694: assumes "1 < b" hoelzl@47694: assumes f: "f \ borel_measurable M" "AE x in M. 0 \ f x" "prob_space (density M f)" hoelzl@47694: assumes g: "g \ borel_measurable M" "AE x in M. 0 \ g x" "prob_space (density M g)" hoelzl@47694: assumes ac: "AE x in M. f x = 0 \ g x = 0" hoelzl@47694: assumes int: "integrable M (\x. g x * log b (g x / f x))" hoelzl@47694: shows "0 \ KL_divergence b (density M f) (density M g)" hoelzl@47694: proof - hoelzl@47694: interpret Mf: prob_space "density M f" by fact hoelzl@47694: interpret Mf: information_space "density M f" b by default fact hoelzl@47694: have eq: "density (density M f) (\x. g x / f x) = density M g" (is "?DD = _") hoelzl@47694: using f g ac by (subst density_density_divide) simp_all hoelzl@36080: hoelzl@47694: have "0 \ KL_divergence b (density M f) (density (density M f) (\x. g x / f x))" hoelzl@47694: proof (rule Mf.KL_nonneg) hoelzl@47694: show "prob_space ?DD" unfolding eq by fact hoelzl@47694: from f g show "(\x. g x / f x) \ borel_measurable (density M f)" hoelzl@47694: by auto hoelzl@47694: show "AE x in density M f. 0 \ g x / f x" hoelzl@47694: using f g by (auto simp: AE_density divide_nonneg_nonneg) hoelzl@47694: show "integrable (density M f) (\x. g x / f x * log b (g x / f x))" hoelzl@47694: using `1 < b` f g ac hoelzl@47694: by (subst integral_density) hoelzl@47694: (auto intro!: integrable_cong_AE[THEN iffD2, OF _ _ _ int] measurable_If) hoelzl@47694: qed hoelzl@47694: also have "\ = KL_divergence b (density M f) (density M g)" hoelzl@47694: using f g ac by (subst density_density_divide) simp_all hoelzl@47694: finally show ?thesis . hoelzl@36080: qed hoelzl@36080: hoelzl@49803: subsection {* Finite Entropy *} hoelzl@49803: hoelzl@49803: definition (in information_space) hoelzl@49803: "finite_entropy S X f \ distributed M S X f \ integrable S (\x. f x * log b (f x))" hoelzl@49803: hoelzl@49803: lemma (in information_space) finite_entropy_simple_function: hoelzl@49803: assumes X: "simple_function M X" hoelzl@49803: shows "finite_entropy (count_space (X`space M)) X (\a. measure M {x \ space M. X x = a})" hoelzl@49803: unfolding finite_entropy_def hoelzl@49803: proof hoelzl@49803: have [simp]: "finite (X ` space M)" hoelzl@49803: using X by (auto simp: simple_function_def) hoelzl@49803: then show "integrable (count_space (X ` space M)) hoelzl@49803: (\x. prob {xa \ space M. X xa = x} * log b (prob {xa \ space M. X xa = x}))" hoelzl@49803: by (rule integrable_count_space) hoelzl@49803: have d: "distributed M (count_space (X ` space M)) X (\x. ereal (if x \ X`space M then prob {xa \ space M. X xa = x} else 0))" hoelzl@49803: by (rule distributed_simple_function_superset[OF X]) (auto intro!: arg_cong[where f=prob]) hoelzl@49803: show "distributed M (count_space (X ` space M)) X (\x. ereal (prob {xa \ space M. X xa = x}))" hoelzl@49803: by (rule distributed_cong_density[THEN iffD1, OF _ _ _ d]) auto hoelzl@49803: qed hoelzl@49803: hoelzl@49803: lemma distributed_transform_AE: hoelzl@49803: assumes T: "T \ measurable P Q" "absolutely_continuous Q (distr P Q T)" hoelzl@49803: assumes g: "distributed M Q Y g" hoelzl@49803: shows "AE x in P. 0 \ g (T x)" hoelzl@49803: using g hoelzl@49803: apply (subst AE_distr_iff[symmetric, OF T(1)]) hoelzl@50003: apply simp hoelzl@49803: apply (rule absolutely_continuous_AE[OF _ T(2)]) hoelzl@49803: apply simp hoelzl@49803: apply (simp add: distributed_AE) hoelzl@49803: done hoelzl@49803: hoelzl@49803: lemma ac_fst: hoelzl@49803: assumes "sigma_finite_measure T" wenzelm@53015: shows "absolutely_continuous S (distr (S \\<^sub>M T) S fst)" hoelzl@49803: proof - hoelzl@49803: interpret sigma_finite_measure T by fact hoelzl@49803: { fix A assume "A \ sets S" "emeasure S A = 0" wenzelm@53015: moreover then have "fst -` A \ space (S \\<^sub>M T) = A \ space T" immler@50244: by (auto simp: space_pair_measure dest!: sets.sets_into_space) wenzelm@53015: ultimately have "emeasure (S \\<^sub>M T) (fst -` A \ space (S \\<^sub>M T)) = 0" hoelzl@49803: by (simp add: emeasure_pair_measure_Times) } hoelzl@49803: then show ?thesis hoelzl@49803: unfolding absolutely_continuous_def hoelzl@49803: apply (auto simp: null_sets_distr_iff) hoelzl@49803: apply (auto simp: null_sets_def intro!: measurable_sets) hoelzl@49803: done hoelzl@49803: qed hoelzl@49803: hoelzl@49803: lemma ac_snd: hoelzl@49803: assumes "sigma_finite_measure T" wenzelm@53015: shows "absolutely_continuous T (distr (S \\<^sub>M T) T snd)" hoelzl@49803: proof - hoelzl@49803: interpret sigma_finite_measure T by fact hoelzl@49803: { fix A assume "A \ sets T" "emeasure T A = 0" wenzelm@53015: moreover then have "snd -` A \ space (S \\<^sub>M T) = space S \ A" immler@50244: by (auto simp: space_pair_measure dest!: sets.sets_into_space) wenzelm@53015: ultimately have "emeasure (S \\<^sub>M T) (snd -` A \ space (S \\<^sub>M T)) = 0" hoelzl@49803: by (simp add: emeasure_pair_measure_Times) } hoelzl@49803: then show ?thesis hoelzl@49803: unfolding absolutely_continuous_def hoelzl@49803: apply (auto simp: null_sets_distr_iff) hoelzl@49803: apply (auto simp: null_sets_def intro!: measurable_sets) hoelzl@49803: done hoelzl@49803: qed hoelzl@49803: hoelzl@49803: lemma distributed_integrable: hoelzl@49803: "distributed M N X f \ g \ borel_measurable N \ hoelzl@49803: integrable N (\x. f x * g x) \ integrable M (\x. g (X x))" hoelzl@50003: by (auto simp: distributed_real_AE hoelzl@49803: distributed_distr_eq_density[symmetric] integral_density[symmetric] integrable_distr_eq) hoelzl@49803: hoelzl@49803: lemma distributed_transform_integrable: hoelzl@49803: assumes Px: "distributed M N X Px" hoelzl@49803: assumes "distributed M P Y Py" hoelzl@49803: assumes Y: "Y = (\x. T (X x))" and T: "T \ measurable N P" and f: "f \ borel_measurable P" hoelzl@49803: shows "integrable P (\x. Py x * f x) \ integrable N (\x. Px x * f (T x))" hoelzl@49803: proof - hoelzl@49803: have "integrable P (\x. Py x * f x) \ integrable M (\x. f (Y x))" hoelzl@49803: by (rule distributed_integrable) fact+ hoelzl@49803: also have "\ \ integrable M (\x. f (T (X x)))" hoelzl@49803: using Y by simp hoelzl@49803: also have "\ \ integrable N (\x. Px x * f (T x))" hoelzl@49803: using measurable_comp[OF T f] Px by (intro distributed_integrable[symmetric]) (auto simp: comp_def) hoelzl@49803: finally show ?thesis . hoelzl@49803: qed hoelzl@49803: hoelzl@49803: lemma integrable_cong_AE_imp: "integrable M g \ f \ borel_measurable M \ (AE x in M. g x = f x) \ integrable M f" hoelzl@49803: using integrable_cong_AE by blast hoelzl@49803: hoelzl@49803: lemma (in information_space) finite_entropy_integrable: hoelzl@49803: "finite_entropy S X Px \ integrable S (\x. Px x * log b (Px x))" hoelzl@49803: unfolding finite_entropy_def by auto hoelzl@49803: hoelzl@49803: lemma (in information_space) finite_entropy_distributed: hoelzl@49803: "finite_entropy S X Px \ distributed M S X Px" hoelzl@49803: unfolding finite_entropy_def by auto hoelzl@49803: hoelzl@49803: lemma (in information_space) finite_entropy_integrable_transform: hoelzl@49803: assumes Fx: "finite_entropy S X Px" hoelzl@49803: assumes Fy: "distributed M T Y Py" hoelzl@49803: and "X = (\x. f (Y x))" hoelzl@49803: and "f \ measurable T S" hoelzl@49803: shows "integrable T (\x. Py x * log b (Px (f x)))" hoelzl@49803: using assms unfolding finite_entropy_def hoelzl@49803: using distributed_transform_integrable[of M T Y Py S X Px f "\x. log b (Px x)"] hoelzl@50003: by auto hoelzl@49803: hoelzl@39097: subsection {* Mutual Information *} hoelzl@39097: hoelzl@36080: definition (in prob_space) hoelzl@38656: "mutual_information b S T X Y = wenzelm@53015: KL_divergence b (distr M S X \\<^sub>M distr M T Y) (distr M (S \\<^sub>M T) (\x. (X x, Y x)))" hoelzl@36080: hoelzl@47694: lemma (in information_space) mutual_information_indep_vars: hoelzl@43340: fixes S T X Y wenzelm@53015: defines "P \ distr M S X \\<^sub>M distr M T Y" wenzelm@53015: defines "Q \ distr M (S \\<^sub>M T) (\x. (X x, Y x))" hoelzl@43340: shows "indep_var S X T Y \ hoelzl@43340: (random_variable S X \ random_variable T Y \ hoelzl@47694: absolutely_continuous P Q \ integrable Q (entropy_density b P Q) \ hoelzl@47694: mutual_information b S T X Y = 0)" hoelzl@47694: unfolding indep_var_distribution_eq hoelzl@43340: proof safe hoelzl@50003: assume rv[measurable]: "random_variable S X" "random_variable T Y" hoelzl@43340: hoelzl@47694: interpret X: prob_space "distr M S X" hoelzl@47694: by (rule prob_space_distr) fact hoelzl@47694: interpret Y: prob_space "distr M T Y" hoelzl@47694: by (rule prob_space_distr) fact hoelzl@47694: interpret XY: pair_prob_space "distr M S X" "distr M T Y" by default hoelzl@47694: interpret P: information_space P b unfolding P_def by default (rule b_gt_1) hoelzl@43340: hoelzl@47694: interpret Q: prob_space Q unfolding Q_def hoelzl@50003: by (rule prob_space_distr) simp hoelzl@43340: wenzelm@53015: { assume "distr M S X \\<^sub>M distr M T Y = distr M (S \\<^sub>M T) (\x. (X x, Y x))" hoelzl@47694: then have [simp]: "Q = P" unfolding Q_def P_def by simp hoelzl@43340: hoelzl@47694: show ac: "absolutely_continuous P Q" by (simp add: absolutely_continuous_def) hoelzl@47694: then have ed: "entropy_density b P Q \ borel_measurable P" hoelzl@47694: by (rule P.measurable_entropy_density) simp hoelzl@43340: hoelzl@47694: have "AE x in P. 1 = RN_deriv P Q x" hoelzl@47694: proof (rule P.RN_deriv_unique) hoelzl@47694: show "density P (\x. 1) = Q" hoelzl@47694: unfolding `Q = P` by (intro measure_eqI) (auto simp: emeasure_density) hoelzl@47694: qed auto hoelzl@47694: then have ae_0: "AE x in P. entropy_density b P Q x = 0" hoelzl@47694: by eventually_elim (auto simp: entropy_density_def) hoelzl@47694: then have "integrable P (entropy_density b P Q) \ integrable Q (\x. 0)" hoelzl@47694: using ed unfolding `Q = P` by (intro integrable_cong_AE) auto hoelzl@47694: then show "integrable Q (entropy_density b P Q)" by simp hoelzl@43340: hoelzl@47694: show "mutual_information b S T X Y = 0" hoelzl@47694: unfolding mutual_information_def KL_divergence_def P_def[symmetric] Q_def[symmetric] `Q = P` hoelzl@47694: using ae_0 by (simp cong: integral_cong_AE) } hoelzl@43340: hoelzl@47694: { assume ac: "absolutely_continuous P Q" hoelzl@47694: assume int: "integrable Q (entropy_density b P Q)" hoelzl@47694: assume I_eq_0: "mutual_information b S T X Y = 0" hoelzl@43340: hoelzl@47694: have eq: "Q = P" hoelzl@47694: proof (rule P.KL_eq_0_iff_eq_ac[THEN iffD1]) hoelzl@47694: show "prob_space Q" by unfold_locales hoelzl@47694: show "absolutely_continuous P Q" by fact hoelzl@47694: show "integrable Q (entropy_density b P Q)" by fact hoelzl@47694: show "sets Q = sets P" by (simp add: P_def Q_def sets_pair_measure) hoelzl@47694: show "KL_divergence b P Q = 0" hoelzl@47694: using I_eq_0 unfolding mutual_information_def by (simp add: P_def Q_def) hoelzl@47694: qed wenzelm@53015: then show "distr M S X \\<^sub>M distr M T Y = distr M (S \\<^sub>M T) (\x. (X x, Y x))" hoelzl@47694: unfolding P_def Q_def .. } hoelzl@43340: qed hoelzl@43340: hoelzl@40859: abbreviation (in information_space) hoelzl@40859: mutual_information_Pow ("\'(_ ; _')") where hoelzl@47694: "\(X ; Y) \ mutual_information b (count_space (X`space M)) (count_space (Y`space M)) X Y" hoelzl@41689: hoelzl@47694: lemma (in information_space) hoelzl@47694: fixes Pxy :: "'b \ 'c \ real" and Px :: "'b \ real" and Py :: "'c \ real" hoelzl@49803: assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" hoelzl@49803: assumes Fx: "finite_entropy S X Px" and Fy: "finite_entropy T Y Py" wenzelm@53015: assumes Fxy: "finite_entropy (S \\<^sub>M T) (\x. (X x, Y x)) Pxy" hoelzl@49803: defines "f \ \x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" wenzelm@53015: shows mutual_information_distr': "mutual_information b S T X Y = integral\<^sup>L (S \\<^sub>M T) f" (is "?M = ?R") hoelzl@49803: and mutual_information_nonneg': "0 \ mutual_information b S T X Y" hoelzl@49803: proof - hoelzl@49803: have Px: "distributed M S X Px" hoelzl@49803: using Fx by (auto simp: finite_entropy_def) hoelzl@49803: have Py: "distributed M T Y Py" hoelzl@49803: using Fy by (auto simp: finite_entropy_def) wenzelm@53015: have Pxy: "distributed M (S \\<^sub>M T) (\x. (X x, Y x)) Pxy" hoelzl@49803: using Fxy by (auto simp: finite_entropy_def) hoelzl@49803: hoelzl@49803: have X: "random_variable S X" hoelzl@50003: using Px by auto hoelzl@49803: have Y: "random_variable T Y" hoelzl@50003: using Py by auto hoelzl@49803: interpret S: sigma_finite_measure S by fact hoelzl@49803: interpret T: sigma_finite_measure T by fact hoelzl@49803: interpret ST: pair_sigma_finite S T .. hoelzl@49803: interpret X: prob_space "distr M S X" using X by (rule prob_space_distr) hoelzl@49803: interpret Y: prob_space "distr M T Y" using Y by (rule prob_space_distr) hoelzl@49803: interpret XY: pair_prob_space "distr M S X" "distr M T Y" .. wenzelm@53015: let ?P = "S \\<^sub>M T" hoelzl@49803: let ?D = "distr M ?P (\x. (X x, Y x))" hoelzl@49803: hoelzl@49803: { fix A assume "A \ sets S" hoelzl@49803: with X Y have "emeasure (distr M S X) A = emeasure ?D (A \ space T)" hoelzl@49803: by (auto simp: emeasure_distr measurable_Pair measurable_space hoelzl@49803: intro!: arg_cong[where f="emeasure M"]) } hoelzl@49803: note marginal_eq1 = this hoelzl@49803: { fix A assume "A \ sets T" hoelzl@49803: with X Y have "emeasure (distr M T Y) A = emeasure ?D (space S \ A)" hoelzl@49803: by (auto simp: emeasure_distr measurable_Pair measurable_space hoelzl@49803: intro!: arg_cong[where f="emeasure M"]) } hoelzl@49803: note marginal_eq2 = this hoelzl@49803: hoelzl@49803: have eq: "(\x. ereal (Px (fst x) * Py (snd x))) = (\(x, y). ereal (Px x) * ereal (Py y))" hoelzl@49803: by auto hoelzl@49803: wenzelm@53015: have distr_eq: "distr M S X \\<^sub>M distr M T Y = density ?P (\x. ereal (Px (fst x) * Py (snd x)))" hoelzl@49803: unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] eq hoelzl@49803: proof (subst pair_measure_density) hoelzl@49803: show "(\x. ereal (Px x)) \ borel_measurable S" "(\y. ereal (Py y)) \ borel_measurable T" hoelzl@49803: "AE x in S. 0 \ ereal (Px x)" "AE y in T. 0 \ ereal (Py y)" hoelzl@49803: using Px Py by (auto simp: distributed_def) hoelzl@49803: show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. hoelzl@49803: qed (fact | simp)+ hoelzl@49803: hoelzl@49803: have M: "?M = KL_divergence b (density ?P (\x. ereal (Px (fst x) * Py (snd x)))) (density ?P (\x. ereal (Pxy x)))" hoelzl@49803: unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] .. hoelzl@49803: hoelzl@49803: from Px Py have f: "(\x. Px (fst x) * Py (snd x)) \ borel_measurable ?P" hoelzl@49803: by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') hoelzl@49803: have PxPy_nonneg: "AE x in ?P. 0 \ Px (fst x) * Py (snd x)" hoelzl@49803: proof (rule ST.AE_pair_measure) hoelzl@49803: show "{x \ space ?P. 0 \ Px (fst x) * Py (snd x)} \ sets ?P" hoelzl@49803: using f by auto hoelzl@49803: show "AE x in S. AE y in T. 0 \ Px (fst (x, y)) * Py (snd (x, y))" hoelzl@49803: using Px Py by (auto simp: zero_le_mult_iff dest!: distributed_real_AE) hoelzl@49803: qed hoelzl@49803: hoelzl@49803: have "(AE x in ?P. Px (fst x) = 0 \ Pxy x = 0)" hoelzl@49803: by (rule subdensity_real[OF measurable_fst Pxy Px]) auto hoelzl@49803: moreover hoelzl@49803: have "(AE x in ?P. Py (snd x) = 0 \ Pxy x = 0)" hoelzl@49803: by (rule subdensity_real[OF measurable_snd Pxy Py]) auto hoelzl@49803: ultimately have ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 \ Pxy x = 0" hoelzl@49803: by eventually_elim auto hoelzl@49803: hoelzl@49803: show "?M = ?R" hoelzl@49803: unfolding M f_def hoelzl@49803: using b_gt_1 f PxPy_nonneg Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] ac hoelzl@49803: by (rule ST.KL_density_density) hoelzl@49803: hoelzl@49803: have X: "X = fst \ (\x. (X x, Y x))" and Y: "Y = snd \ (\x. (X x, Y x))" hoelzl@49803: by auto hoelzl@49803: wenzelm@53015: have "integrable (S \\<^sub>M T) (\x. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)))" hoelzl@49803: using finite_entropy_integrable[OF Fxy] hoelzl@49803: using finite_entropy_integrable_transform[OF Fx Pxy, of fst] hoelzl@49803: using finite_entropy_integrable_transform[OF Fy Pxy, of snd] hoelzl@49803: by simp wenzelm@53015: moreover have "f \ borel_measurable (S \\<^sub>M T)" hoelzl@49803: unfolding f_def using Px Py Pxy hoelzl@49803: by (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'' hoelzl@49803: intro!: borel_measurable_times borel_measurable_log borel_measurable_divide) wenzelm@53015: ultimately have int: "integrable (S \\<^sub>M T) f" hoelzl@49803: apply (rule integrable_cong_AE_imp) hoelzl@49803: using hoelzl@49803: distributed_transform_AE[OF measurable_fst ac_fst, of T, OF T Px] hoelzl@49803: distributed_transform_AE[OF measurable_snd ac_snd, of _ _ _ _ S, OF T Py] hoelzl@49803: subdensity_real[OF measurable_fst Pxy Px X] hoelzl@49803: subdensity_real[OF measurable_snd Pxy Py Y] hoelzl@49803: distributed_real_AE[OF Pxy] hoelzl@49803: by eventually_elim hoelzl@49803: (auto simp: f_def log_divide_eq log_mult_eq field_simps zero_less_mult_iff mult_nonneg_nonneg) hoelzl@49803: hoelzl@49803: show "0 \ ?M" unfolding M hoelzl@49803: proof (rule ST.KL_density_density_nonneg hoelzl@49803: [OF b_gt_1 f PxPy_nonneg _ Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] _ ac int[unfolded f_def]]) wenzelm@53015: show "prob_space (density (S \\<^sub>M T) (\x. ereal (Pxy x))) " hoelzl@49803: unfolding distributed_distr_eq_density[OF Pxy, symmetric] hoelzl@49803: using distributed_measurable[OF Pxy] by (rule prob_space_distr) wenzelm@53015: show "prob_space (density (S \\<^sub>M T) (\x. ereal (Px (fst x) * Py (snd x))))" hoelzl@49803: unfolding distr_eq[symmetric] by unfold_locales hoelzl@49803: qed hoelzl@49803: qed hoelzl@49803: hoelzl@49803: hoelzl@49803: lemma (in information_space) hoelzl@49803: fixes Pxy :: "'b \ 'c \ real" and Px :: "'b \ real" and Py :: "'c \ real" hoelzl@47694: assumes "sigma_finite_measure S" "sigma_finite_measure T" hoelzl@47694: assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" wenzelm@53015: assumes Pxy: "distributed M (S \\<^sub>M T) (\x. (X x, Y x)) Pxy" hoelzl@47694: defines "f \ \x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" wenzelm@53015: shows mutual_information_distr: "mutual_information b S T X Y = integral\<^sup>L (S \\<^sub>M T) f" (is "?M = ?R") wenzelm@53015: and mutual_information_nonneg: "integrable (S \\<^sub>M T) f \ 0 \ mutual_information b S T X Y" hoelzl@40859: proof - hoelzl@47694: have X: "random_variable S X" hoelzl@47694: using Px by (auto simp: distributed_def) hoelzl@47694: have Y: "random_variable T Y" hoelzl@47694: using Py by (auto simp: distributed_def) hoelzl@47694: interpret S: sigma_finite_measure S by fact hoelzl@47694: interpret T: sigma_finite_measure T by fact hoelzl@47694: interpret ST: pair_sigma_finite S T .. hoelzl@47694: interpret X: prob_space "distr M S X" using X by (rule prob_space_distr) hoelzl@47694: interpret Y: prob_space "distr M T Y" using Y by (rule prob_space_distr) hoelzl@47694: interpret XY: pair_prob_space "distr M S X" "distr M T Y" .. wenzelm@53015: let ?P = "S \\<^sub>M T" hoelzl@47694: let ?D = "distr M ?P (\x. (X x, Y x))" hoelzl@47694: hoelzl@47694: { fix A assume "A \ sets S" hoelzl@47694: with X Y have "emeasure (distr M S X) A = emeasure ?D (A \ space T)" hoelzl@47694: by (auto simp: emeasure_distr measurable_Pair measurable_space hoelzl@47694: intro!: arg_cong[where f="emeasure M"]) } hoelzl@47694: note marginal_eq1 = this hoelzl@47694: { fix A assume "A \ sets T" hoelzl@47694: with X Y have "emeasure (distr M T Y) A = emeasure ?D (space S \ A)" hoelzl@47694: by (auto simp: emeasure_distr measurable_Pair measurable_space hoelzl@47694: intro!: arg_cong[where f="emeasure M"]) } hoelzl@47694: note marginal_eq2 = this hoelzl@47694: hoelzl@47694: have eq: "(\x. ereal (Px (fst x) * Py (snd x))) = (\(x, y). ereal (Px x) * ereal (Py y))" hoelzl@47694: by auto hoelzl@47694: wenzelm@53015: have distr_eq: "distr M S X \\<^sub>M distr M T Y = density ?P (\x. ereal (Px (fst x) * Py (snd x)))" hoelzl@47694: unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] eq hoelzl@47694: proof (subst pair_measure_density) hoelzl@47694: show "(\x. ereal (Px x)) \ borel_measurable S" "(\y. ereal (Py y)) \ borel_measurable T" hoelzl@47694: "AE x in S. 0 \ ereal (Px x)" "AE y in T. 0 \ ereal (Py y)" hoelzl@47694: using Px Py by (auto simp: distributed_def) hoelzl@47694: show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. hoelzl@47694: qed (fact | simp)+ hoelzl@47694: hoelzl@47694: have M: "?M = KL_divergence b (density ?P (\x. ereal (Px (fst x) * Py (snd x)))) (density ?P (\x. ereal (Pxy x)))" hoelzl@47694: unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] .. hoelzl@47694: hoelzl@47694: from Px Py have f: "(\x. Px (fst x) * Py (snd x)) \ borel_measurable ?P" hoelzl@47694: by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') hoelzl@47694: have PxPy_nonneg: "AE x in ?P. 0 \ Px (fst x) * Py (snd x)" hoelzl@47694: proof (rule ST.AE_pair_measure) hoelzl@47694: show "{x \ space ?P. 0 \ Px (fst x) * Py (snd x)} \ sets ?P" hoelzl@47694: using f by auto hoelzl@47694: show "AE x in S. AE y in T. 0 \ Px (fst (x, y)) * Py (snd (x, y))" hoelzl@47694: using Px Py by (auto simp: zero_le_mult_iff dest!: distributed_real_AE) hoelzl@47694: qed hoelzl@47694: hoelzl@47694: have "(AE x in ?P. Px (fst x) = 0 \ Pxy x = 0)" hoelzl@47694: by (rule subdensity_real[OF measurable_fst Pxy Px]) auto hoelzl@47694: moreover hoelzl@47694: have "(AE x in ?P. Py (snd x) = 0 \ Pxy x = 0)" hoelzl@47694: by (rule subdensity_real[OF measurable_snd Pxy Py]) auto hoelzl@47694: ultimately have ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 \ Pxy x = 0" hoelzl@47694: by eventually_elim auto hoelzl@47694: hoelzl@47694: show "?M = ?R" hoelzl@47694: unfolding M f_def hoelzl@47694: using b_gt_1 f PxPy_nonneg Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] ac hoelzl@47694: by (rule ST.KL_density_density) hoelzl@47694: wenzelm@53015: assume int: "integrable (S \\<^sub>M T) f" hoelzl@47694: show "0 \ ?M" unfolding M hoelzl@47694: proof (rule ST.KL_density_density_nonneg hoelzl@47694: [OF b_gt_1 f PxPy_nonneg _ Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] _ ac int[unfolded f_def]]) wenzelm@53015: show "prob_space (density (S \\<^sub>M T) (\x. ereal (Pxy x))) " hoelzl@47694: unfolding distributed_distr_eq_density[OF Pxy, symmetric] hoelzl@47694: using distributed_measurable[OF Pxy] by (rule prob_space_distr) wenzelm@53015: show "prob_space (density (S \\<^sub>M T) (\x. ereal (Px (fst x) * Py (snd x))))" hoelzl@47694: unfolding distr_eq[symmetric] by unfold_locales hoelzl@40859: qed hoelzl@40859: qed hoelzl@40859: hoelzl@40859: lemma (in information_space) hoelzl@47694: fixes Pxy :: "'b \ 'c \ real" and Px :: "'b \ real" and Py :: "'c \ real" hoelzl@47694: assumes "sigma_finite_measure S" "sigma_finite_measure T" hoelzl@47694: assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" wenzelm@53015: assumes Pxy: "distributed M (S \\<^sub>M T) (\x. (X x, Y x)) Pxy" hoelzl@47694: assumes ae: "AE x in S. AE y in T. Pxy (x, y) = Px x * Py y" hoelzl@47694: shows mutual_information_eq_0: "mutual_information b S T X Y = 0" hoelzl@36624: proof - hoelzl@47694: interpret S: sigma_finite_measure S by fact hoelzl@47694: interpret T: sigma_finite_measure T by fact hoelzl@47694: interpret ST: pair_sigma_finite S T .. hoelzl@36080: wenzelm@53015: have "AE x in S \\<^sub>M T. Px (fst x) = 0 \ Pxy x = 0" hoelzl@47694: by (rule subdensity_real[OF measurable_fst Pxy Px]) auto hoelzl@47694: moreover wenzelm@53015: have "AE x in S \\<^sub>M T. Py (snd x) = 0 \ Pxy x = 0" hoelzl@47694: by (rule subdensity_real[OF measurable_snd Pxy Py]) auto hoelzl@47694: moreover wenzelm@53015: have "AE x in S \\<^sub>M T. Pxy x = Px (fst x) * Py (snd x)" hoelzl@47694: using distributed_real_measurable[OF Px] distributed_real_measurable[OF Py] distributed_real_measurable[OF Pxy] hoelzl@47694: by (intro ST.AE_pair_measure) (auto simp: ae intro!: measurable_snd'' measurable_fst'') wenzelm@53015: ultimately have "AE x in S \\<^sub>M T. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) = 0" hoelzl@47694: by eventually_elim simp wenzelm@53015: then have "(\x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) \(S \\<^sub>M T)) = (\x. 0 \(S \\<^sub>M T))" hoelzl@47694: by (rule integral_cong_AE) hoelzl@47694: then show ?thesis hoelzl@47694: by (subst mutual_information_distr[OF assms(1-5)]) simp hoelzl@36080: qed hoelzl@36080: hoelzl@47694: lemma (in information_space) mutual_information_simple_distributed: hoelzl@47694: assumes X: "simple_distributed M X Px" and Y: "simple_distributed M Y Py" hoelzl@47694: assumes XY: "simple_distributed M (\x. (X x, Y x)) Pxy" hoelzl@47694: shows "\(X ; Y) = (\(x, y)\(\x. (X x, Y x))`space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" hoelzl@47694: proof (subst mutual_information_distr[OF _ _ simple_distributed[OF X] simple_distributed[OF Y] simple_distributed_joint[OF XY]]) hoelzl@47694: note fin = simple_distributed_joint_finite[OF XY, simp] hoelzl@47694: show "sigma_finite_measure (count_space (X ` space M))" hoelzl@47694: by (simp add: sigma_finite_measure_count_space_finite) hoelzl@47694: show "sigma_finite_measure (count_space (Y ` space M))" hoelzl@47694: by (simp add: sigma_finite_measure_count_space_finite) hoelzl@47694: let ?Pxy = "\x. (if x \ (\x. (X x, Y x)) ` space M then Pxy x else 0)" hoelzl@47694: let ?f = "\x. ?Pxy x * log b (?Pxy x / (Px (fst x) * Py (snd x)))" hoelzl@47694: have "\x. ?f x = (if x \ (\x. (X x, Y x)) ` space M then Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) else 0)" hoelzl@47694: by auto wenzelm@53015: with fin show "(\ x. ?f x \(count_space (X ` space M) \\<^sub>M count_space (Y ` space M))) = hoelzl@47694: (\(x, y)\(\x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" hoelzl@47694: by (auto simp add: pair_measure_count_space lebesgue_integral_count_space_finite setsum_cases split_beta' hoelzl@47694: intro!: setsum_cong) hoelzl@47694: qed hoelzl@36080: hoelzl@47694: lemma (in information_space) hoelzl@47694: fixes Pxy :: "'b \ 'c \ real" and Px :: "'b \ real" and Py :: "'c \ real" hoelzl@47694: assumes Px: "simple_distributed M X Px" and Py: "simple_distributed M Y Py" hoelzl@47694: assumes Pxy: "simple_distributed M (\x. (X x, Y x)) Pxy" hoelzl@47694: assumes ae: "\x\space M. Pxy (X x, Y x) = Px (X x) * Py (Y x)" hoelzl@47694: shows mutual_information_eq_0_simple: "\(X ; Y) = 0" hoelzl@47694: proof (subst mutual_information_simple_distributed[OF Px Py Pxy]) hoelzl@47694: have "(\(x, y)\(\x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y))) = hoelzl@47694: (\(x, y)\(\x. (X x, Y x)) ` space M. 0)" hoelzl@47694: by (intro setsum_cong) (auto simp: ae) hoelzl@47694: then show "(\(x, y)\(\x. (X x, Y x)) ` space M. hoelzl@47694: Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y))) = 0" by simp hoelzl@47694: qed hoelzl@36080: hoelzl@39097: subsection {* Entropy *} hoelzl@39097: hoelzl@47694: definition (in prob_space) entropy :: "real \ 'b measure \ ('a \ 'b) \ real" where hoelzl@47694: "entropy b S X = - KL_divergence b S (distr M S X)" hoelzl@47694: hoelzl@40859: abbreviation (in information_space) hoelzl@40859: entropy_Pow ("\'(_')") where hoelzl@47694: "\(X) \ entropy b (count_space (X`space M)) X" hoelzl@41981: hoelzl@49791: lemma (in prob_space) distributed_RN_deriv: hoelzl@49791: assumes X: "distributed M S X Px" hoelzl@49791: shows "AE x in S. RN_deriv S (density S Px) x = Px x" hoelzl@49791: proof - hoelzl@49791: note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X] hoelzl@49791: interpret X: prob_space "distr M S X" hoelzl@49791: using D(1) by (rule prob_space_distr) hoelzl@49791: hoelzl@49791: have sf: "sigma_finite_measure (distr M S X)" by default hoelzl@49791: show ?thesis hoelzl@49791: using D hoelzl@49791: apply (subst eq_commute) hoelzl@49791: apply (intro RN_deriv_unique_sigma_finite) hoelzl@49791: apply (auto intro: divide_nonneg_nonneg measure_nonneg hoelzl@49791: simp: distributed_distr_eq_density[symmetric, OF X] sf) hoelzl@49791: done hoelzl@49791: qed hoelzl@49791: hoelzl@49788: lemma (in information_space) hoelzl@47694: fixes X :: "'a \ 'b" hoelzl@49785: assumes X: "distributed M MX X f" hoelzl@49788: shows entropy_distr: "entropy b MX X = - (\x. f x * log b (f x) \MX)" (is ?eq) hoelzl@49788: proof - hoelzl@49785: note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X] hoelzl@49791: note ae = distributed_RN_deriv[OF X] hoelzl@49788: hoelzl@49788: have ae_eq: "AE x in distr M MX X. log b (real (RN_deriv MX (distr M MX X) x)) = hoelzl@49785: log b (f x)" hoelzl@49785: unfolding distributed_distr_eq_density[OF X] hoelzl@49785: apply (subst AE_density) hoelzl@49785: using D apply simp hoelzl@49785: using ae apply eventually_elim hoelzl@49785: apply auto hoelzl@49785: done hoelzl@49788: hoelzl@49788: have int_eq: "- (\ x. log b (f x) \distr M MX X) = - (\ x. f x * log b (f x) \MX)" hoelzl@49785: unfolding distributed_distr_eq_density[OF X] hoelzl@49785: using D hoelzl@49785: by (subst integral_density) hoelzl@49785: (auto simp: borel_measurable_ereal_iff) hoelzl@49788: hoelzl@49788: show ?eq hoelzl@49788: unfolding entropy_def KL_divergence_def entropy_density_def comp_def hoelzl@49788: apply (subst integral_cong_AE) hoelzl@49788: apply (rule ae_eq) hoelzl@49788: apply (rule int_eq) hoelzl@49788: done hoelzl@49788: qed hoelzl@49785: hoelzl@49786: lemma (in prob_space) distributed_imp_emeasure_nonzero: hoelzl@49786: assumes X: "distributed M MX X Px" hoelzl@49786: shows "emeasure MX {x \ space MX. Px x \ 0} \ 0" hoelzl@49786: proof hoelzl@49786: note Px = distributed_borel_measurable[OF X] distributed_AE[OF X] hoelzl@49786: interpret X: prob_space "distr M MX X" hoelzl@49786: using distributed_measurable[OF X] by (rule prob_space_distr) hoelzl@49786: hoelzl@49786: assume "emeasure MX {x \ space MX. Px x \ 0} = 0" hoelzl@49786: with Px have "AE x in MX. Px x = 0" hoelzl@49786: by (intro AE_I[OF subset_refl]) (auto simp: borel_measurable_ereal_iff) hoelzl@49786: moreover wenzelm@53015: from X.emeasure_space_1 have "(\\<^sup>+x. Px x \MX) = 1" hoelzl@49786: unfolding distributed_distr_eq_density[OF X] using Px hoelzl@49786: by (subst (asm) emeasure_density) hoelzl@49786: (auto simp: borel_measurable_ereal_iff intro!: integral_cong cong: positive_integral_cong) hoelzl@49786: ultimately show False hoelzl@49786: by (simp add: positive_integral_cong_AE) hoelzl@49786: qed hoelzl@49786: hoelzl@49786: lemma (in information_space) entropy_le: hoelzl@49786: fixes Px :: "'b \ real" and MX :: "'b measure" hoelzl@49786: assumes X: "distributed M MX X Px" hoelzl@49786: and fin: "emeasure MX {x \ space MX. Px x \ 0} \ \" hoelzl@49786: and int: "integrable MX (\x. - Px x * log b (Px x))" hoelzl@49786: shows "entropy b MX X \ log b (measure MX {x \ space MX. Px x \ 0})" hoelzl@49786: proof - hoelzl@49786: note Px = distributed_borel_measurable[OF X] distributed_AE[OF X] hoelzl@49786: interpret X: prob_space "distr M MX X" hoelzl@49786: using distributed_measurable[OF X] by (rule prob_space_distr) hoelzl@49786: hoelzl@49786: have " - log b (measure MX {x \ space MX. Px x \ 0}) = hoelzl@49786: - log b (\ x. indicator {x \ space MX. Px x \ 0} x \MX)" hoelzl@49786: using Px fin hoelzl@49786: by (subst integral_indicator) (auto simp: measure_def borel_measurable_ereal_iff) hoelzl@49786: also have "- log b (\ x. indicator {x \ space MX. Px x \ 0} x \MX) = - log b (\ x. 1 / Px x \distr M MX X)" hoelzl@49786: unfolding distributed_distr_eq_density[OF X] using Px hoelzl@49786: apply (intro arg_cong[where f="log b"] arg_cong[where f=uminus]) hoelzl@49786: by (subst integral_density) (auto simp: borel_measurable_ereal_iff intro!: integral_cong) hoelzl@49786: also have "\ \ (\ x. - log b (1 / Px x) \distr M MX X)" hoelzl@49786: proof (rule X.jensens_inequality[of "\x. 1 / Px x" "{0<..}" 0 1 "\x. - log b x"]) hoelzl@49786: show "AE x in distr M MX X. 1 / Px x \ {0<..}" hoelzl@49786: unfolding distributed_distr_eq_density[OF X] hoelzl@49786: using Px by (auto simp: AE_density) hoelzl@49786: have [simp]: "\x. x \ space MX \ ereal (if Px x = 0 then 0 else 1) = indicator {x \ space MX. Px x \ 0} x" hoelzl@49786: by (auto simp: one_ereal_def) wenzelm@53015: have "(\\<^sup>+ x. max 0 (ereal (- (if Px x = 0 then 0 else 1))) \MX) = (\\<^sup>+ x. 0 \MX)" hoelzl@49786: by (intro positive_integral_cong) (auto split: split_max) hoelzl@49786: then show "integrable (distr M MX X) (\x. 1 / Px x)" hoelzl@49786: unfolding distributed_distr_eq_density[OF X] using Px hoelzl@49786: by (auto simp: positive_integral_density integrable_def borel_measurable_ereal_iff fin positive_integral_max_0 hoelzl@49786: cong: positive_integral_cong) hoelzl@49786: have "integrable MX (\x. Px x * log b (1 / Px x)) = hoelzl@49786: integrable MX (\x. - Px x * log b (Px x))" hoelzl@49786: using Px hoelzl@49786: by (intro integrable_cong_AE) hoelzl@49786: (auto simp: borel_measurable_ereal_iff log_divide_eq hoelzl@49786: intro!: measurable_If) hoelzl@49786: then show "integrable (distr M MX X) (\x. - log b (1 / Px x))" hoelzl@49786: unfolding distributed_distr_eq_density[OF X] hoelzl@49786: using Px int hoelzl@49786: by (subst integral_density) (auto simp: borel_measurable_ereal_iff) hoelzl@49786: qed (auto simp: minus_log_convex[OF b_gt_1]) hoelzl@49786: also have "\ = (\ x. log b (Px x) \distr M MX X)" hoelzl@49786: unfolding distributed_distr_eq_density[OF X] using Px hoelzl@49786: by (intro integral_cong_AE) (auto simp: AE_density log_divide_eq) hoelzl@49786: also have "\ = - entropy b MX X" hoelzl@49786: unfolding distributed_distr_eq_density[OF X] using Px hoelzl@49786: by (subst entropy_distr[OF X]) (auto simp: borel_measurable_ereal_iff integral_density) hoelzl@49786: finally show ?thesis hoelzl@49786: by simp hoelzl@49786: qed hoelzl@49786: hoelzl@49786: lemma (in information_space) entropy_le_space: hoelzl@49786: fixes Px :: "'b \ real" and MX :: "'b measure" hoelzl@49786: assumes X: "distributed M MX X Px" hoelzl@49786: and fin: "finite_measure MX" hoelzl@49786: and int: "integrable MX (\x. - Px x * log b (Px x))" hoelzl@49786: shows "entropy b MX X \ log b (measure MX (space MX))" hoelzl@49786: proof - hoelzl@49786: note Px = distributed_borel_measurable[OF X] distributed_AE[OF X] hoelzl@49786: interpret finite_measure MX by fact hoelzl@49786: have "entropy b MX X \ log b (measure MX {x \ space MX. Px x \ 0})" hoelzl@49786: using int X by (intro entropy_le) auto hoelzl@49786: also have "\ \ log b (measure MX (space MX))" hoelzl@49786: using Px distributed_imp_emeasure_nonzero[OF X] hoelzl@49786: by (intro log_le) hoelzl@49786: (auto intro!: borel_measurable_ereal_iff finite_measure_mono b_gt_1 hoelzl@49786: less_le[THEN iffD2] measure_nonneg simp: emeasure_eq_measure) hoelzl@49786: finally show ?thesis . hoelzl@49786: qed hoelzl@49786: hoelzl@47694: lemma (in information_space) entropy_uniform: hoelzl@49785: assumes X: "distributed M MX X (\x. indicator A x / measure MX A)" (is "distributed _ _ _ ?f") hoelzl@47694: shows "entropy b MX X = log b (measure MX A)" hoelzl@49785: proof (subst entropy_distr[OF X]) hoelzl@49785: have [simp]: "emeasure MX A \ \" hoelzl@49785: using uniform_distributed_params[OF X] by (auto simp add: measure_def) hoelzl@49785: have eq: "(\ x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \MX) = hoelzl@49785: (\ x. (- log b (measure MX A) / measure MX A) * indicator A x \MX)" hoelzl@49785: using measure_nonneg[of MX A] uniform_distributed_params[OF X] hoelzl@49785: by (auto intro!: integral_cong split: split_indicator simp: log_divide_eq) hoelzl@49785: show "- (\ x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \MX) = hoelzl@49785: log b (measure MX A)" hoelzl@49785: unfolding eq using uniform_distributed_params[OF X] hoelzl@49785: by (subst lebesgue_integral_cmult) (auto simp: measure_def) hoelzl@49785: qed hoelzl@36080: hoelzl@47694: lemma (in information_space) entropy_simple_distributed: hoelzl@49786: "simple_distributed M X f \ \(X) = - (\x\X`space M. f x * log b (f x))" hoelzl@49786: by (subst entropy_distr[OF simple_distributed]) hoelzl@49786: (auto simp add: lebesgue_integral_count_space_finite) hoelzl@39097: hoelzl@40859: lemma (in information_space) entropy_le_card_not_0: hoelzl@47694: assumes X: "simple_distributed M X f" hoelzl@47694: shows "\(X) \ log b (card (X ` space M \ {x. f x \ 0}))" hoelzl@39097: proof - hoelzl@49787: let ?X = "count_space (X`space M)" hoelzl@49787: have "\(X) \ log b (measure ?X {x \ space ?X. f x \ 0})" hoelzl@49787: by (rule entropy_le[OF simple_distributed[OF X]]) hoelzl@49787: (simp_all add: simple_distributed_finite[OF X] subset_eq integrable_count_space emeasure_count_space) hoelzl@49787: also have "measure ?X {x \ space ?X. f x \ 0} = card (X ` space M \ {x. f x \ 0})" hoelzl@49787: by (simp_all add: simple_distributed_finite[OF X] subset_eq emeasure_count_space measure_def Int_def) hoelzl@49787: finally show ?thesis . hoelzl@39097: qed hoelzl@39097: hoelzl@40859: lemma (in information_space) entropy_le_card: hoelzl@49787: assumes X: "simple_distributed M X f" hoelzl@40859: shows "\(X) \ log b (real (card (X ` space M)))" hoelzl@49787: proof - hoelzl@49787: let ?X = "count_space (X`space M)" hoelzl@49787: have "\(X) \ log b (measure ?X (space ?X))" hoelzl@49787: by (rule entropy_le_space[OF simple_distributed[OF X]]) hoelzl@49787: (simp_all add: simple_distributed_finite[OF X] subset_eq integrable_count_space emeasure_count_space finite_measure_count_space) hoelzl@49787: also have "measure ?X (space ?X) = card (X ` space M)" hoelzl@49787: by (simp_all add: simple_distributed_finite[OF X] subset_eq emeasure_count_space measure_def) hoelzl@39097: finally show ?thesis . hoelzl@39097: qed hoelzl@39097: hoelzl@39097: subsection {* Conditional Mutual Information *} hoelzl@39097: hoelzl@36080: definition (in prob_space) hoelzl@41689: "conditional_mutual_information b MX MY MZ X Y Z \ wenzelm@53015: mutual_information b MX (MY \\<^sub>M MZ) X (\x. (Y x, Z x)) - hoelzl@41689: mutual_information b MX MZ X Z" hoelzl@36080: hoelzl@40859: abbreviation (in information_space) hoelzl@40859: conditional_mutual_information_Pow ("\'( _ ; _ | _ ')") where hoelzl@36624: "\(X ; Y | Z) \ conditional_mutual_information b hoelzl@47694: (count_space (X ` space M)) (count_space (Y ` space M)) (count_space (Z ` space M)) X Y Z" hoelzl@36080: hoelzl@49787: lemma (in information_space) hoelzl@47694: assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" and P: "sigma_finite_measure P" hoelzl@50003: assumes Px[measurable]: "distributed M S X Px" hoelzl@50003: assumes Pz[measurable]: "distributed M P Z Pz" wenzelm@53015: assumes Pyz[measurable]: "distributed M (T \\<^sub>M P) (\x. (Y x, Z x)) Pyz" wenzelm@53015: assumes Pxz[measurable]: "distributed M (S \\<^sub>M P) (\x. (X x, Z x)) Pxz" wenzelm@53015: assumes Pxyz[measurable]: "distributed M (S \\<^sub>M T \\<^sub>M P) (\x. (X x, Y x, Z x)) Pxyz" wenzelm@53015: assumes I1: "integrable (S \\<^sub>M T \\<^sub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" wenzelm@53015: assumes I2: "integrable (S \\<^sub>M T \\<^sub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" hoelzl@49787: shows conditional_mutual_information_generic_eq: "conditional_mutual_information b S T P X Y Z wenzelm@53015: = (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \(S \\<^sub>M T \\<^sub>M P))" (is "?eq") hoelzl@49787: and conditional_mutual_information_generic_nonneg: "0 \ conditional_mutual_information b S T P X Y Z" (is "?nonneg") hoelzl@40859: proof - hoelzl@47694: interpret S: sigma_finite_measure S by fact hoelzl@47694: interpret T: sigma_finite_measure T by fact hoelzl@47694: interpret P: sigma_finite_measure P by fact hoelzl@47694: interpret TP: pair_sigma_finite T P .. hoelzl@47694: interpret SP: pair_sigma_finite S P .. hoelzl@49787: interpret ST: pair_sigma_finite S T .. wenzelm@53015: interpret SPT: pair_sigma_finite "S \\<^sub>M P" T .. wenzelm@53015: interpret STP: pair_sigma_finite S "T \\<^sub>M P" .. wenzelm@53015: interpret TPS: pair_sigma_finite "T \\<^sub>M P" S .. wenzelm@53015: have TP: "sigma_finite_measure (T \\<^sub>M P)" .. wenzelm@53015: have SP: "sigma_finite_measure (S \\<^sub>M P)" .. wenzelm@53015: have YZ: "random_variable (T \\<^sub>M P) (\x. (Y x, Z x))" hoelzl@47694: using Pyz by (simp add: distributed_measurable) hoelzl@47694: wenzelm@53015: from Pxz Pxyz have distr_eq: "distr M (S \\<^sub>M P) (\x. (X x, Z x)) = wenzelm@53015: distr (distr M (S \\<^sub>M T \\<^sub>M P) (\x. (X x, Y x, Z x))) (S \\<^sub>M P) (\(x, y, z). (x, z))" hoelzl@50003: by (simp add: comp_def distr_distr) hoelzl@40859: hoelzl@47694: have "mutual_information b S P X Z = wenzelm@53015: (\x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \(S \\<^sub>M P))" hoelzl@47694: by (rule mutual_information_distr[OF S P Px Pz Pxz]) wenzelm@53015: also have "\ = (\(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \(S \\<^sub>M T \\<^sub>M P))" hoelzl@47694: using b_gt_1 Pxz Px Pz hoelzl@50003: by (subst distributed_transform_integral[OF Pxyz Pxz, where T="\(x, y, z). (x, z)"]) (auto simp: split_beta') hoelzl@47694: finally have mi_eq: wenzelm@53015: "mutual_information b S P X Z = (\(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \(S \\<^sub>M T \\<^sub>M P))" . hoelzl@47694: wenzelm@53015: have ae1: "AE x in S \\<^sub>M T \\<^sub>M P. Px (fst x) = 0 \ Pxyz x = 0" hoelzl@47694: by (intro subdensity_real[of fst, OF _ Pxyz Px]) auto wenzelm@53015: moreover have ae2: "AE x in S \\<^sub>M T \\<^sub>M P. Pz (snd (snd x)) = 0 \ Pxyz x = 0" hoelzl@50003: by (intro subdensity_real[of "\x. snd (snd x)", OF _ Pxyz Pz]) auto wenzelm@53015: moreover have ae3: "AE x in S \\<^sub>M T \\<^sub>M P. Pxz (fst x, snd (snd x)) = 0 \ Pxyz x = 0" hoelzl@50003: by (intro subdensity_real[of "\x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) auto wenzelm@53015: moreover have ae4: "AE x in S \\<^sub>M T \\<^sub>M P. Pyz (snd x) = 0 \ Pxyz x = 0" hoelzl@50003: by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) auto wenzelm@53015: moreover have ae5: "AE x in S \\<^sub>M T \\<^sub>M P. 0 \ Px (fst x)" hoelzl@50003: using Px by (intro STP.AE_pair_measure) (auto simp: comp_def dest: distributed_real_AE) wenzelm@53015: moreover have ae6: "AE x in S \\<^sub>M T \\<^sub>M P. 0 \ Pyz (snd x)" hoelzl@50003: using Pyz by (intro STP.AE_pair_measure) (auto simp: comp_def dest: distributed_real_AE) wenzelm@53015: moreover have ae7: "AE x in S \\<^sub>M T \\<^sub>M P. 0 \ Pz (snd (snd x))" hoelzl@50003: using Pz Pz[THEN distributed_real_measurable] hoelzl@50003: by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure AE_I2[of S] dest: distributed_real_AE) wenzelm@53015: moreover have ae8: "AE x in S \\<^sub>M T \\<^sub>M P. 0 \ Pxz (fst x, snd (snd x))" hoelzl@47694: using Pxz[THEN distributed_real_AE, THEN SP.AE_pair] hoelzl@50003: by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure) hoelzl@47694: moreover note Pxyz[THEN distributed_real_AE] wenzelm@53015: ultimately have ae: "AE x in S \\<^sub>M T \\<^sub>M P. hoelzl@47694: Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) - hoelzl@47694: Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) = hoelzl@47694: Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " hoelzl@47694: proof eventually_elim hoelzl@47694: case (goal1 x) hoelzl@47694: show ?case hoelzl@40859: proof cases hoelzl@47694: assume "Pxyz x \ 0" hoelzl@47694: with goal1 have "0 < Px (fst x)" "0 < Pz (snd (snd x))" "0 < Pxz (fst x, snd (snd x))" "0 < Pyz (snd x)" "0 < Pxyz x" hoelzl@47694: by auto hoelzl@47694: then show ?thesis hoelzl@47694: using b_gt_1 by (simp add: log_simps mult_pos_pos less_imp_le field_simps) hoelzl@40859: qed simp hoelzl@40859: qed hoelzl@49787: with I1 I2 show ?eq hoelzl@40859: unfolding conditional_mutual_information_def hoelzl@47694: apply (subst mi_eq) hoelzl@47694: apply (subst mutual_information_distr[OF S TP Px Pyz Pxyz]) hoelzl@47694: apply (subst integral_diff(2)[symmetric]) hoelzl@47694: apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff) hoelzl@47694: done hoelzl@49787: wenzelm@53015: let ?P = "density (S \\<^sub>M T \\<^sub>M P) Pxyz" hoelzl@49787: interpret P: prob_space ?P hoelzl@49787: unfolding distributed_distr_eq_density[OF Pxyz, symmetric] hoelzl@50003: by (rule prob_space_distr) simp hoelzl@49787: wenzelm@53015: let ?Q = "density (T \\<^sub>M P) Pyz" hoelzl@49787: interpret Q: prob_space ?Q hoelzl@49787: unfolding distributed_distr_eq_density[OF Pyz, symmetric] hoelzl@50003: by (rule prob_space_distr) simp hoelzl@49787: hoelzl@49787: let ?f = "\(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)" hoelzl@49787: hoelzl@49787: from subdensity_real[of snd, OF _ Pyz Pz] wenzelm@53015: have aeX1: "AE x in T \\<^sub>M P. Pz (snd x) = 0 \ Pyz x = 0" by (auto simp: comp_def) wenzelm@53015: have aeX2: "AE x in T \\<^sub>M P. 0 \ Pz (snd x)" hoelzl@50003: using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def dest: distributed_real_AE) hoelzl@49787: wenzelm@53015: have aeX3: "AE y in T \\<^sub>M P. (\\<^sup>+ x. ereal (Pxz (x, snd y)) \S) = ereal (Pz (snd y))" hoelzl@49788: using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz] hoelzl@50003: by (intro TP.AE_pair_measure) (auto dest: distributed_real_AE) hoelzl@49787: wenzelm@53015: have "(\\<^sup>+ x. ?f x \?P) \ (\\<^sup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \(S \\<^sub>M T \\<^sub>M P))" hoelzl@49787: apply (subst positive_integral_density) hoelzl@50003: apply simp hoelzl@49787: apply (rule distributed_AE[OF Pxyz]) hoelzl@50003: apply auto [] hoelzl@49787: apply (rule positive_integral_mono_AE) hoelzl@49787: using ae5 ae6 ae7 ae8 hoelzl@49787: apply eventually_elim hoelzl@49787: apply (auto intro!: divide_nonneg_nonneg mult_nonneg_nonneg) hoelzl@49787: done wenzelm@53015: also have "\ = (\\<^sup>+(y, z). \\<^sup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \S \T \\<^sub>M P)" hoelzl@50003: by (subst STP.positive_integral_snd_measurable[symmetric]) (auto simp add: split_beta') wenzelm@53015: also have "\ = (\\<^sup>+x. ereal (Pyz x) * 1 \T \\<^sub>M P)" hoelzl@49787: apply (rule positive_integral_cong_AE) hoelzl@49787: using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space hoelzl@49787: apply eventually_elim hoelzl@49787: proof (case_tac x, simp del: times_ereal.simps add: space_pair_measure) hoelzl@49787: fix a b assume "Pz b = 0 \ Pyz (a, b) = 0" "0 \ Pz b" "a \ space T \ b \ space P" wenzelm@53015: "(\\<^sup>+ x. ereal (Pxz (x, b)) \S) = ereal (Pz b)" "0 \ Pyz (a, b)" wenzelm@53015: then show "(\\<^sup>+ x. ereal (Pxz (x, b)) * ereal (Pyz (a, b) / Pz b) \S) = ereal (Pyz (a, b))" hoelzl@50003: by (subst positive_integral_multc) hoelzl@50003: (auto intro!: divide_nonneg_nonneg split: prod.split) hoelzl@49787: qed hoelzl@49787: also have "\ = 1" hoelzl@49787: using Q.emeasure_space_1 distributed_AE[OF Pyz] distributed_distr_eq_density[OF Pyz] hoelzl@50003: by (subst positive_integral_density[symmetric]) auto wenzelm@53015: finally have le1: "(\\<^sup>+ x. ?f x \?P) \ 1" . hoelzl@49787: also have "\ < \" by simp wenzelm@53015: finally have fin: "(\\<^sup>+ x. ?f x \?P) \ \" by simp hoelzl@49787: wenzelm@53015: have pos: "(\\<^sup>+ x. ?f x \?P) \ 0" hoelzl@49787: apply (subst positive_integral_density) hoelzl@50003: apply simp hoelzl@49787: apply (rule distributed_AE[OF Pxyz]) hoelzl@50003: apply auto [] hoelzl@49787: apply (simp add: split_beta') hoelzl@49787: proof hoelzl@49787: let ?g = "\x. ereal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))" wenzelm@53015: assume "(\\<^sup>+ x. ?g x \(S \\<^sub>M T \\<^sub>M P)) = 0" wenzelm@53015: then have "AE x in S \\<^sub>M T \\<^sub>M P. ?g x \ 0" hoelzl@50003: by (intro positive_integral_0_iff_AE[THEN iffD1]) auto wenzelm@53015: then have "AE x in S \\<^sub>M T \\<^sub>M P. Pxyz x = 0" hoelzl@49787: using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] hoelzl@49787: by eventually_elim (auto split: split_if_asm simp: mult_le_0_iff divide_le_0_iff) wenzelm@53015: then have "(\\<^sup>+ x. ereal (Pxyz x) \S \\<^sub>M T \\<^sub>M P) = 0" hoelzl@49787: by (subst positive_integral_cong_AE[of _ "\x. 0"]) auto hoelzl@49787: with P.emeasure_space_1 show False hoelzl@50003: by (subst (asm) emeasure_density) (auto cong: positive_integral_cong) hoelzl@49787: qed hoelzl@49787: wenzelm@53015: have neg: "(\\<^sup>+ x. - ?f x \?P) = 0" hoelzl@49787: apply (rule positive_integral_0_iff_AE[THEN iffD2]) hoelzl@50003: apply simp hoelzl@49787: apply (subst AE_density) hoelzl@50003: apply simp hoelzl@49787: using ae5 ae6 ae7 ae8 hoelzl@49787: apply eventually_elim hoelzl@49787: apply (auto intro!: mult_nonneg_nonneg divide_nonneg_nonneg) hoelzl@49787: done hoelzl@49787: wenzelm@53015: have I3: "integrable (S \\<^sub>M T \\<^sub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" hoelzl@49787: apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integral_diff(1)[OF I1 I2]]) hoelzl@49787: using ae hoelzl@50003: apply (auto simp: split_beta') hoelzl@49787: done hoelzl@49787: wenzelm@53015: have "- log b 1 \ - log b (integral\<^sup>L ?P ?f)" hoelzl@49787: proof (intro le_imp_neg_le log_le[OF b_gt_1]) wenzelm@53015: show "0 < integral\<^sup>L ?P ?f" hoelzl@49787: using neg pos fin positive_integral_positive[of ?P ?f] wenzelm@53015: by (cases "(\\<^sup>+ x. ?f x \?P)") (auto simp add: lebesgue_integral_def less_le split_beta') wenzelm@53015: show "integral\<^sup>L ?P ?f \ 1" hoelzl@49787: using neg le1 fin positive_integral_positive[of ?P ?f] wenzelm@53015: by (cases "(\\<^sup>+ x. ?f x \?P)") (auto simp add: lebesgue_integral_def split_beta' one_ereal_def) hoelzl@49787: qed wenzelm@53015: also have "- log b (integral\<^sup>L ?P ?f) \ (\ x. - log b (?f x) \?P)" hoelzl@49787: proof (rule P.jensens_inequality[where a=0 and b=1 and I="{0<..}"]) hoelzl@49787: show "AE x in ?P. ?f x \ {0<..}" hoelzl@49787: unfolding AE_density[OF distributed_borel_measurable[OF Pxyz]] hoelzl@49787: using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] hoelzl@49787: by eventually_elim (auto simp: divide_pos_pos mult_pos_pos) hoelzl@49787: show "integrable ?P ?f" hoelzl@49787: unfolding integrable_def hoelzl@50003: using fin neg by (auto simp: split_beta') hoelzl@49787: show "integrable ?P (\x. - log b (?f x))" hoelzl@49787: apply (subst integral_density) hoelzl@50003: apply simp hoelzl@50003: apply (auto intro!: distributed_real_AE[OF Pxyz]) [] hoelzl@50003: apply simp hoelzl@49787: apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3]) hoelzl@50003: apply simp hoelzl@50003: apply simp hoelzl@49787: using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] hoelzl@49787: apply eventually_elim hoelzl@49787: apply (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps) hoelzl@49787: done hoelzl@49787: qed (auto simp: b_gt_1 minus_log_convex) hoelzl@49787: also have "\ = conditional_mutual_information b S T P X Y Z" hoelzl@49787: unfolding `?eq` hoelzl@49787: apply (subst integral_density) hoelzl@50003: apply simp hoelzl@50003: apply (auto intro!: distributed_real_AE[OF Pxyz]) [] hoelzl@50003: apply simp hoelzl@49787: apply (intro integral_cong_AE) hoelzl@49787: using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] hoelzl@49787: apply eventually_elim hoelzl@49787: apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps) hoelzl@49787: done hoelzl@49787: finally show ?nonneg hoelzl@49787: by simp hoelzl@40859: qed hoelzl@40859: hoelzl@49803: lemma (in information_space) hoelzl@49803: fixes Px :: "_ \ real" hoelzl@49803: assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" and P: "sigma_finite_measure P" hoelzl@49803: assumes Fx: "finite_entropy S X Px" hoelzl@49803: assumes Fz: "finite_entropy P Z Pz" wenzelm@53015: assumes Fyz: "finite_entropy (T \\<^sub>M P) (\x. (Y x, Z x)) Pyz" wenzelm@53015: assumes Fxz: "finite_entropy (S \\<^sub>M P) (\x. (X x, Z x)) Pxz" wenzelm@53015: assumes Fxyz: "finite_entropy (S \\<^sub>M T \\<^sub>M P) (\x. (X x, Y x, Z x)) Pxyz" hoelzl@49803: shows conditional_mutual_information_generic_eq': "conditional_mutual_information b S T P X Y Z wenzelm@53015: = (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \(S \\<^sub>M T \\<^sub>M P))" (is "?eq") hoelzl@49803: and conditional_mutual_information_generic_nonneg': "0 \ conditional_mutual_information b S T P X Y Z" (is "?nonneg") hoelzl@49803: proof - hoelzl@50003: note Px = Fx[THEN finite_entropy_distributed, measurable] hoelzl@50003: note Pz = Fz[THEN finite_entropy_distributed, measurable] hoelzl@50003: note Pyz = Fyz[THEN finite_entropy_distributed, measurable] hoelzl@50003: note Pxz = Fxz[THEN finite_entropy_distributed, measurable] hoelzl@50003: note Pxyz = Fxyz[THEN finite_entropy_distributed, measurable] hoelzl@49803: hoelzl@49803: interpret S: sigma_finite_measure S by fact hoelzl@49803: interpret T: sigma_finite_measure T by fact hoelzl@49803: interpret P: sigma_finite_measure P by fact hoelzl@49803: interpret TP: pair_sigma_finite T P .. hoelzl@49803: interpret SP: pair_sigma_finite S P .. hoelzl@49803: interpret ST: pair_sigma_finite S T .. wenzelm@53015: interpret SPT: pair_sigma_finite "S \\<^sub>M P" T .. wenzelm@53015: interpret STP: pair_sigma_finite S "T \\<^sub>M P" .. wenzelm@53015: interpret TPS: pair_sigma_finite "T \\<^sub>M P" S .. wenzelm@53015: have TP: "sigma_finite_measure (T \\<^sub>M P)" .. wenzelm@53015: have SP: "sigma_finite_measure (S \\<^sub>M P)" .. hoelzl@49803: wenzelm@53015: from Pxz Pxyz have distr_eq: "distr M (S \\<^sub>M P) (\x. (X x, Z x)) = wenzelm@53015: distr (distr M (S \\<^sub>M T \\<^sub>M P) (\x. (X x, Y x, Z x))) (S \\<^sub>M P) (\(x, y, z). (x, z))" hoelzl@50003: by (simp add: distr_distr comp_def) hoelzl@49803: hoelzl@49803: have "mutual_information b S P X Z = wenzelm@53015: (\x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \(S \\<^sub>M P))" hoelzl@49803: by (rule mutual_information_distr[OF S P Px Pz Pxz]) wenzelm@53015: also have "\ = (\(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \(S \\<^sub>M T \\<^sub>M P))" hoelzl@49803: using b_gt_1 Pxz Px Pz hoelzl@49803: by (subst distributed_transform_integral[OF Pxyz Pxz, where T="\(x, y, z). (x, z)"]) hoelzl@50003: (auto simp: split_beta') hoelzl@49803: finally have mi_eq: wenzelm@53015: "mutual_information b S P X Z = (\(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \(S \\<^sub>M T \\<^sub>M P))" . hoelzl@49803: wenzelm@53015: have ae1: "AE x in S \\<^sub>M T \\<^sub>M P. Px (fst x) = 0 \ Pxyz x = 0" hoelzl@49803: by (intro subdensity_real[of fst, OF _ Pxyz Px]) auto wenzelm@53015: moreover have ae2: "AE x in S \\<^sub>M T \\<^sub>M P. Pz (snd (snd x)) = 0 \ Pxyz x = 0" hoelzl@50003: by (intro subdensity_real[of "\x. snd (snd x)", OF _ Pxyz Pz]) auto wenzelm@53015: moreover have ae3: "AE x in S \\<^sub>M T \\<^sub>M P. Pxz (fst x, snd (snd x)) = 0 \ Pxyz x = 0" hoelzl@50003: by (intro subdensity_real[of "\x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) auto wenzelm@53015: moreover have ae4: "AE x in S \\<^sub>M T \\<^sub>M P. Pyz (snd x) = 0 \ Pxyz x = 0" hoelzl@50003: by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) auto wenzelm@53015: moreover have ae5: "AE x in S \\<^sub>M T \\<^sub>M P. 0 \ Px (fst x)" hoelzl@50003: using Px by (intro STP.AE_pair_measure) (auto dest: distributed_real_AE) wenzelm@53015: moreover have ae6: "AE x in S \\<^sub>M T \\<^sub>M P. 0 \ Pyz (snd x)" hoelzl@50003: using Pyz by (intro STP.AE_pair_measure) (auto dest: distributed_real_AE) wenzelm@53015: moreover have ae7: "AE x in S \\<^sub>M T \\<^sub>M P. 0 \ Pz (snd (snd x))" hoelzl@50003: using Pz Pz[THEN distributed_real_measurable] by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure AE_I2[of S] dest: distributed_real_AE) wenzelm@53015: moreover have ae8: "AE x in S \\<^sub>M T \\<^sub>M P. 0 \ Pxz (fst x, snd (snd x))" hoelzl@49803: using Pxz[THEN distributed_real_AE, THEN SP.AE_pair] hoelzl@49803: by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure simp: comp_def) hoelzl@49803: moreover note ae9 = Pxyz[THEN distributed_real_AE] wenzelm@53015: ultimately have ae: "AE x in S \\<^sub>M T \\<^sub>M P. hoelzl@49803: Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) - hoelzl@49803: Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) = hoelzl@49803: Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " hoelzl@49803: proof eventually_elim hoelzl@49803: case (goal1 x) hoelzl@49803: show ?case hoelzl@49803: proof cases hoelzl@49803: assume "Pxyz x \ 0" hoelzl@49803: with goal1 have "0 < Px (fst x)" "0 < Pz (snd (snd x))" "0 < Pxz (fst x, snd (snd x))" "0 < Pyz (snd x)" "0 < Pxyz x" hoelzl@49803: by auto hoelzl@49803: then show ?thesis hoelzl@49803: using b_gt_1 by (simp add: log_simps mult_pos_pos less_imp_le field_simps) hoelzl@49803: qed simp hoelzl@49803: qed hoelzl@49803: wenzelm@53015: have "integrable (S \\<^sub>M T \\<^sub>M P) hoelzl@49803: (\x. Pxyz x * log b (Pxyz x) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pyz (snd x)))" hoelzl@49803: using finite_entropy_integrable[OF Fxyz] hoelzl@49803: using finite_entropy_integrable_transform[OF Fx Pxyz, of fst] hoelzl@49803: using finite_entropy_integrable_transform[OF Fyz Pxyz, of snd] hoelzl@49803: by simp wenzelm@53015: moreover have "(\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z)))) \ borel_measurable (S \\<^sub>M T \\<^sub>M P)" hoelzl@50003: using Pxyz Px Pyz by simp wenzelm@53015: ultimately have I1: "integrable (S \\<^sub>M T \\<^sub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" hoelzl@49803: apply (rule integrable_cong_AE_imp) hoelzl@49803: using ae1 ae4 ae5 ae6 ae9 hoelzl@49803: by eventually_elim hoelzl@49803: (auto simp: log_divide_eq log_mult_eq mult_nonneg_nonneg field_simps zero_less_mult_iff) hoelzl@49803: wenzelm@53015: have "integrable (S \\<^sub>M T \\<^sub>M P) hoelzl@49803: (\x. Pxyz x * log b (Pxz (fst x, snd (snd x))) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pz (snd (snd x))))" hoelzl@49803: using finite_entropy_integrable_transform[OF Fxz Pxyz, of "\x. (fst x, snd (snd x))"] hoelzl@49803: using finite_entropy_integrable_transform[OF Fx Pxyz, of fst] hoelzl@49803: using finite_entropy_integrable_transform[OF Fz Pxyz, of "snd \ snd"] hoelzl@50003: by simp wenzelm@53015: moreover have "(\(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z))) \ borel_measurable (S \\<^sub>M T \\<^sub>M P)" hoelzl@49803: using Pxyz Px Pz hoelzl@50003: by auto wenzelm@53015: ultimately have I2: "integrable (S \\<^sub>M T \\<^sub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" hoelzl@49803: apply (rule integrable_cong_AE_imp) hoelzl@49803: using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 ae9 hoelzl@49803: by eventually_elim hoelzl@49803: (auto simp: log_divide_eq log_mult_eq mult_nonneg_nonneg field_simps zero_less_mult_iff) hoelzl@49803: hoelzl@49803: from ae I1 I2 show ?eq hoelzl@49803: unfolding conditional_mutual_information_def hoelzl@49803: apply (subst mi_eq) hoelzl@49803: apply (subst mutual_information_distr[OF S TP Px Pyz Pxyz]) hoelzl@49803: apply (subst integral_diff(2)[symmetric]) hoelzl@49803: apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff) hoelzl@49803: done hoelzl@49803: wenzelm@53015: let ?P = "density (S \\<^sub>M T \\<^sub>M P) Pxyz" hoelzl@49803: interpret P: prob_space ?P hoelzl@50003: unfolding distributed_distr_eq_density[OF Pxyz, symmetric] by (rule prob_space_distr) simp hoelzl@49803: wenzelm@53015: let ?Q = "density (T \\<^sub>M P) Pyz" hoelzl@49803: interpret Q: prob_space ?Q hoelzl@50003: unfolding distributed_distr_eq_density[OF Pyz, symmetric] by (rule prob_space_distr) simp hoelzl@49803: hoelzl@49803: let ?f = "\(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)" hoelzl@49803: hoelzl@49803: from subdensity_real[of snd, OF _ Pyz Pz] wenzelm@53015: have aeX1: "AE x in T \\<^sub>M P. Pz (snd x) = 0 \ Pyz x = 0" by (auto simp: comp_def) wenzelm@53015: have aeX2: "AE x in T \\<^sub>M P. 0 \ Pz (snd x)" hoelzl@50003: using Pz by (intro TP.AE_pair_measure) (auto dest: distributed_real_AE) hoelzl@49803: wenzelm@53015: have aeX3: "AE y in T \\<^sub>M P. (\\<^sup>+ x. ereal (Pxz (x, snd y)) \S) = ereal (Pz (snd y))" hoelzl@49803: using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz] hoelzl@50003: by (intro TP.AE_pair_measure) (auto dest: distributed_real_AE) wenzelm@53015: have "(\\<^sup>+ x. ?f x \?P) \ (\\<^sup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \(S \\<^sub>M T \\<^sub>M P))" hoelzl@49803: apply (subst positive_integral_density) hoelzl@49803: apply (rule distributed_borel_measurable[OF Pxyz]) hoelzl@49803: apply (rule distributed_AE[OF Pxyz]) hoelzl@50003: apply simp hoelzl@49803: apply (rule positive_integral_mono_AE) hoelzl@49803: using ae5 ae6 ae7 ae8 hoelzl@49803: apply eventually_elim hoelzl@49803: apply (auto intro!: divide_nonneg_nonneg mult_nonneg_nonneg) hoelzl@49803: done wenzelm@53015: also have "\ = (\\<^sup>+(y, z). \\<^sup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \S \T \\<^sub>M P)" hoelzl@49803: by (subst STP.positive_integral_snd_measurable[symmetric]) hoelzl@50003: (auto simp add: split_beta') wenzelm@53015: also have "\ = (\\<^sup>+x. ereal (Pyz x) * 1 \T \\<^sub>M P)" hoelzl@49803: apply (rule positive_integral_cong_AE) hoelzl@49803: using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space hoelzl@49803: apply eventually_elim hoelzl@49803: proof (case_tac x, simp del: times_ereal.simps add: space_pair_measure) hoelzl@49803: fix a b assume "Pz b = 0 \ Pyz (a, b) = 0" "0 \ Pz b" "a \ space T \ b \ space P" wenzelm@53015: "(\\<^sup>+ x. ereal (Pxz (x, b)) \S) = ereal (Pz b)" "0 \ Pyz (a, b)" wenzelm@53015: then show "(\\<^sup>+ x. ereal (Pxz (x, b)) * ereal (Pyz (a, b) / Pz b) \S) = ereal (Pyz (a, b))" hoelzl@50003: by (subst positive_integral_multc) (auto intro!: divide_nonneg_nonneg) hoelzl@49803: qed hoelzl@49803: also have "\ = 1" hoelzl@49803: using Q.emeasure_space_1 distributed_AE[OF Pyz] distributed_distr_eq_density[OF Pyz] hoelzl@50003: by (subst positive_integral_density[symmetric]) auto wenzelm@53015: finally have le1: "(\\<^sup>+ x. ?f x \?P) \ 1" . hoelzl@49803: also have "\ < \" by simp wenzelm@53015: finally have fin: "(\\<^sup>+ x. ?f x \?P) \ \" by simp hoelzl@49803: wenzelm@53015: have pos: "(\\<^sup>+ x. ?f x \?P) \ 0" hoelzl@49803: apply (subst positive_integral_density) hoelzl@50003: apply simp hoelzl@49803: apply (rule distributed_AE[OF Pxyz]) hoelzl@50003: apply simp hoelzl@49803: apply (simp add: split_beta') hoelzl@49803: proof hoelzl@49803: let ?g = "\x. ereal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))" wenzelm@53015: assume "(\\<^sup>+ x. ?g x \(S \\<^sub>M T \\<^sub>M P)) = 0" wenzelm@53015: then have "AE x in S \\<^sub>M T \\<^sub>M P. ?g x \ 0" hoelzl@50003: by (intro positive_integral_0_iff_AE[THEN iffD1]) (auto intro!: borel_measurable_ereal measurable_If) wenzelm@53015: then have "AE x in S \\<^sub>M T \\<^sub>M P. Pxyz x = 0" hoelzl@49803: using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] hoelzl@49803: by eventually_elim (auto split: split_if_asm simp: mult_le_0_iff divide_le_0_iff) wenzelm@53015: then have "(\\<^sup>+ x. ereal (Pxyz x) \S \\<^sub>M T \\<^sub>M P) = 0" hoelzl@49803: by (subst positive_integral_cong_AE[of _ "\x. 0"]) auto hoelzl@49803: with P.emeasure_space_1 show False hoelzl@50003: by (subst (asm) emeasure_density) (auto cong: positive_integral_cong) hoelzl@49803: qed hoelzl@49803: wenzelm@53015: have neg: "(\\<^sup>+ x. - ?f x \?P) = 0" hoelzl@49803: apply (rule positive_integral_0_iff_AE[THEN iffD2]) hoelzl@50003: apply (auto simp: split_beta') [] hoelzl@49803: apply (subst AE_density) hoelzl@50003: apply (auto simp: split_beta') [] hoelzl@49803: using ae5 ae6 ae7 ae8 hoelzl@49803: apply eventually_elim hoelzl@49803: apply (auto intro!: mult_nonneg_nonneg divide_nonneg_nonneg) hoelzl@49803: done hoelzl@49803: wenzelm@53015: have I3: "integrable (S \\<^sub>M T \\<^sub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" hoelzl@49803: apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integral_diff(1)[OF I1 I2]]) hoelzl@49803: using ae hoelzl@50003: apply (auto simp: split_beta') hoelzl@49803: done hoelzl@49803: wenzelm@53015: have "- log b 1 \ - log b (integral\<^sup>L ?P ?f)" hoelzl@49803: proof (intro le_imp_neg_le log_le[OF b_gt_1]) wenzelm@53015: show "0 < integral\<^sup>L ?P ?f" hoelzl@49803: using neg pos fin positive_integral_positive[of ?P ?f] wenzelm@53015: by (cases "(\\<^sup>+ x. ?f x \?P)") (auto simp add: lebesgue_integral_def less_le split_beta') wenzelm@53015: show "integral\<^sup>L ?P ?f \ 1" hoelzl@49803: using neg le1 fin positive_integral_positive[of ?P ?f] wenzelm@53015: by (cases "(\\<^sup>+ x. ?f x \?P)") (auto simp add: lebesgue_integral_def split_beta' one_ereal_def) hoelzl@49803: qed wenzelm@53015: also have "- log b (integral\<^sup>L ?P ?f) \ (\ x. - log b (?f x) \?P)" hoelzl@49803: proof (rule P.jensens_inequality[where a=0 and b=1 and I="{0<..}"]) hoelzl@49803: show "AE x in ?P. ?f x \ {0<..}" hoelzl@49803: unfolding AE_density[OF distributed_borel_measurable[OF Pxyz]] hoelzl@49803: using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] hoelzl@49803: by eventually_elim (auto simp: divide_pos_pos mult_pos_pos) hoelzl@49803: show "integrable ?P ?f" hoelzl@49803: unfolding integrable_def hoelzl@50003: using fin neg by (auto simp: split_beta') hoelzl@49803: show "integrable ?P (\x. - log b (?f x))" hoelzl@49803: apply (subst integral_density) hoelzl@50003: apply simp hoelzl@50003: apply (auto intro!: distributed_real_AE[OF Pxyz]) [] hoelzl@50003: apply simp hoelzl@49803: apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3]) hoelzl@50003: apply simp hoelzl@50003: apply simp hoelzl@49803: using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] hoelzl@49803: apply eventually_elim hoelzl@49803: apply (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps) hoelzl@49803: done hoelzl@49803: qed (auto simp: b_gt_1 minus_log_convex) hoelzl@49803: also have "\ = conditional_mutual_information b S T P X Y Z" hoelzl@49803: unfolding `?eq` hoelzl@49803: apply (subst integral_density) hoelzl@50003: apply simp hoelzl@50003: apply (auto intro!: distributed_real_AE[OF Pxyz]) [] hoelzl@50003: apply simp hoelzl@49803: apply (intro integral_cong_AE) hoelzl@49803: using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] hoelzl@49803: apply eventually_elim hoelzl@49803: apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps) hoelzl@49803: done hoelzl@49803: finally show ?nonneg hoelzl@49803: by simp hoelzl@49803: qed hoelzl@49803: hoelzl@40859: lemma (in information_space) conditional_mutual_information_eq: hoelzl@47694: assumes Pz: "simple_distributed M Z Pz" hoelzl@47694: assumes Pyz: "simple_distributed M (\x. (Y x, Z x)) Pyz" hoelzl@47694: assumes Pxz: "simple_distributed M (\x. (X x, Z x)) Pxz" hoelzl@47694: assumes Pxyz: "simple_distributed M (\x. (X x, Y x, Z x)) Pxyz" hoelzl@47694: shows "\(X ; Y | Z) = hoelzl@47694: (\(x, y, z)\(\x. (X x, Y x, Z x))`space M. Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" hoelzl@47694: proof (subst conditional_mutual_information_generic_eq[OF _ _ _ _ hoelzl@47694: simple_distributed[OF Pz] simple_distributed_joint[OF Pyz] simple_distributed_joint[OF Pxz] hoelzl@47694: simple_distributed_joint2[OF Pxyz]]) hoelzl@47694: note simple_distributed_joint2_finite[OF Pxyz, simp] hoelzl@47694: show "sigma_finite_measure (count_space (X ` space M))" hoelzl@47694: by (simp add: sigma_finite_measure_count_space_finite) hoelzl@47694: show "sigma_finite_measure (count_space (Y ` space M))" hoelzl@47694: by (simp add: sigma_finite_measure_count_space_finite) hoelzl@47694: show "sigma_finite_measure (count_space (Z ` space M))" hoelzl@47694: by (simp add: sigma_finite_measure_count_space_finite) wenzelm@53015: have "count_space (X ` space M) \\<^sub>M count_space (Y ` space M) \\<^sub>M count_space (Z ` space M) = hoelzl@47694: count_space (X`space M \ Y`space M \ Z`space M)" hoelzl@47694: (is "?P = ?C") hoelzl@47694: by (simp add: pair_measure_count_space) hoelzl@40859: hoelzl@47694: let ?Px = "\x. measure M (X -` {x} \ space M)" wenzelm@53015: have "(\x. (X x, Z x)) \ measurable M (count_space (X ` space M) \\<^sub>M count_space (Z ` space M))" hoelzl@47694: using simple_distributed_joint[OF Pxz] by (rule distributed_measurable) hoelzl@47694: from measurable_comp[OF this measurable_fst] hoelzl@47694: have "random_variable (count_space (X ` space M)) X" hoelzl@47694: by (simp add: comp_def) hoelzl@47694: then have "simple_function M X" hoelzl@50002: unfolding simple_function_def by (auto simp: measurable_count_space_eq2) hoelzl@47694: then have "simple_distributed M X ?Px" hoelzl@47694: by (rule simple_distributedI) auto hoelzl@47694: then show "distributed M (count_space (X ` space M)) X ?Px" hoelzl@47694: by (rule simple_distributed) hoelzl@47694: hoelzl@47694: let ?f = "(\x. if x \ (\x. (X x, Y x, Z x)) ` space M then Pxyz x else 0)" hoelzl@47694: let ?g = "(\x. if x \ (\x. (Y x, Z x)) ` space M then Pyz x else 0)" hoelzl@47694: let ?h = "(\x. if x \ (\x. (X x, Z x)) ` space M then Pxz x else 0)" hoelzl@47694: show hoelzl@47694: "integrable ?P (\(x, y, z). ?f (x, y, z) * log b (?f (x, y, z) / (?Px x * ?g (y, z))))" hoelzl@47694: "integrable ?P (\(x, y, z). ?f (x, y, z) * log b (?h (x, z) / (?Px x * Pz z)))" hoelzl@47694: by (auto intro!: integrable_count_space simp: pair_measure_count_space) hoelzl@47694: let ?i = "\x y z. ?f (x, y, z) * log b (?f (x, y, z) / (?h (x, z) * (?g (y, z) / Pz z)))" hoelzl@47694: let ?j = "\x y z. Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z)))" hoelzl@47694: have "(\(x, y, z). ?i x y z) = (\x. if x \ (\x. (X x, Y x, Z x)) ` space M then ?j (fst x) (fst (snd x)) (snd (snd x)) else 0)" hoelzl@47694: by (auto intro!: ext) hoelzl@47694: then show "(\ (x, y, z). ?i x y z \?P) = (\(x, y, z)\(\x. (X x, Y x, Z x)) ` space M. ?j x y z)" hoelzl@47694: by (auto intro!: setsum_cong simp add: `?P = ?C` lebesgue_integral_count_space_finite simple_distributed_finite setsum_cases split_beta') hoelzl@36624: qed hoelzl@36624: hoelzl@47694: lemma (in information_space) conditional_mutual_information_nonneg: hoelzl@47694: assumes X: "simple_function M X" and Y: "simple_function M Y" and Z: "simple_function M Z" hoelzl@47694: shows "0 \ \(X ; Y | Z)" hoelzl@47694: proof - wenzelm@53015: have [simp]: "count_space (X ` space M) \\<^sub>M count_space (Y ` space M) \\<^sub>M count_space (Z ` space M) = hoelzl@49787: count_space (X`space M \ Y`space M \ Z`space M)" hoelzl@49787: by (simp add: pair_measure_count_space X Y Z simple_functionD) hoelzl@49787: note sf = sigma_finite_measure_count_space_finite[OF simple_functionD(1)] hoelzl@49787: note sd = simple_distributedI[OF _ refl] hoelzl@49787: note sp = simple_function_Pair hoelzl@49787: show ?thesis hoelzl@49787: apply (rule conditional_mutual_information_generic_nonneg[OF sf[OF X] sf[OF Y] sf[OF Z]]) hoelzl@49787: apply (rule simple_distributed[OF sd[OF X]]) hoelzl@49787: apply (rule simple_distributed[OF sd[OF Z]]) hoelzl@49787: apply (rule simple_distributed_joint[OF sd[OF sp[OF Y Z]]]) hoelzl@49787: apply (rule simple_distributed_joint[OF sd[OF sp[OF X Z]]]) hoelzl@49787: apply (rule simple_distributed_joint2[OF sd[OF sp[OF X sp[OF Y Z]]]]) hoelzl@49787: apply (auto intro!: integrable_count_space simp: X Y Z simple_functionD) hoelzl@49787: done hoelzl@36080: qed hoelzl@36080: hoelzl@39097: subsection {* Conditional Entropy *} hoelzl@39097: hoelzl@36080: definition (in prob_space) wenzelm@53015: "conditional_entropy b S T X Y = - (\(x, y). log b (real (RN_deriv (S \\<^sub>M T) (distr M (S \\<^sub>M T) (\x. (X x, Y x))) (x, y)) / wenzelm@53015: real (RN_deriv T (distr M T Y) y)) \distr M (S \\<^sub>M T) (\x. (X x, Y x)))" hoelzl@36080: hoelzl@40859: abbreviation (in information_space) hoelzl@40859: conditional_entropy_Pow ("\'(_ | _')") where hoelzl@47694: "\(X | Y) \ conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y" hoelzl@36080: hoelzl@49791: lemma (in information_space) conditional_entropy_generic_eq: hoelzl@49791: fixes Px :: "'b \ real" and Py :: "'c \ real" hoelzl@49791: assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" hoelzl@50003: assumes Py[measurable]: "distributed M T Y Py" wenzelm@53015: assumes Pxy[measurable]: "distributed M (S \\<^sub>M T) (\x. (X x, Y x)) Pxy" wenzelm@53015: shows "conditional_entropy b S T X Y = - (\(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \(S \\<^sub>M T))" hoelzl@49791: proof - hoelzl@49791: interpret S: sigma_finite_measure S by fact hoelzl@49791: interpret T: sigma_finite_measure T by fact hoelzl@49791: interpret ST: pair_sigma_finite S T .. hoelzl@49791: wenzelm@53015: have "AE x in density (S \\<^sub>M T) (\x. ereal (Pxy x)). Pxy x = real (RN_deriv (S \\<^sub>M T) (distr M (S \\<^sub>M T) (\x. (X x, Y x))) x)" hoelzl@49791: unfolding AE_density[OF distributed_borel_measurable, OF Pxy] hoelzl@49791: unfolding distributed_distr_eq_density[OF Pxy] hoelzl@49791: using distributed_RN_deriv[OF Pxy] hoelzl@49791: by auto hoelzl@49791: moreover wenzelm@53015: have "AE x in density (S \\<^sub>M T) (\x. ereal (Pxy x)). Py (snd x) = real (RN_deriv T (distr M T Y) (snd x))" hoelzl@49791: unfolding AE_density[OF distributed_borel_measurable, OF Pxy] hoelzl@49791: unfolding distributed_distr_eq_density[OF Py] hoelzl@49791: apply (rule ST.AE_pair_measure) immler@50244: apply (auto intro!: sets.sets_Collect borel_measurable_eq measurable_compose[OF _ distributed_real_measurable[OF Py]] hoelzl@49791: distributed_real_measurable[OF Pxy] distributed_real_AE[OF Py] hoelzl@49791: borel_measurable_real_of_ereal measurable_compose[OF _ borel_measurable_RN_deriv_density]) hoelzl@49791: using distributed_RN_deriv[OF Py] hoelzl@49791: apply auto hoelzl@49791: done hoelzl@49791: ultimately wenzelm@53015: have "conditional_entropy b S T X Y = - (\x. Pxy x * log b (Pxy x / Py (snd x)) \(S \\<^sub>M T))" hoelzl@49791: unfolding conditional_entropy_def neg_equal_iff_equal hoelzl@49791: apply (subst integral_density(1)[symmetric]) hoelzl@49791: apply (auto simp: distributed_real_measurable[OF Pxy] distributed_real_AE[OF Pxy] hoelzl@49791: measurable_compose[OF _ distributed_real_measurable[OF Py]] hoelzl@49791: distributed_distr_eq_density[OF Pxy] hoelzl@49791: intro!: integral_cong_AE) hoelzl@49791: done hoelzl@49791: then show ?thesis by (simp add: split_beta') hoelzl@49791: qed hoelzl@49791: hoelzl@49791: lemma (in information_space) conditional_entropy_eq_entropy: hoelzl@47694: fixes Px :: "'b \ real" and Py :: "'c \ real" hoelzl@47694: assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" hoelzl@47694: assumes Py: "distributed M T Y Py" wenzelm@53015: assumes Pxy: "distributed M (S \\<^sub>M T) (\x. (X x, Y x)) Pxy" wenzelm@53015: assumes I1: "integrable (S \\<^sub>M T) (\x. Pxy x * log b (Pxy x))" wenzelm@53015: assumes I2: "integrable (S \\<^sub>M T) (\x. Pxy x * log b (Py (snd x)))" wenzelm@53015: shows "conditional_entropy b S T X Y = entropy b (S \\<^sub>M T) (\x. (X x, Y x)) - entropy b T Y" hoelzl@40859: proof - hoelzl@47694: interpret S: sigma_finite_measure S by fact hoelzl@47694: interpret T: sigma_finite_measure T by fact hoelzl@47694: interpret ST: pair_sigma_finite S T .. hoelzl@47694: hoelzl@47694: have "entropy b T Y = - (\y. Py y * log b (Py y) \T)" hoelzl@49786: by (rule entropy_distr[OF Py]) wenzelm@53015: also have "\ = - (\(x,y). Pxy (x,y) * log b (Py y) \(S \\<^sub>M T))" hoelzl@47694: using b_gt_1 Py[THEN distributed_real_measurable] hoelzl@47694: by (subst distributed_transform_integral[OF Pxy Py, where T=snd]) (auto intro!: integral_cong) wenzelm@53015: finally have e_eq: "entropy b T Y = - (\(x,y). Pxy (x,y) * log b (Py y) \(S \\<^sub>M T))" . hoelzl@49791: wenzelm@53015: have ae2: "AE x in S \\<^sub>M T. Py (snd x) = 0 \ Pxy x = 0" hoelzl@47694: by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair) wenzelm@53015: moreover have ae4: "AE x in S \\<^sub>M T. 0 \ Py (snd x)" hoelzl@47694: using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) hoelzl@49788: moreover note ae5 = Pxy[THEN distributed_real_AE] wenzelm@53015: ultimately have "AE x in S \\<^sub>M T. 0 \ Pxy x \ 0 \ Py (snd x) \ hoelzl@49790: (Pxy x = 0 \ (Pxy x \ 0 \ 0 < Pxy x \ 0 < Py (snd x)))" hoelzl@47694: by eventually_elim auto wenzelm@53015: then have ae: "AE x in S \\<^sub>M T. hoelzl@47694: Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / Py (snd x))" hoelzl@47694: by eventually_elim (auto simp: log_simps mult_pos_pos field_simps b_gt_1) hoelzl@49791: have "conditional_entropy b S T X Y = wenzelm@53015: - (\x. Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) \(S \\<^sub>M T))" hoelzl@49791: unfolding conditional_entropy_generic_eq[OF S T Py Pxy] neg_equal_iff_equal hoelzl@49791: apply (intro integral_cong_AE) hoelzl@49791: using ae hoelzl@49791: apply eventually_elim hoelzl@49791: apply auto hoelzl@47694: done wenzelm@53015: also have "\ = - (\x. Pxy x * log b (Pxy x) \(S \\<^sub>M T)) - - (\x. Pxy x * log b (Py (snd x)) \(S \\<^sub>M T))" hoelzl@49791: by (simp add: integral_diff[OF I1 I2]) hoelzl@49791: finally show ?thesis hoelzl@49791: unfolding conditional_entropy_generic_eq[OF S T Py Pxy] entropy_distr[OF Pxy] e_eq hoelzl@49791: by (simp add: split_beta') hoelzl@49791: qed hoelzl@49791: hoelzl@49791: lemma (in information_space) conditional_entropy_eq_entropy_simple: hoelzl@49791: assumes X: "simple_function M X" and Y: "simple_function M Y" wenzelm@53015: shows "\(X | Y) = entropy b (count_space (X`space M) \\<^sub>M count_space (Y`space M)) (\x. (X x, Y x)) - \(Y)" hoelzl@49791: proof - wenzelm@53015: have "count_space (X ` space M) \\<^sub>M count_space (Y ` space M) = count_space (X`space M \ Y`space M)" hoelzl@49791: (is "?P = ?C") using X Y by (simp add: simple_functionD pair_measure_count_space) hoelzl@49791: show ?thesis hoelzl@49791: by (rule conditional_entropy_eq_entropy sigma_finite_measure_count_space_finite hoelzl@49791: simple_functionD X Y simple_distributed simple_distributedI[OF _ refl] hoelzl@49791: simple_distributed_joint simple_function_Pair integrable_count_space)+ hoelzl@49791: (auto simp: `?P = ?C` intro!: integrable_count_space simple_functionD X Y) hoelzl@39097: qed hoelzl@39097: hoelzl@40859: lemma (in information_space) conditional_entropy_eq: hoelzl@49792: assumes Y: "simple_distributed M Y Py" hoelzl@47694: assumes XY: "simple_distributed M (\x. (X x, Y x)) Pxy" hoelzl@47694: shows "\(X | Y) = - (\(x, y)\(\x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / Py y))" hoelzl@47694: proof (subst conditional_entropy_generic_eq[OF _ _ hoelzl@49790: simple_distributed[OF Y] simple_distributed_joint[OF XY]]) hoelzl@49792: have "finite ((\x. (X x, Y x))`space M)" hoelzl@49792: using XY unfolding simple_distributed_def by auto hoelzl@49792: from finite_imageI[OF this, of fst] hoelzl@49792: have [simp]: "finite (X`space M)" hoelzl@49792: by (simp add: image_compose[symmetric] comp_def) hoelzl@47694: note Y[THEN simple_distributed_finite, simp] hoelzl@47694: show "sigma_finite_measure (count_space (X ` space M))" hoelzl@47694: by (simp add: sigma_finite_measure_count_space_finite) hoelzl@47694: show "sigma_finite_measure (count_space (Y ` space M))" hoelzl@47694: by (simp add: sigma_finite_measure_count_space_finite) hoelzl@47694: let ?f = "(\x. if x \ (\x. (X x, Y x)) ` space M then Pxy x else 0)" wenzelm@53015: have "count_space (X ` space M) \\<^sub>M count_space (Y ` space M) = count_space (X`space M \ Y`space M)" hoelzl@47694: (is "?P = ?C") hoelzl@49792: using Y by (simp add: simple_distributed_finite pair_measure_count_space) hoelzl@47694: have eq: "(\(x, y). ?f (x, y) * log b (?f (x, y) / Py y)) = hoelzl@47694: (\x. if x \ (\x. (X x, Y x)) ` space M then Pxy x * log b (Pxy x / Py (snd x)) else 0)" hoelzl@47694: by auto hoelzl@49792: from Y show "- (\ (x, y). ?f (x, y) * log b (?f (x, y) / Py y) \?P) = hoelzl@47694: - (\(x, y)\(\x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / Py y))" hoelzl@47694: by (auto intro!: setsum_cong simp add: `?P = ?C` lebesgue_integral_count_space_finite simple_distributed_finite eq setsum_cases split_beta') hoelzl@47694: qed hoelzl@39097: hoelzl@47694: lemma (in information_space) conditional_mutual_information_eq_conditional_entropy: hoelzl@41689: assumes X: "simple_function M X" and Y: "simple_function M Y" hoelzl@47694: shows "\(X ; X | Y) = \(X | Y)" hoelzl@47694: proof - hoelzl@47694: def Py \ "\x. if x \ Y`space M then measure M (Y -` {x} \ space M) else 0" hoelzl@47694: def Pxy \ "\x. if x \ (\x. (X x, Y x))`space M then measure M ((\x. (X x, Y x)) -` {x} \ space M) else 0" hoelzl@47694: def Pxxy \ "\x. if x \ (\x. (X x, X x, Y x))`space M then measure M ((\x. (X x, X x, Y x)) -` {x} \ space M) else 0" hoelzl@47694: let ?M = "X`space M \ X`space M \ Y`space M" hoelzl@39097: hoelzl@47694: note XY = simple_function_Pair[OF X Y] hoelzl@47694: note XXY = simple_function_Pair[OF X XY] hoelzl@47694: have Py: "simple_distributed M Y Py" hoelzl@47694: using Y by (rule simple_distributedI) (auto simp: Py_def) hoelzl@47694: have Pxy: "simple_distributed M (\x. (X x, Y x)) Pxy" hoelzl@47694: using XY by (rule simple_distributedI) (auto simp: Pxy_def) hoelzl@47694: have Pxxy: "simple_distributed M (\x. (X x, X x, Y x)) Pxxy" hoelzl@47694: using XXY by (rule simple_distributedI) (auto simp: Pxxy_def) hoelzl@47694: have eq: "(\x. (X x, X x, Y x)) ` space M = (\(x, y). (x, x, y)) ` (\x. (X x, Y x)) ` space M" hoelzl@47694: by auto hoelzl@47694: have inj: "\A. inj_on (\(x, y). (x, x, y)) A" hoelzl@47694: by (auto simp: inj_on_def) hoelzl@47694: have Pxxy_eq: "\x y. Pxxy (x, x, y) = Pxy (x, y)" hoelzl@47694: by (auto simp: Pxxy_def Pxy_def intro!: arg_cong[where f=prob]) hoelzl@47694: have "AE x in count_space ((\x. (X x, Y x))`space M). Py (snd x) = 0 \ Pxy x = 0" hoelzl@47694: by (intro subdensity_real[of snd, OF _ Pxy[THEN simple_distributed] Py[THEN simple_distributed]]) (auto intro: measurable_Pair) hoelzl@47694: then show ?thesis hoelzl@47694: apply (subst conditional_mutual_information_eq[OF Py Pxy Pxy Pxxy]) hoelzl@49792: apply (subst conditional_entropy_eq[OF Py Pxy]) hoelzl@47694: apply (auto intro!: setsum_cong simp: Pxxy_eq setsum_negf[symmetric] eq setsum_reindex[OF inj] hoelzl@47694: log_simps zero_less_mult_iff zero_le_mult_iff field_simps mult_less_0_iff AE_count_space) hoelzl@47694: using Py[THEN simple_distributed, THEN distributed_real_AE] Pxy[THEN simple_distributed, THEN distributed_real_AE] hoelzl@49790: apply (auto simp add: not_le[symmetric] AE_count_space) hoelzl@47694: done hoelzl@47694: qed hoelzl@47694: hoelzl@47694: lemma (in information_space) conditional_entropy_nonneg: hoelzl@47694: assumes X: "simple_function M X" and Y: "simple_function M Y" shows "0 \ \(X | Y)" hoelzl@47694: using conditional_mutual_information_eq_conditional_entropy[OF X Y] conditional_mutual_information_nonneg[OF X X Y] hoelzl@47694: by simp hoelzl@36080: hoelzl@39097: subsection {* Equalities *} hoelzl@39097: hoelzl@47694: lemma (in information_space) mutual_information_eq_entropy_conditional_entropy_distr: hoelzl@47694: fixes Px :: "'b \ real" and Py :: "'c \ real" and Pxy :: "('b \ 'c) \ real" hoelzl@47694: assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" hoelzl@47694: assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" wenzelm@53015: assumes Pxy: "distributed M (S \\<^sub>M T) (\x. (X x, Y x)) Pxy" wenzelm@53015: assumes Ix: "integrable(S \\<^sub>M T) (\x. Pxy x * log b (Px (fst x)))" wenzelm@53015: assumes Iy: "integrable(S \\<^sub>M T) (\x. Pxy x * log b (Py (snd x)))" wenzelm@53015: assumes Ixy: "integrable(S \\<^sub>M T) (\x. Pxy x * log b (Pxy x))" wenzelm@53015: shows "mutual_information b S T X Y = entropy b S X + entropy b T Y - entropy b (S \\<^sub>M T) (\x. (X x, Y x))" hoelzl@40859: proof - wenzelm@53015: have X: "entropy b S X = - (\x. Pxy x * log b (Px (fst x)) \(S \\<^sub>M T))" hoelzl@47694: using b_gt_1 Px[THEN distributed_real_measurable] hoelzl@49786: apply (subst entropy_distr[OF Px]) hoelzl@47694: apply (subst distributed_transform_integral[OF Pxy Px, where T=fst]) hoelzl@47694: apply (auto intro!: integral_cong) hoelzl@47694: done hoelzl@47694: wenzelm@53015: have Y: "entropy b T Y = - (\x. Pxy x * log b (Py (snd x)) \(S \\<^sub>M T))" hoelzl@47694: using b_gt_1 Py[THEN distributed_real_measurable] hoelzl@49786: apply (subst entropy_distr[OF Py]) hoelzl@47694: apply (subst distributed_transform_integral[OF Pxy Py, where T=snd]) hoelzl@47694: apply (auto intro!: integral_cong) hoelzl@47694: done hoelzl@47694: hoelzl@47694: interpret S: sigma_finite_measure S by fact hoelzl@47694: interpret T: sigma_finite_measure T by fact hoelzl@47694: interpret ST: pair_sigma_finite S T .. wenzelm@53015: have ST: "sigma_finite_measure (S \\<^sub>M T)" .. hoelzl@47694: wenzelm@53015: have XY: "entropy b (S \\<^sub>M T) (\x. (X x, Y x)) = - (\x. Pxy x * log b (Pxy x) \(S \\<^sub>M T))" hoelzl@49786: by (subst entropy_distr[OF Pxy]) (auto intro!: integral_cong) hoelzl@47694: wenzelm@53015: have "AE x in S \\<^sub>M T. Px (fst x) = 0 \ Pxy x = 0" hoelzl@47694: by (intro subdensity_real[of fst, OF _ Pxy Px]) (auto intro: measurable_Pair) wenzelm@53015: moreover have "AE x in S \\<^sub>M T. Py (snd x) = 0 \ Pxy x = 0" hoelzl@47694: by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair) wenzelm@53015: moreover have "AE x in S \\<^sub>M T. 0 \ Px (fst x)" hoelzl@47694: using Px by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable) wenzelm@53015: moreover have "AE x in S \\<^sub>M T. 0 \ Py (snd x)" hoelzl@47694: using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) hoelzl@47694: moreover note Pxy[THEN distributed_real_AE] wenzelm@53015: ultimately have "AE x in S \\<^sub>M T. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)) = hoelzl@47694: Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" hoelzl@47694: (is "AE x in _. ?f x = ?g x") hoelzl@47694: proof eventually_elim hoelzl@47694: case (goal1 x) hoelzl@47694: show ?case hoelzl@47694: proof cases hoelzl@47694: assume "Pxy x \ 0" hoelzl@47694: with goal1 have "0 < Px (fst x)" "0 < Py (snd x)" "0 < Pxy x" hoelzl@47694: by auto hoelzl@47694: then show ?thesis hoelzl@47694: using b_gt_1 by (simp add: log_simps mult_pos_pos less_imp_le field_simps) hoelzl@47694: qed simp hoelzl@47694: qed hoelzl@47694: wenzelm@53015: have "entropy b S X + entropy b T Y - entropy b (S \\<^sub>M T) (\x. (X x, Y x)) = integral\<^sup>L (S \\<^sub>M T) ?f" hoelzl@47694: unfolding X Y XY hoelzl@47694: apply (subst integral_diff) hoelzl@47694: apply (intro integral_diff Ixy Ix Iy)+ hoelzl@47694: apply (subst integral_diff) hoelzl@47694: apply (intro integral_diff Ixy Ix Iy)+ hoelzl@47694: apply (simp add: field_simps) hoelzl@47694: done wenzelm@53015: also have "\ = integral\<^sup>L (S \\<^sub>M T) ?g" hoelzl@47694: using `AE x in _. ?f x = ?g x` by (rule integral_cong_AE) hoelzl@47694: also have "\ = mutual_information b S T X Y" hoelzl@47694: by (rule mutual_information_distr[OF S T Px Py Pxy, symmetric]) hoelzl@47694: finally show ?thesis .. hoelzl@47694: qed hoelzl@47694: hoelzl@49802: lemma (in information_space) mutual_information_eq_entropy_conditional_entropy': hoelzl@49802: fixes Px :: "'b \ real" and Py :: "'c \ real" and Pxy :: "('b \ 'c) \ real" hoelzl@49802: assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" hoelzl@49802: assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" wenzelm@53015: assumes Pxy: "distributed M (S \\<^sub>M T) (\x. (X x, Y x)) Pxy" wenzelm@53015: assumes Ix: "integrable(S \\<^sub>M T) (\x. Pxy x * log b (Px (fst x)))" wenzelm@53015: assumes Iy: "integrable(S \\<^sub>M T) (\x. Pxy x * log b (Py (snd x)))" wenzelm@53015: assumes Ixy: "integrable(S \\<^sub>M T) (\x. Pxy x * log b (Pxy x))" hoelzl@49802: shows "mutual_information b S T X Y = entropy b S X - conditional_entropy b S T X Y" hoelzl@49802: using hoelzl@49802: mutual_information_eq_entropy_conditional_entropy_distr[OF S T Px Py Pxy Ix Iy Ixy] hoelzl@49802: conditional_entropy_eq_entropy[OF S T Py Pxy Ixy Iy] hoelzl@49802: by simp hoelzl@49802: hoelzl@47694: lemma (in information_space) mutual_information_eq_entropy_conditional_entropy: hoelzl@47694: assumes sf_X: "simple_function M X" and sf_Y: "simple_function M Y" hoelzl@47694: shows "\(X ; Y) = \(X) - \(X | Y)" hoelzl@47694: proof - hoelzl@47694: have X: "simple_distributed M X (\x. measure M (X -` {x} \ space M))" hoelzl@47694: using sf_X by (rule simple_distributedI) auto hoelzl@47694: have Y: "simple_distributed M Y (\x. measure M (Y -` {x} \ space M))" hoelzl@47694: using sf_Y by (rule simple_distributedI) auto hoelzl@47694: have sf_XY: "simple_function M (\x. (X x, Y x))" hoelzl@47694: using sf_X sf_Y by (rule simple_function_Pair) hoelzl@47694: then have XY: "simple_distributed M (\x. (X x, Y x)) (\x. measure M ((\x. (X x, Y x)) -` {x} \ space M))" hoelzl@47694: by (rule simple_distributedI) auto hoelzl@47694: from simple_distributed_joint_finite[OF this, simp] wenzelm@53015: have eq: "count_space (X ` space M) \\<^sub>M count_space (Y ` space M) = count_space (X ` space M \ Y ` space M)" hoelzl@47694: by (simp add: pair_measure_count_space) hoelzl@47694: wenzelm@53015: have "\(X ; Y) = \(X) + \(Y) - entropy b (count_space (X`space M) \\<^sub>M count_space (Y`space M)) (\x. (X x, Y x))" hoelzl@47694: using sigma_finite_measure_count_space_finite sigma_finite_measure_count_space_finite simple_distributed[OF X] simple_distributed[OF Y] simple_distributed_joint[OF XY] hoelzl@47694: by (rule mutual_information_eq_entropy_conditional_entropy_distr) (auto simp: eq integrable_count_space) hoelzl@47694: then show ?thesis hoelzl@49791: unfolding conditional_entropy_eq_entropy_simple[OF sf_X sf_Y] by simp hoelzl@47694: qed hoelzl@47694: hoelzl@47694: lemma (in information_space) mutual_information_nonneg_simple: hoelzl@47694: assumes sf_X: "simple_function M X" and sf_Y: "simple_function M Y" hoelzl@47694: shows "0 \ \(X ; Y)" hoelzl@47694: proof - hoelzl@47694: have X: "simple_distributed M X (\x. measure M (X -` {x} \ space M))" hoelzl@47694: using sf_X by (rule simple_distributedI) auto hoelzl@47694: have Y: "simple_distributed M Y (\x. measure M (Y -` {x} \ space M))" hoelzl@47694: using sf_Y by (rule simple_distributedI) auto hoelzl@47694: hoelzl@47694: have sf_XY: "simple_function M (\x. (X x, Y x))" hoelzl@47694: using sf_X sf_Y by (rule simple_function_Pair) hoelzl@47694: then have XY: "simple_distributed M (\x. (X x, Y x)) (\x. measure M ((\x. (X x, Y x)) -` {x} \ space M))" hoelzl@47694: by (rule simple_distributedI) auto hoelzl@47694: hoelzl@47694: from simple_distributed_joint_finite[OF this, simp] wenzelm@53015: have eq: "count_space (X ` space M) \\<^sub>M count_space (Y ` space M) = count_space (X ` space M \ Y ` space M)" hoelzl@47694: by (simp add: pair_measure_count_space) hoelzl@47694: hoelzl@40859: show ?thesis hoelzl@47694: by (rule mutual_information_nonneg[OF _ _ simple_distributed[OF X] simple_distributed[OF Y] simple_distributed_joint[OF XY]]) hoelzl@47694: (simp_all add: eq integrable_count_space sigma_finite_measure_count_space_finite) hoelzl@40859: qed hoelzl@36080: hoelzl@40859: lemma (in information_space) conditional_entropy_less_eq_entropy: hoelzl@41689: assumes X: "simple_function M X" and Z: "simple_function M Z" hoelzl@40859: shows "\(X | Z) \ \(X)" hoelzl@36624: proof - hoelzl@47694: have "0 \ \(X ; Z)" using X Z by (rule mutual_information_nonneg_simple) hoelzl@47694: also have "\(X ; Z) = \(X) - \(X | Z)" using mutual_information_eq_entropy_conditional_entropy[OF assms] . hoelzl@47694: finally show ?thesis by auto hoelzl@36080: qed hoelzl@36080: hoelzl@49803: lemma (in information_space) hoelzl@49803: fixes Px :: "'b \ real" and Py :: "'c \ real" and Pxy :: "('b \ 'c) \ real" hoelzl@49803: assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" hoelzl@49803: assumes Px: "finite_entropy S X Px" and Py: "finite_entropy T Y Py" wenzelm@53015: assumes Pxy: "finite_entropy (S \\<^sub>M T) (\x. (X x, Y x)) Pxy" hoelzl@49803: shows "conditional_entropy b S T X Y \ entropy b S X" hoelzl@49803: proof - hoelzl@49803: hoelzl@49803: have "0 \ mutual_information b S T X Y" hoelzl@49803: by (rule mutual_information_nonneg') fact+ hoelzl@49803: also have "\ = entropy b S X - conditional_entropy b S T X Y" hoelzl@49803: apply (rule mutual_information_eq_entropy_conditional_entropy') hoelzl@49803: using assms hoelzl@49803: by (auto intro!: finite_entropy_integrable finite_entropy_distributed hoelzl@49803: finite_entropy_integrable_transform[OF Px] hoelzl@49803: finite_entropy_integrable_transform[OF Py]) hoelzl@49803: finally show ?thesis by auto hoelzl@49803: qed hoelzl@49803: hoelzl@40859: lemma (in information_space) entropy_chain_rule: hoelzl@41689: assumes X: "simple_function M X" and Y: "simple_function M Y" hoelzl@40859: shows "\(\x. (X x, Y x)) = \(X) + \(Y|X)" hoelzl@40859: proof - hoelzl@47694: note XY = simple_distributedI[OF simple_function_Pair[OF X Y] refl] hoelzl@47694: note YX = simple_distributedI[OF simple_function_Pair[OF Y X] refl] hoelzl@47694: note simple_distributed_joint_finite[OF this, simp] hoelzl@47694: let ?f = "\x. prob ((\x. (X x, Y x)) -` {x} \ space M)" hoelzl@47694: let ?g = "\x. prob ((\x. (Y x, X x)) -` {x} \ space M)" hoelzl@47694: let ?h = "\x. if x \ (\x. (Y x, X x)) ` space M then prob ((\x. (Y x, X x)) -` {x} \ space M) else 0" hoelzl@47694: have "\(\x. (X x, Y x)) = - (\x\(\x. (X x, Y x)) ` space M. ?f x * log b (?f x))" hoelzl@47694: using XY by (rule entropy_simple_distributed) hoelzl@47694: also have "\ = - (\x\(\(x, y). (y, x)) ` (\x. (X x, Y x)) ` space M. ?g x * log b (?g x))" hoelzl@47694: by (subst (2) setsum_reindex) (auto simp: inj_on_def intro!: setsum_cong arg_cong[where f="\A. prob A * log b (prob A)"]) hoelzl@47694: also have "\ = - (\x\(\x. (Y x, X x)) ` space M. ?h x * log b (?h x))" hoelzl@47694: by (auto intro!: setsum_cong) wenzelm@53015: also have "\ = entropy b (count_space (Y ` space M) \\<^sub>M count_space (X ` space M)) (\x. (Y x, X x))" hoelzl@49786: by (subst entropy_distr[OF simple_distributed_joint[OF YX]]) hoelzl@47694: (auto simp: pair_measure_count_space sigma_finite_measure_count_space_finite lebesgue_integral_count_space_finite hoelzl@47694: cong del: setsum_cong intro!: setsum_mono_zero_left) wenzelm@53015: finally have "\(\x. (X x, Y x)) = entropy b (count_space (Y ` space M) \\<^sub>M count_space (X ` space M)) (\x. (Y x, X x))" . hoelzl@47694: then show ?thesis hoelzl@49791: unfolding conditional_entropy_eq_entropy_simple[OF Y X] by simp hoelzl@36624: qed hoelzl@36624: hoelzl@40859: lemma (in information_space) entropy_partition: hoelzl@47694: assumes X: "simple_function M X" hoelzl@47694: shows "\(X) = \(f \ X) + \(X|f \ X)" hoelzl@36624: proof - hoelzl@47694: note fX = simple_function_compose[OF X, of f] hoelzl@47694: have eq: "(\x. ((f \ X) x, X x)) ` space M = (\x. (f x, x)) ` X ` space M" by auto hoelzl@47694: have inj: "\A. inj_on (\x. (f x, x)) A" hoelzl@47694: by (auto simp: inj_on_def) hoelzl@47694: show ?thesis hoelzl@47694: apply (subst entropy_chain_rule[symmetric, OF fX X]) hoelzl@47694: apply (subst entropy_simple_distributed[OF simple_distributedI[OF simple_function_Pair[OF fX X] refl]]) hoelzl@47694: apply (subst entropy_simple_distributed[OF simple_distributedI[OF X refl]]) hoelzl@47694: unfolding eq hoelzl@47694: apply (subst setsum_reindex[OF inj]) hoelzl@47694: apply (auto intro!: setsum_cong arg_cong[where f="\A. prob A * log b (prob A)"]) hoelzl@47694: done hoelzl@36624: qed hoelzl@36624: hoelzl@40859: corollary (in information_space) entropy_data_processing: hoelzl@41689: assumes X: "simple_function M X" shows "\(f \ X) \ \(X)" hoelzl@40859: proof - hoelzl@47694: note fX = simple_function_compose[OF X, of f] hoelzl@47694: from X have "\(X) = \(f\X) + \(X|f\X)" by (rule entropy_partition) hoelzl@40859: then show "\(f \ X) \ \(X)" hoelzl@47694: by (auto intro: conditional_entropy_nonneg[OF X fX]) hoelzl@40859: qed hoelzl@36624: hoelzl@40859: corollary (in information_space) entropy_of_inj: hoelzl@41689: assumes X: "simple_function M X" and inj: "inj_on f (X`space M)" hoelzl@36624: shows "\(f \ X) = \(X)" hoelzl@36624: proof (rule antisym) hoelzl@40859: show "\(f \ X) \ \(X)" using entropy_data_processing[OF X] . hoelzl@36624: next hoelzl@41689: have sf: "simple_function M (f \ X)" hoelzl@40859: using X by auto hoelzl@36624: have "\(X) = \(the_inv_into (X`space M) f \ (f \ X))" hoelzl@47694: unfolding o_assoc hoelzl@47694: apply (subst entropy_simple_distributed[OF simple_distributedI[OF X refl]]) hoelzl@47694: apply (subst entropy_simple_distributed[OF simple_distributedI[OF simple_function_compose[OF X]], where f="\x. prob (X -` {x} \ space M)"]) hoelzl@47694: apply (auto intro!: setsum_cong arg_cong[where f=prob] image_eqI simp: the_inv_into_f_f[OF inj] comp_def) hoelzl@47694: done hoelzl@36624: also have "... \ \(f \ X)" hoelzl@40859: using entropy_data_processing[OF sf] . hoelzl@36624: finally show "\(X) \ \(f \ X)" . hoelzl@36624: qed hoelzl@36624: hoelzl@36080: end