# HG changeset patch # User hoelzl # Date 1349863956 -7200 # Node ID 2f076e377703f6e8e6ac91a52cf5f76dca611259 # Parent dd8dffaf84b91ef7e45aeaac0c4efae2cdf9a427 add finite entropy diff -r dd8dffaf84b9 -r 2f076e377703 src/HOL/Probability/Information.thy --- a/src/HOL/Probability/Information.thy Wed Oct 10 12:12:36 2012 +0200 +++ b/src/HOL/Probability/Information.thy Wed Oct 10 12:12:36 2012 +0200 @@ -339,6 +339,115 @@ finally show ?thesis . qed +subsection {* Finite Entropy *} + +definition (in information_space) + "finite_entropy S X f \ distributed M S X f \ integrable S (\x. f x * log b (f x))" + +lemma (in information_space) finite_entropy_simple_function: + assumes X: "simple_function M X" + shows "finite_entropy (count_space (X`space M)) X (\a. measure M {x \ space M. X x = a})" + unfolding finite_entropy_def +proof + have [simp]: "finite (X ` space M)" + using X by (auto simp: simple_function_def) + then show "integrable (count_space (X ` space M)) + (\x. prob {xa \ space M. X xa = x} * log b (prob {xa \ space M. X xa = x}))" + by (rule integrable_count_space) + have d: "distributed M (count_space (X ` space M)) X (\x. ereal (if x \ X`space M then prob {xa \ space M. X xa = x} else 0))" + by (rule distributed_simple_function_superset[OF X]) (auto intro!: arg_cong[where f=prob]) + show "distributed M (count_space (X ` space M)) X (\x. ereal (prob {xa \ space M. X xa = x}))" + by (rule distributed_cong_density[THEN iffD1, OF _ _ _ d]) auto +qed + +lemma distributed_transform_AE: + assumes T: "T \ measurable P Q" "absolutely_continuous Q (distr P Q T)" + assumes g: "distributed M Q Y g" + shows "AE x in P. 0 \ g (T x)" + using g + apply (subst AE_distr_iff[symmetric, OF T(1)]) + apply (simp add: distributed_borel_measurable) + apply (rule absolutely_continuous_AE[OF _ T(2)]) + apply simp + apply (simp add: distributed_AE) + done + +lemma ac_fst: + assumes "sigma_finite_measure T" + shows "absolutely_continuous S (distr (S \\<^isub>M T) S fst)" +proof - + interpret sigma_finite_measure T by fact + { fix A assume "A \ sets S" "emeasure S A = 0" + moreover then have "fst -` A \ space (S \\<^isub>M T) = A \ space T" + by (auto simp: space_pair_measure dest!: sets_into_space) + ultimately have "emeasure (S \\<^isub>M T) (fst -` A \ space (S \\<^isub>M T)) = 0" + by (simp add: emeasure_pair_measure_Times) } + then show ?thesis + unfolding absolutely_continuous_def + apply (auto simp: null_sets_distr_iff) + apply (auto simp: null_sets_def intro!: measurable_sets) + done +qed + +lemma ac_snd: + assumes "sigma_finite_measure T" + shows "absolutely_continuous T (distr (S \\<^isub>M T) T snd)" +proof - + interpret sigma_finite_measure T by fact + { fix A assume "A \ sets T" "emeasure T A = 0" + moreover then have "snd -` A \ space (S \\<^isub>M T) = space S \ A" + by (auto simp: space_pair_measure dest!: sets_into_space) + ultimately have "emeasure (S \\<^isub>M T) (snd -` A \ space (S \\<^isub>M T)) = 0" + by (simp add: emeasure_pair_measure_Times) } + then show ?thesis + unfolding absolutely_continuous_def + apply (auto simp: null_sets_distr_iff) + apply (auto simp: null_sets_def intro!: measurable_sets) + done +qed + +lemma distributed_integrable: + "distributed M N X f \ g \ borel_measurable N \ + integrable N (\x. f x * g x) \ integrable M (\x. g (X x))" + by (auto simp: distributed_real_measurable distributed_real_AE distributed_measurable + distributed_distr_eq_density[symmetric] integral_density[symmetric] integrable_distr_eq) + +lemma distributed_transform_integrable: + assumes Px: "distributed M N X Px" + assumes "distributed M P Y Py" + assumes Y: "Y = (\x. T (X x))" and T: "T \ measurable N P" and f: "f \ borel_measurable P" + shows "integrable P (\x. Py x * f x) \ integrable N (\x. Px x * f (T x))" +proof - + have "integrable P (\x. Py x * f x) \ integrable M (\x. f (Y x))" + by (rule distributed_integrable) fact+ + also have "\ \ integrable M (\x. f (T (X x)))" + using Y by simp + also have "\ \ integrable N (\x. Px x * f (T x))" + using measurable_comp[OF T f] Px by (intro distributed_integrable[symmetric]) (auto simp: comp_def) + finally show ?thesis . +qed + +lemma integrable_cong_AE_imp: "integrable M g \ f \ borel_measurable M \ (AE x in M. g x = f x) \ integrable M f" + using integrable_cong_AE by blast + +lemma (in information_space) finite_entropy_integrable: + "finite_entropy S X Px \ integrable S (\x. Px x * log b (Px x))" + unfolding finite_entropy_def by auto + +lemma (in information_space) finite_entropy_distributed: + "finite_entropy S X Px \ distributed M S X Px" + unfolding finite_entropy_def by auto + +lemma (in information_space) finite_entropy_integrable_transform: + assumes Fx: "finite_entropy S X Px" + assumes Fy: "distributed M T Y Py" + and "X = (\x. f (Y x))" + and "f \ measurable T S" + shows "integrable T (\x. Py x * log b (Px (f x)))" + using assms unfolding finite_entropy_def + using distributed_transform_integrable[of M T Y Py S X Px f "\x. log b (Px x)"] + by (auto intro: distributed_real_measurable) + subsection {* Mutual Information *} definition (in prob_space) @@ -412,6 +521,120 @@ lemma (in information_space) fixes Pxy :: "'b \ 'c \ real" and Px :: "'b \ real" and Py :: "'c \ real" + assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" + assumes Fx: "finite_entropy S X Px" and Fy: "finite_entropy T Y Py" + assumes Fxy: "finite_entropy (S \\<^isub>M T) (\x. (X x, Y x)) Pxy" + defines "f \ \x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" + shows mutual_information_distr': "mutual_information b S T X Y = integral\<^isup>L (S \\<^isub>M T) f" (is "?M = ?R") + and mutual_information_nonneg': "0 \ mutual_information b S T X Y" +proof - + have Px: "distributed M S X Px" + using Fx by (auto simp: finite_entropy_def) + have Py: "distributed M T Y Py" + using Fy by (auto simp: finite_entropy_def) + have Pxy: "distributed M (S \\<^isub>M T) (\x. (X x, Y x)) Pxy" + using Fxy by (auto simp: finite_entropy_def) + + have X: "random_variable S X" + using Px by (auto simp: distributed_def finite_entropy_def) + have Y: "random_variable T Y" + using Py by (auto simp: distributed_def finite_entropy_def) + interpret S: sigma_finite_measure S by fact + interpret T: sigma_finite_measure T by fact + interpret ST: pair_sigma_finite S T .. + interpret X: prob_space "distr M S X" using X by (rule prob_space_distr) + interpret Y: prob_space "distr M T Y" using Y by (rule prob_space_distr) + interpret XY: pair_prob_space "distr M S X" "distr M T Y" .. + let ?P = "S \\<^isub>M T" + let ?D = "distr M ?P (\x. (X x, Y x))" + + { fix A assume "A \ sets S" + with X Y have "emeasure (distr M S X) A = emeasure ?D (A \ space T)" + by (auto simp: emeasure_distr measurable_Pair measurable_space + intro!: arg_cong[where f="emeasure M"]) } + note marginal_eq1 = this + { fix A assume "A \ sets T" + with X Y have "emeasure (distr M T Y) A = emeasure ?D (space S \ A)" + by (auto simp: emeasure_distr measurable_Pair measurable_space + intro!: arg_cong[where f="emeasure M"]) } + note marginal_eq2 = this + + have eq: "(\x. ereal (Px (fst x) * Py (snd x))) = (\(x, y). ereal (Px x) * ereal (Py y))" + by auto + + have distr_eq: "distr M S X \\<^isub>M distr M T Y = density ?P (\x. ereal (Px (fst x) * Py (snd x)))" + unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] eq + proof (subst pair_measure_density) + show "(\x. ereal (Px x)) \ borel_measurable S" "(\y. ereal (Py y)) \ borel_measurable T" + "AE x in S. 0 \ ereal (Px x)" "AE y in T. 0 \ ereal (Py y)" + using Px Py by (auto simp: distributed_def) + show "sigma_finite_measure (density S Px)" unfolding Px(1)[THEN distributed_distr_eq_density, symmetric] .. + show "sigma_finite_measure (density T Py)" unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. + qed (fact | simp)+ + + have M: "?M = KL_divergence b (density ?P (\x. ereal (Px (fst x) * Py (snd x)))) (density ?P (\x. ereal (Pxy x)))" + unfolding mutual_information_def distr_eq Pxy(1)[THEN distributed_distr_eq_density] .. + + from Px Py have f: "(\x. Px (fst x) * Py (snd x)) \ borel_measurable ?P" + by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') + have PxPy_nonneg: "AE x in ?P. 0 \ Px (fst x) * Py (snd x)" + proof (rule ST.AE_pair_measure) + show "{x \ space ?P. 0 \ Px (fst x) * Py (snd x)} \ sets ?P" + using f by auto + show "AE x in S. AE y in T. 0 \ Px (fst (x, y)) * Py (snd (x, y))" + using Px Py by (auto simp: zero_le_mult_iff dest!: distributed_real_AE) + qed + + have "(AE x in ?P. Px (fst x) = 0 \ Pxy x = 0)" + by (rule subdensity_real[OF measurable_fst Pxy Px]) auto + moreover + have "(AE x in ?P. Py (snd x) = 0 \ Pxy x = 0)" + by (rule subdensity_real[OF measurable_snd Pxy Py]) auto + ultimately have ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 \ Pxy x = 0" + by eventually_elim auto + + show "?M = ?R" + unfolding M f_def + using b_gt_1 f PxPy_nonneg Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] ac + by (rule ST.KL_density_density) + + have X: "X = fst \ (\x. (X x, Y x))" and Y: "Y = snd \ (\x. (X x, Y x))" + by auto + + have "integrable (S \\<^isub>M T) (\x. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)))" + using finite_entropy_integrable[OF Fxy] + using finite_entropy_integrable_transform[OF Fx Pxy, of fst] + using finite_entropy_integrable_transform[OF Fy Pxy, of snd] + by simp + moreover have "f \ borel_measurable (S \\<^isub>M T)" + unfolding f_def using Px Py Pxy + by (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'' + intro!: borel_measurable_times borel_measurable_log borel_measurable_divide) + ultimately have int: "integrable (S \\<^isub>M T) f" + apply (rule integrable_cong_AE_imp) + using + distributed_transform_AE[OF measurable_fst ac_fst, of T, OF T Px] + distributed_transform_AE[OF measurable_snd ac_snd, of _ _ _ _ S, OF T Py] + subdensity_real[OF measurable_fst Pxy Px X] + subdensity_real[OF measurable_snd Pxy Py Y] + distributed_real_AE[OF Pxy] + by eventually_elim + (auto simp: f_def log_divide_eq log_mult_eq field_simps zero_less_mult_iff mult_nonneg_nonneg) + + show "0 \ ?M" unfolding M + proof (rule ST.KL_density_density_nonneg + [OF b_gt_1 f PxPy_nonneg _ Pxy[THEN distributed_real_measurable] Pxy[THEN distributed_real_AE] _ ac int[unfolded f_def]]) + show "prob_space (density (S \\<^isub>M T) (\x. ereal (Pxy x))) " + unfolding distributed_distr_eq_density[OF Pxy, symmetric] + using distributed_measurable[OF Pxy] by (rule prob_space_distr) + show "prob_space (density (S \\<^isub>M T) (\x. ereal (Px (fst x) * Py (snd x))))" + unfolding distr_eq[symmetric] by unfold_locales + qed +qed + + +lemma (in information_space) + fixes Pxy :: "'b \ 'c \ real" and Px :: "'b \ real" and Py :: "'c \ real" assumes "sigma_finite_measure S" "sigma_finite_measure T" assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py" assumes Pxy: "distributed M (S \\<^isub>M T) (\x. (X x, Y x)) Pxy" @@ -1047,6 +1270,296 @@ by simp qed +lemma (in information_space) + fixes Px :: "_ \ real" + assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" and P: "sigma_finite_measure P" + assumes Fx: "finite_entropy S X Px" + assumes Fz: "finite_entropy P Z Pz" + assumes Fyz: "finite_entropy (T \\<^isub>M P) (\x. (Y x, Z x)) Pyz" + assumes Fxz: "finite_entropy (S \\<^isub>M P) (\x. (X x, Z x)) Pxz" + assumes Fxyz: "finite_entropy (S \\<^isub>M T \\<^isub>M P) (\x. (X x, Y x, Z x)) Pxyz" + shows conditional_mutual_information_generic_eq': "conditional_mutual_information b S T P X Y Z + = (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \(S \\<^isub>M T \\<^isub>M P))" (is "?eq") + and conditional_mutual_information_generic_nonneg': "0 \ conditional_mutual_information b S T P X Y Z" (is "?nonneg") +proof - + note Px = Fx[THEN finite_entropy_distributed] + note Pz = Fz[THEN finite_entropy_distributed] + note Pyz = Fyz[THEN finite_entropy_distributed] + note Pxz = Fxz[THEN finite_entropy_distributed] + note Pxyz = Fxyz[THEN finite_entropy_distributed] + + interpret S: sigma_finite_measure S by fact + interpret T: sigma_finite_measure T by fact + interpret P: sigma_finite_measure P by fact + interpret TP: pair_sigma_finite T P .. + interpret SP: pair_sigma_finite S P .. + interpret ST: pair_sigma_finite S T .. + interpret SPT: pair_sigma_finite "S \\<^isub>M P" T .. + interpret STP: pair_sigma_finite S "T \\<^isub>M P" .. + interpret TPS: pair_sigma_finite "T \\<^isub>M P" S .. + have TP: "sigma_finite_measure (T \\<^isub>M P)" .. + have SP: "sigma_finite_measure (S \\<^isub>M P)" .. + have YZ: "random_variable (T \\<^isub>M P) (\x. (Y x, Z x))" + using Pyz by (simp add: distributed_measurable) + + have Pxyz_f: "\M f. f \ measurable M (S \\<^isub>M T \\<^isub>M P) \ (\x. Pxyz (f x)) \ borel_measurable M" + using measurable_comp[OF _ Pxyz[THEN distributed_real_measurable]] by (auto simp: comp_def) + + { fix f g h M + assume f: "f \ measurable M S" and g: "g \ measurable M P" and h: "h \ measurable M (S \\<^isub>M P)" + from measurable_comp[OF h Pxz[THEN distributed_real_measurable]] + measurable_comp[OF f Px[THEN distributed_real_measurable]] + measurable_comp[OF g Pz[THEN distributed_real_measurable]] + have "(\x. log b (Pxz (h x) / (Px (f x) * Pz (g x)))) \ borel_measurable M" + by (simp add: comp_def b_gt_1) } + note borel_log = this + + have measurable_cut: "(\(x, y, z). (x, z)) \ measurable (S \\<^isub>M T \\<^isub>M P) (S \\<^isub>M P)" + by (auto simp add: split_beta' comp_def intro!: measurable_Pair measurable_snd') + + from Pxz Pxyz have distr_eq: "distr M (S \\<^isub>M P) (\x. (X x, Z x)) = + distr (distr M (S \\<^isub>M T \\<^isub>M P) (\x. (X x, Y x, Z x))) (S \\<^isub>M P) (\(x, y, z). (x, z))" + by (subst distr_distr[OF measurable_cut]) (auto dest: distributed_measurable simp: comp_def) + + have "mutual_information b S P X Z = + (\x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \(S \\<^isub>M P))" + by (rule mutual_information_distr[OF S P Px Pz Pxz]) + also have "\ = (\(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \(S \\<^isub>M T \\<^isub>M P))" + using b_gt_1 Pxz Px Pz + by (subst distributed_transform_integral[OF Pxyz Pxz, where T="\(x, y, z). (x, z)"]) + (auto simp: split_beta' intro!: measurable_Pair measurable_snd' measurable_snd'' measurable_fst'' borel_measurable_times + dest!: distributed_real_measurable) + finally have mi_eq: + "mutual_information b S P X Z = (\(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \(S \\<^isub>M T \\<^isub>M P))" . + + have ae1: "AE x in S \\<^isub>M T \\<^isub>M P. Px (fst x) = 0 \ Pxyz x = 0" + by (intro subdensity_real[of fst, OF _ Pxyz Px]) auto + moreover have ae2: "AE x in S \\<^isub>M T \\<^isub>M P. Pz (snd (snd x)) = 0 \ Pxyz x = 0" + by (intro subdensity_real[of "\x. snd (snd x)", OF _ Pxyz Pz]) (auto intro: measurable_snd') + moreover have ae3: "AE x in S \\<^isub>M T \\<^isub>M P. Pxz (fst x, snd (snd x)) = 0 \ Pxyz x = 0" + by (intro subdensity_real[of "\x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) (auto intro: measurable_Pair measurable_snd') + moreover have ae4: "AE x in S \\<^isub>M T \\<^isub>M P. Pyz (snd x) = 0 \ Pxyz x = 0" + by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) (auto intro: measurable_Pair) + moreover have ae5: "AE x in S \\<^isub>M T \\<^isub>M P. 0 \ Px (fst x)" + using Px by (intro STP.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable) + moreover have ae6: "AE x in S \\<^isub>M T \\<^isub>M P. 0 \ Pyz (snd x)" + using Pyz by (intro STP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) + moreover have ae7: "AE x in S \\<^isub>M T \\<^isub>M P. 0 \ Pz (snd (snd x))" + using Pz Pz[THEN distributed_real_measurable] by (auto intro!: measurable_snd'' TP.AE_pair_measure STP.AE_pair_measure AE_I2[of S] dest: distributed_real_AE) + moreover have ae8: "AE x in S \\<^isub>M T \\<^isub>M P. 0 \ Pxz (fst x, snd (snd x))" + using Pxz[THEN distributed_real_AE, THEN SP.AE_pair] + using measurable_comp[OF measurable_Pair[OF measurable_fst measurable_comp[OF measurable_snd measurable_snd]] Pxz[THEN distributed_real_measurable], of T] + using measurable_comp[OF measurable_snd measurable_Pair2[OF Pxz[THEN distributed_real_measurable]], of _ T] + by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure simp: comp_def) + moreover note ae9 = Pxyz[THEN distributed_real_AE] + ultimately have ae: "AE x in S \\<^isub>M T \\<^isub>M P. + Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) - + Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) = + Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " + proof eventually_elim + case (goal1 x) + show ?case + proof cases + assume "Pxyz x \ 0" + with goal1 have "0 < Px (fst x)" "0 < Pz (snd (snd x))" "0 < Pxz (fst x, snd (snd x))" "0 < Pyz (snd x)" "0 < Pxyz x" + by auto + then show ?thesis + using b_gt_1 by (simp add: log_simps mult_pos_pos less_imp_le field_simps) + qed simp + qed + + have "integrable (S \\<^isub>M T \\<^isub>M P) + (\x. Pxyz x * log b (Pxyz x) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pyz (snd x)))" + using finite_entropy_integrable[OF Fxyz] + using finite_entropy_integrable_transform[OF Fx Pxyz, of fst] + using finite_entropy_integrable_transform[OF Fyz Pxyz, of snd] + by simp + moreover have "(\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z)))) \ borel_measurable (S \\<^isub>M T \\<^isub>M P)" + using Pxyz Px Pyz + by (auto intro!: borel_measurable_times measurable_fst'' measurable_snd'' dest!: distributed_real_measurable simp: split_beta') + ultimately have I1: "integrable (S \\<^isub>M T \\<^isub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" + apply (rule integrable_cong_AE_imp) + using ae1 ae4 ae5 ae6 ae9 + by eventually_elim + (auto simp: log_divide_eq log_mult_eq mult_nonneg_nonneg field_simps zero_less_mult_iff) + + have "integrable (S \\<^isub>M T \\<^isub>M P) + (\x. Pxyz x * log b (Pxz (fst x, snd (snd x))) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pz (snd (snd x))))" + using finite_entropy_integrable_transform[OF Fxz Pxyz, of "\x. (fst x, snd (snd x))"] + using finite_entropy_integrable_transform[OF Fx Pxyz, of fst] + using finite_entropy_integrable_transform[OF Fz Pxyz, of "snd \ snd"] + by (simp add: measurable_Pair measurable_snd'' comp_def) + moreover have "(\(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z))) \ borel_measurable (S \\<^isub>M T \\<^isub>M P)" + using Pxyz Px Pz + by (auto intro!: measurable_compose[OF _ distributed_real_measurable[OF Pxz]] + measurable_Pair borel_measurable_times measurable_fst'' measurable_snd'' + dest!: distributed_real_measurable simp: split_beta') + ultimately have I2: "integrable (S \\<^isub>M T \\<^isub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" + apply (rule integrable_cong_AE_imp) + using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 ae9 + by eventually_elim + (auto simp: log_divide_eq log_mult_eq mult_nonneg_nonneg field_simps zero_less_mult_iff) + + from ae I1 I2 show ?eq + unfolding conditional_mutual_information_def + apply (subst mi_eq) + apply (subst mutual_information_distr[OF S TP Px Pyz Pxyz]) + apply (subst integral_diff(2)[symmetric]) + apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff) + done + + let ?P = "density (S \\<^isub>M T \\<^isub>M P) Pxyz" + interpret P: prob_space ?P + unfolding distributed_distr_eq_density[OF Pxyz, symmetric] + using distributed_measurable[OF Pxyz] by (rule prob_space_distr) + + let ?Q = "density (T \\<^isub>M P) Pyz" + interpret Q: prob_space ?Q + unfolding distributed_distr_eq_density[OF Pyz, symmetric] + using distributed_measurable[OF Pyz] by (rule prob_space_distr) + + let ?f = "\(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)" + + from subdensity_real[of snd, OF _ Pyz Pz] + have aeX1: "AE x in T \\<^isub>M P. Pz (snd x) = 0 \ Pyz x = 0" by (auto simp: comp_def) + have aeX2: "AE x in T \\<^isub>M P. 0 \ Pz (snd x)" + using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) + + have aeX3: "AE y in T \\<^isub>M P. (\\<^isup>+ x. ereal (Pxz (x, snd y)) \S) = ereal (Pz (snd y))" + using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz] + apply (intro TP.AE_pair_measure) + apply (auto simp: comp_def measurable_split_conv + intro!: measurable_snd'' borel_measurable_ereal_eq borel_measurable_ereal + SP.borel_measurable_positive_integral_snd measurable_compose[OF _ Pxz[THEN distributed_real_measurable]] + measurable_Pair + dest: distributed_real_AE distributed_real_measurable) + done + + note M = borel_measurable_divide borel_measurable_diff borel_measurable_times borel_measurable_ereal + measurable_compose[OF _ measurable_snd] + measurable_Pair + measurable_compose[OF _ Pxyz[THEN distributed_real_measurable]] + measurable_compose[OF _ Pxz[THEN distributed_real_measurable]] + measurable_compose[OF _ Pyz[THEN distributed_real_measurable]] + measurable_compose[OF _ Pz[THEN distributed_real_measurable]] + measurable_compose[OF _ Px[THEN distributed_real_measurable]] + STP.borel_measurable_positive_integral_snd + have "(\\<^isup>+ x. ?f x \?P) \ (\\<^isup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \(S \\<^isub>M T \\<^isub>M P))" + apply (subst positive_integral_density) + apply (rule distributed_borel_measurable[OF Pxyz]) + apply (rule distributed_AE[OF Pxyz]) + apply (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M) [] + apply (rule positive_integral_mono_AE) + using ae5 ae6 ae7 ae8 + apply eventually_elim + apply (auto intro!: divide_nonneg_nonneg mult_nonneg_nonneg) + done + also have "\ = (\\<^isup>+(y, z). \\<^isup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \S \T \\<^isub>M P)" + by (subst STP.positive_integral_snd_measurable[symmetric]) + (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M) + also have "\ = (\\<^isup>+x. ereal (Pyz x) * 1 \T \\<^isub>M P)" + apply (rule positive_integral_cong_AE) + using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space + apply eventually_elim + proof (case_tac x, simp del: times_ereal.simps add: space_pair_measure) + fix a b assume "Pz b = 0 \ Pyz (a, b) = 0" "0 \ Pz b" "a \ space T \ b \ space P" + "(\\<^isup>+ x. ereal (Pxz (x, b)) \S) = ereal (Pz b)" "0 \ Pyz (a, b)" + then show "(\\<^isup>+ x. ereal (Pxz (x, b)) * ereal (Pyz (a, b) / Pz b) \S) = ereal (Pyz (a, b))" + apply (subst positive_integral_multc) + apply (auto intro!: borel_measurable_ereal divide_nonneg_nonneg + measurable_compose[OF _ Pxz[THEN distributed_real_measurable]] measurable_Pair + split: prod.split) + done + qed + also have "\ = 1" + using Q.emeasure_space_1 distributed_AE[OF Pyz] distributed_distr_eq_density[OF Pyz] + by (subst positive_integral_density[symmetric]) (auto intro!: M) + finally have le1: "(\\<^isup>+ x. ?f x \?P) \ 1" . + also have "\ < \" by simp + finally have fin: "(\\<^isup>+ x. ?f x \?P) \ \" by simp + + have pos: "(\\<^isup>+ x. ?f x \?P) \ 0" + apply (subst positive_integral_density) + apply (rule distributed_borel_measurable[OF Pxyz]) + apply (rule distributed_AE[OF Pxyz]) + apply (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M) [] + apply (simp add: split_beta') + proof + let ?g = "\x. ereal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))" + assume "(\\<^isup>+ x. ?g x \(S \\<^isub>M T \\<^isub>M P)) = 0" + then have "AE x in S \\<^isub>M T \\<^isub>M P. ?g x \ 0" + by (intro positive_integral_0_iff_AE[THEN iffD1]) (auto intro!: M borel_measurable_ereal measurable_If) + then have "AE x in S \\<^isub>M T \\<^isub>M P. Pxyz x = 0" + using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] + by eventually_elim (auto split: split_if_asm simp: mult_le_0_iff divide_le_0_iff) + then have "(\\<^isup>+ x. ereal (Pxyz x) \S \\<^isub>M T \\<^isub>M P) = 0" + by (subst positive_integral_cong_AE[of _ "\x. 0"]) auto + with P.emeasure_space_1 show False + by (subst (asm) emeasure_density) (auto intro!: M cong: positive_integral_cong) + qed + + have neg: "(\\<^isup>+ x. - ?f x \?P) = 0" + apply (rule positive_integral_0_iff_AE[THEN iffD2]) + apply (auto intro!: M simp: split_beta') [] + apply (subst AE_density) + apply (auto intro!: M simp: split_beta') [] + using ae5 ae6 ae7 ae8 + apply eventually_elim + apply (auto intro!: mult_nonneg_nonneg divide_nonneg_nonneg) + done + + have I3: "integrable (S \\<^isub>M T \\<^isub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" + apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integral_diff(1)[OF I1 I2]]) + using ae + apply (auto intro!: M simp: split_beta') + done + + have "- log b 1 \ - log b (integral\<^isup>L ?P ?f)" + proof (intro le_imp_neg_le log_le[OF b_gt_1]) + show "0 < integral\<^isup>L ?P ?f" + using neg pos fin positive_integral_positive[of ?P ?f] + by (cases "(\\<^isup>+ x. ?f x \?P)") (auto simp add: lebesgue_integral_def less_le split_beta') + show "integral\<^isup>L ?P ?f \ 1" + using neg le1 fin positive_integral_positive[of ?P ?f] + by (cases "(\\<^isup>+ x. ?f x \?P)") (auto simp add: lebesgue_integral_def split_beta' one_ereal_def) + qed + also have "- log b (integral\<^isup>L ?P ?f) \ (\ x. - log b (?f x) \?P)" + proof (rule P.jensens_inequality[where a=0 and b=1 and I="{0<..}"]) + show "AE x in ?P. ?f x \ {0<..}" + unfolding AE_density[OF distributed_borel_measurable[OF Pxyz]] + using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] + by eventually_elim (auto simp: divide_pos_pos mult_pos_pos) + show "integrable ?P ?f" + unfolding integrable_def + using fin neg by (auto intro!: M simp: split_beta') + show "integrable ?P (\x. - log b (?f x))" + apply (subst integral_density) + apply (auto intro!: M) [] + apply (auto intro!: M distributed_real_AE[OF Pxyz]) [] + apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') [] + apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3]) + apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') [] + apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') [] + using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] + apply eventually_elim + apply (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps) + done + qed (auto simp: b_gt_1 minus_log_convex) + also have "\ = conditional_mutual_information b S T P X Y Z" + unfolding `?eq` + apply (subst integral_density) + apply (auto intro!: M) [] + apply (auto intro!: M distributed_real_AE[OF Pxyz]) [] + apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') [] + apply (intro integral_cong_AE) + using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] + apply eventually_elim + apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps) + done + finally show ?nonneg + by simp +qed + lemma (in information_space) conditional_mutual_information_eq: assumes Pz: "simple_distributed M Z Pz" assumes Pyz: "simple_distributed M (\x. (Y x, Z x)) Pyz" @@ -1436,6 +1949,25 @@ finally show ?thesis by auto qed +lemma (in information_space) + fixes Px :: "'b \ real" and Py :: "'c \ real" and Pxy :: "('b \ 'c) \ real" + assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" + assumes Px: "finite_entropy S X Px" and Py: "finite_entropy T Y Py" + assumes Pxy: "finite_entropy (S \\<^isub>M T) (\x. (X x, Y x)) Pxy" + shows "conditional_entropy b S T X Y \ entropy b S X" +proof - + + have "0 \ mutual_information b S T X Y" + by (rule mutual_information_nonneg') fact+ + also have "\ = entropy b S X - conditional_entropy b S T X Y" + apply (rule mutual_information_eq_entropy_conditional_entropy') + using assms + by (auto intro!: finite_entropy_integrable finite_entropy_distributed + finite_entropy_integrable_transform[OF Px] + finite_entropy_integrable_transform[OF Py]) + finally show ?thesis by auto +qed + lemma (in information_space) entropy_chain_rule: assumes X: "simple_function M X" and Y: "simple_function M Y" shows "\(\x. (X x, Y x)) = \(X) + \(Y|X)"