# HG changeset patch # User hoelzl # Date 1349863945 -7200 # Node ID d8de705b48d4ca476f40239a056c56549255efb2 # Parent f33d5f009627bf3c42a3b1ea03bab0829bdb7ea9 rule to show that conditional mutual information is non-negative in the continuous case diff -r f33d5f009627 -r d8de705b48d4 src/HOL/Probability/Information.thy --- a/src/HOL/Probability/Information.thy Wed Oct 10 12:12:25 2012 +0200 +++ b/src/HOL/Probability/Information.thy Wed Oct 10 12:12:25 2012 +0200 @@ -22,104 +22,6 @@ "(\x\A \ B. f x) = (\x\A. setsum (\y. f (x, y)) B)" unfolding setsum_cartesian_product by simp -section "Convex theory" - -lemma log_setsum: - assumes "finite s" "s \ {}" - assumes "b > 1" - assumes "(\ i \ s. a i) = 1" - assumes "\ i. i \ s \ a i \ 0" - assumes "\ i. i \ s \ y i \ {0 <..}" - shows "log b (\ i \ s. a i * y i) \ (\ i \ s. a i * log b (y i))" -proof - - have "convex_on {0 <..} (\ x. - log b x)" - by (rule minus_log_convex[OF `b > 1`]) - hence "- log b (\ i \ s. a i * y i) \ (\ i \ s. a i * - log b (y i))" - using convex_on_setsum[of _ _ "\ x. - log b x"] assms pos_is_convex by fastforce - thus ?thesis by (auto simp add:setsum_negf le_imp_neg_le) -qed - -lemma log_setsum': - assumes "finite s" "s \ {}" - assumes "b > 1" - assumes "(\ i \ s. a i) = 1" - assumes pos: "\ i. i \ s \ 0 \ a i" - "\ i. \ i \ s ; 0 < a i \ \ 0 < y i" - shows "log b (\ i \ s. a i * y i) \ (\ i \ s. a i * log b (y i))" -proof - - have "\y. (\ i \ s - {i. a i = 0}. a i * y i) = (\ i \ s. a i * y i)" - using assms by (auto intro!: setsum_mono_zero_cong_left) - moreover have "log b (\ i \ s - {i. a i = 0}. a i * y i) \ (\ i \ s - {i. a i = 0}. a i * log b (y i))" - proof (rule log_setsum) - have "setsum a (s - {i. a i = 0}) = setsum a s" - using assms(1) by (rule setsum_mono_zero_cong_left) auto - thus sum_1: "setsum a (s - {i. a i = 0}) = 1" - "finite (s - {i. a i = 0})" using assms by simp_all - - show "s - {i. a i = 0} \ {}" - proof - assume *: "s - {i. a i = 0} = {}" - hence "setsum a (s - {i. a i = 0}) = 0" by (simp add: * setsum_empty) - with sum_1 show False by simp - qed - - fix i assume "i \ s - {i. a i = 0}" - hence "i \ s" "a i \ 0" by simp_all - thus "0 \ a i" "y i \ {0<..}" using pos[of i] by auto - qed fact+ - ultimately show ?thesis by simp -qed - -lemma log_setsum_divide: - assumes "finite S" and "S \ {}" and "1 < b" - assumes "(\x\S. g x) = 1" - assumes pos: "\x. x \ S \ g x \ 0" "\x. x \ S \ f x \ 0" - assumes g_pos: "\x. \ x \ S ; 0 < g x \ \ 0 < f x" - shows "- (\x\S. g x * log b (g x / f x)) \ log b (\x\S. f x)" -proof - - have log_mono: "\x y. 0 < x \ x \ y \ log b x \ log b y" - using `1 < b` by (subst log_le_cancel_iff) auto - - have "- (\x\S. g x * log b (g x / f x)) = (\x\S. g x * log b (f x / g x))" - proof (unfold setsum_negf[symmetric], rule setsum_cong) - fix x assume x: "x \ S" - show "- (g x * log b (g x / f x)) = g x * log b (f x / g x)" - proof (cases "g x = 0") - case False - with pos[OF x] g_pos[OF x] have "0 < f x" "0 < g x" by simp_all - thus ?thesis using `1 < b` by (simp add: log_divide field_simps) - qed simp - qed rule - also have "... \ log b (\x\S. g x * (f x / g x))" - proof (rule log_setsum') - fix x assume x: "x \ S" "0 < g x" - with g_pos[OF x] show "0 < f x / g x" by (safe intro!: divide_pos_pos) - qed fact+ - also have "... = log b (\x\S - {x. g x = 0}. f x)" using `finite S` - by (auto intro!: setsum_mono_zero_cong_right arg_cong[where f="log b"] - split: split_if_asm) - also have "... \ log b (\x\S. f x)" - proof (rule log_mono) - have "0 = (\x\S - {x. g x = 0}. 0)" by simp - also have "... < (\x\S - {x. g x = 0}. f x)" (is "_ < ?sum") - proof (rule setsum_strict_mono) - show "finite (S - {x. g x = 0})" using `finite S` by simp - show "S - {x. g x = 0} \ {}" - proof - assume "S - {x. g x = 0} = {}" - hence "(\x\S. g x) = 0" by (subst setsum_0') auto - with `(\x\S. g x) = 1` show False by simp - qed - fix x assume "x \ S - {x. g x = 0}" - thus "0 < f x" using g_pos[of x] pos(1)[of x] by auto - qed - finally show "0 < ?sum" . - show "(\x\S - {x. g x = 0}. f x) \ (\x\S. f x)" - using `finite S` pos by (auto intro!: setsum_mono2) - qed - finally show ?thesis . -qed - lemma split_pairs: "((A, B) = X) \ (fst X = A \ snd X = B)" and "(X = (A, B)) \ (fst X = A \ snd X = B)" by auto @@ -831,39 +733,25 @@ assumes X: "simple_distributed M X f" shows "\(X) \ log b (card (X ` space M \ {x. f x \ 0}))" proof - - have "\(X) = (\x\X`space M. f x * log b (1 / f x))" - unfolding entropy_simple_distributed[OF X] setsum_negf[symmetric] - using X by (auto dest: simple_distributed_nonneg intro!: setsum_cong simp: log_simps less_le) - also have "\ \ log b (\x\X`space M. f x * (1 / f x))" - using not_empty b_gt_1 `simple_distributed M X f` - by (intro log_setsum') (auto simp: simple_distributed_nonneg simple_distributed_setsum_space) - also have "\ = log b (\x\X`space M. if f x \ 0 then 1 else 0)" - by (intro arg_cong[where f="\X. log b X"] setsum_cong) auto - finally show ?thesis - using `simple_distributed M X f` by (auto simp: setsum_cases real_eq_of_nat) + let ?X = "count_space (X`space M)" + have "\(X) \ log b (measure ?X {x \ space ?X. f x \ 0})" + by (rule entropy_le[OF simple_distributed[OF X]]) + (simp_all add: simple_distributed_finite[OF X] subset_eq integrable_count_space emeasure_count_space) + also have "measure ?X {x \ space ?X. f x \ 0} = card (X ` space M \ {x. f x \ 0})" + by (simp_all add: simple_distributed_finite[OF X] subset_eq emeasure_count_space measure_def Int_def) + finally show ?thesis . qed lemma (in information_space) entropy_le_card: - assumes "simple_distributed M X f" + assumes X: "simple_distributed M X f" shows "\(X) \ log b (real (card (X ` space M)))" -proof cases - assume "X ` space M \ {x. f x \ 0} = {}" - then have "\x. x\X`space M \ f x = 0" by auto - moreover - have "0 < card (X`space M)" - using `simple_distributed M X f` not_empty by (auto simp: card_gt_0_iff) - then have "log b 1 \ log b (real (card (X`space M)))" - using b_gt_1 by (intro log_le) auto - ultimately show ?thesis using assms by (simp add: entropy_simple_distributed) -next - assume False: "X ` space M \ {x. f x \ 0} \ {}" - have "card (X ` space M \ {x. f x \ 0}) \ card (X ` space M)" - (is "?A \ ?B") using assms not_empty - by (auto intro!: card_mono simp: simple_function_def simple_distributed_def) - note entropy_le_card_not_0[OF assms] - also have "log b (real ?A) \ log b (real ?B)" - using b_gt_1 False not_empty `?A \ ?B` assms - by (auto intro!: log_le simp: card_gt_0_iff simp: simple_distributed_def) +proof - + let ?X = "count_space (X`space M)" + have "\(X) \ log b (measure ?X (space ?X))" + by (rule entropy_le_space[OF simple_distributed[OF X]]) + (simp_all add: simple_distributed_finite[OF X] subset_eq integrable_count_space emeasure_count_space finite_measure_count_space) + also have "measure ?X (space ?X) = card (X ` space M)" + by (simp_all add: simple_distributed_finite[OF X] subset_eq emeasure_count_space measure_def) finally show ?thesis . qed @@ -879,7 +767,18 @@ "\(X ; Y | Z) \ conditional_mutual_information b (count_space (X ` space M)) (count_space (Y ` space M)) (count_space (Z ` space M)) X Y Z" -lemma (in information_space) conditional_mutual_information_generic_eq: +lemma (in pair_sigma_finite) borel_measurable_positive_integral_fst: + "(\(x, y). f x y) \ borel_measurable (M1 \\<^isub>M M2) \ (\x. \\<^isup>+ y. f x y \M2) \ borel_measurable M1" + using positive_integral_fst_measurable(1)[of "\(x, y). f x y"] by simp + +lemma (in pair_sigma_finite) borel_measurable_positive_integral_snd: + assumes "(\(x, y). f x y) \ borel_measurable (M2 \\<^isub>M M1)" shows "(\x. \\<^isup>+ y. f x y \M1) \ borel_measurable M2" +proof - + interpret Q: pair_sigma_finite M2 M1 by default + from Q.borel_measurable_positive_integral_fst assms show ?thesis by simp +qed + +lemma (in information_space) assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T" and P: "sigma_finite_measure P" assumes Px: "distributed M S X Px" assumes Pz: "distributed M P Z Pz" @@ -888,16 +787,19 @@ assumes Pxyz: "distributed M (S \\<^isub>M T \\<^isub>M P) (\x. (X x, Y x, Z x)) Pxyz" assumes I1: "integrable (S \\<^isub>M T \\<^isub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" assumes I2: "integrable (S \\<^isub>M T \\<^isub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" - shows "conditional_mutual_information b S T P X Y Z - = (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \(S \\<^isub>M T \\<^isub>M P))" + shows conditional_mutual_information_generic_eq: "conditional_mutual_information b S T P X Y Z + = (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \(S \\<^isub>M T \\<^isub>M P))" (is "?eq") + and conditional_mutual_information_generic_nonneg: "0 \ conditional_mutual_information b S T P X Y Z" (is "?nonneg") proof - interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret P: sigma_finite_measure P by fact interpret TP: pair_sigma_finite T P .. interpret SP: pair_sigma_finite S P .. + interpret ST: pair_sigma_finite S T .. interpret SPT: pair_sigma_finite "S \\<^isub>M P" T .. interpret STP: pair_sigma_finite S "T \\<^isub>M P" .. + interpret TPS: pair_sigma_finite "T \\<^isub>M P" S .. have TP: "sigma_finite_measure (T \\<^isub>M P)" .. have SP: "sigma_finite_measure (S \\<^isub>M P)" .. have YZ: "random_variable (T \\<^isub>M P) (\x. (Y x, Z x))" @@ -933,27 +835,27 @@ finally have mi_eq: "mutual_information b S P X Z = (\(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \(S \\<^isub>M T \\<^isub>M P))" . - have "AE x in S \\<^isub>M T \\<^isub>M P. Px (fst x) = 0 \ Pxyz x = 0" + have ae1: "AE x in S \\<^isub>M T \\<^isub>M P. Px (fst x) = 0 \ Pxyz x = 0" by (intro subdensity_real[of fst, OF _ Pxyz Px]) auto - moreover have "AE x in S \\<^isub>M T \\<^isub>M P. Pz (snd (snd x)) = 0 \ Pxyz x = 0" + moreover have ae2: "AE x in S \\<^isub>M T \\<^isub>M P. Pz (snd (snd x)) = 0 \ Pxyz x = 0" by (intro subdensity_real[of "\x. snd (snd x)", OF _ Pxyz Pz]) (auto intro: measurable_snd') - moreover have "AE x in S \\<^isub>M T \\<^isub>M P. Pxz (fst x, snd (snd x)) = 0 \ Pxyz x = 0" + moreover have ae3: "AE x in S \\<^isub>M T \\<^isub>M P. Pxz (fst x, snd (snd x)) = 0 \ Pxyz x = 0" by (intro subdensity_real[of "\x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) (auto intro: measurable_Pair measurable_snd') - moreover have "AE x in S \\<^isub>M T \\<^isub>M P. Pyz (snd x) = 0 \ Pxyz x = 0" + moreover have ae4: "AE x in S \\<^isub>M T \\<^isub>M P. Pyz (snd x) = 0 \ Pxyz x = 0" by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) (auto intro: measurable_Pair) - moreover have "AE x in S \\<^isub>M T \\<^isub>M P. 0 \ Px (fst x)" + moreover have ae5: "AE x in S \\<^isub>M T \\<^isub>M P. 0 \ Px (fst x)" using Px by (intro STP.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'' dest: distributed_real_AE distributed_real_measurable) - moreover have "AE x in S \\<^isub>M T \\<^isub>M P. 0 \ Pyz (snd x)" + moreover have ae6: "AE x in S \\<^isub>M T \\<^isub>M P. 0 \ Pyz (snd x)" using Pyz by (intro STP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) - moreover have "AE x in S \\<^isub>M T \\<^isub>M P. 0 \ Pz (snd (snd x))" + moreover have ae7: "AE x in S \\<^isub>M T \\<^isub>M P. 0 \ Pz (snd (snd x))" using Pz Pz[THEN distributed_real_measurable] by (auto intro!: measurable_snd'' TP.AE_pair_measure STP.AE_pair_measure AE_I2[of S] dest: distributed_real_AE) - moreover have "AE x in S \\<^isub>M T \\<^isub>M P. 0 \ Pxz (fst x, snd (snd x))" + moreover have ae8: "AE x in S \\<^isub>M T \\<^isub>M P. 0 \ Pxz (fst x, snd (snd x))" using Pxz[THEN distributed_real_AE, THEN SP.AE_pair] using measurable_comp[OF measurable_Pair[OF measurable_fst measurable_comp[OF measurable_snd measurable_snd]] Pxz[THEN distributed_real_measurable], of T] using measurable_comp[OF measurable_snd measurable_Pair2[OF Pxz[THEN distributed_real_measurable]], of _ T] by (auto intro!: TP.AE_pair_measure STP.AE_pair_measure simp: comp_def) moreover note Pxyz[THEN distributed_real_AE] - ultimately have "AE x in S \\<^isub>M T \\<^isub>M P. + ultimately have ae: "AE x in S \\<^isub>M T \\<^isub>M P. Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) - Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) = Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " @@ -968,13 +870,164 @@ using b_gt_1 by (simp add: log_simps mult_pos_pos less_imp_le field_simps) qed simp qed - with I1 I2 show ?thesis + with I1 I2 show ?eq unfolding conditional_mutual_information_def apply (subst mi_eq) apply (subst mutual_information_distr[OF S TP Px Pyz Pxyz]) apply (subst integral_diff(2)[symmetric]) apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff) done + + let ?P = "density (S \\<^isub>M T \\<^isub>M P) Pxyz" + interpret P: prob_space ?P + unfolding distributed_distr_eq_density[OF Pxyz, symmetric] + using distributed_measurable[OF Pxyz] by (rule prob_space_distr) + + let ?Q = "density (T \\<^isub>M P) Pyz" + interpret Q: prob_space ?Q + unfolding distributed_distr_eq_density[OF Pyz, symmetric] + using distributed_measurable[OF Pyz] by (rule prob_space_distr) + + let ?f = "\(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)" + + from subdensity_real[of snd, OF _ Pyz Pz] + have aeX1: "AE x in T \\<^isub>M P. Pz (snd x) = 0 \ Pyz x = 0" by (auto simp: comp_def) + have aeX2: "AE x in T \\<^isub>M P. 0 \ Pz (snd x)" + using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable) + + have aeX3: "AE y in T \\<^isub>M P. (\\<^isup>+ x. ereal (Pxz (x, snd y)) \S) = ereal (Pz (snd y))" + using Pz distributed_marginal_eq_joint[OF P S Px Pz Pxz] + apply (intro TP.AE_pair_measure) + apply (auto simp: comp_def measurable_split_conv + intro!: measurable_snd'' borel_measurable_ereal_eq borel_measurable_ereal + SP.borel_measurable_positive_integral_snd measurable_compose[OF _ Pxz[THEN distributed_real_measurable]] + measurable_Pair + dest: distributed_real_AE distributed_real_measurable) + done + + note M = borel_measurable_divide borel_measurable_diff borel_measurable_times borel_measurable_ereal + measurable_compose[OF _ measurable_snd] + measurable_Pair + measurable_compose[OF _ Pxyz[THEN distributed_real_measurable]] + measurable_compose[OF _ Pxz[THEN distributed_real_measurable]] + measurable_compose[OF _ Pyz[THEN distributed_real_measurable]] + measurable_compose[OF _ Pz[THEN distributed_real_measurable]] + measurable_compose[OF _ Px[THEN distributed_real_measurable]] + STP.borel_measurable_positive_integral_snd + have "(\\<^isup>+ x. ?f x \?P) \ (\\<^isup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \(S \\<^isub>M T \\<^isub>M P))" + apply (subst positive_integral_density) + apply (rule distributed_borel_measurable[OF Pxyz]) + apply (rule distributed_AE[OF Pxyz]) + apply (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M) [] + apply (rule positive_integral_mono_AE) + using ae5 ae6 ae7 ae8 + apply eventually_elim + apply (auto intro!: divide_nonneg_nonneg mult_nonneg_nonneg) + done + also have "\ = (\\<^isup>+(y, z). \\<^isup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \S \T \\<^isub>M P)" + by (subst STP.positive_integral_snd_measurable[symmetric]) + (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M) + also have "\ = (\\<^isup>+x. ereal (Pyz x) * 1 \T \\<^isub>M P)" + apply (rule positive_integral_cong_AE) + using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space + apply eventually_elim + proof (case_tac x, simp del: times_ereal.simps add: space_pair_measure) + fix a b assume "Pz b = 0 \ Pyz (a, b) = 0" "0 \ Pz b" "a \ space T \ b \ space P" + "(\\<^isup>+ x. ereal (Pxz (x, b)) \S) = ereal (Pz b)" "0 \ Pyz (a, b)" + then show "(\\<^isup>+ x. ereal (Pxz (x, b)) * ereal (Pyz (a, b) / Pz b) \S) = ereal (Pyz (a, b))" + apply (subst positive_integral_multc) + apply (auto intro!: borel_measurable_ereal divide_nonneg_nonneg + measurable_compose[OF _ Pxz[THEN distributed_real_measurable]] measurable_Pair + split: prod.split) + done + qed + also have "\ = 1" + using Q.emeasure_space_1 distributed_AE[OF Pyz] distributed_distr_eq_density[OF Pyz] + by (subst positive_integral_density[symmetric]) (auto intro!: M) + finally have le1: "(\\<^isup>+ x. ?f x \?P) \ 1" . + also have "\ < \" by simp + finally have fin: "(\\<^isup>+ x. ?f x \?P) \ \" by simp + + have pos: "(\\<^isup>+ x. ?f x \?P) \ 0" + apply (subst positive_integral_density) + apply (rule distributed_borel_measurable[OF Pxyz]) + apply (rule distributed_AE[OF Pxyz]) + apply (auto simp add: borel_measurable_ereal_iff split_beta' intro!: M) [] + apply (simp add: split_beta') + proof + let ?g = "\x. ereal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))" + assume "(\\<^isup>+ x. ?g x \(S \\<^isub>M T \\<^isub>M P)) = 0" + then have "AE x in S \\<^isub>M T \\<^isub>M P. ?g x \ 0" + by (intro positive_integral_0_iff_AE[THEN iffD1]) (auto intro!: M borel_measurable_ereal measurable_If) + then have "AE x in S \\<^isub>M T \\<^isub>M P. Pxyz x = 0" + using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] + by eventually_elim (auto split: split_if_asm simp: mult_le_0_iff divide_le_0_iff) + then have "(\\<^isup>+ x. ereal (Pxyz x) \S \\<^isub>M T \\<^isub>M P) = 0" + by (subst positive_integral_cong_AE[of _ "\x. 0"]) auto + with P.emeasure_space_1 show False + by (subst (asm) emeasure_density) (auto intro!: M cong: positive_integral_cong) + qed + + have neg: "(\\<^isup>+ x. - ?f x \?P) = 0" + apply (rule positive_integral_0_iff_AE[THEN iffD2]) + apply (auto intro!: M simp: split_beta') [] + apply (subst AE_density) + apply (auto intro!: M simp: split_beta') [] + using ae5 ae6 ae7 ae8 + apply eventually_elim + apply (auto intro!: mult_nonneg_nonneg divide_nonneg_nonneg) + done + + have I3: "integrable (S \\<^isub>M T \\<^isub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" + apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integral_diff(1)[OF I1 I2]]) + using ae + apply (auto intro!: M simp: split_beta') + done + + have "- log b 1 \ - log b (integral\<^isup>L ?P ?f)" + proof (intro le_imp_neg_le log_le[OF b_gt_1]) + show "0 < integral\<^isup>L ?P ?f" + using neg pos fin positive_integral_positive[of ?P ?f] + by (cases "(\\<^isup>+ x. ?f x \?P)") (auto simp add: lebesgue_integral_def less_le split_beta') + show "integral\<^isup>L ?P ?f \ 1" + using neg le1 fin positive_integral_positive[of ?P ?f] + by (cases "(\\<^isup>+ x. ?f x \?P)") (auto simp add: lebesgue_integral_def split_beta' one_ereal_def) + qed + also have "- log b (integral\<^isup>L ?P ?f) \ (\ x. - log b (?f x) \?P)" + proof (rule P.jensens_inequality[where a=0 and b=1 and I="{0<..}"]) + show "AE x in ?P. ?f x \ {0<..}" + unfolding AE_density[OF distributed_borel_measurable[OF Pxyz]] + using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] + by eventually_elim (auto simp: divide_pos_pos mult_pos_pos) + show "integrable ?P ?f" + unfolding integrable_def + using fin neg by (auto intro!: M simp: split_beta') + show "integrable ?P (\x. - log b (?f x))" + apply (subst integral_density) + apply (auto intro!: M) [] + apply (auto intro!: M distributed_real_AE[OF Pxyz]) [] + apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') [] + apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3]) + apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') [] + apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') [] + using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] + apply eventually_elim + apply (auto simp: log_divide_eq log_mult_eq zero_le_mult_iff zero_less_mult_iff zero_less_divide_iff field_simps) + done + qed (auto simp: b_gt_1 minus_log_convex) + also have "\ = conditional_mutual_information b S T P X Y Z" + unfolding `?eq` + apply (subst integral_density) + apply (auto intro!: M) [] + apply (auto intro!: M distributed_real_AE[OF Pxyz]) [] + apply (auto intro!: M borel_measurable_uminus borel_measurable_log simp: split_beta') [] + apply (intro integral_cong_AE) + using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE] + apply eventually_elim + apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps) + done + finally show ?nonneg + by simp qed lemma (in information_space) conditional_mutual_information_eq: @@ -1031,86 +1084,21 @@ assumes X: "simple_function M X" and Y: "simple_function M Y" and Z: "simple_function M Z" shows "0 \ \(X ; Y | Z)" proof - - def Pz \ "\x. if x \ Z`space M then measure M (Z -` {x} \ space M) else 0" - def Pxz \ "\x. if x \ (\x. (X x, Z x))`space M then measure M ((\x. (X x, Z x)) -` {x} \ space M) else 0" - def Pyz \ "\x. if x \ (\x. (Y x, Z x))`space M then measure M ((\x. (Y x, Z x)) -` {x} \ space M) else 0" - def Pxyz \ "\x. if x \ (\x. (X x, Y x, Z x))`space M then measure M ((\x. (X x, Y x, Z x)) -` {x} \ space M) else 0" - let ?M = "X`space M \ Y`space M \ Z`space M" - - note XZ = simple_function_Pair[OF X Z] - note YZ = simple_function_Pair[OF Y Z] - note XYZ = simple_function_Pair[OF X simple_function_Pair[OF Y Z]] - have Pz: "simple_distributed M Z Pz" - using Z by (rule simple_distributedI) (auto simp: Pz_def) - have Pxz: "simple_distributed M (\x. (X x, Z x)) Pxz" - using XZ by (rule simple_distributedI) (auto simp: Pxz_def) - have Pyz: "simple_distributed M (\x. (Y x, Z x)) Pyz" - using YZ by (rule simple_distributedI) (auto simp: Pyz_def) - have Pxyz: "simple_distributed M (\x. (X x, Y x, Z x)) Pxyz" - using XYZ by (rule simple_distributedI) (auto simp: Pxyz_def) - - { fix z assume z: "z \ Z ` space M" then have "(\x\X ` space M. Pxz (x, z)) = Pz z" - using distributed_marginal_eq_joint_simple[OF X Pz Pxz z] - by (auto intro!: setsum_cong simp: Pxz_def) } - note marginal1 = this - - { fix z assume z: "z \ Z ` space M" then have "(\y\Y ` space M. Pyz (y, z)) = Pz z" - using distributed_marginal_eq_joint_simple[OF Y Pz Pyz z] - by (auto intro!: setsum_cong simp: Pyz_def) } - note marginal2 = this - - have "- \(X ; Y | Z) = - (\(x, y, z) \ ?M. Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" - unfolding conditional_mutual_information_eq[OF Pz Pyz Pxz Pxyz] - using X Y Z by (auto intro!: setsum_mono_zero_left simp: Pxyz_def simple_functionD) - also have "\ \ log b (\(x, y, z) \ ?M. Pxz (x, z) * (Pyz (y,z) / Pz z))" - unfolding split_beta' - proof (rule log_setsum_divide) - show "?M \ {}" using not_empty by simp - show "1 < b" using b_gt_1 . - - show "finite ?M" using X Y Z by (auto simp: simple_functionD) - - then show "(\x\?M. Pxyz (fst x, fst (snd x), snd (snd x))) = 1" - apply (subst Pxyz[THEN simple_distributed_setsum_space, symmetric]) - apply simp - apply (intro setsum_mono_zero_right) - apply (auto simp: Pxyz_def) - done - let ?N = "(\x. (X x, Y x, Z x)) ` space M" - fix x assume x: "x \ ?M" - let ?Q = "Pxyz (fst x, fst (snd x), snd (snd x))" - let ?P = "Pxz (fst x, snd (snd x)) * (Pyz (fst (snd x), snd (snd x)) / Pz (snd (snd x)))" - from x show "0 \ ?Q" "0 \ ?P" - using Pxyz[THEN simple_distributed, THEN distributed_real_AE] - using Pxz[THEN simple_distributed, THEN distributed_real_AE] - using Pyz[THEN simple_distributed, THEN distributed_real_AE] - using Pz[THEN simple_distributed, THEN distributed_real_AE] - by (auto intro!: mult_nonneg_nonneg divide_nonneg_nonneg simp: AE_count_space Pxyz_def Pxz_def Pyz_def Pz_def) - moreover assume "0 < ?Q" - moreover have "AE x in count_space ?N. Pz (snd (snd x)) = 0 \ Pxyz x = 0" - by (intro subdensity_real[of "\x. snd (snd x)", OF _ Pxyz[THEN simple_distributed] Pz[THEN simple_distributed]]) (auto intro: measurable_snd') - then have "\x. Pz (snd (snd x)) = 0 \ Pxyz x = 0" - by (auto simp: Pz_def Pxyz_def AE_count_space) - moreover have "AE x in count_space ?N. Pxz (fst x, snd (snd x)) = 0 \ Pxyz x = 0" - by (intro subdensity_real[of "\x. (fst x, snd (snd x))", OF _ Pxyz[THEN simple_distributed] Pxz[THEN simple_distributed]]) (auto intro: measurable_Pair measurable_snd') - then have "\x. Pxz (fst x, snd (snd x)) = 0 \ Pxyz x = 0" - by (auto simp: Pz_def Pxyz_def AE_count_space) - moreover have "AE x in count_space ?N. Pyz (snd x) = 0 \ Pxyz x = 0" - by (intro subdensity_real[of snd, OF _ Pxyz[THEN simple_distributed] Pyz[THEN simple_distributed]]) (auto intro: measurable_Pair) - then have "\x. Pyz (snd x) = 0 \ Pxyz x = 0" - by (auto simp: Pz_def Pxyz_def AE_count_space) - ultimately show "0 < ?P" using x by (auto intro!: divide_pos_pos mult_pos_pos simp: less_le) - qed - also have "(\(x, y, z) \ ?M. Pxz (x, z) * (Pyz (y,z) / Pz z)) = (\z\Z`space M. Pz z)" - apply (simp add: setsum_cartesian_product') - apply (subst setsum_commute) - apply (subst (2) setsum_commute) - apply (auto simp: setsum_divide_distrib[symmetric] setsum_product[symmetric] marginal1 marginal2 - intro!: setsum_cong) - done - also have "log b (\z\Z`space M. Pz z) = 0" - using Pz[THEN simple_distributed_setsum_space] by simp - finally show ?thesis by simp + have [simp]: "count_space (X ` space M) \\<^isub>M count_space (Y ` space M) \\<^isub>M count_space (Z ` space M) = + count_space (X`space M \ Y`space M \ Z`space M)" + by (simp add: pair_measure_count_space X Y Z simple_functionD) + note sf = sigma_finite_measure_count_space_finite[OF simple_functionD(1)] + note sd = simple_distributedI[OF _ refl] + note sp = simple_function_Pair + show ?thesis + apply (rule conditional_mutual_information_generic_nonneg[OF sf[OF X] sf[OF Y] sf[OF Z]]) + apply (rule simple_distributed[OF sd[OF X]]) + apply (rule simple_distributed[OF sd[OF Z]]) + apply (rule simple_distributed_joint[OF sd[OF sp[OF Y Z]]]) + apply (rule simple_distributed_joint[OF sd[OF sp[OF X Z]]]) + apply (rule simple_distributed_joint2[OF sd[OF sp[OF X sp[OF Y Z]]]]) + apply (auto intro!: integrable_count_space simp: X Y Z simple_functionD) + done qed subsection {* Conditional Entropy *}