# HG changeset patch # User hoelzl # Date 1307621074 -7200 # Node ID 60e181c4eae4d6e43eb0a415fb428a4879d067a7 # Parent 9ba256ad678191558fae2f275f68fea03778a873 lemma: independence is equal to mutual information = 0 diff -r 9ba256ad6781 -r 60e181c4eae4 src/HOL/Probability/Independent_Family.thy --- a/src/HOL/Probability/Independent_Family.thy Thu Jun 09 13:55:11 2011 +0200 +++ b/src/HOL/Probability/Independent_Family.thy Thu Jun 09 14:04:34 2011 +0200 @@ -117,6 +117,16 @@ using indep[unfolded indep_set_def, THEN indep_setsD, of UNIV "bool_case a b"] ev by (simp add: ac_simps UNIV_bool) +lemma (in prob_space) indep_var_eq: + "indep_var S X T Y \ + (random_variable S X \ random_variable T Y) \ + indep_set + (sigma_sets (space M) { X -` A \ space M | A. A \ sets S}) + (sigma_sets (space M) { Y -` A \ space M | A. A \ sets T})" + unfolding indep_var_def indep_vars_def indep_set_def UNIV_bool + by (intro arg_cong2[where f="op \"] arg_cong2[where f=indep_sets] ext) + (auto split: bool.split) + lemma (in prob_space) assumes indep: "indep_set A B" shows indep_setD_ev1: "A \ events" @@ -491,7 +501,7 @@ proof (simp add: sigma_algebra_iff2, safe) let ?A = "(\n. sigma_sets (space M) (UNION {n..} A))" interpret A: sigma_algebra "\space = space M, sets = A i\" for i by fact - { fix X x assume "X \ ?A" "x \ X" + { fix X x assume "X \ ?A" "x \ X" then have "\n. X \ sigma_sets (space M) (UNION {n..} A)" by auto from this[of 0] have "X \ sigma_sets (space M) (UNION UNIV A)" by simp then have "X \ space M" @@ -572,7 +582,7 @@ show "Int_stable \space = space M, sets = A m\" unfolding Int_stable_def using A.Int by auto qed - also have "(\b. sigma_sets (space M) (\m\bool_case {..n} {Suc n..} b. A m)) = + also have "(\b. sigma_sets (space M) (\m\bool_case {..n} {Suc n..} b. A m)) = bool_case (sigma_sets (space M) (\m\{..n}. A m)) (sigma_sets (space M) (\m\{Suc n..}. A m))" by (auto intro!: ext split: bool.split) finally have indep: "indep_set (sigma_sets (space M) (\m\{..n}. A m)) (sigma_sets (space M) (\m\{Suc n..}. A m))" @@ -732,7 +742,7 @@ by (auto simp del: vimage_Int intro!: exI[of _ "A \ B"] dest: Int_stableD) qed } note indep_sets_sigma_sets_iff[OF this, simp] - + { fix i assume "i \ I" { fix A assume "A \ sets (M' i)" then have "A \ sets (sigma (M' i))" by (auto simp: sets_sigma intro: sigma_sets.Basic) @@ -745,7 +755,7 @@ "space M \ {X i -` A \ space M |A. A \ sets (M' i)}" by (auto intro!: exI[of _ "space (M' i)"]) } note indep_sets_finite[OF I this, simp] - + have "(\A\\ i\I. {X i -` A \ space M |A. A \ sets (M' i)}. prob (INTER I A) = (\j\I. prob (A j))) = (\A\\ i\I. sets (M' i). prob ((\j\I. X j -` A j) \ space M) = (\x\I. prob (X x -` A x \ space M)))" (is "?L = ?R") @@ -847,7 +857,7 @@ by (simp_all add: product_algebra_def) show "A \ sets (sigma P.G)" using `A \ sets P.P` by (simp add: product_algebra_def) - + fix E assume E: "E \ sets P.G" then have "E \ sets P.P" by (simp add: sets_sigma sigma_sets.Basic product_algebra_def) @@ -915,10 +925,67 @@ finally show ?thesis . qed +lemma (in prob_space) + assumes "indep_var S X T Y" + shows indep_var_rv1: "random_variable S X" + and indep_var_rv2: "random_variable T Y" +proof - + have "\i\UNIV. random_variable (bool_case S T i) (bool_case X Y i)" + using assms unfolding indep_var_def indep_vars_def by auto + then show "random_variable S X" "random_variable T Y" + unfolding UNIV_bool by auto +qed + lemma (in prob_space) indep_var_distributionD: - assumes "indep_var Ma A Mb B" - assumes "Xa \ sets Ma" "Xb \ sets Mb" - shows "joint_distribution A B (Xa \ Xb) = distribution A Xa * distribution B Xb" - unfolding distribution_def using assms by (rule indep_varD) + assumes indep: "indep_var S X T Y" + defines "P \ S\measure := extreal\distribution X\ \\<^isub>M T\measure := extreal\distribution Y\" + assumes "A \ sets P" + shows "joint_distribution X Y A = finite_measure.\' P A" +proof - + from indep have rvs: "random_variable S X" "random_variable T Y" + by (blast dest: indep_var_rv1 indep_var_rv2)+ + + let ?S = "S\measure := extreal\distribution X\" + let ?T = "T\measure := extreal\distribution Y\" + interpret X: prob_space ?S by (rule distribution_prob_space) fact + interpret Y: prob_space ?T by (rule distribution_prob_space) fact + interpret XY: pair_prob_space ?S ?T by default + + let ?J = "XY.P\ measure := extreal \ joint_distribution X Y \" + interpret J: prob_space ?J + by (rule joint_distribution_prob_space) (simp_all add: rvs) + + have "finite_measure.\' (XY.P\ measure := extreal \ joint_distribution X Y \) A = XY.\' A" + proof (rule prob_space_unique_Int_stable) + show "Int_stable (pair_measure_generator ?S ?T)" (is "Int_stable ?P") + by fact + show "space ?P \ sets ?P" + unfolding space_pair_measure[simplified pair_measure_def space_sigma] + using X.top Y.top by (auto intro!: pair_measure_generatorI) + + show "prob_space ?J" by default + show "space ?J = space ?P" + by (simp add: pair_measure_generator_def space_pair_measure) + show "sets ?J = sets (sigma ?P)" + by (simp add: pair_measure_def) + + show "prob_space XY.P" by default + show "space XY.P = space ?P" "sets XY.P = sets (sigma ?P)" + by (simp_all add: pair_measure_generator_def pair_measure_def) + + show "A \ sets (sigma ?P)" + using `A \ sets P` unfolding P_def pair_measure_def by simp + + fix X assume "X \ sets ?P" + then obtain A B where "A \ sets S" "B \ sets T" "X = A \ B" + by (auto simp: sets_pair_measure_generator) + then show "J.\' X = XY.\' X" + unfolding J.\'_def XY.\'_def using indep + by (simp add: XY.pair_measure_times) + (simp add: distribution_def indep_varD) + qed + then show ?thesis + using `A \ sets P` unfolding P_def J.\'_def XY.\'_def by simp +qed end diff -r 9ba256ad6781 -r 60e181c4eae4 src/HOL/Probability/Information.thy --- a/src/HOL/Probability/Information.thy Thu Jun 09 13:55:11 2011 +0200 +++ b/src/HOL/Probability/Information.thy Thu Jun 09 14:04:34 2011 +0200 @@ -7,14 +7,10 @@ theory Information imports - Probability_Measure + Independent_Family "~~/src/HOL/Library/Convex" begin -lemma (in prob_space) not_zero_less_distribution[simp]: - "(\ 0 < distribution X A) \ distribution X A = 0" - using distribution_positive[of X A] by arith - lemma log_le: "1 < a \ 0 < x \ x \ y \ log a x \ log a y" by (subst log_le_cancel_iff) auto @@ -175,7 +171,211 @@ Kullback$-$Leibler distance. *} definition - "KL_divergence b M \ = \x. log b (real (RN_deriv M \ x)) \M\measure := \\" + "entropy_density b M \ = log b \ real \ RN_deriv M \" + +definition + "KL_divergence b M \ = integral\<^isup>L (M\measure := \\) (entropy_density b M \)" + +lemma (in information_space) measurable_entropy_density: + assumes ps: "prob_space (M\measure := \\)" + assumes ac: "absolutely_continuous \" + shows "entropy_density b M \ \ borel_measurable M" +proof - + interpret \: prob_space "M\measure := \\" by fact + have "measure_space (M\measure := \\)" by fact + from RN_deriv[OF this ac] b_gt_1 show ?thesis + unfolding entropy_density_def + by (intro measurable_comp) auto +qed + +lemma (in information_space) KL_gt_0: + assumes ps: "prob_space (M\measure := \\)" + assumes ac: "absolutely_continuous \" + assumes int: "integrable (M\ measure := \ \) (entropy_density b M \)" + assumes A: "A \ sets M" "\ A \ \ A" + shows "0 < KL_divergence b M \" +proof - + interpret \: prob_space "M\measure := \\" by fact + have ms: "measure_space (M\measure := \\)" by default + have fms: "finite_measure (M\measure := \\)" by default + note RN = RN_deriv[OF ms ac] + + from real_RN_deriv[OF fms ac] guess D . note D = this + with absolutely_continuous_AE[OF ms] ac + have D\: "AE x in M\measure := \\. RN_deriv M \ x = extreal (D x)" + by auto + + def f \ "\x. if D x = 0 then 1 else 1 / D x" + with D have f_borel: "f \ borel_measurable M" + by (auto intro!: measurable_If) + + have "KL_divergence b M \ = 1 / ln b * (\ x. ln b * entropy_density b M \ x \M\measure := \\)" + unfolding KL_divergence_def using int b_gt_1 + by (simp add: integral_cmult) + + { fix A assume "A \ sets M" + with RN D have "\.\ A = (\\<^isup>+ x. extreal (D x) * indicator A x \M)" + by (auto intro!: positive_integral_cong_AE) } + note D_density = this + + have ln_entropy: "(\x. ln b * entropy_density b M \ x) \ borel_measurable M" + using measurable_entropy_density[OF ps ac] by auto + + have "integrable (M\measure := \\) (\x. ln b * entropy_density b M \ x)" + using int by auto + moreover have "integrable (M\measure := \\) (\x. ln b * entropy_density b M \ x) \ + integrable M (\x. D x * (ln b * entropy_density b M \ x))" + using D D_density ln_entropy + by (intro integral_translated_density) auto + ultimately have M_int: "integrable M (\x. D x * (ln b * entropy_density b M \ x))" + by simp + + have D_neg: "(\\<^isup>+ x. extreal (- D x) \M) = 0" + using D by (subst positive_integral_0_iff_AE) auto + + have "(\\<^isup>+ x. extreal (D x) \M) = \ (space M)" + using RN D by (auto intro!: positive_integral_cong_AE) + then have D_pos: "(\\<^isup>+ x. extreal (D x) \M) = 1" + using \.measure_space_1 by simp + + have "integrable M D" + using D_pos D_neg D by (auto simp: integrable_def) + + have "integral\<^isup>L M D = 1" + using D_pos D_neg by (auto simp: lebesgue_integral_def) + + let ?D_set = "{x\space M. D x \ 0}" + have [simp, intro]: "?D_set \ sets M" + using D by (auto intro: sets_Collect) + + have "0 \ 1 - \' ?D_set" + using prob_le_1 by (auto simp: field_simps) + also have "\ = (\ x. D x - indicator ?D_set x \M)" + using `integrable M D` `integral\<^isup>L M D = 1` + by (simp add: \'_def) + also have "\ < (\ x. D x * (ln b * entropy_density b M \ x) \M)" + proof (rule integral_less_AE) + show "integrable M (\x. D x - indicator ?D_set x)" + using `integrable M D` + by (intro integral_diff integral_indicator) auto + next + show "integrable M (\x. D x * (ln b * entropy_density b M \ x))" + by fact + next + show "\ {x\space M. D x \ 1 \ D x \ 0} \ 0" + proof + assume eq_0: "\ {x\space M. D x \ 1 \ D x \ 0} = 0" + then have disj: "AE x. D x = 1 \ D x = 0" + using D(1) by (auto intro!: AE_I[OF subset_refl] sets_Collect) + + have "\ {x\space M. D x = 1} = (\\<^isup>+ x. indicator {x\space M. D x = 1} x \M)" + using D(1) by auto + also have "\ = (\\<^isup>+ x. extreal (D x) * indicator {x\space M. D x \ 0} x \M)" + using disj by (auto intro!: positive_integral_cong_AE simp: indicator_def one_extreal_def) + also have "\ = \ {x\space M. D x \ 0}" + using D(1) D_density by auto + also have "\ = \ (space M)" + using D_density D(1) by (auto intro!: positive_integral_cong simp: indicator_def) + finally have "AE x. D x = 1" + using D(1) \.measure_space_1 by (intro AE_I_eq_1) auto + then have "(\\<^isup>+x. indicator A x\M) = (\\<^isup>+x. extreal (D x) * indicator A x\M)" + by (intro positive_integral_cong_AE) (auto simp: one_extreal_def[symmetric]) + also have "\ = \ A" + using `A \ sets M` D_density by simp + finally show False using `A \ sets M` `\ A \ \ A` by simp + qed + show "{x\space M. D x \ 1 \ D x \ 0} \ sets M" + using D(1) by (auto intro: sets_Collect) + + show "AE t. t \ {x\space M. D x \ 1 \ D x \ 0} \ + D t - indicator ?D_set t \ D t * (ln b * entropy_density b M \ t)" + using D(2) + proof (elim AE_mp, safe intro!: AE_I2) + fix t assume Dt: "t \ space M" "D t \ 1" "D t \ 0" + and RN: "RN_deriv M \ t = extreal (D t)" + and eq: "D t - indicator ?D_set t = D t * (ln b * entropy_density b M \ t)" + + have "D t - 1 = D t - indicator ?D_set t" + using Dt by simp + also note eq + also have "D t * (ln b * entropy_density b M \ t) = - D t * ln (1 / D t)" + using RN b_gt_1 `D t \ 0` `0 \ D t` + by (simp add: entropy_density_def log_def ln_div less_le) + finally have "ln (1 / D t) = 1 / D t - 1" + using `D t \ 0` by (auto simp: field_simps) + from ln_eq_minus_one[OF _ this] `D t \ 0` `0 \ D t` `D t \ 1` + show False by auto + qed + + show "AE t. D t - indicator ?D_set t \ D t * (ln b * entropy_density b M \ t)" + using D(2) + proof (elim AE_mp, intro AE_I2 impI) + fix t assume "t \ space M" and RN: "RN_deriv M \ t = extreal (D t)" + show "D t - indicator ?D_set t \ D t * (ln b * entropy_density b M \ t)" + proof cases + assume asm: "D t \ 0" + then have "0 < D t" using `0 \ D t` by auto + then have "0 < 1 / D t" by auto + have "D t - indicator ?D_set t \ - D t * (1 / D t - 1)" + using asm `t \ space M` by (simp add: field_simps) + also have "- D t * (1 / D t - 1) \ - D t * ln (1 / D t)" + using ln_le_minus_one `0 < 1 / D t` by (intro mult_left_mono_neg) auto + also have "\ = D t * (ln b * entropy_density b M \ t)" + using `0 < D t` RN b_gt_1 + by (simp_all add: log_def ln_div entropy_density_def) + finally show ?thesis by simp + qed simp + qed + qed + also have "\ = (\ x. ln b * entropy_density b M \ x \M\measure := \\)" + using D D_density ln_entropy + by (intro integral_translated_density[symmetric]) auto + also have "\ = ln b * (\ x. entropy_density b M \ x \M\measure := \\)" + using int by (rule \.integral_cmult) + finally show "0 < KL_divergence b M \" + using b_gt_1 by (auto simp: KL_divergence_def zero_less_mult_iff) +qed + +lemma (in sigma_finite_measure) KL_eq_0: + assumes eq: "\A\sets M. \ A = measure M A" + shows "KL_divergence b M \ = 0" +proof - + have "AE x. 1 = RN_deriv M \ x" + proof (rule RN_deriv_unique) + show "measure_space (M\measure := \\)" + using eq by (intro measure_space_cong) auto + show "absolutely_continuous \" + unfolding absolutely_continuous_def using eq by auto + show "(\x. 1) \ borel_measurable M" "AE x. 0 \ (1 :: extreal)" by auto + fix A assume "A \ sets M" + with eq show "\ A = \\<^isup>+ x. 1 * indicator A x \M" by simp + qed + then have "AE x. log b (real (RN_deriv M \ x)) = 0" + by (elim AE_mp) simp + from integral_cong_AE[OF this] + have "integral\<^isup>L M (entropy_density b M \) = 0" + by (simp add: entropy_density_def comp_def) + with eq show "KL_divergence b M \ = 0" + unfolding KL_divergence_def + by (subst integral_cong_measure) auto +qed + +lemma (in information_space) KL_eq_0_imp: + assumes ps: "prob_space (M\measure := \\)" + assumes ac: "absolutely_continuous \" + assumes int: "integrable (M\ measure := \ \) (entropy_density b M \)" + assumes KL: "KL_divergence b M \ = 0" + shows "\A\sets M. \ A = \ A" + by (metis less_imp_neq KL_gt_0 assms) + +lemma (in information_space) KL_ge_0: + assumes ps: "prob_space (M\measure := \\)" + assumes ac: "absolutely_continuous \" + assumes int: "integrable (M\ measure := \ \) (entropy_density b M \)" + shows "0 \ KL_divergence b M \" + using KL_eq_0 KL_gt_0[OF ps ac int] + by (cases "\A\sets M. \ A = measure M A") (auto simp: le_less) + lemma (in sigma_finite_measure) KL_divergence_vimage: assumes T: "T \ measure_preserving M M'" @@ -209,7 +409,7 @@ have AE: "AE x. RN_deriv M' \' (T x) = RN_deriv M \ x" by (rule RN_deriv_vimage[OF T T' inv \']) show ?thesis - unfolding KL_divergence_def + unfolding KL_divergence_def entropy_density_def comp_def proof (subst \'.integral_vimage[OF sa T']) show "(\x. log b (real (RN_deriv M \ x))) \ borel_measurable (M\measure := \\)" by (auto intro!: RN_deriv[OF M ac] borel_measurable_log[OF _ `1 < b`]) @@ -233,9 +433,9 @@ proof - interpret \: measure_space ?\ by fact have "KL_divergence b M \ = \x. log b (real (RN_deriv N \' x)) \?\" - by (simp cong: RN_deriv_cong \.integral_cong add: KL_divergence_def) + by (simp cong: RN_deriv_cong \.integral_cong add: KL_divergence_def entropy_density_def) also have "\ = KL_divergence b N \'" - by (auto intro!: \.integral_cong_measure[symmetric] simp: KL_divergence_def) + by (auto intro!: \.integral_cong_measure[symmetric] simp: KL_divergence_def entropy_density_def comp_def) finally show ?thesis . qed @@ -243,7 +443,7 @@ assumes v: "finite_measure_space (M\measure := \\)" assumes ac: "absolutely_continuous \" shows "KL_divergence b M \ = (\x\space M. real (\ {x}) * log b (real (\