# HG changeset patch # User hoelzl # Date 1298457618 -3600 # Node ID 563bea92b2c05af525a5e5933a1e931a746daf16 # Parent 27cb9113b1a0760c51ae6be97b953a595b697907 add lemma KL_divergence_vimage, mutual_information_generic diff -r 27cb9113b1a0 -r 563bea92b2c0 src/HOL/Probability/Information.thy --- a/src/HOL/Probability/Information.thy Wed Feb 23 11:40:17 2011 +0100 +++ b/src/HOL/Probability/Information.thy Wed Feb 23 11:40:18 2011 +0100 @@ -167,6 +167,53 @@ definition "KL_divergence b M \ = \x. log b (real (RN_deriv M \ x)) \M\measure := \\" +lemma (in sigma_finite_measure) KL_divergence_vimage: + assumes T: "T \ measure_preserving M M'" + and T': "T' \ measure_preserving (M'\ measure := \' \) (M\ measure := \ \)" + and inv: "\x. x \ space M \ T' (T x) = x" + and inv': "\x. x \ space M' \ T (T' x) = x" + and \': "measure_space (M'\measure := \'\)" "measure_space.absolutely_continuous M' \'" + and "1 < b" + shows "KL_divergence b M' \' = KL_divergence b M \" +proof - + interpret \': measure_space "M'\measure := \'\" by fact + have M: "measure_space (M\ measure := \\)" + by (rule \'.measure_space_vimage[OF _ T'], simp) default + have "sigma_algebra (M'\ measure := \'\)" by default + then have saM': "sigma_algebra M'" by simp + then interpret M': measure_space M' by (rule measure_space_vimage) fact + have ac: "absolutely_continuous \" unfolding absolutely_continuous_def + proof safe + fix N assume N: "N \ sets M" and N_0: "\ N = 0" + then have N': "T' -` N \ space M' \ sets M'" + using T' by (auto simp: measurable_def measure_preserving_def) + have "T -` (T' -` N \ space M') \ space M = N" + using inv T N sets_into_space[OF N] by (auto simp: measurable_def measure_preserving_def) + then have "measure M' (T' -` N \ space M') = 0" + using measure_preservingD[OF T N'] N_0 by auto + with \'(2) N' show "\ N = 0" using measure_preservingD[OF T', of N] N + unfolding M'.absolutely_continuous_def measurable_def by auto + qed + + have sa: "sigma_algebra (M\measure := \\)" by simp default + have AE: "AE x. RN_deriv M' \' (T x) = RN_deriv M \ x" + by (rule RN_deriv_vimage[OF T T' inv \']) + show ?thesis + unfolding KL_divergence_def + proof (subst \'.integral_vimage[OF sa T']) + show "(\x. log b (real (RN_deriv M \ x))) \ borel_measurable (M\measure := \\)" + by (auto intro!: RN_deriv[OF M ac] borel_measurable_log[OF _ `1 < b`]) + have "(\ x. log b (real (RN_deriv M' \' x)) \M'\measure := \'\) = + (\ x. log b (real (RN_deriv M' \' (T (T' x)))) \M'\measure := \'\)" (is "?l = _") + using inv' by (auto intro!: \'.integral_cong) + also have "\ = (\ x. log b (real (RN_deriv M \ (T' x))) \M'\measure := \'\)" (is "_ = ?r") + using M ac AE + by (intro \'.integral_cong_AE \'.almost_everywhere_vimage[OF sa T'] absolutely_continuous_AE[OF M]) + (auto elim!: AE_mp) + finally show "?l = ?r" . + qed +qed + lemma (in sigma_finite_measure) KL_divergence_cong: assumes "measure_space (M\measure := \\)" (is "measure_space ?\") assumes [simp]: "sets N = sets M" "space N = space M" @@ -236,18 +283,6 @@ \ space = X`space M, sets = Pow (X`space M), measure = distribution X \ \ space = Y`space M, sets = Pow (Y`space M), measure = distribution Y \ X Y" -lemma algebra_measure_update[simp]: - "algebra (M'\measure := m\) \ algebra M'" - unfolding algebra_def by simp - -lemma sigma_algebra_measure_update[simp]: - "sigma_algebra (M'\measure := m\) \ sigma_algebra M'" - unfolding sigma_algebra_def sigma_algebra_axioms_def by simp - -lemma finite_sigma_algebra_measure_update[simp]: - "finite_sigma_algebra (M'\measure := m\) \ finite_sigma_algebra M'" - unfolding finite_sigma_algebra_def finite_sigma_algebra_axioms_def by simp - lemma (in prob_space) finite_variables_absolutely_continuous: assumes X: "finite_random_variable S X" and Y: "finite_random_variable T Y" shows "measure_space.absolutely_continuous @@ -313,6 +348,32 @@ unfolding mutual_information_def . qed +lemma (in information_space) mutual_information_commute_generic: + assumes X: "random_variable S X" and Y: "random_variable T Y" + assumes ac: "measure_space.absolutely_continuous + (S\measure := distribution X\ \\<^isub>M T\measure := distribution Y\) (joint_distribution X Y)" + shows "mutual_information b S T X Y = mutual_information b T S Y X" +proof - + let ?S = "S\measure := distribution X\" and ?T = "T\measure := distribution Y\" + interpret S: prob_space ?S using X by (rule distribution_prob_space) + interpret T: prob_space ?T using Y by (rule distribution_prob_space) + interpret P: pair_prob_space ?S ?T .. + interpret Q: pair_prob_space ?T ?S .. + show ?thesis + unfolding mutual_information_def + proof (intro Q.KL_divergence_vimage[OF Q.measure_preserving_swap _ _ _ _ ac b_gt_1]) + show "(\(x,y). (y,x)) \ measure_preserving + (P.P \ measure := joint_distribution X Y\) (Q.P \ measure := joint_distribution Y X\)" + using X Y unfolding measurable_def + unfolding measure_preserving_def using P.pair_sigma_algebra_swap_measurable + by (auto simp add: space_pair_measure distribution_def intro!: arg_cong[where f=\]) + have "prob_space (P.P\ measure := joint_distribution X Y\)" + using X Y by (auto intro!: distribution_prob_space random_variable_pairI) + then show "measure_space (P.P\ measure := joint_distribution X Y\)" + unfolding prob_space_def by simp + qed auto +qed + lemma (in information_space) mutual_information_commute: assumes X: "finite_random_variable S X" and Y: "finite_random_variable T Y" shows "mutual_information b S T X Y = mutual_information b T S Y X" @@ -323,7 +384,7 @@ lemma (in information_space) mutual_information_commute_simple: assumes X: "simple_function M X" and Y: "simple_function M Y" shows "\(X;Y) = \(Y;X)" - by (intro X Y simple_function_imp_finite_random_variable mutual_information_commute) + by (intro mutual_information_commute X Y simple_function_imp_finite_random_variable) lemma (in information_space) mutual_information_eq: assumes "simple_function M X" "simple_function M Y"