diff r 27cb9113b1a0 r 563bea92b2c0 src/HOL/Probability/Information.thy
 a/src/HOL/Probability/Information.thy Wed Feb 23 11:40:17 2011 +0100
+++ b/src/HOL/Probability/Information.thy Wed Feb 23 11:40:18 2011 +0100
@@ 167,6 +167,53 @@
definition
"KL_divergence b M \ = \x. log b (real (RN_deriv M \ x)) \M\measure := \\"
+lemma (in sigma_finite_measure) KL_divergence_vimage:
+ assumes T: "T \ measure_preserving M M'"
+ and T': "T' \ measure_preserving (M'\ measure := \' \) (M\ measure := \ \)"
+ and inv: "\x. x \ space M \ T' (T x) = x"
+ and inv': "\x. x \ space M' \ T (T' x) = x"
+ and \': "measure_space (M'\measure := \'\)" "measure_space.absolutely_continuous M' \'"
+ and "1 < b"
+ shows "KL_divergence b M' \' = KL_divergence b M \"
+proof 
+ interpret \': measure_space "M'\measure := \'\" by fact
+ have M: "measure_space (M\ measure := \\)"
+ by (rule \'.measure_space_vimage[OF _ T'], simp) default
+ have "sigma_algebra (M'\ measure := \'\)" by default
+ then have saM': "sigma_algebra M'" by simp
+ then interpret M': measure_space M' by (rule measure_space_vimage) fact
+ have ac: "absolutely_continuous \" unfolding absolutely_continuous_def
+ proof safe
+ fix N assume N: "N \ sets M" and N_0: "\ N = 0"
+ then have N': "T' ` N \ space M' \ sets M'"
+ using T' by (auto simp: measurable_def measure_preserving_def)
+ have "T ` (T' ` N \ space M') \ space M = N"
+ using inv T N sets_into_space[OF N] by (auto simp: measurable_def measure_preserving_def)
+ then have "measure M' (T' ` N \ space M') = 0"
+ using measure_preservingD[OF T N'] N_0 by auto
+ with \'(2) N' show "\ N = 0" using measure_preservingD[OF T', of N] N
+ unfolding M'.absolutely_continuous_def measurable_def by auto
+ qed
+
+ have sa: "sigma_algebra (M\measure := \\)" by simp default
+ have AE: "AE x. RN_deriv M' \' (T x) = RN_deriv M \ x"
+ by (rule RN_deriv_vimage[OF T T' inv \'])
+ show ?thesis
+ unfolding KL_divergence_def
+ proof (subst \'.integral_vimage[OF sa T'])
+ show "(\x. log b (real (RN_deriv M \ x))) \ borel_measurable (M\measure := \\)"
+ by (auto intro!: RN_deriv[OF M ac] borel_measurable_log[OF _ `1 < b`])
+ have "(\ x. log b (real (RN_deriv M' \' x)) \M'\measure := \'\) =
+ (\ x. log b (real (RN_deriv M' \' (T (T' x)))) \M'\measure := \'\)" (is "?l = _")
+ using inv' by (auto intro!: \'.integral_cong)
+ also have "\ = (\ x. log b (real (RN_deriv M \ (T' x))) \M'\measure := \'\)" (is "_ = ?r")
+ using M ac AE
+ by (intro \'.integral_cong_AE \'.almost_everywhere_vimage[OF sa T'] absolutely_continuous_AE[OF M])
+ (auto elim!: AE_mp)
+ finally show "?l = ?r" .
+ qed
+qed
+
lemma (in sigma_finite_measure) KL_divergence_cong:
assumes "measure_space (M\measure := \\)" (is "measure_space ?\")
assumes [simp]: "sets N = sets M" "space N = space M"
@@ 236,18 +283,6 @@
\ space = X`space M, sets = Pow (X`space M), measure = distribution X \
\ space = Y`space M, sets = Pow (Y`space M), measure = distribution Y \ X Y"
lemma algebra_measure_update[simp]:
 "algebra (M'\measure := m\) \ algebra M'"
 unfolding algebra_def by simp

lemma sigma_algebra_measure_update[simp]:
 "sigma_algebra (M'\measure := m\) \ sigma_algebra M'"
 unfolding sigma_algebra_def sigma_algebra_axioms_def by simp

lemma finite_sigma_algebra_measure_update[simp]:
 "finite_sigma_algebra (M'\measure := m\) \ finite_sigma_algebra M'"
 unfolding finite_sigma_algebra_def finite_sigma_algebra_axioms_def by simp

lemma (in prob_space) finite_variables_absolutely_continuous:
assumes X: "finite_random_variable S X" and Y: "finite_random_variable T Y"
shows "measure_space.absolutely_continuous
@@ 313,6 +348,32 @@
unfolding mutual_information_def .
qed
+lemma (in information_space) mutual_information_commute_generic:
+ assumes X: "random_variable S X" and Y: "random_variable T Y"
+ assumes ac: "measure_space.absolutely_continuous
+ (S\measure := distribution X\ \\<^isub>M T\measure := distribution Y\) (joint_distribution X Y)"
+ shows "mutual_information b S T X Y = mutual_information b T S Y X"
+proof 
+ let ?S = "S\measure := distribution X\" and ?T = "T\measure := distribution Y\"
+ interpret S: prob_space ?S using X by (rule distribution_prob_space)
+ interpret T: prob_space ?T using Y by (rule distribution_prob_space)
+ interpret P: pair_prob_space ?S ?T ..
+ interpret Q: pair_prob_space ?T ?S ..
+ show ?thesis
+ unfolding mutual_information_def
+ proof (intro Q.KL_divergence_vimage[OF Q.measure_preserving_swap _ _ _ _ ac b_gt_1])
+ show "(\(x,y). (y,x)) \ measure_preserving
+ (P.P \ measure := joint_distribution X Y\) (Q.P \ measure := joint_distribution Y X\)"
+ using X Y unfolding measurable_def
+ unfolding measure_preserving_def using P.pair_sigma_algebra_swap_measurable
+ by (auto simp add: space_pair_measure distribution_def intro!: arg_cong[where f=\])
+ have "prob_space (P.P\ measure := joint_distribution X Y\)"
+ using X Y by (auto intro!: distribution_prob_space random_variable_pairI)
+ then show "measure_space (P.P\ measure := joint_distribution X Y\)"
+ unfolding prob_space_def by simp
+ qed auto
+qed
+
lemma (in information_space) mutual_information_commute:
assumes X: "finite_random_variable S X" and Y: "finite_random_variable T Y"
shows "mutual_information b S T X Y = mutual_information b T S Y X"
@@ 323,7 +384,7 @@
lemma (in information_space) mutual_information_commute_simple:
assumes X: "simple_function M X" and Y: "simple_function M Y"
shows "\(X;Y) = \(Y;X)"
 by (intro X Y simple_function_imp_finite_random_variable mutual_information_commute)
+ by (intro mutual_information_commute X Y simple_function_imp_finite_random_variable)
lemma (in information_space) mutual_information_eq:
assumes "simple_function M X" "simple_function M Y"