add lemma KL_divergence_vimage, mutual_information_generic
authorhoelzl
Wed Feb 23 11:40:18 2011 +0100 (2011-02-23)
changeset 41833563bea92b2c0
parent 41832 27cb9113b1a0
child 41834 2f8f2685e0c0
add lemma KL_divergence_vimage, mutual_information_generic
src/HOL/Probability/Information.thy
     1.1 --- a/src/HOL/Probability/Information.thy	Wed Feb 23 11:40:17 2011 +0100
     1.2 +++ b/src/HOL/Probability/Information.thy	Wed Feb 23 11:40:18 2011 +0100
     1.3 @@ -167,6 +167,53 @@
     1.4  definition
     1.5    "KL_divergence b M \<nu> = \<integral>x. log b (real (RN_deriv M \<nu> x)) \<partial>M\<lparr>measure := \<nu>\<rparr>"
     1.6  
     1.7 +lemma (in sigma_finite_measure) KL_divergence_vimage:
     1.8 +  assumes T: "T \<in> measure_preserving M M'"
     1.9 +    and T': "T' \<in> measure_preserving (M'\<lparr> measure := \<nu>' \<rparr>) (M\<lparr> measure := \<nu> \<rparr>)"
    1.10 +    and inv: "\<And>x. x \<in> space M \<Longrightarrow> T' (T x) = x"
    1.11 +    and inv': "\<And>x. x \<in> space M' \<Longrightarrow> T (T' x) = x"
    1.12 +  and \<nu>': "measure_space (M'\<lparr>measure := \<nu>'\<rparr>)" "measure_space.absolutely_continuous M' \<nu>'"
    1.13 +  and "1 < b"
    1.14 +  shows "KL_divergence b M' \<nu>' = KL_divergence b M \<nu>"
    1.15 +proof -
    1.16 +  interpret \<nu>': measure_space "M'\<lparr>measure := \<nu>'\<rparr>" by fact
    1.17 +  have M: "measure_space (M\<lparr> measure := \<nu>\<rparr>)"
    1.18 +    by (rule \<nu>'.measure_space_vimage[OF _ T'], simp) default
    1.19 +  have "sigma_algebra (M'\<lparr> measure := \<nu>'\<rparr>)" by default
    1.20 +  then have saM': "sigma_algebra M'" by simp
    1.21 +  then interpret M': measure_space M' by (rule measure_space_vimage) fact
    1.22 +  have ac: "absolutely_continuous \<nu>" unfolding absolutely_continuous_def
    1.23 +  proof safe
    1.24 +    fix N assume N: "N \<in> sets M" and N_0: "\<mu> N = 0"
    1.25 +    then have N': "T' -` N \<inter> space M' \<in> sets M'"
    1.26 +      using T' by (auto simp: measurable_def measure_preserving_def)
    1.27 +    have "T -` (T' -` N \<inter> space M') \<inter> space M = N"
    1.28 +      using inv T N sets_into_space[OF N] by (auto simp: measurable_def measure_preserving_def)
    1.29 +    then have "measure M' (T' -` N \<inter> space M') = 0"
    1.30 +      using measure_preservingD[OF T N'] N_0 by auto
    1.31 +    with \<nu>'(2) N' show "\<nu> N = 0" using measure_preservingD[OF T', of N] N
    1.32 +      unfolding M'.absolutely_continuous_def measurable_def by auto
    1.33 +  qed
    1.34 +
    1.35 +  have sa: "sigma_algebra (M\<lparr>measure := \<nu>\<rparr>)" by simp default
    1.36 +  have AE: "AE x. RN_deriv M' \<nu>' (T x) = RN_deriv M \<nu> x"
    1.37 +    by (rule RN_deriv_vimage[OF T T' inv \<nu>'])
    1.38 +  show ?thesis
    1.39 +    unfolding KL_divergence_def
    1.40 +  proof (subst \<nu>'.integral_vimage[OF sa T'])
    1.41 +    show "(\<lambda>x. log b (real (RN_deriv M \<nu> x))) \<in> borel_measurable (M\<lparr>measure := \<nu>\<rparr>)"
    1.42 +      by (auto intro!: RN_deriv[OF M ac] borel_measurable_log[OF _ `1 < b`])
    1.43 +    have "(\<integral> x. log b (real (RN_deriv M' \<nu>' x)) \<partial>M'\<lparr>measure := \<nu>'\<rparr>) =
    1.44 +      (\<integral> x. log b (real (RN_deriv M' \<nu>' (T (T' x)))) \<partial>M'\<lparr>measure := \<nu>'\<rparr>)" (is "?l = _")
    1.45 +      using inv' by (auto intro!: \<nu>'.integral_cong)
    1.46 +    also have "\<dots> = (\<integral> x. log b (real (RN_deriv M \<nu> (T' x))) \<partial>M'\<lparr>measure := \<nu>'\<rparr>)" (is "_ = ?r")
    1.47 +      using M ac AE
    1.48 +      by (intro \<nu>'.integral_cong_AE \<nu>'.almost_everywhere_vimage[OF sa T'] absolutely_continuous_AE[OF M])
    1.49 +         (auto elim!: AE_mp)
    1.50 +    finally show "?l = ?r" .
    1.51 +  qed
    1.52 +qed
    1.53 +
    1.54  lemma (in sigma_finite_measure) KL_divergence_cong:
    1.55    assumes "measure_space (M\<lparr>measure := \<nu>\<rparr>)" (is "measure_space ?\<nu>")
    1.56    assumes [simp]: "sets N = sets M" "space N = space M"
    1.57 @@ -236,18 +283,6 @@
    1.58      \<lparr> space = X`space M, sets = Pow (X`space M), measure = distribution X \<rparr>
    1.59      \<lparr> space = Y`space M, sets = Pow (Y`space M), measure = distribution Y \<rparr> X Y"
    1.60  
    1.61 -lemma algebra_measure_update[simp]:
    1.62 -  "algebra (M'\<lparr>measure := m\<rparr>) \<longleftrightarrow> algebra M'"
    1.63 -  unfolding algebra_def by simp
    1.64 -
    1.65 -lemma sigma_algebra_measure_update[simp]:
    1.66 -  "sigma_algebra (M'\<lparr>measure := m\<rparr>) \<longleftrightarrow> sigma_algebra M'"
    1.67 -  unfolding sigma_algebra_def sigma_algebra_axioms_def by simp
    1.68 -
    1.69 -lemma finite_sigma_algebra_measure_update[simp]:
    1.70 -  "finite_sigma_algebra (M'\<lparr>measure := m\<rparr>) \<longleftrightarrow> finite_sigma_algebra M'"
    1.71 -  unfolding finite_sigma_algebra_def finite_sigma_algebra_axioms_def by simp
    1.72 -
    1.73  lemma (in prob_space) finite_variables_absolutely_continuous:
    1.74    assumes X: "finite_random_variable S X" and Y: "finite_random_variable T Y"
    1.75    shows "measure_space.absolutely_continuous
    1.76 @@ -313,6 +348,32 @@
    1.77      unfolding mutual_information_def .
    1.78  qed
    1.79  
    1.80 +lemma (in information_space) mutual_information_commute_generic:
    1.81 +  assumes X: "random_variable S X" and Y: "random_variable T Y"
    1.82 +  assumes ac: "measure_space.absolutely_continuous
    1.83 +    (S\<lparr>measure := distribution X\<rparr> \<Otimes>\<^isub>M T\<lparr>measure := distribution Y\<rparr>) (joint_distribution X Y)"
    1.84 +  shows "mutual_information b S T X Y = mutual_information b T S Y X"
    1.85 +proof -
    1.86 +  let ?S = "S\<lparr>measure := distribution X\<rparr>" and ?T = "T\<lparr>measure := distribution Y\<rparr>"
    1.87 +  interpret S: prob_space ?S using X by (rule distribution_prob_space)
    1.88 +  interpret T: prob_space ?T using Y by (rule distribution_prob_space)
    1.89 +  interpret P: pair_prob_space ?S ?T ..
    1.90 +  interpret Q: pair_prob_space ?T ?S ..
    1.91 +  show ?thesis
    1.92 +    unfolding mutual_information_def
    1.93 +  proof (intro Q.KL_divergence_vimage[OF Q.measure_preserving_swap _ _ _ _ ac b_gt_1])
    1.94 +    show "(\<lambda>(x,y). (y,x)) \<in> measure_preserving
    1.95 +      (P.P \<lparr> measure := joint_distribution X Y\<rparr>) (Q.P \<lparr> measure := joint_distribution Y X\<rparr>)"
    1.96 +      using X Y unfolding measurable_def
    1.97 +      unfolding measure_preserving_def using P.pair_sigma_algebra_swap_measurable
    1.98 +      by (auto simp add: space_pair_measure distribution_def intro!: arg_cong[where f=\<mu>])
    1.99 +    have "prob_space (P.P\<lparr> measure := joint_distribution X Y\<rparr>)"
   1.100 +      using X Y by (auto intro!: distribution_prob_space random_variable_pairI)
   1.101 +    then show "measure_space (P.P\<lparr> measure := joint_distribution X Y\<rparr>)"
   1.102 +      unfolding prob_space_def by simp
   1.103 +  qed auto
   1.104 +qed
   1.105 +
   1.106  lemma (in information_space) mutual_information_commute:
   1.107    assumes X: "finite_random_variable S X" and Y: "finite_random_variable T Y"
   1.108    shows "mutual_information b S T X Y = mutual_information b T S Y X"
   1.109 @@ -323,7 +384,7 @@
   1.110  lemma (in information_space) mutual_information_commute_simple:
   1.111    assumes X: "simple_function M X" and Y: "simple_function M Y"
   1.112    shows "\<I>(X;Y) = \<I>(Y;X)"
   1.113 -  by (intro X Y simple_function_imp_finite_random_variable mutual_information_commute)
   1.114 +  by (intro mutual_information_commute X Y simple_function_imp_finite_random_variable)
   1.115  
   1.116  lemma (in information_space) mutual_information_eq:
   1.117    assumes "simple_function M X" "simple_function M Y"