alternative definition of conditional entropy
authorhoelzl
Wed Oct 10 12:12:28 2012 +0200 (2012-10-10)
changeset 49791e0854abfb3fc
parent 49790 6b9b9ebba47d
child 49792 43f49922811d
alternative definition of conditional entropy
src/HOL/Probability/Information.thy
     1.1 --- a/src/HOL/Probability/Information.thy	Wed Oct 10 12:12:27 2012 +0200
     1.2 +++ b/src/HOL/Probability/Information.thy	Wed Oct 10 12:12:28 2012 +0200
     1.3 @@ -566,21 +566,31 @@
     1.4    entropy_Pow ("\<H>'(_')") where
     1.5    "\<H>(X) \<equiv> entropy b (count_space (X`space M)) X"
     1.6  
     1.7 +lemma (in prob_space) distributed_RN_deriv:
     1.8 +  assumes X: "distributed M S X Px"
     1.9 +  shows "AE x in S. RN_deriv S (density S Px) x = Px x"
    1.10 +proof -
    1.11 +  note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X]
    1.12 +  interpret X: prob_space "distr M S X"
    1.13 +    using D(1) by (rule prob_space_distr)
    1.14 +
    1.15 +  have sf: "sigma_finite_measure (distr M S X)" by default
    1.16 +  show ?thesis
    1.17 +    using D
    1.18 +    apply (subst eq_commute)
    1.19 +    apply (intro RN_deriv_unique_sigma_finite)
    1.20 +    apply (auto intro: divide_nonneg_nonneg measure_nonneg
    1.21 +             simp: distributed_distr_eq_density[symmetric, OF X] sf)
    1.22 +    done
    1.23 +qed
    1.24 +
    1.25  lemma (in information_space)
    1.26    fixes X :: "'a \<Rightarrow> 'b"
    1.27    assumes X: "distributed M MX X f"
    1.28    shows entropy_distr: "entropy b MX X = - (\<integral>x. f x * log b (f x) \<partial>MX)" (is ?eq)
    1.29  proof -
    1.30    note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X]
    1.31 -    interpret X: prob_space "distr M MX X"
    1.32 -    using D(1) by (rule prob_space_distr)
    1.33 -
    1.34 -  have sf: "sigma_finite_measure (distr M MX X)" by default
    1.35 -  have ae: "AE x in MX. f x = RN_deriv MX (density MX f) x"
    1.36 -    using D
    1.37 -    by (intro RN_deriv_unique_sigma_finite)
    1.38 -       (auto intro: divide_nonneg_nonneg measure_nonneg
    1.39 -             simp: distributed_distr_eq_density[symmetric, OF X] sf)
    1.40 +  note ae = distributed_RN_deriv[OF X]
    1.41  
    1.42    have ae_eq: "AE x in distr M MX X. log b (real (RN_deriv MX (distr M MX X) x)) =
    1.43      log b (f x)"
    1.44 @@ -588,7 +598,6 @@
    1.45      apply (subst AE_density)
    1.46      using D apply simp
    1.47      using ae apply eventually_elim
    1.48 -    apply (subst (asm) eq_commute)
    1.49      apply auto
    1.50      done
    1.51  
    1.52 @@ -1112,34 +1121,64 @@
    1.53  subsection {* Conditional Entropy *}
    1.54  
    1.55  definition (in prob_space)
    1.56 -  "conditional_entropy b S T X Y = entropy b (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) - entropy b T Y"
    1.57 +  "conditional_entropy b S T X Y = - (\<integral>(x, y). log b (real (RN_deriv (S \<Otimes>\<^isub>M T) (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) (x, y)) / 
    1.58 +    real (RN_deriv T (distr M T Y) y)) \<partial>distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)))"
    1.59  
    1.60  abbreviation (in information_space)
    1.61    conditional_entropy_Pow ("\<H>'(_ | _')") where
    1.62    "\<H>(X | Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y"
    1.63  
    1.64 -lemma (in information_space)
    1.65 +lemma (in information_space) conditional_entropy_generic_eq:
    1.66 +  fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
    1.67 +  assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T"
    1.68 +  assumes Py: "distributed M T Y Py"
    1.69 +  assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy"
    1.70 +  shows "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))"
    1.71 +proof -
    1.72 +  interpret S: sigma_finite_measure S by fact
    1.73 +  interpret T: sigma_finite_measure T by fact
    1.74 +  interpret ST: pair_sigma_finite S T ..
    1.75 +
    1.76 +  have "AE x in density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Pxy x)). Pxy x = real (RN_deriv (S \<Otimes>\<^isub>M T) (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) x)"
    1.77 +    unfolding AE_density[OF distributed_borel_measurable, OF Pxy]
    1.78 +    unfolding distributed_distr_eq_density[OF Pxy]
    1.79 +    using distributed_RN_deriv[OF Pxy]
    1.80 +    by auto
    1.81 +  moreover
    1.82 +  have "AE x in density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Pxy x)). Py (snd x) = real (RN_deriv T (distr M T Y) (snd x))"
    1.83 +    unfolding AE_density[OF distributed_borel_measurable, OF Pxy]
    1.84 +    unfolding distributed_distr_eq_density[OF Py]
    1.85 +    apply (rule ST.AE_pair_measure)
    1.86 +    apply (auto intro!: sets_Collect borel_measurable_eq measurable_compose[OF _ distributed_real_measurable[OF Py]]
    1.87 +                        distributed_real_measurable[OF Pxy] distributed_real_AE[OF Py]
    1.88 +                        borel_measurable_real_of_ereal measurable_compose[OF _ borel_measurable_RN_deriv_density])
    1.89 +    using distributed_RN_deriv[OF Py]
    1.90 +    apply auto
    1.91 +    done    
    1.92 +  ultimately
    1.93 +  have "conditional_entropy b S T X Y = - (\<integral>x. Pxy x * log b (Pxy x / Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))"
    1.94 +    unfolding conditional_entropy_def neg_equal_iff_equal
    1.95 +    apply (subst integral_density(1)[symmetric])
    1.96 +    apply (auto simp: distributed_real_measurable[OF Pxy] distributed_real_AE[OF Pxy]
    1.97 +                      measurable_compose[OF _ distributed_real_measurable[OF Py]]
    1.98 +                      distributed_distr_eq_density[OF Pxy]
    1.99 +                intro!: integral_cong_AE)
   1.100 +    done
   1.101 +  then show ?thesis by (simp add: split_beta')
   1.102 +qed
   1.103 +
   1.104 +lemma (in information_space) conditional_entropy_eq_entropy:
   1.105    fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
   1.106    assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T"
   1.107    assumes Py: "distributed M T Y Py"
   1.108    assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy"
   1.109    assumes I1: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Pxy x))"
   1.110    assumes I2: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))"
   1.111 -  shows conditional_entropy_generic_eq: "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))" (is ?eq)
   1.112 +  shows "conditional_entropy b S T X Y = entropy b (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) - entropy b T Y"
   1.113  proof -
   1.114    interpret S: sigma_finite_measure S by fact
   1.115    interpret T: sigma_finite_measure T by fact
   1.116    interpret ST: pair_sigma_finite S T ..
   1.117 -  have ST: "sigma_finite_measure (S \<Otimes>\<^isub>M T)" ..
   1.118 -
   1.119 -  let ?P = "density (S \<Otimes>\<^isub>M T) Pxy"
   1.120 -  interpret Pxy: prob_space ?P
   1.121 -    unfolding Pxy[THEN distributed_distr_eq_density, symmetric]
   1.122 -    using Pxy[THEN distributed_measurable] by (rule prob_space_distr)
   1.123 -
   1.124 -  from Py Pxy have distr_eq: "distr M T Y =
   1.125 -    distr (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) T snd"
   1.126 -    by (subst distr_distr[OF measurable_snd]) (auto dest: distributed_measurable simp: comp_def)
   1.127  
   1.128    have "entropy b T Y = - (\<integral>y. Py y * log b (Py y) \<partial>T)"
   1.129      by (rule entropy_distr[OF Py])
   1.130 @@ -1147,27 +1186,44 @@
   1.131      using b_gt_1 Py[THEN distributed_real_measurable]
   1.132      by (subst distributed_transform_integral[OF Pxy Py, where T=snd]) (auto intro!: integral_cong)
   1.133    finally have e_eq: "entropy b T Y = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))" .
   1.134 -  
   1.135 +
   1.136    have ae2: "AE x in S \<Otimes>\<^isub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0"
   1.137      by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair)
   1.138    moreover have ae4: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Py (snd x)"
   1.139      using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable)
   1.140    moreover note ae5 = Pxy[THEN distributed_real_AE]
   1.141 -  ultimately have pos: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Pxy x \<and> 0 \<le> Py (snd x) \<and>
   1.142 +  ultimately have "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Pxy x \<and> 0 \<le> Py (snd x) \<and>
   1.143      (Pxy x = 0 \<or> (Pxy x \<noteq> 0 \<longrightarrow> 0 < Pxy x \<and> 0 < Py (snd x)))"
   1.144      by eventually_elim auto
   1.145 -
   1.146 -  from pos have ae: "AE x in S \<Otimes>\<^isub>M T.
   1.147 +  then have ae: "AE x in S \<Otimes>\<^isub>M T.
   1.148       Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / Py (snd x))"
   1.149      by eventually_elim (auto simp: log_simps mult_pos_pos field_simps b_gt_1)
   1.150 -  with I1 I2 show ?eq
   1.151 -    unfolding conditional_entropy_def
   1.152 -    apply (subst e_eq)
   1.153 -    apply (subst entropy_distr[OF Pxy])
   1.154 -    unfolding minus_diff_minus
   1.155 -    apply (subst integral_diff(2)[symmetric])
   1.156 -    apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff)
   1.157 +  have "conditional_entropy b S T X Y = 
   1.158 +    - (\<integral>x. Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))"
   1.159 +    unfolding conditional_entropy_generic_eq[OF S T Py Pxy] neg_equal_iff_equal
   1.160 +    apply (intro integral_cong_AE)
   1.161 +    using ae
   1.162 +    apply eventually_elim
   1.163 +    apply auto
   1.164      done
   1.165 +  also have "\<dots> = - (\<integral>x. Pxy x * log b (Pxy x) \<partial>(S \<Otimes>\<^isub>M T)) - - (\<integral>x.  Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))"
   1.166 +    by (simp add: integral_diff[OF I1 I2])
   1.167 +  finally show ?thesis 
   1.168 +    unfolding conditional_entropy_generic_eq[OF S T Py Pxy] entropy_distr[OF Pxy] e_eq
   1.169 +    by (simp add: split_beta')
   1.170 +qed
   1.171 +
   1.172 +lemma (in information_space) conditional_entropy_eq_entropy_simple:
   1.173 +  assumes X: "simple_function M X" and Y: "simple_function M Y"
   1.174 +  shows "\<H>(X | Y) = entropy b (count_space (X`space M) \<Otimes>\<^isub>M count_space (Y`space M)) (\<lambda>x. (X x, Y x)) - \<H>(Y)"
   1.175 +proof -
   1.176 +  have "count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M) = count_space (X`space M \<times> Y`space M)"
   1.177 +    (is "?P = ?C") using X Y by (simp add: simple_functionD pair_measure_count_space)
   1.178 +  show ?thesis
   1.179 +    by (rule conditional_entropy_eq_entropy sigma_finite_measure_count_space_finite
   1.180 +                 simple_functionD  X Y simple_distributed simple_distributedI[OF _ refl]
   1.181 +                 simple_distributed_joint simple_function_Pair integrable_count_space)+
   1.182 +       (auto simp: `?P = ?C` intro!: integrable_count_space simple_functionD  X Y)
   1.183  qed
   1.184  
   1.185  lemma (in information_space) conditional_entropy_eq:
   1.186 @@ -1186,10 +1242,6 @@
   1.187    have "count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M) = count_space (X`space M \<times> Y`space M)"
   1.188      (is "?P = ?C")
   1.189      using X Y by (simp add: simple_distributed_finite pair_measure_count_space)
   1.190 -  with X Y show
   1.191 -      "integrable ?P (\<lambda>x. ?f x * log b (?f x))"
   1.192 -      "integrable ?P (\<lambda>x. ?f x * log b (Py (snd x)))"
   1.193 -    by (auto intro!: integrable_count_space simp: simple_distributed_finite)
   1.194    have eq: "(\<lambda>(x, y). ?f (x, y) * log b (?f (x, y) / Py y)) =
   1.195      (\<lambda>x. if x \<in> (\<lambda>x. (X x, Y x)) ` space M then Pxy x * log b (Pxy x / Py (snd x)) else 0)"
   1.196      by auto
   1.197 @@ -1331,7 +1383,7 @@
   1.198      using sigma_finite_measure_count_space_finite sigma_finite_measure_count_space_finite simple_distributed[OF X] simple_distributed[OF Y] simple_distributed_joint[OF XY]
   1.199      by (rule mutual_information_eq_entropy_conditional_entropy_distr) (auto simp: eq integrable_count_space)
   1.200    then show ?thesis
   1.201 -    unfolding conditional_entropy_def by simp
   1.202 +    unfolding conditional_entropy_eq_entropy_simple[OF sf_X sf_Y] by simp
   1.203  qed
   1.204  
   1.205  lemma (in information_space) mutual_information_nonneg_simple:
   1.206 @@ -1388,7 +1440,7 @@
   1.207               cong del: setsum_cong  intro!: setsum_mono_zero_left)
   1.208    finally have "\<H>(\<lambda>x. (X x, Y x)) = entropy b (count_space (Y ` space M) \<Otimes>\<^isub>M count_space (X ` space M)) (\<lambda>x. (Y x, X x))" .
   1.209    then show ?thesis
   1.210 -    unfolding conditional_entropy_def by simp
   1.211 +    unfolding conditional_entropy_eq_entropy_simple[OF Y X] by simp
   1.212  qed
   1.213  
   1.214  lemma (in information_space) entropy_partition: