author hoelzl Wed Oct 10 12:12:28 2012 +0200 (2012-10-10) changeset 49791 e0854abfb3fc parent 49790 6b9b9ebba47d child 49792 43f49922811d
alternative definition of conditional entropy
```     1.1 --- a/src/HOL/Probability/Information.thy	Wed Oct 10 12:12:27 2012 +0200
1.2 +++ b/src/HOL/Probability/Information.thy	Wed Oct 10 12:12:28 2012 +0200
1.3 @@ -566,21 +566,31 @@
1.4    entropy_Pow ("\<H>'(_')") where
1.5    "\<H>(X) \<equiv> entropy b (count_space (X`space M)) X"
1.6
1.7 +lemma (in prob_space) distributed_RN_deriv:
1.8 +  assumes X: "distributed M S X Px"
1.9 +  shows "AE x in S. RN_deriv S (density S Px) x = Px x"
1.10 +proof -
1.11 +  note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X]
1.12 +  interpret X: prob_space "distr M S X"
1.13 +    using D(1) by (rule prob_space_distr)
1.14 +
1.15 +  have sf: "sigma_finite_measure (distr M S X)" by default
1.16 +  show ?thesis
1.17 +    using D
1.18 +    apply (subst eq_commute)
1.19 +    apply (intro RN_deriv_unique_sigma_finite)
1.20 +    apply (auto intro: divide_nonneg_nonneg measure_nonneg
1.21 +             simp: distributed_distr_eq_density[symmetric, OF X] sf)
1.22 +    done
1.23 +qed
1.24 +
1.25  lemma (in information_space)
1.26    fixes X :: "'a \<Rightarrow> 'b"
1.27    assumes X: "distributed M MX X f"
1.28    shows entropy_distr: "entropy b MX X = - (\<integral>x. f x * log b (f x) \<partial>MX)" (is ?eq)
1.29  proof -
1.30    note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X]
1.31 -    interpret X: prob_space "distr M MX X"
1.32 -    using D(1) by (rule prob_space_distr)
1.33 -
1.34 -  have sf: "sigma_finite_measure (distr M MX X)" by default
1.35 -  have ae: "AE x in MX. f x = RN_deriv MX (density MX f) x"
1.36 -    using D
1.37 -    by (intro RN_deriv_unique_sigma_finite)
1.38 -       (auto intro: divide_nonneg_nonneg measure_nonneg
1.39 -             simp: distributed_distr_eq_density[symmetric, OF X] sf)
1.40 +  note ae = distributed_RN_deriv[OF X]
1.41
1.42    have ae_eq: "AE x in distr M MX X. log b (real (RN_deriv MX (distr M MX X) x)) =
1.43      log b (f x)"
1.44 @@ -588,7 +598,6 @@
1.45      apply (subst AE_density)
1.46      using D apply simp
1.47      using ae apply eventually_elim
1.48 -    apply (subst (asm) eq_commute)
1.49      apply auto
1.50      done
1.51
1.52 @@ -1112,34 +1121,64 @@
1.53  subsection {* Conditional Entropy *}
1.54
1.55  definition (in prob_space)
1.56 -  "conditional_entropy b S T X Y = entropy b (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) - entropy b T Y"
1.57 +  "conditional_entropy b S T X Y = - (\<integral>(x, y). log b (real (RN_deriv (S \<Otimes>\<^isub>M T) (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) (x, y)) /
1.58 +    real (RN_deriv T (distr M T Y) y)) \<partial>distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)))"
1.59
1.60  abbreviation (in information_space)
1.61    conditional_entropy_Pow ("\<H>'(_ | _')") where
1.62    "\<H>(X | Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y"
1.63
1.64 -lemma (in information_space)
1.65 +lemma (in information_space) conditional_entropy_generic_eq:
1.66 +  fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
1.67 +  assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T"
1.68 +  assumes Py: "distributed M T Y Py"
1.69 +  assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy"
1.70 +  shows "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))"
1.71 +proof -
1.72 +  interpret S: sigma_finite_measure S by fact
1.73 +  interpret T: sigma_finite_measure T by fact
1.74 +  interpret ST: pair_sigma_finite S T ..
1.75 +
1.76 +  have "AE x in density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Pxy x)). Pxy x = real (RN_deriv (S \<Otimes>\<^isub>M T) (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) x)"
1.77 +    unfolding AE_density[OF distributed_borel_measurable, OF Pxy]
1.78 +    unfolding distributed_distr_eq_density[OF Pxy]
1.79 +    using distributed_RN_deriv[OF Pxy]
1.80 +    by auto
1.81 +  moreover
1.82 +  have "AE x in density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Pxy x)). Py (snd x) = real (RN_deriv T (distr M T Y) (snd x))"
1.83 +    unfolding AE_density[OF distributed_borel_measurable, OF Pxy]
1.84 +    unfolding distributed_distr_eq_density[OF Py]
1.85 +    apply (rule ST.AE_pair_measure)
1.86 +    apply (auto intro!: sets_Collect borel_measurable_eq measurable_compose[OF _ distributed_real_measurable[OF Py]]
1.87 +                        distributed_real_measurable[OF Pxy] distributed_real_AE[OF Py]
1.88 +                        borel_measurable_real_of_ereal measurable_compose[OF _ borel_measurable_RN_deriv_density])
1.89 +    using distributed_RN_deriv[OF Py]
1.90 +    apply auto
1.91 +    done
1.92 +  ultimately
1.93 +  have "conditional_entropy b S T X Y = - (\<integral>x. Pxy x * log b (Pxy x / Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))"
1.94 +    unfolding conditional_entropy_def neg_equal_iff_equal
1.95 +    apply (subst integral_density(1)[symmetric])
1.96 +    apply (auto simp: distributed_real_measurable[OF Pxy] distributed_real_AE[OF Pxy]
1.97 +                      measurable_compose[OF _ distributed_real_measurable[OF Py]]
1.98 +                      distributed_distr_eq_density[OF Pxy]
1.99 +                intro!: integral_cong_AE)
1.100 +    done
1.101 +  then show ?thesis by (simp add: split_beta')
1.102 +qed
1.103 +
1.104 +lemma (in information_space) conditional_entropy_eq_entropy:
1.105    fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
1.106    assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T"
1.107    assumes Py: "distributed M T Y Py"
1.108    assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy"
1.109    assumes I1: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Pxy x))"
1.110    assumes I2: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))"
1.111 -  shows conditional_entropy_generic_eq: "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))" (is ?eq)
1.112 +  shows "conditional_entropy b S T X Y = entropy b (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) - entropy b T Y"
1.113  proof -
1.114    interpret S: sigma_finite_measure S by fact
1.115    interpret T: sigma_finite_measure T by fact
1.116    interpret ST: pair_sigma_finite S T ..
1.117 -  have ST: "sigma_finite_measure (S \<Otimes>\<^isub>M T)" ..
1.118 -
1.119 -  let ?P = "density (S \<Otimes>\<^isub>M T) Pxy"
1.120 -  interpret Pxy: prob_space ?P
1.121 -    unfolding Pxy[THEN distributed_distr_eq_density, symmetric]
1.122 -    using Pxy[THEN distributed_measurable] by (rule prob_space_distr)
1.123 -
1.124 -  from Py Pxy have distr_eq: "distr M T Y =
1.125 -    distr (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) T snd"
1.126 -    by (subst distr_distr[OF measurable_snd]) (auto dest: distributed_measurable simp: comp_def)
1.127
1.128    have "entropy b T Y = - (\<integral>y. Py y * log b (Py y) \<partial>T)"
1.129      by (rule entropy_distr[OF Py])
1.130 @@ -1147,27 +1186,44 @@
1.131      using b_gt_1 Py[THEN distributed_real_measurable]
1.132      by (subst distributed_transform_integral[OF Pxy Py, where T=snd]) (auto intro!: integral_cong)
1.133    finally have e_eq: "entropy b T Y = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))" .
1.134 -
1.135 +
1.136    have ae2: "AE x in S \<Otimes>\<^isub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0"
1.137      by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair)
1.138    moreover have ae4: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Py (snd x)"
1.139      using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable)
1.140    moreover note ae5 = Pxy[THEN distributed_real_AE]
1.141 -  ultimately have pos: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Pxy x \<and> 0 \<le> Py (snd x) \<and>
1.142 +  ultimately have "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Pxy x \<and> 0 \<le> Py (snd x) \<and>
1.143      (Pxy x = 0 \<or> (Pxy x \<noteq> 0 \<longrightarrow> 0 < Pxy x \<and> 0 < Py (snd x)))"
1.144      by eventually_elim auto
1.145 -
1.146 -  from pos have ae: "AE x in S \<Otimes>\<^isub>M T.
1.147 +  then have ae: "AE x in S \<Otimes>\<^isub>M T.
1.148       Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / Py (snd x))"
1.149      by eventually_elim (auto simp: log_simps mult_pos_pos field_simps b_gt_1)
1.150 -  with I1 I2 show ?eq
1.151 -    unfolding conditional_entropy_def
1.152 -    apply (subst e_eq)
1.153 -    apply (subst entropy_distr[OF Pxy])
1.154 -    unfolding minus_diff_minus
1.155 -    apply (subst integral_diff(2)[symmetric])
1.156 -    apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff)
1.157 +  have "conditional_entropy b S T X Y =
1.158 +    - (\<integral>x. Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))"
1.159 +    unfolding conditional_entropy_generic_eq[OF S T Py Pxy] neg_equal_iff_equal
1.160 +    apply (intro integral_cong_AE)
1.161 +    using ae
1.162 +    apply eventually_elim
1.163 +    apply auto
1.164      done
1.165 +  also have "\<dots> = - (\<integral>x. Pxy x * log b (Pxy x) \<partial>(S \<Otimes>\<^isub>M T)) - - (\<integral>x.  Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))"
1.166 +    by (simp add: integral_diff[OF I1 I2])
1.167 +  finally show ?thesis
1.168 +    unfolding conditional_entropy_generic_eq[OF S T Py Pxy] entropy_distr[OF Pxy] e_eq
1.169 +    by (simp add: split_beta')
1.170 +qed
1.171 +
1.172 +lemma (in information_space) conditional_entropy_eq_entropy_simple:
1.173 +  assumes X: "simple_function M X" and Y: "simple_function M Y"
1.174 +  shows "\<H>(X | Y) = entropy b (count_space (X`space M) \<Otimes>\<^isub>M count_space (Y`space M)) (\<lambda>x. (X x, Y x)) - \<H>(Y)"
1.175 +proof -
1.176 +  have "count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M) = count_space (X`space M \<times> Y`space M)"
1.177 +    (is "?P = ?C") using X Y by (simp add: simple_functionD pair_measure_count_space)
1.178 +  show ?thesis
1.179 +    by (rule conditional_entropy_eq_entropy sigma_finite_measure_count_space_finite
1.180 +                 simple_functionD  X Y simple_distributed simple_distributedI[OF _ refl]
1.181 +                 simple_distributed_joint simple_function_Pair integrable_count_space)+
1.182 +       (auto simp: `?P = ?C` intro!: integrable_count_space simple_functionD  X Y)
1.183  qed
1.184
1.185  lemma (in information_space) conditional_entropy_eq:
1.186 @@ -1186,10 +1242,6 @@
1.187    have "count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M) = count_space (X`space M \<times> Y`space M)"
1.188      (is "?P = ?C")
1.189      using X Y by (simp add: simple_distributed_finite pair_measure_count_space)
1.190 -  with X Y show
1.191 -      "integrable ?P (\<lambda>x. ?f x * log b (?f x))"
1.192 -      "integrable ?P (\<lambda>x. ?f x * log b (Py (snd x)))"
1.193 -    by (auto intro!: integrable_count_space simp: simple_distributed_finite)
1.194    have eq: "(\<lambda>(x, y). ?f (x, y) * log b (?f (x, y) / Py y)) =
1.195      (\<lambda>x. if x \<in> (\<lambda>x. (X x, Y x)) ` space M then Pxy x * log b (Pxy x / Py (snd x)) else 0)"
1.196      by auto
1.197 @@ -1331,7 +1383,7 @@
1.198      using sigma_finite_measure_count_space_finite sigma_finite_measure_count_space_finite simple_distributed[OF X] simple_distributed[OF Y] simple_distributed_joint[OF XY]
1.199      by (rule mutual_information_eq_entropy_conditional_entropy_distr) (auto simp: eq integrable_count_space)
1.200    then show ?thesis
1.201 -    unfolding conditional_entropy_def by simp
1.202 +    unfolding conditional_entropy_eq_entropy_simple[OF sf_X sf_Y] by simp
1.203  qed
1.204
1.205  lemma (in information_space) mutual_information_nonneg_simple:
1.206 @@ -1388,7 +1440,7 @@
1.207               cong del: setsum_cong  intro!: setsum_mono_zero_left)
1.208    finally have "\<H>(\<lambda>x. (X x, Y x)) = entropy b (count_space (Y ` space M) \<Otimes>\<^isub>M count_space (X ` space M)) (\<lambda>x. (Y x, X x))" .
1.209    then show ?thesis
1.210 -    unfolding conditional_entropy_def by simp
1.211 +    unfolding conditional_entropy_eq_entropy_simple[OF Y X] by simp
1.212  qed
1.213
1.214  lemma (in information_space) entropy_partition:
```