src/HOL/Probability/Information.thy
changeset 49791 e0854abfb3fc
parent 49790 6b9b9ebba47d
child 49792 43f49922811d
--- a/src/HOL/Probability/Information.thy	Wed Oct 10 12:12:27 2012 +0200
+++ b/src/HOL/Probability/Information.thy	Wed Oct 10 12:12:28 2012 +0200
@@ -566,21 +566,31 @@
   entropy_Pow ("\<H>'(_')") where
   "\<H>(X) \<equiv> entropy b (count_space (X`space M)) X"
 
+lemma (in prob_space) distributed_RN_deriv:
+  assumes X: "distributed M S X Px"
+  shows "AE x in S. RN_deriv S (density S Px) x = Px x"
+proof -
+  note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X]
+  interpret X: prob_space "distr M S X"
+    using D(1) by (rule prob_space_distr)
+
+  have sf: "sigma_finite_measure (distr M S X)" by default
+  show ?thesis
+    using D
+    apply (subst eq_commute)
+    apply (intro RN_deriv_unique_sigma_finite)
+    apply (auto intro: divide_nonneg_nonneg measure_nonneg
+             simp: distributed_distr_eq_density[symmetric, OF X] sf)
+    done
+qed
+
 lemma (in information_space)
   fixes X :: "'a \<Rightarrow> 'b"
   assumes X: "distributed M MX X f"
   shows entropy_distr: "entropy b MX X = - (\<integral>x. f x * log b (f x) \<partial>MX)" (is ?eq)
 proof -
   note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X]
-    interpret X: prob_space "distr M MX X"
-    using D(1) by (rule prob_space_distr)
-
-  have sf: "sigma_finite_measure (distr M MX X)" by default
-  have ae: "AE x in MX. f x = RN_deriv MX (density MX f) x"
-    using D
-    by (intro RN_deriv_unique_sigma_finite)
-       (auto intro: divide_nonneg_nonneg measure_nonneg
-             simp: distributed_distr_eq_density[symmetric, OF X] sf)
+  note ae = distributed_RN_deriv[OF X]
 
   have ae_eq: "AE x in distr M MX X. log b (real (RN_deriv MX (distr M MX X) x)) =
     log b (f x)"
@@ -588,7 +598,6 @@
     apply (subst AE_density)
     using D apply simp
     using ae apply eventually_elim
-    apply (subst (asm) eq_commute)
     apply auto
     done
 
@@ -1112,34 +1121,64 @@
 subsection {* Conditional Entropy *}
 
 definition (in prob_space)
-  "conditional_entropy b S T X Y = entropy b (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) - entropy b T Y"
+  "conditional_entropy b S T X Y = - (\<integral>(x, y). log b (real (RN_deriv (S \<Otimes>\<^isub>M T) (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) (x, y)) / 
+    real (RN_deriv T (distr M T Y) y)) \<partial>distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)))"
 
 abbreviation (in information_space)
   conditional_entropy_Pow ("\<H>'(_ | _')") where
   "\<H>(X | Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y"
 
-lemma (in information_space)
+lemma (in information_space) conditional_entropy_generic_eq:
+  fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
+  assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T"
+  assumes Py: "distributed M T Y Py"
+  assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy"
+  shows "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))"
+proof -
+  interpret S: sigma_finite_measure S by fact
+  interpret T: sigma_finite_measure T by fact
+  interpret ST: pair_sigma_finite S T ..
+
+  have "AE x in density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Pxy x)). Pxy x = real (RN_deriv (S \<Otimes>\<^isub>M T) (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) x)"
+    unfolding AE_density[OF distributed_borel_measurable, OF Pxy]
+    unfolding distributed_distr_eq_density[OF Pxy]
+    using distributed_RN_deriv[OF Pxy]
+    by auto
+  moreover
+  have "AE x in density (S \<Otimes>\<^isub>M T) (\<lambda>x. ereal (Pxy x)). Py (snd x) = real (RN_deriv T (distr M T Y) (snd x))"
+    unfolding AE_density[OF distributed_borel_measurable, OF Pxy]
+    unfolding distributed_distr_eq_density[OF Py]
+    apply (rule ST.AE_pair_measure)
+    apply (auto intro!: sets_Collect borel_measurable_eq measurable_compose[OF _ distributed_real_measurable[OF Py]]
+                        distributed_real_measurable[OF Pxy] distributed_real_AE[OF Py]
+                        borel_measurable_real_of_ereal measurable_compose[OF _ borel_measurable_RN_deriv_density])
+    using distributed_RN_deriv[OF Py]
+    apply auto
+    done    
+  ultimately
+  have "conditional_entropy b S T X Y = - (\<integral>x. Pxy x * log b (Pxy x / Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))"
+    unfolding conditional_entropy_def neg_equal_iff_equal
+    apply (subst integral_density(1)[symmetric])
+    apply (auto simp: distributed_real_measurable[OF Pxy] distributed_real_AE[OF Pxy]
+                      measurable_compose[OF _ distributed_real_measurable[OF Py]]
+                      distributed_distr_eq_density[OF Pxy]
+                intro!: integral_cong_AE)
+    done
+  then show ?thesis by (simp add: split_beta')
+qed
+
+lemma (in information_space) conditional_entropy_eq_entropy:
   fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T"
   assumes Py: "distributed M T Y Py"
   assumes Pxy: "distributed M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) Pxy"
   assumes I1: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Pxy x))"
   assumes I2: "integrable (S \<Otimes>\<^isub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))"
-  shows conditional_entropy_generic_eq: "conditional_entropy b S T X Y = - (\<integral>(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) \<partial>(S \<Otimes>\<^isub>M T))" (is ?eq)
+  shows "conditional_entropy b S T X Y = entropy b (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x)) - entropy b T Y"
 proof -
   interpret S: sigma_finite_measure S by fact
   interpret T: sigma_finite_measure T by fact
   interpret ST: pair_sigma_finite S T ..
-  have ST: "sigma_finite_measure (S \<Otimes>\<^isub>M T)" ..
-
-  let ?P = "density (S \<Otimes>\<^isub>M T) Pxy"
-  interpret Pxy: prob_space ?P
-    unfolding Pxy[THEN distributed_distr_eq_density, symmetric]
-    using Pxy[THEN distributed_measurable] by (rule prob_space_distr)
-
-  from Py Pxy have distr_eq: "distr M T Y =
-    distr (distr M (S \<Otimes>\<^isub>M T) (\<lambda>x. (X x, Y x))) T snd"
-    by (subst distr_distr[OF measurable_snd]) (auto dest: distributed_measurable simp: comp_def)
 
   have "entropy b T Y = - (\<integral>y. Py y * log b (Py y) \<partial>T)"
     by (rule entropy_distr[OF Py])
@@ -1147,27 +1186,44 @@
     using b_gt_1 Py[THEN distributed_real_measurable]
     by (subst distributed_transform_integral[OF Pxy Py, where T=snd]) (auto intro!: integral_cong)
   finally have e_eq: "entropy b T Y = - (\<integral>(x,y). Pxy (x,y) * log b (Py y) \<partial>(S \<Otimes>\<^isub>M T))" .
-  
+
   have ae2: "AE x in S \<Otimes>\<^isub>M T. Py (snd x) = 0 \<longrightarrow> Pxy x = 0"
     by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair)
   moreover have ae4: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Py (snd x)"
     using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'' dest: distributed_real_AE distributed_real_measurable)
   moreover note ae5 = Pxy[THEN distributed_real_AE]
-  ultimately have pos: "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Pxy x \<and> 0 \<le> Py (snd x) \<and>
+  ultimately have "AE x in S \<Otimes>\<^isub>M T. 0 \<le> Pxy x \<and> 0 \<le> Py (snd x) \<and>
     (Pxy x = 0 \<or> (Pxy x \<noteq> 0 \<longrightarrow> 0 < Pxy x \<and> 0 < Py (snd x)))"
     by eventually_elim auto
-
-  from pos have ae: "AE x in S \<Otimes>\<^isub>M T.
+  then have ae: "AE x in S \<Otimes>\<^isub>M T.
      Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / Py (snd x))"
     by eventually_elim (auto simp: log_simps mult_pos_pos field_simps b_gt_1)
-  with I1 I2 show ?eq
-    unfolding conditional_entropy_def
-    apply (subst e_eq)
-    apply (subst entropy_distr[OF Pxy])
-    unfolding minus_diff_minus
-    apply (subst integral_diff(2)[symmetric])
-    apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff)
+  have "conditional_entropy b S T X Y = 
+    - (\<integral>x. Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))"
+    unfolding conditional_entropy_generic_eq[OF S T Py Pxy] neg_equal_iff_equal
+    apply (intro integral_cong_AE)
+    using ae
+    apply eventually_elim
+    apply auto
     done
+  also have "\<dots> = - (\<integral>x. Pxy x * log b (Pxy x) \<partial>(S \<Otimes>\<^isub>M T)) - - (\<integral>x.  Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^isub>M T))"
+    by (simp add: integral_diff[OF I1 I2])
+  finally show ?thesis 
+    unfolding conditional_entropy_generic_eq[OF S T Py Pxy] entropy_distr[OF Pxy] e_eq
+    by (simp add: split_beta')
+qed
+
+lemma (in information_space) conditional_entropy_eq_entropy_simple:
+  assumes X: "simple_function M X" and Y: "simple_function M Y"
+  shows "\<H>(X | Y) = entropy b (count_space (X`space M) \<Otimes>\<^isub>M count_space (Y`space M)) (\<lambda>x. (X x, Y x)) - \<H>(Y)"
+proof -
+  have "count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M) = count_space (X`space M \<times> Y`space M)"
+    (is "?P = ?C") using X Y by (simp add: simple_functionD pair_measure_count_space)
+  show ?thesis
+    by (rule conditional_entropy_eq_entropy sigma_finite_measure_count_space_finite
+                 simple_functionD  X Y simple_distributed simple_distributedI[OF _ refl]
+                 simple_distributed_joint simple_function_Pair integrable_count_space)+
+       (auto simp: `?P = ?C` intro!: integrable_count_space simple_functionD  X Y)
 qed
 
 lemma (in information_space) conditional_entropy_eq:
@@ -1186,10 +1242,6 @@
   have "count_space (X ` space M) \<Otimes>\<^isub>M count_space (Y ` space M) = count_space (X`space M \<times> Y`space M)"
     (is "?P = ?C")
     using X Y by (simp add: simple_distributed_finite pair_measure_count_space)
-  with X Y show
-      "integrable ?P (\<lambda>x. ?f x * log b (?f x))"
-      "integrable ?P (\<lambda>x. ?f x * log b (Py (snd x)))"
-    by (auto intro!: integrable_count_space simp: simple_distributed_finite)
   have eq: "(\<lambda>(x, y). ?f (x, y) * log b (?f (x, y) / Py y)) =
     (\<lambda>x. if x \<in> (\<lambda>x. (X x, Y x)) ` space M then Pxy x * log b (Pxy x / Py (snd x)) else 0)"
     by auto
@@ -1331,7 +1383,7 @@
     using sigma_finite_measure_count_space_finite sigma_finite_measure_count_space_finite simple_distributed[OF X] simple_distributed[OF Y] simple_distributed_joint[OF XY]
     by (rule mutual_information_eq_entropy_conditional_entropy_distr) (auto simp: eq integrable_count_space)
   then show ?thesis
-    unfolding conditional_entropy_def by simp
+    unfolding conditional_entropy_eq_entropy_simple[OF sf_X sf_Y] by simp
 qed
 
 lemma (in information_space) mutual_information_nonneg_simple:
@@ -1388,7 +1440,7 @@
              cong del: setsum_cong  intro!: setsum_mono_zero_left)
   finally have "\<H>(\<lambda>x. (X x, Y x)) = entropy b (count_space (Y ` space M) \<Otimes>\<^isub>M count_space (X ` space M)) (\<lambda>x. (Y x, X x))" .
   then show ?thesis
-    unfolding conditional_entropy_def by simp
+    unfolding conditional_entropy_eq_entropy_simple[OF Y X] by simp
 qed
 
 lemma (in information_space) entropy_partition: