src/HOL/Probability/Information.thy
changeset 56993 e5366291d6aa
parent 56571 f4635657d66f
child 56994 8d5e5ec1cac3
--- a/src/HOL/Probability/Information.thy	Mon May 19 11:27:02 2014 +0200
+++ b/src/HOL/Probability/Information.thy	Mon May 19 12:04:45 2014 +0200
@@ -79,37 +79,26 @@
 definition
   "KL_divergence b M N = integral\<^sup>L N (entropy_density b M N)"
 
-lemma (in information_space) measurable_entropy_density:
-  assumes ac: "absolutely_continuous M N" "sets N = events"
-  shows "entropy_density b M N \<in> borel_measurable M"
-proof -
-  from borel_measurable_RN_deriv[OF ac] b_gt_1 show ?thesis
-    unfolding entropy_density_def by auto
-qed
-
-lemma borel_measurable_RN_deriv_density[measurable (raw)]:
-  "f \<in> borel_measurable M \<Longrightarrow> RN_deriv M (density M f) \<in> borel_measurable M"
-  using borel_measurable_RN_deriv_density[of "\<lambda>x. max 0 (f x )" M]
-  by (simp add: density_max_0[symmetric])
+lemma measurable_entropy_density[measurable]: "entropy_density b M N \<in> borel_measurable M"
+  unfolding entropy_density_def by auto
 
 lemma (in sigma_finite_measure) KL_density:
   fixes f :: "'a \<Rightarrow> real"
   assumes "1 < b"
-  assumes f: "f \<in> borel_measurable M" "AE x in M. 0 \<le> f x"
+  assumes f[measurable]: "f \<in> borel_measurable M" and nn: "AE x in M. 0 \<le> f x"
   shows "KL_divergence b M (density M f) = (\<integral>x. f x * log b (f x) \<partial>M)"
   unfolding KL_divergence_def
-proof (subst integral_density)
-  show "entropy_density b M (density M (\<lambda>x. ereal (f x))) \<in> borel_measurable M"
+proof (subst integral_real_density)
+  show [measurable]: "entropy_density b M (density M (\<lambda>x. ereal (f x))) \<in> borel_measurable M"
     using f
     by (auto simp: comp_def entropy_density_def)
   have "density M (RN_deriv M (density M f)) = density M f"
-    using f by (intro density_RN_deriv_density) auto
+    using f nn by (intro density_RN_deriv_density) auto
   then have eq: "AE x in M. RN_deriv M (density M f) x = f x"
-    using f
-    by (intro density_unique)
-       (auto intro!: borel_measurable_log borel_measurable_RN_deriv_density simp: RN_deriv_density_nonneg)
+    using f nn by (intro density_unique) (auto simp: RN_deriv_nonneg)
   show "(\<integral>x. f x * entropy_density b M (density M (\<lambda>x. ereal (f x))) x \<partial>M) = (\<integral>x. f x * log b (f x) \<partial>M)"
     apply (intro integral_cong_AE)
+    apply measurable
     using eq
     apply eventually_elim
     apply (auto simp: entropy_density_def)
@@ -161,8 +150,10 @@
   then have D_pos: "(\<integral>\<^sup>+ x. ereal (D x) \<partial>M) = 1"
     using N.emeasure_space_1 by simp
 
-  have "integrable M D" "integral\<^sup>L M D = 1"
-    using D D_pos D_neg unfolding integrable_def lebesgue_integral_def by simp_all
+  have "integrable M D"
+    using D D_pos D_neg unfolding real_integrable_def real_lebesgue_integral_def by simp_all
+  then have "integral\<^sup>L M D = 1"
+    using D D_pos D_neg by (simp add: real_lebesgue_integral_def)
 
   have "0 \<le> 1 - measure M ?D_set"
     using prob_le_1 by (auto simp: field_simps)
@@ -172,10 +163,9 @@
   also have "\<dots> < (\<integral> x. D x * (ln b * log b (D x)) \<partial>M)"
   proof (rule integral_less_AE)
     show "integrable M (\<lambda>x. D x - indicator ?D_set x)"
-      using `integrable M D`
-      by (intro integral_diff integral_indicator) auto
+      using `integrable M D` by auto
   next
-    from integral_cmult(1)[OF int, of "ln b"]
+    from integrable_mult_left(1)[OF int, of "ln b"]
     show "integrable M (\<lambda>x. D x * (ln b * log b (D x)))" 
       by (simp add: ac_simps)
   next
@@ -242,7 +232,7 @@
   also have "\<dots> = (\<integral> x. ln b * (D x * log b (D x)) \<partial>M)"
     by (simp add: ac_simps)
   also have "\<dots> = ln b * (\<integral> x. D x * log b (D x) \<partial>M)"
-    using int by (rule integral_cmult)
+    using int by simp
   finally show ?thesis
     using b_gt_1 D by (subst KL_density) (auto simp: zero_less_mult_iff)
 qed
@@ -260,7 +250,7 @@
   qed
   then have "AE x in M. log b (real (RN_deriv M M x)) = 0"
     by (elim AE_mp) simp
-  from integral_cong_AE[OF this]
+  from integral_cong_AE[OF _ _ this]
   have "integral\<^sup>L M (entropy_density b M M) = 0"
     by (simp add: entropy_density_def comp_def)
   then show "KL_divergence b M M = 0"
@@ -291,7 +281,7 @@
   have "N = density M (RN_deriv M N)"
     using ac by (rule density_RN_deriv[symmetric])
   also have "\<dots> = density M D"
-    using borel_measurable_RN_deriv[OF ac] D by (auto intro!: density_cong)
+    using D by (auto intro!: density_cong)
   finally have N: "N = density M D" .
 
   from absolutely_continuous_AE[OF ac(2,1) D(2)] D b_gt_1 ac measurable_entropy_density
@@ -299,7 +289,7 @@
     by (intro integrable_cong_AE[THEN iffD2, OF _ _ _ int])
        (auto simp: N entropy_density_def)
   with D b_gt_1 have "integrable M (\<lambda>x. D x * log b (D x))"
-    by (subst integral_density(2)[symmetric]) (auto simp: N[symmetric] comp_def)
+    by (subst integrable_real_density[symmetric]) (auto simp: N[symmetric] comp_def)
   with `prob_space N` D show ?thesis
     unfolding N
     by (intro KL_eq_0_iff_eq) auto
@@ -335,7 +325,7 @@
       using f g by (auto simp: AE_density)
     show "integrable (density M f) (\<lambda>x. g x / f x * log b (g x / f x))"
       using `1 < b` f g ac
-      by (subst integral_density)
+      by (subst integrable_density)
          (auto intro!: integrable_cong_AE[THEN iffD2, OF _ _ _ int] measurable_If)
   qed
   also have "\<dots> = KL_divergence b (density M f) (density M g)"
@@ -414,7 +404,7 @@
   "distributed M N X f \<Longrightarrow> g \<in> borel_measurable N \<Longrightarrow>
     integrable N (\<lambda>x. f x * g x) \<longleftrightarrow> integrable M (\<lambda>x. g (X x))"
   by (auto simp: distributed_real_AE
-                    distributed_distr_eq_density[symmetric] integral_density[symmetric] integrable_distr_eq)
+                    distributed_distr_eq_density[symmetric] integrable_real_density[symmetric] integrable_distr_eq)
   
 lemma distributed_transform_integrable:
   assumes Px: "distributed M N X Px"
@@ -431,8 +421,9 @@
   finally show ?thesis .
 qed
 
-lemma integrable_cong_AE_imp: "integrable M g \<Longrightarrow> f \<in> borel_measurable M \<Longrightarrow> (AE x in M. g x = f x) \<Longrightarrow> integrable M f"
-  using integrable_cong_AE by blast
+lemma integrable_cong_AE_imp:
+  "integrable M g \<Longrightarrow> f \<in> borel_measurable M \<Longrightarrow> (AE x in M. g x = f x) \<Longrightarrow> integrable M f"
+  using integrable_cong_AE[of f M g] by (auto simp: eq_commute)
 
 lemma (in information_space) finite_entropy_integrable:
   "finite_entropy S X Px \<Longrightarrow> integrable S (\<lambda>x. Px x * log b (Px x))"
@@ -485,7 +476,7 @@
 
     show ac: "absolutely_continuous P Q" by (simp add: absolutely_continuous_def)
     then have ed: "entropy_density b P Q \<in> borel_measurable P"
-      by (rule P.measurable_entropy_density) simp
+      by simp
 
     have "AE x in P. 1 = RN_deriv P Q x"
     proof (rule P.RN_deriv_unique)
@@ -494,13 +485,15 @@
     qed auto
     then have ae_0: "AE x in P. entropy_density b P Q x = 0"
       by eventually_elim (auto simp: entropy_density_def)
-    then have "integrable P (entropy_density b P Q) \<longleftrightarrow> integrable Q (\<lambda>x. 0)"
+    then have "integrable P (entropy_density b P Q) \<longleftrightarrow> integrable Q (\<lambda>x. 0::real)"
       using ed unfolding `Q = P` by (intro integrable_cong_AE) auto
     then show "integrable Q (entropy_density b P Q)" by simp
 
-    show "mutual_information b S T X Y = 0"
+    from ae_0 have "mutual_information b S T X Y = (\<integral>x. 0 \<partial>P)"
       unfolding mutual_information_def KL_divergence_def P_def[symmetric] Q_def[symmetric] `Q = P`
-      using ae_0 by (simp cong: integral_cong_AE) }
+      by (intro integral_cong_AE) auto
+    then show "mutual_information b S T X Y = 0"
+      by simp }
 
   { assume ac: "absolutely_continuous P Q"
     assume int: "integrable Q (entropy_density b P Q)"
@@ -722,8 +715,8 @@
 lemma (in information_space)
   fixes Pxy :: "'b \<times> 'c \<Rightarrow> real" and Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
   assumes "sigma_finite_measure S" "sigma_finite_measure T"
-  assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py"
-  assumes Pxy: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy"
+  assumes Px[measurable]: "distributed M S X Px" and Py[measurable]: "distributed M T Y Py"
+  assumes Pxy[measurable]: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy"
   assumes ae: "AE x in S. AE y in T. Pxy (x, y) = Px x * Py y"
   shows mutual_information_eq_0: "mutual_information b S T X Y = 0"
 proof -
@@ -743,7 +736,7 @@
   ultimately have "AE x in S \<Otimes>\<^sub>M T. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) = 0"
     by eventually_elim simp
   then have "(\<integral>x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) \<partial>(S \<Otimes>\<^sub>M T)) = (\<integral>x. 0 \<partial>(S \<Otimes>\<^sub>M T))"
-    by (rule integral_cong_AE)
+    by (intro integral_cong_AE) auto
   then show ?thesis
     by (subst mutual_information_distr[OF assms(1-5)]) simp
 qed
@@ -810,7 +803,7 @@
 
 lemma (in information_space)
   fixes X :: "'a \<Rightarrow> 'b"
-  assumes X: "distributed M MX X f"
+  assumes X[measurable]: "distributed M MX X f"
   shows entropy_distr: "entropy b MX X = - (\<integral>x. f x * log b (f x) \<partial>MX)" (is ?eq)
 proof -
   note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] distributed_AE[OF X]
@@ -825,18 +818,15 @@
     apply auto
     done
 
-  have int_eq: "- (\<integral> x. log b (f x) \<partial>distr M MX X) = - (\<integral> x. f x * log b (f x) \<partial>MX)"
+  have int_eq: "(\<integral> x. f x * log b (f x) \<partial>MX) = (\<integral> x. log b (f x) \<partial>distr M MX X)"
     unfolding distributed_distr_eq_density[OF X]
     using D
     by (subst integral_density)
        (auto simp: borel_measurable_ereal_iff)
 
   show ?eq
-    unfolding entropy_def KL_divergence_def entropy_density_def comp_def
-    apply (subst integral_cong_AE)
-    apply (rule ae_eq)
-    apply (rule int_eq)
-    done
+    unfolding entropy_def KL_divergence_def entropy_density_def comp_def int_eq neg_equal_iff_equal
+    using ae_eq by (intro integral_cong_AE) auto
 qed
 
 lemma (in prob_space) distributed_imp_emeasure_nonzero:
@@ -861,7 +851,7 @@
 
 lemma (in information_space) entropy_le:
   fixes Px :: "'b \<Rightarrow> real" and MX :: "'b measure"
-  assumes X: "distributed M MX X Px"
+  assumes X[measurable]: "distributed M MX X Px"
   and fin: "emeasure MX {x \<in> space MX. Px x \<noteq> 0} \<noteq> \<infinity>"
   and int: "integrable MX (\<lambda>x. - Px x * log b (Px x))"
   shows "entropy b MX X \<le> log b (measure MX {x \<in> space MX. Px x \<noteq> 0})"
@@ -873,7 +863,7 @@
   have " - log b (measure MX {x \<in> space MX. Px x \<noteq> 0}) = 
     - log b (\<integral> x. indicator {x \<in> space MX. Px x \<noteq> 0} x \<partial>MX)"
     using Px fin
-    by (subst integral_indicator) (auto simp: measure_def borel_measurable_ereal_iff)
+    by (subst integral_real_indicator) (auto simp: measure_def borel_measurable_ereal_iff)
   also have "- log b (\<integral> x. indicator {x \<in> space MX. Px x \<noteq> 0} x \<partial>MX) = - log b (\<integral> x. 1 / Px x \<partial>distr M MX X)"
     unfolding distributed_distr_eq_density[OF X] using Px
     apply (intro arg_cong[where f="log b"] arg_cong[where f=uminus])
@@ -889,7 +879,7 @@
       by (intro positive_integral_cong) (auto split: split_max)
     then show "integrable (distr M MX X) (\<lambda>x. 1 / Px x)"
       unfolding distributed_distr_eq_density[OF X] using Px
-      by (auto simp: positive_integral_density integrable_def borel_measurable_ereal_iff fin positive_integral_max_0
+      by (auto simp: positive_integral_density real_integrable_def borel_measurable_ereal_iff fin positive_integral_max_0
               cong: positive_integral_cong)
     have "integrable MX (\<lambda>x. Px x * log b (1 / Px x)) =
       integrable MX (\<lambda>x. - Px x * log b (Px x))"
@@ -900,7 +890,7 @@
     then show "integrable (distr M MX X) (\<lambda>x. - log b (1 / Px x))"
       unfolding distributed_distr_eq_density[OF X]
       using Px int
-      by (subst integral_density) (auto simp: borel_measurable_ereal_iff)
+      by (subst integrable_real_density) (auto simp: borel_measurable_ereal_iff)
   qed (auto simp: minus_log_convex[OF b_gt_1])
   also have "\<dots> = (\<integral> x. log b (Px x) \<partial>distr M MX X)"
     unfolding distributed_distr_eq_density[OF X] using Px
@@ -940,11 +930,11 @@
   have eq: "(\<integral> x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \<partial>MX) =
     (\<integral> x. (- log b (measure MX A) / measure MX A) * indicator A x \<partial>MX)"
     using measure_nonneg[of MX A] uniform_distributed_params[OF X]
-    by (auto intro!: integral_cong split: split_indicator simp: log_divide_eq)
+    by (intro integral_cong) (auto split: split_indicator simp: log_divide_eq)
   show "- (\<integral> x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \<partial>MX) =
     log b (measure MX A)"
     unfolding eq using uniform_distributed_params[OF X]
-    by (subst lebesgue_integral_cmult) (auto simp: measure_def)
+    by (subst integral_mult_right) (auto simp: measure_def)
 qed
 
 lemma (in information_space) entropy_simple_distributed:
@@ -1068,7 +1058,7 @@
     unfolding conditional_mutual_information_def
     apply (subst mi_eq)
     apply (subst mutual_information_distr[OF S TP Px Pyz Pxyz])
-    apply (subst integral_diff(2)[symmetric])
+    apply (subst integral_diff[symmetric])
     apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff)
     done
 
@@ -1104,7 +1094,7 @@
     apply auto
     done
   also have "\<dots> = (\<integral>\<^sup>+(y, z). \<integral>\<^sup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \<partial>S \<partial>T \<Otimes>\<^sub>M P)"
-    by (subst STP.positive_integral_snd_measurable[symmetric]) (auto simp add: split_beta')
+    by (subst STP.positive_integral_snd[symmetric]) (auto simp add: split_beta')
   also have "\<dots> = (\<integral>\<^sup>+x. ereal (Pyz x) * 1 \<partial>T \<Otimes>\<^sub>M P)"
     apply (rule positive_integral_cong_AE)
     using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space
@@ -1123,7 +1113,7 @@
   also have "\<dots> < \<infinity>" by simp
   finally have fin: "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<noteq> \<infinity>" by simp
 
-  have pos: "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<noteq> 0"
+  have pos: "(\<integral>\<^sup>+x. ?f x \<partial>?P) \<noteq> 0"
     apply (subst positive_integral_density)
     apply simp
     apply (rule distributed_AE[OF Pxyz])
@@ -1131,7 +1121,7 @@
     apply (simp add: split_beta')
   proof
     let ?g = "\<lambda>x. ereal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))"
-    assume "(\<integral>\<^sup>+ x. ?g x \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)) = 0"
+    assume "(\<integral>\<^sup>+x. ?g x \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)) = 0"
     then have "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. ?g x \<le> 0"
       by (intro positive_integral_0_iff_AE[THEN iffD1]) auto
     then have "AE x in S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P. Pxyz x = 0"
@@ -1154,19 +1144,32 @@
     done
 
   have I3: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))"
-    apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integral_diff(1)[OF I1 I2]])
+    apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integrable_diff[OF I1 I2]])
     using ae
     apply (auto simp: split_beta')
     done
 
   have "- log b 1 \<le> - log b (integral\<^sup>L ?P ?f)"
   proof (intro le_imp_neg_le log_le[OF b_gt_1])
-    show "0 < integral\<^sup>L ?P ?f"
-      using neg pos fin positive_integral_positive[of ?P ?f]
-      by (cases "(\<integral>\<^sup>+ x. ?f x \<partial>?P)") (auto simp add: lebesgue_integral_def less_le split_beta')
-    show "integral\<^sup>L ?P ?f \<le> 1"
-      using neg le1 fin positive_integral_positive[of ?P ?f]
-      by (cases "(\<integral>\<^sup>+ x. ?f x \<partial>?P)") (auto simp add: lebesgue_integral_def split_beta' one_ereal_def)
+    have If: "integrable ?P ?f"
+      unfolding real_integrable_def
+    proof (intro conjI)
+      from neg show "(\<integral>\<^sup>+ x. - ?f x \<partial>?P) \<noteq> \<infinity>"
+        by simp
+      from fin show "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<noteq> \<infinity>"
+        by simp
+    qed simp
+    then have "(\<integral>\<^sup>+ x. ?f x \<partial>?P) = (\<integral>x. ?f x \<partial>?P)"
+      apply (rule positive_integral_eq_integral)
+      apply (subst AE_density)
+      apply simp
+      using ae5 ae6 ae7 ae8
+      apply eventually_elim
+      apply auto
+      done
+    with positive_integral_positive[of ?P ?f] pos le1
+    show "0 < (\<integral>x. ?f x \<partial>?P)" "(\<integral>x. ?f x \<partial>?P) \<le> 1"
+      by (simp_all add: one_ereal_def)
   qed
   also have "- log b (integral\<^sup>L ?P ?f) \<le> (\<integral> x. - log b (?f x) \<partial>?P)"
   proof (rule P.jensens_inequality[where a=0 and b=1 and I="{0<..}"])
@@ -1175,10 +1178,10 @@
       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
       by eventually_elim (auto)
     show "integrable ?P ?f"
-      unfolding integrable_def 
+      unfolding real_integrable_def 
       using fin neg by (auto simp: split_beta')
     show "integrable ?P (\<lambda>x. - log b (?f x))"
-      apply (subst integral_density)
+      apply (subst integrable_real_density)
       apply simp
       apply (auto intro!: distributed_real_AE[OF Pxyz]) []
       apply simp
@@ -1192,13 +1195,12 @@
   qed (auto simp: b_gt_1 minus_log_convex)
   also have "\<dots> = conditional_mutual_information b S T P X Y Z"
     unfolding `?eq`
-    apply (subst integral_density)
+    apply (subst integral_real_density)
     apply simp
     apply (auto intro!: distributed_real_AE[OF Pxyz]) []
     apply simp
     apply (intro integral_cong_AE)
     using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
-    apply eventually_elim
     apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps)
     done
   finally show ?nonneg
@@ -1316,7 +1318,7 @@
     unfolding conditional_mutual_information_def
     apply (subst mi_eq)
     apply (subst mutual_information_distr[OF S TP Px Pyz Pxyz])
-    apply (subst integral_diff(2)[symmetric])
+    apply (subst integral_diff[symmetric])
     apply (auto intro!: integral_cong_AE simp: split_beta' simp del: integral_diff)
     done
 
@@ -1349,8 +1351,7 @@
     apply auto
     done
   also have "\<dots> = (\<integral>\<^sup>+(y, z). \<integral>\<^sup>+ x. ereal (Pxz (x, z)) * ereal (Pyz (y, z) / Pz z) \<partial>S \<partial>T \<Otimes>\<^sub>M P)"
-    by (subst STP.positive_integral_snd_measurable[symmetric])
-       (auto simp add: split_beta')
+    by (subst STP.positive_integral_snd[symmetric]) (auto simp add: split_beta')
   also have "\<dots> = (\<integral>\<^sup>+x. ereal (Pyz x) * 1 \<partial>T \<Otimes>\<^sub>M P)"
     apply (rule positive_integral_cong_AE)
     using aeX1 aeX2 aeX3 distributed_AE[OF Pyz] AE_space
@@ -1399,19 +1400,32 @@
     done
 
   have I3: "integrable (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))"
-    apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integral_diff(1)[OF I1 I2]])
+    apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ integrable_diff[OF I1 I2]])
     using ae
     apply (auto simp: split_beta')
     done
 
   have "- log b 1 \<le> - log b (integral\<^sup>L ?P ?f)"
   proof (intro le_imp_neg_le log_le[OF b_gt_1])
-    show "0 < integral\<^sup>L ?P ?f"
-      using neg pos fin positive_integral_positive[of ?P ?f]
-      by (cases "(\<integral>\<^sup>+ x. ?f x \<partial>?P)") (auto simp add: lebesgue_integral_def less_le split_beta')
-    show "integral\<^sup>L ?P ?f \<le> 1"
-      using neg le1 fin positive_integral_positive[of ?P ?f]
-      by (cases "(\<integral>\<^sup>+ x. ?f x \<partial>?P)") (auto simp add: lebesgue_integral_def split_beta' one_ereal_def)
+    have If: "integrable ?P ?f"
+      unfolding real_integrable_def
+    proof (intro conjI)
+      from neg show "(\<integral>\<^sup>+ x. - ?f x \<partial>?P) \<noteq> \<infinity>"
+        by simp
+      from fin show "(\<integral>\<^sup>+ x. ?f x \<partial>?P) \<noteq> \<infinity>"
+        by simp
+    qed simp
+    then have "(\<integral>\<^sup>+ x. ?f x \<partial>?P) = (\<integral>x. ?f x \<partial>?P)"
+      apply (rule positive_integral_eq_integral)
+      apply (subst AE_density)
+      apply simp
+      using ae5 ae6 ae7 ae8
+      apply eventually_elim
+      apply auto
+      done
+    with positive_integral_positive[of ?P ?f] pos le1
+    show "0 < (\<integral>x. ?f x \<partial>?P)" "(\<integral>x. ?f x \<partial>?P) \<le> 1"
+      by (simp_all add: one_ereal_def)
   qed
   also have "- log b (integral\<^sup>L ?P ?f) \<le> (\<integral> x. - log b (?f x) \<partial>?P)"
   proof (rule P.jensens_inequality[where a=0 and b=1 and I="{0<..}"])
@@ -1420,10 +1434,10 @@
       using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
       by eventually_elim (auto)
     show "integrable ?P ?f"
-      unfolding integrable_def
+      unfolding real_integrable_def
       using fin neg by (auto simp: split_beta')
     show "integrable ?P (\<lambda>x. - log b (?f x))"
-      apply (subst integral_density)
+      apply (subst integrable_real_density)
       apply simp
       apply (auto intro!: distributed_real_AE[OF Pxyz]) []
       apply simp
@@ -1437,13 +1451,12 @@
   qed (auto simp: b_gt_1 minus_log_convex)
   also have "\<dots> = conditional_mutual_information b S T P X Y Z"
     unfolding `?eq`
-    apply (subst integral_density)
+    apply (subst integral_real_density)
     apply simp
     apply (auto intro!: distributed_real_AE[OF Pxyz]) []
     apply simp
     apply (intro integral_cong_AE)
     using ae1 ae2 ae3 ae4 ae5 ae6 ae7 ae8 Pxyz[THEN distributed_real_AE]
-    apply eventually_elim
     apply (auto simp: log_divide_eq zero_less_mult_iff zero_less_divide_iff field_simps)
     done
   finally show ?nonneg
@@ -1532,7 +1545,7 @@
   "\<H>(X | Y) \<equiv> conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y"
 
 lemma (in information_space) conditional_entropy_generic_eq:
-  fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
+  fixes Pxy :: "_ \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T"
   assumes Py[measurable]: "distributed M T Y Py"
   assumes Pxy[measurable]: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy"
@@ -1552,19 +1565,15 @@
     unfolding AE_density[OF distributed_borel_measurable, OF Pxy]
     unfolding distributed_distr_eq_density[OF Py]
     apply (rule ST.AE_pair_measure)
-    apply (auto intro!: sets.sets_Collect borel_measurable_eq measurable_compose[OF _ distributed_real_measurable[OF Py]]
-                        distributed_real_measurable[OF Pxy] distributed_real_AE[OF Py]
-                        borel_measurable_real_of_ereal measurable_compose[OF _ borel_measurable_RN_deriv_density])
+    apply auto
     using distributed_RN_deriv[OF Py]
     apply auto
     done    
   ultimately
   have "conditional_entropy b S T X Y = - (\<integral>x. Pxy x * log b (Pxy x / Py (snd x)) \<partial>(S \<Otimes>\<^sub>M T))"
     unfolding conditional_entropy_def neg_equal_iff_equal
-    apply (subst integral_density(1)[symmetric])
-    apply (auto simp: distributed_real_measurable[OF Pxy] distributed_real_AE[OF Pxy]
-                      measurable_compose[OF _ distributed_real_measurable[OF Py]]
-                      distributed_distr_eq_density[OF Pxy]
+    apply (subst integral_real_density[symmetric])
+    apply (auto simp: distributed_real_AE[OF Pxy] distributed_distr_eq_density[OF Pxy]
                 intro!: integral_cong_AE)
     done
   then show ?thesis by (simp add: split_beta')
@@ -1573,8 +1582,8 @@
 lemma (in information_space) conditional_entropy_eq_entropy:
   fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real"
   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T"
-  assumes Py: "distributed M T Y Py"
-  assumes Pxy: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy"
+  assumes Py[measurable]: "distributed M T Y Py"
+  assumes Pxy[measurable]: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy"
   assumes I1: "integrable (S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Pxy x))"
   assumes I2: "integrable (S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))"
   shows "conditional_entropy b S T X Y = entropy b (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) - entropy b T Y"
@@ -1606,7 +1615,6 @@
     unfolding conditional_entropy_generic_eq[OF S T Py Pxy] neg_equal_iff_equal
     apply (intro integral_cong_AE)
     using ae
-    apply eventually_elim
     apply auto
     done
   also have "\<dots> = - (\<integral>x. Pxy x * log b (Pxy x) \<partial>(S \<Otimes>\<^sub>M T)) - - (\<integral>x.  Pxy x * log b (Py (snd x)) \<partial>(S \<Otimes>\<^sub>M T))"
@@ -1702,8 +1710,8 @@
 lemma (in information_space) mutual_information_eq_entropy_conditional_entropy_distr:
   fixes Px :: "'b \<Rightarrow> real" and Py :: "'c \<Rightarrow> real" and Pxy :: "('b \<times> 'c) \<Rightarrow> real"
   assumes S: "sigma_finite_measure S" and T: "sigma_finite_measure T"
-  assumes Px: "distributed M S X Px" and Py: "distributed M T Y Py"
-  assumes Pxy: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy"
+  assumes Px[measurable]: "distributed M S X Px" and Py[measurable]: "distributed M T Y Py"
+  assumes Pxy[measurable]: "distributed M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) Pxy"
   assumes Ix: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Px (fst x)))"
   assumes Iy: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Py (snd x)))"
   assumes Ixy: "integrable(S \<Otimes>\<^sub>M T) (\<lambda>x. Pxy x * log b (Pxy x))"
@@ -1758,13 +1766,13 @@
   have "entropy b S X + entropy b T Y - entropy b (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)) = integral\<^sup>L (S \<Otimes>\<^sub>M T) ?f"
     unfolding X Y XY
     apply (subst integral_diff)
-    apply (intro integral_diff Ixy Ix Iy)+
+    apply (intro integrable_diff Ixy Ix Iy)+
     apply (subst integral_diff)
-    apply (intro integral_diff Ixy Ix Iy)+
+    apply (intro Ixy Ix Iy)+
     apply (simp add: field_simps)
     done
   also have "\<dots> = integral\<^sup>L (S \<Otimes>\<^sub>M T) ?g"
-    using `AE x in _. ?f x = ?g x` by (rule integral_cong_AE)
+    using `AE x in _. ?f x = ?g x` by (intro integral_cong_AE) auto
   also have "\<dots> = mutual_information b S T X Y"
     by (rule mutual_information_distr[OF S T Px Py Pxy, symmetric])
   finally show ?thesis ..