lemma: independence is equal to mutual information = 0
authorhoelzl
Thu Jun 09 14:04:34 2011 +0200 (2011-06-09)
changeset 4334060e181c4eae4
parent 43339 9ba256ad6781
child 43341 acdac535c7fa
lemma: independence is equal to mutual information = 0
src/HOL/Probability/Independent_Family.thy
src/HOL/Probability/Information.thy
src/HOL/Probability/Probability_Measure.thy
src/HOL/Probability/Radon_Nikodym.thy
     1.1 --- a/src/HOL/Probability/Independent_Family.thy	Thu Jun 09 13:55:11 2011 +0200
     1.2 +++ b/src/HOL/Probability/Independent_Family.thy	Thu Jun 09 14:04:34 2011 +0200
     1.3 @@ -117,6 +117,16 @@
     1.4    using indep[unfolded indep_set_def, THEN indep_setsD, of UNIV "bool_case a b"] ev
     1.5    by (simp add: ac_simps UNIV_bool)
     1.6  
     1.7 +lemma (in prob_space) indep_var_eq:
     1.8 +  "indep_var S X T Y \<longleftrightarrow>
     1.9 +    (random_variable S X \<and> random_variable T Y) \<and>
    1.10 +    indep_set
    1.11 +      (sigma_sets (space M) { X -` A \<inter> space M | A. A \<in> sets S})
    1.12 +      (sigma_sets (space M) { Y -` A \<inter> space M | A. A \<in> sets T})"
    1.13 +  unfolding indep_var_def indep_vars_def indep_set_def UNIV_bool
    1.14 +  by (intro arg_cong2[where f="op \<and>"] arg_cong2[where f=indep_sets] ext)
    1.15 +     (auto split: bool.split)
    1.16 +
    1.17  lemma (in prob_space)
    1.18    assumes indep: "indep_set A B"
    1.19    shows indep_setD_ev1: "A \<subseteq> events"
    1.20 @@ -491,7 +501,7 @@
    1.21  proof (simp add: sigma_algebra_iff2, safe)
    1.22    let ?A = "(\<Inter>n. sigma_sets (space M) (UNION {n..} A))"
    1.23    interpret A: sigma_algebra "\<lparr>space = space M, sets = A i\<rparr>" for i by fact
    1.24 -  { fix X x assume "X \<in> ?A" "x \<in> X" 
    1.25 +  { fix X x assume "X \<in> ?A" "x \<in> X"
    1.26      then have "\<And>n. X \<in> sigma_sets (space M) (UNION {n..} A)" by auto
    1.27      from this[of 0] have "X \<in> sigma_sets (space M) (UNION UNIV A)" by simp
    1.28      then have "X \<subseteq> space M"
    1.29 @@ -572,7 +582,7 @@
    1.30        show "Int_stable \<lparr>space = space M, sets = A m\<rparr>"
    1.31          unfolding Int_stable_def using A.Int by auto
    1.32      qed
    1.33 -    also have "(\<lambda>b. sigma_sets (space M) (\<Union>m\<in>bool_case {..n} {Suc n..} b. A m)) = 
    1.34 +    also have "(\<lambda>b. sigma_sets (space M) (\<Union>m\<in>bool_case {..n} {Suc n..} b. A m)) =
    1.35        bool_case (sigma_sets (space M) (\<Union>m\<in>{..n}. A m)) (sigma_sets (space M) (\<Union>m\<in>{Suc n..}. A m))"
    1.36        by (auto intro!: ext split: bool.split)
    1.37      finally have indep: "indep_set (sigma_sets (space M) (\<Union>m\<in>{..n}. A m)) (sigma_sets (space M) (\<Union>m\<in>{Suc n..}. A m))"
    1.38 @@ -732,7 +742,7 @@
    1.39          by (auto simp del: vimage_Int intro!: exI[of _ "A \<inter> B"] dest: Int_stableD)
    1.40      qed }
    1.41    note indep_sets_sigma_sets_iff[OF this, simp]
    1.42 - 
    1.43 +
    1.44    { fix i assume "i \<in> I"
    1.45      { fix A assume "A \<in> sets (M' i)"
    1.46        then have "A \<in> sets (sigma (M' i))" by (auto simp: sets_sigma intro: sigma_sets.Basic)
    1.47 @@ -745,7 +755,7 @@
    1.48        "space M \<in> {X i -` A \<inter> space M |A. A \<in> sets (M' i)}"
    1.49        by (auto intro!: exI[of _ "space (M' i)"]) }
    1.50    note indep_sets_finite[OF I this, simp]
    1.51 -  
    1.52 +
    1.53    have "(\<forall>A\<in>\<Pi> i\<in>I. {X i -` A \<inter> space M |A. A \<in> sets (M' i)}. prob (INTER I A) = (\<Prod>j\<in>I. prob (A j))) =
    1.54      (\<forall>A\<in>\<Pi> i\<in>I. sets (M' i). prob ((\<Inter>j\<in>I. X j -` A j) \<inter> space M) = (\<Prod>x\<in>I. prob (X x -` A x \<inter> space M)))"
    1.55      (is "?L = ?R")
    1.56 @@ -847,7 +857,7 @@
    1.57          by (simp_all add: product_algebra_def)
    1.58        show "A \<in> sets (sigma P.G)"
    1.59          using `A \<in> sets P.P` by (simp add: product_algebra_def)
    1.60 -    
    1.61 +
    1.62        fix E assume E: "E \<in> sets P.G"
    1.63        then have "E \<in> sets P.P"
    1.64          by (simp add: sets_sigma sigma_sets.Basic product_algebra_def)
    1.65 @@ -915,10 +925,67 @@
    1.66    finally show ?thesis .
    1.67  qed
    1.68  
    1.69 +lemma (in prob_space)
    1.70 +  assumes "indep_var S X T Y"
    1.71 +  shows indep_var_rv1: "random_variable S X"
    1.72 +    and indep_var_rv2: "random_variable T Y"
    1.73 +proof -
    1.74 +  have "\<forall>i\<in>UNIV. random_variable (bool_case S T i) (bool_case X Y i)"
    1.75 +    using assms unfolding indep_var_def indep_vars_def by auto
    1.76 +  then show "random_variable S X" "random_variable T Y"
    1.77 +    unfolding UNIV_bool by auto
    1.78 +qed
    1.79 +
    1.80  lemma (in prob_space) indep_var_distributionD:
    1.81 -  assumes "indep_var Ma A Mb B"
    1.82 -  assumes "Xa \<in> sets Ma" "Xb \<in> sets Mb"
    1.83 -  shows "joint_distribution A B (Xa \<times> Xb) = distribution A Xa * distribution B Xb"
    1.84 -  unfolding distribution_def using assms by (rule indep_varD)
    1.85 +  assumes indep: "indep_var S X T Y"
    1.86 +  defines "P \<equiv> S\<lparr>measure := extreal\<circ>distribution X\<rparr> \<Otimes>\<^isub>M T\<lparr>measure := extreal\<circ>distribution Y\<rparr>"
    1.87 +  assumes "A \<in> sets P"
    1.88 +  shows "joint_distribution X Y A = finite_measure.\<mu>' P A"
    1.89 +proof -
    1.90 +  from indep have rvs: "random_variable S X" "random_variable T Y"
    1.91 +    by (blast dest: indep_var_rv1 indep_var_rv2)+
    1.92 +
    1.93 +  let ?S = "S\<lparr>measure := extreal\<circ>distribution X\<rparr>"
    1.94 +  let ?T = "T\<lparr>measure := extreal\<circ>distribution Y\<rparr>"
    1.95 +  interpret X: prob_space ?S by (rule distribution_prob_space) fact
    1.96 +  interpret Y: prob_space ?T by (rule distribution_prob_space) fact
    1.97 +  interpret XY: pair_prob_space ?S ?T by default
    1.98 +
    1.99 +  let ?J = "XY.P\<lparr> measure := extreal \<circ> joint_distribution X Y \<rparr>"
   1.100 +  interpret J: prob_space ?J
   1.101 +    by (rule joint_distribution_prob_space) (simp_all add: rvs)
   1.102 +
   1.103 +  have "finite_measure.\<mu>' (XY.P\<lparr> measure := extreal \<circ> joint_distribution X Y \<rparr>) A = XY.\<mu>' A"
   1.104 +  proof (rule prob_space_unique_Int_stable)
   1.105 +    show "Int_stable (pair_measure_generator ?S ?T)" (is "Int_stable ?P")
   1.106 +      by fact
   1.107 +    show "space ?P \<in> sets ?P"
   1.108 +      unfolding space_pair_measure[simplified pair_measure_def space_sigma]
   1.109 +      using X.top Y.top by (auto intro!: pair_measure_generatorI)
   1.110 +
   1.111 +    show "prob_space ?J" by default
   1.112 +    show "space ?J = space ?P"
   1.113 +      by (simp add: pair_measure_generator_def space_pair_measure)
   1.114 +    show "sets ?J = sets (sigma ?P)"
   1.115 +      by (simp add: pair_measure_def)
   1.116 +
   1.117 +    show "prob_space XY.P" by default
   1.118 +    show "space XY.P = space ?P" "sets XY.P = sets (sigma ?P)"
   1.119 +      by (simp_all add: pair_measure_generator_def pair_measure_def)
   1.120 +
   1.121 +    show "A \<in> sets (sigma ?P)"
   1.122 +      using `A \<in> sets P` unfolding P_def pair_measure_def by simp
   1.123 +
   1.124 +    fix X assume "X \<in> sets ?P"
   1.125 +    then obtain A B where "A \<in> sets S" "B \<in> sets T" "X = A \<times> B"
   1.126 +      by (auto simp: sets_pair_measure_generator)
   1.127 +    then show "J.\<mu>' X = XY.\<mu>' X"
   1.128 +      unfolding J.\<mu>'_def XY.\<mu>'_def using indep
   1.129 +      by (simp add: XY.pair_measure_times)
   1.130 +         (simp add: distribution_def indep_varD)
   1.131 +  qed
   1.132 +  then show ?thesis
   1.133 +    using `A \<in> sets P` unfolding P_def J.\<mu>'_def XY.\<mu>'_def by simp
   1.134 +qed
   1.135  
   1.136  end
     2.1 --- a/src/HOL/Probability/Information.thy	Thu Jun 09 13:55:11 2011 +0200
     2.2 +++ b/src/HOL/Probability/Information.thy	Thu Jun 09 14:04:34 2011 +0200
     2.3 @@ -7,14 +7,10 @@
     2.4  
     2.5  theory Information
     2.6  imports
     2.7 -  Probability_Measure
     2.8 +  Independent_Family
     2.9    "~~/src/HOL/Library/Convex"
    2.10  begin
    2.11  
    2.12 -lemma (in prob_space) not_zero_less_distribution[simp]:
    2.13 -  "(\<not> 0 < distribution X A) \<longleftrightarrow> distribution X A = 0"
    2.14 -  using distribution_positive[of X A] by arith
    2.15 -
    2.16  lemma log_le: "1 < a \<Longrightarrow> 0 < x \<Longrightarrow> x \<le> y \<Longrightarrow> log a x \<le> log a y"
    2.17    by (subst log_le_cancel_iff) auto
    2.18  
    2.19 @@ -175,7 +171,211 @@
    2.20  Kullback$-$Leibler distance. *}
    2.21  
    2.22  definition
    2.23 -  "KL_divergence b M \<nu> = \<integral>x. log b (real (RN_deriv M \<nu> x)) \<partial>M\<lparr>measure := \<nu>\<rparr>"
    2.24 +  "entropy_density b M \<nu> = log b \<circ> real \<circ> RN_deriv M \<nu>"
    2.25 +
    2.26 +definition
    2.27 +  "KL_divergence b M \<nu> = integral\<^isup>L (M\<lparr>measure := \<nu>\<rparr>) (entropy_density b M \<nu>)"
    2.28 +
    2.29 +lemma (in information_space) measurable_entropy_density:
    2.30 +  assumes ps: "prob_space (M\<lparr>measure := \<nu>\<rparr>)"
    2.31 +  assumes ac: "absolutely_continuous \<nu>"
    2.32 +  shows "entropy_density b M \<nu> \<in> borel_measurable M"
    2.33 +proof -
    2.34 +  interpret \<nu>: prob_space "M\<lparr>measure := \<nu>\<rparr>" by fact
    2.35 +  have "measure_space (M\<lparr>measure := \<nu>\<rparr>)" by fact
    2.36 +  from RN_deriv[OF this ac] b_gt_1 show ?thesis
    2.37 +    unfolding entropy_density_def
    2.38 +    by (intro measurable_comp) auto
    2.39 +qed
    2.40 +
    2.41 +lemma (in information_space) KL_gt_0:
    2.42 +  assumes ps: "prob_space (M\<lparr>measure := \<nu>\<rparr>)"
    2.43 +  assumes ac: "absolutely_continuous \<nu>"
    2.44 +  assumes int: "integrable (M\<lparr> measure := \<nu> \<rparr>) (entropy_density b M \<nu>)"
    2.45 +  assumes A: "A \<in> sets M" "\<nu> A \<noteq> \<mu> A"
    2.46 +  shows "0 < KL_divergence b M \<nu>"
    2.47 +proof -
    2.48 +  interpret \<nu>: prob_space "M\<lparr>measure := \<nu>\<rparr>" by fact
    2.49 +  have ms: "measure_space (M\<lparr>measure := \<nu>\<rparr>)" by default
    2.50 +  have fms: "finite_measure (M\<lparr>measure := \<nu>\<rparr>)" by default
    2.51 +  note RN = RN_deriv[OF ms ac]
    2.52 +
    2.53 +  from real_RN_deriv[OF fms ac] guess D . note D = this
    2.54 +  with absolutely_continuous_AE[OF ms] ac
    2.55 +  have D\<nu>: "AE x in M\<lparr>measure := \<nu>\<rparr>. RN_deriv M \<nu> x = extreal (D x)"
    2.56 +    by auto
    2.57 +
    2.58 +  def f \<equiv> "\<lambda>x. if D x = 0 then 1 else 1 / D x"
    2.59 +  with D have f_borel: "f \<in> borel_measurable M"
    2.60 +    by (auto intro!: measurable_If)
    2.61 +
    2.62 +  have "KL_divergence b M \<nu> = 1 / ln b * (\<integral> x. ln b * entropy_density b M \<nu> x \<partial>M\<lparr>measure := \<nu>\<rparr>)"
    2.63 +    unfolding KL_divergence_def using int b_gt_1
    2.64 +    by (simp add: integral_cmult)
    2.65 +
    2.66 +  { fix A assume "A \<in> sets M"
    2.67 +    with RN D have "\<nu>.\<mu> A = (\<integral>\<^isup>+ x. extreal (D x) * indicator A x \<partial>M)"
    2.68 +      by (auto intro!: positive_integral_cong_AE) }
    2.69 +  note D_density = this
    2.70 +
    2.71 +  have ln_entropy: "(\<lambda>x. ln b * entropy_density b M \<nu> x) \<in> borel_measurable M"
    2.72 +    using measurable_entropy_density[OF ps ac] by auto
    2.73 +
    2.74 +  have "integrable (M\<lparr>measure := \<nu>\<rparr>) (\<lambda>x. ln b * entropy_density b M \<nu> x)"
    2.75 +    using int by auto
    2.76 +  moreover have "integrable (M\<lparr>measure := \<nu>\<rparr>) (\<lambda>x. ln b * entropy_density b M \<nu> x) \<longleftrightarrow>
    2.77 +      integrable M (\<lambda>x. D x * (ln b * entropy_density b M \<nu> x))"
    2.78 +    using D D_density ln_entropy
    2.79 +    by (intro integral_translated_density) auto
    2.80 +  ultimately have M_int: "integrable M (\<lambda>x. D x * (ln b * entropy_density b M \<nu> x))"
    2.81 +    by simp
    2.82 +
    2.83 +  have D_neg: "(\<integral>\<^isup>+ x. extreal (- D x) \<partial>M) = 0"
    2.84 +    using D by (subst positive_integral_0_iff_AE) auto
    2.85 +
    2.86 +  have "(\<integral>\<^isup>+ x. extreal (D x) \<partial>M) = \<nu> (space M)"
    2.87 +    using RN D by (auto intro!: positive_integral_cong_AE)
    2.88 +  then have D_pos: "(\<integral>\<^isup>+ x. extreal (D x) \<partial>M) = 1"
    2.89 +    using \<nu>.measure_space_1 by simp
    2.90 +
    2.91 +  have "integrable M D"
    2.92 +    using D_pos D_neg D by (auto simp: integrable_def)
    2.93 +
    2.94 +  have "integral\<^isup>L M D = 1"
    2.95 +    using D_pos D_neg by (auto simp: lebesgue_integral_def)
    2.96 +
    2.97 +  let ?D_set = "{x\<in>space M. D x \<noteq> 0}"
    2.98 +  have [simp, intro]: "?D_set \<in> sets M"
    2.99 +    using D by (auto intro: sets_Collect)
   2.100 +
   2.101 +  have "0 \<le> 1 - \<mu>' ?D_set"
   2.102 +    using prob_le_1 by (auto simp: field_simps)
   2.103 +  also have "\<dots> = (\<integral> x. D x - indicator ?D_set x \<partial>M)"
   2.104 +    using `integrable M D` `integral\<^isup>L M D = 1`
   2.105 +    by (simp add: \<mu>'_def)
   2.106 +  also have "\<dots> < (\<integral> x. D x * (ln b * entropy_density b M \<nu> x) \<partial>M)"
   2.107 +  proof (rule integral_less_AE)
   2.108 +    show "integrable M (\<lambda>x. D x - indicator ?D_set x)"
   2.109 +      using `integrable M D`
   2.110 +      by (intro integral_diff integral_indicator) auto
   2.111 +  next
   2.112 +    show "integrable M (\<lambda>x. D x * (ln b * entropy_density b M \<nu> x))"
   2.113 +      by fact
   2.114 +  next
   2.115 +    show "\<mu> {x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} \<noteq> 0"
   2.116 +    proof
   2.117 +      assume eq_0: "\<mu> {x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} = 0"
   2.118 +      then have disj: "AE x. D x = 1 \<or> D x = 0"
   2.119 +        using D(1) by (auto intro!: AE_I[OF subset_refl] sets_Collect)
   2.120 +
   2.121 +      have "\<mu> {x\<in>space M. D x = 1} = (\<integral>\<^isup>+ x. indicator {x\<in>space M. D x = 1} x \<partial>M)"
   2.122 +        using D(1) by auto
   2.123 +      also have "\<dots> = (\<integral>\<^isup>+ x. extreal (D x) * indicator {x\<in>space M. D x \<noteq> 0} x \<partial>M)"
   2.124 +        using disj by (auto intro!: positive_integral_cong_AE simp: indicator_def one_extreal_def)
   2.125 +      also have "\<dots> = \<nu> {x\<in>space M. D x \<noteq> 0}"
   2.126 +        using D(1) D_density by auto
   2.127 +      also have "\<dots> = \<nu> (space M)"
   2.128 +        using D_density D(1) by (auto intro!: positive_integral_cong simp: indicator_def)
   2.129 +      finally have "AE x. D x = 1"
   2.130 +        using D(1) \<nu>.measure_space_1 by (intro AE_I_eq_1) auto
   2.131 +      then have "(\<integral>\<^isup>+x. indicator A x\<partial>M) = (\<integral>\<^isup>+x. extreal (D x) * indicator A x\<partial>M)"
   2.132 +        by (intro positive_integral_cong_AE) (auto simp: one_extreal_def[symmetric])
   2.133 +      also have "\<dots> = \<nu> A"
   2.134 +        using `A \<in> sets M` D_density by simp
   2.135 +      finally show False using `A \<in> sets M` `\<nu> A \<noteq> \<mu> A` by simp
   2.136 +    qed
   2.137 +    show "{x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} \<in> sets M"
   2.138 +      using D(1) by (auto intro: sets_Collect)
   2.139 +
   2.140 +    show "AE t. t \<in> {x\<in>space M. D x \<noteq> 1 \<and> D x \<noteq> 0} \<longrightarrow>
   2.141 +      D t - indicator ?D_set t \<noteq> D t * (ln b * entropy_density b M \<nu> t)"
   2.142 +      using D(2)
   2.143 +    proof (elim AE_mp, safe intro!: AE_I2)
   2.144 +      fix t assume Dt: "t \<in> space M" "D t \<noteq> 1" "D t \<noteq> 0"
   2.145 +        and RN: "RN_deriv M \<nu> t = extreal (D t)"
   2.146 +        and eq: "D t - indicator ?D_set t = D t * (ln b * entropy_density b M \<nu> t)"
   2.147 +
   2.148 +      have "D t - 1 = D t - indicator ?D_set t"
   2.149 +        using Dt by simp
   2.150 +      also note eq
   2.151 +      also have "D t * (ln b * entropy_density b M \<nu> t) = - D t * ln (1 / D t)"
   2.152 +        using RN b_gt_1 `D t \<noteq> 0` `0 \<le> D t`
   2.153 +        by (simp add: entropy_density_def log_def ln_div less_le)
   2.154 +      finally have "ln (1 / D t) = 1 / D t - 1"
   2.155 +        using `D t \<noteq> 0` by (auto simp: field_simps)
   2.156 +      from ln_eq_minus_one[OF _ this] `D t \<noteq> 0` `0 \<le> D t` `D t \<noteq> 1`
   2.157 +      show False by auto
   2.158 +    qed
   2.159 +
   2.160 +    show "AE t. D t - indicator ?D_set t \<le> D t * (ln b * entropy_density b M \<nu> t)"
   2.161 +      using D(2)
   2.162 +    proof (elim AE_mp, intro AE_I2 impI)
   2.163 +      fix t assume "t \<in> space M" and RN: "RN_deriv M \<nu> t = extreal (D t)"
   2.164 +      show "D t - indicator ?D_set t \<le> D t * (ln b * entropy_density b M \<nu> t)"
   2.165 +      proof cases
   2.166 +        assume asm: "D t \<noteq> 0"
   2.167 +        then have "0 < D t" using `0 \<le> D t` by auto
   2.168 +        then have "0 < 1 / D t" by auto
   2.169 +        have "D t - indicator ?D_set t \<le> - D t * (1 / D t - 1)"
   2.170 +          using asm `t \<in> space M` by (simp add: field_simps)
   2.171 +        also have "- D t * (1 / D t - 1) \<le> - D t * ln (1 / D t)"
   2.172 +          using ln_le_minus_one `0 < 1 / D t` by (intro mult_left_mono_neg) auto
   2.173 +        also have "\<dots> = D t * (ln b * entropy_density b M \<nu> t)"
   2.174 +          using `0 < D t` RN b_gt_1
   2.175 +          by (simp_all add: log_def ln_div entropy_density_def)
   2.176 +        finally show ?thesis by simp
   2.177 +      qed simp
   2.178 +    qed
   2.179 +  qed
   2.180 +  also have "\<dots> = (\<integral> x. ln b * entropy_density b M \<nu> x \<partial>M\<lparr>measure := \<nu>\<rparr>)"
   2.181 +    using D D_density ln_entropy
   2.182 +    by (intro integral_translated_density[symmetric]) auto
   2.183 +  also have "\<dots> = ln b * (\<integral> x. entropy_density b M \<nu> x \<partial>M\<lparr>measure := \<nu>\<rparr>)"
   2.184 +    using int by (rule \<nu>.integral_cmult)
   2.185 +  finally show "0 < KL_divergence b M \<nu>"
   2.186 +    using b_gt_1 by (auto simp: KL_divergence_def zero_less_mult_iff)
   2.187 +qed
   2.188 +
   2.189 +lemma (in sigma_finite_measure) KL_eq_0:
   2.190 +  assumes eq: "\<forall>A\<in>sets M. \<nu> A = measure M A"
   2.191 +  shows "KL_divergence b M \<nu> = 0"
   2.192 +proof -
   2.193 +  have "AE x. 1 = RN_deriv M \<nu> x"
   2.194 +  proof (rule RN_deriv_unique)
   2.195 +    show "measure_space (M\<lparr>measure := \<nu>\<rparr>)"
   2.196 +      using eq by (intro measure_space_cong) auto
   2.197 +    show "absolutely_continuous \<nu>"
   2.198 +      unfolding absolutely_continuous_def using eq by auto
   2.199 +    show "(\<lambda>x. 1) \<in> borel_measurable M" "AE x. 0 \<le> (1 :: extreal)" by auto
   2.200 +    fix A assume "A \<in> sets M"
   2.201 +    with eq show "\<nu> A = \<integral>\<^isup>+ x. 1 * indicator A x \<partial>M" by simp
   2.202 +  qed
   2.203 +  then have "AE x. log b (real (RN_deriv M \<nu> x)) = 0"
   2.204 +    by (elim AE_mp) simp
   2.205 +  from integral_cong_AE[OF this]
   2.206 +  have "integral\<^isup>L M (entropy_density b M \<nu>) = 0"
   2.207 +    by (simp add: entropy_density_def comp_def)
   2.208 +  with eq show "KL_divergence b M \<nu> = 0"
   2.209 +    unfolding KL_divergence_def
   2.210 +    by (subst integral_cong_measure) auto
   2.211 +qed
   2.212 +
   2.213 +lemma (in information_space) KL_eq_0_imp:
   2.214 +  assumes ps: "prob_space (M\<lparr>measure := \<nu>\<rparr>)"
   2.215 +  assumes ac: "absolutely_continuous \<nu>"
   2.216 +  assumes int: "integrable (M\<lparr> measure := \<nu> \<rparr>) (entropy_density b M \<nu>)"
   2.217 +  assumes KL: "KL_divergence b M \<nu> = 0"
   2.218 +  shows "\<forall>A\<in>sets M. \<nu> A = \<mu> A"
   2.219 +  by (metis less_imp_neq KL_gt_0 assms)
   2.220 +
   2.221 +lemma (in information_space) KL_ge_0:
   2.222 +  assumes ps: "prob_space (M\<lparr>measure := \<nu>\<rparr>)"
   2.223 +  assumes ac: "absolutely_continuous \<nu>"
   2.224 +  assumes int: "integrable (M\<lparr> measure := \<nu> \<rparr>) (entropy_density b M \<nu>)"
   2.225 +  shows "0 \<le> KL_divergence b M \<nu>"
   2.226 +  using KL_eq_0 KL_gt_0[OF ps ac int]
   2.227 +  by (cases "\<forall>A\<in>sets M. \<nu> A = measure M A") (auto simp: le_less)
   2.228 +
   2.229  
   2.230  lemma (in sigma_finite_measure) KL_divergence_vimage:
   2.231    assumes T: "T \<in> measure_preserving M M'"
   2.232 @@ -209,7 +409,7 @@
   2.233    have AE: "AE x. RN_deriv M' \<nu>' (T x) = RN_deriv M \<nu> x"
   2.234      by (rule RN_deriv_vimage[OF T T' inv \<nu>'])
   2.235    show ?thesis
   2.236 -    unfolding KL_divergence_def
   2.237 +    unfolding KL_divergence_def entropy_density_def comp_def
   2.238    proof (subst \<nu>'.integral_vimage[OF sa T'])
   2.239      show "(\<lambda>x. log b (real (RN_deriv M \<nu> x))) \<in> borel_measurable (M\<lparr>measure := \<nu>\<rparr>)"
   2.240        by (auto intro!: RN_deriv[OF M ac] borel_measurable_log[OF _ `1 < b`])
   2.241 @@ -233,9 +433,9 @@
   2.242  proof -
   2.243    interpret \<nu>: measure_space ?\<nu> by fact
   2.244    have "KL_divergence b M \<nu> = \<integral>x. log b (real (RN_deriv N \<nu>' x)) \<partial>?\<nu>"
   2.245 -    by (simp cong: RN_deriv_cong \<nu>.integral_cong add: KL_divergence_def)
   2.246 +    by (simp cong: RN_deriv_cong \<nu>.integral_cong add: KL_divergence_def entropy_density_def)
   2.247    also have "\<dots> = KL_divergence b N \<nu>'"
   2.248 -    by (auto intro!: \<nu>.integral_cong_measure[symmetric] simp: KL_divergence_def)
   2.249 +    by (auto intro!: \<nu>.integral_cong_measure[symmetric] simp: KL_divergence_def entropy_density_def comp_def)
   2.250    finally show ?thesis .
   2.251  qed
   2.252  
   2.253 @@ -243,7 +443,7 @@
   2.254    assumes v: "finite_measure_space (M\<lparr>measure := \<nu>\<rparr>)"
   2.255    assumes ac: "absolutely_continuous \<nu>"
   2.256    shows "KL_divergence b M \<nu> = (\<Sum>x\<in>space M. real (\<nu> {x}) * log b (real (\<nu> {x}) / real (\<mu> {x})))" (is "_ = ?sum")
   2.257 -proof (simp add: KL_divergence_def finite_measure_space.integral_finite_singleton[OF v])
   2.258 +proof (simp add: KL_divergence_def finite_measure_space.integral_finite_singleton[OF v] entropy_density_def)
   2.259    interpret v: finite_measure_space "M\<lparr>measure := \<nu>\<rparr>" by fact
   2.260    have ms: "measure_space (M\<lparr>measure := \<nu>\<rparr>)" by default
   2.261    show "(\<Sum>x \<in> space M. log b (real (RN_deriv M \<nu> x)) * real (\<nu> {x})) = ?sum"
   2.262 @@ -257,27 +457,10 @@
   2.263    and "1 < b"
   2.264    shows "0 \<le> KL_divergence b M \<nu>"
   2.265  proof -
   2.266 +  interpret information_space M by default fact
   2.267    interpret v: finite_prob_space "M\<lparr>measure := \<nu>\<rparr>" by fact
   2.268 -  have ms: "finite_measure_space (M\<lparr>measure := \<nu>\<rparr>)" by default
   2.269 -
   2.270 -  have "- (KL_divergence b M \<nu>) \<le> log b (\<Sum>x\<in>space M. real (\<mu> {x}))"
   2.271 -  proof (subst KL_divergence_eq_finite[OF ms ac], safe intro!: log_setsum_divide not_empty)
   2.272 -    show "finite (space M)" using finite_space by simp
   2.273 -    show "1 < b" by fact
   2.274 -    show "(\<Sum>x\<in>space M. real (\<nu> {x})) = 1"
   2.275 -      using v.finite_sum_over_space_eq_1 by (simp add: v.\<mu>'_def)
   2.276 -
   2.277 -    fix x assume "x \<in> space M"
   2.278 -    then have x: "{x} \<in> sets M" unfolding sets_eq_Pow by auto
   2.279 -    { assume "0 < real (\<nu> {x})"
   2.280 -      then have "\<nu> {x} \<noteq> 0" by auto
   2.281 -      then have "\<mu> {x} \<noteq> 0"
   2.282 -        using ac[unfolded absolutely_continuous_def, THEN bspec, of "{x}"] x by auto
   2.283 -      thus "0 < real (\<mu> {x})" using real_measure[OF x] by auto }
   2.284 -    show "0 \<le> real (\<mu> {x})" "0 \<le> real (\<nu> {x})"
   2.285 -      using real_measure[OF x] v.real_measure[of "{x}"] x by auto
   2.286 -  qed
   2.287 -  thus "0 \<le> KL_divergence b M \<nu>" using finite_sum_over_space_eq_1 by (simp add: \<mu>'_def)
   2.288 +  have ps: "prob_space (M\<lparr>measure := \<nu>\<rparr>)" by default
   2.289 +  from KL_ge_0[OF this ac v.integral_finite_singleton(1)] show ?thesis .
   2.290  qed
   2.291  
   2.292  subsection {* Mutual Information *}
   2.293 @@ -287,6 +470,163 @@
   2.294      KL_divergence b (S\<lparr>measure := extreal\<circ>distribution X\<rparr> \<Otimes>\<^isub>M T\<lparr>measure := extreal\<circ>distribution Y\<rparr>)
   2.295        (extreal\<circ>joint_distribution X Y)"
   2.296  
   2.297 +lemma (in information_space)
   2.298 +  fixes S T X Y
   2.299 +  defines "P \<equiv> S\<lparr>measure := extreal\<circ>distribution X\<rparr> \<Otimes>\<^isub>M T\<lparr>measure := extreal\<circ>distribution Y\<rparr>"
   2.300 +  shows "indep_var S X T Y \<longleftrightarrow>
   2.301 +    (random_variable S X \<and> random_variable T Y \<and>
   2.302 +      measure_space.absolutely_continuous P (extreal\<circ>joint_distribution X Y) \<and>
   2.303 +      integrable (P\<lparr>measure := (extreal\<circ>joint_distribution X Y)\<rparr>)
   2.304 +        (entropy_density b P (extreal\<circ>joint_distribution X Y)) \<and>
   2.305 +     mutual_information b S T X Y = 0)"
   2.306 +proof safe
   2.307 +  assume indep: "indep_var S X T Y"
   2.308 +  then have "random_variable S X" "random_variable T Y"
   2.309 +    by (blast dest: indep_var_rv1 indep_var_rv2)+
   2.310 +  then show "sigma_algebra S" "X \<in> measurable M S" "sigma_algebra T" "Y \<in> measurable M T"
   2.311 +    by blast+
   2.312 +
   2.313 +  interpret X: prob_space "S\<lparr>measure := extreal\<circ>distribution X\<rparr>"
   2.314 +    by (rule distribution_prob_space) fact
   2.315 +  interpret Y: prob_space "T\<lparr>measure := extreal\<circ>distribution Y\<rparr>"
   2.316 +    by (rule distribution_prob_space) fact
   2.317 +  interpret XY: pair_prob_space "S\<lparr>measure := extreal\<circ>distribution X\<rparr>" "T\<lparr>measure := extreal\<circ>distribution Y\<rparr>" by default
   2.318 +  interpret XY: information_space XY.P b by default (rule b_gt_1)
   2.319 +
   2.320 +  let ?J = "XY.P\<lparr> measure := (extreal\<circ>joint_distribution X Y) \<rparr>"
   2.321 +  { fix A assume "A \<in> sets XY.P"
   2.322 +    then have "extreal (joint_distribution X Y A) = XY.\<mu> A"
   2.323 +      using indep_var_distributionD[OF indep]
   2.324 +      by (simp add: XY.P.finite_measure_eq) }
   2.325 +  note j_eq = this
   2.326 +
   2.327 +  interpret J: prob_space ?J
   2.328 +    using j_eq by (intro XY.prob_space_cong) auto
   2.329 +
   2.330 +  have ac: "XY.absolutely_continuous (extreal\<circ>joint_distribution X Y)"
   2.331 +    by (simp add: XY.absolutely_continuous_def j_eq)
   2.332 +  then show "measure_space.absolutely_continuous P (extreal\<circ>joint_distribution X Y)"
   2.333 +    unfolding P_def .
   2.334 +
   2.335 +  have ed: "entropy_density b XY.P (extreal\<circ>joint_distribution X Y) \<in> borel_measurable XY.P"
   2.336 +    by (rule XY.measurable_entropy_density) (default | fact)+
   2.337 +
   2.338 +  have "AE x in XY.P. 1 = RN_deriv XY.P (extreal\<circ>joint_distribution X Y) x"
   2.339 +  proof (rule XY.RN_deriv_unique[OF _ ac])
   2.340 +    show "measure_space ?J" by default
   2.341 +    fix A assume "A \<in> sets XY.P"
   2.342 +    then show "(extreal\<circ>joint_distribution X Y) A = (\<integral>\<^isup>+ x. 1 * indicator A x \<partial>XY.P)"
   2.343 +      by (simp add: j_eq)
   2.344 +  qed (insert XY.measurable_const[of 1 borel], auto)
   2.345 +  then have ae_XY: "AE x in XY.P. entropy_density b XY.P (extreal\<circ>joint_distribution X Y) x = 0"
   2.346 +    by (elim XY.AE_mp) (simp add: entropy_density_def)
   2.347 +  have ae_J: "AE x in ?J. entropy_density b XY.P (extreal\<circ>joint_distribution X Y) x = 0"
   2.348 +  proof (rule XY.absolutely_continuous_AE)
   2.349 +    show "measure_space ?J" by default
   2.350 +    show "XY.absolutely_continuous (measure ?J)"
   2.351 +      using ac by simp
   2.352 +  qed (insert ae_XY, simp_all)
   2.353 +  then show "integrable (P\<lparr>measure := (extreal\<circ>joint_distribution X Y)\<rparr>)
   2.354 +        (entropy_density b P (extreal\<circ>joint_distribution X Y))"
   2.355 +    unfolding P_def
   2.356 +    using ed XY.measurable_const[of 0 borel]
   2.357 +    by (subst J.integrable_cong_AE) auto
   2.358 +
   2.359 +  show "mutual_information b S T X Y = 0"
   2.360 +    unfolding mutual_information_def KL_divergence_def P_def
   2.361 +    by (subst J.integral_cong_AE[OF ae_J]) simp
   2.362 +next
   2.363 +  assume "sigma_algebra S" "X \<in> measurable M S" "sigma_algebra T" "Y \<in> measurable M T"
   2.364 +  then have rvs: "random_variable S X" "random_variable T Y" by blast+
   2.365 +
   2.366 +  interpret X: prob_space "S\<lparr>measure := extreal\<circ>distribution X\<rparr>"
   2.367 +    by (rule distribution_prob_space) fact
   2.368 +  interpret Y: prob_space "T\<lparr>measure := extreal\<circ>distribution Y\<rparr>"
   2.369 +    by (rule distribution_prob_space) fact
   2.370 +  interpret XY: pair_prob_space "S\<lparr>measure := extreal\<circ>distribution X\<rparr>" "T\<lparr>measure := extreal\<circ>distribution Y\<rparr>" by default
   2.371 +  interpret XY: information_space XY.P b by default (rule b_gt_1)
   2.372 +
   2.373 +  let ?J = "XY.P\<lparr> measure := (extreal\<circ>joint_distribution X Y) \<rparr>"
   2.374 +  interpret J: prob_space ?J
   2.375 +    using rvs by (intro joint_distribution_prob_space) auto
   2.376 +
   2.377 +  assume ac: "measure_space.absolutely_continuous P (extreal\<circ>joint_distribution X Y)"
   2.378 +  assume int: "integrable (P\<lparr>measure := (extreal\<circ>joint_distribution X Y)\<rparr>)
   2.379 +        (entropy_density b P (extreal\<circ>joint_distribution X Y))"
   2.380 +  assume I_eq_0: "mutual_information b S T X Y = 0"
   2.381 +
   2.382 +  have eq: "\<forall>A\<in>sets XY.P. (extreal \<circ> joint_distribution X Y) A = XY.\<mu> A"
   2.383 +  proof (rule XY.KL_eq_0_imp)
   2.384 +    show "prob_space ?J" by default
   2.385 +    show "XY.absolutely_continuous (extreal\<circ>joint_distribution X Y)"
   2.386 +      using ac by (simp add: P_def)
   2.387 +    show "integrable ?J (entropy_density b XY.P (extreal\<circ>joint_distribution X Y))"
   2.388 +      using int by (simp add: P_def)
   2.389 +    show "KL_divergence b XY.P (extreal\<circ>joint_distribution X Y) = 0"
   2.390 +      using I_eq_0 unfolding mutual_information_def by (simp add: P_def)
   2.391 +  qed
   2.392 +
   2.393 +  { fix S X assume "sigma_algebra S"
   2.394 +    interpret S: sigma_algebra S by fact
   2.395 +    have "Int_stable \<lparr>space = space M, sets = {X -` A \<inter> space M |A. A \<in> sets S}\<rparr>"
   2.396 +    proof (safe intro!: Int_stableI)
   2.397 +      fix A B assume "A \<in> sets S" "B \<in> sets S"
   2.398 +      then show "\<exists>C. (X -` A \<inter> space M) \<inter> (X -` B \<inter> space M) = (X -` C \<inter> space M) \<and> C \<in> sets S"
   2.399 +        by (intro exI[of _ "A \<inter> B"]) auto
   2.400 +    qed }
   2.401 +  note Int_stable = this
   2.402 +
   2.403 +  show "indep_var S X T Y" unfolding indep_var_eq
   2.404 +  proof (intro conjI indep_set_sigma_sets Int_stable)
   2.405 +    show "indep_set {X -` A \<inter> space M |A. A \<in> sets S} {Y -` A \<inter> space M |A. A \<in> sets T}"
   2.406 +    proof (safe intro!: indep_setI)
   2.407 +      { fix A assume "A \<in> sets S" then show "X -` A \<inter> space M \<in> sets M"
   2.408 +        using `X \<in> measurable M S` by (auto intro: measurable_sets) }
   2.409 +      { fix A assume "A \<in> sets T" then show "Y -` A \<inter> space M \<in> sets M"
   2.410 +        using `Y \<in> measurable M T` by (auto intro: measurable_sets) }
   2.411 +    next
   2.412 +      fix A B assume ab: "A \<in> sets S" "B \<in> sets T"
   2.413 +      have "extreal (prob ((X -` A \<inter> space M) \<inter> (Y -` B \<inter> space M))) =
   2.414 +        extreal (joint_distribution X Y (A \<times> B))"
   2.415 +        unfolding distribution_def
   2.416 +        by (intro arg_cong[where f="\<lambda>C. extreal (prob C)"]) auto
   2.417 +      also have "\<dots> = XY.\<mu> (A \<times> B)"
   2.418 +        using ab eq by (auto simp: XY.finite_measure_eq)
   2.419 +      also have "\<dots> = extreal (distribution X A) * extreal (distribution Y B)"
   2.420 +        using ab by (simp add: XY.pair_measure_times)
   2.421 +      finally show "prob ((X -` A \<inter> space M) \<inter> (Y -` B \<inter> space M)) =
   2.422 +        prob (X -` A \<inter> space M) * prob (Y -` B \<inter> space M)"
   2.423 +        unfolding distribution_def by simp
   2.424 +    qed
   2.425 +  qed fact+
   2.426 +qed
   2.427 +
   2.428 +lemma (in information_space) mutual_information_commute_generic:
   2.429 +  assumes X: "random_variable S X" and Y: "random_variable T Y"
   2.430 +  assumes ac: "measure_space.absolutely_continuous
   2.431 +    (S\<lparr>measure := extreal\<circ>distribution X\<rparr> \<Otimes>\<^isub>M T\<lparr>measure := extreal\<circ>distribution Y\<rparr>) (extreal\<circ>joint_distribution X Y)"
   2.432 +  shows "mutual_information b S T X Y = mutual_information b T S Y X"
   2.433 +proof -
   2.434 +  let ?S = "S\<lparr>measure := extreal\<circ>distribution X\<rparr>" and ?T = "T\<lparr>measure := extreal\<circ>distribution Y\<rparr>"
   2.435 +  interpret S: prob_space ?S using X by (rule distribution_prob_space)
   2.436 +  interpret T: prob_space ?T using Y by (rule distribution_prob_space)
   2.437 +  interpret P: pair_prob_space ?S ?T ..
   2.438 +  interpret Q: pair_prob_space ?T ?S ..
   2.439 +  show ?thesis
   2.440 +    unfolding mutual_information_def
   2.441 +  proof (intro Q.KL_divergence_vimage[OF Q.measure_preserving_swap _ _ _ _ ac b_gt_1])
   2.442 +    show "(\<lambda>(x,y). (y,x)) \<in> measure_preserving
   2.443 +      (P.P \<lparr> measure := extreal\<circ>joint_distribution X Y\<rparr>) (Q.P \<lparr> measure := extreal\<circ>joint_distribution Y X\<rparr>)"
   2.444 +      using X Y unfolding measurable_def
   2.445 +      unfolding measure_preserving_def using P.pair_sigma_algebra_swap_measurable
   2.446 +      by (auto simp add: space_pair_measure distribution_def intro!: arg_cong[where f=\<mu>'])
   2.447 +    have "prob_space (P.P\<lparr> measure := extreal\<circ>joint_distribution X Y\<rparr>)"
   2.448 +      using X Y by (auto intro!: distribution_prob_space random_variable_pairI)
   2.449 +    then show "measure_space (P.P\<lparr> measure := extreal\<circ>joint_distribution X Y\<rparr>)"
   2.450 +      unfolding prob_space_def by simp
   2.451 +  qed auto
   2.452 +qed
   2.453 +
   2.454  definition (in prob_space)
   2.455    "entropy b s X = mutual_information b s s X X"
   2.456  
   2.457 @@ -356,32 +696,6 @@
   2.458      unfolding mutual_information_def .
   2.459  qed
   2.460  
   2.461 -lemma (in information_space) mutual_information_commute_generic:
   2.462 -  assumes X: "random_variable S X" and Y: "random_variable T Y"
   2.463 -  assumes ac: "measure_space.absolutely_continuous
   2.464 -    (S\<lparr>measure := extreal\<circ>distribution X\<rparr> \<Otimes>\<^isub>M T\<lparr>measure := extreal\<circ>distribution Y\<rparr>) (extreal\<circ>joint_distribution X Y)"
   2.465 -  shows "mutual_information b S T X Y = mutual_information b T S Y X"
   2.466 -proof -
   2.467 -  let ?S = "S\<lparr>measure := extreal\<circ>distribution X\<rparr>" and ?T = "T\<lparr>measure := extreal\<circ>distribution Y\<rparr>"
   2.468 -  interpret S: prob_space ?S using X by (rule distribution_prob_space)
   2.469 -  interpret T: prob_space ?T using Y by (rule distribution_prob_space)
   2.470 -  interpret P: pair_prob_space ?S ?T ..
   2.471 -  interpret Q: pair_prob_space ?T ?S ..
   2.472 -  show ?thesis
   2.473 -    unfolding mutual_information_def
   2.474 -  proof (intro Q.KL_divergence_vimage[OF Q.measure_preserving_swap _ _ _ _ ac b_gt_1])
   2.475 -    show "(\<lambda>(x,y). (y,x)) \<in> measure_preserving
   2.476 -      (P.P \<lparr> measure := extreal\<circ>joint_distribution X Y\<rparr>) (Q.P \<lparr> measure := extreal\<circ>joint_distribution Y X\<rparr>)"
   2.477 -      using X Y unfolding measurable_def
   2.478 -      unfolding measure_preserving_def using P.pair_sigma_algebra_swap_measurable
   2.479 -      by (auto simp add: space_pair_measure distribution_def intro!: arg_cong[where f=\<mu>'])
   2.480 -    have "prob_space (P.P\<lparr> measure := extreal\<circ>joint_distribution X Y\<rparr>)"
   2.481 -      using X Y by (auto intro!: distribution_prob_space random_variable_pairI)
   2.482 -    then show "measure_space (P.P\<lparr> measure := extreal\<circ>joint_distribution X Y\<rparr>)"
   2.483 -      unfolding prob_space_def by simp
   2.484 -  qed auto
   2.485 -qed
   2.486 -
   2.487  lemma (in information_space) mutual_information_commute:
   2.488    assumes X: "finite_random_variable S X" and Y: "finite_random_variable T Y"
   2.489    shows "mutual_information b S T X Y = mutual_information b T S Y X"
     3.1 --- a/src/HOL/Probability/Probability_Measure.thy	Thu Jun 09 13:55:11 2011 +0200
     3.2 +++ b/src/HOL/Probability/Probability_Measure.thy	Thu Jun 09 14:04:34 2011 +0200
     3.3 @@ -275,7 +275,7 @@
     3.4  proof -
     3.5    have "expectation X < expectation (\<lambda>x. b)"
     3.6      using gt measure_space_1
     3.7 -    by (intro integral_less_AE) auto
     3.8 +    by (intro integral_less_AE_space) auto
     3.9    then show ?thesis using prob_space by simp
    3.10  qed
    3.11  
    3.12 @@ -286,7 +286,7 @@
    3.13  proof -
    3.14    have "expectation (\<lambda>x. a) < expectation X"
    3.15      using gt measure_space_1
    3.16 -    by (intro integral_less_AE) auto
    3.17 +    by (intro integral_less_AE_space) auto
    3.18    then show ?thesis using prob_space by simp
    3.19  qed
    3.20  
     4.1 --- a/src/HOL/Probability/Radon_Nikodym.thy	Thu Jun 09 13:55:11 2011 +0200
     4.2 +++ b/src/HOL/Probability/Radon_Nikodym.thy	Thu Jun 09 14:04:34 2011 +0200
     4.3 @@ -1311,6 +1311,59 @@
     4.4      by (auto simp: RN_deriv_positive_integral[OF ms \<nu>(2)])
     4.5  qed
     4.6  
     4.7 +lemma (in sigma_finite_measure) real_RN_deriv:
     4.8 +  assumes "finite_measure (M\<lparr>measure := \<nu>\<rparr>)" (is "finite_measure ?\<nu>")
     4.9 +  assumes ac: "absolutely_continuous \<nu>"
    4.10 +  obtains D where "D \<in> borel_measurable M"
    4.11 +    and "AE x. RN_deriv M \<nu> x = extreal (D x)"
    4.12 +    and "AE x in M\<lparr>measure := \<nu>\<rparr>. 0 < D x"
    4.13 +    and "\<And>x. 0 \<le> D x"
    4.14 +proof
    4.15 +  interpret \<nu>: finite_measure ?\<nu> by fact
    4.16 +  have ms: "measure_space ?\<nu>" by default
    4.17 +  note RN = RN_deriv[OF ms ac]
    4.18 +
    4.19 +  let ?RN = "\<lambda>t. {x \<in> space M. RN_deriv M \<nu> x = t}"
    4.20 +
    4.21 +  show "(\<lambda>x. real (RN_deriv M \<nu> x)) \<in> borel_measurable M"
    4.22 +    using RN by auto
    4.23 +
    4.24 +  have "\<nu> (?RN \<infinity>) = (\<integral>\<^isup>+ x. RN_deriv M \<nu> x * indicator (?RN \<infinity>) x \<partial>M)"
    4.25 +    using RN by auto
    4.26 +  also have "\<dots> = (\<integral>\<^isup>+ x. \<infinity> * indicator (?RN \<infinity>) x \<partial>M)"
    4.27 +    by (intro positive_integral_cong) (auto simp: indicator_def)
    4.28 +  also have "\<dots> = \<infinity> * \<mu> (?RN \<infinity>)"
    4.29 +    using RN by (intro positive_integral_cmult_indicator) auto
    4.30 +  finally have eq: "\<nu> (?RN \<infinity>) = \<infinity> * \<mu> (?RN \<infinity>)" .
    4.31 +  moreover
    4.32 +  have "\<mu> (?RN \<infinity>) = 0"
    4.33 +  proof (rule ccontr)
    4.34 +    assume "\<mu> {x \<in> space M. RN_deriv M \<nu> x = \<infinity>} \<noteq> 0"
    4.35 +    moreover from RN have "0 \<le> \<mu> {x \<in> space M. RN_deriv M \<nu> x = \<infinity>}" by auto
    4.36 +    ultimately have "0 < \<mu> {x \<in> space M. RN_deriv M \<nu> x = \<infinity>}" by auto
    4.37 +    with eq have "\<nu> (?RN \<infinity>) = \<infinity>" by simp
    4.38 +    with \<nu>.finite_measure[of "?RN \<infinity>"] RN show False by auto
    4.39 +  qed
    4.40 +  ultimately have "AE x. RN_deriv M \<nu> x < \<infinity>"
    4.41 +    using RN by (intro AE_iff_measurable[THEN iffD2]) auto
    4.42 +  then show "AE x. RN_deriv M \<nu> x = extreal (real (RN_deriv M \<nu> x))"
    4.43 +    using RN(3) by (auto simp: extreal_real)
    4.44 +  then have eq: "AE x in (M\<lparr>measure := \<nu>\<rparr>) . RN_deriv M \<nu> x = extreal (real (RN_deriv M \<nu> x))"
    4.45 +    using ac absolutely_continuous_AE[OF ms] by auto
    4.46 +
    4.47 +  show "\<And>x. 0 \<le> real (RN_deriv M \<nu> x)"
    4.48 +    using RN by (auto intro: real_of_extreal_pos)
    4.49 +
    4.50 +  have "\<nu> (?RN 0) = (\<integral>\<^isup>+ x. RN_deriv M \<nu> x * indicator (?RN 0) x \<partial>M)"
    4.51 +    using RN by auto
    4.52 +  also have "\<dots> = (\<integral>\<^isup>+ x. 0 \<partial>M)"
    4.53 +    by (intro positive_integral_cong) (auto simp: indicator_def)
    4.54 +  finally have "AE x in (M\<lparr>measure := \<nu>\<rparr>). RN_deriv M \<nu> x \<noteq> 0"
    4.55 +    using RN by (intro \<nu>.AE_iff_measurable[THEN iffD2]) auto
    4.56 +  with RN(3) eq show "AE x in (M\<lparr>measure := \<nu>\<rparr>). 0 < real (RN_deriv M \<nu> x)"
    4.57 +    by (auto simp: zero_less_real_of_extreal le_less)
    4.58 +qed
    4.59 +
    4.60  lemma (in sigma_finite_measure) RN_deriv_singleton:
    4.61    assumes "measure_space (M\<lparr>measure := \<nu>\<rparr>)"
    4.62    and ac: "absolutely_continuous \<nu>"